AINode.jsAutomation
SQL Query Builder from Natural Language
TT
TopicTrick Team
SQL Query Builder from Natural Language
Ask your database a question in plain English and get back a correct SQL query — with explanation and optional execution. This tool reads your actual database schema, feeds it to GPT-4o, and generates accurate queries tailored to your exact table and column names.
This is Tool 25 of the Build 50 AI Automation Tools course.
What You'll Build
- Schema introspection — automatically reads table structure from your database
- Natural language → SQL translation with GPT-4o
- Query explanation in plain English
- Safe auto-execution (SELECT only by default)
- Interactive CLI for rapid querying
Setup
bash
mkdir nl-to-sql && cd nl-to-sql
npm init -y
npm install express better-sqlite3 openai dotenvbash
# .env
OPENAI_API_KEY=sk-your-key-here
DB_PATH=./database.sqlite
DB_DIALECT=sqlite # sqlite | postgres | mysql
PORT=3000Schema Introspection Service
js
// src/services/schemaService.js
import Database from 'better-sqlite3';
let cachedSchema = null;
export function getSchema(dbPath) {
if (cachedSchema) return cachedSchema;
const db = new Database(dbPath, { readonly: true });
const tables = db.prepare(
`SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'`
).all();
const schema = {};
for (const { name } of tables) {
const columns = db.prepare(`PRAGMA table_info(${name})`).all();
const foreignKeys = db.prepare(`PRAGMA foreign_key_list(${name})`).all();
schema[name] = {
columns: columns.map(c => ({
name: c.name,
type: c.type,
notNull: c.notnull === 1,
primaryKey: c.pk === 1,
defaultValue: c.dflt_value,
})),
foreignKeys: foreignKeys.map(fk => ({
column: fk.from,
referencesTable: fk.table,
referencesColumn: fk.to,
})),
};
}
db.close();
cachedSchema = schema;
return schema;
}
export function schemaToPromptString(schema) {
return Object.entries(schema).map(([table, info]) => {
const cols = info.columns.map(c => {
let def = ` ${c.name} ${c.type}`;
if (c.primaryKey) def += ' PRIMARY KEY';
if (c.notNull) def += ' NOT NULL';
return def;
}).join(',\n');
const fks = info.foreignKeys.map(fk =>
` -- ${fk.column} references ${fk.referencesTable}(${fk.referencesColumn})`
).join('\n');
return `TABLE ${table}:\n${cols}${fks ? '\n' + fks : ''}`;
}).join('\n\n');
}
export function refreshSchema() {
cachedSchema = null;
}Query Generation Service
js
// src/services/queryService.js
import Database from 'better-sqlite3';
import OpenAI from 'openai';
import { getSchema, schemaToPromptString } from './schemaService.js';
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
async function generateQuery(question, schemaString, dialect = 'sqlite') {
const response = await openai.chat.completions.create({
model: 'gpt-4o',
messages: [
{
role: 'system',
content: `You are an expert SQL developer specialising in ${dialect.toUpperCase()}.
Given the database schema and a natural language question, generate the correct SQL query.
Database Schema:
${schemaString}
Rules:
- Use only tables and columns that exist in the schema above
- Generate ${dialect.toUpperCase()}-compatible syntax
- Use table aliases for readability in JOINs
- For aggregations, always include GROUP BY where needed
- Limit results to 100 rows unless the question asks for a count or total
- Never generate DROP, TRUNCATE, or ALTER statements
Return ONLY a JSON object:
{
"sql": "the SQL query",
"explanation": "plain English explanation of what this query does and how",
"queryType": "SELECT | INSERT | UPDATE | DELETE",
"tables": ["list of tables used"],
"warnings": ["any warnings or assumptions made"]
}`,
},
{ role: 'user', content: question },
],
temperature: 0.1,
response_format: { type: 'json_object' },
});
return JSON.parse(response.choices[0].message.content);
}
function executeQuery(dbPath, sql) {
const db = new Database(dbPath, { readonly: true });
try {
const stmt = db.prepare(sql);
const rows = stmt.all();
return { rows, rowCount: rows.length };
} finally {
db.close();
}
}
export async function queryFromNaturalLanguage(question, dbPath, dialect, execute = false) {
const schema = getSchema(dbPath);
const schemaString = schemaToPromptString(schema);
const generated = await generateQuery(question, schemaString, dialect);
const result = { question, ...generated };
if (execute && generated.queryType === 'SELECT') {
try {
const { rows, rowCount } = executeQuery(dbPath, generated.sql);
result.results = rows;
result.rowCount = rowCount;
} catch (err) {
result.executionError = err.message;
}
}
return result;
}Server
js
// src/server.js
import 'dotenv/config';
import express from 'express';
import { queryFromNaturalLanguage } from './services/queryService.js';
import { getSchema, refreshSchema } from './services/schemaService.js';
const app = express();
app.use(express.json());
const DB_PATH = process.env.DB_PATH || './database.sqlite';
const DIALECT = process.env.DB_DIALECT || 'sqlite';
app.post('/query', async (req, res, next) => {
try {
const { question, execute = false } = req.body;
if (!question?.trim()) return res.status(400).json({ error: 'Question is required' });
const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
res.json({ success: true, ...result });
} catch (err) { next(err); }
});
app.get('/schema', (_req, res) => {
const schema = getSchema(DB_PATH);
res.json({ success: true, tables: Object.keys(schema), schema });
});
app.post('/schema/refresh', (_req, res) => {
refreshSchema();
res.json({ success: true, message: 'Schema cache cleared' });
});
app.use((err, _req, res, _next) => res.status(500).json({ error: err.message }));
app.listen(process.env.PORT ?? 3000, () => console.log('NL-to-SQL running'));Testing
bash
curl -X POST http://localhost:3000/query \
-H "Content-Type: application/json" \
-d '{
"question": "Show me the top 5 customers by total order value in the last 30 days",
"execute": true
}'Sample response:
json
{
"question": "Show me the top 5 customers by total order value in the last 30 days",
"sql": "SELECT c.id, c.name, c.email, SUM(o.total_amount) AS total_spent\nFROM customers c\nJOIN orders o ON o.customer_id = c.id\nWHERE o.created_at >= datetime('now', '-30 days')\nGROUP BY c.id, c.name, c.email\nORDER BY total_spent DESC\nLIMIT 5",
"explanation": "Joins the customers and orders tables, filters to orders from the last 30 days, sums the total_amount per customer, and returns the top 5 by total spend.",
"queryType": "SELECT",
"tables": ["customers", "orders"],
"warnings": [],
"rowCount": 5,
"results": [
{ "id": 42, "name": "Acme Corp", "email": "billing@acme.com", "total_spent": 15420.00 }
]
}Interactive CLI
js
// src/cli.js
import 'dotenv/config';
import readline from 'readline';
import { queryFromNaturalLanguage } from './services/queryService.js';
const DB_PATH = process.env.DB_PATH || './database.sqlite';
const DIALECT = process.env.DB_DIALECT || 'sqlite';
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
console.log('NL-to-SQL CLI (type "exit" to quit, "exec: <question>" to also run the query)\n');
function ask() {
rl.question('> ', async input => {
if (input.toLowerCase() === 'exit') { rl.close(); return; }
const execute = input.startsWith('exec: ');
const question = execute ? input.slice(6) : input;
try {
const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
console.log('\nSQL:\n' + result.sql);
console.log('\nExplanation:', result.explanation);
if (result.results) {
console.log(`\nResults (${result.rowCount} rows):`);
console.table(result.results.slice(0, 10));
}
} catch (err) {
console.error('Error:', err.message);
}
console.log('');
ask();
});
}
ask();bash
node src/cli.js
> How many users signed up each month this year?
> exec: What is the average order value by product category?Build 50 AI Automation Tools — Tool 25 of 50
NL-to-SQL is live. Continue to Tool 26 to build an AI git commit message generator.
Summary
- Schema introspection grounds GPT-4o in your actual table structure — no hallucinated columns
- SELECT-only execution by default prevents accidental data modification
- Dialect flag adapts syntax for SQLite, PostgreSQL, MySQL, or SQL Server
- Warnings field surfaces assumptions the AI made — useful for validating complex queries
- The interactive CLI makes this a powerful tool for analysts who know the data but not the SQL
Continue to Tool 26: AI Git Commit Message Generator →
