AINode.jsAutomation

SQL Query Builder from Natural Language

TT
TopicTrick Team
SQL Query Builder from Natural Language

SQL Query Builder from Natural Language

Ask your database a question in plain English and get back a correct SQL query — with explanation and optional execution. This tool reads your actual database schema, feeds it to GPT-4o, and generates accurate queries tailored to your exact table and column names.

This is Tool 25 of the Build 50 AI Automation Tools course.


What You'll Build

  • Schema introspection — automatically reads table structure from your database
  • Natural language → SQL translation with GPT-4o
  • Query explanation in plain English
  • Safe auto-execution (SELECT only by default)
  • Interactive CLI for rapid querying

Setup

bash
mkdir nl-to-sql && cd nl-to-sql
npm init -y
npm install express better-sqlite3 openai dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
DB_PATH=./database.sqlite
DB_DIALECT=sqlite   # sqlite | postgres | mysql
PORT=3000

Schema Introspection Service

js
// src/services/schemaService.js
import Database from 'better-sqlite3';

let cachedSchema = null;

export function getSchema(dbPath) {
  if (cachedSchema) return cachedSchema;

  const db = new Database(dbPath, { readonly: true });

  const tables = db.prepare(
    `SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'`
  ).all();

  const schema = {};

  for (const { name } of tables) {
    const columns = db.prepare(`PRAGMA table_info(${name})`).all();
    const foreignKeys = db.prepare(`PRAGMA foreign_key_list(${name})`).all();

    schema[name] = {
      columns: columns.map(c => ({
        name: c.name,
        type: c.type,
        notNull: c.notnull === 1,
        primaryKey: c.pk === 1,
        defaultValue: c.dflt_value,
      })),
      foreignKeys: foreignKeys.map(fk => ({
        column: fk.from,
        referencesTable: fk.table,
        referencesColumn: fk.to,
      })),
    };
  }

  db.close();
  cachedSchema = schema;
  return schema;
}

export function schemaToPromptString(schema) {
  return Object.entries(schema).map(([table, info]) => {
    const cols = info.columns.map(c => {
      let def = `  ${c.name} ${c.type}`;
      if (c.primaryKey) def += ' PRIMARY KEY';
      if (c.notNull) def += ' NOT NULL';
      return def;
    }).join(',\n');

    const fks = info.foreignKeys.map(fk =>
      `  -- ${fk.column} references ${fk.referencesTable}(${fk.referencesColumn})`
    ).join('\n');

    return `TABLE ${table}:\n${cols}${fks ? '\n' + fks : ''}`;
  }).join('\n\n');
}

export function refreshSchema() {
  cachedSchema = null;
}

Query Generation Service

js
// src/services/queryService.js
import Database from 'better-sqlite3';
import OpenAI from 'openai';
import { getSchema, schemaToPromptString } from './schemaService.js';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function generateQuery(question, schemaString, dialect = 'sqlite') {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are an expert SQL developer specialising in ${dialect.toUpperCase()}.
Given the database schema and a natural language question, generate the correct SQL query.

Database Schema:
${schemaString}

Rules:
- Use only tables and columns that exist in the schema above
- Generate ${dialect.toUpperCase()}-compatible syntax
- Use table aliases for readability in JOINs
- For aggregations, always include GROUP BY where needed
- Limit results to 100 rows unless the question asks for a count or total
- Never generate DROP, TRUNCATE, or ALTER statements

Return ONLY a JSON object:
{
  "sql": "the SQL query",
  "explanation": "plain English explanation of what this query does and how",
  "queryType": "SELECT | INSERT | UPDATE | DELETE",
  "tables": ["list of tables used"],
  "warnings": ["any warnings or assumptions made"]
}`,
      },
      { role: 'user', content: question },
    ],
    temperature: 0.1,
    response_format: { type: 'json_object' },
  });

  return JSON.parse(response.choices[0].message.content);
}

function executeQuery(dbPath, sql) {
  const db = new Database(dbPath, { readonly: true });
  try {
    const stmt = db.prepare(sql);
    const rows = stmt.all();
    return { rows, rowCount: rows.length };
  } finally {
    db.close();
  }
}

export async function queryFromNaturalLanguage(question, dbPath, dialect, execute = false) {
  const schema = getSchema(dbPath);
  const schemaString = schemaToPromptString(schema);

  const generated = await generateQuery(question, schemaString, dialect);

  const result = { question, ...generated };

  if (execute && generated.queryType === 'SELECT') {
    try {
      const { rows, rowCount } = executeQuery(dbPath, generated.sql);
      result.results = rows;
      result.rowCount = rowCount;
    } catch (err) {
      result.executionError = err.message;
    }
  }

  return result;
}

Server

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import { queryFromNaturalLanguage } from './services/queryService.js';
import { getSchema, refreshSchema } from './services/schemaService.js';

const app = express();
app.use(express.json());

const DB_PATH  = process.env.DB_PATH  || './database.sqlite';
const DIALECT  = process.env.DB_DIALECT || 'sqlite';

app.post('/query', async (req, res, next) => {
  try {
    const { question, execute = false } = req.body;
    if (!question?.trim()) return res.status(400).json({ error: 'Question is required' });

    const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
    res.json({ success: true, ...result });
  } catch (err) { next(err); }
});

app.get('/schema', (_req, res) => {
  const schema = getSchema(DB_PATH);
  res.json({ success: true, tables: Object.keys(schema), schema });
});

app.post('/schema/refresh', (_req, res) => {
  refreshSchema();
  res.json({ success: true, message: 'Schema cache cleared' });
});

app.use((err, _req, res, _next) => res.status(500).json({ error: err.message }));
app.listen(process.env.PORT ?? 3000, () => console.log('NL-to-SQL running'));

Testing

bash
curl -X POST http://localhost:3000/query \
  -H "Content-Type: application/json" \
  -d '{
    "question": "Show me the top 5 customers by total order value in the last 30 days",
    "execute": true
  }'

Sample response:

json
{
  "question": "Show me the top 5 customers by total order value in the last 30 days",
  "sql": "SELECT c.id, c.name, c.email, SUM(o.total_amount) AS total_spent\nFROM customers c\nJOIN orders o ON o.customer_id = c.id\nWHERE o.created_at >= datetime('now', '-30 days')\nGROUP BY c.id, c.name, c.email\nORDER BY total_spent DESC\nLIMIT 5",
  "explanation": "Joins the customers and orders tables, filters to orders from the last 30 days, sums the total_amount per customer, and returns the top 5 by total spend.",
  "queryType": "SELECT",
  "tables": ["customers", "orders"],
  "warnings": [],
  "rowCount": 5,
  "results": [
    { "id": 42, "name": "Acme Corp", "email": "billing@acme.com", "total_spent": 15420.00 }
  ]
}

Interactive CLI

js
// src/cli.js
import 'dotenv/config';
import readline from 'readline';
import { queryFromNaturalLanguage } from './services/queryService.js';

const DB_PATH = process.env.DB_PATH || './database.sqlite';
const DIALECT = process.env.DB_DIALECT || 'sqlite';

const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

console.log('NL-to-SQL CLI (type "exit" to quit, "exec: <question>" to also run the query)\n');

function ask() {
  rl.question('> ', async input => {
    if (input.toLowerCase() === 'exit') { rl.close(); return; }

    const execute = input.startsWith('exec: ');
    const question = execute ? input.slice(6) : input;

    try {
      const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
      console.log('\nSQL:\n' + result.sql);
      console.log('\nExplanation:', result.explanation);
      if (result.results) {
        console.log(`\nResults (${result.rowCount} rows):`);
        console.table(result.results.slice(0, 10));
      }
    } catch (err) {
      console.error('Error:', err.message);
    }
    console.log('');
    ask();
  });
}
ask();
bash
node src/cli.js
> How many users signed up each month this year?
> exec: What is the average order value by product category?

Build 50 AI Automation Tools — Tool 25 of 50

NL-to-SQL is live. Continue to Tool 26 to build an AI git commit message generator.


    Summary

    • Schema introspection grounds GPT-4o in your actual table structure — no hallucinated columns
    • SELECT-only execution by default prevents accidental data modification
    • Dialect flag adapts syntax for SQLite, PostgreSQL, MySQL, or SQL Server
    • Warnings field surfaces assumptions the AI made — useful for validating complex queries
    • The interactive CLI makes this a powerful tool for analysts who know the data but not the SQL

    Continue to Tool 26: AI Git Commit Message Generator →