AI · Node.js · Automation
Contract Clause Extractor with LLM
Build an AI tool that reads legal contracts and extracts key clauses — payment terms, termination conditions, liability caps, and renewal dates — using Node.js and GPT-4o structured output.
TT
Emily Ross
Contract Clause Extractor with LLM
Legal review is expensive. This tool reads any commercial contract and extracts the clauses that matter most — payment terms, termination rights, liability caps, auto-renewal dates, and risk flags — in seconds using GPT-4o.
This is Tool 3 of the Build 50 AI Automation Tools course.
What You'll Build
A REST API that:
- POST /analyze — accepts a contract PDF and returns a structured clause extraction
- Identifies 10+ standard clause types and flags missing or risky clauses
- Returns a risk summary with a plain-English assessment
Setup
bash
mkdir contract-extractor && cd contract-extractor
npm init -y
npm install express multer pdf-parse openai dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000
Contract Analysis Service
js
// src/services/contractService.js
import pdfParse from 'pdf-parse';
import OpenAI from 'openai';
// OpenAI client used by this module; the API key is read from the
// OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Description of the JSON object the model is asked to return. It is
// interpolated verbatim into the system prompt, so it is prompt text rather
// than a formal JSON Schema (note the "true/false" and "string or null"
// placeholders, which are instructions to the model, not valid JSON values).
const CLAUSE_SCHEMA = `{
"contractType": "string (e.g. NDA, SaaS Agreement, Employment Contract)",
"parties": [{ "name": "string", "role": "string (e.g. Vendor, Client)" }],
"effectiveDate": "string or null",
"expirationDate": "string or null",
"autoRenewal": {
"found": true/false,
"terms": "string or null",
"noticePeriod": "string or null"
},
"paymentTerms": {
"found": true/false,
"amount": "string or null",
"schedule": "string or null",
"latePenalty": "string or null"
},
"terminationRights": {
"found": true/false,
"forCause": "string or null",
"forConvenience": "string or null",
"noticePeriod": "string or null"
},
"limitationOfLiability": {
"found": true/false,
"cap": "string or null",
"exclusions": ["string"]
},
"confidentiality": {
"found": true/false,
"duration": "string or null",
"scope": "string or null"
},
"intellectualProperty": {
"found": true/false,
"ownership": "string or null"
},
"disputeResolution": {
"found": true/false,
"method": "string (e.g. arbitration, litigation)",
"jurisdiction": "string or null"
},
"indemnification": {
"found": true/false,
"scope": "string or null"
},
"riskFlags": [
{
"severity": "high/medium/low",
"clause": "string",
"description": "string"
}
],
"missingClauses": ["list of standard clauses not found in this contract"],
"plainEnglishSummary": "string — 3-4 sentence plain-English summary of key obligations and risks"
}`;
export async function analyzeContract(buffer) {
const { text } = await pdfParse(buffer);
if (!text?.trim()) throw new Error('No extractable text in PDF');
// For long contracts, split and analyze in chunks then synthesize
const MAX_CHARS = 80_000; // ~20k tokens
const truncated = text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) + '\n[Document truncated for analysis]' : text;
const response = await openai.chat.completions.create({
model: 'gpt-4o',
messages: [
{
role: 'system',
content: `You are a senior contract attorney with expertise in commercial agreements.
Analyze the contract below and extract all key clauses.
Return ONLY a JSON object matching this exact schema — no markdown, no explanation:
${CLAUSE_SCHEMA}
Be thorough. Flag any unusual, one-sided, or missing clauses as risk flags.`,
},
{ role: 'user', content: truncated },
],
temperature: 0.1,
response_format: { type: 'json_object' },
});
return JSON.parse(response.choices[0].message.content);
}API Route
js
// src/routes/analyze.js
import { Router } from 'express';
import multer from 'multer';
import { analyzeContract } from '../services/contractService.js';
// Uploaded files are buffered in memory and capped at 20 MB.
const MAX_UPLOAD_BYTES = 20 * 1024 * 1024;
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES },
});

/**
 * POST / handler: runs the clause analysis on the uploaded "contract" file
 * and replies with the result. Responds 400 when no file was uploaded; any
 * thrown error is forwarded to the Express error middleware.
 */
async function handleAnalyze(req, res, next) {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No contract PDF uploaded' });
    }
    const { buffer, originalname } = req.file;
    const analysis = await analyzeContract(buffer);
    res.json({ success: true, filename: originalname, analysis });
  } catch (err) {
    next(err);
  }
}

const router = Router();
router.post('/', upload.single('contract'), handleAnalyze);
export default router;
Server
js
// src/server.js
import 'dotenv/config';
import express from 'express';
import analyzeRouter from './routes/analyze.js';
const app = express();
app.use(express.json());
app.use('/analyze', analyzeRouter);

// Final error handler. Upload errors raised by multer are the client's fault,
// so they are reported as 4xx (413 for an oversized file) instead of 500.
app.use((err, _req, res, next) => {
  // If a response has already started, let Express' default handler close it.
  if (res.headersSent) return next(err);

  let status = 500;
  if (err?.code === 'LIMIT_FILE_SIZE') {
    status = 413;
  } else if (err?.name === 'MulterError') {
    status = 400;
  }
  res.status(status).json({ error: err.message });
});

app.listen(process.env.PORT ?? 3000, () => console.log('Contract Extractor running'));
Testing
bash
curl -X POST http://localhost:3000/analyze \
-F "contract=@vendor-agreement.pdf"
Sample response (the `analysis` field of the reply, abridged):
json
{
"contractType": "SaaS Vendor Agreement",
"parties": [
{ "name": "Acme Corp", "role": "Vendor" },
{ "name": "Client Inc", "role": "Client" }
],
"autoRenewal": {
"found": true,
"terms": "Automatically renews for successive 12-month periods",
"noticePeriod": "60 days written notice required to cancel"
},
"limitationOfLiability": {
"found": true,
"cap": "Capped at fees paid in the preceding 3 months",
"exclusions": ["gross negligence", "wilful misconduct"]
},
"riskFlags": [
{
"severity": "high",
"clause": "Limitation of Liability",
"description": "3-month fee cap is unusually low for a multi-year agreement. Consider negotiating to 12 months."
},
{
"severity": "medium",
"clause": "Auto-Renewal",
"description": "60-day cancellation notice required. Calendar a reminder 90 days before renewal date."
}
],
"missingClauses": ["Force Majeure", "Data Processing Agreement"],
"plainEnglishSummary": "This is a standard SaaS agreement with an automatic annual renewal. The vendor's liability is capped at just 3 months of fees, which is below industry standard. A Data Processing Agreement (DPA) is missing — required if processing personal data under GDPR."
}
Risk Dashboard Extension
Add a simple HTML report generator:
js
/**
 * Renders a minimal HTML risk report from a clause analysis.
 *
 * All interpolated values originate from the model and the uploaded contract,
 * so they are HTML-escaped before being placed in the markup.
 *
 * @param {object} analysis - Clause analysis object.
 * @param {string} [analysis.contractType] - Contract type shown in the title.
 * @param {Array<{severity: string, clause: string, description: string}>} [analysis.riskFlags]
 *   Risk flags; only those with severity "high" are listed. Treated as empty if absent.
 * @param {string[]} [analysis.missingClauses] - Treated as empty if absent.
 * @param {string} [analysis.plainEnglishSummary] - Summary paragraph.
 * @returns {string} The HTML document as a string.
 */
export function generateRiskReport(analysis) {
  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // Missing values render as an empty string rather than "undefined".
  const escapeHtml = (value) =>
    String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  // The model may omit either array; fall back to empty lists instead of throwing.
  const riskFlags = Array.isArray(analysis.riskFlags) ? analysis.riskFlags : [];
  const missingClauses = Array.isArray(analysis.missingClauses) ? analysis.missingClauses : [];

  const highRisks = riskFlags.filter((f) => f?.severity === 'high');
  const riskItems = highRisks
    .map((f) => `<div class="risk"><strong>${escapeHtml(f.clause)}</strong>: ${escapeHtml(f.description)}</div>`)
    .join('');
  const missingItems = missingClauses.map((c) => `<li>${escapeHtml(c)}</li>`).join('');

  const html = `
<html><body>
<h1>Contract Risk Report: ${escapeHtml(analysis.contractType)}</h1>
<h2>High Risk Flags (${highRisks.length})</h2>
${riskItems}
<h2>Missing Clauses</h2>
<ul>${missingItems}</ul>
<h2>Summary</h2>
<p>${escapeHtml(analysis.plainEnglishSummary)}</p>
</body></html>`;
  return html;
}
// Build 50 AI Automation Tools — Tool 3 of 50
Contract analysis is live. Continue to Tool 4 to extract invoice data from PDFs and images using GPT-4o Vision.
Summary
- GPT-4o understands legal language and extracts clauses semantically, not by keyword matching
- The risk flags array makes it easy to surface critical issues without reading the full analysis
- Missing clauses detection identifies gaps in standard contract protections
- The truncation strategy keeps long contracts within the context window by analyzing only the first 80,000 characters — clauses beyond that point are not seen by the model
- Extend with an HTML/PDF report generator to share results with non-technical stakeholders
Continue to Tool 4: Invoice Data Extractor with GPT-4 Vision →
