AI · Node.js · Automation

Contract Clause Extractor with LLM

TT
TopicTrick Team
Contract Clause Extractor with LLM

Contract Clause Extractor with LLM

Legal review is expensive. This tool reads any commercial contract and extracts the clauses that matter most — payment terms, termination rights, liability caps, auto-renewal dates, and risk flags — in seconds using GPT-4o.

This is Tool 3 of the Build 50 AI Automation Tools course.


What You'll Build

A REST API that:

  • POST /analyze — accepts a contract PDF and returns a structured clause extraction
  • Identifies 10+ standard clause types and flags missing or risky clauses
  • Returns a risk summary with a plain-English assessment

Setup

bash
mkdir contract-extractor && cd contract-extractor
npm init -y
# The source files in this tutorial use ES module syntax (import/export),
# so the package must be marked as an ES module or Node will refuse to load them.
npm pkg set type=module
npm install express multer pdf-parse openai dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000

Contract Analysis Service

js
// src/services/contractService.js
import pdfParse from 'pdf-parse';
import OpenAI from 'openai';

// OpenAI client for this module; the API key is read from the
// OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Description of the response shape the model is asked to produce. It is
// interpolated verbatim into the system prompt in analyzeContract().
// NOTE: this is prompt text, not parseable JSON — placeholders such as
// `true/false` and "string or null" are instructions for the model.
const CLAUSE_SCHEMA = `{
  "contractType": "string (e.g. NDA, SaaS Agreement, Employment Contract)",
  "parties": [{ "name": "string", "role": "string (e.g. Vendor, Client)" }],
  "effectiveDate": "string or null",
  "expirationDate": "string or null",
  "autoRenewal": {
    "found": true/false,
    "terms": "string or null",
    "noticePeriod": "string or null"
  },
  "paymentTerms": {
    "found": true/false,
    "amount": "string or null",
    "schedule": "string or null",
    "latePenalty": "string or null"
  },
  "terminationRights": {
    "found": true/false,
    "forCause": "string or null",
    "forConvenience": "string or null",
    "noticePeriod": "string or null"
  },
  "limitationOfLiability": {
    "found": true/false,
    "cap": "string or null",
    "exclusions": ["string"]
  },
  "confidentiality": {
    "found": true/false,
    "duration": "string or null",
    "scope": "string or null"
  },
  "intellectualProperty": {
    "found": true/false,
    "ownership": "string or null"
  },
  "disputeResolution": {
    "found": true/false,
    "method": "string (e.g. arbitration, litigation)",
    "jurisdiction": "string or null"
  },
  "indemnification": {
    "found": true/false,
    "scope": "string or null"
  },
  "riskFlags": [
    {
      "severity": "high/medium/low",
      "clause": "string",
      "description": "string"
    }
  ],
  "missingClauses": ["list of standard clauses not found in this contract"],
  "plainEnglishSummary": "string — 3-4 sentence plain-English summary of key obligations and risks"
}`;

/**
 * Extracts the text of a contract PDF and asks GPT-4o for a structured
 * clause analysis shaped like CLAUSE_SCHEMA.
 *
 * @param {Buffer} buffer - PDF file contents, passed straight to pdf-parse.
 * @returns {Promise<object>} The model's JSON response, parsed.
 * @throws {Error} If the PDF yields no text, the model returns no content,
 *   or the returned content is not valid JSON (original error in `cause`).
 */
export async function analyzeContract(buffer) {
  const { text } = await pdfParse(buffer);

  if (!text?.trim()) throw new Error('No extractable text in PDF');

  // Long contracts are NOT chunked: only the first MAX_CHARS characters are
  // sent. Anything after that is dropped, and a marker tells the model the
  // document is incomplete.
  const MAX_CHARS = 80_000; // ~20k tokens
  const contractText =
    text.length > MAX_CHARS
      ? `${text.slice(0, MAX_CHARS)}\n[Document truncated for analysis]`
      : text;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are a senior contract attorney with expertise in commercial agreements. 
Analyze the contract below and extract all key clauses.
Return ONLY a JSON object matching this exact schema — no markdown, no explanation:
${CLAUSE_SCHEMA}
Be thorough. Flag any unusual, one-sided, or missing clauses as risk flags.`,
      },
      { role: 'user', content: contractText },
    ],
    temperature: 0.1,
    response_format: { type: 'json_object' },
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;

  // Content can be absent (e.g. a refusal or an empty completion); fail with
  // a clear message instead of letting JSON.parse choke on null/undefined.
  if (!content) {
    throw new Error('Model returned no content for contract analysis');
  }

  try {
    return JSON.parse(content);
  } catch (err) {
    // A cut-off completion produces invalid JSON; include finish_reason so
    // the cause is visible to whoever reads the error.
    throw new Error(
      `Model returned invalid JSON (finish_reason: ${choice.finish_reason})`,
      { cause: err },
    );
  }
}

API Route

js
// src/routes/analyze.js
import { Router } from 'express';
import multer from 'multer';
import { analyzeContract } from '../services/contractService.js';

// Upload ceiling for a single contract: 20 MiB.
const MAX_UPLOAD_BYTES = 20 * 1024 * 1024;

const router = Router();

// Uploaded files are kept in memory and handed to the analyzer as a buffer.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES },
});

/**
 * Handles POST / — expects a multipart upload in the `contract` field.
 * Responds 400 when no file is present; otherwise returns the analysis
 * together with the uploaded file's original name. Any thrown error is
 * forwarded to the next error-handling middleware.
 */
async function handleAnalyze(req, res, next) {
  try {
    const { file } = req;
    if (!file) {
      return res.status(400).json({ error: 'No contract PDF uploaded' });
    }

    const analysis = await analyzeContract(file.buffer);
    res.json({ success: true, filename: file.originalname, analysis });
  } catch (err) {
    next(err);
  }
}

router.post('/', upload.single('contract'), handleAnalyze);

export default router;

Server

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import analyzeRouter from './routes/analyze.js';

const app = express();
app.use(express.json());
app.use('/analyze', analyzeRouter);

/**
 * Final error handler. Responds with `{ error: message }` as JSON.
 *
 * Status selection:
 *  - multer upload errors → 413 when the file exceeds the size limit, else 400
 *  - errors carrying a valid 4xx/5xx `status`/`statusCode` → that status
 *  - everything else → 500
 */
app.use((err, _req, res, next) => {
  // If the response has already started we cannot change it; let Express
  // fall back to its default handling.
  if (res.headersSent) return next(err);

  let status = 500;
  if (err?.name === 'MulterError') {
    status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  } else {
    const declared = err?.status ?? err?.statusCode;
    if (Number.isInteger(declared) && declared >= 400 && declared <= 599) {
      status = declared;
    }
  }

  // Keep a server-side record of unexpected failures.
  if (status >= 500) console.error(err);

  res.status(status).json({ error: err?.message ?? 'Internal Server Error' });
});

app.listen(process.env.PORT ?? 3000, () => console.log('Contract Extractor running'));

Testing

bash
curl -X POST http://localhost:3000/analyze \
  -F "contract=@vendor-agreement.pdf"

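Sample response (this is the `analysis` object only — the API wraps it as `{ "success": true, "filename": "...", "analysis": { ... } }`, and some schema fields are omitted here for brevity):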

json
{
  "contractType": "SaaS Vendor Agreement",
  "parties": [
    { "name": "Acme Corp", "role": "Vendor" },
    { "name": "Client Inc", "role": "Client" }
  ],
  "autoRenewal": {
    "found": true,
    "terms": "Automatically renews for successive 12-month periods",
    "noticePeriod": "60 days written notice required to cancel"
  },
  "limitationOfLiability": {
    "found": true,
    "cap": "Capped at fees paid in the preceding 3 months",
    "exclusions": ["gross negligence", "wilful misconduct"]
  },
  "riskFlags": [
    {
      "severity": "high",
      "clause": "Limitation of Liability",
      "description": "3-month fee cap is unusually low for a multi-year agreement. Consider negotiating to 12 months."
    },
    {
      "severity": "medium",
      "clause": "Auto-Renewal",
      "description": "60-day cancellation notice required. Calendar a reminder 90 days before renewal date."
    }
  ],
  "missingClauses": ["Force Majeure", "Data Processing Agreement"],
  "plainEnglishSummary": "This is a standard SaaS agreement with an automatic annual renewal. The vendor's liability is capped at just 3 months of fees, which is below industry standard. A Data Processing Agreement (DPA) is missing — required if processing personal data under GDPR."
}

Risk Dashboard Extension

Add a simple HTML report generator:

js
// Characters with special meaning in HTML and their entity replacements.
const HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escapes a value for safe interpolation into HTML.
 * null/undefined become an empty string; other values are stringified.
 */
function escapeHtml(value) {
  return String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Renders a minimal HTML risk report from a contract analysis.
 *
 * Every interpolated value is HTML-escaped: the analysis is model output
 * derived from an uploaded document, so it must be treated as untrusted.
 * A missing or non-array `riskFlags` / `missingClauses` is treated as empty.
 *
 * @param {object} analysis - Analysis object with `contractType`,
 *   `riskFlags`, `missingClauses` and `plainEnglishSummary`.
 * @returns {string} HTML document listing high-severity flags, missing
 *   clauses and the plain-English summary.
 */
export function generateRiskReport(analysis) {
  const riskFlags = Array.isArray(analysis.riskFlags) ? analysis.riskFlags : [];
  const missingClauses = Array.isArray(analysis.missingClauses) ? analysis.missingClauses : [];

  const highRisks = riskFlags.filter((flag) => flag?.severity === 'high');

  const riskItems = highRisks
    .map(
      (flag) =>
        `<div class="risk"><strong>${escapeHtml(flag.clause)}</strong>: ${escapeHtml(flag.description)}</div>`,
    )
    .join('');
  const missingItems = missingClauses
    .map((clause) => `<li>${escapeHtml(clause)}</li>`)
    .join('');

  const html = `
<html><body>
<h1>Contract Risk Report: ${escapeHtml(analysis.contractType)}</h1>
<h2>High Risk Flags (${highRisks.length})</h2>
${riskItems}
<h2>Missing Clauses</h2>
<ul>${missingItems}</ul>
<h2>Summary</h2>
<p>${escapeHtml(analysis.plainEnglishSummary)}</p>
</body></html>`;
  return html;
}

Build 50 AI Automation Tools — Tool 3 of 50

Contract analysis is live. Continue to Tool 4 to extract invoice data from PDFs and images using GPT-4o Vision.


    Summary

    • GPT-4o understands legal language and extracts clauses semantically, not by keyword matching
    • The risk flags array makes it easy to surface critical issues without reading the full analysis
    • Missing clauses detection identifies gaps in standard contract protections
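    • The truncation strategy keeps long contracts within the context window by analysing only the first 80,000 characters (~20k tokens) — clauses that appear after that point are not reviewed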
    • Extend with an HTML/PDF report generator to share results with non-technical stakeholders

    Continue to Tool 4: Invoice Data Extractor with GPT-4 Vision →