AI · Node.js · Automation
Document Comparison Tool with AI
TT
TopicTrick Team
Document Comparison Tool with AI
Reviewing the differences between two contract versions, policy documents, or terms of service is time-consuming and error-prone. This tool compares any two text documents and returns a plain-English analysis of what changed, what was added, and what was removed — with a significance assessment for each change.
This is Tool 5 of the Build 50 AI Automation Tools course.
What You'll Build
- POST /compare — upload two documents (or send text directly), receive structured diff analysis
- Identifies additions, removals, and modifications with semantic explanations
- Assigns significance scores so you focus on material changes first
Setup
bash
mkdir doc-compare && cd doc-compare
npm init -y
npm install express multer pdf-parse openai diff-match-patch dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
Comparison Service
js
// src/services/compareService.js
import pdfParse from 'pdf-parse';
import OpenAI from 'openai';
import { diff_match_patch } from 'diff-match-patch';
// OpenAI API client; the key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Single diff-match-patch instance reused for every comparison in this module.
const dmp = new diff_match_patch();
/**
 * Turn an uploaded file's bytes into plain text.
 *
 * @param {Buffer} buffer - Raw file contents.
 * @param {string} mimetype - MIME type reported for the file.
 * @returns {Promise<string>} The text pdf-parse extracts when the type is
 *   'application/pdf'; otherwise the buffer decoded as UTF-8.
 */
async function extractText(buffer, mimetype) {
  const isPdf = mimetype === 'application/pdf';
  if (!isPdf) {
    // Every non-PDF type is treated as UTF-8 text.
    return buffer.toString('utf-8');
  }
  const parsed = await pdfParse(buffer);
  return parsed.text;
}
/**
 * Diff two texts and collect the fragments that were added or removed.
 *
 * @param {string} textA - Text of the first document.
 * @param {string} textB - Text of the second document.
 * @returns {{ additions: string[], removals: string[] }} Trimmed, non-empty
 *   fragments in diff order: op 1 entries as additions, op -1 entries as removals.
 */
function getChangedRegions(textA, textB) {
  // Myers diff, then semantic cleanup (mutates the diff list in place).
  const diffs = dmp.diff_main(textA, textB);
  dmp.diff_cleanupSemantic(diffs);

  const additions = [];
  const removals = [];
  for (const [op, text] of diffs) {
    const fragment = text.trim();
    // Whitespace-only changes are dropped.
    if (!fragment) continue;
    if (op === 1) {
      additions.push(fragment);
    } else if (op === -1) {
      removals.push(fragment);
    }
  }
  return { additions, removals };
}
export async function compareDocuments(bufferA, mimetypeA, bufferB, mimetypeB) {
const [textA, textB] = await Promise.all([
extractText(bufferA, mimetypeA),
extractText(bufferB, mimetypeB),
]);
const { additions, removals } = getChangedRegions(textA, textB);
// If documents are very similar, skip AI call
if (additions.length === 0 && removals.length === 0) {
return { identical: true, changes: [], summary: 'The documents are identical.' };
}
const changedContent = `
ADDED TEXT:
${additions.slice(0, 50).join('\n---\n')}
REMOVED TEXT:
${removals.slice(0, 50).join('\n---\n')}`;
const response = await openai.chat.completions.create({
model: 'gpt-4o',
messages: [
{
role: 'system',
content: `You are an expert document analyst. Analyze the text additions and removals between two document versions.
Return ONLY a JSON object — no markdown:
{
"identical": false,
"summary": "2-3 sentence plain-English summary of what changed and why it matters",
"changes": [
{
"type": "addition | removal | modification",
"section": "best guess at which section this change is in",
"description": "plain-English explanation of this specific change",
"significance": "high | medium | low",
"addedText": "relevant added text or null",
"removedText": "relevant removed text or null"
}
],
"overallSignificance": "high | medium | low",
"recommendedAction": "string — what the reviewer should do next"
}`,
},
{ role: 'user', content: changedContent },
],
temperature: 0.2,
response_format: { type: 'json_object' },
});
return JSON.parse(response.choices[0].message.content);
}API Route
js
// src/routes/compare.js
import { Router } from 'express';
import multer from 'multer';
import { compareDocuments } from '../services/compareService.js';
const router = Router();

// Keep uploads in memory and cap each file at 10 MiB.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: 10 * 1024 * 1024 },
});

// Accept at most one file in each of the two form fields.
const acceptBothDocs = upload.fields([
  { name: 'docA', maxCount: 1 },
  { name: 'docB', maxCount: 1 },
]);

/**
 * POST / handler: requires one uploaded file in each of `docA` and `docB`,
 * runs the comparison and replies with `{ success: true, result }`.
 * Responds 400 when either file is missing; any thrown error is passed to `next`.
 */
async function handleCompare(req, res, next) {
  try {
    const first = req.files?.docA?.[0];
    const second = req.files?.docB?.[0];
    if (!(first && second)) {
      res.status(400).json({ error: 'Upload two documents: docA and docB' });
      return;
    }
    const result = await compareDocuments(
      first.buffer,
      first.mimetype,
      second.buffer,
      second.mimetype,
    );
    res.json({ success: true, result });
  } catch (err) {
    next(err);
  }
}

router.post('/', acceptBothDocs, handleCompare);
export default router;
Server
js
// src/server.js
import 'dotenv/config';
import express from 'express';
import compareRouter from './routes/compare.js';
const app = express();
// Listen on PORT from the environment, falling back to 3000 when it is unset.
const port = process.env.PORT ?? 3000;

/**
 * Error middleware: replies with HTTP 500 and the error's message.
 * The four-parameter signature is kept so Express treats it as an error handler.
 */
function sendServerError(err, _req, res, _next) {
  res.status(500).json({ error: err.message });
}

app.use(express.json());
app.use('/compare', compareRouter);
app.use(sendServerError);

app.listen(port, () => {
  console.log('Doc Compare running');
});
Testing
bash
curl -X POST http://localhost:3000/compare \
-F "docA=@contract-v1.pdf" \
-F "docB=@contract-v2.pdf"
Sample response:
json
{
"summary": "Version 2 reduces the liability cap from 12 months to 3 months of fees and adds a mandatory arbitration clause replacing litigation. These are material changes that significantly increase risk for the customer.",
"overallSignificance": "high",
"changes": [
{
"type": "modification",
"section": "Section 8 — Limitation of Liability",
"description": "Liability cap reduced from 12 months of fees to 3 months",
"significance": "high",
"removedText": "not exceed the total fees paid in the twelve (12) months",
"addedText": "not exceed the total fees paid in the three (3) months"
},
{
"type": "addition",
"section": "Section 12 — Dispute Resolution",
"description": "Mandatory binding arbitration clause added, waiving right to jury trial",
"significance": "high",
"addedText": "Any dispute shall be resolved by binding arbitration..."
}
],
"recommendedAction": "Escalate to legal counsel before signing. The liability cap reduction and arbitration waiver are non-standard and significantly favour the vendor."
}
Build 50 AI Automation Tools — Tool 5 of 50
Document comparison is done. Continue to Tool 6 to convert meeting notes into structured action items.
Summary
- diff-match-patch provides a fast, accurate text diff before involving the AI
- Sending only changed regions to GPT-4o dramatically reduces token cost and latency
- The significance score lets users triage changes — review high-significance items first
- Works on PDFs, plain text, and Markdown — extend with mammoth for Word documents
Continue to Tool 6: Meeting Notes to Action Items →
