AI · Node.js · Automation
Resume Parser & Skill Extractor with AI
TT
TopicTrick Team
Resume Parser & Skill Extractor with AI
Hiring teams spend hours manually extracting data from resumes. This tool automates that entirely — upload any PDF resume and receive a clean, structured JSON object with the candidate's skills, experience, education, and contact details extracted by GPT-4o.
This is Tool 2 of the Build 50 AI Automation Tools course.
What You'll Build
A REST API with two endpoints:
POST /parse — upload a single resume PDF, receive structured JSON
POST /parse/batch — upload multiple resumes, receive an array of parsed candidates, one entry per file (ranking comes from the Skill Matching Extension below)
Project Setup
bash
mkdir resume-parser && cd resume-parser
npm init -y
npm install express multer pdf-parse openai dotenv
text
resume-parser/
├── src/
│ ├── server.js
│ ├── routes/parse.js
│ └── services/resumeService.js
├── .env
└── package.json
json
// package.json
{ "type": "module" }
Environment Variables
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000
The Resume Service
js
// src/services/resumeService.js
import pdfParse from 'pdf-parse';
import OpenAI from 'openai';
// Shared OpenAI client; the API key is taken from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Human-readable description of the JSON shape the model is asked to return.
// It is interpolated verbatim into the system prompt in parseResume, so every
// character below (including the inline hints such as "string or null") is
// prompt text sent to the model — nothing in this file validates replies against it.
const RESUME_SCHEMA = `{
"name": "string",
"email": "string or null",
"phone": "string or null",
"location": "string or null",
"summary": "string — 2-3 sentence professional summary",
"skills": ["array of technical and soft skills"],
"experience": [
{
"company": "string",
"title": "string",
"startDate": "string (e.g. Jan 2022)",
"endDate": "string or 'Present'",
"description": "string — key achievements"
}
],
"education": [
{
"institution": "string",
"degree": "string",
"field": "string",
"year": "string"
}
],
"certifications": ["array of certifications"],
"languages": ["array of languages spoken"],
"totalYearsExperience": "number (estimate)"
}`;
/**
 * Extracts structured candidate data from a PDF resume.
 *
 * The PDF's text is pulled out with pdf-parse and sent to GPT-4o in JSON mode
 * together with RESUME_SCHEMA; the model's reply is parsed and returned.
 *
 * @param {Buffer} buffer - Raw bytes of the uploaded PDF.
 * @returns {Promise<object>} The candidate object produced by the model. Its
 *   fields follow RESUME_SCHEMA only as far as the model complied; they are
 *   not validated here.
 * @throws {Error} If no data was supplied, the PDF contains no extractable
 *   text, the model returned no content, the reply was cut off at the token
 *   limit, or the reply is not a JSON object.
 */
export async function parseResume(buffer) {
  if (buffer == null || buffer.length === 0) {
    throw new Error('No resume data received.');
  }

  const { text } = await pdfParse(buffer);
  if (!text?.trim()) {
    throw new Error('No text found in PDF. The resume may be image-based.');
  }

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        // The continuation lines are flush-left on purpose: any indentation
        // inside the template literal would become part of the prompt.
        content: `You are an expert HR data extraction system. Extract structured candidate data from the resume text below.
Return ONLY a JSON object matching this exact schema — no markdown, no explanation:
${RESUME_SCHEMA}
If a field is not present in the resume, use null for strings or [] for arrays.`,
      },
      { role: 'user', content: text },
    ],
    temperature: 0.1,
    response_format: { type: 'json_object' },
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;

  // JSON.parse(null) evaluates to null instead of throwing, so a missing reply
  // would otherwise be handed to callers as a "successful" null candidate.
  if (!content) {
    throw new Error('The model returned no content for this resume.');
  }

  // A reply that stopped at the token limit is incomplete JSON; report that
  // directly rather than as an opaque SyntaxError from JSON.parse.
  if (choice.finish_reason === 'length') {
    throw new Error('The model response was truncated before the JSON was complete.');
  }

  let candidate;
  try {
    candidate = JSON.parse(content);
  } catch (err) {
    throw new Error('The model returned malformed JSON.', { cause: err });
  }

  if (candidate === null || typeof candidate !== 'object' || Array.isArray(candidate)) {
    throw new Error('The model response was not a JSON object.');
  }
  return candidate;
}
export async function parseMultipleResumes(buffers) {
const results = [];
for (const { filename, buffer } of buffers) {
try {
const parsed = await parseResume(buffer);
results.push({ filename, success: true, ...parsed });
} catch (err) {
results.push({ filename, success: false, error: err.message });
}
}
return results;
}API Routes
js
// src/routes/parse.js
import { Router } from 'express';
import multer from 'multer';
import { parseResume, parseMultipleResumes } from '../services/resumeService.js';
// Router that carries the resume-parsing endpoints.
const router = Router();

// Per-file upload cap: 10 MiB.
const MAX_UPLOAD_BYTES = 10 * 1024 * 1024;

// Multer file filter: accept PDFs, reject every other MIME type with an error.
function acceptPdfOnly(_req, file, cb) {
  if (file.mimetype !== 'application/pdf') {
    return cb(new Error('PDF only'));
  }
  return cb(null, true);
}

// Uploads use multer's memory storage; the handlers read each file's `buffer`.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES },
  fileFilter: acceptPdfOnly,
});
// POST / — parse the one resume uploaded in the "resume" form field.
router.post('/', upload.single('resume'), async (req, res, next) => {
  try {
    const { file } = req;
    if (!file) {
      return res.status(400).json({ error: 'No resume uploaded' });
    }

    const candidate = await parseResume(file.buffer);
    res.json({ success: true, filename: file.originalname, candidate });
  } catch (err) {
    // Hand any failure to the application's error middleware.
    next(err);
  }
});
// POST /batch — parse up to 20 resumes uploaded in the "resumes" form field.
router.post('/batch', upload.array('resumes', 20), async (req, res, next) => {
  try {
    const uploaded = req.files;
    if (!uploaded?.length) {
      return res.status(400).json({ error: 'No resumes uploaded' });
    }

    // Keep only what the service needs: the original file name and its bytes.
    const buffers = uploaded.map(({ originalname, buffer }) => ({
      filename: originalname,
      buffer,
    }));
    const candidates = await parseMultipleResumes(buffers);
    res.json({ success: true, count: candidates.length, candidates });
  } catch (err) {
    // Hand any failure to the application's error middleware.
    next(err);
  }
});

export default router;
Server
js
// src/server.js
import 'dotenv/config';
import express from 'express';
import parseRouter from './routes/parse.js';
// Application wiring: JSON body parsing, the /parse routes, a health probe,
// the error middleware, and finally the listener.
const app = express();
app.use(express.json());
app.use('/parse', parseRouter);
app.get('/health', (_req, res) => res.json({ status: 'ok' }));

/**
 * Chooses the HTTP status for a failed request.
 *
 * - Errors that declare a 4xx/5xx `status`/`statusCode` (the Express
 *   convention, used for example by its body parsers) keep that status.
 * - Multer upload errors are client errors: 413 when the file exceeds the
 *   size limit, 400 otherwise.
 * - Everything else is an internal error (500).
 *
 * @param {unknown} err - The value passed to `next(err)` or thrown by a handler.
 * @returns {number} HTTP status code to send.
 */
function statusForError(err) {
  const declared = err?.status ?? err?.statusCode;
  if (Number.isInteger(declared) && declared >= 400 && declared <= 599) {
    return declared;
  }
  if (err?.name === 'MulterError') {
    return err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  }
  return 500;
}

// Error middleware: must keep the four-argument signature to be recognised by Express.
app.use((err, _req, res, next) => {
  // Once a response has started it cannot be replaced; let Express close it.
  if (res.headersSent) {
    return next(err);
  }
  res.status(statusForError(err)).json({ error: err?.message ?? 'Internal Server Error' });
});

app.listen(process.env.PORT ?? 3000, () => console.log('Resume Parser running'));
Testing
bash
# Single resume
curl -X POST http://localhost:3000/parse \
-F "resume=@john-doe-resume.pdf"
# Multiple resumes
curl -X POST http://localhost:3000/parse/batch \
-F "resumes=@resume1.pdf" \
-F "resumes=@resume2.pdf"
Sample response:
json
{
"success": true,
"candidate": {
"name": "Sarah Chen",
"email": "sarah.chen@email.com",
"phone": "+1 415-555-0192",
"location": "San Francisco, CA",
"skills": ["TypeScript", "React", "Node.js", "PostgreSQL", "AWS", "Docker"],
"totalYearsExperience": 6,
"experience": [
{
"company": "Stripe",
"title": "Senior Software Engineer",
"startDate": "Mar 2022",
"endDate": "Present",
"description": "Led migration of payment processing service to microservices architecture, reducing latency by 40%."
}
],
"education": [
{
"institution": "UC Berkeley",
"degree": "B.S.",
"field": "Computer Science",
"year": "2019"
}
]
}
}
Skill Matching Extension
Add a job description matcher to score candidates:
js
export async function scoreCandidate(candidate, jobDescription) {
const response = await openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `Score how well this candidate matches the job description. Return JSON:
{"score": 0-100, "matchedSkills": [], "missingSkills": [], "recommendation": "string"}`,
},
{
role: 'user',
content: `Candidate: ${JSON.stringify(candidate)}\n\nJob Description: ${jobDescription}`,
},
],
response_format: { type: 'json_object' },
});
return JSON.parse(response.choices[0].message.content);
}Build 50 AI Automation Tools — Tool 2 of 50
Resume parsing is live. Continue to Tool 3 to extract key clauses from legal contracts automatically.
Summary
- pdf-parse extracts the embedded text of a resume PDF regardless of layout; scanned, image-only PDFs contain no text and are rejected with an error
- JSON schema prompt tells GPT-4o exactly what structure to return
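- response_format: json_object makes GPT-4o reply with a JSON object rather than free text; the reply can still be empty or cut off at the token limit, so check it before trusting JSON.parse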
- Batch endpoint processes multiple resumes sequentially to stay within rate limits
- Extend with skill matching to rank candidates against a job description automatically
Continue to Tool 3: Contract Clause Extractor →
