AI · Node.js · Automation

Resume Parser & Skill Extractor with AI

TT
TopicTrick Team
Resume Parser & Skill Extractor with AI

Resume Parser & Skill Extractor with AI

Hiring teams spend hours manually extracting data from resumes. This tool automates that entirely — upload any PDF resume and receive a clean, structured JSON object with the candidate's skills, experience, education, and contact details extracted by GPT-4o.

This is Tool 2 of the Build 50 AI Automation Tools course.


What You'll Build

A REST API with two endpoints:

  • POST /parse — upload a single resume PDF, receive structured JSON
  • POST /parse/batch — upload multiple resumes, receive an array with one parsed result per file (ranking is added later in the Skill Matching Extension)

Project Setup

bash
mkdir resume-parser && cd resume-parser
npm init -y
npm install express multer pdf-parse openai dotenv
text
resume-parser/
├── src/
│   ├── server.js
│   ├── routes/parse.js
│   └── services/resumeService.js
├── .env
└── package.json
json
// package.json
{ "type": "module" }

Environment Variables

bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000

The Resume Service

js
// src/services/resumeService.js
import pdfParse from 'pdf-parse';
import OpenAI from 'openai';

// OpenAI client used by parseResume below. The API key is read from the
// OPENAI_API_KEY environment variable at the moment this module is loaded.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Description of the candidate object the model is asked to return. This is a
// prompt fragment that parseResume interpolates into its system message, not a
// machine-validated JSON Schema: the values are plain-English type hints.
const RESUME_SCHEMA = `{
  "name": "string",
  "email": "string or null",
  "phone": "string or null",
  "location": "string or null",
  "summary": "string — 2-3 sentence professional summary",
  "skills": ["array of technical and soft skills"],
  "experience": [
    {
      "company": "string",
      "title": "string",
      "startDate": "string (e.g. Jan 2022)",
      "endDate": "string or 'Present'",
      "description": "string — key achievements"
    }
  ],
  "education": [
    {
      "institution": "string",
      "degree": "string",
      "field": "string",
      "year": "string"
    }
  ],
  "certifications": ["array of certifications"],
  "languages": ["array of languages spoken"],
  "totalYearsExperience": "number (estimate)"
}`;

/**
 * Extract structured candidate data from a PDF resume.
 *
 * The PDF's text is read with pdf-parse and sent to GPT-4o in JSON mode along
 * with RESUME_SCHEMA; the model's reply is parsed and returned as-is (the
 * result is not validated against the schema here).
 *
 * @param {Buffer} buffer - Raw bytes of the uploaded PDF.
 * @returns {Promise<object>} The object decoded from the model's JSON reply.
 * @throws {Error} If the PDF yields no text, or the model reply is empty,
 *   truncated, or not parseable as JSON.
 */
export async function parseResume(buffer) {
  const { text } = await pdfParse(buffer);

  if (!text?.trim()) {
    throw new Error('No text found in PDF. The resume may be image-based.');
  }

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are an expert HR data extraction system. Extract structured candidate data from the resume text below.
Return ONLY a JSON object matching this exact schema — no markdown, no explanation:
${RESUME_SCHEMA}
If a field is not present in the resume, use null for strings or [] for arrays.`,
      },
      { role: 'user', content: text },
    ],
    temperature: 0.1,
    response_format: { type: 'json_object' },
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;

  // The content can be null (e.g. a refusal) or missing entirely; fail with a
  // clear message instead of letting JSON.parse throw on a non-string.
  if (!content) {
    throw new Error('The model returned no content for this resume.');
  }

  // JSON mode only yields complete JSON when generation finished normally.
  // A reply cut off at the token limit is partial and must not be parsed.
  if (choice.finish_reason === 'length') {
    throw new Error('The model response was truncated before the JSON was complete.');
  }

  try {
    return JSON.parse(content);
  } catch (err) {
    throw new Error('The model returned invalid JSON for this resume.', { cause: err });
  }
}

/**
 * Parse several resumes one after another, collecting a result per file.
 *
 * A failure on one resume does not abort the batch: that file's entry records
 * the error message and processing continues with the next file.
 *
 * @param {Array<{ filename: string, buffer: Buffer }>} buffers - Uploaded
 *   files to parse, in the order results should be returned.
 * @returns {Promise<object[]>} One entry per input. Successful entries carry
 *   the parsed candidate fields plus `filename` and `success: true`; failed
 *   entries are `{ filename, success: false, error }`.
 */
export async function parseMultipleResumes(buffers) {
  const results = [];
  for (const { filename, buffer } of buffers) {
    try {
      const parsed = await parseResume(buffer);
      // Spread the model output first: it is untrusted, and a stray
      // "filename" or "success" key must not overwrite the batch metadata.
      results.push({ ...parsed, filename, success: true });
    } catch (err) {
      results.push({ filename, success: false, error: err.message });
    }
  }
  return results;
}

API Routes

js
// src/routes/parse.js
import { Router } from 'express';
import multer from 'multer';
import { parseResume, parseMultipleResumes } from '../services/resumeService.js';

// Largest accepted upload: 10 MiB per file.
const MAX_RESUME_BYTES = 10 * 1024 * 1024;

// Multer file filter: accept the file only when the client declared it a PDF.
function pdfOnlyFilter(_req, file, cb) {
  if (file.mimetype === 'application/pdf') {
    return cb(null, true);
  }
  return cb(new Error('PDF only'));
}

const router = Router();

// Uploads are held in memory so the raw bytes can be passed on as a Buffer.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_RESUME_BYTES },
  fileFilter: pdfOnlyFilter,
});

// Single resume: expects one multipart file in the "resume" field.
router.post('/', upload.single('resume'), async (req, res, next) => {
  try {
    const { file } = req;

    if (!file) {
      res.status(400).json({ error: 'No resume uploaded' });
      return;
    }

    const candidate = await parseResume(file.buffer);
    res.json({ success: true, filename: file.originalname, candidate });
  } catch (err) {
    // Hand every failure to the application's error-handling middleware.
    next(err);
  }
});

// Multiple resumes: up to 20 multipart files in the "resumes" field.
router.post('/batch', upload.array('resumes', 20), async (req, res, next) => {
  try {
    const uploads = req.files;

    if (!uploads?.length) {
      res.status(400).json({ error: 'No resumes uploaded' });
      return;
    }

    // Reduce each upload to the { filename, buffer } pair the service expects.
    const buffers = uploads.map(({ originalname, buffer }) => ({
      filename: originalname,
      buffer,
    }));

    const candidates = await parseMultipleResumes(buffers);
    res.json({ success: true, count: candidates.length, candidates });
  } catch (err) {
    // Hand every failure to the application's error-handling middleware.
    next(err);
  }
});

export default router;

Server

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import parseRouter from './routes/parse.js';

// Application wiring: JSON body parsing, the /parse routes, a health probe,
// a central error handler, and the HTTP listener.
const app = express();
app.use(express.json());
app.use('/parse', parseRouter);
app.get('/health', (_req, res) => res.json({ status: 'ok' }));

// Central error handler. Client mistakes are reported as 4xx rather than a
// blanket 500, so callers can tell a bad request from a server failure.
app.use((err, _req, res, next) => {
  // Once a response has started, only Express's default handler can safely
  // finish it (by closing the connection).
  if (res.headersSent) {
    next(err);
    return;
  }

  let status = 500;
  if (err.name === 'MulterError') {
    // Upload-limit violations raised by multer (too large, too many files,
    // unexpected field name) are the client's fault.
    status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  } else if (err.expose === true && Number.isInteger(err.status)) {
    // Errors built with http-errors (e.g. malformed JSON rejected by
    // express.json()) carry a status that is safe to expose. Other errors'
    // `status` fields, such as upstream API failures, are deliberately ignored.
    status = err.status;
  }

  res.status(status).json({ error: err.message });
});

app.listen(process.env.PORT ?? 3000, () => console.log('Resume Parser running'));

Testing

bash
# Single resume
curl -X POST http://localhost:3000/parse \
  -F "resume=@john-doe-resume.pdf"

# Multiple resumes
curl -X POST http://localhost:3000/parse/batch \
  -F "resumes=@resume1.pdf" \
  -F "resumes=@resume2.pdf"

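Sample response (abridged — the actual response also includes the top-level filename and the remaining schema fields such as summary, certifications, and languages):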

json
{
  "success": true,
  "candidate": {
    "name": "Sarah Chen",
    "email": "sarah.chen@email.com",
    "phone": "+1 415-555-0192",
    "location": "San Francisco, CA",
    "skills": ["TypeScript", "React", "Node.js", "PostgreSQL", "AWS", "Docker"],
    "totalYearsExperience": 6,
    "experience": [
      {
        "company": "Stripe",
        "title": "Senior Software Engineer",
        "startDate": "Mar 2022",
        "endDate": "Present",
        "description": "Led migration of payment processing service to microservices architecture, reducing latency by 40%."
      }
    ],
    "education": [
      {
        "institution": "UC Berkeley",
        "degree": "B.S.",
        "field": "Computer Science",
        "year": "2019"
      }
    ]
  }
}

Skill Matching Extension

Add a job description matcher to score candidates:

js
/**
 * Ask the model how well a parsed candidate matches a job description.
 *
 * @param {object} candidate - Parsed candidate data; serialised with
 *   JSON.stringify into the prompt.
 * @param {string} jobDescription - Job description text to match against.
 * @returns {Promise<object>} The object decoded from the model's JSON reply.
 *   The prompt asks for `score`, `matchedSkills`, `missingSkills`, and
 *   `recommendation`, but the shape is not validated here.
 * @throws {Error} If the model reply is empty, truncated, or not parseable
 *   as JSON.
 */
export async function scoreCandidate(candidate, jobDescription) {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      {
        role: 'system',
        content: `Score how well this candidate matches the job description. Return JSON:
{"score": 0-100, "matchedSkills": [], "missingSkills": [], "recommendation": "string"}`,
      },
      {
        role: 'user',
        content: `Candidate: ${JSON.stringify(candidate)}\n\nJob Description: ${jobDescription}`,
      },
    ],
    response_format: { type: 'json_object' },
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;

  // The content can be null (e.g. a refusal) or missing entirely; fail with a
  // clear message instead of letting JSON.parse throw on a non-string.
  if (!content) {
    throw new Error('The model returned no content for this candidate score.');
  }

  // A reply cut off at the token limit is partial JSON and must not be parsed.
  if (choice.finish_reason === 'length') {
    throw new Error('The model response was truncated before the JSON was complete.');
  }

  try {
    return JSON.parse(content);
  } catch (err) {
    throw new Error('The model returned invalid JSON for this candidate score.', { cause: err });
  }
}

Build 50 AI Automation Tools — Tool 2 of 50

Resume parsing is live. Continue to Tool 3 to extract key clauses from legal contracts automatically.


    Summary

    • pdf-parse extracts the text from text-based PDFs; scanned, image-only resumes contain no extractable text and are rejected with an error
    • JSON schema prompt tells GPT-4o exactly what structure to return
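    • response_format: json_object makes the model emit syntactically valid JSON, but a reply can still be empty or cut off at the token limit — check finish_reason and handle JSON.parse errors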
    • Batch endpoint processes multiple resumes sequentially to stay within rate limits
    • Extend with skill matching to rank candidates against a job description automatically

    Continue to Tool 3: Contract Clause Extractor →