AI · Node.js · Automation
AI Podcast Transcript Summarizer with Whisper
TT
TopicTrick Team
AI Podcast Transcript Summarizer with Whisper
Upload any podcast audio file and receive a full transcript, structured summary, guest insights, and timestamped highlights. This tool combines OpenAI Whisper for transcription with GPT-4o for intelligent summarization.
This is Tool 30 of the Build 50 AI Automation Tools course.
What You'll Build
- POST /transcribe — upload audio, get transcript + structured summary
- OpenAI Whisper API for accurate multi-speaker transcription
- GPT-4o analysis: summary, key quotes, timestamps, guest insights
- POST /transcribe/rss — process podcast RSS feed episodes in bulk
Setup
bash
mkdir ai-podcast && cd ai-podcast
npm init -y
npm pkg set type=module
npm install express multer openai rss-parser axios dotenv p-limit
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000
MAX_AUDIO_MB=25
Transcription Service
js
// src/services/transcribeService.js
import fs from 'fs';
import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
import OpenAI from 'openai';
import axios from 'axios';
import path from 'path';
import os from 'os';
// Module-wide OpenAI client used for the Whisper transcription requests below;
// the API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Transcribe an in-memory audio file with the OpenAI `whisper-1` model.
 *
 * The buffer is written into a fresh, private temporary directory, streamed to
 * the transcription endpoint, and the directory is removed again whether the
 * request succeeds or fails.
 *
 * @param {Buffer} buffer - Raw audio bytes.
 * @param {string} filename - Original file name. Only its final path component
 *   is used (the extension is kept for the upload); directory parts are dropped.
 * @param {string} [language='en'] - Language code forwarded to the API.
 * @returns {Promise<{text: string, segments: Array<{start: number, end: number, text: string}>, language: string, duration: number}>}
 *   Full text, trimmed per-segment text with start/end offsets, plus the
 *   language and duration reported by the API.
 */
export async function transcribeBuffer(buffer, filename, language = 'en') {
  // The name may come straight from an upload form: keep only the last path
  // component so values like "../../x" cannot escape the temp directory.
  const baseName = path.basename(
    String(filename ?? '').replace(/\0/g, '').replace(/\\/g, '/'),
  );
  const safeName = baseName !== '' && baseName !== '.' && baseName !== '..'
    ? baseName
    : 'audio.mp3';

  // One directory per call: concurrent requests that share a file name no
  // longer overwrite (or delete) each other's temp file.
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'podcast-'));
  const tmpPath = path.join(tmpDir, safeName);

  try {
    await fs.promises.writeFile(tmpPath, buffer);

    const transcription = await openai.audio.transcriptions.create({
      file: fs.createReadStream(tmpPath),
      model: 'whisper-1',
      language,
      response_format: 'verbose_json',
      timestamp_granularities: ['segment'],
    });

    return {
      text: transcription.text,
      segments: transcription.segments?.map((s) => ({
        start: s.start,
        end: s.end,
        text: s.text.trim(),
      })) || [],
      language: transcription.language,
      duration: transcription.duration,
    };
  } finally {
    // force: true keeps cleanup from throwing and masking the real error.
    await fs.promises.rm(tmpDir, { recursive: true, force: true });
  }
}
export async function downloadAndTranscribe(audioUrl, language = 'en') {
const filename = `podcast-${Date.now()}.mp3`;
const tmpPath = path.join(os.tmpdir(), filename);
const response = await axios({ url: audioUrl, method: 'GET', responseType: 'stream', timeout: 120_000 });
await pipeline(response.data, createWriteStream(tmpPath));
const buffer = fs.readFileSync(tmpPath);
fs.unlinkSync(tmpPath);
return transcribeBuffer(buffer, filename, language);
}Summary Service
js
// src/services/summaryService.js
import OpenAI from 'openai';
// Module-wide OpenAI client used for the chat-completion summary request below;
// the API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Render a second offset as a clock-style label.
 *
 * @param {number} seconds - Offset in seconds; fractional parts are floored.
 * @returns {string} `H:MM:SS` when at least one full hour has elapsed,
 *   otherwise `M:SS` (minutes are not zero-padded in the short form).
 */
function formatTimestamp(seconds) {
  const twoDigits = (value) => String(value).padStart(2, '0');

  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);

  if (hours > 0) {
    return `${hours}:${twoDigits(minutes)}:${twoDigits(secs)}`;
  }
  return `${minutes}:${twoDigits(secs)}`;
}
// System prompt that defines the JSON shape the model must return.
const SUMMARY_SYSTEM_PROMPT = `You are an expert podcast analyst. Analyze this podcast transcript and provide a comprehensive summary.
Return ONLY a JSON object:
{
"episodeTitle": "episode title (infer from content if not provided)",
"showName": "podcast show name if identifiable",
"hosts": ["host names if identifiable"],
"guests": [{ "name": "string", "title": "string or null", "expertise": "string" }],
"oneSentenceSummary": "one sentence capture of the episode",
"summary": "3-4 paragraph summary of the episode",
"mainTopics": ["5-7 main topics discussed"],
"highlights": [
{ "timestamp": "MM:SS", "description": "what happens at this moment", "significance": "why it matters" }
],
"keyQuotes": [
{ "quote": "exact or near-exact quote", "speaker": "who said it", "context": "brief context" }
],
"insights": ["5-7 most valuable insights from this episode"],
"actionItems": ["concrete takeaways for listeners"],
"booksMentioned": ["any books referenced"],
"toolsMentioned": ["any tools, apps, or software mentioned"],
"episodeRating": "educational | entertaining | both | unclear"
}`;

/**
 * Ask GPT-4o for a structured JSON summary of a transcribed episode.
 *
 * @param {{text: string, segments?: Array<{start: number, text: string}>}} transcript
 *   Transcript in the shape returned by `transcribeBuffer`.
 * @param {string} [showName=''] - Show name hint inserted into the prompt.
 * @param {string} [episodeTitle=''] - Episode title hint inserted into the prompt.
 * @returns {Promise<object>} The parsed JSON object produced by the model.
 * @throws {TypeError} If `transcript.text` is not a string.
 * @throws {Error} If the model returns no content or content that is not valid JSON.
 */
export async function summarizePodcast(transcript, showName = '', episodeTitle = '') {
  const MAX_TRANSCRIPT_CHARS = 80_000;
  const EARLY_SEGMENT_COUNT = 30;

  const text = transcript?.text;
  if (typeof text !== 'string') {
    throw new TypeError('summarizePodcast: transcript.text must be a string');
  }

  // Long transcripts are cut off at a fixed character budget (no chunking);
  // the marker tells the model that the tail is missing.
  const truncated = text.length > MAX_TRANSCRIPT_CHARS
    ? text.slice(0, MAX_TRANSCRIPT_CHARS) + ' [truncated]'
    : text;

  // Only the first segments carry timestamps into the prompt, so timestamps
  // the model reports for later moments are not grounded in segment data.
  const segmentsContext = (transcript.segments ?? [])
    .slice(0, EARLY_SEGMENT_COUNT)
    .map((s) => `[${formatTimestamp(s.start)}] ${s.text}`)
    .join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: SUMMARY_SYSTEM_PROMPT },
      {
        role: 'user',
        content: `Show: ${showName}\nEpisode: ${episodeTitle}\n\nEarly transcript segments:\n${segmentsContext}\n\nFull transcript:\n${truncated}`,
      },
    ],
    temperature: 0.3,
    response_format: { type: 'json_object' },
  });

  const choice = response.choices?.[0];
  const content = choice?.message?.content;
  if (!content) {
    throw new Error(
      `Summary model returned no content (finish_reason: ${choice?.finish_reason ?? 'unknown'})`,
    );
  }

  try {
    return JSON.parse(content);
  } catch (err) {
    // Typically output cut off by the token limit; report that instead of a
    // bare SyntaxError pointing into the model's text.
    throw new Error(
      `Summary model returned invalid JSON (finish_reason: ${choice.finish_reason ?? 'unknown'})`,
      { cause: err },
    );
  }
}
Server
js
// src/server.js
import 'dotenv/config';
import express from 'express';
import multer from 'multer';
import Parser from 'rss-parser';
import pLimit from 'p-limit';
import { transcribeBuffer, downloadAndTranscribe } from './services/transcribeService.js';
import { summarizePodcast } from './services/summaryService.js';
// Express app; JSON bodies are parsed for the /transcribe/url and /transcribe/rss routes.
const app = express();
app.use(express.json());

// Upload cap in megabytes, from MAX_AUDIO_MB. An unset, empty, non-numeric, or
// non-positive value falls back to 25 instead of giving multer a NaN limit.
const DEFAULT_MAX_MB = 25;
const parsedMaxMb = Number.parseInt(process.env.MAX_AUDIO_MB ?? '', 10);
const MAX_MB = Number.isFinite(parsedMaxMb) && parsedMaxMb > 0 ? parsedMaxMb : DEFAULT_MAX_MB;

// Uploads are held in memory (req.file.buffer) and rejected above MAX_MB.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_MB * 1024 * 1024 },
});
// POST /transcribe — multipart upload (field "audio"); responds with the
// transcript, basic stats, and the structured summary fields.
app.post('/transcribe', upload.single('audio'), async (req, res, next) => {
  try {
    if (!req.file) return res.status(400).json({ error: 'Audio file required' });

    const { showName, episodeTitle, language } = req.body ?? {};
    // A blank form field arrives as ''; pass undefined so the service default applies.
    const transcript = await transcribeBuffer(
      req.file.buffer,
      req.file.originalname,
      language || undefined,
    );
    const summary = await summarizePodcast(transcript, showName, episodeTitle);

    // ''.split(/\s+/) is [''], so count on trimmed text and report 0 when
    // nothing was transcribed.
    const trimmedText = transcript.text.trim();
    const wordCount = trimmedText === '' ? 0 : trimmedText.split(/\s+/).length;

    res.json({
      success: true,
      duration: transcript.duration,
      wordCount,
      transcript: transcript.text,
      ...summary,
    });
  } catch (err) {
    next(err);
  }
});
// POST /transcribe/url — JSON body { url, showName?, episodeTitle?, language? };
// downloads the audio, then responds with the duration and summary fields.
app.post('/transcribe/url', async (req, res, next) => {
  try {
    const { url, showName, episodeTitle, language } = req.body ?? {};
    if (!url) return res.status(400).json({ error: 'Audio URL required' });

    // Reject malformed or non-web URLs as a client error (400) instead of
    // letting the download fail later as a 500.
    let protocol = null;
    if (typeof url === 'string') {
      try {
        ({ protocol } = new URL(url));
      } catch {
        protocol = null;
      }
    }
    if (protocol !== 'http:' && protocol !== 'https:') {
      return res.status(400).json({ error: 'Audio URL must be a valid http(s) URL' });
    }

    const transcript = await downloadAndTranscribe(url, language || undefined);
    const summary = await summarizePodcast(transcript, showName, episodeTitle);
    res.json({ success: true, duration: transcript.duration, ...summary });
  } catch (err) {
    next(err);
  }
});
// RSS feed: transcribe latest N episodes
// POST /transcribe/rss — JSON body { feedUrl, limit? }. Episodes are processed
// two at a time; a failing episode is reported in its own result entry instead
// of failing the whole request.
app.post('/transcribe/rss', async (req, res, next) => {
  try {
    const { feedUrl, limit = 3 } = req.body ?? {};
    if (!feedUrl) return res.status(400).json({ error: 'feedUrl required' });

    // Each episode costs a download plus two model calls, so the count is
    // validated and capped rather than passed to slice() as-is.
    const MAX_EPISODES = 10;
    const requested = Number(limit);
    if (!Number.isInteger(requested) || requested < 1) {
      return res.status(400).json({ error: 'limit must be a positive integer' });
    }
    const count = Math.min(requested, MAX_EPISODES);

    const parser = new Parser();
    const feed = await parser.parseURL(feedUrl);
    const episodes = (feed.items ?? []).slice(0, count);

    const limiter = pLimit(2);
    const processEpisode = async (ep) => {
      const audioUrl = ep.enclosure?.url;
      if (!audioUrl) return { title: ep.title, success: false, error: 'No audio URL' };
      try {
        const transcript = await downloadAndTranscribe(audioUrl);
        const summary = await summarizePodcast(transcript, feed.title, ep.title);
        return { title: ep.title, pubDate: ep.pubDate, success: true, ...summary };
      } catch (err) {
        return { title: ep.title, success: false, error: err.message };
      }
    };

    const results = await Promise.all(
      episodes.map((ep) => limiter(() => processEpisode(ep))),
    );

    res.json({ show: feed.title, processedEpisodes: results.length, episodes: results });
  } catch (err) {
    next(err);
  }
});
// Final error middleware (Express identifies it by its four parameters).
// Upload and body-parsing failures are client errors; everything else is a 500.
app.use((err, _req, res, next) => {
  // Once a response has started, hand over to Express's default handler.
  if (res.headersSent) return next(err);

  let status = 500;
  if (err.name === 'MulterError') {
    // e.g. an upload larger than the configured fileSize limit.
    status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  } else if (typeof err.type === 'string' && err.type.startsWith('entity.')) {
    // Errors raised by express.json(), e.g. malformed or oversized JSON bodies.
    status = err.status ?? 400;
  }

  res.status(status).json({ error: err.message });
});

app.listen(process.env.PORT ?? 3000, () => console.log('Podcast summarizer running'));
Testing
bash
# Upload audio file
curl -X POST http://localhost:3000/transcribe \
-F "audio=@episode.mp3" \
-F "showName=The Tim Ferriss Show" \
-F "episodeTitle=Episode 600"
# From URL
curl -X POST http://localhost:3000/transcribe/url \
-H "Content-Type: application/json" \
-d '{ "url": "https://example.com/podcast/episode-100.mp3", "language": "en" }'
# RSS feed
curl -X POST http://localhost:3000/transcribe/rss \
-H "Content-Type: application/json" \
-d '{ "feedUrl": "https://rss.art19.com/the-daily", "limit": 2 }'
Build 50 AI Automation Tools — Tool 30 of 50
Podcast summarizer is live. Phase 6 complete. Continue to Tool 31 to build an AI product description generator.
Summary
- Whisper verbose_json returns timestamps — enabling timestamped highlights without guesswork
- A temp file is one simple way to feed Whisper's file-based API (the OpenAI SDK's toFile helper can wrap an in-memory buffer instead) — always clean up with try/finally
- RSS batch processing with p-limit(2) prevents rate limit errors on the Whisper API
- Guest extraction makes episode summaries useful for booking and research workflows
- Persist summaries to SQLite to build a searchable podcast library across hundreds of episodes
Continue to Tool 31: AI Product Description Generator →
