AI · Node.js · Automation

AI Podcast Transcript Summarizer with Whisper

TT
TopicTrick Team
AI Podcast Transcript Summarizer with Whisper

AI Podcast Transcript Summarizer with Whisper

Upload any podcast audio file and receive a full transcript, structured summary, guest insights, and timestamped highlights. This tool combines OpenAI Whisper for transcription with GPT-4o for intelligent summarization.

This is Tool 30 of the Build 50 AI Automation Tools course.


What You'll Build

  • POST /transcribe — upload audio, get transcript + structured summary
  • OpenAI Whisper API for accurate multi-speaker transcription
  • GPT-4o analysis: summary, key quotes, timestamps, guest insights
  • POST /transcribe/url — transcribe and summarize an audio file from a remote URL
  • POST /transcribe/rss — process podcast RSS feed episodes in bulk

Setup

bash
mkdir ai-podcast && cd ai-podcast
npm init -y
npm pkg set type=module   # the code below uses ES module `import` syntax
npm install express multer openai rss-parser axios dotenv p-limit
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000
MAX_AUDIO_MB=25

Transcription Service

js
// src/services/transcribeService.js
import fs from 'fs';
import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
import OpenAI from 'openai';
import axios from 'axios';
import path from 'path';
import os from 'os';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Transcribe an in-memory audio file with the OpenAI Whisper API.
 *
 * The bytes are staged in a private temporary directory so they can be handed
 * to the API as a file read stream; the directory is always removed afterwards.
 *
 * @param {Buffer} buffer - Raw audio bytes.
 * @param {string} filename - Original file name. Only its extension is reused;
 *   the rest is discarded because it may be attacker-controlled.
 * @param {string} [language='en'] - Language hint forwarded to Whisper.
 * @returns {Promise<{text: string, segments: Array<{start: number, end: number, text: string}>, language: string, duration: number}>}
 *   Full text, trimmed per-segment text with start/end offsets, plus the
 *   language and duration reported by the API.
 * @throws Propagates file-system errors and any error raised by the API call.
 */
export async function transcribeBuffer(buffer, filename, language = 'en') {
  // `filename` can come straight from an upload, so it must never be used as a
  // path: "../x" would escape the temp directory, and two uploads sharing a
  // name would overwrite (and then delete) each other's file.
  // NOTE(review): the extension is kept on the assumption that the SDK names
  // the upload after the stream's path and the API uses it to detect the
  // audio format — confirm against the OpenAI SDK docs.
  const rawExt = path.extname(path.basename(String(filename ?? '')));
  const ext = /^\.[A-Za-z0-9]{1,8}$/.test(rawExt) ? rawExt : '';

  // mkdtemp gives every call its own uniquely named directory.
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'podcast-'));
  const tmpPath = path.join(tmpDir, `audio${ext}`);

  try {
    fs.writeFileSync(tmpPath, buffer);

    const transcription = await openai.audio.transcriptions.create({
      file: fs.createReadStream(tmpPath),
      model: 'whisper-1',
      language,
      response_format: 'verbose_json',
      timestamp_granularities: ['segment'],
    });

    return {
      text: transcription.text,
      segments: transcription.segments?.map(s => ({
        start: s.start,
        end: s.end,
        text: s.text.trim(),
      })) || [],
      language: transcription.language,
      duration: transcription.duration,
    };
  } finally {
    // `force` keeps cleanup from masking the original error if the write failed.
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}

/**
 * Download a remote audio file and transcribe it.
 *
 * The response is streamed to a file inside a private temporary directory,
 * read back into memory, and passed to `transcribeBuffer`. The temporary
 * directory is removed whether or not the download succeeds.
 *
 * @param {string} audioUrl - URL of the audio file to fetch.
 * @param {string} [language='en'] - Language hint forwarded to `transcribeBuffer`.
 * @returns {Promise<object>} Whatever `transcribeBuffer` resolves to.
 * @throws Propagates download, file-system and transcription errors.
 */
export async function downloadAndTranscribe(audioUrl, language = 'en') {
  // NOTE(review): `audioUrl` may be caller-supplied; this function does not
  // restrict hosts or response size, so callers should validate the URL.
  const filename = `podcast-${Date.now()}.mp3`;

  // A per-call directory avoids collisions when two downloads start in the
  // same millisecond and lets a single rmSync clean up after any failure.
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'podcast-dl-'));
  const tmpPath = path.join(tmpDir, filename);

  let buffer;
  try {
    const response = await axios({ url: audioUrl, method: 'GET', responseType: 'stream', timeout: 120_000 });
    await pipeline(response.data, createWriteStream(tmpPath));
    buffer = fs.readFileSync(tmpPath);
  } finally {
    // Runs on success and on error, so an aborted download leaves no partial file.
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }

  return transcribeBuffer(buffer, filename, language);
}

Summary Service

js
// src/services/summaryService.js
import OpenAI from 'openai';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Render an offset in seconds as a clock-style label.
 *
 * @param {number} seconds - Offset from the start of the audio, in seconds.
 * @returns {string} `H:MM:SS` once the offset reaches an hour, otherwise `M:SS`.
 */
function formatTimestamp(seconds) {
  const twoDigits = (value) => String(value).padStart(2, '0');

  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const wholeSeconds = Math.floor(seconds % 60);

  if (hours > 0) {
    return `${hours}:${twoDigits(minutes)}:${twoDigits(wholeSeconds)}`;
  }
  return `${minutes}:${twoDigits(wholeSeconds)}`;
}

/** Pick at most `maxCount` items spread evenly across `items`, preserving order. */
function sampleEvenly(items, maxCount) {
  if (items.length <= maxCount) return items;
  const step = items.length / maxCount;
  return Array.from({ length: maxCount }, (_, i) => items[Math.floor(i * step)]);
}

/**
 * Ask GPT-4o for a structured JSON summary of a podcast transcript.
 *
 * @param {{text: string, segments?: Array<{start: number, text: string}>}} transcript
 *   Transcript text plus optional timestamped segments.
 * @param {string} [showName=''] - Show name passed to the model as context.
 * @param {string} [episodeTitle=''] - Episode title passed to the model as context.
 * @returns {Promise<object>} The JSON object parsed from the model's reply.
 * @throws {Error} When the model reply is empty or is not valid JSON; API
 *   errors propagate unchanged.
 */
export async function summarizePodcast(transcript, showName = '', episodeTitle = '') {
  const MAX_TRANSCRIPT_CHARS = 80_000;
  const MAX_TIMESTAMPED_SEGMENTS = 30;

  // Very long transcripts are cut off (not chunked) to bound the prompt size.
  const text = transcript.text;
  const truncated = text.length > MAX_TRANSCRIPT_CHARS
    ? `${text.slice(0, MAX_TRANSCRIPT_CHARS)} [truncated]`
    : text;

  // Sample timestamps across the whole episode. Taking only the first N
  // segments would give the model real timestamps for the opening minutes only.
  const segmentsContext = sampleEvenly(transcript.segments ?? [], MAX_TIMESTAMPED_SEGMENTS)
    .map(s => `[${formatTimestamp(s.start)}] ${s.text}`)
    .join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are an expert podcast analyst. Analyze this podcast transcript and provide a comprehensive summary.

Return ONLY a JSON object:
{
  "episodeTitle": "episode title (infer from content if not provided)",
  "showName": "podcast show name if identifiable",
  "hosts": ["host names if identifiable"],
  "guests": [{ "name": "string", "title": "string or null", "expertise": "string" }],
  "oneSentenceSummary": "one sentence capture of the episode",
  "summary": "3-4 paragraph summary of the episode",
  "mainTopics": ["5-7 main topics discussed"],
  "highlights": [
    { "timestamp": "MM:SS", "description": "what happens at this moment", "significance": "why it matters" }
  ],
  "keyQuotes": [
    { "quote": "exact or near-exact quote", "speaker": "who said it", "context": "brief context" }
  ],
  "insights": ["5-7 most valuable insights from this episode"],
  "actionItems": ["concrete takeaways for listeners"],
  "booksMentioned": ["any books referenced"],
  "toolsMentioned": ["any tools, apps, or software mentioned"],
  "episodeRating": "educational | entertaining | both | unclear"
}`,
      },
      {
        role: 'user',
        content: `Show: ${showName}\nEpisode: ${episodeTitle}\n\nTimestamped transcript segments (sampled across the episode):\n${segmentsContext}\n\nFull transcript:\n${truncated}`,
      },
    ],
    temperature: 0.3,
    response_format: { type: 'json_object' },
  });

  // Fail with a clear message instead of letting JSON.parse choke on a
  // missing reply.
  const content = response.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error('Summary model returned no content');
  }

  try {
    return JSON.parse(content);
  } catch (err) {
    throw new Error('Summary model returned invalid JSON', { cause: err });
  }
}

Server

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import multer from 'multer';
import Parser from 'rss-parser';
import pLimit from 'p-limit';
import { transcribeBuffer, downloadAndTranscribe } from './services/transcribeService.js';
import { summarizePodcast } from './services/summaryService.js';

const app = express();
app.use(express.json());

// Upload size cap in megabytes, read from MAX_AUDIO_MB. A missing, non-numeric
// or non-positive value falls back to the default instead of producing a NaN
// limit.
// NOTE(review): 25 presumably mirrors the Whisper API upload limit — confirm.
const DEFAULT_MAX_MB = 25;
const parsedMaxMb = Number.parseInt(process.env.MAX_AUDIO_MB ?? '', 10);
const MAX_MB = Number.isFinite(parsedMaxMb) && parsedMaxMb > 0 ? parsedMaxMb : DEFAULT_MAX_MB;

// Uploads are held in memory and handed to the transcription service as a buffer.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_MB * 1024 * 1024 },
});

// POST /transcribe — multipart upload (field "audio") -> transcript + summary.
// Optional form fields: showName, episodeTitle, language.
app.post('/transcribe', upload.single('audio'), async (req, res, next) => {
  try {
    if (!req.file) return res.status(400).json({ error: 'Audio file required' });
    const { showName, episodeTitle, language } = req.body;

    // An empty form field arrives as '' and would override the service's
    // default language; pass undefined so the default applies.
    const transcript = await transcribeBuffer(
      req.file.buffer,
      req.file.originalname,
      language || undefined,
    );
    const summary = await summarizePodcast(transcript, showName, episodeTitle);

    // ''.split(/\s+/) yields [''], so an empty transcript must be counted as 0;
    // trimming also avoids counting leading/trailing whitespace as a word.
    const trimmedText = transcript.text.trim();
    const wordCount = trimmedText === '' ? 0 : trimmedText.split(/\s+/).length;

    res.json({
      success: true,
      duration: transcript.duration,
      wordCount,
      transcript: transcript.text,
      ...summary,
    });
  } catch (err) { next(err); }
});

// POST /transcribe/url — JSON body { url, showName?, episodeTitle?, language? }.
// Downloads the audio at `url`, then transcribes and summarizes it.
app.post('/transcribe/url', async (req, res, next) => {
  try {
    const { url, showName, episodeTitle, language } = req.body;
    if (!url) return res.status(400).json({ error: 'Audio URL required' });

    // The URL is caller-supplied: reject anything that is not a well-formed
    // http(s) URL before the server fetches it.
    // NOTE(review): this does not block internal/private hosts; add an
    // allow-list if the service is exposed to untrusted clients.
    let parsedUrl = null;
    if (typeof url === 'string') {
      try {
        parsedUrl = new URL(url);
      } catch {
        parsedUrl = null;
      }
    }
    if (!parsedUrl || !['http:', 'https:'].includes(parsedUrl.protocol)) {
      return res.status(400).json({ error: 'Audio URL must be a valid http(s) URL' });
    }

    const transcript = await downloadAndTranscribe(url, language);
    const summary = await summarizePodcast(transcript, showName, episodeTitle);

    res.json({ success: true, duration: transcript.duration, ...summary });
  } catch (err) { next(err); }
});

// RSS feed: transcribe the first N items of the feed.
// JSON body: { feedUrl, limit? } — `limit` defaults to 3 and must be a
// positive integer (numeric strings are accepted).
app.post('/transcribe/rss', async (req, res, next) => {
  try {
    const { feedUrl, limit = 3 } = req.body;
    if (!feedUrl) return res.status(400).json({ error: 'feedUrl required' });

    // Validate before slicing: a negative limit would make slice() return
    // "all but the last N" and a non-numeric one would silently process nothing.
    const count = Number(limit);
    if (!Number.isInteger(count) || count < 1) {
      return res.status(400).json({ error: 'limit must be a positive integer' });
    }

    const parser = new Parser();
    const feed = await parser.parseURL(feedUrl);
    const episodes = feed.items.slice(0, count);

    // At most two episodes are downloaded/transcribed at a time.
    const limiter = pLimit(2);
    const results = await Promise.all(
      episodes.map(ep =>
        limiter(async () => {
          const audioUrl = ep.enclosure?.url;
          // Same shape as the failure branch below so clients can rely on `success`.
          if (!audioUrl) return { title: ep.title, success: false, error: 'No audio URL' };
          try {
            const transcript = await downloadAndTranscribe(audioUrl);
            const summary = await summarizePodcast(transcript, feed.title, ep.title);
            return { title: ep.title, pubDate: ep.pubDate, success: true, ...summary };
          } catch (err) {
            // One failing episode is reported in place and does not abort the batch.
            return { title: ep.title, success: false, error: err.message };
          }
        })
      )
    );

    res.json({ show: feed.title, processedEpisodes: results.length, episodes: results });
  } catch (err) { next(err); }
});

// Final error handler: every route forwards failures here via next(err).
app.use((err, _req, res, next) => {
  // Once a response has started, defer to Express's default handler.
  if (res.headersSent) return next(err);

  let status = 500;
  if (err.code === 'LIMIT_FILE_SIZE') {
    // NOTE(review): assumed to be the code multer sets when an upload exceeds
    // `limits.fileSize` — that is a client error, not a server fault.
    status = 413;
  } else if (err.expose === true && Number.isInteger(err.status)) {
    // NOTE(review): assumed http-errors convention (used by express.json for
    // malformed bodies): `expose` marks a status that is safe to return.
    status = err.status;
  }

  res.status(status).json({ error: err.message });
});
app.listen(process.env.PORT ?? 3000, () => console.log('Podcast summarizer running'));

Testing

bash
# Upload audio file
# Multipart form upload; the file field must be named "audio" to match
# upload.single('audio') on the server. showName / episodeTitle are optional.
curl -X POST http://localhost:3000/transcribe \
  -F "audio=@episode.mp3" \
  -F "showName=The Tim Ferriss Show" \
  -F "episodeTitle=Episode 600"

# From URL
# JSON body; "url" is required, "language" / "showName" / "episodeTitle" are optional.
curl -X POST http://localhost:3000/transcribe/url \
  -H "Content-Type: application/json" \
  -d '{ "url": "https://example.com/podcast/episode-100.mp3", "language": "en" }'

# RSS feed
# JSON body; "feedUrl" is required, "limit" is the number of feed items to
# process (the server defaults to 3 when omitted).
curl -X POST http://localhost:3000/transcribe/rss \
  -H "Content-Type: application/json" \
  -d '{ "feedUrl": "https://rss.art19.com/the-daily", "limit": 2 }'

Build 50 AI Automation Tools — Tool 30 of 50

Podcast summarizer is live. Phase 6 complete. Continue to Tool 31 to build an AI product description generator.


    Summary

    • Whisper verbose_json returns timestamps — enabling timestamped highlights without guesswork
    • Temp file pattern is required for Whisper's file-based API — always clean up with try/finally
    • RSS batch processing with p-limit(2) prevents rate limit errors on the Whisper API
    • Guest extraction makes episode summaries useful for booking and research workflows
    • Persist summaries to SQLite to build a searchable podcast library across hundreds of episodes

    Continue to Tool 31: AI Product Description Generator →