AI · Node.js · Automation

News Aggregator & AI Summarizer

TT
TopicTrick Team
News Aggregator & AI Summarizer

News Aggregator & AI Summarizer

Information overload is real. This tool fetches articles from any set of RSS feeds, removes duplicates, and uses GPT-4o to generate a concise daily briefing — grouped by topic, with key takeaways for each story.

This is Tool 8 of the Build 50 AI Automation Tools course.


What You'll Build

  • Fetch articles from configurable RSS feeds
  • Deduplicate stories from multiple sources
  • Generate AI summaries grouped by topic
  • Schedule automatic daily briefings with node-cron

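Setup

The code in this tutorial uses ES module `import` syntax, so after running `npm init -y` add `"type": "module"` to the generated package.json (or run `npm pkg set type=module`).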

bash
mkdir news-aggregator && cd news-aggregator
npm init -y
npm install express rss-parser openai node-cron dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
PORT=3000

Feed Configuration

js
// src/config/feeds.js
/**
 * Feeds polled when building a briefing.
 * Each entry carries a display `name`, the feed `url`, and a `topic` label.
 */
export const FEEDS = [
  { name: 'TechCrunch', url: 'https://techcrunch.com/feed/', topic: 'Technology' },
  { name: 'The Verge', url: 'https://www.theverge.com/rss/index.xml', topic: 'Technology' },
  { name: 'Hacker News', url: 'https://hnrss.org/frontpage', topic: 'Engineering' },
  { name: 'Wired', url: 'https://www.wired.com/feed/rss', topic: 'Technology' },
  { name: 'MIT Tech Review', url: 'https://www.technologyreview.com/feed/', topic: 'AI & Science' },
  // Add your own feeds here
];

Aggregator Service

js
// src/services/aggregatorService.js
import Parser from 'rss-parser';
import OpenAI from 'openai';

// Shared feed parser. NOTE(review): the 10_000 timeout is presumably milliseconds (10 s) — confirm against rss-parser docs.
const parser = new Parser({ timeout: 10_000 });
// Shared OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Download one feed and convert its first ten items into article records.
 *
 * Any failure while fetching or mapping is logged with console.warn and
 * yields an empty list, so one broken feed never rejects the caller.
 *
 * @param {{ name: string, url: string, topic: string }} feed - Feed descriptor.
 * @returns {Promise<Array<object>>} Articles with title, link, summary,
 *   published, source and topic fields; empty on failure.
 */
async function fetchFeed(feed) {
  // Builds the article record for a single feed item.
  const toArticle = (entry) => {
    const snippet = entry.contentSnippet?.slice(0, 300);
    return {
      title: entry.title,
      link: entry.link,
      // Prefer the plain-text snippet; fall back to the item's summary field.
      summary: snippet || entry.summary?.slice(0, 300),
      published: entry.pubDate,
      source: feed.name,
      topic: feed.topic,
    };
  };

  try {
    const parsed = await parser.parseURL(feed.url);
    const firstTen = parsed.items.slice(0, 10);
    return firstTen.map((entry) => toArticle(entry));
  } catch (error) {
    console.warn(`Failed to fetch ${feed.name}: ${error.message}`);
    return [];
  }
}

/**
 * Drop articles whose normalized title was already seen, keeping the first occurrence.
 *
 * The key is the lower-cased title with everything except letters and digits
 * removed, truncated to 50 characters. Letters and digits are matched with
 * Unicode classes so non-Latin titles do not all collapse to the same empty key.
 * Articles with no usable title are keyed by their link instead; articles
 * with neither are always kept because there is nothing to compare.
 *
 * @param {Array<{ title?: string, link?: string }>} articles - Articles in priority order.
 * @returns {Array<object>} A new array without duplicates; input order is preserved.
 */
function deduplicateArticles(articles) {
  const seen = new Set();
  return articles.filter((article) => {
    // Feed items are not guaranteed to carry a title; never call string methods on undefined.
    const normalizedTitle = String(article.title ?? '')
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]/gu, '')
      .slice(0, 50);

    let key = null;
    if (normalizedTitle) {
      key = `title:${normalizedTitle}`;
    } else if (article.link) {
      key = `link:${article.link}`;
    }

    if (key === null) return true;
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}

/**
 * Ask the chat model to turn a list of articles into a structured daily briefing.
 *
 * Each article is sent with its source, title, topic, link and summary so the
 * model can copy the topic and link into its answer instead of inventing them.
 * The current local date is supplied as well, because the model cannot know it.
 *
 * @param {Array<{ source: string, title: string, summary?: string, link?: string, topic?: string }>} articles
 *   Articles to summarize; must contain at least one entry.
 * @returns {Promise<object>} The parsed JSON briefing returned by the model.
 * @throws {Error} When there are no articles, or the model returns no content.
 * @throws {SyntaxError} When the model's reply is not valid JSON.
 */
async function summarizeArticles(articles) {
  // An empty prompt would make the model fabricate a briefing from nothing.
  if (!Array.isArray(articles) || articles.length === 0) {
    throw new Error('No articles to summarize');
  }

  const articleList = articles.map((a, i) =>
    [
      `${i + 1}. [${a.source}] ${a.title}`,
      `Topic: ${a.topic ?? 'Unknown'}`,
      `Link: ${a.link ?? 'N/A'}`,
      a.summary || 'No summary available',
    ].join('\n')
  ).join('\n\n');

  // Local calendar date in YYYY-MM-DD form.
  const now = new Date();
  const today = [
    now.getFullYear(),
    String(now.getMonth() + 1).padStart(2, '0'),
    String(now.getDate()).padStart(2, '0'),
  ].join('-');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are a professional news editor. Create a concise daily briefing from these articles.
Return ONLY a JSON object — no markdown:
{
  "date": "today's date YYYY-MM-DD",
  "headline": "string — 1 sentence capturing the most important story of the day",
  "topStories": [
    {
      "title": "string",
      "source": "string",
      "topic": "string",
      "summary": "2-3 sentence AI summary of this story",
      "significance": "1 sentence explaining why this matters",
      "link": "string"
    }
  ],
  "quickBullets": ["5-8 brief bullet points of other notable stories"],
  "editorNote": "1-2 sentence commentary on the day's news themes"
}
Include the top 8-10 most newsworthy stories. Ignore duplicates.
Today's date is ${today}. Copy each story's "topic" and "link" verbatim from the article list; never invent URLs.`,
      },
      { role: 'user', content: articleList },
    ],
    temperature: 0.4,
    response_format: { type: 'json_object' },
  });

  const content = response.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error('Model returned an empty briefing');
  }
  return JSON.parse(content);
}

/**
 * Build a briefing from a set of feeds: fetch, deduplicate, sort newest-first,
 * then summarize the 50 most recent articles.
 *
 * @param {Array<{ name: string, url: string, topic: string }>} feeds - Feeds to poll.
 * @returns {Promise<object>} The summarizer's briefing plus `totalFetched`
 *   (articles before deduplication) and `afterDedup` (articles after).
 */
export async function generateDailyBriefing(feeds) {
  // Fetch all feeds in parallel
  const perFeed = await Promise.all(feeds.map((feed) => fetchFeed(feed)));
  const allArticles = perFeed.flat();
  const deduped = deduplicateArticles(allArticles);

  // Missing or unparseable dates yield NaN, which would make the comparator
  // inconsistent; treat them as the oldest possible so they sort last.
  const toTimestamp = (value) => {
    const ms = new Date(value).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };

  // Sort by recency (on a copy, so `deduped` keeps its original order)
  const sorted = [...deduped].sort(
    (a, b) => toTimestamp(b.published) - toTimestamp(a.published)
  );

  const briefing = await summarizeArticles(sorted.slice(0, 50));
  return { totalFetched: allArticles.length, afterDedup: deduped.length, ...briefing };
}

Server + Scheduled Job

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import cron from 'node-cron';
import { generateDailyBriefing } from './services/aggregatorService.js';
import { FEEDS } from './config/feeds.js';

const app = express();

// Most recently generated briefing; stays null until one has been produced.
let latestBriefing = null;

// GET /briefing — build a fresh briefing now, cache it, and return it.
app.get('/briefing', async (_req, res, next) => {
  try {
    latestBriefing = await generateDailyBriefing(FEEDS);
    res.json({ success: true, briefing: latestBriefing });
  } catch (error) {
    // Hand failures to the error-handling middleware.
    next(error);
  }
});

// GET /briefing/latest — return the cached briefing, or 404 when none exists yet.
app.get('/briefing/latest', (_req, res) => {
  if (latestBriefing) {
    res.json({ success: true, briefing: latestBriefing });
    return;
  }
  res.status(404).json({ error: 'No briefing generated yet. Call GET /briefing first.' });
});

// Regenerate the cached briefing every day at 07:00.
// NOTE(review): node-cron presumably evaluates this in the server's local time
// zone unless a timezone option is passed — confirm for your deployment.
cron.schedule('0 7 * * *', async () => {
  console.log('Generating daily briefing...');
  try {
    latestBriefing = await generateDailyBriefing(FEEDS);
    // The model's JSON is not schema-validated, so topStories may be absent;
    // a missing list must not be reported as a failed (yet already stored) briefing.
    const storyCount = latestBriefing.topStories?.length ?? 0;
    console.log(`Briefing generated: ${storyCount} top stories`);
  } catch (err) {
    console.error('Briefing failed:', err.message);
  }
});

// Liveness probe.
app.get('/health', (_req, res) => {
  res.json({ status: 'ok' });
});

// Error-handling middleware: replies 500 with the error's message.
app.use((err, _req, res, _next) => {
  res.status(500).json({ error: err.message });
});

// Listen on PORT from the environment, defaulting to 3000.
app.listen(process.env.PORT ?? 3000, () => {
  console.log('News Aggregator running');
});

Testing

bash
curl http://localhost:3000/briefing

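Sample response (this shows the contents of the `briefing` field; the actual reply is wrapped as `{ "success": true, "briefing": { ... } }`, and the briefing also carries `totalFetched` and `afterDedup` counts):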

json
{
  "date": "2025-11-15",
  "headline": "OpenAI announces GPT-5 with 10x improved reasoning as tech giants accelerate AI investment",
  "topStories": [
    {
      "title": "OpenAI Unveils GPT-5 with Breakthrough Reasoning Capabilities",
      "source": "TechCrunch",
      "topic": "AI & Science",
      "summary": "OpenAI launched GPT-5, claiming a 10x improvement in complex reasoning tasks. The model passes bar exams and medical licensing tests at expert level. Pricing starts at $0.01 per 1k tokens.",
      "significance": "Represents the biggest capability leap in AI since GPT-4, accelerating AI adoption across professional sectors.",
      "link": "https://techcrunch.com/..."
    }
  ],
  "quickBullets": [
    "Apple reports record $98B quarter, services revenue up 16% YoY",
    "EU finalizes AI Act enforcement guidelines for high-risk systems",
    "Startup raises $200M to build autonomous AI software engineers"
  ],
  "editorNote": "Today's news is dominated by AI capability announcements and regulatory developments — two forces that will define the tech landscape through 2026."
}

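Email Delivery

This optional step needs one more package (`npm install nodemailer`) and three extra variables in `.env`: `SMTP_HOST`, `SMTP_USER`, and `SMTP_PASS`.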

js
import nodemailer from 'nodemailer';

/**
 * Send a briefing to a list of recipients as a plain-text email.
 *
 * SMTP host and credentials are read from SMTP_HOST, SMTP_USER and SMTP_PASS;
 * the connection uses port 587.
 *
 * @param {{ date: string, headline: string, topStories?: Array<{ title: string, source: string, summary: string }> }} briefing
 *   Briefing to deliver.
 * @param {string[]} recipients - Destination email addresses.
 * @returns {Promise<void>} Resolves once the message has been handed to the transport.
 */
export async function emailBriefing(briefing, recipients) {
  // The nodemailer factory is `createTransport`; `createTransporter` does not exist.
  const transporter = nodemailer.createTransport({
    host: process.env.SMTP_HOST,
    port: 587,
    auth: { user: process.env.SMTP_USER, pass: process.env.SMTP_PASS },
  });

  // A briefing without a story list still gets its date and headline delivered.
  const stories = briefing.topStories ?? [];
  const text = `DAILY BRIEFING — ${briefing.date}\n\n${briefing.headline}\n\n` +
    stories.map(s => `• ${s.title} (${s.source})\n  ${s.summary}\n`).join('\n');

  // Only add an ellipsis when the headline was actually cut off.
  const maxSubjectHeadline = 60;
  const subjectHeadline = briefing.headline.length > maxSubjectHeadline
    ? `${briefing.headline.slice(0, maxSubjectHeadline)}...`
    : briefing.headline;

  await transporter.sendMail({
    from: 'briefing@yourdomain.com',
    to: recipients.join(', '),
    subject: `Daily Briefing: ${subjectHeadline}`,
    text,
  });
}

Build 50 AI Automation Tools — Tool 8 of 50

Your news aggregator is live. Continue to Tool 9 to build an AI product price monitor with smart alerts.


    Summary

    • rss-parser fetches any RSS/Atom feed reliably with timeout handling
    • Deduplication by normalized title (lower-cased, punctuation stripped, first 50 characters) removes the same story appearing across multiple sources
    • GPT-4o briefing generation groups articles by significance and writes editorial summaries
    • node-cron schedules daily generation without a separate task queue
    • Add Nodemailer to deliver the briefing to subscribers automatically each morning

    Continue to Tool 9: Product Price Monitor →