phase-10.5: screener enhancements

2026-06-11 19:18:19 -04:00
parent bac00ab5d5
commit e953822bab
51 changed files with 3745 additions and 36 deletions
@@ -0,0 +1,165 @@
+import { createHash } from 'crypto';
+import { NewsRepository } from './NewsRepository';
+import type { CatalystType, IngestStats, NormalizedStory } from '../shared/types';
+
+/**
+ * Shared ingest pipeline (FREE-DATA-STACK §2) — every source flows through
+ * here: FILTER → DEDUPE → CLASSIFY → STORE. All drops happen BEFORE insert,
+ * cheapest check first, so the tables stay small by construction (§4).
+ *
+ * Stories whose `source` is `'edgar'` are treated as filings: they skip the
+ * noise blocklist, the title dedupe and the daily cap, but still go through
+ * the universe filter and the url-hash dedupe.
+ */
+export class NewsPipeline {
+  /** §4.4 — max stories linked per ticker per day (filings exempt). */
+  private static readonly DAILY_CAP = 25;
+  /** §4.3 — syndicated-copy window for title dedupe. */
+  private static readonly TITLE_WINDOW_MS = 48 * 60 * 60 * 1000;
+
+  /** §4.2 — headlines with no decision value are never stored. */
+  private static readonly NOISE_PATTERNS: RegExp[] = [
+    /\b\d+\s+(?:best|top|hot)\s+stocks?\b/i,
+    /\bstocks?\s+to\s+(?:watch|buy|sell)\b/i,
+    /\bprice\s+target\s+(?:raised|lowered|reiterated|maintained)\b/i,
+    /\b(?:premarket|after-?hours?)\s+movers?\b/i,
+    /\bwhy\s+.{0,40}\s+stock\s+(?:jumped|popped|soared|plunged|tanked)\b/i,
+    /\bmotley\s+fool\b/i,
+  ];
+
+  constructor(private readonly repo: NewsRepository) {}
+
+  /**
+   * Run a batch of normalized stories through the pipeline.
+   *
+   * @param stories  Batch to ingest; processed one at a time, in order.
+   * @param universe Tracked-ticker set from UniverseProvider (upper-case).
+   * @returns Counters for the batch: one of `stored` / `dropped*` is
+   *          incremented per story, and `fetched` is the batch size.
+   */
+  ingest(stories: NormalizedStory[], universe: Set<string>): IngestStats {
+    const stats: IngestStats = {
+      fetched: stories.length,
+      stored: 0,
+      droppedNoUniverseTicker: 0,
+      droppedNoise: 0,
+      droppedDuplicate: 0,
+      droppedCapped: 0,
+    };
+
+    for (const story of stories) {
+      this.ingestOne(story, universe, stats);
+    }
+    return stats;
+  }
+
+  /** Apply every pipeline stage to a single story, updating `stats` in place. */
+  private ingestOne(story: NormalizedStory, universe: Set<string>, stats: IngestStats): void {
+    const isFiling = story.source === 'edgar';
+
+    // 1. Universe filter — the big one (§4.1). Tickers are upper-cased and
+    //    de-duplicated before the membership test.
+    const tickers = [...new Set(story.tickers.map((t) => t.toUpperCase()))].filter((t) =>
+      universe.has(t),
+    );
+    if (tickers.length === 0) {
+      stats.droppedNoUniverseTicker++;
+      return;
+    }
+
+    // 2. Noise blocklist (§4.2) — filings are never noise
+    if (!isFiling && NewsPipeline.isNoise(story.headline)) {
+      stats.droppedNoise++;
+      return;
+    }
+
+    // 3. Dedupe (§4.3): url hash (storage-level PK) + recent title match.
+    //    Title dedupe targets syndicated copies of the same article. Filing
+    //    headlines are synthesized from form type + company name, so two
+    //    distinct filings by one company inside the window share a title;
+    //    for filings only the url hash decides.
+    const urlHash = NewsPipeline.sha(story.url);
+    const titleHash = NewsPipeline.sha(NewsPipeline.normalizeTitle(story.headline));
+    if (!isFiling) {
+      const titleCutoff = new Date(Date.now() - NewsPipeline.TITLE_WINDOW_MS).toISOString();
+      if (this.repo.titleSeenSince(titleHash, titleCutoff)) {
+        stats.droppedDuplicate++;
+        return;
+      }
+    }
+
+    // 4. Per-ticker daily cap (§4.4) — filings keep priority past the cap.
+    //    The day bucket is the first 10 characters of publishedAt.
+    const day = story.publishedAt.slice(0, 10);
+    const eligible = isFiling
+      ? tickers
+      : tickers.filter((t) => this.repo.countTickerDay(t, day) < NewsPipeline.DAILY_CAP);
+    if (eligible.length === 0) {
+      stats.droppedCapped++;
+      return;
+    }
+
+    // 5. Classify + store. A poller-supplied hint wins over the keyword classifier.
+    const catalyst = story.catalystHint ?? NewsPipeline.classify(story.headline);
+    const inserted = this.repo.insertArticle({
+      urlHash,
+      titleHash,
+      tickers: eligible,
+      headline: story.headline.trim(),
+      body: story.body ?? null,
+      source: story.source,
+      catalyst,
+      url: story.url,
+      publishedAt: story.publishedAt,
+    });
+    if (!inserted) {
+      stats.droppedDuplicate++; // url_hash collision — already stored
+      return;
+    }
+
+    for (const ticker of eligible) {
+      this.repo.linkTicker(ticker, day, urlHash);
+    }
+    stats.stored++;
+  }
+
+  /**
+   * Retention jobs (§5) — call once daily.
+   *
+   * @param now Reference time for the cutoffs (defaults to the current time).
+   * @returns Bodies purged (older than 90 days) and rows deleted (older than
+   *          548 days, ~18 months), as reported by the repository.
+   */
+  runRetention(now = new Date()): { bodiesPurged: number; rowsDeleted: number } {
+    const bodyCutoff = new Date(now.getTime() - 90 * 24 * 60 * 60 * 1000).toISOString();
+    const rowCutoff = new Date(now.getTime() - 548 * 24 * 60 * 60 * 1000).toISOString(); // ~18mo
+    return {
+      bodiesPurged: this.repo.purgeBodiesBefore(bodyCutoff),
+      rowsDeleted: this.repo.deleteUnreferencedBefore(rowCutoff),
+    };
+  }
+
+  // ── Pure helpers (exposed for tests) ──────────────────────────────────────
+
+  /** True when the headline matches any entry of the noise blocklist. */
+  static isNoise(headline: string): boolean {
+    return NewsPipeline.NOISE_PATTERNS.some((re) => re.test(headline));
+  }
+
+  /**
+   * Keyword catalyst classifier. Order matters: M&A beats earnings
+   * ("acquisition closes in Q2" is an M&A story).
+   *
+   * @returns The first matching catalyst, or null when no keyword matches.
+   */
+  static classify(headline: string): CatalystType | null {
+    const h = headline.toLowerCase();
+    if (
+      /\b(acqui[sr]|merger|takeover|buyout|tender offer|business combination|to be acquired)/.test(
+        h,
+      )
+    )
+      return 'ma';
+    if (/\b(guidance|outlook|forecast|raises full[- ]year|lowers full[- ]year)/.test(h))
+      return 'guidance';
+    if (
+      /\b(earnings|results|eps|quarterly report|q[1-4] (?:20\d\d|results)|fiscal (?:year|q[1-4]))/.test(
+        h,
+      )
+    )
+      return 'earnings';
+    if (
+      /\b(sec |fda|doj|ftc|antitrust|investigation|subpoena|lawsuit|settl|recall|approval)/.test(h)
+    )
+      return 'regulatory';
+    if (/\b(fed |fomc|inflation|cpi|jobs report|rate (?:cut|hike)|treasury yield)/.test(h))
+      return 'macro';
+    return null;
+  }
+
+  /**
+   * Canonical form of a headline for title dedupe: lower-case, ASCII letters,
+   * digits and single spaces only.
+   */
+  static normalizeTitle(title: string): string {
+    return (
+      title
+        .toLowerCase()
+        // Turn every whitespace run (tabs, newlines, nbsp) into a space first,
+        // so the punctuation strip below cannot glue neighbouring words together.
+        .replace(/\s+/g, ' ')
+        .replace(/[^a-z0-9 ]/g, '')
+        .replace(/\s+/g, ' ')
+        .trim()
+    );
+  }
+
+  /** Hex-encoded SHA-256 of the input. */
+  private static sha(input: string): string {
+    return createHash('sha256').update(input).digest('hex');
+  }
+}
@@ -0,0 +1,76 @@
+import { DatabaseConnection } from '../shared/db/index';
+import { QueryBuilder } from '../shared/utils/QueryBuilder';
+import type { NewsArticleRow } from '../shared/types';
+
+/**
+ * Data-access layer for the free-tier news pipeline (FREE-DATA-STACK §3).
+ * Every method maps to exactly one named query; the decisions about what to
+ * filter or dedupe are made by NewsPipeline, not here.
+ */
+export class NewsRepository {
+  constructor(private readonly db: DatabaseConnection) {}
+
+  /**
+   * Insert one article row, stamped with the current time.
+   *
+   * @returns true if the row was inserted (false = duplicate url_hash).
+   */
+  insertArticle(a: {
+    urlHash: string;
+    titleHash: string;
+    tickers: string[];
+    headline: string;
+    body: string | null;
+    source: string;
+    catalyst: string | null;
+    url: string;
+    publishedAt: string;
+  }): boolean {
+    // The ticker list is persisted as a JSON string.
+    const tickerJson = JSON.stringify(a.tickers);
+    const insertedAt = new Date().toISOString();
+    const statement = new QueryBuilder('NEWS_QUERIES.INSERT_ARTICLE', [
+      a.urlHash,
+      a.titleHash,
+      tickerJson,
+      a.headline,
+      a.body,
+      a.source,
+      a.catalyst,
+      a.url,
+      a.publishedAt,
+      insertedAt,
+    ]);
+    const changed = this.db.run(statement);
+    return changed > 0;
+  }
+
+  /** True when the title-seen query yields a row for this hash and cutoff. */
+  titleSeenSince(titleHash: string, sinceIso: string): boolean {
+    const hit = this.db.get(
+      new QueryBuilder('NEWS_QUERIES.TITLE_SEEN_SINCE', [titleHash, sinceIso]),
+    );
+    return hit !== null && hit !== undefined;
+  }
+
+  /** Record a (ticker, day) → article link. */
+  linkTicker(ticker: string, day: string, urlHash: string): void {
+    this.db.run(new QueryBuilder('NEWS_QUERIES.INSERT_CATALYST_LINK', [ticker, day, urlHash]));
+  }
+
+  /** Value of `n` from the ticker/day count query; 0 when no row comes back. */
+  countTickerDay(ticker: string, day: string): number {
+    const row = this.db.get<{ n: number }>(
+      new QueryBuilder('NEWS_QUERIES.COUNT_TICKER_DAY', [ticker, day]),
+    );
+    if (row == null) return 0;
+    return row.n ?? 0;
+  }
+
+  /** Stored articles for a ticker (upper-cased here) since the given day. */
+  newsForTicker(ticker: string, sinceDay: string): NewsArticleRow[] {
+    const symbol = ticker.toUpperCase();
+    return this.db.all<NewsArticleRow>(
+      new QueryBuilder('NEWS_QUERIES.SELECT_TICKER_NEWS', [symbol, sinceDay]),
+    );
+  }
+
+  /** Rows from the recent-articles query, with `limit` passed as its only parameter. */
+  recent(limit: number): NewsArticleRow[] {
+    return this.db.all<NewsArticleRow>(new QueryBuilder('NEWS_QUERIES.SELECT_RECENT', [limit]));
+  }
+
+  /** Retention: null out bodies older than cutoff. Returns rows changed. */
+  purgeBodiesBefore(cutoffIso: string): number {
+    const statement = new QueryBuilder('NEWS_QUERIES.PURGE_BODIES_BEFORE', [cutoffIso]);
+    return this.db.run(statement);
+  }
+
+  /** Retention: delete old rows no ticker references. Returns rows deleted. */
+  deleteUnreferencedBefore(cutoffIso: string): number {
+    const statement = new QueryBuilder('NEWS_QUERIES.DELETE_UNREFERENCED_BEFORE', [cutoffIso]);
+    return this.db.run(statement);
+  }
+}
@@ -0,0 +1,106 @@
+import { NewsPipeline } from './NewsPipeline';
+import { UniverseProvider } from './UniverseProvider';
+import { EdgarPoller } from './pollers/EdgarPoller';
+import { PrWirePoller } from './pollers/PrWirePoller';
+import type { IngestStats, Logger } from '../shared/types';
+
+/**
+ * In-process polling scheduler (FREE-DATA-STACK §2). No Redis/BullMQ at the
+ * free tier — plain intervals, unref'd so they never hold the process open.
+ *
+ * Cadences: EDGAR 10 min, PR-wire 15 min, retention daily.
+ * Disable entirely with NEWS_POLL=off (e.g. when running bin/poll-news.ts
+ * from cron instead of inside the server).
+ */
+export class NewsScheduler {
+  private static readonly EDGAR_INTERVAL_MS = 10 * 60 * 1000;
+  private static readonly PRWIRE_INTERVAL_MS = 15 * 60 * 1000;
+  private static readonly RETENTION_INTERVAL_MS = 24 * 60 * 60 * 1000;
+  /** Delay before the one-off priming run after start(). */
+  private static readonly BOOT_DELAY_MS = 15_000;
+
+  /** Recurring interval handles; non-empty means "running". */
+  private timers: NodeJS.Timeout[] = [];
+  /** One-shot boot timer, tracked separately so it is cleared as a timeout. */
+  private bootTimer: NodeJS.Timeout | null = null;
+
+  constructor(
+    private readonly pipeline: NewsPipeline,
+    private readonly universe: UniverseProvider,
+    private readonly edgar: EdgarPoller,
+    private readonly prwire: PrWirePoller,
+    private readonly logger: Logger,
+  ) {}
+
+  /** Start all intervals plus the boot priming run. No-op when already running. */
+  start(): void {
+    if (this.timers.length > 0) return; // already running
+
+    const every = (ms: number, fn: () => void) => {
+      const t = setInterval(fn, ms);
+      t.unref(); // never keep the process alive just for polling
+      this.timers.push(t);
+    };
+
+    every(NewsScheduler.EDGAR_INTERVAL_MS, () => void this.runEdgar());
+    every(NewsScheduler.PRWIRE_INTERVAL_MS, () => void this.runPrWire());
+    every(NewsScheduler.RETENTION_INTERVAL_MS, () => this.runRetention());
+
+    // Prime once shortly after boot (delay keeps server startup fast)
+    this.bootTimer = setTimeout(() => void this.runOnce(), NewsScheduler.BOOT_DELAY_MS);
+    this.bootTimer.unref();
+
+    this.logger.log('News scheduler started (EDGAR 10m, PR-wire 15m, retention 24h)');
+  }
+
+  /** Cancel every pending timer. Safe to call when not running. */
+  stop(): void {
+    for (const t of this.timers) clearInterval(t);
+    this.timers = [];
+    if (this.bootTimer !== null) {
+      clearTimeout(this.bootTimer);
+      this.bootTimer = null;
+    }
+  }
+
+  /**
+   * One poll cycle of both sources, EDGAR first then PR-wire — used at boot
+   * and by bin/poll-news.ts. Retention is not part of this cycle.
+   *
+   * Never rejects: a failed source is logged and reported as all-zero stats.
+   */
+  async runOnce(): Promise<{ edgar: IngestStats; prwire: IngestStats }> {
+    const edgar = await this.runEdgar();
+    const prwire = await this.runPrWire();
+    return { edgar, prwire };
+  }
+
+  /** Poll EDGAR and ingest the result; failures are logged, not thrown. */
+  private async runEdgar(): Promise<IngestStats> {
+    try {
+      // One universe snapshot for both the poller's pre-filter and the ingest.
+      const universe = this.universe.getUniverse();
+      const stories = await this.edgar.poll(universe);
+      const stats = this.pipeline.ingest(stories, universe);
+      if (stats.stored > 0) this.logger.log(`EDGAR: stored ${stats.stored}/${stats.fetched}`);
+      return stats;
+    } catch (err) {
+      this.logger.warn('EDGAR poll cycle failed:', NewsScheduler.describeError(err));
+      return NewsScheduler.emptyStats();
+    }
+  }
+
+  /** Poll the PR-wire feeds and ingest the result; failures are logged, not thrown. */
+  private async runPrWire(): Promise<IngestStats> {
+    try {
+      const stories = await this.prwire.poll();
+      const stats = this.pipeline.ingest(stories, this.universe.getUniverse());
+      if (stats.stored > 0) this.logger.log(`PR-wire: stored ${stats.stored}/${stats.fetched}`);
+      return stats;
+    } catch (err) {
+      this.logger.warn('PR-wire poll cycle failed:', NewsScheduler.describeError(err));
+      return NewsScheduler.emptyStats();
+    }
+  }
+
+  /** Run the pipeline's retention job and log the outcome; failures are logged, not thrown. */
+  private runRetention(): void {
+    try {
+      const { bodiesPurged, rowsDeleted } = this.pipeline.runRetention();
+      this.logger.log(`News retention: ${bodiesPurged} bodies purged, ${rowsDeleted} rows deleted`);
+    } catch (err) {
+      this.logger.warn('News retention failed:', NewsScheduler.describeError(err));
+    }
+  }
+
+  /** Message for a caught value; anything may be thrown, not only Error instances. */
+  private static describeError(err: unknown): string {
+    return err instanceof Error ? err.message : String(err);
+  }
+
+  /** All-zero stats, returned when a cycle fails. */
+  private static emptyStats(): IngestStats {
+    return {
+      fetched: 0,
+      stored: 0,
+      droppedNoUniverseTicker: 0,
+      droppedNoise: 0,
+      droppedDuplicate: 0,
+      droppedCapped: 0,
+    };
+  }
+}
@@ -0,0 +1,50 @@
+import { DatabaseConnection } from '../shared/db/index';
+import { QueryBuilder } from '../shared/utils/QueryBuilder';
+
+/**
+ * The tracked-ticker universe (FREE-DATA-STACK §4.1):
+ * watchlist ∪ holdings ∪ tickers screened in the last 30 days.
+ *
+ * The news pipeline uses this set as its first filter: a story is only kept
+ * when it mentions a ticker in the universe. The result is cached for
+ * 10 minutes because the universe changes slowly.
+ */
+export class UniverseProvider {
+  private static readonly CACHE_TTL_MS = 10 * 60 * 1000;
+  private static readonly SNAPSHOT_LOOKBACK_DAYS = 30;
+
+  /** Last computed universe; `expiresAt` of 0 means "never computed / invalidated". */
+  private cache: { universe: Set<string>; expiresAt: number } = {
+    universe: new Set(),
+    expiresAt: 0,
+  };
+
+  constructor(private readonly db: DatabaseConnection) {}
+
+  /**
+   * Upper-cased union of the three ticker sources. Served from the cache
+   * while it is fresh, otherwise re-read from the database and re-cached.
+   */
+  getUniverse(): Set<string> {
+    const cached = this.cache;
+    if (Date.now() < cached.expiresAt) {
+      return cached.universe;
+    }
+
+    // Day (YYYY-MM-DD) marking the start of the snapshot lookback window.
+    const lookbackMs = UniverseProvider.SNAPSHOT_LOOKBACK_DAYS * 24 * 60 * 60 * 1000;
+    const sinceDay = new Date(Date.now() - lookbackMs).toISOString().slice(0, 10);
+
+    const sources: { ticker: string }[][] = [
+      this.db.all(new QueryBuilder('UNIVERSE_QUERIES.DISTINCT_WATCHLIST_TICKERS')),
+      this.db.all(new QueryBuilder('UNIVERSE_QUERIES.DISTINCT_HOLDING_TICKERS')),
+      this.db.all(new QueryBuilder('UNIVERSE_QUERIES.DISTINCT_SNAPSHOT_TICKERS_SINCE', [sinceDay])),
+    ];
+
+    const merged = new Set<string>();
+    for (const rows of sources) {
+      for (const row of rows) {
+        merged.add(row.ticker.toUpperCase());
+      }
+    }
+
+    const expiresAt = Date.now() + UniverseProvider.CACHE_TTL_MS;
+    this.cache = { universe: merged, expiresAt };
+    return merged;
+  }
+
+  /** Force next getUniverse() to re-read (e.g. after a watchlist change). */
+  invalidate(): void {
+    this.cache.expiresAt = 0;
+  }
+}
@@ -0,0 +1,10 @@
+// News domain — free-tier news ingestion pipeline (FREE-DATA-STACK.md)
+
+export { NewsController } from './news.controller';
+export { NewsRepository } from './NewsRepository';
+export { NewsPipeline } from './NewsPipeline';
+export { UniverseProvider } from './UniverseProvider';
+export { NewsScheduler } from './NewsScheduler';
+export { EdgarPoller } from './pollers/EdgarPoller';
+export { PrWirePoller } from './pollers/PrWirePoller';
+export { RssParser } from './rss';
@@ -0,0 +1,90 @@
+import type { FastifyInstance, FastifyRequest } from 'fastify';
+import { NewsRepository } from './NewsRepository';
+import { YahooFinanceClient } from '../shared';
+import type { NewsArticleRow } from '../shared/types';
+
+/** Wire shape of one story as returned by the news endpoints. */
+interface StoryView {
+  headline: string;
+  tickers: string[];
+  source: string;
+  catalyst: string | null;
+  url: string;
+  /** ISO-8601 timestamp string; also the sort key in byTicker. */
+  publishedAt: string;
+}
+
+/**
+ * Read side of the news pipeline. Stored pipeline stories (curated, catalyst-
+ * tagged, historical) are merged with a live per-ticker Yahoo search on
+ * request — stored gives depth, live gives freshness. The RSS firehoses
+ * can't be queried per-ticker on demand, which is why they go through the
+ * polling pipeline instead.
+ */
+export class NewsController {
+  private static readonly DEFAULT_DAYS = 7;
+  private static readonly MAX_DAYS = 90;
+  private static readonly DEFAULT_LIMIT = 50;
+  private static readonly MAX_LIMIT = 200;
+
+  constructor(
+    private readonly repo: NewsRepository,
+    private readonly yahoo?: YahooFinanceClient,
+  ) {}
+
+  /** Attach both GET routes to the Fastify instance. */
+  register(app: FastifyInstance): void {
+    app.get('/api/news/recent', this.recent.bind(this));
+    app.get('/api/news/:ticker', this.byTicker.bind(this));
+  }
+
+  /**
+   * GET /api/news/:ticker?days=7&live=1 (live Yahoo merge on by default).
+   *
+   * `days` is read as a whole number in [1, 90]; anything missing,
+   * non-numeric or below 1 falls back to 7. `live=0` skips the Yahoo merge.
+   */
+  private async byTicker(req: FastifyRequest) {
+    const ticker = (req.params as { ticker: string }).ticker.toUpperCase();
+    const query = req.query as { days?: string; live?: string };
+    // Bounded on both sides: a negative or huge-magnitude value would put
+    // sinceDay in the future or make the Date invalid (toISOString throws).
+    const days = NewsController.boundedInt(
+      query.days,
+      NewsController.DEFAULT_DAYS,
+      NewsController.MAX_DAYS,
+    );
+    const live = query.live !== '0';
+    const sinceDay = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
+
+    const stored = this.repo.newsForTicker(ticker, sinceDay).map(NewsController.serialize);
+    const fresh = live ? await this.fetchLive(ticker) : [];
+
+    // Merge, dedupe by URL (stored wins over live — first entry is kept), newest first
+    const byUrl = new Map<string, StoryView>();
+    for (const s of [...stored, ...fresh]) byUrl.set(s.url, byUrl.get(s.url) ?? s);
+    const stories = [...byUrl.values()].sort((a, b) => b.publishedAt.localeCompare(a.publishedAt));
+
+    return { ticker, days, stories };
+  }
+
+  /**
+   * Live per-ticker Yahoo news search — freshness layer, best-effort.
+   * Returns [] when no client was injected or when the search throws.
+   */
+  private async fetchLive(ticker: string): Promise<StoryView[]> {
+    if (!this.yahoo) return [];
+    try {
+      const items = await this.yahoo.search(ticker, { newsCount: 8 });
+      return items
+        .filter((n) => n.title && n.link)
+        .map((n) => ({
+          headline: n.title as string,
+          tickers: [ticker],
+          source: 'yahoo',
+          catalyst: null,
+          url: n.link as string,
+          publishedAt: NewsController.toIso(n.providerPublishTime),
+        }));
+    } catch {
+      // Deliberate: the live layer must never fail the request.
+      return [];
+    }
+  }
+
+  /**
+   * GET /api/news/recent?limit=50
+   *
+   * `limit` is read as a whole number in [1, 200]; anything missing,
+   * non-numeric or below 1 falls back to 50.
+   * NOTE(review): if the backing store is SQLite, a negative LIMIT means
+   * "no limit", which is why the lower bound matters — confirm against the query.
+   */
+  private async recent(req: FastifyRequest) {
+    const limit = NewsController.boundedInt(
+      (req.query as { limit?: string }).limit,
+      NewsController.DEFAULT_LIMIT,
+      NewsController.MAX_LIMIT,
+    );
+    return { stories: this.repo.recent(limit).map(NewsController.serialize) };
+  }
+
+  /** Map a stored row to the wire shape; `ticker_list` holds a JSON array string. */
+  private static serialize(row: NewsArticleRow): StoryView {
+    return {
+      headline: row.headline,
+      tickers: JSON.parse(row.ticker_list) as string[],
+      source: row.source,
+      catalyst: row.catalyst,
+      url: row.url,
+      publishedAt: row.published_at,
+    };
+  }
+
+  /**
+   * Parse a query-string number as an integer in [1, max]. Values that are
+   * missing, not numeric, or below 1 yield `fallback`; fractions are floored.
+   */
+  private static boundedInt(raw: string | undefined, fallback: number, max: number): number {
+    const n = Math.floor(Number(raw ?? fallback));
+    if (Number.isNaN(n) || n < 1) return fallback;
+    return Math.min(n, max);
+  }
+
+  /**
+   * ISO timestamp for a live item's publish time, or the current time when the
+   * value is absent or not a valid date (so one bad item cannot drop the batch).
+   * NOTE(review): assumes the client yields a Date or epoch milliseconds; if it
+   * yields epoch seconds this needs a ×1000 — confirm against YahooFinanceClient.
+   */
+  private static toIso(value: unknown): string {
+    if (value) {
+      const parsed = new Date(value as string | number | Date);
+      if (!Number.isNaN(parsed.getTime())) return parsed.toISOString();
+    }
+    return new Date().toISOString();
+  }
+}
@@ -0,0 +1,122 @@
+import { RssParser } from '../rss';
+import type { CatalystType, Logger, NormalizedStory } from '../../shared/types';
+
+/**
+ * SEC EDGAR poller (FREE-DATA-STACK §1.3 / P1.2 Tier 2). Free forever, and
+ * the highest-value source: filings frequently precede the headline.
+ *
+ * Strategy: poll the site-wide "current filings" atom feed once per form
+ * type (4 requests/cycle total, well inside SEC fair use), map filer CIK →
+ * ticker(s) via the daily-cached company_tickers.json, and emit stories only
+ * for universe tickers. The pipeline applies its own universe filter again —
+ * defense in depth.
+ *
+ * SEC requires a descriptive User-Agent with contact info: set
+ * EDGAR_USER_AGENT in .env (e.g. "market-screener/1.0 you@example.com").
+ */
+export class EdgarPoller {
+  private static readonly TICKER_MAP_URL = 'https://www.sec.gov/files/company_tickers.json';
+  private static readonly TICKER_MAP_TTL_MS = 24 * 60 * 60 * 1000;
+
+  /** form type → catalyst classification (overrides keyword classify). */
+  private static readonly FORMS: Array<{ form: string; catalyst: CatalystType }> = [
+    { form: '8-K', catalyst: 'regulatory' }, // material events
+    { form: 'SC 13D', catalyst: 'ma' }, // activist stake >5% — classic pre-M&A tell
+    { form: 'S-4', catalyst: 'ma' }, // merger registration
+    { form: 'DEFM14A', catalyst: 'ma' }, // merger proxy
+  ];
+
+  /**
+   * 10-digit zero-padded CIK → every ticker listed for it. A list rather than
+   * a single value so that several tickers under one CIK (e.g. share classes)
+   * do not overwrite each other.
+   */
+  private cikToTickers: Map<string, string[]> = new Map();
+  private mapExpiresAt = 0;
+
+  constructor(
+    private readonly logger: Logger,
+    private readonly userAgent = process.env.EDGAR_USER_AGENT ??
+      'market-screener/1.0 (set EDGAR_USER_AGENT in .env)',
+  ) {}
+
+  /**
+   * Fetch all form feeds and return normalized stories for universe tickers.
+   *
+   * A failing form feed is logged and skipped. Rejects only when the ticker
+   * map cannot be loaded and no earlier copy exists.
+   */
+  async poll(universe: Set<string>): Promise<NormalizedStory[]> {
+    if (universe.size === 0) return [];
+    await this.refreshTickerMap();
+
+    const stories: NormalizedStory[] = [];
+    for (const { form, catalyst } of EdgarPoller.FORMS) {
+      try {
+        const xml = await this.fetchText(EdgarPoller.feedUrl(form));
+        stories.push(...this.parseFeed(xml, form, catalyst, universe));
+      } catch (err) {
+        this.logger.warn(
+          `EDGAR ${form} feed failed:`,
+          err instanceof Error ? err.message : String(err),
+        );
+      }
+    }
+    return stories;
+  }
+
+  /**
+   * Parse one atom feed. Public for fixture tests.
+   *
+   * Entries are skipped when they lack a title, link or `updated` value, when
+   * `updated` is not a parseable date, when the title carries no 10-digit
+   * CIK, or when none of the CIK's tickers is in `universe`.
+   */
+  parseFeed(
+    xml: string,
+    form: string,
+    catalyst: CatalystType,
+    universe: Set<string>,
+  ): NormalizedStory[] {
+    const stories: NormalizedStory[] = [];
+    for (const entry of RssParser.blocks(xml, 'entry')) {
+      const title = RssParser.tag(entry, 'title') ?? '';
+      const updated = RssParser.tag(entry, 'updated');
+      const url = RssParser.link(entry);
+      if (!title || !url || !updated) continue;
+
+      // toISOString() throws on an invalid Date; skip the entry instead of
+      // losing every other entry in the feed.
+      const published = new Date(updated);
+      if (Number.isNaN(published.getTime())) continue;
+
+      // Title format: "8-K - APPLE INC (0000320193) (Filer)"
+      const cikMatch = title.match(/\((\d{10})\)/);
+      if (!cikMatch) continue;
+      const tickers = (this.cikToTickers.get(cikMatch[1]) ?? []).filter((t) => universe.has(t));
+      if (tickers.length === 0) continue;
+
+      const company = title
+        // Drop the "<form> - " prefix. The separator is the first hyphen with
+        // whitespace on both sides, so hyphens inside the form type itself
+        // ("8-K", "S-4") are not mistaken for it.
+        .replace(/^.*?\s-\s+/, '')
+        .replace(/\(\d{10}\)/g, '')
+        .replace(/\((Filer|Subject|Reporting)\)/gi, '')
+        .trim();
+
+      stories.push({
+        tickers,
+        headline: `${form} filing: ${company}`,
+        body: null,
+        source: 'edgar',
+        url,
+        publishedAt: published.toISOString(),
+        catalystHint: catalyst,
+      });
+    }
+    return stories;
+  }
+
+  /**
+   * Inject a CIK→ticker map directly (tests). CIKs are 10-digit zero-padded.
+   * Each value may be a single ticker or a list of tickers; the map is copied.
+   */
+  setTickerMap(map: ReadonlyMap<string, string | readonly string[]>): void {
+    const normalized = new Map<string, string[]>();
+    for (const [cik, value] of map) {
+      normalized.set(cik, typeof value === 'string' ? [value] : [...value]);
+    }
+    this.cikToTickers = normalized;
+    this.mapExpiresAt = Date.now() + EdgarPoller.TICKER_MAP_TTL_MS;
+  }
+
+  /**
+   * Reload the CIK→ticker map when it is empty or past its TTL. If the reload
+   * fails while an older map is still held, the older map stays in use (and
+   * the reload is retried on the next poll); with no map at all the error
+   * propagates.
+   */
+  private async refreshTickerMap(): Promise<void> {
+    if (Date.now() < this.mapExpiresAt && this.cikToTickers.size > 0) return;
+    try {
+      const raw = await this.fetchText(EdgarPoller.TICKER_MAP_URL);
+      const data: unknown = JSON.parse(raw);
+      if (typeof data !== 'object' || data === null) {
+        throw new Error('unexpected company_tickers.json shape');
+      }
+
+      const map = new Map<string, string[]>();
+      for (const entry of Object.values(data as Record<string, unknown>)) {
+        const { cik_str: cikRaw, ticker } = (entry ?? {}) as {
+          cik_str?: unknown;
+          ticker?: unknown;
+        };
+        const cikOk = typeof cikRaw === 'number' || typeof cikRaw === 'string';
+        if (!cikOk || typeof ticker !== 'string') continue; // malformed row
+
+        const cik = String(cikRaw).padStart(10, '0');
+        const symbol = ticker.toUpperCase();
+        const known = map.get(cik);
+        if (!known) map.set(cik, [symbol]);
+        else if (!known.includes(symbol)) known.push(symbol);
+      }
+      if (map.size === 0) throw new Error('company_tickers.json contained no usable entries');
+
+      this.setTickerMap(map);
+      this.logger.log(`EDGAR ticker map refreshed: ${map.size} companies`);
+    } catch (err) {
+      if (this.cikToTickers.size === 0) throw err;
+      this.logger.warn(
+        'EDGAR ticker map refresh failed, using stale map:',
+        err instanceof Error ? err.message : String(err),
+      );
+    }
+  }
+
+  /** URL of the "current filings" atom feed for one form type (100 entries). */
+  private static feedUrl(form: string): string {
+    const type = encodeURIComponent(form);
+    return `https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=${type}&company=&dateb=&owner=include&count=100&output=atom`;
+  }
+
+  /** GET a URL with the configured User-Agent; throws on a non-2xx status. */
+  private async fetchText(url: string): Promise<string> {
+    const res = await fetch(url, { headers: { 'User-Agent': this.userAgent } });
+    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
+    return res.text();
+  }
+}
@@ -0,0 +1,91 @@
+import { RssParser } from '../rss';
+import type { Logger, NormalizedStory } from '../../shared/types';
+
+/**
+ * PR-wire RSS poller (FREE-DATA-STACK §1.4 / P1.2 Tier 3) — press releases
+ * that the other free feeds miss, mostly small-caps.
+ *
+ * Ticker extraction relies on the wire convention of exchange tags in the
+ * text: "(NYSE: ABC)", "(Nasdaq: XYZ)". Stories without an exchange tag
+ * produce no tickers and are skipped here, before they reach the pipeline —
+ * that's intentional; untagged wire stories are rarely decision-grade.
+ *
+ * Feed list is overridable: NEWS_PRWIRE_FEEDS="url1,url2" in .env
+ * (wire RSS URLs change occasionally — if a feed 404s, update the env var).
+ */
+export class PrWirePoller {
+  private static readonly DEFAULT_FEEDS = [
+    // GlobeNewswire — public-company news
+    'https://www.globenewswire.com/RssFeed/orgclass/1/feedTitle/GlobeNewswire%20-%20News%20about%20Public%20Companies',
+    // PR Newswire — all news releases
+    'https://www.prnewswire.com/rss/news-releases-list.rss',
+  ];
+
+  /** "(<exchange>: <symbol>)" — the symbol (1–6 letters/dots) is capture group 1. */
+  private static readonly EXCHANGE_TAG =
+    /\((?:NYSE(?:\s+American)?|NASDAQ|Nasdaq|AMEX|CBOE|OTC(?:QB|QX|MKTS)?)\s*:\s*([A-Za-z][A-Za-z.]{0,5})\)/g;
+
+  private readonly feeds: string[];
+
+  /**
+   * @param logger Receives a warning for each feed that fails.
+   * @param feeds  Explicit feed URLs. When omitted, NEWS_PRWIRE_FEEDS
+   *               (comma-separated) is used, then the built-in defaults.
+   */
+  constructor(
+    private readonly logger: Logger,
+    feeds?: string[],
+  ) {
+    // Empty entries (trailing or doubled commas) are dropped so they are
+    // never fetched as URLs; an env value with no usable entry is ignored.
+    const fromEnv = (process.env.NEWS_PRWIRE_FEEDS ?? '')
+      .split(',')
+      .map((s) => s.trim())
+      .filter((s) => s.length > 0);
+    this.feeds = feeds ?? (fromEnv.length > 0 ? fromEnv : PrWirePoller.DEFAULT_FEEDS);
+  }
+
+  /** Fetch every feed in turn; a failing feed is logged and skipped. */
+  async poll(): Promise<NormalizedStory[]> {
+    const stories: NormalizedStory[] = [];
+    for (const feed of this.feeds) {
+      try {
+        const xml = await this.fetchText(feed);
+        stories.push(...PrWirePoller.parseFeed(xml));
+      } catch (err) {
+        this.logger.warn(
+          `PR-wire feed failed (${feed}):`,
+          err instanceof Error ? err.message : String(err),
+        );
+      }
+    }
+    return stories;
+  }
+
+  /**
+   * Parse one RSS feed. Public static for fixture tests.
+   *
+   * Items without a title or link, or without any exchange tag in the title
+   * or description, are skipped. A missing or unparseable pubDate falls back
+   * to the current time.
+   */
+  static parseFeed(xml: string): NormalizedStory[] {
+    const stories: NormalizedStory[] = [];
+    for (const item of RssParser.blocks(xml, 'item')) {
+      const title = RssParser.tag(item, 'title');
+      const url = RssParser.link(item);
+      const pubDate = RssParser.tag(item, 'pubDate');
+      if (!title || !url) continue;
+
+      const description = RssParser.tag(item, 'description') ?? '';
+      const tickers = PrWirePoller.extractTickers(`${title} ${description}`);
+      if (tickers.length === 0) continue; // no exchange tag → skip early
+
+      stories.push({
+        tickers,
+        headline: title,
+        body: description || null,
+        source: 'prwire',
+        url,
+        publishedAt: PrWirePoller.toIso(pubDate),
+      });
+    }
+    return stories;
+  }
+
+  /** "(NYSE: ABC)" / "(Nasdaq: XYZ)" → ['ABC', 'XYZ']. Public for tests. */
+  static extractTickers(text: string): string[] {
+    const out = new Set<string>();
+    for (const m of text.matchAll(PrWirePoller.EXCHANGE_TAG)) {
+      out.add(m[1].toUpperCase());
+    }
+    return [...out];
+  }
+
+  /**
+   * ISO timestamp for a feed date. toISOString() throws on an invalid Date,
+   * which would discard the whole feed, so bad input maps to "now" instead.
+   */
+  private static toIso(pubDate: string | null): string {
+    if (pubDate) {
+      const ms = Date.parse(pubDate);
+      if (!Number.isNaN(ms)) return new Date(ms).toISOString();
+    }
+    return new Date().toISOString();
+  }
+
+  /** GET a feed with a fixed reader User-Agent; throws on a non-2xx status. */
+  private async fetchText(url: string): Promise<string> {
+    const res = await fetch(url, {
+      headers: { 'User-Agent': 'market-screener/1.0 (+rss reader)' },
+    });
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    return res.text();
+  }
+}
@@ -0,0 +1,43 @@
+/**
+ * Minimal RSS/Atom extraction — enough for EDGAR atom feeds and PR-wire RSS.
+ * Deliberately dependency-free; if a feed outgrows this, swap in
+ * fast-xml-parser without touching the pollers' output shape.
+ */
+export class RssParser {
+  /** Named entities understood by decode(); anything else is left as written. */
+  private static readonly NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
+    ['amp', '&'],
+    ['lt', '<'],
+    ['gt', '>'],
+    ['quot', '"'],
+    ['apos', "'"],
+    ['nbsp', '\u00a0'],
+  ]);
+
+  /** Extract raw <item>…</item> or <entry>…</entry> blocks. */
+  static blocks(xml: string, tag: 'item' | 'entry'): string[] {
+    const re = new RegExp(`<${tag}[\\s>][\\s\\S]*?<\\/${tag}>`, 'g');
+    return xml.match(re) ?? [];
+  }
+
+  /**
+   * First occurrence of a simple tag's text content, entity-decoded.
+   *
+   * @returns The cleaned text, or null when the block has no such element.
+   */
+  static tag(block: string, name: string): string | null {
+    // The name must be followed by whitespace+attributes or by '>' directly,
+    // so asking for "title" cannot match a longer element such as <titles>.
+    const re = new RegExp(`<${name}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${name}>`, 'i');
+    const m = block.match(re);
+    return m ? RssParser.clean(m[1]) : null;
+  }
+
+  /** Atom-style <link href="…"/> (self-closing) or RSS <link>…</link>. */
+  static link(block: string): string | null {
+    const href = block.match(/<link[^>]*href="([^"]+)"/i);
+    if (href) return RssParser.decode(href[1].trim());
+    return RssParser.tag(block, 'link');
+  }
+
+  /** Unwrap CDATA, replace markup with spaces, decode entities, collapse whitespace. */
+  private static clean(raw: string): string {
+    const noCdata = raw.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1');
+    const noTags = noCdata.replace(/<[^>]+>/g, ' ');
+    return RssParser.decode(noTags).replace(/\s+/g, ' ').trim();
+  }
+
+  /**
+   * Decode named, decimal and hexadecimal character references in one pass.
+   *
+   * A single pass matters: decoding `&amp;` in a separate earlier step turns
+   * `&amp;lt;` into `&lt;` and then into `<`, i.e. the text is decoded twice.
+   * Unknown names and out-of-range code points are left untouched.
+   */
+  private static decode(s: string): string {
+    return s.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole: string, body: string) => {
+      if (body.startsWith('#')) {
+        const isHex = body[1] === 'x' || body[1] === 'X';
+        const code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
+        // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF,
+        // and throws a RangeError beyond U+10FFFF — hence the guard.
+        return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
+      }
+      return RssParser.NAMED_ENTITIES.get(body.toLowerCase()) ?? whole;
+    });
+  }
+}