import { RssParser } from '../rss';
import type { CatalystType, Logger, NormalizedStory } from '../../shared/types';

/**
 * SEC EDGAR poller (FREE-DATA-STACK §1.3 / P1.2 Tier 2). Free forever, and
 * the highest-value source: filings frequently precede the headline.
 *
 * Strategy: poll the site-wide "current filings" atom feed once per form
 * type (4 requests/cycle total, well inside SEC fair use), map filer CIK →
 * ticker via the daily-cached company_tickers.json, and emit stories only
 * for universe tickers. The pipeline applies its own universe filter again —
 * defense in depth.
 *
 * SEC requires a descriptive User-Agent with contact info: set
 * EDGAR_USER_AGENT in .env (e.g. "market-screener/1.0 you@example.com").
 */
export class EdgarPoller {
  private static readonly TICKER_MAP_URL = 'https://www.sec.gov/files/company_tickers.json';
  private static readonly TICKER_MAP_TTL_MS = 24 * 60 * 60 * 1000;

  /** form type → catalyst classification (overrides keyword classify). */
  private static readonly FORMS: Array<{ form: string; catalyst: CatalystType }> = [
    { form: '8-K', catalyst: 'regulatory' }, // material events
    { form: 'SC 13D', catalyst: 'ma' }, // activist stake >5% — classic pre-M&A tell
    { form: 'S-4', catalyst: 'ma' }, // merger registration
    { form: 'DEFM14A', catalyst: 'ma' }, // merger proxy
  ];

  // Regexes are hoisted so they are not rebuilt for every feed entry.
  /** Captures the 10-digit CIK from a title such as "8-K - APPLE INC (0000320193) (Filer)". */
  private static readonly CIK_RE = /\((\d{10})\)/;
  /**
   * Matches the leading "<form> - " prefix. The separator is a hyphen with
   * whitespace on both sides, so hyphens inside the form name itself
   * ("8-K", "S-4") are not mistaken for the separator.
   */
  private static readonly FORM_PREFIX_RE = /^.*?\s+-\s+/;
  private static readonly CIK_GROUP_RE = /\(\d{10}\)/g;
  private static readonly ROLE_GROUP_RE = /\((Filer|Subject|Reporting)\)/gi;

  /** Zero-padded 10-digit CIK → upper-case ticker. */
  private cikToTicker: Map<string, string> = new Map();
  /** Epoch milliseconds after which the cached ticker map is considered stale. */
  private mapExpiresAt = 0;

  constructor(
    private readonly logger: Logger,
    private readonly userAgent = process.env.EDGAR_USER_AGENT ??
      'market-screener/1.0 (set EDGAR_USER_AGENT in .env)',
  ) {}

  /**
   * Fetch all form feeds and return normalized stories for universe tickers.
   *
   * Feeds are requested one after another, not in parallel. A failing feed
   * is logged and skipped so the remaining forms are still processed.
   *
   * @param universe Tickers to keep; an empty set short-circuits with no requests.
   * @returns Stories for every feed that could be fetched and parsed.
   * @throws If the ticker map cannot be loaded and no earlier map is cached.
   */
  async poll(universe: Set<string>): Promise<NormalizedStory[]> {
    if (universe.size === 0) return [];
    await this.refreshTickerMap();

    const stories: NormalizedStory[] = [];
    for (const { form, catalyst } of EdgarPoller.FORMS) {
      try {
        const xml = await this.fetchText(EdgarPoller.feedUrl(form));
        stories.push(...this.parseFeed(xml, form, catalyst, universe));
      } catch (err) {
        this.logger.warn(`EDGAR ${form} feed failed:`, EdgarPoller.errorMessage(err));
      }
    }
    return stories;
  }

  /**
   * Parse one atom feed. Public for fixture tests.
   *
   * Entries are skipped when they lack a title, link or timestamp, when the
   * title carries no 10-digit CIK, when the CIK does not map to a universe
   * ticker, or when the timestamp cannot be parsed as a date.
   *
   * @param xml Raw atom feed document.
   * @param form Form type the feed was requested for; used in the headline.
   * @param catalyst Catalyst hint attached to every story from this feed.
   * @param universe Tickers to keep.
   */
  parseFeed(
    xml: string,
    form: string,
    catalyst: CatalystType,
    universe: Set<string>,
  ): NormalizedStory[] {
    const stories: NormalizedStory[] = [];
    for (const entry of RssParser.blocks(xml, 'entry')) {
      const title = RssParser.tag(entry, 'title') ?? '';
      const updated = RssParser.tag(entry, 'updated');
      const url = RssParser.link(entry);
      if (!title || !url || !updated) continue;

      // Title format: "8-K - APPLE INC (0000320193) (Filer)"
      const cik = EdgarPoller.CIK_RE.exec(title)?.[1];
      if (!cik) continue;
      const ticker = this.cikToTicker.get(cik);
      if (!ticker || !universe.has(ticker)) continue;

      // toISOString() throws RangeError on an invalid date; skip the single
      // bad entry instead of losing the whole feed.
      const published = new Date(updated);
      if (Number.isNaN(published.getTime())) continue;

      stories.push({
        tickers: [ticker],
        headline: `${form} filing: ${EdgarPoller.companyName(title)}`,
        body: null,
        source: 'edgar',
        url,
        publishedAt: published.toISOString(),
        catalystHint: catalyst,
      });
    }
    return stories;
  }

  /** Inject a CIK→ticker map directly (tests). CIKs are 10-digit zero-padded. */
  setTickerMap(map: Map<string, string>): void {
    this.cikToTicker = map;
    this.mapExpiresAt = Date.now() + EdgarPoller.TICKER_MAP_TTL_MS;
  }

  /**
   * Reload the CIK→ticker map when the cache is empty or past its TTL.
   *
   * If the reload fails (or yields nothing usable) while an earlier map is
   * still held, the earlier map is kept and a warning is logged; the expiry
   * is left untouched so the next poll retries. With no earlier map the
   * error propagates to the caller.
   */
  private async refreshTickerMap(): Promise<void> {
    if (Date.now() < this.mapExpiresAt && this.cikToTicker.size > 0) return;

    let map: Map<string, string>;
    try {
      const raw = await this.fetchText(EdgarPoller.TICKER_MAP_URL);
      map = EdgarPoller.parseTickerMap(raw);
      if (map.size === 0 && this.cikToTicker.size > 0) {
        throw new Error('ticker map contained no usable entries');
      }
    } catch (err) {
      if (this.cikToTicker.size === 0) throw err;
      this.logger.warn(
        'EDGAR ticker map refresh failed, keeping stale map:',
        EdgarPoller.errorMessage(err),
      );
      return;
    }

    this.setTickerMap(map);
    this.logger.log(`EDGAR ticker map refreshed: ${map.size} companies`);
  }

  /**
   * Build the CIK→ticker map from the company_tickers.json payload.
   * Rows without a usable `cik_str` or `ticker` are ignored.
   *
   * NOTE(review): when the same CIK appears in more than one row the last
   * row wins — confirm that is acceptable for issuers with several tickers.
   */
  private static parseTickerMap(raw: string): Map<string, string> {
    const map = new Map<string, string>();
    const data: unknown = JSON.parse(raw);
    if (typeof data !== 'object' || data === null) return map;

    for (const row of Object.values(data) as unknown[]) {
      if (typeof row !== 'object' || row === null) continue;
      const { cik_str: cik, ticker } = row as { cik_str?: unknown; ticker?: unknown };
      const cikUsable = typeof cik === 'number' || typeof cik === 'string';
      if (!cikUsable || typeof ticker !== 'string') continue;
      map.set(String(cik).padStart(10, '0'), ticker.toUpperCase());
    }
    return map;
  }

  /** Strip the "<form> - " prefix and the "(CIK)" / "(Filer)"-style groups from a feed title. */
  private static companyName(title: string): string {
    return title
      .replace(EdgarPoller.FORM_PREFIX_RE, '')
      .replace(EdgarPoller.CIK_GROUP_RE, '')
      .replace(EdgarPoller.ROLE_GROUP_RE, '')
      .trim();
  }

  /** Readable message for a caught value, which is not guaranteed to be an Error. */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** URL of the "current filings" atom feed (up to 100 entries) for one form type. */
  private static feedUrl(form: string): string {
    const type = encodeURIComponent(form);
    return `https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=${type}&company=&dateb=&owner=include&count=100&output=atom`;
  }

  /**
   * GET a URL with the configured User-Agent and return the response body.
   *
   * @throws On any non-2xx status, with the status code and URL in the message.
   */
  private async fetchText(url: string): Promise<string> {
    const res = await fetch(url, { headers: { 'User-Agent': this.userAgent } });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    return res.text();
  }
}