phase-10.5: screener enhancements

This commit is contained in:
Kazuma
2026-06-11 19:18:19 -04:00
parent f0c794f0c0
commit bf2a85b5c4
51 changed files with 3745 additions and 36 deletions
+122
View File
@@ -0,0 +1,122 @@
import { RssParser } from '../rss';
import type { CatalystType, Logger, NormalizedStory } from '../../shared/types';
/**
 * SEC EDGAR poller (FREE-DATA-STACK §1.3 / P1.2 Tier 2). Free forever, and
 * the highest-value source: filings frequently precede the headline.
 *
 * Strategy: poll the site-wide "current filings" atom feed once per form
 * type (4 requests/cycle total, well inside SEC fair use), map filer CIK →
 * ticker via the daily-cached company_tickers.json, and emit stories only
 * for universe tickers. The pipeline applies its own universe filter again —
 * defense in depth.
 *
 * Failure handling: a feed that fails to download or parse is logged and
 * skipped; a single malformed entry is skipped without losing the rest of
 * its feed; a failed ticker-map refresh falls back to the previous map when
 * one is available.
 *
 * SEC requires a descriptive User-Agent with contact info: set
 * EDGAR_USER_AGENT in .env (e.g. "market-screener/1.0 you@example.com").
 */
export class EdgarPoller {
  private static readonly TICKER_MAP_URL = 'https://www.sec.gov/files/company_tickers.json';
  private static readonly TICKER_MAP_TTL_MS = 24 * 60 * 60 * 1000;
  /** How long to keep serving a stale map before retrying a failed refresh. */
  private static readonly TICKER_MAP_RETRY_MS = 5 * 60 * 1000;

  /** form type → catalyst classification (overrides keyword classify). */
  private static readonly FORMS: Array<{ form: string; catalyst: CatalystType }> = [
    { form: '8-K', catalyst: 'regulatory' }, // material events
    { form: 'SC 13D', catalyst: 'ma' }, // activist stake >5% — classic pre-M&A tell
    { form: 'S-4', catalyst: 'ma' }, // merger registration
    { form: 'DEFM14A', catalyst: 'ma' }, // merger proxy
  ];

  /**
   * Leading "<form type> - " prefix of an entry title. The separator is the
   * first hyphen surrounded by whitespace; a bare "first hyphen" match would
   * cut form types such as "8-K" and "S-4" in half.
   */
  private static readonly FORM_PREFIX = /^.*?\s-\s+/;
  /** First 10-digit CIK in parentheses, captured. */
  private static readonly CIK_CAPTURE = /\((\d{10})\)/;
  /** Every parenthesised 10-digit CIK. */
  private static readonly CIK_TAGS = /\(\d{10}\)/g;
  /**
   * Parenthesised role labels that follow the CIK in a title.
   * NOTE(review): "Issuer" and "Filed by" are included on the assumption that
   * EDGAR uses them on ownership/13D entries — confirm against live feeds.
   */
  private static readonly ROLE_TAGS = /\((Filer|Subject|Reporting|Issuer|Filed by)\)/gi;

  /**
   * 10-digit zero-padded CIK → every ticker seen for that CIK, in source
   * order. A list rather than a single value because one CIK can carry
   * several tickers (e.g. multiple share classes).
   */
  private cikToTickers: Map<string, string[]> = new Map();
  private mapExpiresAt = 0;

  constructor(
    private readonly logger: Logger,
    private readonly userAgent = process.env.EDGAR_USER_AGENT ??
      'market-screener/1.0 (set EDGAR_USER_AGENT in .env)',
  ) {}

  /**
   * Fetch all form feeds and return normalized stories for universe tickers.
   *
   * @param universe Tickers to keep; an empty set short-circuits to `[]`
   *   without any network traffic.
   * @returns Stories from every feed that could be fetched and parsed.
   * @throws If the ticker map cannot be loaded and no earlier map exists.
   */
  async poll(universe: Set<string>): Promise<NormalizedStory[]> {
    if (universe.size === 0) return [];
    await this.refreshTickerMap();

    const stories: NormalizedStory[] = [];
    // Feeds are requested one at a time on purpose: four sequential requests
    // per cycle keep the poller gentle on the SEC endpoint.
    for (const { form, catalyst } of EdgarPoller.FORMS) {
      try {
        const xml = await this.fetchText(EdgarPoller.feedUrl(form));
        stories.push(...this.parseFeed(xml, form, catalyst, universe));
      } catch (err) {
        // One broken feed must not cost us the other three.
        this.logger.warn(`EDGAR ${form} feed failed:`, EdgarPoller.errorMessage(err));
      }
    }
    return stories;
  }

  /**
   * Parse one atom feed. Public for fixture tests.
   *
   * Entries are skipped (never thrown on) when they lack a title, link or
   * `updated` value, when `updated` is not a parseable date, when the title
   * has no 10-digit CIK, or when none of the CIK's tickers is in `universe`.
   *
   * @param xml Raw atom document.
   * @param form Form type the feed was requested for; used in the headline.
   * @param catalyst Catalyst hint attached to every emitted story.
   * @param universe Tickers to keep.
   * @returns One story per retained entry, each tagged with a single ticker.
   */
  parseFeed(
    xml: string,
    form: string,
    catalyst: CatalystType,
    universe: Set<string>,
  ): NormalizedStory[] {
    const stories: NormalizedStory[] = [];
    for (const entry of RssParser.blocks(xml, 'entry')) {
      const title = RssParser.tag(entry, 'title') ?? '';
      const updated = RssParser.tag(entry, 'updated');
      const url = RssParser.link(entry);
      if (!title || !url || !updated) continue;

      // toISOString() throws RangeError on an invalid Date; validate first so
      // a single bad timestamp does not discard the whole feed.
      const publishedAt = new Date(updated);
      if (Number.isNaN(publishedAt.getTime())) continue;

      // Title format: "8-K - APPLE INC (0000320193) (Filer)"
      const cik = EdgarPoller.CIK_CAPTURE.exec(title)?.[1];
      if (!cik) continue;

      // Prefer the first listed ticker that the caller actually tracks.
      const ticker = this.cikToTickers.get(cik)?.find((symbol) => universe.has(symbol));
      if (!ticker) continue;

      stories.push({
        tickers: [ticker],
        headline: `${form} filing: ${EdgarPoller.companyFromTitle(title)}`,
        body: null,
        source: 'edgar',
        url,
        publishedAt: publishedAt.toISOString(),
        catalystHint: catalyst,
      });
    }
    return stories;
  }

  /**
   * Inject a CIK→ticker map directly (tests). CIKs are 10-digit zero-padded.
   * The map is copied, and the cache TTL is restarted from now.
   */
  setTickerMap(map: Map<string, string>): void {
    const byCik = new Map<string, string[]>();
    for (const [cik, ticker] of map) byCik.set(cik, [ticker]);
    this.installTickerMap(byCik);
  }

  /** Swap in a new map and restart the cache TTL. */
  private installTickerMap(map: Map<string, string[]>): void {
    this.cikToTickers = map;
    this.mapExpiresAt = Date.now() + EdgarPoller.TICKER_MAP_TTL_MS;
  }

  /**
   * Reload the CIK→ticker map when the cached one is empty or expired.
   * If the reload fails but an older map is still in memory, that map keeps
   * being used and the next attempt is delayed by TICKER_MAP_RETRY_MS; with
   * no map at all the error propagates to the caller.
   */
  private async refreshTickerMap(): Promise<void> {
    if (Date.now() < this.mapExpiresAt && this.cikToTickers.size > 0) return;

    let fresh: Map<string, string[]>;
    try {
      fresh = EdgarPoller.parseTickerMap(await this.fetchText(EdgarPoller.TICKER_MAP_URL));
    } catch (err) {
      if (this.cikToTickers.size === 0) throw err;
      this.mapExpiresAt = Date.now() + EdgarPoller.TICKER_MAP_RETRY_MS;
      this.logger.warn(
        'EDGAR ticker map refresh failed, using stale map:',
        EdgarPoller.errorMessage(err),
      );
      return;
    }

    this.installTickerMap(fresh);
    this.logger.log(`EDGAR ticker map refreshed: ${fresh.size} companies`);
  }

  /**
   * Build the CIK→tickers map from the raw company_tickers.json text.
   * Rows without a usable `cik_str` or `ticker` are ignored.
   *
   * @throws SyntaxError when `raw` is not JSON; Error when the top-level
   *   value is not an object.
   */
  private static parseTickerMap(raw: string): Map<string, string[]> {
    const data: unknown = JSON.parse(raw);
    if (typeof data !== 'object' || data === null) {
      throw new Error('unexpected company_tickers.json shape');
    }

    const byCik = new Map<string, string[]>();
    for (const row of Object.values(data as Record<string, unknown>)) {
      if (typeof row !== 'object' || row === null) continue;
      const { cik_str: cik, ticker } = row as { cik_str?: unknown; ticker?: unknown };
      if (typeof cik !== 'number' && typeof cik !== 'string') continue;
      if (typeof ticker !== 'string' || ticker === '') continue;

      const key = String(cik).padStart(10, '0');
      const symbol = ticker.toUpperCase();
      const known = byCik.get(key);
      if (!known) {
        byCik.set(key, [symbol]);
      } else if (!known.includes(symbol)) {
        known.push(symbol);
      }
    }
    return byCik;
  }

  /** Reduce an entry title to the bare company name. */
  private static companyFromTitle(title: string): string {
    return title
      .replace(EdgarPoller.FORM_PREFIX, '')
      .replace(EdgarPoller.CIK_TAGS, '')
      .replace(EdgarPoller.ROLE_TAGS, '')
      .replace(/\s{2,}/g, ' ') // gaps left behind by the removed tags
      .trim();
  }

  /** Best-effort text for anything that can land in a `catch`. */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** "Current filings" atom feed URL for one form type (latest 100 entries). */
  private static feedUrl(form: string): string {
    const type = encodeURIComponent(form);
    return `https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=${type}&company=&dateb=&owner=include&count=100&output=atom`;
  }

  /**
   * GET `url` with the configured User-Agent and return the response body.
   *
   * @throws Error("HTTP <status> for <url>") on a non-2xx response.
   */
  private async fetchText(url: string): Promise<string> {
    const res = await fetch(url, { headers: { 'User-Agent': this.userAgent } });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    return res.text();
  }
}