phase-10.5: screener enhancements
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
import { RssParser } from '../rss';
|
||||
import type { CatalystType, Logger, NormalizedStory } from '../../shared/types';
|
||||
|
||||
/**
 * SEC EDGAR poller (FREE-DATA-STACK §1.3 / P1.2 Tier 2). Free forever, and
 * the highest-value source: filings frequently precede the headline.
 *
 * Strategy: poll the site-wide "current filings" atom feed once per form
 * type (4 requests/cycle total, well inside SEC fair use), map filer CIK →
 * ticker via the daily-cached company_tickers.json, and emit stories only
 * for universe tickers. The pipeline applies its own universe filter again —
 * defense in depth.
 *
 * SEC requires a descriptive User-Agent with contact info: set
 * EDGAR_USER_AGENT in .env (e.g. "market-screener/1.0 you@example.com").
 */
export class EdgarPoller {
  private static readonly TICKER_MAP_URL = 'https://www.sec.gov/files/company_tickers.json';
  private static readonly TICKER_MAP_TTL_MS = 24 * 60 * 60 * 1000;

  /** form type → catalyst classification (overrides keyword classify). */
  private static readonly FORMS: Array<{ form: string; catalyst: CatalystType }> = [
    { form: '8-K', catalyst: 'regulatory' }, // material events
    { form: 'SC 13D', catalyst: 'ma' }, // activist stake >5% — classic pre-M&A tell
    { form: 'S-4', catalyst: 'ma' }, // merger registration
    { form: 'DEFM14A', catalyst: 'ma' }, // merger proxy
  ];

  /** 10-digit zero-padded CIK → upper-case ticker. */
  private cikToTicker: Map<string, string> = new Map();
  /** Epoch ms after which the ticker map is considered stale. */
  private mapExpiresAt = 0;

  /**
   * @param logger    Sink for refresh/feed diagnostics.
   * @param userAgent User-Agent header sent on every request; defaults to
   *                  EDGAR_USER_AGENT, else a placeholder asking for it to be set.
   */
  constructor(
    private readonly logger: Logger,
    private readonly userAgent = process.env.EDGAR_USER_AGENT ??
      'market-screener/1.0 (set EDGAR_USER_AGENT in .env)',
  ) {}

  /**
   * Fetch all form feeds and return normalized stories for universe tickers.
   *
   * A failing form feed is logged and skipped; the other forms still run.
   *
   * @param universe Tickers to keep; an empty set short-circuits to `[]`
   *                 without any network traffic.
   * @throws If the ticker map cannot be refreshed and no earlier map is cached.
   */
  async poll(universe: Set<string>): Promise<NormalizedStory[]> {
    if (universe.size === 0) return [];

    try {
      await this.refreshTickerMap();
    } catch (err) {
      // Without any map nothing can be attributed, so surface the failure.
      if (this.cikToTicker.size === 0) throw err;
      // A day-old map is still usable; don't lose the whole cycle over it.
      this.logger.warn(
        'EDGAR ticker map refresh failed, using stale map:',
        EdgarPoller.errorMessage(err),
      );
    }

    const stories: NormalizedStory[] = [];
    for (const { form, catalyst } of EdgarPoller.FORMS) {
      try {
        const xml = await this.fetchText(EdgarPoller.feedUrl(form));
        stories.push(...this.parseFeed(xml, form, catalyst, universe));
      } catch (err) {
        this.logger.warn(`EDGAR ${form} feed failed:`, EdgarPoller.errorMessage(err));
      }
    }
    return stories;
  }

  /**
   * Parse one atom feed. Public for fixture tests.
   *
   * Entries are dropped when they lack a title, link or `updated` value, when
   * `updated` is not a parseable date, when the title carries no 10-digit CIK,
   * or when the CIK does not map to a ticker in `universe`.
   *
   * @param xml      Raw atom document.
   * @param form     Form type label used in the headline.
   * @param catalyst Catalyst hint attached to every emitted story.
   * @param universe Tickers to keep.
   */
  parseFeed(
    xml: string,
    form: string,
    catalyst: CatalystType,
    universe: Set<string>,
  ): NormalizedStory[] {
    const stories: NormalizedStory[] = [];
    for (const entry of RssParser.blocks(xml, 'entry')) {
      const title = RssParser.tag(entry, 'title') ?? '';
      const updated = RssParser.tag(entry, 'updated');
      const url = RssParser.link(entry);
      if (!title || !url || !updated) continue;

      // toISOString() throws RangeError on an invalid date; skip just this
      // entry instead of letting one bad timestamp discard the whole feed.
      const publishedMs = Date.parse(updated);
      if (Number.isNaN(publishedMs)) continue;

      // Title format: "8-K - APPLE INC (0000320193) (Filer)"
      const cik = title.match(/\((\d{10})\)/)?.[1];
      if (!cik) continue;
      const ticker = this.cikToTicker.get(cik);
      if (!ticker || !universe.has(ticker)) continue;

      stories.push({
        tickers: [ticker],
        headline: `${form} filing: ${EdgarPoller.companyFromTitle(title)}`,
        body: null,
        source: 'edgar',
        url,
        publishedAt: new Date(publishedMs).toISOString(),
        catalystHint: catalyst,
      });
    }
    return stories;
  }

  /** Inject a CIK→ticker map directly (tests). CIKs are 10-digit zero-padded. */
  setTickerMap(map: Map<string, string>): void {
    this.cikToTicker = map;
    this.mapExpiresAt = Date.now() + EdgarPoller.TICKER_MAP_TTL_MS;
  }

  /**
   * Reload the CIK→ticker map when the cached one is empty or past its TTL.
   * Malformed rows are skipped; a payload yielding no rows leaves the current
   * map untouched so a bad response cannot wipe a good cache.
   */
  private async refreshTickerMap(): Promise<void> {
    if (Date.now() < this.mapExpiresAt && this.cikToTicker.size > 0) return;

    const raw = await this.fetchText(EdgarPoller.TICKER_MAP_URL);
    const data: unknown = JSON.parse(raw);
    if (typeof data !== 'object' || data === null) {
      throw new Error('EDGAR ticker map: unexpected payload shape');
    }

    const map = new Map<string, string>();
    for (const row of Object.values(data as Record<string, unknown>)) {
      if (typeof row !== 'object' || row === null) continue;
      const { cik_str: cik, ticker } = row as { cik_str?: unknown; ticker?: unknown };
      const cikOk = typeof cik === 'number' || typeof cik === 'string';
      if (!cikOk || typeof ticker !== 'string' || ticker.length === 0) continue;
      // NOTE(review): if one CIK appears with several tickers, the last row
      // wins (same as before) — confirm that is the desired share class.
      map.set(String(cik).padStart(10, '0'), ticker.toUpperCase());
    }

    if (map.size === 0) {
      this.logger.warn('EDGAR ticker map refresh returned no usable rows; keeping previous map');
      return;
    }
    this.setTickerMap(map);
    this.logger.log(`EDGAR ticker map refreshed: ${map.size} companies`);
  }

  /**
   * Strip the form prefix, CIK and role tag from a feed title, leaving the
   * company name: "8-K - APPLE INC (0000320193) (Filer)" → "APPLE INC".
   */
  private static companyFromTitle(title: string): string {
    return (
      title
        // Cut through the first " - " separator. Anchoring on the surrounding
        // whitespace keeps the hyphen inside "8-K" / "S-4" from being mistaken
        // for the separator.
        .replace(/^.*?\s-\s+/, '')
        .replace(/\(\d{10}\)/g, '')
        // Role tags; "Issuer" and "Filed by" are presumed EDGAR variants —
        // TODO confirm against live feed titles.
        .replace(/\((Filer|Subject|Reporting|Issuer|Filed by)\)/gi, '')
        .replace(/\s{2,}/g, ' ')
        .trim()
    );
  }

  /** Best-effort message for a caught value of unknown type. */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Build the "current filings" atom URL for one form type (100 entries). */
  private static feedUrl(form: string): string {
    const type = encodeURIComponent(form);
    return `https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=${type}&company=&dateb=&owner=include&count=100&output=atom`;
  }

  /**
   * GET `url` with the configured User-Agent and return the body as text.
   * @throws On any non-2xx response.
   */
  private async fetchText(url: string): Promise<string> {
    const res = await fetch(url, { headers: { 'User-Agent': this.userAgent } });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    return res.text();
  }
}
|
||||
@@ -0,0 +1,91 @@
|
||||
import { RssParser } from '../rss';
|
||||
import type { Logger, NormalizedStory } from '../../shared/types';
|
||||
|
||||
/**
 * PR-wire RSS poller (FREE-DATA-STACK §1.4 / P1.2 Tier 3) — press releases
 * that the other free feeds miss, mostly small-caps.
 *
 * Ticker extraction relies on the wire convention of exchange tags in the
 * text: "(NYSE: ABC)", "(Nasdaq: XYZ)". Stories without an exchange tag
 * produce no tickers and are skipped during parsing — that's intentional;
 * untagged wire stories are rarely decision-grade.
 *
 * Feed list is overridable: NEWS_PRWIRE_FEEDS="url1,url2" in .env
 * (wire RSS URLs change occasionally — if a feed 404s, update the env var).
 */
export class PrWirePoller {
  private static readonly DEFAULT_FEEDS = [
    // GlobeNewswire — public-company news
    'https://www.globenewswire.com/RssFeed/orgclass/1/feedTitle/GlobeNewswire%20-%20News%20about%20Public%20Companies',
    // PR Newswire — all news releases
    'https://www.prnewswire.com/rss/news-releases-list.rss',
  ];

  /** Matches "(EXCHANGE: SYMBOL)" and captures SYMBOL (letter first, up to 6 chars incl. dots). */
  private static readonly EXCHANGE_TAG =
    /\((?:NYSE(?:\s+American)?|NASDAQ|Nasdaq|AMEX|CBOE|OTC(?:QB|QX|MKTS)?)\s*:\s*([A-Za-z][A-Za-z.]{0,5})\)/g;

  private readonly feeds: string[];

  /**
   * @param logger Sink for per-feed failure warnings.
   * @param feeds  Explicit feed URLs; when omitted, NEWS_PRWIRE_FEEDS
   *               (comma-separated) is used, else the built-in defaults.
   */
  constructor(
    private readonly logger: Logger,
    feeds?: string[],
  ) {
    if (feeds) {
      this.feeds = feeds;
      return;
    }
    // Drop blanks left by trailing/double commas so they don't become
    // empty-URL fetches that fail on every cycle.
    const fromEnv = (process.env.NEWS_PRWIRE_FEEDS ?? '')
      .split(',')
      .map((s) => s.trim())
      .filter((s) => s.length > 0);
    this.feeds = fromEnv.length > 0 ? fromEnv : PrWirePoller.DEFAULT_FEEDS;
  }

  /**
   * Fetch every configured feed in order and return the parsed stories.
   * A failing feed is logged and skipped; it never rejects the whole poll.
   */
  async poll(): Promise<NormalizedStory[]> {
    const stories: NormalizedStory[] = [];
    for (const feed of this.feeds) {
      try {
        const xml = await this.fetchText(feed);
        stories.push(...PrWirePoller.parseFeed(xml));
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.warn(`PR-wire feed failed (${feed}):`, reason);
      }
    }
    return stories;
  }

  /**
   * Parse one RSS feed. Public static for fixture tests.
   *
   * Items without a title or link, or without any exchange tag in the
   * title/description, are skipped. A missing or unparseable `pubDate`
   * falls back to the current time.
   */
  static parseFeed(xml: string): NormalizedStory[] {
    const stories: NormalizedStory[] = [];
    for (const item of RssParser.blocks(xml, 'item')) {
      const title = RssParser.tag(item, 'title');
      const url = RssParser.link(item);
      const pubDate = RssParser.tag(item, 'pubDate');
      if (!title || !url) continue;

      const description = RssParser.tag(item, 'description') ?? '';
      const tickers = PrWirePoller.extractTickers(`${title} ${description}`);
      if (tickers.length === 0) continue; // no exchange tag → skip early

      // toISOString() throws RangeError on an invalid date, which would abort
      // the whole feed; treat a malformed pubDate like a missing one.
      const parsedMs = pubDate ? Date.parse(pubDate) : Number.NaN;
      const publishedMs = Number.isNaN(parsedMs) ? Date.now() : parsedMs;

      stories.push({
        tickers,
        headline: title,
        body: description || null,
        source: 'prwire',
        url,
        publishedAt: new Date(publishedMs).toISOString(),
      });
    }
    return stories;
  }

  /**
   * "(NYSE: ABC)" / "(Nasdaq: XYZ)" → ['ABC', 'XYZ']. Public for tests.
   * Symbols are upper-cased and de-duplicated, in first-seen order.
   */
  static extractTickers(text: string): string[] {
    const out = new Set<string>();
    for (const m of text.matchAll(PrWirePoller.EXCHANGE_TAG)) {
      const symbol = m[1];
      if (symbol) out.add(symbol.toUpperCase());
    }
    return [...out];
  }

  /**
   * GET `url` with a fixed reader User-Agent and return the body as text.
   * @throws On any non-2xx response.
   */
  private async fetchText(url: string): Promise<string> {
    const res = await fetch(url, {
      headers: { 'User-Agent': 'market-screener/1.0 (+rss reader)' },
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return res.text();
  }
}
|
||||
Reference in New Issue
Block a user