// Files
// market_screener/tests/news-pipeline.test.ts
// 2026-06-11 19:18:19 -04:00
//
// 130 lines
// 5.1 KiB
// TypeScript

import test from 'node:test';
import assert from 'node:assert/strict';
import { NewsPipeline } from '../server/domains/news/NewsPipeline.js';
import type { NewsRepository } from '../server/domains/news/NewsRepository.js';
import type { NormalizedStory } from '../server/domains/shared/types/index.js';
/**
 * Minimal in-memory repository double.
 *
 * It keeps every accepted article and every ticker link in plain arrays so a
 * test can inspect them afterwards, and exposes `seenTitles` / `capCounts` so a
 * test can pre-seed the answers to the title-dedupe and per-day count lookups.
 */
class StubRepo {
  /** Articles accepted by `insertArticle`, in insertion order. */
  articles: Array<{ urlHash: string; tickers: string[]; catalyst: string | null }> = [];
  /** One entry per `linkTicker` call, in call order. */
  links: Array<{ ticker: string; day: string }> = [];
  /** Title hashes for which `titleSeenSince` answers true. */
  seenTitles = new Set<string>();
  /** Pre-seeded counts, keyed by `${ticker}|${day}`. */
  capCounts = new Map<string, number>();

  /** Records the article unless its urlHash is already present; returns whether it was recorded. */
  insertArticle(a: { urlHash: string; tickers: string[]; catalyst: string | null }): boolean {
    const position = this.articles.findIndex((existing) => existing.urlHash === a.urlHash);
    if (position !== -1) {
      return false;
    }
    this.articles.push(a);
    return true;
  }

  /** True when the hash has been added to `seenTitles`. */
  titleSeenSince(titleHash: string): boolean {
    const known = this.seenTitles.has(titleHash);
    return known;
  }

  /** Appends a ticker/day pair to `links`. */
  linkTicker(ticker: string, day: string): void {
    const entry = { ticker: ticker, day: day };
    this.links.push(entry);
  }

  /** Looks up the seeded count for the ticker/day pair, defaulting to 0. */
  countTickerDay(ticker: string, day: string): number {
    const key = ticker + '|' + day;
    const seeded = this.capCounts.get(key);
    return seeded ?? 0;
  }

  /** No-op in the stub; always reports 0. */
  purgeBodiesBefore(): number {
    return 0;
  }

  /** No-op in the stub; always reports 0. */
  deleteUnreferencedBefore(): number {
    return 0;
  }
}
/** Ticker set passed as the universe to every `ingest` call in this file. */
const UNIVERSE = new Set<string>(['AAPL', 'MSFT']);
/**
 * Builds a story fixture: an AAPL press-wire item published 2026-06-09 whose
 * fields can be replaced individually through `overrides`.
 *
 * The default URL embeds a fresh `Math.random()` value on every call —
 * presumably so separate fixtures do not collide on URL-based dedupe.
 *
 * @param overrides Fields that take precedence over the defaults.
 * @returns The defaults merged with `overrides`.
 */
function story(overrides: Partial<NormalizedStory> = {}): NormalizedStory {
  const defaults: NormalizedStory = {
    tickers: ['AAPL'],
    headline: 'Apple announces quarterly results beat estimates',
    source: 'prwire',
    url: `https://example.com/${Math.random()}`,
    publishedAt: '2026-06-09T14:00:00.000Z',
  };
  return { ...defaults, ...overrides };
}
/**
 * Constructs a NewsPipeline backed by the given stub.
 *
 * StubRepo is not declared as a NewsRepository, so it is cast through
 * `unknown` to be accepted by the constructor.
 */
function makePipeline(repo: StubRepo): NewsPipeline {
  const asRepository = repo as unknown as NewsRepository;
  const pipeline = new NewsPipeline(asRepository);
  return pipeline;
}
// Suite for NewsPipeline. The ingest subtests run the pipeline against a fresh
// StubRepo and check both the returned stats and what the stub recorded; the
// last two subtests call the static classify / isNoise helpers directly.
test('NewsPipeline', async (t) => {
  await t.test('stores universe stories and links tickers', () => {
    const repo = new StubRepo();
    const stats = makePipeline(repo).ingest([story()], UNIVERSE);
    assert.equal(stats.stored, 1);
    // Compare the whole list instead of indexing [0]: a missing or extra link
    // then fails as an assertion diff rather than a TypeError on `undefined`.
    assert.deepEqual(repo.links, [{ ticker: 'AAPL', day: '2026-06-09' }]);
  });

  await t.test('drops stories with no universe ticker (§4.1)', () => {
    const repo = new StubRepo();
    const stats = makePipeline(repo).ingest([story({ tickers: ['ZZZZ'] })], UNIVERSE);
    assert.equal(stats.stored, 0);
    assert.equal(stats.droppedNoUniverseTicker, 1);
    assert.equal(repo.articles.length, 0);
  });

  await t.test('drops noise headlines, but never filings (§4.2)', () => {
    const repo = new StubRepo();
    const noise = story({ headline: '5 best stocks to buy now including Apple' });
    // Same listicle wording, but sourced from edgar: expected to be kept.
    const filing = story({
      headline: '8-K filing: 5 best stocks edge case',
      source: 'edgar',
      catalystHint: 'regulatory',
    });
    const stats = makePipeline(repo).ingest([noise, filing], UNIVERSE);
    assert.equal(stats.droppedNoise, 1);
    assert.equal(stats.stored, 1);
    // Optional chaining keeps this an assertion failure (undefined !== 'regulatory')
    // if nothing reached the repo, instead of throwing on articles[0].
    assert.equal(repo.articles[0]?.catalyst, 'regulatory');
  });

  await t.test('drops syndicated duplicates by normalized title (§4.3)', () => {
    const repo = new StubRepo();
    const pipeline = makePipeline(repo);
    // First copy stored; the stub does not track titles on insert, so mark its
    // normalized-title hash as seen by hand.
    pipeline.ingest([story({ headline: 'Apple Beats Q2 Estimates!' })], UNIVERSE);
    repo.seenTitles.add(sha256(NewsPipeline.normalizeTitle('Apple Beats Q2 Estimates!')));
    // Same story, different casing/punctuation/URL → syndicated copy
    const stats = pipeline.ingest(
      [story({ headline: 'APPLE BEATS Q2 ESTIMATES', url: 'https://other.com/copy' })],
      UNIVERSE,
    );
    assert.equal(stats.droppedDuplicate, 1);
  });

  await t.test('enforces per-ticker daily cap, filings exempt (§4.4)', () => {
    const repo = new StubRepo();
    repo.capCounts.set('AAPL|2026-06-09', 25); // at cap
    const wire = story();
    const filing = story({ source: 'edgar', catalystHint: 'ma', url: 'https://sec.gov/x' });
    const stats = makePipeline(repo).ingest([wire, filing], UNIVERSE);
    assert.equal(stats.droppedCapped, 1);
    assert.equal(stats.stored, 1); // the filing
  });

  await t.test('classifies catalysts with M&A taking priority', () => {
    assert.equal(NewsPipeline.classify('Acme to be acquired by MegaCorp in Q2 deal'), 'ma');
    assert.equal(NewsPipeline.classify('Acme reports record quarterly results'), 'earnings');
    assert.equal(NewsPipeline.classify('Acme raises full-year guidance'), 'guidance');
    assert.equal(NewsPipeline.classify('FDA approval granted for Acme drug'), 'regulatory');
    assert.equal(NewsPipeline.classify('Fed holds rates steady amid CPI data'), 'macro');
    // No catalyst keyword matches → null.
    assert.equal(NewsPipeline.classify('Acme appoints new CMO'), null);
  });

  await t.test('noise detector catches listicles and target reiterations', () => {
    assert.ok(NewsPipeline.isNoise('3 Top Stocks to Watch This Week'));
    assert.ok(NewsPipeline.isNoise('Analyst price target raised on momentum'));
    assert.ok(!NewsPipeline.isNoise('Apple announces $90B buyback'));
  });
});
// Helper mirroring NewsPipeline's title hashing for the dedupe test
import { createHash } from 'crypto';
/** Hashes `input` with SHA-256 and returns the digest as a hex string. */
function sha256(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input);
  const hex = hasher.digest('hex');
  return hex;
}