phase-10.5: screener enhancements
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { NewsPipeline } from '../server/domains/news/NewsPipeline.js';
|
||||
import type { NewsRepository } from '../server/domains/news/NewsRepository.js';
|
||||
import type { NormalizedStory } from '../server/domains/shared/types/index.js';
|
||||
|
||||
/** Fields of a stored article that this stub keeps and the assertions inspect. */
type StubArticle = { urlHash: string; tickers: string[]; catalyst: string | null };

/**
 * In-memory stub that records what the pipeline stores.
 *
 * Tests seed `seenTitles` and `capCounts` directly to simulate pre-existing
 * state, run an ingest, then inspect `articles` and `links`.
 */
class StubRepo {
  /** Articles accepted by `insertArticle`, in insertion order. */
  readonly articles: StubArticle[] = [];

  /** One entry per `linkTicker` call, in call order. */
  readonly links: Array<{ ticker: string; day: string }> = [];

  /** Title hashes that `titleSeenSince` reports as already seen. */
  readonly seenTitles = new Set<string>();

  /** Seeded counts returned by `countTickerDay`, keyed by `${ticker}|${day}`. */
  readonly capCounts = new Map<string, number>();

  /**
   * Stores the article unless one with the same `urlHash` is already present.
   *
   * @returns `true` when the article was stored, `false` when it was a duplicate.
   */
  insertArticle(a: StubArticle): boolean {
    const alreadyStored = this.articles.some((existing) => existing.urlHash === a.urlHash);
    if (alreadyStored) {
      return false;
    }
    this.articles.push(a);
    return true;
  }

  /** Reports whether `titleHash` was seeded into `seenTitles`. */
  titleSeenSince(titleHash: string): boolean {
    return this.seenTitles.has(titleHash);
  }

  /** Records a ticker/day link; does not touch `capCounts`. */
  linkTicker(ticker: string, day: string): void {
    this.links.push({ ticker, day });
  }

  /**
   * Returns the seeded count for the ticker/day pair, or 0 when none was seeded.
   * Only `capCounts` is consulted, so calls to `linkTicker` never change the result.
   */
  countTickerDay(ticker: string, day: string): number {
    return this.capCounts.get(`${ticker}|${day}`) ?? 0;
  }

  /** No-op in the stub; always reports zero affected rows. */
  purgeBodiesBefore(): number {
    return 0;
  }

  /** No-op in the stub; always reports zero affected rows. */
  deleteUnreferencedBefore(): number {
    return 0;
  }
}
|
||||
|
||||
/** Ticker universe passed to every `ingest` call in this file. */
const UNIVERSE = new Set(['AAPL', 'MSFT']);
|
||||
|
||||
/** Monotonic counter that gives each default story URL a distinct, reproducible value. */
let storySeq = 0;

/**
 * Builds a story fixture for the pipeline tests.
 *
 * Defaults describe an AAPL press-wire story published on 2026-06-09. Each call
 * gets its own default URL, so two fixtures never share one unless a test
 * overrides `url` explicitly.
 *
 * @param overrides Fields that replace the defaults; spread last so they always win.
 * @returns The defaults merged with `overrides`.
 */
function story(overrides: Partial<NormalizedStory> = {}): NormalizedStory {
  // A counter instead of Math.random(): uniqueness is guaranteed and failures reproduce.
  storySeq += 1;
  return {
    tickers: ['AAPL'],
    headline: 'Apple announces quarterly results beat estimates',
    source: 'prwire',
    url: `https://example.com/story-${storySeq}`,
    publishedAt: '2026-06-09T14:00:00.000Z',
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Creates a `NewsPipeline` backed by the given stub repository.
 *
 * `StubRepo` is not declared as implementing `NewsRepository`, so the stub is
 * passed through `unknown` to satisfy the constructor's parameter type.
 * NOTE(review): the stub only provides the methods defined in this file; confirm
 * the pipeline paths under test call nothing else on the repository.
 *
 * @param repo Stub whose recorded state the test inspects afterwards.
 * @returns A pipeline wired to `repo`.
 */
function makePipeline(repo: StubRepo): NewsPipeline {
  const repository = repo as unknown as NewsRepository;
  return new NewsPipeline(repository);
}
|
||||
|
||||
/**
 * NewsPipeline suite: drives `ingest` against a `StubRepo` and checks the
 * returned stats plus what the stub recorded, then exercises the static
 * `classify` and `isNoise` helpers directly.
 */
test('NewsPipeline', async (t) => {
  await t.test('stores universe stories and links tickers', () => {
    const repo = new StubRepo();
    const stats = makePipeline(repo).ingest([story()], UNIVERSE);
    assert.equal(stats.stored, 1);
    // One structural comparison covers the link count, ticker and day together,
    // without dereferencing a possibly-missing array element.
    assert.deepEqual(repo.links, [{ ticker: 'AAPL', day: '2026-06-09' }]);
  });

  await t.test('drops stories with no universe ticker (§4.1)', () => {
    const repo = new StubRepo();
    const stats = makePipeline(repo).ingest([story({ tickers: ['ZZZZ'] })], UNIVERSE);
    assert.equal(stats.stored, 0);
    assert.equal(stats.droppedNoUniverseTicker, 1);
    assert.equal(repo.articles.length, 0);
  });

  await t.test('drops noise headlines, but never filings (§4.2)', () => {
    const repo = new StubRepo();
    const noise = story({ headline: '5 best stocks to buy now including Apple' });
    // Same listicle wording as the noise story, but sourced from a filing.
    const filing = story({
      headline: '8-K filing: 5 best stocks edge case',
      source: 'edgar',
      catalystHint: 'regulatory',
    });
    const stats = makePipeline(repo).ingest([noise, filing], UNIVERSE);
    assert.equal(stats.droppedNoise, 1);
    assert.equal(stats.stored, 1);
    // The stored article must be the filing, carrying its catalyst hint.
    assert.equal(repo.articles[0]?.catalyst, 'regulatory');
  });

  await t.test('drops syndicated duplicates by normalized title (§4.3)', () => {
    const repo = new StubRepo();
    const pipeline = makePipeline(repo);
    // First copy stored; mark its normalized-title hash as seen.
    // The stub does not track titles itself, so the test seeds the hash by hand.
    pipeline.ingest([story({ headline: 'Apple Beats Q2 Estimates!' })], UNIVERSE);
    repo.seenTitles.add(sha256(NewsPipeline.normalizeTitle('Apple Beats Q2 Estimates!')));
    // Same story, different casing/punctuation/URL → syndicated copy
    const stats = pipeline.ingest(
      [story({ headline: 'APPLE BEATS Q2 ESTIMATES', url: 'https://other.com/copy' })],
      UNIVERSE,
    );
    assert.equal(stats.droppedDuplicate, 1);
  });

  await t.test('enforces per-ticker daily cap, filings exempt (§4.4)', () => {
    const repo = new StubRepo();
    repo.capCounts.set('AAPL|2026-06-09', 25); // at cap
    const wire = story();
    const filing = story({ source: 'edgar', catalystHint: 'ma', url: 'https://sec.gov/x' });
    const stats = makePipeline(repo).ingest([wire, filing], UNIVERSE);
    assert.equal(stats.droppedCapped, 1);
    assert.equal(stats.stored, 1); // the filing
  });

  await t.test('classifies catalysts with M&A taking priority', () => {
    assert.equal(NewsPipeline.classify('Acme to be acquired by MegaCorp in Q2 deal'), 'ma');
    assert.equal(NewsPipeline.classify('Acme reports record quarterly results'), 'earnings');
    assert.equal(NewsPipeline.classify('Acme raises full-year guidance'), 'guidance');
    assert.equal(NewsPipeline.classify('FDA approval granted for Acme drug'), 'regulatory');
    assert.equal(NewsPipeline.classify('Fed holds rates steady amid CPI data'), 'macro');
    assert.equal(NewsPipeline.classify('Acme appoints new CMO'), null);
  });

  await t.test('noise detector catches listicles and target reiterations', () => {
    assert.ok(NewsPipeline.isNoise('3 Top Stocks to Watch This Week'));
    assert.ok(NewsPipeline.isNoise('Analyst price target raised on momentum'));
    assert.ok(!NewsPipeline.isNoise('Apple announces $90B buyback'));
  });
});
|
||||
|
||||
// Helper mirroring NewsPipeline's title hashing for the dedupe test
|
||||
import { createHash } from 'crypto';
|
||||
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param input Text to hash; encoded as UTF-8 before hashing.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
function sha256(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input, 'utf8');
  return hasher.digest('hex');
}
|
||||
Reference in New Issue
Block a user