phase-10.5: screener enhancements

This commit is contained in:
saikiranvella
2026-06-11 19:18:19 -04:00
parent bac00ab5d5
commit e953822bab
51 changed files with 3745 additions and 36 deletions
@@ -163,6 +163,68 @@ export const UNIVERSE_QUERIES = {
WHERE type != 'crypto'
ORDER BY ticker
`,
// Every ticker screened recently (snapshot ledger) — part of the news universe
DISTINCT_SNAPSHOT_TICKERS_SINCE: `
SELECT DISTINCT ticker FROM signal_snapshots
WHERE snapshot_date >= ?
ORDER BY ticker
`,
};
// ── News Queries (FREE-DATA-STACK §25 — free-tier news pipeline) ───────────
export const NEWS_QUERIES = {
// INSERT OR IGNORE — url_hash PK is the first dedupe line (returns 0 changes on dup)
INSERT_ARTICLE: `
INSERT OR IGNORE INTO news_articles
(url_hash, title_hash, ticker_list, headline, body, source, catalyst, url, published_at, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
// Second dedupe line: same (normalized) title seen recently → syndicated copy
TITLE_SEEN_SINCE: `
SELECT 1 FROM news_articles
WHERE title_hash = ? AND published_at >= ?
LIMIT 1
`,
INSERT_CATALYST_LINK: `
INSERT OR IGNORE INTO ticker_catalysts (ticker, day, url_hash)
VALUES (?, ?, ?)
`,
// Per-ticker daily cap check (FREE-DATA-STACK §4.4)
COUNT_TICKER_DAY: `
SELECT COUNT(*) AS n FROM ticker_catalysts
WHERE ticker = ? AND day = ?
`,
// Stories for one ticker since a given day — what the UI reads (never Yahoo live)
SELECT_TICKER_NEWS: `
SELECT a.* FROM ticker_catalysts c
JOIN news_articles a ON a.url_hash = c.url_hash
WHERE c.ticker = ? AND c.day >= ?
ORDER BY a.published_at DESC
`,
SELECT_RECENT: `
SELECT * FROM news_articles
ORDER BY published_at DESC
LIMIT ?
`,
// Retention (FREE-DATA-STACK §5): purge bodies after 90d, drop unreferenced after 18mo
PURGE_BODIES_BEFORE: `
UPDATE news_articles SET body = NULL
WHERE body IS NOT NULL AND published_at < ?
`,
DELETE_UNREFERENCED_BEFORE: `
DELETE FROM news_articles
WHERE published_at < ?
AND url_hash NOT IN (SELECT url_hash FROM ticker_catalysts)
`,
};
// ── Signal Snapshot Queries (P0.1 — signal track record) ────────────────────
@@ -287,6 +349,31 @@ export const DDL = `
CREATE INDEX IF NOT EXISTS idx_snapshots_date ON signal_snapshots(snapshot_date);
CREATE INDEX IF NOT EXISTS idx_snapshots_signal ON signal_snapshots(signal, snapshot_date);
CREATE TABLE IF NOT EXISTS news_articles (
url_hash TEXT PRIMARY KEY, -- sha256(url)
title_hash TEXT NOT NULL, -- sha256(normalized headline) — syndication dedupe
ticker_list TEXT NOT NULL, -- JSON array of matched universe tickers
headline TEXT NOT NULL,
body TEXT, -- nullable; purged after 90 days (retention job)
source TEXT NOT NULL, -- 'edgar' | 'prwire' | 'yahoo'
catalyst TEXT, -- 'earnings'|'ma'|'guidance'|'regulatory'|'macro'|NULL
url TEXT NOT NULL,
published_at TEXT NOT NULL, -- ISO timestamp
created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_news_published ON news_articles(published_at DESC);
CREATE INDEX IF NOT EXISTS idx_news_title ON news_articles(title_hash, published_at);
CREATE TABLE IF NOT EXISTS ticker_catalysts (
ticker TEXT NOT NULL,
day TEXT NOT NULL, -- YYYY-MM-DD (published date)
url_hash TEXT NOT NULL REFERENCES news_articles(url_hash),
PRIMARY KEY (ticker, day, url_hash)
);
CREATE INDEX IF NOT EXISTS idx_catalysts_ticker ON ticker_catalysts(ticker, day DESC);
`;
// ── Runtime migrations (ALTER TABLE for existing DBs) ────────────────────────