mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
refactor(domain): slim detectConfirmation contract
Return the ranked links directly (string[] | null) instead of a
{score, links} wrapper whose score no visible caller reads, and drop the
redundant hasKeyword helper in favor of matchesAny(_, KEYWORDS). No
behavior change.
This commit is contained in:
@@ -187,7 +187,7 @@ async function storeEmail(
|
||||
return false; // signal: skipped (not stored)
|
||||
}
|
||||
|
||||
const confirmation = detectConfirmation({
|
||||
const confirmationLinks = detectConfirmation({
|
||||
subject: input.subject,
|
||||
text: htmlToText(input.content),
|
||||
links: extractLinks(input.content),
|
||||
@@ -229,7 +229,9 @@ async function storeEmail(
|
||||
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
|
||||
...(messageId ? { messageId } : {}),
|
||||
dedupHash,
|
||||
...(confirmation ? { confirmation: { links: confirmation.links } } : {}),
|
||||
...(confirmationLinks
|
||||
? { confirmation: { links: confirmationLinks } }
|
||||
: {}),
|
||||
};
|
||||
|
||||
// Track the latest sender's domain (feed icon) and capture the RFC 8058
|
||||
|
||||
@@ -15,9 +15,7 @@ describe("detectConfirmation", () => {
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.links[0]).toBe(
|
||||
"https://news.example.com/confirm?token=abc123",
|
||||
);
|
||||
expect(result![0]).toBe("https://news.example.com/confirm?token=abc123");
|
||||
});
|
||||
|
||||
it("detects a French confirmation email (accent-insensitive)", () => {
|
||||
@@ -32,7 +30,7 @@ describe("detectConfirmation", () => {
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.links[0]).toBe("https://lettre.example.fr/valider/xyz");
|
||||
expect(result![0]).toBe("https://lettre.example.fr/valider/xyz");
|
||||
});
|
||||
|
||||
it("returns null for a normal newsletter with only an unsubscribe link", () => {
|
||||
@@ -69,7 +67,7 @@ describe("detectConfirmation", () => {
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.links).not.toContain("https://x.example/unsubscribe");
|
||||
expect(result!).not.toContain("https://x.example/unsubscribe");
|
||||
});
|
||||
|
||||
it("ranks the strongest candidate first and caps at three links", () => {
|
||||
@@ -85,8 +83,8 @@ describe("detectConfirmation", () => {
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.links.length).toBeLessThanOrEqual(3);
|
||||
expect(result!.links[0]).toBe("https://x.example/confirm?token=2");
|
||||
expect(result!.length).toBeLessThanOrEqual(3);
|
||||
expect(result![0]).toBe("https://x.example/confirm?token=2");
|
||||
});
|
||||
|
||||
it("ignores non-http(s) links", () => {
|
||||
|
||||
@@ -15,11 +15,6 @@ export interface DetectConfirmationInput {
|
||||
links: { href: string; text: string }[];
|
||||
}
|
||||
|
||||
export interface ConfirmationResult {
|
||||
score: number;
|
||||
links: string[];
|
||||
}
|
||||
|
||||
// Confirmation-positive stems, already normalized (lowercased, diacritics stripped).
|
||||
// EN / FR / DE / ES — extend here to add a language.
|
||||
const KEYWORDS = [
|
||||
@@ -79,10 +74,6 @@ function matchesAny(haystack: string, needles: string[]): boolean {
|
||||
return needles.some((n) => haystack.includes(n));
|
||||
}
|
||||
|
||||
function hasKeyword(haystack: string): boolean {
|
||||
return KEYWORDS.some((kw) => haystack.includes(kw));
|
||||
}
|
||||
|
||||
function linkScore(href: string, text: string): number {
|
||||
const h = normalize(href);
|
||||
const t = normalize(text);
|
||||
@@ -101,7 +92,7 @@ function stripNegatives(text: string): string {
|
||||
|
||||
export function detectConfirmation(
|
||||
input: DetectConfirmationInput,
|
||||
): ConfirmationResult | null {
|
||||
): string[] | null {
|
||||
const candidates = input.links
|
||||
.filter((l) => isHttp(l.href))
|
||||
.map((l) => ({ href: l.href.trim(), score: linkScore(l.href, l.text) }))
|
||||
@@ -113,12 +104,11 @@ export function detectConfirmation(
|
||||
const subject = stripNegatives(normalize(input.subject));
|
||||
const text = stripNegatives(normalize(input.text));
|
||||
|
||||
const subjectScore = hasKeyword(subject) ? 2 : 0;
|
||||
const bodyScore = hasKeyword(text) ? 1 : 0;
|
||||
const subjectScore = matchesAny(subject, KEYWORDS) ? 2 : 0;
|
||||
const bodyScore = matchesAny(text, KEYWORDS) ? 1 : 0;
|
||||
const bestLinkScore = candidates[0].score;
|
||||
|
||||
const score = subjectScore + bodyScore + bestLinkScore;
|
||||
if (score < THRESHOLD) return null;
|
||||
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
|
||||
|
||||
return { score, links: candidates.slice(0, 3).map((c) => c.href) };
|
||||
return candidates.slice(0, 3).map((c) => c.href);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user