diff --git a/src/domain/confirmation.test.ts b/src/domain/confirmation.test.ts index 5d8a916..e6dc7be 100644 --- a/src/domain/confirmation.test.ts +++ b/src/domain/confirmation.test.ts @@ -95,4 +95,79 @@ describe("detectConfirmation", () => { }); expect(result).toBeNull(); }); + + // ── False-positive guards: ordinary newsletters must NOT be flagged ────────── + // A "manage subscription" footer link is only a weak signal (+1), so a stray + // body keyword (active/valid) cannot push it over the threshold. + + it("does not flag a newsletter with a manage-subscription footer + 'active' in body", () => { + const result = detectConfirmation({ + subject: "This week in tech", + text: "Thanks to our most active community members for the great discussion.", + links: [ + { href: "https://news.example.com/article/42", text: "Read more" }, + { + href: "https://news.example.com/account/subscription", + text: "Manage your subscription", + }, + ], + }); + expect(result).toBeNull(); + }); + + it("does not flag a newsletter with a subscription-preferences link + 'valid' in body", () => { + const result = detectConfirmation({ + subject: "Weekend deals are here", + text: "These offers are valid until Friday — don't miss out.", + links: [ + { + href: "https://shop.example.com/subscription/preferences", + text: "Subscription preferences", + }, + ], + }); + expect(result).toBeNull(); + }); + + it("does not flag a marketing 'Subscribe & save' CTA + 'activate' in body", () => { + const result = detectConfirmation({ + subject: "Your weekly digest", + text: "Activate your free trial and start saving today.", + links: [ + { + href: "https://shop.example.com/subscribe", + text: "Subscribe & save", + }, + ], + }); + expect(result).toBeNull(); + }); + + // ── Recall: a genuine confirmation still passes via the weak signal ────────── + it("detects a genuine confirm-subscription email whose only link is a bare /subscribe", () => { + const result = detectConfirmation({ + subject: "Please confirm your subscription", + text: "Tap the button to finish signing up.", + links: [ + { + href: "https://news.example.com/subscribe/abc123", + text: "Subscribe", + }, + ], + }); + expect(result).not.toBeNull(); + expect(result![0]).toBe("https://news.example.com/subscribe/abc123"); + }); + + it("dedupes a confirmation link repeated in the body", () => { + const result = detectConfirmation({ + subject: "Confirm your subscription", + text: "verify your address", + links: [ + { href: "https://x.example/confirm?token=1", text: "Confirm" }, + { href: "https://x.example/confirm?token=1", text: "Confirm here" }, + ], + }); + expect(result).toEqual(["https://x.example/confirm?token=1"]); + }); }); diff --git a/src/domain/confirmation.ts b/src/domain/confirmation.ts index 79d1975..c259803 100644 --- a/src/domain/confirmation.ts +++ b/src/domain/confirmation.ts @@ -29,8 +29,9 @@ const KEYWORDS = [ "optin", ]; -// Link URL/anchor signals (normalized). A link matching any → candidate. -const LINK_SIGNALS = [ +// Strong URL signals: an unambiguous confirm/verify/activate action or a token. +// A link URL matching any scores +2. +const STRONG_LINK_SIGNALS = [ "confirm", "verif", "activ", @@ -40,13 +41,17 @@ const LINK_SIGNALS = [ "optin", "opt-in", "double-optin", - "subscription", - "subscribe", "token=", "confirm=", "activation", ]; +// Weak URL signals: ambiguous subscribe/subscription words that also appear in +// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their +// own (with a stray body keyword), cross the threshold and cry wolf — but still +// let a genuine "confirm your subscription" subject + a bare /subscribe link pass. +const WEAK_LINK_SIGNALS = ["subscription", "subscribe"]; + // Negative patterns: a link matching any of these is NEVER a candidate, and these // tokens are stripped from text before keyword scanning (kills the unsubscribe // false positive — "unsubscribe" contains "subscribe"). @@ -79,7 +84,8 @@ function linkScore(href: string, text: string): number { const t = normalize(text); if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0; let score = 0; - if (matchesAny(h, LINK_SIGNALS)) score += 2; + if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2; + else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1; if (matchesAny(t, KEYWORDS)) score += 2; return score; } @@ -110,5 +116,7 @@ export function detectConfirmation( if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null; - return candidates.slice(0, 3).map((c) => c.href); + // Dedupe by href before capping, so a link repeated in the body never wastes + // one of the three surfaced slots. + return [...new Set(candidates.map((c) => c.href))].slice(0, 3); }