diff --git a/CHANGELOG.md b/CHANGELOG.md index b8bfc43..e3df849 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,13 @@ verbatim as the GitHub Release notes — so what you write here is what ships. ## [Unreleased] +### Fixed + +- Subscription-confirmation detection now recognises a confirm email whose CTA + button carries the subscribe/subscription hint only in its visible text (e.g. + "Yes, subscribe me to this mailing list.") over an opaque tracking-redirect + href — previously the link scored zero and the email was missed. + ## [0.3.1] - 2026-05-25 ### Fixed diff --git a/src/domain/confirmation.test.ts b/src/domain/confirmation.test.ts index e6dc7be..c15ea7e 100644 --- a/src/domain/confirmation.test.ts +++ b/src/domain/confirmation.test.ts @@ -159,6 +159,25 @@ describe("detectConfirmation", () => { expect(result![0]).toBe("https://news.example.com/subscribe/abc123"); }); + it("detects a confirm email whose CTA link carries the weak signal only in its text (opaque tracking href)", () => { + // Real-world Mailchimp double opt-in: the subject/body clearly confirm, but + // the button's href is an opaque base64 tracking redirect (no signal) and its + // visible text — "Yes, subscribe me…" — is only a weak signal. The link must + // still qualify as a candidate so the email is flagged. + const result = detectConfirmation({ + subject: "Action Required | Please Confirm Your Subscription", + text: "Please confirm your mailing list subscription (double opt-in) by clicking the button below. You won't be subscribed if you don't click the confirmation link above.", + links: [ + { + href: "https://click.example.com/track/click/00000000/list.example.com?p=eyJzIjoiQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUEiLCJ2", + text: "Yes, subscribe me to this mailing list.", + }, + ], + }); + expect(result).not.toBeNull(); + expect(result![0]).toContain("click.example.com"); + }); + it("dedupes a confirmation link repeated in the body", () => { const result = detectConfirmation({ subject: "Confirm your subscription", diff --git a/src/domain/confirmation.ts b/src/domain/confirmation.ts index c259803..fee84ba 100644 --- a/src/domain/confirmation.ts +++ b/src/domain/confirmation.ts @@ -46,10 +46,12 @@ const STRONG_LINK_SIGNALS = [ "activation", ]; -// Weak URL signals: ambiguous subscribe/subscription words that also appear in -// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their -// own (with a stray body keyword), cross the threshold and cry wolf — but still -// let a genuine "confirm your subscription" subject + a bare /subscribe link pass. +// Weak signals: ambiguous subscribe/subscription words that also appear in +// ordinary "manage subscription" footers. Matched on the link href OR its visible +// text (a CTA button often reads "Yes, subscribe me…" over an opaque tracking +// redirect). Worth only +1 — and only once, never href+text additively — so they +// cannot, on their own (with a stray body keyword), cross the threshold and cry +// wolf, yet still let a genuine "confirm your subscription" email pass. const WEAK_LINK_SIGNALS = ["subscription", "subscribe"]; // Negative patterns: a link matching any of these is NEVER a candidate, and these @@ -85,7 +87,8 @@ function linkScore(href: string, text: string): number { if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0; let score = 0; if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2; - else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1; + else if (matchesAny(h, WEAK_LINK_SIGNALS) || matchesAny(t, WEAK_LINK_SIGNALS)) + score += 1; if (matchesAny(t, KEYWORDS)) score += 2; return score; }