From bb9fce72ff33ce0d06dfa69de4a594216bfd088a Mon Sep 17 00:00:00 2001 From: Julien Herr Date: Mon, 25 May 2026 22:36:16 +0200 Subject: [PATCH] fix(confirmation): detect confirm emails whose CTA hint is in the link text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Weak subscribe/subscription signals are now matched on the link href OR its visible text (matched once, not additively), so a double opt-in email whose button reads "Yes, subscribe me…" over an opaque tracking-redirect href is no longer missed. Adds a regression test with anonymized fixture data. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 7 +++++++ src/domain/confirmation.test.ts | 19 +++++++++++++++++++ src/domain/confirmation.ts | 13 ++++++++----- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8bfc43..e3df849 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,13 @@ verbatim as the GitHub Release notes — so what you write here is what ships. ## [Unreleased] +### Fixed + +- Subscription-confirmation detection now recognises a confirm email whose CTA + button carries the subscribe/subscription hint only in its visible text (e.g. + "Yes, subscribe me to this mailing list.") over an opaque tracking-redirect + href — previously the link scored zero and the email was missed. + ## [0.3.1] - 2026-05-25 ### Fixed diff --git a/src/domain/confirmation.test.ts b/src/domain/confirmation.test.ts index e6dc7be..c15ea7e 100644 --- a/src/domain/confirmation.test.ts +++ b/src/domain/confirmation.test.ts @@ -159,6 +159,25 @@ describe("detectConfirmation", () => { expect(result![0]).toBe("https://news.example.com/subscribe/abc123"); }); + it("detects a confirm email whose CTA link carries the weak signal only in its text (opaque tracking href)", () => { + // Real-world Mailchimp double opt-in: the subject/body clearly confirm, but + // the button's href is an opaque base64 tracking redirect (no signal) and its + // visible text — "Yes, subscribe me…" — is only a weak signal. The link must + // still qualify as a candidate so the email is flagged. + const result = detectConfirmation({ + subject: "Action Required | Please Confirm Your Subscription", + text: "Please confirm your mailing list subscription (double opt-in) by clicking the button below. You won't be subscribed if you don't click the confirmation link above.", + links: [ + { + href: "https://click.example.com/track/click/00000000/list.example.com?p=eyJzIjoiQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUEiLCJ2", + text: "Yes, subscribe me to this mailing list.", + }, + ], + }); + expect(result).not.toBeNull(); + expect(result![0]).toContain("click.example.com"); + }); + it("dedupes a confirmation link repeated in the body", () => { const result = detectConfirmation({ subject: "Confirm your subscription", diff --git a/src/domain/confirmation.ts b/src/domain/confirmation.ts index c259803..fee84ba 100644 --- a/src/domain/confirmation.ts +++ b/src/domain/confirmation.ts @@ -46,10 +46,12 @@ const STRONG_LINK_SIGNALS = [ "activation", ]; -// Weak URL signals: ambiguous subscribe/subscription words that also appear in -// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their -// own (with a stray body keyword), cross the threshold and cry wolf — but still -// let a genuine "confirm your subscription" subject + a bare /subscribe link pass. +// Weak signals: ambiguous subscribe/subscription words that also appear in +// ordinary "manage subscription" footers. Matched on the link href OR its visible +// text (a CTA button often reads "Yes, subscribe me…" over an opaque tracking +// redirect). Worth only +1 — and only once, never href+text additively — so they +// cannot, on their own (with a stray body keyword), cross the threshold and cry +// wolf, yet still let a genuine "confirm your subscription" email pass. const WEAK_LINK_SIGNALS = ["subscription", "subscribe"]; // Negative patterns: a link matching any of these is NEVER a candidate, and these @@ -85,7 +87,8 @@ function linkScore(href: string, text: string): number { if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0; let score = 0; if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2; - else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1; + else if (matchesAny(h, WEAK_LINK_SIGNALS) || matchesAny(t, WEAK_LINK_SIGNALS)) + score += 1; if (matchesAny(t, KEYWORDS)) score += 2; return score; }