fix(confirmation): detect confirm emails whose CTA hint is in the link text

Weak subscribe/subscription signals are now matched on the link href OR its
visible text (matched once, not additively), so a double opt-in email whose
button reads "Yes, subscribe me…" over an opaque tracking-redirect href is no
longer missed. Adds a regression test with anonymized fixture data.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-25 22:36:16 +02:00
parent b6b160a186
commit bb9fce72ff
3 changed files with 34 additions and 5 deletions
+7
View File
@@ -12,6 +12,13 @@ verbatim as the GitHub Release notes — so what you write here is what ships.
## [Unreleased]
### Fixed
- Subscription-confirmation detection now recognises a confirm email whose CTA
button carries the subscribe/subscription hint only in its visible text (e.g.
"Yes, subscribe me to this mailing list.") over an opaque tracking-redirect
href — previously the link scored zero and the email was missed.
## [0.3.1] - 2026-05-25
### Fixed
+19
View File
@@ -159,6 +159,25 @@ describe("detectConfirmation", () => {
expect(result![0]).toBe("https://news.example.com/subscribe/abc123");
});
it("detects a confirm email whose CTA link carries the weak signal only in its text (opaque tracking href)", () => {
  // Regression fixture modelled on a real-world Mailchimp double opt-in mail.
  // The subject and body clearly ask for confirmation, but the button itself
  // gives the link scorer very little to work with:
  //   - its href is an opaque base64 tracking redirect (no signal at all);
  //   - its visible text, "Yes, subscribe me…", is only a weak signal.
  // The link must nevertheless qualify as a candidate so the email is flagged.
  const trackingHref =
    "https://click.example.com/track/click/00000000/list.example.com?p=eyJzIjoiQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUEiLCJ2";
  const ctaLink = {
    href: trackingHref,
    text: "Yes, subscribe me to this mailing list.",
  };

  const result = detectConfirmation({
    subject: "Action Required | Please Confirm Your Subscription",
    text: "Please confirm your mailing list subscription (double opt-in) by clicking the button below. You won't be subscribed if you don't click the confirmation link above.",
    links: [ctaLink],
  });

  // The email is detected, and the first reported link is the tracking
  // redirect itself.
  expect(result).not.toBeNull();
  expect(result![0]).toContain("click.example.com");
});
it("dedupes a confirmation link repeated in the body", () => {
const result = detectConfirmation({
subject: "Confirm your subscription",
+8 -5
View File
@@ -46,10 +46,12 @@ const STRONG_LINK_SIGNALS = [
"activation",
];
// Weak URL signals: ambiguous subscribe/subscription words that also appear in
// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their
// own (with a stray body keyword), cross the threshold and cry wolf — but still
// let a genuine "confirm your subscription" subject + a bare /subscribe link pass.
// Weak signals: ambiguous subscribe/subscription words that also appear in
// ordinary "manage subscription" footers. Matched on the link href OR its visible
// text (a CTA button often reads "Yes, subscribe me…" over an opaque tracking
// redirect). Worth only +1 — and only once, never href+text additively — so they
// cannot, on their own (with a stray body keyword), cross the threshold and cry
// wolf, yet still let a genuine "confirm your subscription" email pass.
// Both entries are required: "subscription" does not contain "subscribe"
// (…scrip… vs …scrib…), so neither word subsumes the other regardless of how
// the matcher compares them.
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
// Negative patterns: a link matching any of these is NEVER a candidate, and these
@@ -85,7 +87,8 @@ function linkScore(href: string, text: string): number {
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
let score = 0;
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1;
else if (matchesAny(h, WEAK_LINK_SIGNALS) || matchesAny(t, WEAK_LINK_SIGNALS))
score += 1;
if (matchesAny(t, KEYWORDS)) score += 2;
return score;
}