mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(confirmation): detect confirm emails whose CTA hint is in the link text
Weak subscribe/subscription signals are now matched on the link href OR its visible text (counted once, +1 in total — never summed across href and text), so a double opt-in email whose button reads "Yes, subscribe me…" over an opaque tracking-redirect href is no longer missed. Adds a regression test with anonymized fixture data.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,13 @@ verbatim as the GitHub Release notes — so what you write here is what ships.
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Subscription-confirmation detection now recognises a confirm email whose CTA
|
||||||
|
button carries the subscribe/subscription hint only in its visible text (e.g.
|
||||||
|
"Yes, subscribe me to this mailing list.") over an opaque tracking-redirect
|
||||||
|
href — previously the link scored zero and the email was missed.
|
||||||
|
|
||||||
## [0.3.1] - 2026-05-25
|
## [0.3.1] - 2026-05-25
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|||||||
@@ -159,6 +159,25 @@ describe("detectConfirmation", () => {
|
|||||||
expect(result![0]).toBe("https://news.example.com/subscribe/abc123");
|
expect(result![0]).toBe("https://news.example.com/subscribe/abc123");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("detects a confirm email whose CTA link carries the weak signal only in its text (opaque tracking href)", () => {
|
||||||
|
// Real-world Mailchimp double opt-in: the subject/body clearly confirm, but
|
||||||
|
// the button's href is an opaque base64 tracking redirect (no signal) and its
|
||||||
|
// visible text — "Yes, subscribe me…" — is only a weak signal. The link must
|
||||||
|
// still qualify as a candidate so the email is flagged.
|
||||||
|
const result = detectConfirmation({
|
||||||
|
subject: "Action Required | Please Confirm Your Subscription",
|
||||||
|
text: "Please confirm your mailing list subscription (double opt-in) by clicking the button below. You won't be subscribed if you don't click the confirmation link above.",
|
||||||
|
links: [
|
||||||
|
{
|
||||||
|
href: "https://click.example.com/track/click/00000000/list.example.com?p=eyJzIjoiQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUEiLCJ2",
|
||||||
|
text: "Yes, subscribe me to this mailing list.",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result![0]).toContain("click.example.com");
|
||||||
|
});
|
||||||
|
|
||||||
it("dedupes a confirmation link repeated in the body", () => {
|
it("dedupes a confirmation link repeated in the body", () => {
|
||||||
const result = detectConfirmation({
|
const result = detectConfirmation({
|
||||||
subject: "Confirm your subscription",
|
subject: "Confirm your subscription",
|
||||||
|
|||||||
@@ -46,10 +46,12 @@ const STRONG_LINK_SIGNALS = [
|
|||||||
"activation",
|
"activation",
|
||||||
];
|
];
|
||||||
|
|
||||||
// Weak URL signals: ambiguous subscribe/subscription words that also appear in
|
// Weak signals: ambiguous subscribe/subscription words that also appear in
|
||||||
// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their
|
// ordinary "manage subscription" footers. Matched on the link href OR its visible
|
||||||
// own (with a stray body keyword), cross the threshold and cry wolf — but still
|
// text (a CTA button often reads "Yes, subscribe me…" over an opaque tracking
|
||||||
// let a genuine "confirm your subscription" subject + a bare /subscribe link pass.
|
// redirect). Worth only +1 — and only once, never href+text additively — so they
|
||||||
|
// cannot, on their own (with a stray body keyword), cross the threshold and cry
|
||||||
|
// wolf, yet still let a genuine "confirm your subscription" email pass.
|
||||||
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
|
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
|
||||||
|
|
||||||
// Negative patterns: a link matching any of these is NEVER a candidate, and these
|
// Negative patterns: a link matching any of these is NEVER a candidate, and these
|
||||||
@@ -85,7 +87,8 @@ function linkScore(href: string, text: string): number {
|
|||||||
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
|
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
|
||||||
let score = 0;
|
let score = 0;
|
||||||
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
|
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
|
||||||
else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1;
|
else if (matchesAny(h, WEAK_LINK_SIGNALS) || matchesAny(t, WEAK_LINK_SIGNALS))
|
||||||
|
score += 1;
|
||||||
if (matchesAny(t, KEYWORDS)) score += 2;
|
if (matchesAny(t, KEYWORDS)) score += 2;
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user