mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(confirmation): detect confirm emails whose CTA hint is in the link text
Weak subscribe/subscription signals are now matched on the link href OR its visible text (matched once, not additively), so a double opt-in email whose button reads "Yes, subscribe me…" over an opaque tracking-redirect href is no longer missed. Adds a regression test with anonymized fixture data.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,13 @@ verbatim as the GitHub Release notes — so what you write here is what ships.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Subscription-confirmation detection now recognises a confirm email whose CTA
|
||||
button carries the subscribe/subscription hint only in its visible text (e.g.
|
||||
"Yes, subscribe me to this mailing list.") over an opaque tracking-redirect
|
||||
href — previously the link scored zero and the email was missed.
|
||||
|
||||
## [0.3.1] - 2026-05-25
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -159,6 +159,25 @@ describe("detectConfirmation", () => {
|
||||
expect(result![0]).toBe("https://news.example.com/subscribe/abc123");
|
||||
});
|
||||
|
||||
it("detects a confirm email whose CTA link carries the weak signal only in its text (opaque tracking href)", () => {
|
||||
// Real-world Mailchimp double opt-in: the subject/body clearly confirm, but
|
||||
// the button's href is an opaque base64 tracking redirect (no signal) and its
|
||||
// visible text — "Yes, subscribe me…" — is only a weak signal. The link must
|
||||
// still qualify as a candidate so the email is flagged.
|
||||
const result = detectConfirmation({
|
||||
subject: "Action Required | Please Confirm Your Subscription",
|
||||
text: "Please confirm your mailing list subscription (double opt-in) by clicking the button below. You won't be subscribed if you don't click the confirmation link above.",
|
||||
links: [
|
||||
{
|
||||
href: "https://click.example.com/track/click/00000000/list.example.com?p=eyJzIjoiQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUEiLCJ2",
|
||||
text: "Yes, subscribe me to this mailing list.",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result![0]).toContain("click.example.com");
|
||||
});
|
||||
|
||||
it("dedupes a confirmation link repeated in the body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Confirm your subscription",
|
||||
|
||||
@@ -46,10 +46,12 @@ const STRONG_LINK_SIGNALS = [
|
||||
"activation",
|
||||
];
|
||||
|
||||
// Weak URL signals: ambiguous subscribe/subscription words that also appear in
|
||||
// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their
|
||||
// own (with a stray body keyword), cross the threshold and cry wolf — but still
|
||||
// let a genuine "confirm your subscription" subject + a bare /subscribe link pass.
|
||||
// Weak signals: ambiguous subscribe/subscription words that also appear in
|
||||
// ordinary "manage subscription" footers. Matched on the link href OR its visible
|
||||
// text (a CTA button often reads "Yes, subscribe me…" over an opaque tracking
|
||||
// redirect). Worth only +1 — and only once, never href+text additively — so they
|
||||
// cannot, on their own (with a stray body keyword), cross the threshold and cry
|
||||
// wolf, yet still let a genuine "confirm your subscription" email pass.
|
||||
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
|
||||
|
||||
// Negative patterns: a link matching any of these is NEVER a candidate, and these
|
||||
@@ -85,7 +87,8 @@ function linkScore(href: string, text: string): number {
|
||||
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
|
||||
let score = 0;
|
||||
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
|
||||
else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1;
|
||||
else if (matchesAny(h, WEAK_LINK_SIGNALS) || matchesAny(t, WEAK_LINK_SIGNALS))
|
||||
score += 1;
|
||||
if (matchesAny(t, KEYWORDS)) score += 2;
|
||||
return score;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user