mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(confirmation): flag code-based OTP signups with no clickable link
Detect verification-code signups (e.g. "your verification code is 371404") whose only link is a mailto. These cleared the keyword threshold but were dropped because the detector required an http(s) candidate link. A code path now raises the flag/badge/banner when a verification keyword sits next to an OTP-style code; the code is never extracted or surfaced. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,12 @@ verbatim as the GitHub Release notes — so what you write here is what ships.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Subscription-confirmation detection now flags code-based signup verifications
|
||||
(OTP) that have no link to click — e.g. "Your verification code is 371404",
|
||||
whose only link is a `mailto:` support address. These cleared the keyword
|
||||
threshold but were dropped because the detector required an http(s) candidate
|
||||
link. A dedicated code-detection path now raises the flag/badge/banner when a verification keyword
|
||||
sits next to an OTP-style code; the code itself is never extracted or surfaced.
|
||||
- Subscription-confirmation detection now recognizes localized "subscribe" CTAs.
|
||||
The weak link-signal vocabulary was English-only (`subscrib`),
|
||||
so a genuine double opt-in whose confirm button reads "Je m'inscris…" over an
|
||||
|
||||
@@ -236,7 +236,9 @@ async function storeEmail(
|
||||
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
|
||||
...(messageId ? { messageId } : {}),
|
||||
dedupHash,
|
||||
...(confirmationLinks
|
||||
// null = not a confirmation; [] = a code-based confirmation (flag it, no
|
||||
// link to surface). Both an empty and a populated array mean "detected".
|
||||
...(confirmationLinks !== null
|
||||
? { confirmation: { links: confirmationLinks } }
|
||||
: {}),
|
||||
};
|
||||
|
||||
@@ -199,6 +199,54 @@ describe("detectConfirmation", () => {
|
||||
expect(result![0]).toContain("proc.php");
|
||||
});
|
||||
|
||||
// ── Code-based signup confirmations (OTP) with no clickable link ─────────────
|
||||
// Some signups send a verification *code* to enter manually — there is nothing
|
||||
// to click. We still flag these (empty links: detected but no actionable link),
|
||||
// but never extract or surface the code itself.
|
||||
|
||||
it("flags an OTP signup email whose only link is a mailto", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "❄️ Ton code de vérification est 371404",
|
||||
text: "Salut ! Entre le code de vérification ci-dessous lorsqu'il te sera demandé : 371404. Tu n'as rien demandé ?",
|
||||
links: [
|
||||
{
|
||||
href: "mailto:hey@example.com?subject=Acc%C3%A8s+frauduleux",
|
||||
text: "contacter le support",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it("flags a code email via a body keyword + code pattern when there are no links", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Welcome to Acme",
|
||||
text: "Your verification code is 246810. Enter it to finish signing up.",
|
||||
links: [],
|
||||
});
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it("does not flag a transactional email with a big number but no code-near-code-word", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Order confirmed",
|
||||
text: "Your order 12345678 ships Monday.",
|
||||
links: [
|
||||
{ href: "https://shop.example.com/track/12345678", text: "Track" },
|
||||
],
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag a newsletter with numbers but no verification keyword", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Your 2026 wrapped: 4567 minutes listened",
|
||||
text: "Here is your year in review with code 9999 highlights.",
|
||||
links: [{ href: "https://music.example.com/wrapped", text: "See more" }],
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("dedupes a confirmation link repeated in the body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Confirm your subscription",
|
||||
|
||||
@@ -5,8 +5,11 @@
|
||||
* the link-signal patterns, the scoring weights and the threshold.
|
||||
*
|
||||
* Returns the ranked candidate confirmation links (top 3) when the combined score
|
||||
* clears the threshold AND at least one candidate link exists; otherwise null.
|
||||
* Only http(s) links are ever considered or returned.
|
||||
* clears the threshold AND at least one candidate link exists. When the email is a
|
||||
* code-based signup verification (a verification keyword next to an OTP-style code,
|
||||
* with no clickable link — e.g. "your verification code is 371404") it returns an
|
||||
* empty array: detected, but nothing to click. Returns null when not a confirmation.
|
||||
* Only http(s) links are ever considered or returned; the code is never extracted.
|
||||
*/
|
||||
|
||||
export interface DetectConfirmationInput {
|
||||
@@ -76,6 +79,21 @@ const NEGATIVE = [
|
||||
|
||||
const THRESHOLD = 3;
|
||||
|
||||
// A verification code (OTP) sitting next to a code-ish word, in either order and
|
||||
// within a short window — "your verification code is 371404" / "371404 is your
|
||||
// code". This is the signup-by-code case that has no link to click. Run on the
|
||||
// already-normalized (lowercased, diacritics-stripped) subject/body. We only test
|
||||
// for presence to raise the flag; the code value is never captured or surfaced.
|
||||
// Alternation of code-ish words/prefixes spliced into CODE_PATTERN below. They
// are matched as plain substrings: the pattern puts no word boundary around them.
const CODE_WORDS = "code|codigo|otp|verif";
|
||||
// Upper bound on the number of characters (any character, newlines included)
// allowed between the code word and the digit run.
const CODE_PROXIMITY = 48;
|
||||
// Two alternatives: "<code word> … <digits>" or "<digits> … <code word>". The
// digits must be a run of 4 to 8 delimited by word boundaries, and the gap is
// matched lazily. The RegExp is built without flags, so matching is
// case-sensitive and, lacking `g`/`y`, `.test()` keeps no state between calls.
const CODE_PATTERN = new RegExp(
|
||||
`(?:${CODE_WORDS})[\\s\\S]{0,${CODE_PROXIMITY}}?\\b\\d{4,8}\\b|\\b\\d{4,8}\\b[\\s\\S]{0,${CODE_PROXIMITY}}?(?:${CODE_WORDS})`,
|
||||
);
|
||||
|
||||
/**
 * Reports whether `text` contains a code word within CODE_PROXIMITY characters
 * of a 4–8 digit run, in either order, as defined by CODE_PATTERN.
 *
 * This is a presence check only: the pattern has no capturing groups and the
 * matched digits are never returned. The pattern is case-sensitive, so callers
 * are expected to pass already-lowercased text.
 *
 * @param text - Subject or body text to scan.
 * @returns `true` when the pattern matches anywhere in `text`, else `false`.
 */
function hasVerificationCode(text: string): boolean {
|
||||
return CODE_PATTERN.test(text);
|
||||
}
|
||||
|
||||
/**
 * Folds a string for keyword matching: decomposes it to Unicode NFD, removes
 * every character in the bracketed range, then lowercases the result.
 *
 * NOTE(review): the range is written with literal characters; it appears to be
 * the combining diacritical marks block (U+0300–U+036F), which would strip
 * accents after decomposition — confirm against the original file's bytes.
 *
 * @param s - Raw text to fold.
 * @returns The decomposed, stripped and lowercased string.
 */
function normalize(s: string): string {
|
||||
return s.normalize("NFD").replace(/[̀-ͯ]/g, "").toLowerCase();
|
||||
}
|
||||
@@ -115,18 +133,32 @@ export function detectConfirmation(
|
||||
.filter((l) => l.score > 0)
|
||||
.sort((a, b) => b.score - a.score);
|
||||
|
||||
if (candidates.length === 0) return null;
|
||||
|
||||
const subject = stripNegatives(normalize(input.subject));
|
||||
const text = stripNegatives(normalize(input.text));
|
||||
|
||||
const subjectScore = matchesAny(subject, KEYWORDS) ? 2 : 0;
|
||||
const bodyScore = matchesAny(text, KEYWORDS) ? 1 : 0;
|
||||
const bestLinkScore = candidates[0].score;
|
||||
|
||||
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
|
||||
// Link path: a clickable confirm/verify/subscribe link clears the threshold.
|
||||
if (candidates.length > 0) {
|
||||
const bestLinkScore = candidates[0].score;
|
||||
if (subjectScore + bodyScore + bestLinkScore >= THRESHOLD) {
|
||||
// Dedupe by href before capping, so a link repeated in the body never
|
||||
// wastes one of the three surfaced slots.
|
||||
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
|
||||
}
|
||||
}
|
||||
|
||||
// Dedupe by href before capping, so a link repeated in the body never wastes
|
||||
// one of the three surfaced slots.
|
||||
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
|
||||
// Code path: an OTP-style signup verification with no link to click. Requires
|
||||
// both a verification keyword (subject or body) and a code-near-code-word
|
||||
// pattern, so a stray number or a lone keyword cannot cry wolf. Flag it with
|
||||
// an empty link list — detected, but nothing actionable to surface.
|
||||
if (
|
||||
(subjectScore > 0 || bodyScore > 0) &&
|
||||
(hasVerificationCode(subject) || hasVerificationCode(text))
|
||||
) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user