mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
refactor(domain): slim detectConfirmation contract
Return the ranked links directly (`string[] | null`) instead of a
`{score, links}` wrapper whose `score` field no caller read, and drop the
redundant `hasKeyword` helper in favor of `matchesAny(_, KEYWORDS)`.
No behavior change.
This commit is contained in:
@@ -187,7 +187,7 @@ async function storeEmail(
|
|||||||
return false; // signal: skipped (not stored)
|
return false; // signal: skipped (not stored)
|
||||||
}
|
}
|
||||||
|
|
||||||
const confirmation = detectConfirmation({
|
const confirmationLinks = detectConfirmation({
|
||||||
subject: input.subject,
|
subject: input.subject,
|
||||||
text: htmlToText(input.content),
|
text: htmlToText(input.content),
|
||||||
links: extractLinks(input.content),
|
links: extractLinks(input.content),
|
||||||
@@ -229,7 +229,9 @@ async function storeEmail(
|
|||||||
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
|
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
|
||||||
...(messageId ? { messageId } : {}),
|
...(messageId ? { messageId } : {}),
|
||||||
dedupHash,
|
dedupHash,
|
||||||
...(confirmation ? { confirmation: { links: confirmation.links } } : {}),
|
...(confirmationLinks
|
||||||
|
? { confirmation: { links: confirmationLinks } }
|
||||||
|
: {}),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Track the latest sender's domain (feed icon) and capture the RFC 8058
|
// Track the latest sender's domain (feed icon) and capture the RFC 8058
|
||||||
|
|||||||
@@ -15,9 +15,7 @@ describe("detectConfirmation", () => {
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
expect(result).not.toBeNull();
|
expect(result).not.toBeNull();
|
||||||
expect(result!.links[0]).toBe(
|
expect(result![0]).toBe("https://news.example.com/confirm?token=abc123");
|
||||||
"https://news.example.com/confirm?token=abc123",
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("detects a French confirmation email (accent-insensitive)", () => {
|
it("detects a French confirmation email (accent-insensitive)", () => {
|
||||||
@@ -32,7 +30,7 @@ describe("detectConfirmation", () => {
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
expect(result).not.toBeNull();
|
expect(result).not.toBeNull();
|
||||||
expect(result!.links[0]).toBe("https://lettre.example.fr/valider/xyz");
|
expect(result![0]).toBe("https://lettre.example.fr/valider/xyz");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns null for a normal newsletter with only an unsubscribe link", () => {
|
it("returns null for a normal newsletter with only an unsubscribe link", () => {
|
||||||
@@ -69,7 +67,7 @@ describe("detectConfirmation", () => {
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
expect(result).not.toBeNull();
|
expect(result).not.toBeNull();
|
||||||
expect(result!.links).not.toContain("https://x.example/unsubscribe");
|
expect(result!).not.toContain("https://x.example/unsubscribe");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("ranks the strongest candidate first and caps at three links", () => {
|
it("ranks the strongest candidate first and caps at three links", () => {
|
||||||
@@ -85,8 +83,8 @@ describe("detectConfirmation", () => {
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
expect(result).not.toBeNull();
|
expect(result).not.toBeNull();
|
||||||
expect(result!.links.length).toBeLessThanOrEqual(3);
|
expect(result!.length).toBeLessThanOrEqual(3);
|
||||||
expect(result!.links[0]).toBe("https://x.example/confirm?token=2");
|
expect(result![0]).toBe("https://x.example/confirm?token=2");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("ignores non-http(s) links", () => {
|
it("ignores non-http(s) links", () => {
|
||||||
|
|||||||
@@ -15,11 +15,6 @@ export interface DetectConfirmationInput {
|
|||||||
links: { href: string; text: string }[];
|
links: { href: string; text: string }[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ConfirmationResult {
|
|
||||||
score: number;
|
|
||||||
links: string[];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Confirmation-positive stems, already normalized (lowercased, diacritics stripped).
|
// Confirmation-positive stems, already normalized (lowercased, diacritics stripped).
|
||||||
// EN / FR / DE / ES — extend here to add a language.
|
// EN / FR / DE / ES — extend here to add a language.
|
||||||
const KEYWORDS = [
|
const KEYWORDS = [
|
||||||
@@ -79,10 +74,6 @@ function matchesAny(haystack: string, needles: string[]): boolean {
|
|||||||
return needles.some((n) => haystack.includes(n));
|
return needles.some((n) => haystack.includes(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasKeyword(haystack: string): boolean {
|
|
||||||
return KEYWORDS.some((kw) => haystack.includes(kw));
|
|
||||||
}
|
|
||||||
|
|
||||||
function linkScore(href: string, text: string): number {
|
function linkScore(href: string, text: string): number {
|
||||||
const h = normalize(href);
|
const h = normalize(href);
|
||||||
const t = normalize(text);
|
const t = normalize(text);
|
||||||
@@ -101,7 +92,7 @@ function stripNegatives(text: string): string {
|
|||||||
|
|
||||||
export function detectConfirmation(
|
export function detectConfirmation(
|
||||||
input: DetectConfirmationInput,
|
input: DetectConfirmationInput,
|
||||||
): ConfirmationResult | null {
|
): string[] | null {
|
||||||
const candidates = input.links
|
const candidates = input.links
|
||||||
.filter((l) => isHttp(l.href))
|
.filter((l) => isHttp(l.href))
|
||||||
.map((l) => ({ href: l.href.trim(), score: linkScore(l.href, l.text) }))
|
.map((l) => ({ href: l.href.trim(), score: linkScore(l.href, l.text) }))
|
||||||
@@ -113,12 +104,11 @@ export function detectConfirmation(
|
|||||||
const subject = stripNegatives(normalize(input.subject));
|
const subject = stripNegatives(normalize(input.subject));
|
||||||
const text = stripNegatives(normalize(input.text));
|
const text = stripNegatives(normalize(input.text));
|
||||||
|
|
||||||
const subjectScore = hasKeyword(subject) ? 2 : 0;
|
const subjectScore = matchesAny(subject, KEYWORDS) ? 2 : 0;
|
||||||
const bodyScore = hasKeyword(text) ? 1 : 0;
|
const bodyScore = matchesAny(text, KEYWORDS) ? 1 : 0;
|
||||||
const bestLinkScore = candidates[0].score;
|
const bestLinkScore = candidates[0].score;
|
||||||
|
|
||||||
const score = subjectScore + bodyScore + bestLinkScore;
|
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
|
||||||
if (score < THRESHOLD) return null;
|
|
||||||
|
|
||||||
return { score, links: candidates.slice(0, 3).map((c) => c.href) };
|
return candidates.slice(0, 3).map((c) => c.href);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user