mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat(app): detect native feeds during email ingestion
Wire extractFeedLinks + detectNativeFeeds into storeEmail so that RSS/Atom/JSON feed <link> tags in the newsletter HTML are detected and stored per-sender on the feed metadata.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -994,3 +994,57 @@ describe("processEmail — unsubscribe capture", () => {
|
||||
expect(metadata.unsubscribe).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Checks that feed <link> tags found in an ingested email's HTML are written
// to the feed metadata under `nativeFeeds`, and that the field stays absent
// when the HTML advertises no feed.
describe("native feed detection on ingest", () => {
  type NativeFeedEntry = { url: string; type: string };
  type StoredMetadata = { nativeFeeds?: Record<string, NativeFeedEntry[]> };

  let env: ReturnType<typeof createMockEnv>;

  // Reads back the metadata record stored for the feed under test.
  const readMetadata = async (): Promise<StoredMetadata> => {
    const stored = await env.EMAIL_STORAGE.get(
      `feed:${VALID_FEED_ID}:metadata`,
      "json",
    );
    return stored as StoredMetadata;
  };

  // Each test starts from a fresh mock env holding an empty config for the
  // feed, followed by a call to seedInboundIndex for the same feed id.
  beforeEach(async () => {
    env = createMockEnv();
    const emptyConfig = JSON.stringify({});
    await env.EMAIL_STORAGE.put(`feed:${VALID_FEED_ID}:config`, emptyConfig);
    await seedInboundIndex(env, VALID_FEED_ID);
  });

  it("stores detected native feeds on the feed metadata (TEST A)", async () => {
    const sender = "news@blog.example.com";
    const input = makeInput({
      from: sender,
      senders: [sender],
      content:
        '<html><head><link rel="alternate" type="application/rss+xml" href="https://blog.example.com/feed.xml"></head><body>hello</body></html>',
    });

    const result = await processEmail(input, env as any);
    expect(result.ok).toBe(true);

    // Flatten across the record's keys so the assertion does not depend on
    // which key the feeds were stored under.
    const { nativeFeeds } = await readMetadata();
    const detected = Object.values(nativeFeeds!).flat();
    expect(detected).toEqual([
      { url: "https://blog.example.com/feed.xml", type: "rss" },
    ]);
  });

  it("does not store nativeFeeds when no feed links are found (TEST B)", async () => {
    const input = makeInput({ content: "<p>no feed here</p>" });

    const result = await processEmail(input, env as any);
    expect(result.ok).toBe(true);

    const stored = await readMetadata();
    expect(stored.nativeFeeds).toBeUndefined();
  });
});
|
||||
|
||||
@@ -6,11 +6,13 @@ import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
|
||||
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
|
||||
import { getAttachmentBucket } from "../infrastructure/attachments";
|
||||
import {
|
||||
extractFeedLinks,
|
||||
extractInlineCids,
|
||||
extractLinks,
|
||||
htmlToText,
|
||||
} from "../infrastructure/html-processor";
|
||||
import { detectConfirmation } from "../domain/confirmation";
|
||||
import { detectNativeFeeds } from "../domain/native-feed";
|
||||
import { attachmentIdsForCleanup } from "./feed-cleanup";
|
||||
import { FeedRepository } from "../infrastructure/feed-repository";
|
||||
import { BackgroundScheduler } from "../infrastructure/worker";
|
||||
@@ -18,6 +20,13 @@ import { Feed } from "../domain/feed.aggregate";
|
||||
import { logger } from "../infrastructure/logger";
|
||||
import { FEED_MAX_BYTES } from "../config/constants";
|
||||
|
||||
/**
 * Best-effort site base for absolutizing a sender's relative feed link.
 *
 * Takes the text after the last "@" in `from` as the domain. When the address
 * is written in the display-name form (`Name <user@host>`), the closing ">"
 * and anything after it are dropped so they do not leak into the URL.
 *
 * @param from - Sender address, either bare (`user@host`) or `Name <user@host>`.
 * @returns `https://<domain>`, or an empty string when no domain can be found.
 */
function iconBase(from: string): string {
  const at = from.lastIndexOf("@");
  if (at < 0) return "";

  // Stop at the mailbox's closing bracket, if there is one after the "@".
  const close = from.indexOf(">", at);
  const domain = from.slice(at + 1, close >= 0 ? close : undefined).trim();

  return domain ? `https://${domain}` : "";
}
|
||||
|
||||
export interface RawAttachment {
|
||||
filename: string;
|
||||
contentType: string;
|
||||
@@ -193,6 +202,10 @@ async function storeEmail(
|
||||
links: extractLinks(input.content),
|
||||
});
|
||||
|
||||
const nativeFeedList = detectNativeFeeds(
|
||||
extractFeedLinks(input.content, iconBase(input.from)),
|
||||
);
|
||||
|
||||
const attachmentBucket = getAttachmentBucket(env);
|
||||
const inlineCids = extractInlineCids(input.content);
|
||||
const storedAttachments: AttachmentData[] =
|
||||
@@ -238,13 +251,9 @@ async function storeEmail(
|
||||
// one-click unsubscribe link, keyed by sender so each newsletter keeps its
|
||||
// own latest URL (fired when the feed is deleted).
|
||||
const iconDomain = extractEmailDomain(input.from);
|
||||
const senderKey = input.senders[0] || iconDomain || input.from;
|
||||
const unsubUrl = parseOneClickUnsubscribe(input.headers ?? {});
|
||||
const unsub = unsubUrl
|
||||
? {
|
||||
senderKey: input.senders[0] || iconDomain || input.from,
|
||||
url: unsubUrl,
|
||||
}
|
||||
: undefined;
|
||||
const unsub = unsubUrl ? { senderKey, url: unsubUrl } : undefined;
|
||||
|
||||
const maxBytes =
|
||||
parseInt(env.FEED_MAX_SIZE_BYTES ?? "", 10) || FEED_MAX_BYTES;
|
||||
@@ -253,6 +262,9 @@ async function storeEmail(
|
||||
maxBytes,
|
||||
iconDomain: iconDomain ?? undefined,
|
||||
unsub,
|
||||
...(nativeFeedList.length > 0
|
||||
? { nativeFeeds: { senderKey, feeds: nativeFeedList } }
|
||||
: {}),
|
||||
});
|
||||
|
||||
const r2Deletions =
|
||||
|
||||
Reference in New Issue
Block a user