feat(app): detect native feeds during email ingestion

Wire extractFeedLinks + detectNativeFeeds into storeEmail so that RSS/Atom/JSON
feed <link> tags in the newsletter HTML are detected and stored per-sender on the
feed metadata.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-25 17:22:05 +02:00
parent ee0e7eef5d
commit 5362d478e3
2 changed files with 72 additions and 6 deletions
+54
View File
@@ -994,3 +994,57 @@ describe("processEmail — unsubscribe capture", () => {
expect(metadata.unsubscribe).toBeUndefined();
});
});
// Exercises processEmail end to end against a mock environment and inspects
// the `nativeFeeds` field of the stored feed metadata.
describe("native feed detection on ingest", () => {
  // The slice of the persisted feed metadata these tests look at.
  type StoredMetadata = {
    nativeFeeds?: Record<string, Array<{ url: string; type: string }>>;
  };

  // Newsletter HTML advertising a single RSS feed through a <link> tag.
  const HTML_WITH_RSS_LINK =
    '<html><head><link rel="alternate" type="application/rss+xml" href="https://blog.example.com/feed.xml"></head><body>hello</body></html>';

  let env: ReturnType<typeof createMockEnv>;

  // Reads back the metadata record written for the feed under test.
  const readStoredMetadata = async (): Promise<StoredMetadata> => {
    const raw = await env.EMAIL_STORAGE.get(
      `feed:${VALID_FEED_ID}:metadata`,
      "json",
    );
    return raw as StoredMetadata;
  };

  // Each test starts from a fresh mock env holding an empty feed config and a
  // seeded inbound index for VALID_FEED_ID.
  beforeEach(async () => {
    env = createMockEnv();
    const configKey = `feed:${VALID_FEED_ID}:config`;
    await env.EMAIL_STORAGE.put(configKey, JSON.stringify({}));
    await seedInboundIndex(env, VALID_FEED_ID);
  });

  it("stores detected native feeds on the feed metadata (TEST A)", async () => {
    const sender = "news@blog.example.com";
    const input = makeInput({
      from: sender,
      senders: [sender],
      content: HTML_WITH_RSS_LINK,
    });

    const outcome = await processEmail(input, env as any);
    expect(outcome.ok).toBe(true);

    // Flatten across sender keys: only the detected feed entries are compared.
    const stored = await readStoredMetadata();
    const detected = Object.values(stored.nativeFeeds!).flat();
    expect(detected).toEqual([
      { url: "https://blog.example.com/feed.xml", type: "rss" },
    ]);
  });

  it("does not store nativeFeeds when no feed links are found (TEST B)", async () => {
    const input = makeInput({ content: "<p>no feed here</p>" });

    const outcome = await processEmail(input, env as any);
    expect(outcome.ok).toBe(true);

    const stored = await readStoredMetadata();
    expect(stored.nativeFeeds).toBeUndefined();
  });
});
+18 -6
View File
@@ -6,11 +6,13 @@ import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
import { getAttachmentBucket } from "../infrastructure/attachments";
import {
extractFeedLinks,
extractInlineCids,
extractLinks,
htmlToText,
} from "../infrastructure/html-processor";
import { detectConfirmation } from "../domain/confirmation";
import { detectNativeFeeds } from "../domain/native-feed";
import { attachmentIdsForCleanup } from "./feed-cleanup";
import { FeedRepository } from "../infrastructure/feed-repository";
import { BackgroundScheduler } from "../infrastructure/worker";
@@ -18,6 +20,13 @@ import { Feed } from "../domain/feed.aggregate";
import { logger } from "../infrastructure/logger";
import { FEED_MAX_BYTES } from "../config/constants";
/**
 * Best-effort site base used to absolutize a sender's relative feed link.
 *
 * Takes the text after the last "@" in `from` as the sender's domain and
 * returns it as an `https://` origin.
 *
 * @param from - Sender address. A bare `user@host` is the expected form; a
 *   display-name form such as `Name <user@host>` is tolerated.
 * @returns `https://<domain>`, or an empty string when no domain can be found.
 */
function iconBase(from: string): string {
  const at = from.lastIndexOf("@");
  if (at < 0) return "";

  const rest = from.slice(at + 1).trim();
  // Cut at the first whitespace or closing angle bracket so that
  // "Name <user@host>" yields "host" rather than "host>", which would not be a
  // usable URL base.
  const end = rest.search(/[\s>]/);
  const domain = end >= 0 ? rest.slice(0, end) : rest;

  return domain ? `https://${domain}` : "";
}
export interface RawAttachment {
filename: string;
contentType: string;
@@ -193,6 +202,10 @@ async function storeEmail(
links: extractLinks(input.content),
});
const nativeFeedList = detectNativeFeeds(
extractFeedLinks(input.content, iconBase(input.from)),
);
const attachmentBucket = getAttachmentBucket(env);
const inlineCids = extractInlineCids(input.content);
const storedAttachments: AttachmentData[] =
@@ -238,13 +251,9 @@ async function storeEmail(
// one-click unsubscribe link, keyed by sender so each newsletter keeps its
// own latest URL (fired when the feed is deleted).
const iconDomain = extractEmailDomain(input.from);
const senderKey = input.senders[0] || iconDomain || input.from;
const unsubUrl = parseOneClickUnsubscribe(input.headers ?? {});
const unsub = unsubUrl
? {
senderKey: input.senders[0] || iconDomain || input.from,
url: unsubUrl,
}
: undefined;
const unsub = unsubUrl ? { senderKey, url: unsubUrl } : undefined;
const maxBytes =
parseInt(env.FEED_MAX_SIZE_BYTES ?? "", 10) || FEED_MAX_BYTES;
@@ -253,6 +262,9 @@ async function storeEmail(
maxBytes,
iconDomain: iconDomain ?? undefined,
unsub,
...(nativeFeedList.length > 0
? { nativeFeeds: { senderKey, feeds: nativeFeedList } }
: {}),
});
const r2Deletions =