mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat(domain): add native-feed detector (Atom/RSS/JSON)
This commit is contained in:
@@ -0,0 +1,65 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { detectNativeFeeds, unionNativeFeeds } from "./native-feed";
|
||||||
|
|
||||||
|
// Behavioural spec for detectNativeFeeds: MIME recognition, normalization and
// URL de-duplication.
describe("detectNativeFeeds", () => {
  it("maps the three canonical MIME types to kinds", () => {
    const links = [
      { href: "https://x.com/atom", type: "application/atom+xml" },
      { href: "https://x.com/rss", type: "application/rss+xml" },
      { href: "https://x.com/json", type: "application/feed+json" },
    ];

    const detected = detectNativeFeeds(links);

    expect(detected).toEqual([
      { url: "https://x.com/atom", type: "atom" },
      { url: "https://x.com/rss", type: "rss" },
      { url: "https://x.com/json", type: "json" },
    ]);
  });

  it("ignores unknown MIME types (application/json, text/html)", () => {
    const links = [
      { href: "https://x.com/api", type: "application/json" },
      { href: "https://x.com/", type: "text/html" },
    ];

    const detected = detectNativeFeeds(links);

    expect(detected).toEqual([]);
  });

  it("strips MIME parameters and is case-insensitive", () => {
    // Mixed case plus a charset parameter must still resolve to "rss".
    const links = [
      { href: "https://x.com/f", type: "Application/RSS+XML; charset=utf-8" },
    ];

    const detected = detectNativeFeeds(links);

    expect(detected).toEqual([{ url: "https://x.com/f", type: "rss" }]);
  });

  it("dedupes by URL (first kind wins)", () => {
    // Same href advertised twice with different kinds: only the first is kept.
    const links = [
      { href: "https://x.com/f", type: "application/rss+xml" },
      { href: "https://x.com/f", type: "application/atom+xml" },
    ];

    const detected = detectNativeFeeds(links);

    expect(detected).toEqual([{ url: "https://x.com/f", type: "rss" }]);
  });
});
|
||||||
|
|
||||||
|
// Behavioural spec for unionNativeFeeds: missing input and cross-sender merge.
describe("unionNativeFeeds", () => {
  it("returns [] for undefined", () => {
    const merged = unionNativeFeeds(undefined);

    expect(merged).toEqual([]);
  });

  it("unions across senders, deduping by URL", () => {
    // The argument stays inline so the literal `type` values are checked
    // against the parameter type instead of widening to `string`.
    const merged = unionNativeFeeds({
      "a@x.com": [{ url: "https://x.com/rss", type: "rss" }],
      "b@y.com": [
        { url: "https://x.com/rss", type: "rss" },
        { url: "https://y.com/atom", type: "atom" },
      ],
    });

    expect(merged).toEqual([
      { url: "https://x.com/rss", type: "rss" },
      { url: "https://y.com/atom", type: "atom" },
    ]);
  });
});
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
/**
|
||||||
|
* Pure detection of a newsletter's own syndication feed. No DOM, no I/O — it
|
||||||
|
* receives already-extracted <link> tuples (infra parses the HTML) and decides
|
||||||
|
* which ones are real feeds. This module owns the business knowledge: the strict
|
||||||
|
* set of recognized feed MIME types.
|
||||||
|
*/
|
||||||
|
import { NativeFeed } from "../types";
|
||||||
|
|
||||||
|
// MIME type → feed kind. Strict: only the three canonical syndication types.
// `application/json` is deliberately excluded — too broad, captures non-feeds.
//
// Stored in a Map rather than an object literal: the lookup key comes from
// untrusted markup, and indexing a plain object would also resolve inherited
// members ("constructor", "__proto__") to truthy values that are not kinds.
const MIME_TO_KIND: ReadonlyMap<string, NativeFeed["type"]> = new Map<
  string,
  NativeFeed["type"]
>([
  ["application/atom+xml", "atom"],
  ["application/rss+xml", "rss"],
  ["application/feed+json", "json"],
]);

/**
 * Reduce a raw `type` attribute to its bare MIME type: drop parameters
 * ("; charset=…"), trim surrounding whitespace, lowercase.
 *
 * @param type - Raw MIME string, possibly carrying parameters.
 * @returns The normalized MIME type, or "" when nothing precedes the first ";".
 */
function normalizeMime(type: string): string {
  // `split` always yields at least one element; the default only keeps the
  // access type-safe under `noUncheckedIndexedAccess`.
  const [essence = ""] = type.split(";");
  return essence.trim().toLowerCase();
}

/**
 * Map raw <link> tuples to recognized native feeds, deduped by URL.
 *
 * Links whose normalized MIME type is not one of the recognized feed types are
 * skipped, as are links whose href is empty after trimming. When the same URL
 * appears more than once, the first recognized occurrence wins.
 *
 * @param links - Already-extracted `{ href, type }` pairs.
 * @returns Recognized feeds, in input order, with trimmed URLs.
 */
export function detectNativeFeeds(
  links: { href: string; type: string }[],
): NativeFeed[] {
  const out: NativeFeed[] = [];
  const seen = new Set<string>();
  for (const link of links) {
    const kind = MIME_TO_KIND.get(normalizeMime(link.type));
    // Explicit undefined check: only own table entries count as a match.
    if (kind === undefined) continue;
    const url = link.href.trim();
    if (!url || seen.has(url)) continue;
    seen.add(url);
    out.push({ url, type: kind });
  }
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Merge every sender's native feeds into one flat list.
 *
 * Each URL is reported once; the entry kept is the first one encountered while
 * walking the senders in `Object.values` order. Returned entries are shallow
 * copies, so the caller's objects are not shared with the result.
 *
 * @param bySender - Native feeds keyed by sender, or `undefined` when absent.
 * @returns The de-duplicated feeds; `[]` when `bySender` is missing.
 */
export function unionNativeFeeds(
  bySender: Record<string, NativeFeed[]> | undefined,
): NativeFeed[] {
  if (!bySender) return [];

  // A Map iterates in insertion order, which gives "first wins" ordering.
  const firstByUrl = new Map<string, NativeFeed>();
  for (const senderFeeds of Object.values(bySender)) {
    for (const candidate of senderFeeds) {
      if (!firstByUrl.has(candidate.url)) {
        firstByUrl.set(candidate.url, { ...candidate });
      }
    }
  }
  return Array.from(firstByUrl.values());
}
|
||||||
@@ -69,6 +69,13 @@ export interface FeedMetadata {
|
|||||||
pendingConfirmation?: boolean;
|
pendingConfirmation?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * A syndication feed a newsletter advertises about itself (via
 * <link rel="alternate">), as opposed to the KTN-generated feed.
 */
export interface NativeFeed {
  /** Location of the advertised feed. */
  url: string;
  /** Syndication format of the advertised feed. */
  type: "rss" | "atom" | "json";
}
|
||||||
|
|
||||||
// Email metadata interface (summary info for listing)
|
// Email metadata interface (summary info for listing)
|
||||||
export interface EmailMetadata {
|
export interface EmailMetadata {
|
||||||
key: string;
|
key: string;
|
||||||
|
|||||||
Reference in New Issue
Block a user