mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(feed): use permalink URL as Atom entry id, strip mso-* inline styles
- Entry <id> was a non-URL string (timestamp + base64 snippet), which is invalid per the Atom spec; now uses the entry permalink URL, which is both valid and stable across feed regeneration
- Strip mso-* properties from inline style attributes in extracted body content to eliminate the feed validator DangerousStyleAttr warning caused by Microsoft Office HTML in newsletter emails

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,11 +17,23 @@ function parseFromAddress(from: string): { name: string; email?: string } {
|
||||
// the body fragment in <description>/<content:encoded>, not a full document.
|
||||
export function extractBodyContent(html: string): string {
|
||||
const withClose = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
|
||||
if (withClose) return withClose[1];
|
||||
// Some HTML emails omit </body>; capture everything after the opening tag
|
||||
const withoutClose = html.match(/<body[^>]*>([\s\S]*)/i);
|
||||
if (withoutClose) return withoutClose[1].replace(/<\/html>\s*$/i, "");
|
||||
return html;
|
||||
const body = withClose
|
||||
? withClose[1]
|
||||
: (() => {
|
||||
const withoutClose = html.match(/<body[^>]*>([\s\S]*)/i);
|
||||
return withoutClose
|
||||
? withoutClose[1].replace(/<\/html>\s*$/i, "")
|
||||
: html;
|
||||
})();
|
||||
// Strip mso-* properties from inline styles (Office HTML — triggers feed validator warnings)
|
||||
return body.replace(/\bstyle="([^"]*)"/gi, (_match, style: string) => {
|
||||
const cleaned = style
|
||||
.split(";")
|
||||
.map((p) => p.trim())
|
||||
.filter((p) => p && !/^mso-/i.test(p))
|
||||
.join("; ");
|
||||
return cleaned ? `style="${cleaned}"` : "";
|
||||
});
|
||||
}
|
||||
|
||||
function buildFeed(
|
||||
@@ -56,13 +68,13 @@ function buildFeed(
|
||||
});
|
||||
|
||||
for (const email of emails) {
|
||||
const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`;
|
||||
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
|
||||
const firstAttachment = email.attachments?.[0];
|
||||
const bodyContent = extractBodyContent(email.content);
|
||||
feed.addItem({
|
||||
title: email.subject,
|
||||
id: uniqueId,
|
||||
link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`,
|
||||
id: entryUrl,
|
||||
link: entryUrl,
|
||||
description: bodyContent,
|
||||
content: bodyContent,
|
||||
author: [parseFromAddress(email.from)],
|
||||
|
||||
Reference in New Issue
Block a user