Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion defaultmodules/newsfeed/newsfeed.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Module.register("newsfeed", {
prohibitedWords: [],
scrollLength: 500,
logFeedWarnings: false,
dangerouslyDisableAutoEscaping: false
dangerouslyDisableAutoEscaping: false,
allowBasicHtmlTags: false
},

getUrlPrefix (item) {
Expand Down
12 changes: 6 additions & 6 deletions defaultmodules/newsfeed/newsfeed.njk
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
{% if config.showPublishDate %}{{ item.publishDate }}:{% endif %}
</div>
{% endif %}
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}</div>
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}</div>
{% if config.showDescription %}
<div class="newsfeed-desc small light{{ ' no-wrap' if not config.wrapDescription }}">
{% if config.truncDescription %}
{{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }}
{% else %}
{{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }}
{% endif %}
</div>
{% endif %}
Expand All @@ -68,13 +68,13 @@
{% if config.showPublishDate %}{{ publishDate }}:{% endif %}
</div>
{% endif %}
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}</div>
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}</div>
{% if config.showDescription %}
<div class="newsfeed-desc small light{{ ' no-wrap' if not config.wrapDescription }}">
{% if config.truncDescription %}
{{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }}
{% else %}
{{ escapeText(description, config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }}
{% endif %}
</div>
{% endif %}
Expand Down
80 changes: 69 additions & 11 deletions defaultmodules/newsfeed/newsfeedfetcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,23 @@ const { htmlToText } = require("html-to-text");
const Log = require("logger");
const HTTPFetcher = require("#http_fetcher");

// Inline formatting tags that are safe to render: bold, italic and underline.
// These never carry attributes once sanitized, so they cannot be used for injection.
const ALLOWED_TAGS = ["b", "strong", "i", "em", "u"];

// html-to-text formatter that re-emits an allowed inline tag around its content,
// so feeds that send real <em>/<strong> elements keep their emphasis.
const keepTagFormatter = (elem, walk, builder, formatOptions) => {
builder.addLiteral(`<${formatOptions.tagName}>`);
walk(elem.children, builder);
builder.addLiteral(`</${formatOptions.tagName}>`);
};

const allowedTagSelectors = ALLOWED_TAGS.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } }));

// Matches only the exact, attribute-free opening/closing allowlisted tags after escaping.
const restoreAllowedTags = new RegExp(`&lt;(/?(?:${ALLOWED_TAGS.join("|")}))&gt;`, "g");

/**
* NewsfeedFetcher - Fetches and parses RSS/Atom feed data
* Uses HTTPFetcher for HTTP handling with intelligent error handling
Expand All @@ -20,12 +37,14 @@ class NewsfeedFetcher {
* @param {string} encoding - Encoding of the feed (e.g., 'UTF-8')
* @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article
* @param {boolean} useCorsProxy - If true cors proxy is used for article url's
* @param {boolean} allowBasicHtmlTags - If true keep basic formatting tags (bold/italic/underline) in title and description
*/
constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) {
constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowBasicHtmlTags = false) {
this.url = url;
this.encoding = encoding;
this.logFeedWarnings = logFeedWarnings;
this.useCorsProxy = useCorsProxy;
this.allowBasicHtmlTags = allowBasicHtmlTags;
this.items = [];
this.fetchFailedCallback = () => {};
this.itemsReceivedCallback = () => {};
Expand All @@ -44,6 +63,37 @@ class NewsfeedFetcher {
this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo));
}

/**
* Sanitizes a feed string, keeping only a strict allowlist of basic
* formatting tags (bold, italic, underline) and neutralizing everything else.
*
* The approach is allowlist-only and therefore safe to render unescaped:
* html-to-text first strips all real markup (scripts, links, images, …) and
* decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact,
* attribute-free allowlisted tags are restored. No attributes, event handlers,
* or other tags can survive, so arbitrary HTML/script injection is impossible.
* @param {string} html - The raw title or description from the feed.
* @returns {string} Safe HTML containing at most the allowlisted formatting tags.
*/
static sanitizeBasicHtml (html) {
const text = htmlToText(html, {
wordwrap: false,
formatters: { keepTag: keepTagFormatter },
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" },
...allowedTagSelectors
]
});

return text
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;")
.replace(restoreAllowedTags, "<$1>");
}

/**
* Creates a parse error info object
* @param {string} message - Error message
Expand Down Expand Up @@ -78,22 +128,30 @@ class NewsfeedFetcher {
const url = item.url || item.link || "";

if (title && pubdate) {
// Convert HTML entities, codes and tag
description = htmlToText(description, {
wordwrap: false,
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" }
]
});
let displayTitle = title;
if (this.allowBasicHtmlTags) {
// Keep basic formatting (bold/italic/underline) in both fields, strip everything else
description = NewsfeedFetcher.sanitizeBasicHtml(description);
displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title);
} else {
// Convert HTML entities, codes and tag
description = htmlToText(description, {
wordwrap: false,
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" }
]
});
}

this.items.push({
title,
title: displayTitle,
description,
pubdate,
url,
useCorsProxy: this.useCorsProxy,
// Hash on the original title so the dedup identity is stable regardless of allowBasicHtmlTags
hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex")
});
} else if (this.logFeedWarnings) {
Expand Down
2 changes: 1 addition & 1 deletion defaultmodules/newsfeed/node_helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({
let fetcher;
if (typeof this.fetchers[url] === "undefined") {
Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`);
fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy);
fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowBasicHtmlTags);

fetcher.onReceive(() => {
this.broadcastFeeds();
Expand Down
28 changes: 28 additions & 0 deletions tests/configs/modules/newsfeed/basic_html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
let config = {
address: "0.0.0.0",
ipWhitelist: [],
timeFormat: 12,

modules: [
{
module: "newsfeed",
position: "bottom_bar",
config: {
feeds: [
{
title: "Formatting Feed",
url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml"
}
],
showDescription: true,
truncDescription: false,
allowBasicHtmlTags: true
}
}
]
};

/*************** DO NOT EDIT THE LINE BELOW ***************/
if (typeof module !== "undefined") {
module.exports = config;
}
26 changes: 26 additions & 0 deletions tests/e2e/modules/newsfeed_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,32 @@ const runTests = () => {
});
});

describe("Basic HTML tags", () => {
beforeAll(async () => {
await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js");
await helpers.getDocument();
page = helpers.getPage();
});

it("should render allowlisted formatting tags in title and description", async () => {
await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible();
const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
expect(descHtml).toContain("<em>");
expect(descHtml).toContain("<strong>");
expect(descHtml).toContain("<u>");
const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML();
expect(titleHtml).toContain("<em>");
});

it("should strip disallowed HTML and not execute injected scripts", async () => {
const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
expect(descHtml).not.toContain("<script");
expect(descHtml).not.toContain("onerror");
const xss = await page.evaluate(() => window.__newsfeedXss);
expect(xss).toBeUndefined();
});
});

describe("Invalid configuration", () => {
beforeAll(async () => {
await helpers.startApplication("tests/configs/modules/newsfeed/incorrect_url.js");
Expand Down
15 changes: 15 additions & 0 deletions tests/mocks/newsfeed_basic_html.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Formatting Feed</title>
<link>http://localhost:8080</link>
<description>Feed used to test the allowBasicHtmlTags option.</description>
<item>
<title>News &lt;em&gt;Flash&lt;/em&gt;</title>
<link>http://localhost:8080/article</link>
<pubDate>Tue, 20 Sep 2016 11:16:08 +0000</pubDate>
<guid isPermaLink="false">http://localhost:8080/?p=1</guid>
<description><![CDATA[<p><em>Italic</em> and <strong>Bold</strong> and <u>Underlined</u> text.</p><script>window.__newsfeedXss = true;</script><img src="x" onerror="window.__newsfeedXss = true">]]></description>
</item>
</channel>
</rss>
52 changes: 52 additions & 0 deletions tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const defaults = require("../../../../../js/defaults");

const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`);

const { sanitizeBasicHtml } = NewsfeedFetcher;

describe("NewsfeedFetcher.sanitizeBasicHtml", () => {
it("keeps real basic formatting tags", () => {
expect(sanitizeBasicHtml("<b>a</b> <strong>b</strong> <i>c</i> <em>d</em> <u>e</u>"))
.toBe("<b>a</b> <strong>b</strong> <i>c</i> <em>d</em> <u>e</u>");
});

it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => {
// Feeds like theatlantic.com ship emphasis as escaped entities
expect(sanitizeBasicHtml("the &lt;em&gt;Atlantic&lt;/em&gt; ocean")).toBe("the <em>Atlantic</em> ocean");
});

it("handles emphasis inside titles regardless of how the parser delivers it", () => {
// The Atlantic uses <em> in titles, e.g. "That's Enough, <em>Euphoria</em>"
const expected = "That’s Enough, <em>Euphoria</em>";
expect(sanitizeBasicHtml("That’s Enough, <em>Euphoria</em>")).toBe(expected);
expect(sanitizeBasicHtml("That’s Enough, &lt;em&gt;Euphoria&lt;/em&gt;")).toBe(expected);
});

it("strips attributes from allowed tags", () => {
const result = sanitizeBasicHtml("<b onclick=\"steal()\" class=\"x\">bold</b>");
expect(result).toBe("<b>bold</b>");
expect(result).not.toContain("onclick");
expect(result).not.toContain("class");
});

it("neutralizes script tags", () => {
expect(sanitizeBasicHtml("<script>alert(1)</script>hello")).not.toContain("<script");
// Entity-encoded scripts must stay inert text, never become live markup
const encoded = sanitizeBasicHtml("&lt;script&gt;alert(1)&lt;/script&gt;");
expect(encoded).not.toContain("<script");
expect(encoded).toContain("&lt;script&gt;");
});

it("drops images and link hrefs but keeps disallowed-tag text", () => {
const result = sanitizeBasicHtml("<img src=\"x\" onerror=\"alert(1)\"><a href=\"https://evil.example\">link</a><h1>title</h1>");
expect(result).not.toContain("onerror");
expect(result).not.toContain("href");
expect(result).not.toContain("<h1>");
expect(result).toContain("link");
expect(result.toLowerCase()).toContain("title");
});

it("escapes bare HTML special characters in plain text", () => {
expect(sanitizeBasicHtml("Fish &amp; Chips for &lt; 5")).toBe("Fish &amp; Chips for &lt; 5");
});
});