From ea04d515ea0ccecd29c3148d06676ae07be63e93 Mon Sep 17 00:00:00 2001 From: pelgraine <140762863+pelgraine@users.noreply.github.com> Date: Tue, 24 Feb 2026 01:11:51 +1100 Subject: [PATCH] html spacing display cleanup --- .../companion_radio/ui-new/Webreaderscreen.h | 164 ++++++++++++++---- 1 file changed, 130 insertions(+), 34 deletions(-) diff --git a/examples/companion_radio/ui-new/Webreaderscreen.h b/examples/companion_radio/ui-new/Webreaderscreen.h index acb46a3..c097f5f 100644 --- a/examples/companion_radio/ui-new/Webreaderscreen.h +++ b/examples/companion_radio/ui-new/Webreaderscreen.h @@ -154,9 +154,14 @@ static const char* HTML_SKIP_TAGS[] = { // Tags that produce a paragraph break static const char* HTML_BLOCK_TAGS[] = { - "p", "div", "br", "h1", "h2", "h3", "h4", "h5", "h6", - "tr", "blockquote", "article", "section", "figcaption", - "dt", "dd", nullptr + "div", "br", "tr", "blockquote", "article", "section", "figcaption", + "ul", "ol", "dl", + nullptr +}; + +// Tags that get paragraph-style double breaks +static const char* HTML_PARA_TAGS[] = { + "p", nullptr }; inline bool tagNameEquals(const char* tag, int tagLen, const char* name) { @@ -204,6 +209,18 @@ inline int decodeHtmlEntity(const char* src, int srcLen, int pos, char* outChar) if (entLen == 4 && memcmp(ent, "quot", 4) == 0) { *outChar = '"'; return end - pos + 1; } if (entLen == 4 && memcmp(ent, "apos", 4) == 0) { *outChar = '\''; return end - pos + 1; } if (entLen == 4 && memcmp(ent, "nbsp", 4) == 0) { *outChar = ' '; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "mdash", 5) == 0) { *outChar = '-'; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "ndash", 5) == 0) { *outChar = '-'; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "lsquo", 5) == 0) { *outChar = '\''; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "rsquo", 5) == 0) { *outChar = '\''; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "ldquo", 5) == 0) { *outChar = '"'; return end - pos 
+ 1; } + if (entLen == 5 && memcmp(ent, "rdquo", 5) == 0) { *outChar = '"'; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "laquo", 5) == 0) { *outChar = '<'; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "raquo", 5) == 0) { *outChar = '>'; return end - pos + 1; } + if (entLen == 5 && memcmp(ent, "trade", 5) == 0) { *outChar = ' '; return end - pos + 1; } + if (entLen == 4 && memcmp(ent, "copy", 4) == 0) { *outChar = 'c'; return end - pos + 1; } + if (entLen == 4 && memcmp(ent, "bull", 4) == 0) { *outChar = '*'; return end - pos + 1; } + // hellip handled specially in caller (outputs "..." multi-char) // Numeric: &#NNN; or &#xHH; if (entLen >= 2 && ent[0] == '#') { @@ -493,8 +510,12 @@ inline ParseResult parseHtml(const char* html, int htmlLen, continue; } - // Handle block tags - emit paragraph break - if (isBlockTag(tagName, tagNameLen)) { + // Handle paragraph tags - emit double break + bool isPara = false; + for (int pt = 0; HTML_PARA_TAGS[pt]; pt++) { + if (tagNameEquals(tagName, tagNameLen, HTML_PARA_TAGS[pt])) { isPara = true; break; } + } + if (isPara) { if (!lastWasBreak && ti > 0) { textOut[ti++] = '\n'; if (ti < textMax - 2) textOut[ti++] = '\n'; @@ -503,31 +524,35 @@ inline ParseResult parseHtml(const char* html, int htmlLen, } } - // Handle