LibWebView: Normalize source-code text before highlighting it

The previous code to determine the SourceDocument's lines was too naive:
the source text can contain other newline characters and sequences, and
the HTML/CSS/JS syntax highlighters would take those into account when
determining what line a token is on. This disagreement would cause
incorrect highlighting, or even crashes, if the source didn't solely use
`\n` for its newlines.

In order to have everyone agree on what a line is, this patch first
processes the source to replace all newlines with `\n`. The need to
copy the source like this is unfortunate, but viewing the source is a
rare enough action that this should not cause any noticeable
performance problems.

As the callers have a String, and we want a String, this also changes
the function parameters to keep the source as a String instead of
converting it to StringView and back.

Fixes https://github.com/LadybirdBrowser/ladybird/issues/3169
This commit is contained in:
Sam Atkins 2025-01-08 13:47:24 +00:00
parent b16b24c615
commit 6147557999
Notes: github-actions[bot] 2025-02-04 08:50:24 +00:00
2 changed files with 45 additions and 12 deletions

View file

@ -17,12 +17,45 @@
namespace WebView {
SourceDocument::SourceDocument(StringView source)
: m_source(source)
SourceDocument::SourceDocument(String const& source)
{
m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) {
m_lines.append(Syntax::TextDocumentLine { *this, line });
});
// HTML, CSS and JS differ slightly on what they consider a newline to be.
// In order to make them get along in documents that include a mix of the three, process the source to make the
// newlines consistent before doing any highlighting.
// Optimization: If all the newlines are \n, just use the input string.
if (!source.code_points().contains_any_of(Array<u32, 3> { '\r', 0x2028, 0x2029 })) {
m_source = source;
} else {
StringBuilder builder { source.byte_count() };
// Convert any '\r\n', \r, <LS> or <PS> to \n
bool previous_was_cr = false;
for (u32 code_point : source.code_points()) {
if (previous_was_cr && code_point != '\n')
builder.append('\n');
previous_was_cr = false;
switch (code_point) {
case '\r':
previous_was_cr = true;
break;
case JS::LINE_SEPARATOR:
case JS::PARAGRAPH_SEPARATOR:
builder.append('\n');
break;
default:
builder.append_code_point(code_point);
}
}
m_source = builder.to_string_without_validation();
}
m_source.code_points().for_each_split_view(
[](u32 it) { return it == '\n'; },
SplitBehavior::KeepEmpty,
[&](auto line) {
m_lines.append(Syntax::TextDocumentLine { *this, line.as_string() });
});
}
Syntax::TextDocumentLine& SourceDocument::line(size_t line_index)
@ -35,7 +68,7 @@ Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const
return m_lines[line_index];
}
SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language)
SourceHighlighterClient::SourceHighlighterClient(String const& source, Syntax::Language language)
: m_document(SourceDocument::create(source))
{
// HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
@ -114,7 +147,7 @@ void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax:
document().set_folding_regions(move(folding_regions));
}
String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView source, Syntax::Language language, HighlightOutputMode mode)
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language language, HighlightOutputMode mode)
{
SourceHighlighterClient highlighter_client { source, language };
return highlighter_client.to_html_string(url, base_url, mode);

View file

@ -24,7 +24,7 @@ enum class HighlightOutputMode {
class SourceDocument final : public Syntax::Document {
public:
static NonnullRefPtr<SourceDocument> create(StringView source)
static NonnullRefPtr<SourceDocument> create(String const& source)
{
return adopt_ref(*new (nothrow) SourceDocument(source));
}
@ -38,18 +38,18 @@ public:
virtual Syntax::TextDocumentLine& line(size_t line_index) override;
private:
SourceDocument(StringView source);
SourceDocument(String const& source);
// ^ Syntax::Document
virtual void update_views(Badge<Syntax::TextDocumentLine>) override { }
StringView m_source;
String m_source;
Vector<Syntax::TextDocumentLine> m_lines;
};
class SourceHighlighterClient final : public Syntax::HighlighterClient {
public:
SourceHighlighterClient(StringView source, Syntax::Language);
SourceHighlighterClient(String const& source, Syntax::Language);
virtual ~SourceHighlighterClient() = default;
String to_html_string(URL::URL const& url, URL::URL const& base_url, HighlightOutputMode) const;
@ -75,7 +75,7 @@ private:
OwnPtr<Syntax::Highlighter> m_highlighter;
};
String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView, Syntax::Language, HighlightOutputMode);
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language, HighlightOutputMode);
constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~(
@media (prefers-color-scheme: dark) {