From 1db243c0060c2ae74194f3c5996093b8507ba095 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Tue, 24 Sep 2024 15:50:15 +0100 Subject: [PATCH] LibWebView: Use LibSyntax to highlight document source This has no visible effect, but internally it's also highlighting any CSS and JS embedded in the page, which will be made use of later. We'll also be able to use this code for highlighting CSS or JS files directly in the future. It's not a perfect fit - the syntax highlighters give specific styles to their spans, which we then ignore and just use their data integer to figure out which CSS class to give to the span. It feels cleaner to me to produce HTML styled that way, instead of every token having `style="color: ...; font-weight: ...; text-decoration: ...;"` set on it. Most of this new `to_html_string()` code is adapted from Serenity's `TextEditor::paint_event()`, so it should be pretty solid. --- Userland/Libraries/LibWebView/CMakeLists.txt | 2 +- .../LibWebView/SourceHighlighter.cpp | 265 +++++++++++++----- .../Libraries/LibWebView/SourceHighlighter.h | 57 ++++ 3 files changed, 258 insertions(+), 66 deletions(-) diff --git a/Userland/Libraries/LibWebView/CMakeLists.txt b/Userland/Libraries/LibWebView/CMakeLists.txt index fa851a82b84..fd51ad6ea73 100644 --- a/Userland/Libraries/LibWebView/CMakeLists.txt +++ b/Userland/Libraries/LibWebView/CMakeLists.txt @@ -48,7 +48,7 @@ set(GENERATED_SOURCES ) serenity_lib(LibWebView webview) -target_link_libraries(LibWebView PRIVATE LibCore LibFileSystem LibGfx LibImageDecoderClient LibIPC LibRequests LibJS LibWeb LibUnicode LibURL) +target_link_libraries(LibWebView PRIVATE LibCore LibFileSystem LibGfx LibImageDecoderClient LibIPC LibRequests LibJS LibWeb LibUnicode LibURL LibSyntax) target_compile_definitions(LibWebView PRIVATE ENABLE_PUBLIC_SUFFIX=$) # Third-party diff --git a/Userland/Libraries/LibWebView/SourceHighlighter.cpp b/Userland/Libraries/LibWebView/SourceHighlighter.cpp index 1a97a49483b..8da836d4475 100644 --- 
a/Userland/Libraries/LibWebView/SourceHighlighter.cpp +++ b/Userland/Libraries/LibWebView/SourceHighlighter.cpp @@ -1,16 +1,136 @@ /* * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2024, Sam Atkins * * SPDX-License-Identifier: BSD-2-Clause */ #include #include -#include +#include #include namespace WebView { +SourceDocument::SourceDocument(StringView source) + : m_source(source) +{ + m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) { + m_lines.append(Syntax::TextDocumentLine { *this, line }); + }); +} + +Syntax::TextDocumentLine& SourceDocument::line(size_t line_index) +{ + return m_lines[line_index]; +} + +Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const +{ + return m_lines[line_index]; +} + +SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language) + : m_document(SourceDocument::create(source)) +{ + // HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span. + // Also, getting a palette from the chrome is nontrivial. So, create a dummy blank one and use that. 
+ auto buffer = MUST(Core::AnonymousBuffer::create_with_size(sizeof(Gfx::SystemTheme))); + auto palette_impl = Gfx::PaletteImpl::create_with_anonymous_buffer(buffer); + Gfx::Palette dummy_palette { palette_impl }; + + switch (language) { + case Syntax::Language::HTML: + m_highlighter = make(); + break; + default: + break; + } + + if (m_highlighter) { + m_highlighter->attach(*this); + m_highlighter->rehighlight(dummy_palette); + } +} + +Vector const& SourceHighlighterClient::spans() const +{ + return document().spans(); +} + +void SourceHighlighterClient::set_span_at_index(size_t index, Syntax::TextDocumentSpan span) +{ + document().set_span_at_index(index, span); +} + +Vector& SourceHighlighterClient::folding_regions() +{ + return document().folding_regions(); +} + +Vector const& SourceHighlighterClient::folding_regions() const +{ + return document().folding_regions(); +} + +ByteString SourceHighlighterClient::highlighter_did_request_text() const +{ + return document().text(); +} + +void SourceHighlighterClient::highlighter_did_request_update() +{ + // No-op +} + +Syntax::Document& SourceHighlighterClient::highlighter_did_request_document() +{ + return document(); +} + +Syntax::TextPosition SourceHighlighterClient::highlighter_did_request_cursor() const +{ + return {}; +} + +void SourceHighlighterClient::highlighter_did_set_spans(Vector spans) +{ + document().set_spans(span_collection_index, move(spans)); +} + +void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector folding_regions) +{ + document().set_folding_regions(move(folding_regions)); +} + +String highlight_source(URL::URL const& url, StringView source) +{ + SourceHighlighterClient highlighter_client { source, Syntax::Language::HTML }; + return highlighter_client.to_html_string(url); +} + +StringView SourceHighlighterClient::class_for_token(u64 token_type) const +{ + switch (static_cast(token_type)) { + case Web::HTML::AugmentedTokenKind::AttributeName: + return "attribute-name"sv; + case 
Web::HTML::AugmentedTokenKind::AttributeValue: + return "attribute-value"sv; + case Web::HTML::AugmentedTokenKind::OpenTag: + case Web::HTML::AugmentedTokenKind::CloseTag: + return "tag"sv; + case Web::HTML::AugmentedTokenKind::Comment: + return "comment"sv; + case Web::HTML::AugmentedTokenKind::Doctype: + return "doctype"sv; + case Web::HTML::AugmentedTokenKind::__Count: + default: + break; + } + + return "unknown"sv; +} + static String generate_style() { StringBuilder builder; @@ -52,45 +172,12 @@ static String generate_style() return MUST(builder.to_string()); } -String highlight_source(URL::URL const& url, StringView source) +String SourceHighlighterClient::to_html_string(URL::URL const& url) const { - Web::HTML::HTMLTokenizer tokenizer { source, "utf-8"sv }; StringBuilder builder; - builder.append(R"~~~( - - - - )~~~"sv); - - builder.appendff("View Source - {}", url); - builder.appendff("", generate_style()); - builder.append(R"~~~( - - -
-)~~~"sv);
-
-    size_t previous_position = 0;
-
-    auto append_source = [&](auto end_position, Optional<StringView> const& class_name = {}) {
-        if (end_position <= previous_position)
-            return;
-
-        auto segment = source.substring_view(previous_position, end_position - previous_position);
-
-        auto append_class_start = [&]() {
-            if (class_name.has_value())
-                builder.appendff("<span class=\"{}\">"sv, *class_name);
-        };
-        auto append_class_end = [&]() {
-            if (class_name.has_value())
-                builder.append("</span>"sv);
-        };
-
-        append_class_start();
-
-        for (auto code_point : Utf8View { segment }) {
+    auto append_escaped = [&](Utf32View text) {
+        for (auto code_point : text) {
             if (code_point == '&') {
                 builder.append("&amp;"sv);
             } else if (code_point == 0xA0) {
@@ -99,56 +186,104 @@ String highlight_source(URL::URL const& url, StringView source)
                 builder.append("&lt;"sv);
             } else if (code_point == '>') {
                 builder.append("&gt;"sv);
-            } else if (code_point == '\n') {
-                append_class_end();
-                builder.append("\n"sv);
-                append_class_start();
             } else {
                 builder.append_code_point(code_point);
             }
         }
-
-        append_class_end();
-        previous_position = end_position;
     };
 
-    for (auto token = tokenizer.next_token(); token.has_value(); token = tokenizer.next_token()) {
-        if (token->is_comment()) {
-            append_source(token->start_position().byte_offset);
-            append_source(token->end_position().byte_offset, "comment"sv);
-        } else if (token->is_start_tag() || token->is_end_tag()) {
-            auto tag_name_start = token->start_position().byte_offset;
+    auto start_token = [&](u64 type) {
+        builder.appendff("<span class=\"{}\">", class_for_token(type));
+    };
+    auto end_token = [&]() {
+        builder.append("</span>"sv);
+    };
 
-            append_source(tag_name_start);
-            append_source(tag_name_start + token->tag_name().bytes().size(), "tag"sv);
+    builder.append(R"~~~(
+
+
+
+    )~~~"sv);
 
-            token->for_each_attribute([&](auto const& attribute) {
-                append_source(attribute.name_start_position.byte_offset);
-                append_source(attribute.name_end_position.byte_offset, "attribute-name"sv);
+    builder.appendff("<title>View Source - {}</title>", escape_html_entities(MUST(url.to_string())));
+    builder.appendff("<style type=\"text/css\">{}</style>", generate_style());
+    builder.append(R"~~~(
+
+
+
)~~~"sv);
 
-                append_source(attribute.value_start_position.byte_offset);
-                append_source(attribute.value_end_position.byte_offset, "attribute-value"sv);
+    size_t span_index = 0;
+    for (size_t line_index = 0; line_index < document().line_count(); ++line_index) {
+        auto& line = document().line(line_index);
+        auto line_view = line.view();
+        builder.append("
"sv); - return IterationDecision::Continue; - }); + size_t next_column = 0; - append_source(token->end_position().byte_offset); - } else { - append_source(token->end_position().byte_offset); + auto draw_text_helper = [&](size_t start, size_t end, Optional span) { + size_t length = end - start; + if (length == 0) + return; + auto text = line_view.substring_view(start, length); + if (span.has_value()) { + start_token(span->data); + append_escaped(text); + end_token(); + } else { + append_escaped(text); + } + }; - if (token->is_end_of_file()) + while (span_index < document().spans().size()) { + auto& span = document().spans()[span_index]; + if (span.range.start().line() > line_index) { + // No more spans in this line, moving on break; + } + size_t span_start; + if (span.range.start().line() < line_index) { + span_start = 0; + } else { + span_start = span.range.start().column(); + } + size_t span_end; + bool span_consumed; + if (span.range.end().line() > line_index) { + span_end = line.length(); + span_consumed = false; + } else { + span_end = span.range.end().column(); + span_consumed = true; + } + + if (span_start != next_column) { + // Draw unspanned text between spans + draw_text_helper(next_column, span_start, {}); + } + draw_text_helper(span_start, span_end, span); + next_column = span_end; + if (!span_consumed) { + // Continue with same span on next line + break; + } else { + ++span_index; + } } + // Draw unspanned text after last span + if (next_column < line.length()) { + draw_text_helper(next_column, line.length(), {}); + } + + builder.append("
"sv); } builder.append(R"~~~( -
)~~~"sv); - return MUST(builder.to_string()); + return builder.to_string_without_validation(); } } diff --git a/Userland/Libraries/LibWebView/SourceHighlighter.h b/Userland/Libraries/LibWebView/SourceHighlighter.h index 1ab7cf4f411..1d5f5bb0705 100644 --- a/Userland/Libraries/LibWebView/SourceHighlighter.h +++ b/Userland/Libraries/LibWebView/SourceHighlighter.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2024, Sam Atkins * * SPDX-License-Identifier: BSD-2-Clause */ @@ -8,9 +9,65 @@ #include #include +#include +#include +#include namespace WebView { +class SourceDocument final : public Syntax::Document { +public: + static NonnullRefPtr create(StringView source) + { + return adopt_ref(*new (nothrow) SourceDocument(source)); + } + virtual ~SourceDocument() = default; + + StringView text() const { return m_source; } + size_t line_count() const { return m_lines.size(); } + + // ^ Syntax::Document + virtual Syntax::TextDocumentLine const& line(size_t line_index) const override; + virtual Syntax::TextDocumentLine& line(size_t line_index) override; + +private: + SourceDocument(StringView source); + + // ^ Syntax::Document + virtual void update_views(Badge) override { } + + StringView m_source; + Vector m_lines; +}; + +class SourceHighlighterClient final : public Syntax::HighlighterClient { +public: + SourceHighlighterClient(StringView source, Syntax::Language); + virtual ~SourceHighlighterClient() = default; + + String to_html_string(URL::URL const&) const; + +private: + // ^ Syntax::HighlighterClient + virtual Vector const& spans() const override; + virtual void set_span_at_index(size_t index, Syntax::TextDocumentSpan span) override; + virtual Vector& folding_regions() override; + virtual Vector const& folding_regions() const override; + virtual ByteString highlighter_did_request_text() const override; + virtual void highlighter_did_request_update() override; + virtual Syntax::Document& highlighter_did_request_document() override; + virtual 
Syntax::TextPosition highlighter_did_request_cursor() const override; + virtual void highlighter_did_set_spans(Vector) override; + virtual void highlighter_did_set_folding_regions(Vector) override; + + StringView class_for_token(u64 token_type) const; + + SourceDocument& document() const { return *m_document; } + + NonnullRefPtr m_document; + OwnPtr m_highlighter; +}; + String highlight_source(URL::URL const&, StringView); constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~(