JSSpecCompiler: Get rid of ParseError in Lexer

Author: https://github.com/DanShaders Commit: 9a2337f7ad Pull-request: https://github.com/SerenityOS/serenity/pull/22899 Reviewed-by: https://github.com/ADKaster ✅
2025-09-20 00:08:55 +00:00 · 2024-01-20 19:47:02 -05:00 · 2024-01-20 19:47:02 -05:00 · 9a2337f7ad · 2024-07-17 12:02:22 +09:00
commit 9a2337f7ad
parent ed04aff1de
5 changed files with 125 additions and 88 deletions
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
@ -43,9 +43,8 @@ bool can_end_word_token(char c)
 {
    return is_ascii_space(c) || ".,"sv.contains(c);
 }
-}

-ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
+void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
 {
    static constexpr struct {
        StringView text_to_match;
@ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c
        if (word.length())
            tokens.append({ TokenType::Word, word, node, move(token_location) });
    }
-    return {};
 }

-ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
+enum class TreeType {
+    AlgorithmStep,
+    Header,
+};
+
+struct TokenizerState {
+    Vector<Token> tokens;
+    XML::Node const* substeps = nullptr;
+    bool has_errors = false;
+};
+
+void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type)
 {
-    TokenizeTreeResult result;
-    auto& tokens = result.tokens;
+    // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
+    auto& tokens = state.tokens;
+    auto& substeps = state.substeps;
+    auto& has_errors = state.has_errors;

    for (auto const& child : node->as_element().children) {
-        TRY(child->content.visit(
-            [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
-                if (result.substeps != nullptr)
-                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);
+        if (has_errors)
+            break;

+        child->content.visit(
+            [&](XML::Node::Element const& element) -> void {
                Location child_location = ctx.location_from_xml_offset(child->offset);
+                auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) {
+                    ctx.diag().error(child_location, move(fmt), parameters...);
+                    has_errors = true;
+                };

-                if (element.name == tag_var) {
-                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
-                    return {};
+                if (substeps) {
+                    report_error("substeps list must be the last child of algorithm step");
+                    return;
                }

-                if (element.name == tag_span) {
-                    auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
-                    if (element_class != class_secnum)
-                        return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
-                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
-                    return {};
+                if (element.name == tag_var) {
+                    auto variable_name = get_text_contents(child);
+                    if (!variable_name.has_value())
+                        report_error("malformed <var> subtree, expected single text child node");
+
+                    tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) });
+                    return;
                }

                if (element.name == tag_emu_val) {
-                    auto contents = TRY(get_text_contents(child));
+                    auto maybe_contents = get_text_contents(child);
+                    if (!maybe_contents.has_value())
+                        report_error("malformed <emu-val> subtree, expected single text child node");
+
+                    auto contents = maybe_contents.value_or(""sv);
+
                    if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
                    else if (contents == "undefined")
                        tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
                    else
                        tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
-                    return {};
+                    return;
                }

                if (element.name == tag_emu_xref) {
-                    auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
-                    tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
-                    return {};
+                    auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) {
+                        return get_text_contents(node).value_or(""sv);
+                    });
+                    if (!identifier.has_value() || identifier.value().is_empty())
+                        report_error("malformed <emu-xref> subtree, expected <a> with nested single text node");
+
+                    tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) });
+                    return;
                }

-                if (element.name == tag_ol) {
-                    if (!allow_substeps)
-                        return ParseError::create("Found nested list but substeps are not allowed"sv, child);
-                    result.substeps = child;
-                    return {};
+                if (tree_type == TreeType::Header && element.name == tag_span) {
+                    auto element_class = get_attribute_by_name(child, attribute_class);
+                    if (element_class != class_secnum)
+                        report_error("expected <span> to have class='secnum' attribute");
+
+                    auto section_number = get_text_contents(child);
+                    if (!section_number.has_value())
+                        report_error("malformed section number span subtree, expected single text child node");
+
+                    tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) });
+                    return;
                }

-                return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child);
+                if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) {
+                    substeps = child;
+                    return;
+                }
+
+                report_error("<{}> should not be a child of algorithm step", element.name);
            },
-            [&](XML::Node::Text const& text) -> ParseErrorOr<void> {
+            [&](XML::Node::Text const& text) {
                auto view = text.builder.string_view();
-                if (result.substeps && !contains_empty_text(child))
-                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);
-                return tokenize_string(ctx, child, view, tokens);
+                if (substeps != nullptr && !contains_empty_text(child)) {
+                    ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                        "substeps list must be the last child of algorithm step");
+                } else {
+                    tokenize_string(ctx, child, view, tokens);
+                }
            },
-            move(ignore_comments)));
+            [&](auto const&) {});
    }

    if (tokens.size() && tokens.last().type == TokenType::MemberAccess)
        tokens.last().type = TokenType::Dot;
+}
+}

-    return result;
+StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node)
+{
+    TokenizerState state;
+    tokenize_tree(ctx, state, node, TreeType::AlgorithmStep);
+    return {
+        .tokens = state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) },
+        .substeps = state.substeps,
+    };
+}
+
+Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node)
+{
+    TokenizerState state;
+    tokenize_tree(ctx, state, node, TreeType::Header);
+    return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { state.tokens };
 }

 }