diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp index 34b0e47c7af..ca0974cc0ff 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp @@ -43,9 +43,8 @@ bool can_end_word_token(char c) { return is_ascii_space(c) || ".,"sv.contains(c); } -} -ParseErrorOr tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector& tokens) +void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector& tokens) { static constexpr struct { StringView text_to_match; @@ -103,74 +102,132 @@ ParseErrorOr tokenize_string(SpecificationParsingContext& ctx, XML::Node c if (word.length()) tokens.append({ TokenType::Word, word, node, move(token_location) }); } - return {}; } -ParseErrorOr tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps) +enum class TreeType { + AlgorithmStep, + Header, +}; + +struct TokenizerState { + Vector tokens; + XML::Node const* substeps = nullptr; + bool has_errors = false; +}; + +void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type) { - TokenizeTreeResult result; - auto& tokens = result.tokens; + // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16. 
+ auto& tokens = state.tokens; + auto& substeps = state.substeps; + auto& has_errors = state.has_errors; for (auto const& child : node->as_element().children) { - TRY(child->content.visit( - [&](XML::Node::Element const& element) -> ParseErrorOr { - if (result.substeps != nullptr) - return ParseError::create("Substeps list must be the last non-empty child"sv, child); + if (has_errors) + break; + child->content.visit( + [&](XML::Node::Element const& element) -> void { Location child_location = ctx.location_from_xml_offset(child->offset); + auto report_error = [&](AK::CheckedFormatString&& fmt, Parameters const&... parameters) { + ctx.diag().error(child_location, move(fmt), parameters...); + has_errors = true; + }; - if (element.name == tag_var) { - tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) }); - return {}; + if (substeps) { + report_error("substeps list must be the last child of algorithm step"); + return; } - if (element.name == tag_span) { - auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class)); - if (element_class != class_secnum) - return ParseError::create(String::formatted("Expected 'secnum' as a class name of , but found '{}'", element_class), child); - tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) }); - return {}; + if (element.name == tag_var) { + auto variable_name = get_text_contents(child); + if (!variable_name.has_value()) + report_error("malformed subtree, expected single text child node"); + + tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) }); + return; } if (element.name == tag_emu_val) { - auto contents = TRY(get_text_contents(child)); + auto maybe_contents = get_text_contents(child); + if (!maybe_contents.has_value()) + report_error("malformed subtree, expected single text child node"); + + auto contents = maybe_contents.value_or(""sv); + if (contents.length() 
>= 2 && contents.starts_with('"') && contents.ends_with('"')) tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) }); else if (contents == "undefined") tokens.append({ TokenType::Undefined, contents, child, move(child_location) }); else tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); - return {}; + return; } if (element.name == tag_emu_xref) { - auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv)))); - tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); - return {}; + auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) { + return get_text_contents(node).value_or(""sv); + }); + if (!identifier.has_value() || identifier.value().is_empty()) + report_error("malformed subtree, expected with nested single text node"); + + tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) }); + return; } - if (element.name == tag_ol) { - if (!allow_substeps) - return ParseError::create("Found nested list but substeps are not allowed"sv, child); - result.substeps = child; - return {}; + if (tree_type == TreeType::Header && element.name == tag_span) { + auto element_class = get_attribute_by_name(child, attribute_class); + if (element_class != class_secnum) + report_error("expected to have class='secnum' attribute"); + + auto section_number = get_text_contents(child); + if (!section_number.has_value()) + report_error("malformed section number span subtree, expected single text child node"); + + tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) }); + return; } - return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child); + if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) { + substeps = child; + return; + } + + report_error("<{}> should not be a child of 
algorithm step", element.name); }, - [&](XML::Node::Text const& text) -> ParseErrorOr { + [&](XML::Node::Text const& text) { auto view = text.builder.string_view(); - if (result.substeps && !contains_empty_text(child)) - return ParseError::create("Substeps list must be the last non-empty child"sv, child); - return tokenize_string(ctx, child, view, tokens); + if (substeps != nullptr && !contains_empty_text(child)) { + ctx.diag().error(ctx.location_from_xml_offset(child->offset), + "substeps list must be the last child of algorithm step"); + has_errors = true; /* same error as the element branch: it must also invalidate the token list and stop the loop */ + } else { + tokenize_string(ctx, child, view, tokens); + } }, - move(ignore_comments))); + [&](auto const&) {}); } if (tokens.size() && tokens.last().type == TokenType::MemberAccess) tokens.last().type = TokenType::Dot; +} +} - return result; +StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node) +{ + TokenizerState state; + tokenize_tree(ctx, state, node, TreeType::AlgorithmStep); + return { + .tokens = state.has_errors ? OptionalNone {} : Optional> { move(state.tokens) }, + .substeps = state.substeps, + }; +} + +Optional> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node) +{ + TokenizerState state; + tokenize_tree(ctx, state, node, TreeType::Header); + return state.has_errors ? 
OptionalNone {} : Optional> { move(state.tokens) }; /* state is a dying local: move like tokenize_step does instead of copying the vector */ } } diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h index 3756309a397..b41e3158d22 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h @@ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv; inline constexpr StringView class_secnum = "secnum"sv; -ParseErrorOr tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector& tokens); - -struct TokenizeTreeResult { - Vector tokens; +struct StepTokenizationResult { + Optional> tokens; XML::Node const* substeps = nullptr; }; -ParseErrorOr tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false); +StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node); +Optional> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node); } diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp index 9bad683b839..8070ed6c83b 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp @@ -64,16 +64,9 @@ Optional AlgorithmStep::create(SpecificationParsingContext& ctx, { VERIFY(element->as_element().name == tag_li); - auto tokenization_result = tokenize_tree(ctx, element, true); - if (tokenization_result.is_error()) { - ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()), - "{}", tokenization_result.error()->to_string()); - return {}; - } + auto [maybe_tokens, substeps] = tokenize_step(ctx, element); - auto [tokens, substeps] = tokenization_result.release_value(); AlgorithmStep result(ctx); - result.m_tokens = move(tokens); result.m_node = element; if 
(substeps) { @@ -86,6 +79,10 @@ Optional AlgorithmStep::create(SpecificationParsingContext& ctx, result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree; } + if (!maybe_tokens.has_value()) + return {}; + result.m_tokens = maybe_tokens.release_value(); + if (!result.parse()) return {}; return result; @@ -260,14 +257,11 @@ Optional SpecificationClause::parse_header(XML::Node auto& ctx = *m_ctx_pointer; VERIFY(element->as_element().name == tag_h1); - auto tokenization_result = tokenize_tree(ctx, element, false); - if (tokenization_result.is_error()) { - return FailedTextParseDiagnostic { - ctx.location_from_xml_offset(tokenization_result.error()->offset()), - tokenization_result.error()->to_string() - }; - } - auto const& tokens = tokenization_result.release_value().tokens; + auto maybe_tokens = tokenize_header(ctx, element); + if (!maybe_tokens.has_value()) + return {}; + + auto const& tokens = maybe_tokens.release_value(); TextParser parser(ctx, tokens, element); auto parse_result = parser.parse_clause_header(); @@ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element) auto& ctx = context(); u32 child_index = 0; + bool node_ignored_warning_issued = false; Optional header_parse_error; for (auto const& child : element->as_element().children) { @@ -312,10 +307,12 @@ void SpecificationClause::parse(XML::Node const* element) m_subclauses.append(create(ctx, child)); return; } - if (header_parse_error.has_value()) { + if (!node_ignored_warning_issued && m_header.header.has()) { + node_ignored_warning_issued = true; ctx.diag().warn(ctx.location_from_xml_offset(child->offset), "node content will be ignored since section header was not parsed successfully"); - ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); + if (header_parse_error.has_value()) + ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); } } ++child_index; diff --git 
a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.cpp index 283eb14b2e5..5d45e3fb307 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.cpp @@ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node) return node->as_text().builder.string_view().trim_whitespace().is_empty(); } -ParseErrorOr deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name) -{ - auto const& attribute = node->as_element().attributes.get(attribute_name); - - if (!attribute.has_value()) - return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node); - return attribute.value(); -} - Optional get_attribute_by_name(XML::Node const* node, StringView attribute_name) { auto const& attribute = node->as_element().attributes.get(attribute_name); @@ -34,39 +25,36 @@ Optional get_attribute_by_name(XML::Node const* node, StringView att return attribute.value(); } -ParseErrorOr get_text_contents(XML::Node const* node) +Optional get_text_contents(XML::Node const* node) { auto const& children = node->as_element().children; - if (children.size() != 1 || !children[0]->is_text()) - return ParseError::create("Expected single text node in a child list of the node"sv, node); + return {}; return children[0]->as_text().builder.string_view(); } -ParseErrorOr get_only_child(XML::Node const* element, StringView tag_name) +Optional get_single_child_with_tag(XML::Node const* element, StringView tag_name) { XML::Node const* result = nullptr; for (auto const& child : element->as_element().children) { - TRY(child->content.visit( - [&](XML::Node::Element const& element) -> ParseErrorOr { - if (element.name != tag_name) - return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child); - if (result != nullptr) - return 
ParseError::create("Element must have only one child"sv, child); + auto is_valid = child->content.visit( + [&](XML::Node::Element const& element) { + if (result != nullptr || element.name != tag_name) /* reject a second element child or a wrong tag before recording this child; testing after the assignment would always see a non-null result */ + return false; result = child; - return {}; + return true; }, - [&](XML::Node::Text const&) -> ParseErrorOr { - if (!contains_empty_text(child)) - return ParseError::create("Element should not have non-empty child text nodes"sv, element); - return {}; + [&](XML::Node::Text const&) { + return contains_empty_text(child); }, - move(ignore_comments))); + [&](auto const&) { return true; }); + if (!is_valid) + return {}; } if (result == nullptr) - return ParseError::create(String::formatted("Element must have only one child"), element); + return {}; return result; } diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.h index bad64a7cc8a..b0dd2659894 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.h +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.h @@ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {}; bool contains_empty_text(XML::Node const* node); -ParseErrorOr deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name); Optional get_attribute_by_name(XML::Node const* node, StringView attribute_name); -ParseErrorOr get_text_contents(XML::Node const* node); +Optional get_text_contents(XML::Node const* node); -ParseErrorOr get_only_child(XML::Node const* element, StringView tag_name); +Optional get_single_child_with_tag(XML::Node const* element, StringView tag_name); }