LibXML: Avoid ByteString::formatted() on static "expected" errors

This is a fairly significant (~8%) speedup when parsing references.
This commit is contained in:
Ali Mohammad Pur 2024-10-10 09:59:19 +02:00 committed by Tim Ledbetter
commit 70e769a18a
Notes: github-actions[bot] 2024-10-10 22:54:52 +00:00
2 changed files with 28 additions and 17 deletions

View file

@ -195,7 +195,7 @@ ErrorOr<void, ParseError> Parser::skip_whitespace(Required required)
// S ::= (#x20 | #x9 | #xD | #xA)+ // S ::= (#x20 | #x9 | #xD | #xA)+
auto matched = m_lexer.consume_while(is_any_of("\x20\x09\x0d\x0a"sv)); auto matched = m_lexer.consume_while(is_any_of("\x20\x09\x0d\x0a"sv));
if (required == Required::Yes && matched.is_empty()) if (required == Required::Yes && matched.is_empty())
return parse_error(m_lexer.current_position(), "Expected whitespace"); return parse_error(m_lexer.current_position(), Expectation { "whitespace"sv });
rollback.disarm(); rollback.disarm();
return {}; return {};
@ -225,7 +225,7 @@ ErrorOr<void, ParseError> Parser::parse_internal()
} }
if (!m_lexer.is_eof()) if (!m_lexer.is_eof())
return parse_error(m_lexer.current_position(), "Garbage after document"); return parse_error(m_lexer.current_position(), ByteString { "Garbage after document"sv });
return {}; return {};
} }
@ -250,7 +250,7 @@ requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseErr
auto start = m_lexer.tell(); auto start = m_lexer.tell();
if (!m_lexer.next_is(predicate)) { if (!m_lexer.next_is(predicate)) {
if (m_options.treat_errors_as_fatal) if (m_options.treat_errors_as_fatal)
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected {}", description)); return parse_error(m_lexer.current_position(), Expectation { description });
} }
m_lexer.ignore(); m_lexer.ignore();
@ -271,7 +271,7 @@ requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseErr
if (m_lexer.tell() == start) { if (m_lexer.tell() == start) {
if (m_options.treat_errors_as_fatal) { if (m_options.treat_errors_as_fatal) {
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected {}", description)); return parse_error(m_lexer.current_position(), Expectation { description });
} }
} }
@ -415,7 +415,7 @@ ErrorOr<void, ParseError> Parser::parse_standalone_document_decl()
auto value = m_lexer.consume_quoted_string(); auto value = m_lexer.consume_quoted_string();
if (!value.is_one_of("yes", "no")) if (!value.is_one_of("yes", "no"))
return parse_error(m_lexer.position_for(m_lexer.tell() - value.length()), "Expected one of 'yes' or 'no'"); return parse_error(m_lexer.position_for(m_lexer.tell() - value.length()), Expectation { "one of 'yes' or 'no'"sv });
m_standalone = value == "yes"; m_standalone = value == "yes";
@ -445,7 +445,7 @@ ErrorOr<void, ParseError> Parser::parse_misc()
return {}; return {};
} }
return parse_error(m_lexer.current_position(), "Expected a match for 'Misc', but found none"); return parse_error(m_lexer.current_position(), Expectation { "a match for 'Misc'"sv });
} }
// 2.5.15 Comment, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Comment // 2.5.15 Comment, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Comment
@ -522,7 +522,7 @@ ErrorOr<Name, ParseError> Parser::parse_processing_instruction_target()
if (target.equals_ignoring_ascii_case("xml"sv) && m_options.treat_errors_as_fatal) { if (target.equals_ignoring_ascii_case("xml"sv) && m_options.treat_errors_as_fatal) {
return parse_error( return parse_error(
m_lexer.position_for(m_lexer.tell() - target.length()), m_lexer.position_for(m_lexer.tell() - target.length()),
"Use of the reserved 'xml' name for processing instruction target name is disallowed"); ByteString { "Use of the reserved 'xml' name for processing instruction target name is disallowed"sv });
} }
rollback.disarm(); rollback.disarm();
@ -634,7 +634,7 @@ ErrorOr<void, ParseError> Parser::parse_element()
// Well-formedness constraint: The Name in an element's end-tag MUST match the element type in the start-tag. // Well-formedness constraint: The Name in an element's end-tag MUST match the element type in the start-tag.
if (m_options.treat_errors_as_fatal && closing_name != tag.name) if (m_options.treat_errors_as_fatal && closing_name != tag.name)
return parse_error(m_lexer.position_for(tag_location), "Invalid closing tag"); return parse_error(m_lexer.position_for(tag_location), ByteString { "Invalid closing tag"sv });
rollback.disarm(); rollback.disarm();
return {}; return {};
@ -720,7 +720,7 @@ ErrorOr<ByteString, ParseError> Parser::parse_attribute_value_inner(StringView d
if (m_lexer.next_is('<')) { if (m_lexer.next_is('<')) {
// Not allowed, return a nice error to make it easier to debug. // Not allowed, return a nice error to make it easier to debug.
return parse_error(m_lexer.current_position(), "Unescaped '<' not allowed in attribute values"); return parse_error(m_lexer.current_position(), ByteString { "Unescaped '<' not allowed in attribute values"sv });
} }
if (m_lexer.next_is('&')) { if (m_lexer.next_is('&')) {
@ -774,7 +774,7 @@ ErrorOr<Variant<Parser::EntityReference, ByteString>, ParseError> Parser::parse_
} }
if (!code_point.has_value() || !s_characters.contains(*code_point)) if (!code_point.has_value() || !s_characters.contains(*code_point))
return parse_error(m_lexer.position_for(reference_start), "Invalid character reference"); return parse_error(m_lexer.position_for(reference_start), ByteString { "Invalid character reference"sv });
TRY(expect(";"sv)); TRY(expect(";"sv));
@ -995,7 +995,7 @@ ErrorOr<Optional<MarkupDeclaration>, ParseError> Parser::parse_markup_declaratio
return Optional<MarkupDeclaration> {}; return Optional<MarkupDeclaration> {};
} }
return parse_error(m_lexer.current_position(), "Expected one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment"); return parse_error(m_lexer.current_position(), Expectation { "one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment"sv });
} }
// 2.8.28a DeclSep, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-DeclSep // 2.8.28a DeclSep, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-DeclSep
@ -1016,7 +1016,7 @@ ErrorOr<Optional<ByteString>, ParseError> Parser::parse_declaration_separator()
return Optional<ByteString> {}; return Optional<ByteString> {};
} }
return parse_error(m_lexer.current_position(), "Expected either whitespace, or a PEReference"); return parse_error(m_lexer.current_position(), Expectation { "either whitespace, or a PEReference"sv });
} }
// 4.1.69 PEReference, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEReference // 4.1.69 PEReference, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEReference
@ -1269,7 +1269,7 @@ ErrorOr<ElementDeclaration::ContentSpec, ParseError> Parser::parse_content_spec(
if (auto result = parse_name(); !result.is_error()) if (auto result = parse_name(); !result.is_error())
names.set(result.release_value()); names.set(result.release_value());
else else
return parse_error(m_lexer.current_position(), "Expected a Name"); return parse_error(m_lexer.current_position(), Expectation { "a Name"sv });
} }
TRY(skip_whitespace()); TRY(skip_whitespace());
TRY(expect(")*"sv)); TRY(expect(")*"sv));
@ -1331,7 +1331,7 @@ ErrorOr<ElementDeclaration::ContentSpec, ParseError> Parser::parse_content_spec(
TRY(expect(")"sv)); TRY(expect(")"sv));
if (choices.size() < 2) if (choices.size() < 2)
return parse_error(m_lexer.current_position(), "Expected more than one choice"); return parse_error(m_lexer.current_position(), Expectation { "more than one choice"sv });
TRY(skip_whitespace()); TRY(skip_whitespace());
auto qualifier = parse_qualifier(); auto qualifier = parse_qualifier();

View file

@ -21,9 +21,13 @@
namespace XML { namespace XML {
// Describes what the parser expected to find at the point of failure.
// Only the description itself is stored; the "Expected " prefix is added
// where the error is turned into a ByteString (via ByteString::formatted),
// so constructing an Expectation does not format anything.
struct Expectation {
// Text that follows "Expected " in the rendered message, e.g. "whitespace".
// NOTE(review): this is a view, presumably non-owning — confirm that the
// viewed text outlives any ParseError carrying this Expectation.
StringView expected;
};
struct ParseError { struct ParseError {
LineTrackingLexer::Position position {}; LineTrackingLexer::Position position {};
ByteString error; Variant<ByteString, Expectation> error;
}; };
struct Listener { struct Listener {
@ -183,9 +187,13 @@ private:
auto rule_name = m_current_rule.rule.value_or("<?>"); auto rule_name = m_current_rule.rule.value_or("<?>");
if (rule_name.starts_with("parse_"sv)) if (rule_name.starts_with("parse_"sv))
rule_name = rule_name.substring_view(6); rule_name = rule_name.substring_view(6);
auto error_string = error.error.visit(
[](ByteString const& error) -> ByteString { return error; },
[](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
m_parse_errors.append({ m_parse_errors.append({
error.position, error.position,
ByteString::formatted("{}: {}", rule_name, error.error), ByteString::formatted("{}: {}", rule_name, error_string),
}); });
} }
return error; return error;
@ -218,6 +226,9 @@ template<>
struct AK::Formatter<XML::ParseError> : public AK::Formatter<FormatString> { struct AK::Formatter<XML::ParseError> : public AK::Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, XML::ParseError const& error) ErrorOr<void> format(FormatBuilder& builder, XML::ParseError const& error)
{ {
return Formatter<FormatString>::format(builder, "{} at line: {}, col: {} (offset {})"sv, error.error, error.position.line, error.position.column, error.position.offset); auto error_string = error.error.visit(
[](ByteString const& error) -> ByteString { return error; },
[](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
return Formatter<FormatString>::format(builder, "{} at line: {}, col: {} (offset {})"sv, error_string, error.position.line, error.position.column, error.position.offset);
} }
}; };