From 28d9d3a2c7c1ebb4bc7d12143fdcf555bb8cc86d Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Tue, 5 Aug 2025 13:22:17 -0400 Subject: [PATCH] AK+Libraries: Reduce API surface of GenericLexer a bit * Remove completely unused methods. * Deduplicate methods that were overloaded with both StringView and char const* parameters. A future commit will templatize GenericLexer by char type. This patch serves to make that a tiny bit easier. --- AK/GenericLexer.cpp | 23 ----------------- AK/GenericLexer.h | 41 ++++++------------------------ Libraries/LibCrypto/ASN1/PEM.cpp | 8 +++--- Libraries/LibIDL/IDLParser.cpp | 38 +++++++++++++-------------- Libraries/LibJS/Token.cpp | 2 +- Libraries/LibLine/Editor.cpp | 10 ++++---- Libraries/LibWeb/HTML/Dates.cpp | 2 +- Libraries/LibXML/Parser/Parser.cpp | 5 ++-- Tests/AK/TestGenericLexer.cpp | 11 -------- 9 files changed, 41 insertions(+), 99 deletions(-) diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp index 1a2219f4599..05f81f889a1 100644 --- a/AK/GenericLexer.cpp +++ b/AK/GenericLexer.cpp @@ -57,17 +57,6 @@ StringView GenericLexer::consume_until(char stop) return m_input.substring_view(start, length); } -// Consume and return characters until the string `stop` is found -StringView GenericLexer::consume_until(char const* stop) -{ - size_t start = m_index; - while (!is_eof() && !next_is(stop)) - m_index++; - size_t length = m_index - start; - - return m_input.substring_view(start, length); -} - // Consume and return characters until the string `stop` is found StringView GenericLexer::consume_until(StringView stop) { @@ -199,18 +188,6 @@ template ErrorOr GenericLexer::consume_decimal_integer(); template ErrorOr GenericLexer::consume_decimal_integer(); template ErrorOr GenericLexer::consume_decimal_integer(); -Optional GenericLexer::consume_and_unescape_string(char escape_char) -{ - auto view = consume_quoted_string(escape_char); - if (view.is_null()) - return {}; - - StringBuilder builder; - for (size_t i = 0; i < view.length(); ++i) - builder.append(consume_escaped_character(escape_char)); - return builder.to_byte_string(); -} - auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result { if (!consume_specific("\\u"sv)) diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h index c4148daa1df..bf7b891e502 100644 --- a/AK/GenericLexer.h +++ b/AK/GenericLexer.h @@ -9,7 +9,6 @@ #include #include #include -#include #include namespace AK { @@ -54,14 +53,6 @@ public: return true; } - constexpr bool next_is(char const* expected) const - { - for (size_t i = 0; expected[i] != '\0'; ++i) - if (peek(i) != expected[i]) - return false; - return true; - } - constexpr void retreat() { VERIFY(m_index > 0); @@ -80,30 +71,22 @@ public: return m_input[m_index++]; } - template - constexpr bool consume_specific(T const& next) + constexpr bool consume_specific(StringView next) { if (!next_is(next)) return false; - if constexpr (requires { next.length(); }) { - ignore(next.length()); - } else { - ignore(sizeof(next)); - } + ignore(next.length()); return true; } - bool consume_specific(ByteString next) = delete; - - bool consume_specific(String const& next) + constexpr bool consume_specific(char next) { - return consume_specific(next.bytes_as_string_view()); - } + if (!next_is(next)) + return false; - constexpr bool consume_specific(char const* next) - { - return consume_specific(StringView { next, __builtin_strlen(next) }); + ignore(sizeof(next)); + return true; } constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv) @@ -125,10 +108,9 @@ public: StringView consume_all(); StringView consume_line(); StringView consume_until(char); - StringView consume_until(char const*); StringView consume_until(StringView); StringView consume_quoted_string(char escape_char = 0); - Optional consume_and_unescape_string(char escape_char = '\\'); + template ErrorOr consume_decimal_integer(); @@ -152,13 +134,6 @@ public: } } - constexpr void ignore_until(char const* stop) - { - while (!is_eof() && !next_is(stop)) { - ++m_index; - } - } - /* * Conditions are used to match arbitrary characters. You can use lambdas, * ctype functions, or is_any_of() and its derivatives (see below). diff --git a/Libraries/LibCrypto/ASN1/PEM.cpp b/Libraries/LibCrypto/ASN1/PEM.cpp index a9f782d49dd..08e920eb27b 100644 --- a/Libraries/LibCrypto/ASN1/PEM.cpp +++ b/Libraries/LibCrypto/ASN1/PEM.cpp @@ -42,7 +42,7 @@ DecodedPEM decode_pem(ReadonlyBytes data) case PreStartData: if (lexer.consume_specific("-----BEGIN "sv)) { state = Started; - header_type = lexer.consume_until("-----"); + header_type = lexer.consume_until("-----"sv); } lexer.consume_line(); break; @@ -50,7 +50,7 @@ DecodedPEM decode_pem(ReadonlyBytes data) if (lexer.consume_specific("-----END "sv)) { state = Ended; - if (lexer.consume_until("-----") != header_type) { + if (lexer.consume_until("-----"sv) != header_type) { dbgln("PEM type mismatch"); return {}; } @@ -98,7 +98,7 @@ ErrorOr> decode_pems(ReadonlyBytes data) case Junk: if (lexer.consume_specific("-----BEGIN "sv)) { state = Parsing; - header_type = lexer.consume_until("-----"); + header_type = lexer.consume_until("-----"sv); } lexer.consume_line(); break; @@ -106,7 +106,7 @@ ErrorOr> decode_pems(ReadonlyBytes data) if (lexer.consume_specific("-----END "sv)) { state = Junk; - if (lexer.consume_until("-----") != header_type) { + if (lexer.consume_until("-----"sv) != header_type) { return Error::from_string_literal("PEM type mismatch"); } lexer.consume_line(); diff --git a/Libraries/LibIDL/IDLParser.cpp b/Libraries/LibIDL/IDLParser.cpp index 8cda82eeebf..0b8af808210 100644 --- a/Libraries/LibIDL/IDLParser.cpp +++ b/Libraries/LibIDL/IDLParser.cpp @@ -732,63 +732,63 @@ void Parser::parse_interface(Interface& interface) interface.has_unscopable_member = true; } - if (lexer.next_is("async")) { + if (lexer.next_is("async"sv)) { parse_async_iterable(interface); continue; } - if (lexer.next_is("constructor")) { + if (lexer.next_is("constructor"sv)) { parse_constructor(extended_attributes, interface); continue; } - if (lexer.next_is("const")) { + if (lexer.next_is("const"sv)) { parse_constant(interface); continue; } - if (lexer.next_is("stringifier")) { + if (lexer.next_is("stringifier"sv)) { parse_stringifier(extended_attributes, interface); continue; } - if (lexer.next_is("iterable")) { + if (lexer.next_is("iterable"sv)) { parse_iterable(interface); continue; } - if (lexer.next_is("setlike")) { + if (lexer.next_is("setlike"sv)) { bool is_readonly = false; parse_setlike(interface, is_readonly); continue; } - if (lexer.next_is("inherit") || lexer.next_is("readonly") || lexer.next_is("attribute")) { + if (lexer.next_is("inherit"sv) || lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) { parse_attribute(extended_attributes, interface); continue; } - if (lexer.next_is("getter")) { + if (lexer.next_is("getter"sv)) { parse_getter(extended_attributes, interface); continue; } - if (lexer.next_is("setter")) { + if (lexer.next_is("setter"sv)) { parse_setter(extended_attributes, interface); continue; } - if (lexer.next_is("deleter")) { + if (lexer.next_is("deleter"sv)) { parse_deleter(extended_attributes, interface); continue; } - bool is_static = lexer.consume_specific("static"); + bool is_static = lexer.consume_specific("static"sv); if (!is_static) { parse_function(extended_attributes, interface, IsStatic::No); } else { consume_whitespace(); - if (lexer.next_is("readonly") || lexer.next_is("attribute")) { + if (lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) { parse_attribute(extended_attributes, interface, IsStatic::Yes); } else { parse_function(extended_attributes, interface, IsStatic::Yes); @@ -922,7 +922,7 @@ void Parser::parse_typedef(Interface& interface) void Parser::parse_dictionary(HashMap extended_attributes, Interface& interface) { bool partial = false; - if (lexer.next_is("partial")) { + if (lexer.next_is("partial"sv)) { assert_string("partial"sv); consume_whitespace(); partial = true; @@ -1061,19 +1061,19 @@ void Parser::parse_non_interface_entities(bool allow_interface, Interface& inter HashMap extended_attributes; if (lexer.consume_specific('[')) extended_attributes = parse_extended_attributes(); - if (lexer.next_is("dictionary") || lexer.next_is("partial dictionary")) { + if (lexer.next_is("dictionary"sv) || lexer.next_is("partial dictionary"sv)) { parse_dictionary(extended_attributes, interface); - } else if (lexer.next_is("enum")) { + } else if (lexer.next_is("enum"sv)) { parse_enumeration(extended_attributes, interface); - } else if (lexer.next_is("typedef")) { + } else if (lexer.next_is("typedef"sv)) { parse_typedef(interface); } else if (lexer.next_is("partial interface"sv)) { parse_partial_interface(extended_attributes, interface); - } else if (lexer.next_is("interface mixin")) { + } else if (lexer.next_is("interface mixin"sv)) { parse_interface_mixin(interface); - } else if (lexer.next_is("callback")) { + } else if (lexer.next_is("callback"sv)) { parse_callback_function(extended_attributes, interface); - } else if ((allow_interface && !lexer.next_is("interface") && !lexer.next_is("namespace")) || !allow_interface) { + } else if ((allow_interface && !lexer.next_is("interface"sv) && !lexer.next_is("namespace"sv)) || !allow_interface) { auto current_offset = lexer.tell(); auto name = parse_identifier_ending_with_space(); consume_whitespace(); diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp index 8b5763946c9..c037843bb59 100644 --- a/Libraries/LibJS/Token.cpp +++ b/Libraries/LibJS/Token.cpp @@ -139,7 +139,7 @@ ByteString Token::string_value(StringValueStatus& status) const // Line continuation if (lexer.next_is('\n') || lexer.next_is('\r')) { - if (lexer.next_is("\r\n")) + if (lexer.next_is("\r\n"sv)) lexer.ignore(); lexer.ignore(); continue; diff --git a/Libraries/LibLine/Editor.cpp b/Libraries/LibLine/Editor.cpp index 500a8010169..d6cec44ba19 100644 --- a/Libraries/LibLine/Editor.cpp +++ b/Libraries/LibLine/Editor.cpp @@ -84,23 +84,23 @@ Configuration Configuration::from_config(StringView libname) key = key_lexer.consume_escaped_character(); escape = false; } else { - if (key_lexer.next_is("alt+")) { + if (key_lexer.next_is("alt+"sv)) { alt = key_lexer.consume_specific("alt+"sv); continue; } - if (key_lexer.next_is("^[")) { + if (key_lexer.next_is("^["sv)) { alt = key_lexer.consume_specific("^["sv); continue; } - if (key_lexer.next_is("^")) { + if (key_lexer.next_is("^"sv)) { has_ctrl = key_lexer.consume_specific("^"sv); continue; } - if (key_lexer.next_is("ctrl+")) { + if (key_lexer.next_is("ctrl+"sv)) { has_ctrl = key_lexer.consume_specific("ctrl+"sv); continue; } - if (key_lexer.next_is("\\")) { + if (key_lexer.next_is("\\"sv)) { escape = true; continue; } diff --git a/Libraries/LibWeb/HTML/Dates.cpp b/Libraries/LibWeb/HTML/Dates.cpp index 575a712fa18..a9dcbdc9db4 100644 --- a/Libraries/LibWeb/HTML/Dates.cpp +++ b/Libraries/LibWeb/HTML/Dates.cpp @@ -508,7 +508,7 @@ Optional parse_a_local_date_and_time_string(StringView input_view) return {}; // 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL // LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character. - if (!input.consume_specific("T") && !input.consume_specific(" ")) + if (!input.consume_specific('T') && !input.consume_specific(' ')) return {}; // 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail. auto hour_minute_second = parse_a_time_component(input); diff --git a/Libraries/LibXML/Parser/Parser.cpp b/Libraries/LibXML/Parser/Parser.cpp index 9106f4d37da..d79be4ed728 100644 --- a/Libraries/LibXML/Parser/Parser.cpp +++ b/Libraries/LibXML/Parser/Parser.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -523,7 +524,7 @@ ErrorOr Parser::parse_processing_instruction() auto target = TRY(parse_processing_instruction_target()); ByteString data; if (auto result = skip_whitespace(Required::Yes); !result.is_error()) - data = m_lexer.consume_until("?>"); + data = m_lexer.consume_until("?>"sv); TRY(expect("?>"sv)); append_processing_instruction(target, data); @@ -1714,7 +1715,7 @@ ErrorOr Parser::parse_cdata_section() auto accept = accept_rule(); auto section_start = m_lexer.tell(); - while (!m_lexer.next_is("]]>")) { + while (!m_lexer.next_is("]]>"sv)) { if (m_lexer.is_eof()) break; m_lexer.ignore(); diff --git a/Tests/AK/TestGenericLexer.cpp b/Tests/AK/TestGenericLexer.cpp index 3dd04299f29..cfb443cd70a 100644 --- a/Tests/AK/TestGenericLexer.cpp +++ b/Tests/AK/TestGenericLexer.cpp @@ -45,7 +45,6 @@ TEST_CASE(should_constexpr_next_is) { constexpr GenericLexer sut("abcdef"sv); static_assert(sut.next_is('a')); - static_assert(sut.next_is("abc")); static_assert(sut.next_is("abc"sv)); } @@ -124,16 +123,6 @@ TEST_CASE(should_constexpr_ignore_until) static_assert(sut.peek() == 'd'); } -TEST_CASE(should_constexpr_ignore_until_cstring) -{ - constexpr auto sut = [] { - GenericLexer sut("abcdef"sv); - sut.ignore_until("cde"); - return sut; - }(); - static_assert(sut.peek() == 'c'); -} - TEST_CASE(should_constexpr_next_is_pred) { constexpr auto pred = [](auto c) {