AK+Libraries: Reduce API surface of GenericLexer a bit

* Remove completely unused methods.
* Deduplicate methods that were overloaded with both StringView and char const* parameters.

A future commit will templatize GenericLexer by char type. This patch serves to make that a tiny bit easier.
Author: https://github.com/trflynn89 Commit: 28d9d3a2c7 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
2025-10-17 21:49:42 +00:00 · 2025-08-05 13:22:17 -04:00 · 2025-08-05 13:22:17 -04:00 · 28d9d3a2c7 · 2025-08-13 13:58:03 +00:00
commit 28d9d3a2c7
parent 213683956c
9 changed files with 41 additions and 99 deletions
--- a/AK/GenericLexer.cpp
+++ b/AK/GenericLexer.cpp
@ -57,17 +57,6 @@ StringView GenericLexer::consume_until(char stop)
    return m_input.substring_view(start, length);
 }

-// Consume and return characters until the string `stop` is found
-StringView GenericLexer::consume_until(char const* stop)
-{
-    size_t start = m_index;
-    while (!is_eof() && !next_is(stop))
-        m_index++;
-    size_t length = m_index - start;
-
-    return m_input.substring_view(start, length);
-}
-
 // Consume and return characters until the string `stop` is found
 StringView GenericLexer::consume_until(StringView stop)
 {
@ -199,18 +188,6 @@ template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
 template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
 template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();

-Optional<ByteString> GenericLexer::consume_and_unescape_string(char escape_char)
-{
-    auto view = consume_quoted_string(escape_char);
-    if (view.is_null())
-        return {};
-
-    StringBuilder builder;
-    for (size_t i = 0; i < view.length(); ++i)
-        builder.append(consume_escaped_character(escape_char));
-    return builder.to_byte_string();
-}
-
 auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
 {
    if (!consume_specific("\\u"sv))
--- a/AK/GenericLexer.h
+++ b/AK/GenericLexer.h
@ -9,7 +9,6 @@
 #include <AK/NonnullOwnPtr.h>
 #include <AK/RedBlackTree.h>
 #include <AK/Result.h>
-#include <AK/String.h>
 #include <AK/StringView.h>

 namespace AK {
@ -54,14 +53,6 @@ public:
        return true;
    }

-    constexpr bool next_is(char const* expected) const
-    {
-        for (size_t i = 0; expected[i] != '\0'; ++i)
-            if (peek(i) != expected[i])
-                return false;
-        return true;
-    }
-
    constexpr void retreat()
    {
        VERIFY(m_index > 0);
@ -80,30 +71,22 @@ public:
        return m_input[m_index++];
    }

-    template<typename T>
-    constexpr bool consume_specific(T const& next)
+    constexpr bool consume_specific(StringView next)
    {
        if (!next_is(next))
            return false;

-        if constexpr (requires { next.length(); }) {
        ignore(next.length());
-        } else {
-            ignore(sizeof(next));
-        }
        return true;
    }

-    bool consume_specific(ByteString next) = delete;
-
-    bool consume_specific(String const& next)
+    constexpr bool consume_specific(char next)
    {
-        return consume_specific(next.bytes_as_string_view());
-    }
+        if (!next_is(next))
+            return false;

-    constexpr bool consume_specific(char const* next)
-    {
-        return consume_specific(StringView { next, __builtin_strlen(next) });
+        ignore(sizeof(next));
+        return true;
    }

    constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
@ -125,10 +108,9 @@ public:
    StringView consume_all();
    StringView consume_line();
    StringView consume_until(char);
-    StringView consume_until(char const*);
    StringView consume_until(StringView);
    StringView consume_quoted_string(char escape_char = 0);
-    Optional<ByteString> consume_and_unescape_string(char escape_char = '\\');
+
    template<Integral T>
    ErrorOr<T> consume_decimal_integer();

@ -152,13 +134,6 @@ public:
        }
    }

-    constexpr void ignore_until(char const* stop)
-    {
-        while (!is_eof() && !next_is(stop)) {
-            ++m_index;
-        }
-    }
-
    /*
     * Conditions are used to match arbitrary characters. You can use lambdas,
     * ctype functions, or is_any_of() and its derivatives (see below).
--- a/Libraries/LibCrypto/ASN1/PEM.cpp
+++ b/Libraries/LibCrypto/ASN1/PEM.cpp
@ -42,7 +42,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
        case PreStartData:
            if (lexer.consume_specific("-----BEGIN "sv)) {
                state = Started;
-                header_type = lexer.consume_until("-----");
+                header_type = lexer.consume_until("-----"sv);
            }
            lexer.consume_line();
            break;
@ -50,7 +50,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
            if (lexer.consume_specific("-----END "sv)) {
                state = Ended;

-                if (lexer.consume_until("-----") != header_type) {
+                if (lexer.consume_until("-----"sv) != header_type) {
                    dbgln("PEM type mismatch");
                    return {};
                }
@ -98,7 +98,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
        case Junk:
            if (lexer.consume_specific("-----BEGIN "sv)) {
                state = Parsing;
-                header_type = lexer.consume_until("-----");
+                header_type = lexer.consume_until("-----"sv);
            }
            lexer.consume_line();
            break;
@ -106,7 +106,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
            if (lexer.consume_specific("-----END "sv)) {
                state = Junk;

-                if (lexer.consume_until("-----") != header_type) {
+                if (lexer.consume_until("-----"sv) != header_type) {
                    return Error::from_string_literal("PEM type mismatch");
                }
                lexer.consume_line();
--- a/Libraries/LibIDL/IDLParser.cpp
+++ b/Libraries/LibIDL/IDLParser.cpp
@ -732,63 +732,63 @@ void Parser::parse_interface(Interface& interface)
                interface.has_unscopable_member = true;
        }

-        if (lexer.next_is("async")) {
+        if (lexer.next_is("async"sv)) {
            parse_async_iterable(interface);
            continue;
        }

-        if (lexer.next_is("constructor")) {
+        if (lexer.next_is("constructor"sv)) {
            parse_constructor(extended_attributes, interface);
            continue;
        }

-        if (lexer.next_is("const")) {
+        if (lexer.next_is("const"sv)) {
            parse_constant(interface);
            continue;
        }

-        if (lexer.next_is("stringifier")) {
+        if (lexer.next_is("stringifier"sv)) {
            parse_stringifier(extended_attributes, interface);
            continue;
        }

-        if (lexer.next_is("iterable")) {
+        if (lexer.next_is("iterable"sv)) {
            parse_iterable(interface);
            continue;
        }

-        if (lexer.next_is("setlike")) {
+        if (lexer.next_is("setlike"sv)) {
            bool is_readonly = false;
            parse_setlike(interface, is_readonly);
            continue;
        }

-        if (lexer.next_is("inherit") || lexer.next_is("readonly") || lexer.next_is("attribute")) {
+        if (lexer.next_is("inherit"sv) || lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
            parse_attribute(extended_attributes, interface);
            continue;
        }

-        if (lexer.next_is("getter")) {
+        if (lexer.next_is("getter"sv)) {
            parse_getter(extended_attributes, interface);
            continue;
        }

-        if (lexer.next_is("setter")) {
+        if (lexer.next_is("setter"sv)) {
            parse_setter(extended_attributes, interface);
            continue;
        }

-        if (lexer.next_is("deleter")) {
+        if (lexer.next_is("deleter"sv)) {
            parse_deleter(extended_attributes, interface);
            continue;
        }

-        bool is_static = lexer.consume_specific("static");
+        bool is_static = lexer.consume_specific("static"sv);
        if (!is_static) {
            parse_function(extended_attributes, interface, IsStatic::No);
        } else {
            consume_whitespace();
-            if (lexer.next_is("readonly") || lexer.next_is("attribute")) {
+            if (lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
                parse_attribute(extended_attributes, interface, IsStatic::Yes);
            } else {
                parse_function(extended_attributes, interface, IsStatic::Yes);
@ -922,7 +922,7 @@ void Parser::parse_typedef(Interface& interface)
 void Parser::parse_dictionary(HashMap<ByteString, ByteString> extended_attributes, Interface& interface)
 {
    bool partial = false;
-    if (lexer.next_is("partial")) {
+    if (lexer.next_is("partial"sv)) {
        assert_string("partial"sv);
        consume_whitespace();
        partial = true;
@ -1061,19 +1061,19 @@ void Parser::parse_non_interface_entities(bool allow_interface, Interface& inter
        HashMap<ByteString, ByteString> extended_attributes;
        if (lexer.consume_specific('['))
            extended_attributes = parse_extended_attributes();
-        if (lexer.next_is("dictionary") || lexer.next_is("partial dictionary")) {
+        if (lexer.next_is("dictionary"sv) || lexer.next_is("partial dictionary"sv)) {
            parse_dictionary(extended_attributes, interface);
-        } else if (lexer.next_is("enum")) {
+        } else if (lexer.next_is("enum"sv)) {
            parse_enumeration(extended_attributes, interface);
-        } else if (lexer.next_is("typedef")) {
+        } else if (lexer.next_is("typedef"sv)) {
            parse_typedef(interface);
        } else if (lexer.next_is("partial interface"sv)) {
            parse_partial_interface(extended_attributes, interface);
-        } else if (lexer.next_is("interface mixin")) {
+        } else if (lexer.next_is("interface mixin"sv)) {
            parse_interface_mixin(interface);
-        } else if (lexer.next_is("callback")) {
+        } else if (lexer.next_is("callback"sv)) {
            parse_callback_function(extended_attributes, interface);
-        } else if ((allow_interface && !lexer.next_is("interface") && !lexer.next_is("namespace")) || !allow_interface) {
+        } else if ((allow_interface && !lexer.next_is("interface"sv) && !lexer.next_is("namespace"sv)) || !allow_interface) {
            auto current_offset = lexer.tell();
            auto name = parse_identifier_ending_with_space();
            consume_whitespace();
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@ -139,7 +139,7 @@ ByteString Token::string_value(StringValueStatus& status) const

        // Line continuation
        if (lexer.next_is('\n') || lexer.next_is('\r')) {
-            if (lexer.next_is("\r\n"))
+            if (lexer.next_is("\r\n"sv))
                lexer.ignore();
            lexer.ignore();
            continue;
--- a/Libraries/LibLine/Editor.cpp
+++ b/Libraries/LibLine/Editor.cpp
@ -84,23 +84,23 @@ Configuration Configuration::from_config(StringView libname)
                key = key_lexer.consume_escaped_character();
                escape = false;
            } else {
-                if (key_lexer.next_is("alt+")) {
+                if (key_lexer.next_is("alt+"sv)) {
                    alt = key_lexer.consume_specific("alt+"sv);
                    continue;
                }
-                if (key_lexer.next_is("^[")) {
+                if (key_lexer.next_is("^["sv)) {
                    alt = key_lexer.consume_specific("^["sv);
                    continue;
                }
-                if (key_lexer.next_is("^")) {
+                if (key_lexer.next_is("^"sv)) {
                    has_ctrl = key_lexer.consume_specific("^"sv);
                    continue;
                }
-                if (key_lexer.next_is("ctrl+")) {
+                if (key_lexer.next_is("ctrl+"sv)) {
                    has_ctrl = key_lexer.consume_specific("ctrl+"sv);
                    continue;
                }
-                if (key_lexer.next_is("\\")) {
+                if (key_lexer.next_is("\\"sv)) {
                    escape = true;
                    continue;
                }
--- a/Libraries/LibWeb/HTML/Dates.cpp
+++ b/Libraries/LibWeb/HTML/Dates.cpp
@ -508,7 +508,7 @@ Optional<DateAndTime> parse_a_local_date_and_time_string(StringView input_view)
        return {};
    // 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL
    //    LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character.
-    if (!input.consume_specific("T") && !input.consume_specific(" "))
+    if (!input.consume_specific('T') && !input.consume_specific(' '))
        return {};
    // 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail.
    auto hour_minute_second = parse_a_time_component(input);
--- a/Libraries/LibXML/Parser/Parser.cpp
+++ b/Libraries/LibXML/Parser/Parser.cpp
@ -5,6 +5,7 @@
 */

 #include <AK/StringConversions.h>
+#include <AK/Utf8View.h>
 #include <LibXML/DOM/Document.h>
 #include <LibXML/Parser/Parser.h>

@ -523,7 +524,7 @@ ErrorOr<void, ParseError> Parser::parse_processing_instruction()
    auto target = TRY(parse_processing_instruction_target());
    ByteString data;
    if (auto result = skip_whitespace(Required::Yes); !result.is_error())
-        data = m_lexer.consume_until("?>");
+        data = m_lexer.consume_until("?>"sv);
    TRY(expect("?>"sv));

    append_processing_instruction(target, data);
@ -1714,7 +1715,7 @@ ErrorOr<StringView, ParseError> Parser::parse_cdata_section()
    auto accept = accept_rule();

    auto section_start = m_lexer.tell();
-    while (!m_lexer.next_is("]]>")) {
+    while (!m_lexer.next_is("]]>"sv)) {
        if (m_lexer.is_eof())
            break;
        m_lexer.ignore();
--- a/Tests/AK/TestGenericLexer.cpp
+++ b/Tests/AK/TestGenericLexer.cpp
@ -45,7 +45,6 @@ TEST_CASE(should_constexpr_next_is)
 {
    constexpr GenericLexer sut("abcdef"sv);
    static_assert(sut.next_is('a'));
-    static_assert(sut.next_is("abc"));
    static_assert(sut.next_is("abc"sv));
 }

@ -124,16 +123,6 @@ TEST_CASE(should_constexpr_ignore_until)
    static_assert(sut.peek() == 'd');
 }

-TEST_CASE(should_constexpr_ignore_until_cstring)
-{
-    constexpr auto sut = [] {
-        GenericLexer sut("abcdef"sv);
-        sut.ignore_until("cde");
-        return sut;
-    }();
-    static_assert(sut.peek() == 'c');
-}
-
 TEST_CASE(should_constexpr_next_is_pred)
 {
    constexpr auto pred = [](auto c) {