From 28d9d3a2c7c1ebb4bc7d12143fdcf555bb8cc86d Mon Sep 17 00:00:00 2001
From: Timothy Flynn <trflynn89@pm.me>
Date: Tue, 5 Aug 2025 13:22:17 -0400
Subject: [PATCH] AK+Libraries: Reduce API surface of GenericLexer a bit

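* Remove completely unused methods.
* Deduplicate methods that were overloaded with both StringView and
  char const* parameters. Callers now pass a StringView (e.g. "..."sv)
  or a single char.
* Remove the String overload of consume_specific() (and the deleted
  ByteString overload), along with the AK/String.h include.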

A future commit will templatize GenericLexer by char type. This patch
serves to make that a tiny bit easier.
---
 AK/GenericLexer.cpp                | 23 -----------------
 AK/GenericLexer.h                  | 41 ++++++------------------------
 Libraries/LibCrypto/ASN1/PEM.cpp   |  8 +++---
 Libraries/LibIDL/IDLParser.cpp     | 38 +++++++++++++--------------
 Libraries/LibJS/Token.cpp          |  2 +-
 Libraries/LibLine/Editor.cpp       | 10 ++++----
 Libraries/LibWeb/HTML/Dates.cpp    |  2 +-
 Libraries/LibXML/Parser/Parser.cpp |  5 ++--
 Tests/AK/TestGenericLexer.cpp      | 11 --------
 9 files changed, 41 insertions(+), 99 deletions(-)
diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp
index 1a2219f4599..05f81f889a1 100644
--- a/AK/GenericLexer.cpp
+++ b/AK/GenericLexer.cpp
@@ -57,17 +57,6 @@ StringView GenericLexer::consume_until(char stop)
     return m_input.substring_view(start, length);
 }
 
-// Consume and return characters until the string `stop` is found
-StringView GenericLexer::consume_until(char const* stop)
-{
-    size_t start = m_index;
-    while (!is_eof() && !next_is(stop))
-        m_index++;
-    size_t length = m_index - start;
-
-    return m_input.substring_view(start, length);
-}
-
 // Consume and return characters until the string `stop` is found
 StringView GenericLexer::consume_until(StringView stop)
 {
@@ -199,18 +188,6 @@ template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
 template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
 template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
 
-Optional<ByteString> GenericLexer::consume_and_unescape_string(char escape_char)
-{
-    auto view = consume_quoted_string(escape_char);
-    if (view.is_null())
-        return {};
-
-    StringBuilder builder;
-    for (size_t i = 0; i < view.length(); ++i)
-        builder.append(consume_escaped_character(escape_char));
-    return builder.to_byte_string();
-}
-
 auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
 {
     if (!consume_specific("\\u"sv))
diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h
index c4148daa1df..bf7b891e502 100644
--- a/AK/GenericLexer.h
+++ b/AK/GenericLexer.h
@@ -9,7 +9,6 @@
 #include <AK/NonnullOwnPtr.h>
 #include <AK/RedBlackTree.h>
 #include <AK/Result.h>
-#include <AK/String.h>
 #include <AK/StringView.h>
 
 namespace AK {
@@ -54,14 +53,6 @@ public:
         return true;
     }
 
-    constexpr bool next_is(char const* expected) const
-    {
-        for (size_t i = 0; expected[i] != '\0'; ++i)
-            if (peek(i) != expected[i])
-                return false;
-        return true;
-    }
-
     constexpr void retreat()
     {
         VERIFY(m_index > 0);
@@ -80,30 +71,22 @@ public:
         return m_input[m_index++];
     }
 
-    template<typename T>
-    constexpr bool consume_specific(T const& next)
+    constexpr bool consume_specific(StringView next)
     {
         if (!next_is(next))
             return false;
 
-        if constexpr (requires { next.length(); }) {
-            ignore(next.length());
-        } else {
-            ignore(sizeof(next));
-        }
+        ignore(next.length());
         return true;
     }
 
-    bool consume_specific(ByteString next) = delete;
-
-    bool consume_specific(String const& next)
+    constexpr bool consume_specific(char next)
     {
-        return consume_specific(next.bytes_as_string_view());
-    }
+        if (!next_is(next))
+            return false;
 
-    constexpr bool consume_specific(char const* next)
-    {
-        return consume_specific(StringView { next, __builtin_strlen(next) });
+        ignore(sizeof(next));
+        return true;
     }
 
     constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
@@ -125,10 +108,9 @@ public:
     StringView consume_all();
     StringView consume_line();
     StringView consume_until(char);
-    StringView consume_until(char const*);
     StringView consume_until(StringView);
     StringView consume_quoted_string(char escape_char = 0);
-    Optional<ByteString> consume_and_unescape_string(char escape_char = '\\');
+
     template<Integral T>
     ErrorOr<T> consume_decimal_integer();
 
@@ -152,13 +134,6 @@ public:
         }
     }
 
-    constexpr void ignore_until(char const* stop)
-    {
-        while (!is_eof() && !next_is(stop)) {
-            ++m_index;
-        }
-    }
-
     /*
      * Conditions are used to match arbitrary characters. You can use lambdas,
      * ctype functions, or is_any_of() and its derivatives (see below).
diff --git a/Libraries/LibCrypto/ASN1/PEM.cpp b/Libraries/LibCrypto/ASN1/PEM.cpp
index a9f782d49dd..08e920eb27b 100644
--- a/Libraries/LibCrypto/ASN1/PEM.cpp
+++ b/Libraries/LibCrypto/ASN1/PEM.cpp
@@ -42,7 +42,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
         case PreStartData:
             if (lexer.consume_specific("-----BEGIN "sv)) {
                 state = Started;
-                header_type = lexer.consume_until("-----");
+                header_type = lexer.consume_until("-----"sv);
             }
             lexer.consume_line();
             break;
@@ -50,7 +50,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
             if (lexer.consume_specific("-----END "sv)) {
                 state = Ended;
 
-                if (lexer.consume_until("-----") != header_type) {
+                if (lexer.consume_until("-----"sv) != header_type) {
                     dbgln("PEM type mismatch");
                     return {};
                 }
@@ -98,7 +98,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
         case Junk:
             if (lexer.consume_specific("-----BEGIN "sv)) {
                 state = Parsing;
-                header_type = lexer.consume_until("-----");
+                header_type = lexer.consume_until("-----"sv);
             }
             lexer.consume_line();
             break;
@@ -106,7 +106,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
             if (lexer.consume_specific("-----END "sv)) {
                 state = Junk;
 
-                if (lexer.consume_until("-----") != header_type) {
+                if (lexer.consume_until("-----"sv) != header_type) {
                     return Error::from_string_literal("PEM type mismatch");
                 }
                 lexer.consume_line();
diff --git a/Libraries/LibIDL/IDLParser.cpp b/Libraries/LibIDL/IDLParser.cpp
index 8cda82eeebf..0b8af808210 100644
--- a/Libraries/LibIDL/IDLParser.cpp
+++ b/Libraries/LibIDL/IDLParser.cpp
@@ -732,63 +732,63 @@ void Parser::parse_interface(Interface& interface)
                 interface.has_unscopable_member = true;
         }
 
-        if (lexer.next_is("async")) {
+        if (lexer.next_is("async"sv)) {
             parse_async_iterable(interface);
             continue;
         }
 
-        if (lexer.next_is("constructor")) {
+        if (lexer.next_is("constructor"sv)) {
             parse_constructor(extended_attributes, interface);
             continue;
         }
 
-        if (lexer.next_is("const")) {
+        if (lexer.next_is("const"sv)) {
             parse_constant(interface);
             continue;
         }
 
-        if (lexer.next_is("stringifier")) {
+        if (lexer.next_is("stringifier"sv)) {
             parse_stringifier(extended_attributes, interface);
             continue;
         }
 
-        if (lexer.next_is("iterable")) {
+        if (lexer.next_is("iterable"sv)) {
             parse_iterable(interface);
             continue;
         }
 
-        if (lexer.next_is("setlike")) {
+        if (lexer.next_is("setlike"sv)) {
             bool is_readonly = false;
             parse_setlike(interface, is_readonly);
             continue;
         }
 
-        if (lexer.next_is("inherit") || lexer.next_is("readonly") || lexer.next_is("attribute")) {
+        if (lexer.next_is("inherit"sv) || lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
             parse_attribute(extended_attributes, interface);
             continue;
         }
 
-        if (lexer.next_is("getter")) {
+        if (lexer.next_is("getter"sv)) {
             parse_getter(extended_attributes, interface);
             continue;
         }
 
-        if (lexer.next_is("setter")) {
+        if (lexer.next_is("setter"sv)) {
             parse_setter(extended_attributes, interface);
             continue;
         }
 
-        if (lexer.next_is("deleter")) {
+        if (lexer.next_is("deleter"sv)) {
             parse_deleter(extended_attributes, interface);
             continue;
         }
 
-        bool is_static = lexer.consume_specific("static");
+        bool is_static = lexer.consume_specific("static"sv);
         if (!is_static) {
             parse_function(extended_attributes, interface, IsStatic::No);
         } else {
             consume_whitespace();
-            if (lexer.next_is("readonly") || lexer.next_is("attribute")) {
+            if (lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
                 parse_attribute(extended_attributes, interface, IsStatic::Yes);
             } else {
                 parse_function(extended_attributes, interface, IsStatic::Yes);
@@ -922,7 +922,7 @@ void Parser::parse_typedef(Interface& interface)
 void Parser::parse_dictionary(HashMap<ByteString, ByteString> extended_attributes, Interface& interface)
 {
     bool partial = false;
-    if (lexer.next_is("partial")) {
+    if (lexer.next_is("partial"sv)) {
         assert_string("partial"sv);
         consume_whitespace();
         partial = true;
@@ -1061,19 +1061,19 @@ void Parser::parse_non_interface_entities(bool allow_interface, Interface& inter
         HashMap<ByteString, ByteString> extended_attributes;
         if (lexer.consume_specific('['))
             extended_attributes = parse_extended_attributes();
-        if (lexer.next_is("dictionary") || lexer.next_is("partial dictionary")) {
+        if (lexer.next_is("dictionary"sv) || lexer.next_is("partial dictionary"sv)) {
             parse_dictionary(extended_attributes, interface);
-        } else if (lexer.next_is("enum")) {
+        } else if (lexer.next_is("enum"sv)) {
             parse_enumeration(extended_attributes, interface);
-        } else if (lexer.next_is("typedef")) {
+        } else if (lexer.next_is("typedef"sv)) {
             parse_typedef(interface);
         } else if (lexer.next_is("partial interface"sv)) {
             parse_partial_interface(extended_attributes, interface);
-        } else if (lexer.next_is("interface mixin")) {
+        } else if (lexer.next_is("interface mixin"sv)) {
             parse_interface_mixin(interface);
-        } else if (lexer.next_is("callback")) {
+        } else if (lexer.next_is("callback"sv)) {
             parse_callback_function(extended_attributes, interface);
-        } else if ((allow_interface && !lexer.next_is("interface") && !lexer.next_is("namespace")) || !allow_interface) {
+        } else if ((allow_interface && !lexer.next_is("interface"sv) && !lexer.next_is("namespace"sv)) || !allow_interface) {
             auto current_offset = lexer.tell();
             auto name = parse_identifier_ending_with_space();
             consume_whitespace();
diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp
index 8b5763946c9..c037843bb59 100644
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -139,7 +139,7 @@ ByteString Token::string_value(StringValueStatus& status) const
 
         // Line continuation
         if (lexer.next_is('\n') || lexer.next_is('\r')) {
-            if (lexer.next_is("\r\n"))
+            if (lexer.next_is("\r\n"sv))
                 lexer.ignore();
             lexer.ignore();
             continue;
diff --git a/Libraries/LibLine/Editor.cpp b/Libraries/LibLine/Editor.cpp
index 500a8010169..d6cec44ba19 100644
--- a/Libraries/LibLine/Editor.cpp
+++ b/Libraries/LibLine/Editor.cpp
@@ -84,23 +84,23 @@ Configuration Configuration::from_config(StringView libname)
                 key = key_lexer.consume_escaped_character();
                 escape = false;
             } else {
-                if (key_lexer.next_is("alt+")) {
+                if (key_lexer.next_is("alt+"sv)) {
                     alt = key_lexer.consume_specific("alt+"sv);
                     continue;
                 }
-                if (key_lexer.next_is("^[")) {
+                if (key_lexer.next_is("^["sv)) {
                     alt = key_lexer.consume_specific("^["sv);
                     continue;
                 }
-                if (key_lexer.next_is("^")) {
+                if (key_lexer.next_is("^"sv)) {
                     has_ctrl = key_lexer.consume_specific("^"sv);
                     continue;
                 }
-                if (key_lexer.next_is("ctrl+")) {
+                if (key_lexer.next_is("ctrl+"sv)) {
                     has_ctrl = key_lexer.consume_specific("ctrl+"sv);
                     continue;
                 }
-                if (key_lexer.next_is("\\")) {
+                if (key_lexer.next_is("\\"sv)) {
                     escape = true;
                     continue;
                 }
diff --git a/Libraries/LibWeb/HTML/Dates.cpp b/Libraries/LibWeb/HTML/Dates.cpp
index 575a712fa18..a9dcbdc9db4 100644
--- a/Libraries/LibWeb/HTML/Dates.cpp
+++ b/Libraries/LibWeb/HTML/Dates.cpp
@@ -508,7 +508,7 @@ Optional<DateAndTime> parse_a_local_date_and_time_string(StringView input_view)
         return {};
     // 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL
     //    LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character.
-    if (!input.consume_specific("T") && !input.consume_specific(" "))
+    if (!input.consume_specific('T') && !input.consume_specific(' '))
         return {};
     // 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail.
     auto hour_minute_second = parse_a_time_component(input);
diff --git a/Libraries/LibXML/Parser/Parser.cpp b/Libraries/LibXML/Parser/Parser.cpp
index 9106f4d37da..d79be4ed728 100644
--- a/Libraries/LibXML/Parser/Parser.cpp
+++ b/Libraries/LibXML/Parser/Parser.cpp
@@ -5,6 +5,7 @@
  */
 
 #include <AK/StringConversions.h>
+#include <AK/Utf8View.h>
 #include <LibXML/DOM/Document.h>
 #include <LibXML/Parser/Parser.h>
 
@@ -523,7 +524,7 @@ ErrorOr<void, ParseError> Parser::parse_processing_instruction()
     auto target = TRY(parse_processing_instruction_target());
     ByteString data;
     if (auto result = skip_whitespace(Required::Yes); !result.is_error())
-        data = m_lexer.consume_until("?>");
+        data = m_lexer.consume_until("?>"sv);
     TRY(expect("?>"sv));
 
     append_processing_instruction(target, data);
@@ -1714,7 +1715,7 @@ ErrorOr<StringView, ParseError> Parser::parse_cdata_section()
     auto accept = accept_rule();
 
     auto section_start = m_lexer.tell();
-    while (!m_lexer.next_is("]]>")) {
+    while (!m_lexer.next_is("]]>"sv)) {
         if (m_lexer.is_eof())
             break;
         m_lexer.ignore();
diff --git a/Tests/AK/TestGenericLexer.cpp b/Tests/AK/TestGenericLexer.cpp
index 3dd04299f29..cfb443cd70a 100644
--- a/Tests/AK/TestGenericLexer.cpp
+++ b/Tests/AK/TestGenericLexer.cpp
@@ -45,7 +45,6 @@ TEST_CASE(should_constexpr_next_is)
 {
     constexpr GenericLexer sut("abcdef"sv);
     static_assert(sut.next_is('a'));
-    static_assert(sut.next_is("abc"));
     static_assert(sut.next_is("abc"sv));
 }
 
@@ -124,16 +123,6 @@ TEST_CASE(should_constexpr_ignore_until)
     static_assert(sut.peek() == 'd');
 }
 
-TEST_CASE(should_constexpr_ignore_until_cstring)
-{
-    constexpr auto sut = [] {
-        GenericLexer sut("abcdef"sv);
-        sut.ignore_until("cde");
-        return sut;
-    }();
-    static_assert(sut.peek() == 'c');
-}
-
 TEST_CASE(should_constexpr_next_is_pred)
 {
     constexpr auto pred = [](auto c) {