AK+Libraries: Reduce API surface of GenericLexer a bit

* Remove completely unused methods.
* Deduplicate methods that were overloaded with both StringView and
  char const* parameters.

A future commit will templatize GenericLexer by char type. This patch
serves to make that a tiny bit easier.
This commit is contained in:
Timothy Flynn 2025-08-05 13:22:17 -04:00 committed by Tim Flynn
commit 28d9d3a2c7
Notes: github-actions[bot] 2025-08-13 13:58:03 +00:00
9 changed files with 41 additions and 99 deletions

View file

@ -57,17 +57,6 @@ StringView GenericLexer::consume_until(char stop)
return m_input.substring_view(start, length);
}
// Consume and return characters until the NUL-terminated string `stop` is found.
// Advances m_index one character at a time while the lexer is not at EOF and
// next_is(stop) is false. The loop exits as soon as next_is(stop) is true, so
// the `stop` sequence itself is not consumed and is not part of the returned view.
// If `stop` is never matched, everything up to EOF is returned.
StringView GenericLexer::consume_until(char const* stop)
{
// Remember where consumption began so the consumed span can be sliced out of m_input.
size_t start = m_index;
while (!is_eof() && !next_is(stop))
m_index++;
// Number of characters skipped over by the loop above.
size_t length = m_index - start;
return m_input.substring_view(start, length);
}
// Consume and return characters until the string `stop` is found
StringView GenericLexer::consume_until(StringView stop)
{
@ -199,18 +188,6 @@ template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
// Consume a quoted string via consume_quoted_string(escape_char) and build a
// ByteString from it using consume_escaped_character(escape_char).
// Returns an empty Optional when consume_quoted_string() yields a null view.
Optional<ByteString> GenericLexer::consume_and_unescape_string(char escape_char)
{
auto view = consume_quoted_string(escape_char);
if (view.is_null())
return {};
StringBuilder builder;
// NOTE(review): the loop only uses view.length() as an iteration count; the
// characters appended come from consume_escaped_character(), not from indexing
// `view`. Presumably that reads from the lexer's current position rather than
// from the quoted contents - confirm this is the intended behavior.
for (size_t i = 0; i < view.length(); ++i)
builder.append(consume_escaped_character(escape_char));
return builder.to_byte_string();
}
auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
{
if (!consume_specific("\\u"sv))

View file

@ -9,7 +9,6 @@
#include <AK/NonnullOwnPtr.h>
#include <AK/RedBlackTree.h>
#include <AK/Result.h>
#include <AK/String.h>
#include <AK/StringView.h>
namespace AK {
@ -54,14 +53,6 @@ public:
return true;
}
// Tests whether the upcoming characters match the NUL-terminated string `expected`.
// Compares peek(i) against expected[i] for each index until the terminating '\0'
// of `expected`, returning false at the first mismatch. Returns true when every
// character matched, which includes the case where `expected` is empty.
// Does not modify the lexer (the method is const).
constexpr bool next_is(char const* expected) const
{
// Walk `expected` up to (but not including) its NUL terminator.
for (size_t i = 0; expected[i] != '\0'; ++i)
if (peek(i) != expected[i])
return false;
return true;
}
constexpr void retreat()
{
VERIFY(m_index > 0);
@ -80,30 +71,22 @@ public:
return m_input[m_index++];
}
template<typename T>
constexpr bool consume_specific(T const& next)
constexpr bool consume_specific(StringView next)
{
if (!next_is(next))
return false;
if constexpr (requires { next.length(); }) {
ignore(next.length());
} else {
ignore(sizeof(next));
}
return true;
}
bool consume_specific(ByteString next) = delete;
bool consume_specific(String const& next)
constexpr bool consume_specific(char next)
{
return consume_specific(next.bytes_as_string_view());
}
if (!next_is(next))
return false;
constexpr bool consume_specific(char const* next)
{
return consume_specific(StringView { next, __builtin_strlen(next) });
ignore(sizeof(next));
return true;
}
constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
@ -125,10 +108,9 @@ public:
StringView consume_all();
StringView consume_line();
StringView consume_until(char);
StringView consume_until(char const*);
StringView consume_until(StringView);
StringView consume_quoted_string(char escape_char = 0);
Optional<ByteString> consume_and_unescape_string(char escape_char = '\\');
template<Integral T>
ErrorOr<T> consume_decimal_integer();
@ -152,13 +134,6 @@ public:
}
}
// Skips characters until the NUL-terminated string `stop` is the next thing in
// the input, or until EOF. The loop stops as soon as next_is(stop) is true, so
// the `stop` sequence itself is left unconsumed.
constexpr void ignore_until(char const* stop)
{
while (!is_eof() && !next_is(stop)) {
++m_index;
}
}
/*
* Conditions are used to match arbitrary characters. You can use lambdas,
* ctype functions, or is_any_of() and its derivatives (see below).

View file

@ -42,7 +42,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
case PreStartData:
if (lexer.consume_specific("-----BEGIN "sv)) {
state = Started;
header_type = lexer.consume_until("-----");
header_type = lexer.consume_until("-----"sv);
}
lexer.consume_line();
break;
@ -50,7 +50,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
if (lexer.consume_specific("-----END "sv)) {
state = Ended;
if (lexer.consume_until("-----") != header_type) {
if (lexer.consume_until("-----"sv) != header_type) {
dbgln("PEM type mismatch");
return {};
}
@ -98,7 +98,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
case Junk:
if (lexer.consume_specific("-----BEGIN "sv)) {
state = Parsing;
header_type = lexer.consume_until("-----");
header_type = lexer.consume_until("-----"sv);
}
lexer.consume_line();
break;
@ -106,7 +106,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
if (lexer.consume_specific("-----END "sv)) {
state = Junk;
if (lexer.consume_until("-----") != header_type) {
if (lexer.consume_until("-----"sv) != header_type) {
return Error::from_string_literal("PEM type mismatch");
}
lexer.consume_line();

View file

@ -732,63 +732,63 @@ void Parser::parse_interface(Interface& interface)
interface.has_unscopable_member = true;
}
if (lexer.next_is("async")) {
if (lexer.next_is("async"sv)) {
parse_async_iterable(interface);
continue;
}
if (lexer.next_is("constructor")) {
if (lexer.next_is("constructor"sv)) {
parse_constructor(extended_attributes, interface);
continue;
}
if (lexer.next_is("const")) {
if (lexer.next_is("const"sv)) {
parse_constant(interface);
continue;
}
if (lexer.next_is("stringifier")) {
if (lexer.next_is("stringifier"sv)) {
parse_stringifier(extended_attributes, interface);
continue;
}
if (lexer.next_is("iterable")) {
if (lexer.next_is("iterable"sv)) {
parse_iterable(interface);
continue;
}
if (lexer.next_is("setlike")) {
if (lexer.next_is("setlike"sv)) {
bool is_readonly = false;
parse_setlike(interface, is_readonly);
continue;
}
if (lexer.next_is("inherit") || lexer.next_is("readonly") || lexer.next_is("attribute")) {
if (lexer.next_is("inherit"sv) || lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
parse_attribute(extended_attributes, interface);
continue;
}
if (lexer.next_is("getter")) {
if (lexer.next_is("getter"sv)) {
parse_getter(extended_attributes, interface);
continue;
}
if (lexer.next_is("setter")) {
if (lexer.next_is("setter"sv)) {
parse_setter(extended_attributes, interface);
continue;
}
if (lexer.next_is("deleter")) {
if (lexer.next_is("deleter"sv)) {
parse_deleter(extended_attributes, interface);
continue;
}
bool is_static = lexer.consume_specific("static");
bool is_static = lexer.consume_specific("static"sv);
if (!is_static) {
parse_function(extended_attributes, interface, IsStatic::No);
} else {
consume_whitespace();
if (lexer.next_is("readonly") || lexer.next_is("attribute")) {
if (lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
parse_attribute(extended_attributes, interface, IsStatic::Yes);
} else {
parse_function(extended_attributes, interface, IsStatic::Yes);
@ -922,7 +922,7 @@ void Parser::parse_typedef(Interface& interface)
void Parser::parse_dictionary(HashMap<ByteString, ByteString> extended_attributes, Interface& interface)
{
bool partial = false;
if (lexer.next_is("partial")) {
if (lexer.next_is("partial"sv)) {
assert_string("partial"sv);
consume_whitespace();
partial = true;
@ -1061,19 +1061,19 @@ void Parser::parse_non_interface_entities(bool allow_interface, Interface& inter
HashMap<ByteString, ByteString> extended_attributes;
if (lexer.consume_specific('['))
extended_attributes = parse_extended_attributes();
if (lexer.next_is("dictionary") || lexer.next_is("partial dictionary")) {
if (lexer.next_is("dictionary"sv) || lexer.next_is("partial dictionary"sv)) {
parse_dictionary(extended_attributes, interface);
} else if (lexer.next_is("enum")) {
} else if (lexer.next_is("enum"sv)) {
parse_enumeration(extended_attributes, interface);
} else if (lexer.next_is("typedef")) {
} else if (lexer.next_is("typedef"sv)) {
parse_typedef(interface);
} else if (lexer.next_is("partial interface"sv)) {
parse_partial_interface(extended_attributes, interface);
} else if (lexer.next_is("interface mixin")) {
} else if (lexer.next_is("interface mixin"sv)) {
parse_interface_mixin(interface);
} else if (lexer.next_is("callback")) {
} else if (lexer.next_is("callback"sv)) {
parse_callback_function(extended_attributes, interface);
} else if ((allow_interface && !lexer.next_is("interface") && !lexer.next_is("namespace")) || !allow_interface) {
} else if ((allow_interface && !lexer.next_is("interface"sv) && !lexer.next_is("namespace"sv)) || !allow_interface) {
auto current_offset = lexer.tell();
auto name = parse_identifier_ending_with_space();
consume_whitespace();

View file

@ -139,7 +139,7 @@ ByteString Token::string_value(StringValueStatus& status) const
// Line continuation
if (lexer.next_is('\n') || lexer.next_is('\r')) {
if (lexer.next_is("\r\n"))
if (lexer.next_is("\r\n"sv))
lexer.ignore();
lexer.ignore();
continue;

View file

@ -84,23 +84,23 @@ Configuration Configuration::from_config(StringView libname)
key = key_lexer.consume_escaped_character();
escape = false;
} else {
if (key_lexer.next_is("alt+")) {
if (key_lexer.next_is("alt+"sv)) {
alt = key_lexer.consume_specific("alt+"sv);
continue;
}
if (key_lexer.next_is("^[")) {
if (key_lexer.next_is("^["sv)) {
alt = key_lexer.consume_specific("^["sv);
continue;
}
if (key_lexer.next_is("^")) {
if (key_lexer.next_is("^"sv)) {
has_ctrl = key_lexer.consume_specific("^"sv);
continue;
}
if (key_lexer.next_is("ctrl+")) {
if (key_lexer.next_is("ctrl+"sv)) {
has_ctrl = key_lexer.consume_specific("ctrl+"sv);
continue;
}
if (key_lexer.next_is("\\")) {
if (key_lexer.next_is("\\"sv)) {
escape = true;
continue;
}

View file

@ -508,7 +508,7 @@ Optional<DateAndTime> parse_a_local_date_and_time_string(StringView input_view)
return {};
// 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL
// LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character.
if (!input.consume_specific("T") && !input.consume_specific(" "))
if (!input.consume_specific('T') && !input.consume_specific(' '))
return {};
// 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail.
auto hour_minute_second = parse_a_time_component(input);

View file

@ -5,6 +5,7 @@
*/
#include <AK/StringConversions.h>
#include <AK/Utf8View.h>
#include <LibXML/DOM/Document.h>
#include <LibXML/Parser/Parser.h>
@ -523,7 +524,7 @@ ErrorOr<void, ParseError> Parser::parse_processing_instruction()
auto target = TRY(parse_processing_instruction_target());
ByteString data;
if (auto result = skip_whitespace(Required::Yes); !result.is_error())
data = m_lexer.consume_until("?>");
data = m_lexer.consume_until("?>"sv);
TRY(expect("?>"sv));
append_processing_instruction(target, data);
@ -1714,7 +1715,7 @@ ErrorOr<StringView, ParseError> Parser::parse_cdata_section()
auto accept = accept_rule();
auto section_start = m_lexer.tell();
while (!m_lexer.next_is("]]>")) {
while (!m_lexer.next_is("]]>"sv)) {
if (m_lexer.is_eof())
break;
m_lexer.ignore();

View file

@ -45,7 +45,6 @@ TEST_CASE(should_constexpr_next_is)
{
constexpr GenericLexer sut("abcdef"sv);
static_assert(sut.next_is('a'));
static_assert(sut.next_is("abc"));
static_assert(sut.next_is("abc"sv));
}
@ -124,16 +123,6 @@ TEST_CASE(should_constexpr_ignore_until)
static_assert(sut.peek() == 'd');
}
// Checks at compile time that ignore_until() with a C-string argument stops at
// the start of the matched sequence: after ignoring until "cde" in "abcdef",
// the next character peeked is 'c'.
TEST_CASE(should_constexpr_ignore_until_cstring)
{
// The lexer is mutated inside an immediately-invoked lambda so that the
// resulting state can initialize a constexpr variable.
constexpr auto sut = [] {
GenericLexer sut("abcdef"sv);
sut.ignore_until("cde");
return sut;
}();
static_assert(sut.peek() == 'c');
}
TEST_CASE(should_constexpr_next_is_pred)
{
constexpr auto pred = [](auto c) {