mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-27 04:37:22 +00:00
AK+Libraries: Reduce API surface of GenericLexer a bit
* Remove completely unused methods.
* Deduplicate methods that were overloaded with both StringView and char const* parameters.

A future commit will templatize GenericLexer by char type. This patch serves to make that a tiny bit easier.
This commit is contained in:
parent
213683956c
commit
28d9d3a2c7
Notes:
github-actions[bot]
2025-08-13 13:58:03 +00:00
Author: https://github.com/trflynn89
Commit: 28d9d3a2c7
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
9 changed files with 41 additions and 99 deletions
|
@ -57,17 +57,6 @@ StringView GenericLexer::consume_until(char stop)
|
||||||
return m_input.substring_view(start, length);
|
return m_input.substring_view(start, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consume and return characters until the string `stop` is found
|
|
||||||
StringView GenericLexer::consume_until(char const* stop)
|
|
||||||
{
|
|
||||||
size_t start = m_index;
|
|
||||||
while (!is_eof() && !next_is(stop))
|
|
||||||
m_index++;
|
|
||||||
size_t length = m_index - start;
|
|
||||||
|
|
||||||
return m_input.substring_view(start, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Consume and return characters until the string `stop` is found
|
// Consume and return characters until the string `stop` is found
|
||||||
StringView GenericLexer::consume_until(StringView stop)
|
StringView GenericLexer::consume_until(StringView stop)
|
||||||
{
|
{
|
||||||
|
@ -199,18 +188,6 @@ template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
|
||||||
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
|
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
|
||||||
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
|
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
|
||||||
|
|
||||||
Optional<ByteString> GenericLexer::consume_and_unescape_string(char escape_char)
|
|
||||||
{
|
|
||||||
auto view = consume_quoted_string(escape_char);
|
|
||||||
if (view.is_null())
|
|
||||||
return {};
|
|
||||||
|
|
||||||
StringBuilder builder;
|
|
||||||
for (size_t i = 0; i < view.length(); ++i)
|
|
||||||
builder.append(consume_escaped_character(escape_char));
|
|
||||||
return builder.to_byte_string();
|
|
||||||
}
|
|
||||||
|
|
||||||
auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
|
auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
|
||||||
{
|
{
|
||||||
if (!consume_specific("\\u"sv))
|
if (!consume_specific("\\u"sv))
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
#include <AK/NonnullOwnPtr.h>
|
#include <AK/NonnullOwnPtr.h>
|
||||||
#include <AK/RedBlackTree.h>
|
#include <AK/RedBlackTree.h>
|
||||||
#include <AK/Result.h>
|
#include <AK/Result.h>
|
||||||
#include <AK/String.h>
|
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
@ -54,14 +53,6 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool next_is(char const* expected) const
|
|
||||||
{
|
|
||||||
for (size_t i = 0; expected[i] != '\0'; ++i)
|
|
||||||
if (peek(i) != expected[i])
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr void retreat()
|
constexpr void retreat()
|
||||||
{
|
{
|
||||||
VERIFY(m_index > 0);
|
VERIFY(m_index > 0);
|
||||||
|
@ -80,30 +71,22 @@ public:
|
||||||
return m_input[m_index++];
|
return m_input[m_index++];
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
constexpr bool consume_specific(StringView next)
|
||||||
constexpr bool consume_specific(T const& next)
|
|
||||||
{
|
{
|
||||||
if (!next_is(next))
|
if (!next_is(next))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if constexpr (requires { next.length(); }) {
|
ignore(next.length());
|
||||||
ignore(next.length());
|
|
||||||
} else {
|
|
||||||
ignore(sizeof(next));
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool consume_specific(ByteString next) = delete;
|
constexpr bool consume_specific(char next)
|
||||||
|
|
||||||
bool consume_specific(String const& next)
|
|
||||||
{
|
{
|
||||||
return consume_specific(next.bytes_as_string_view());
|
if (!next_is(next))
|
||||||
}
|
return false;
|
||||||
|
|
||||||
constexpr bool consume_specific(char const* next)
|
ignore(sizeof(next));
|
||||||
{
|
return true;
|
||||||
return consume_specific(StringView { next, __builtin_strlen(next) });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
|
constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
|
||||||
|
@ -125,10 +108,9 @@ public:
|
||||||
StringView consume_all();
|
StringView consume_all();
|
||||||
StringView consume_line();
|
StringView consume_line();
|
||||||
StringView consume_until(char);
|
StringView consume_until(char);
|
||||||
StringView consume_until(char const*);
|
|
||||||
StringView consume_until(StringView);
|
StringView consume_until(StringView);
|
||||||
StringView consume_quoted_string(char escape_char = 0);
|
StringView consume_quoted_string(char escape_char = 0);
|
||||||
Optional<ByteString> consume_and_unescape_string(char escape_char = '\\');
|
|
||||||
template<Integral T>
|
template<Integral T>
|
||||||
ErrorOr<T> consume_decimal_integer();
|
ErrorOr<T> consume_decimal_integer();
|
||||||
|
|
||||||
|
@ -152,13 +134,6 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr void ignore_until(char const* stop)
|
|
||||||
{
|
|
||||||
while (!is_eof() && !next_is(stop)) {
|
|
||||||
++m_index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Conditions are used to match arbitrary characters. You can use lambdas,
|
* Conditions are used to match arbitrary characters. You can use lambdas,
|
||||||
* ctype functions, or is_any_of() and its derivatives (see below).
|
* ctype functions, or is_any_of() and its derivatives (see below).
|
||||||
|
|
|
@ -42,7 +42,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
|
||||||
case PreStartData:
|
case PreStartData:
|
||||||
if (lexer.consume_specific("-----BEGIN "sv)) {
|
if (lexer.consume_specific("-----BEGIN "sv)) {
|
||||||
state = Started;
|
state = Started;
|
||||||
header_type = lexer.consume_until("-----");
|
header_type = lexer.consume_until("-----"sv);
|
||||||
}
|
}
|
||||||
lexer.consume_line();
|
lexer.consume_line();
|
||||||
break;
|
break;
|
||||||
|
@ -50,7 +50,7 @@ DecodedPEM decode_pem(ReadonlyBytes data)
|
||||||
if (lexer.consume_specific("-----END "sv)) {
|
if (lexer.consume_specific("-----END "sv)) {
|
||||||
state = Ended;
|
state = Ended;
|
||||||
|
|
||||||
if (lexer.consume_until("-----") != header_type) {
|
if (lexer.consume_until("-----"sv) != header_type) {
|
||||||
dbgln("PEM type mismatch");
|
dbgln("PEM type mismatch");
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -98,7 +98,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
|
||||||
case Junk:
|
case Junk:
|
||||||
if (lexer.consume_specific("-----BEGIN "sv)) {
|
if (lexer.consume_specific("-----BEGIN "sv)) {
|
||||||
state = Parsing;
|
state = Parsing;
|
||||||
header_type = lexer.consume_until("-----");
|
header_type = lexer.consume_until("-----"sv);
|
||||||
}
|
}
|
||||||
lexer.consume_line();
|
lexer.consume_line();
|
||||||
break;
|
break;
|
||||||
|
@ -106,7 +106,7 @@ ErrorOr<Vector<DecodedPEM>> decode_pems(ReadonlyBytes data)
|
||||||
if (lexer.consume_specific("-----END "sv)) {
|
if (lexer.consume_specific("-----END "sv)) {
|
||||||
state = Junk;
|
state = Junk;
|
||||||
|
|
||||||
if (lexer.consume_until("-----") != header_type) {
|
if (lexer.consume_until("-----"sv) != header_type) {
|
||||||
return Error::from_string_literal("PEM type mismatch");
|
return Error::from_string_literal("PEM type mismatch");
|
||||||
}
|
}
|
||||||
lexer.consume_line();
|
lexer.consume_line();
|
||||||
|
|
|
@ -732,63 +732,63 @@ void Parser::parse_interface(Interface& interface)
|
||||||
interface.has_unscopable_member = true;
|
interface.has_unscopable_member = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("async")) {
|
if (lexer.next_is("async"sv)) {
|
||||||
parse_async_iterable(interface);
|
parse_async_iterable(interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("constructor")) {
|
if (lexer.next_is("constructor"sv)) {
|
||||||
parse_constructor(extended_attributes, interface);
|
parse_constructor(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("const")) {
|
if (lexer.next_is("const"sv)) {
|
||||||
parse_constant(interface);
|
parse_constant(interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("stringifier")) {
|
if (lexer.next_is("stringifier"sv)) {
|
||||||
parse_stringifier(extended_attributes, interface);
|
parse_stringifier(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("iterable")) {
|
if (lexer.next_is("iterable"sv)) {
|
||||||
parse_iterable(interface);
|
parse_iterable(interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("setlike")) {
|
if (lexer.next_is("setlike"sv)) {
|
||||||
bool is_readonly = false;
|
bool is_readonly = false;
|
||||||
parse_setlike(interface, is_readonly);
|
parse_setlike(interface, is_readonly);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("inherit") || lexer.next_is("readonly") || lexer.next_is("attribute")) {
|
if (lexer.next_is("inherit"sv) || lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
|
||||||
parse_attribute(extended_attributes, interface);
|
parse_attribute(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("getter")) {
|
if (lexer.next_is("getter"sv)) {
|
||||||
parse_getter(extended_attributes, interface);
|
parse_getter(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("setter")) {
|
if (lexer.next_is("setter"sv)) {
|
||||||
parse_setter(extended_attributes, interface);
|
parse_setter(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lexer.next_is("deleter")) {
|
if (lexer.next_is("deleter"sv)) {
|
||||||
parse_deleter(extended_attributes, interface);
|
parse_deleter(extended_attributes, interface);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_static = lexer.consume_specific("static");
|
bool is_static = lexer.consume_specific("static"sv);
|
||||||
if (!is_static) {
|
if (!is_static) {
|
||||||
parse_function(extended_attributes, interface, IsStatic::No);
|
parse_function(extended_attributes, interface, IsStatic::No);
|
||||||
} else {
|
} else {
|
||||||
consume_whitespace();
|
consume_whitespace();
|
||||||
if (lexer.next_is("readonly") || lexer.next_is("attribute")) {
|
if (lexer.next_is("readonly"sv) || lexer.next_is("attribute"sv)) {
|
||||||
parse_attribute(extended_attributes, interface, IsStatic::Yes);
|
parse_attribute(extended_attributes, interface, IsStatic::Yes);
|
||||||
} else {
|
} else {
|
||||||
parse_function(extended_attributes, interface, IsStatic::Yes);
|
parse_function(extended_attributes, interface, IsStatic::Yes);
|
||||||
|
@ -922,7 +922,7 @@ void Parser::parse_typedef(Interface& interface)
|
||||||
void Parser::parse_dictionary(HashMap<ByteString, ByteString> extended_attributes, Interface& interface)
|
void Parser::parse_dictionary(HashMap<ByteString, ByteString> extended_attributes, Interface& interface)
|
||||||
{
|
{
|
||||||
bool partial = false;
|
bool partial = false;
|
||||||
if (lexer.next_is("partial")) {
|
if (lexer.next_is("partial"sv)) {
|
||||||
assert_string("partial"sv);
|
assert_string("partial"sv);
|
||||||
consume_whitespace();
|
consume_whitespace();
|
||||||
partial = true;
|
partial = true;
|
||||||
|
@ -1061,19 +1061,19 @@ void Parser::parse_non_interface_entities(bool allow_interface, Interface& inter
|
||||||
HashMap<ByteString, ByteString> extended_attributes;
|
HashMap<ByteString, ByteString> extended_attributes;
|
||||||
if (lexer.consume_specific('['))
|
if (lexer.consume_specific('['))
|
||||||
extended_attributes = parse_extended_attributes();
|
extended_attributes = parse_extended_attributes();
|
||||||
if (lexer.next_is("dictionary") || lexer.next_is("partial dictionary")) {
|
if (lexer.next_is("dictionary"sv) || lexer.next_is("partial dictionary"sv)) {
|
||||||
parse_dictionary(extended_attributes, interface);
|
parse_dictionary(extended_attributes, interface);
|
||||||
} else if (lexer.next_is("enum")) {
|
} else if (lexer.next_is("enum"sv)) {
|
||||||
parse_enumeration(extended_attributes, interface);
|
parse_enumeration(extended_attributes, interface);
|
||||||
} else if (lexer.next_is("typedef")) {
|
} else if (lexer.next_is("typedef"sv)) {
|
||||||
parse_typedef(interface);
|
parse_typedef(interface);
|
||||||
} else if (lexer.next_is("partial interface"sv)) {
|
} else if (lexer.next_is("partial interface"sv)) {
|
||||||
parse_partial_interface(extended_attributes, interface);
|
parse_partial_interface(extended_attributes, interface);
|
||||||
} else if (lexer.next_is("interface mixin")) {
|
} else if (lexer.next_is("interface mixin"sv)) {
|
||||||
parse_interface_mixin(interface);
|
parse_interface_mixin(interface);
|
||||||
} else if (lexer.next_is("callback")) {
|
} else if (lexer.next_is("callback"sv)) {
|
||||||
parse_callback_function(extended_attributes, interface);
|
parse_callback_function(extended_attributes, interface);
|
||||||
} else if ((allow_interface && !lexer.next_is("interface") && !lexer.next_is("namespace")) || !allow_interface) {
|
} else if ((allow_interface && !lexer.next_is("interface"sv) && !lexer.next_is("namespace"sv)) || !allow_interface) {
|
||||||
auto current_offset = lexer.tell();
|
auto current_offset = lexer.tell();
|
||||||
auto name = parse_identifier_ending_with_space();
|
auto name = parse_identifier_ending_with_space();
|
||||||
consume_whitespace();
|
consume_whitespace();
|
||||||
|
|
|
@ -139,7 +139,7 @@ ByteString Token::string_value(StringValueStatus& status) const
|
||||||
|
|
||||||
// Line continuation
|
// Line continuation
|
||||||
if (lexer.next_is('\n') || lexer.next_is('\r')) {
|
if (lexer.next_is('\n') || lexer.next_is('\r')) {
|
||||||
if (lexer.next_is("\r\n"))
|
if (lexer.next_is("\r\n"sv))
|
||||||
lexer.ignore();
|
lexer.ignore();
|
||||||
lexer.ignore();
|
lexer.ignore();
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -84,23 +84,23 @@ Configuration Configuration::from_config(StringView libname)
|
||||||
key = key_lexer.consume_escaped_character();
|
key = key_lexer.consume_escaped_character();
|
||||||
escape = false;
|
escape = false;
|
||||||
} else {
|
} else {
|
||||||
if (key_lexer.next_is("alt+")) {
|
if (key_lexer.next_is("alt+"sv)) {
|
||||||
alt = key_lexer.consume_specific("alt+"sv);
|
alt = key_lexer.consume_specific("alt+"sv);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (key_lexer.next_is("^[")) {
|
if (key_lexer.next_is("^["sv)) {
|
||||||
alt = key_lexer.consume_specific("^["sv);
|
alt = key_lexer.consume_specific("^["sv);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (key_lexer.next_is("^")) {
|
if (key_lexer.next_is("^"sv)) {
|
||||||
has_ctrl = key_lexer.consume_specific("^"sv);
|
has_ctrl = key_lexer.consume_specific("^"sv);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (key_lexer.next_is("ctrl+")) {
|
if (key_lexer.next_is("ctrl+"sv)) {
|
||||||
has_ctrl = key_lexer.consume_specific("ctrl+"sv);
|
has_ctrl = key_lexer.consume_specific("ctrl+"sv);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (key_lexer.next_is("\\")) {
|
if (key_lexer.next_is("\\"sv)) {
|
||||||
escape = true;
|
escape = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -508,7 +508,7 @@ Optional<DateAndTime> parse_a_local_date_and_time_string(StringView input_view)
|
||||||
return {};
|
return {};
|
||||||
// 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL
|
// 4. If position is beyond the end of input or if the character at position is neither a U+0054 LATIN CAPITAL
|
||||||
// LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character.
|
// LETTER T character (T) nor a U+0020 SPACE character, then fail. Otherwise, move position forwards one character.
|
||||||
if (!input.consume_specific("T") && !input.consume_specific(" "))
|
if (!input.consume_specific('T') && !input.consume_specific(' '))
|
||||||
return {};
|
return {};
|
||||||
// 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail.
|
// 5. Parse a time component to obtain hour, minute, and second. If this returns nothing, then fail.
|
||||||
auto hour_minute_second = parse_a_time_component(input);
|
auto hour_minute_second = parse_a_time_component(input);
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/StringConversions.h>
|
#include <AK/StringConversions.h>
|
||||||
|
#include <AK/Utf8View.h>
|
||||||
#include <LibXML/DOM/Document.h>
|
#include <LibXML/DOM/Document.h>
|
||||||
#include <LibXML/Parser/Parser.h>
|
#include <LibXML/Parser/Parser.h>
|
||||||
|
|
||||||
|
@ -523,7 +524,7 @@ ErrorOr<void, ParseError> Parser::parse_processing_instruction()
|
||||||
auto target = TRY(parse_processing_instruction_target());
|
auto target = TRY(parse_processing_instruction_target());
|
||||||
ByteString data;
|
ByteString data;
|
||||||
if (auto result = skip_whitespace(Required::Yes); !result.is_error())
|
if (auto result = skip_whitespace(Required::Yes); !result.is_error())
|
||||||
data = m_lexer.consume_until("?>");
|
data = m_lexer.consume_until("?>"sv);
|
||||||
TRY(expect("?>"sv));
|
TRY(expect("?>"sv));
|
||||||
|
|
||||||
append_processing_instruction(target, data);
|
append_processing_instruction(target, data);
|
||||||
|
@ -1714,7 +1715,7 @@ ErrorOr<StringView, ParseError> Parser::parse_cdata_section()
|
||||||
auto accept = accept_rule();
|
auto accept = accept_rule();
|
||||||
|
|
||||||
auto section_start = m_lexer.tell();
|
auto section_start = m_lexer.tell();
|
||||||
while (!m_lexer.next_is("]]>")) {
|
while (!m_lexer.next_is("]]>"sv)) {
|
||||||
if (m_lexer.is_eof())
|
if (m_lexer.is_eof())
|
||||||
break;
|
break;
|
||||||
m_lexer.ignore();
|
m_lexer.ignore();
|
||||||
|
|
|
@ -45,7 +45,6 @@ TEST_CASE(should_constexpr_next_is)
|
||||||
{
|
{
|
||||||
constexpr GenericLexer sut("abcdef"sv);
|
constexpr GenericLexer sut("abcdef"sv);
|
||||||
static_assert(sut.next_is('a'));
|
static_assert(sut.next_is('a'));
|
||||||
static_assert(sut.next_is("abc"));
|
|
||||||
static_assert(sut.next_is("abc"sv));
|
static_assert(sut.next_is("abc"sv));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,16 +123,6 @@ TEST_CASE(should_constexpr_ignore_until)
|
||||||
static_assert(sut.peek() == 'd');
|
static_assert(sut.peek() == 'd');
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE(should_constexpr_ignore_until_cstring)
|
|
||||||
{
|
|
||||||
constexpr auto sut = [] {
|
|
||||||
GenericLexer sut("abcdef"sv);
|
|
||||||
sut.ignore_until("cde");
|
|
||||||
return sut;
|
|
||||||
}();
|
|
||||||
static_assert(sut.peek() == 'c');
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_CASE(should_constexpr_next_is_pred)
|
TEST_CASE(should_constexpr_next_is_pred)
|
||||||
{
|
{
|
||||||
constexpr auto pred = [](auto c) {
|
constexpr auto pred = [](auto c) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue