LibWeb/CSS: Remove tiny-oom propagation from CSS Tokenizer

Sam Atkins 2024-07-26 15:11:57 +01:00 committed by Andreas Kling
commit 89c5f25016
Notes: github-actions[bot] 2024-07-26 15:30:20 +00:00
4 changed files with 76 additions and 76 deletions
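
The change is mechanical throughout: tokenizer functions that could only fail on allocation ("tiny OOM") drop their ErrorOr<T> wrappers, TRY(...) call sites become plain calls, and the few places that still invoke a fallible AK API are wrapped in MUST(), which asserts on failure instead of propagating it. A minimal sketch of the pattern, with hypothetical helper names, assuming AK's ErrorOr, TRY, and MUST:

#include <AK/Error.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/StringView.h>

// Before: allocation failures bubble up through every caller.
static ErrorOr<String> build_greeting_before(StringView name)
{
    StringBuilder builder;
    TRY(builder.try_append("Hello, "sv));
    TRY(builder.try_append(name));
    return builder.to_string();
}

// After: infallible from the caller's point of view. The one call that
// still returns ErrorOr is wrapped in MUST(), which VERIFYs on error,
// so an out-of-memory condition now aborts instead of propagating.
static String build_greeting_after(StringView name)
{
    StringBuilder builder;
    builder.append("Hello, "sv); // infallible append
    builder.append(name);
    return MUST(builder.to_string());
}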

@@ -95,7 +95,7 @@ namespace Web::CSS::Parser {
 ErrorOr<Parser> Parser::create(ParsingContext const& context, StringView input, StringView encoding)
 {
-    auto tokens = TRY(Tokenizer::tokenize(input, encoding));
+    auto tokens = Tokenizer::tokenize(input, encoding);
     return Parser { context, move(tokens) };
 }

@@ -195,14 +195,14 @@ static inline bool is_E(u32 code_point)
     return code_point == 0x45;
 }
 
-ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding)
+Vector<Token> Tokenizer::tokenize(StringView input, StringView encoding)
 {
     // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
-    auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr<String> {
+    auto filter_code_points = [](StringView input, auto encoding) -> String {
         auto decoder = TextCodec::decoder_for(encoding);
         VERIFY(decoder.has_value());
-        auto decoded_input = TRY(decoder->to_utf8(input));
+        auto decoded_input = MUST(decoder->to_utf8(input));
 
         // OPTIMIZATION: If the input doesn't contain any CR or FF, we can skip the filtering
         bool const contains_cr_or_ff = [&] {
@@ -227,25 +227,25 @@ ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding
             // in input by a single U+000A LINE FEED (LF) code point.
             if (code_point == '\r') {
                 if (last_was_carriage_return) {
-                    TRY(builder.try_append('\n'));
+                    builder.append('\n');
                 } else {
                     last_was_carriage_return = true;
                 }
             } else {
                 if (last_was_carriage_return)
-                    TRY(builder.try_append('\n'));
+                    builder.append('\n');
 
                 if (code_point == '\n') {
                     if (!last_was_carriage_return)
-                        TRY(builder.try_append('\n'));
+                        builder.append('\n');
                 } else if (code_point == '\f') {
-                    TRY(builder.try_append('\n'));
+                    builder.append('\n');
                 // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�).
                 } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
-                    TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER));
+                    builder.append_code_point(REPLACEMENT_CHARACTER);
                 } else {
-                    TRY(builder.try_append_code_point(code_point));
+                    builder.append_code_point(code_point);
                 }
 
                 last_was_carriage_return = false;
@@ -254,7 +254,7 @@ ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding
         return builder.to_string_without_validation();
     };
 
-    Tokenizer tokenizer { TRY(filter_code_points(input, encoding)) };
+    Tokenizer tokenizer { filter_code_points(input, encoding) };
     return tokenizer.tokenize();
 }
@@ -265,15 +265,15 @@ Tokenizer::Tokenizer(String decoded_input)
 {
 }
 
-ErrorOr<Vector<Token>> Tokenizer::tokenize()
+Vector<Token> Tokenizer::tokenize()
 {
     Vector<Token> tokens;
     for (;;) {
         auto token_start = m_position;
-        auto token = TRY(consume_a_token());
+        auto token = consume_a_token();
         token.m_start_position = token_start;
         token.m_end_position = m_position;
-        TRY(tokens.try_append(token));
+        tokens.append(token);
 
         if (token.is(Token::Type::EndOfFile)) {
             return tokens;
@@ -441,14 +441,14 @@ u32 Tokenizer::consume_escaped_code_point()
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-ident-like-token
-ErrorOr<Token> Tokenizer::consume_an_ident_like_token()
+Token Tokenizer::consume_an_ident_like_token()
 {
     // This section describes how to consume an ident-like token from a stream of code points.
     // It returns an <ident-token>, <function-token>, <url-token>, or <bad-url-token>.
 
     // Consume an ident sequence, and let string be the result.
     auto start_byte_offset = current_byte_offset();
-    auto string = TRY(consume_an_ident_sequence());
+    auto string = consume_an_ident_sequence();
 
     // If string's value is an ASCII case-insensitive match for "url", and the next input code
    // point is U+0028 LEFT PARENTHESIS ((), consume it.
@@ -470,7 +470,7 @@ ErrorOr<Token> Tokenizer::consume_an_ident_like_token()
         // <function-token> with its value set to string and return it.
         auto next_two = peek_twin();
         if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) {
-            return create_value_token(Token::Type::Function, move(string), TRY(input_since(start_byte_offset)));
+            return create_value_token(Token::Type::Function, move(string), input_since(start_byte_offset));
         }
 
         // Otherwise, consume a url token, and return it.
@@ -482,11 +482,11 @@ ErrorOr<Token> Tokenizer::consume_an_ident_like_token()
         (void)next_code_point();
 
         // Create a <function-token> with its value set to string and return it.
-        return create_value_token(Token::Type::Function, move(string), TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Function, move(string), input_since(start_byte_offset));
     }
 
     // Otherwise, create an <ident-token> with its value set to string and return it.
-    return create_value_token(Token::Type::Ident, move(string), TRY(input_since(start_byte_offset)));
+    return create_value_token(Token::Type::Ident, move(string), input_since(start_byte_offset));
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-number
@@ -595,7 +595,7 @@ double Tokenizer::convert_a_string_to_a_number(StringView string)
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-name
-ErrorOr<FlyString> Tokenizer::consume_an_ident_sequence()
+FlyString Tokenizer::consume_an_ident_sequence()
 {
     // This section describes how to consume an ident sequence from a stream of code points.
     // It returns a string containing the largest name that can be formed from adjacent
@@ -619,14 +619,14 @@ ErrorOr<FlyString> Tokenizer::consume_an_ident_sequence()
         // name code point
         if (is_ident_code_point(input)) {
             // Append the code point to result.
-            TRY(result.try_append_code_point(input));
+            result.append_code_point(input);
             continue;
         }
 
         // the stream starts with a valid escape
         if (is_valid_escape_sequence(start_of_input_stream_twin())) {
             // Consume an escaped code point. Append the returned code point to result.
-            TRY(result.try_append_code_point(consume_escaped_code_point()));
+            result.append_code_point(consume_escaped_code_point());
             continue;
         }
@@ -640,7 +640,7 @@ ErrorOr<FlyString> Tokenizer::consume_an_ident_sequence()
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-url-token
-ErrorOr<Token> Tokenizer::consume_a_url_token()
+Token Tokenizer::consume_a_url_token()
 {
     // This section describes how to consume a url token from a stream of code points.
     // It returns either a <url-token> or a <bad-url-token>.
@@ -659,9 +659,9 @@ ErrorOr<Token> Tokenizer::consume_a_url_token()
     // 2. Consume as much whitespace as possible.
     consume_as_much_whitespace_as_possible();
 
-    auto make_token = [&]() -> ErrorOr<Token> {
-        token.m_value = TRY(FlyString::from_utf8(builder.string_view()));
-        token.m_representation = TRY(input_since(start_byte_offset));
+    auto make_token = [&]() -> Token {
+        token.m_value = builder.to_fly_string_without_validation();
+        token.m_representation = input_since(start_byte_offset);
         return token;
     };
@@ -705,7 +705,7 @@ ErrorOr<Token> Tokenizer::consume_a_url_token()
             // otherwise, consume the remnants of a bad url, create a <bad-url-token>, and return it.
             consume_the_remnants_of_a_bad_url();
             auto bad_url_token = create_new_token(Token::Type::BadUrl);
-            bad_url_token.m_representation = TRY(input_since(start_byte_offset));
+            bad_url_token.m_representation = input_since(start_byte_offset);
             return bad_url_token;
         }
@@ -718,7 +718,7 @@ ErrorOr<Token> Tokenizer::consume_a_url_token()
             log_parse_error();
             consume_the_remnants_of_a_bad_url();
             auto bad_url_token = create_new_token(Token::Type::BadUrl);
-            bad_url_token.m_representation = TRY(input_since(start_byte_offset));
+            bad_url_token.m_representation = input_since(start_byte_offset);
             return bad_url_token;
         }
@@ -735,7 +735,7 @@ ErrorOr<Token> Tokenizer::consume_a_url_token()
             // Consume the remnants of a bad url, create a <bad-url-token>, and return it.
             consume_the_remnants_of_a_bad_url();
             auto bad_url_token = create_new_token(Token::Type::BadUrl);
-            bad_url_token.m_representation = TRY(input_since(start_byte_offset));
+            bad_url_token.m_representation = input_since(start_byte_offset);
            return bad_url_token;
         }
     }
@@ -792,7 +792,7 @@ void Tokenizer::reconsume_current_input_code_point()
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
-ErrorOr<Token> Tokenizer::consume_a_numeric_token()
+Token Tokenizer::consume_a_numeric_token()
 {
     // This section describes how to consume a numeric token from a stream of code points.
     // It returns either a <number-token>, <percentage-token>, or <dimension-token>.
@@ -810,13 +810,13 @@ ErrorOr<Token> Tokenizer::consume_a_numeric_token()
         token.m_number_value = number;
 
         // 2. Consume an ident sequence. Set the <dimension-token>'s unit to the returned value.
-        auto unit = TRY(consume_an_ident_sequence());
+        auto unit = consume_an_ident_sequence();
         VERIFY(!unit.is_empty());
 
         // NOTE: We intentionally store this in the `value`, to save space.
         token.m_value = move(unit);
 
         // 3. Return the <dimension-token>.
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -827,14 +827,14 @@ ErrorOr<Token> Tokenizer::consume_a_numeric_token()
         // Create a <percentage-token> with the same value as number, and return it.
         auto token = create_new_token(Token::Type::Percentage);
         token.m_number_value = number;
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
 
     // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
     auto token = create_new_token(Token::Type::Number);
     token.m_number_value = number;
-    token.m_representation = TRY(input_since(start_byte_offset));
+    token.m_representation = input_since(start_byte_offset);
     return token;
 }
@@ -949,7 +949,7 @@ bool Tokenizer::would_start_an_ident_sequence(U32Triplet values)
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-string-token
-ErrorOr<Token> Tokenizer::consume_string_token(u32 ending_code_point)
+Token Tokenizer::consume_string_token(u32 ending_code_point)
 {
     // This section describes how to consume a string token from a stream of code points.
     // It returns either a <string-token> or <bad-string-token>.
@@ -963,9 +963,9 @@ ErrorOr<Token> Tokenizer::consume_string_token(u32 ending_code_point)
     auto token = create_new_token(Token::Type::String);
     StringBuilder builder;
 
-    auto make_token = [&]() -> ErrorOr<Token> {
-        token.m_value = TRY(FlyString::from_utf8(builder.string_view()));
-        token.m_representation = TRY(input_since(start_byte_offset));
+    auto make_token = [&]() -> Token {
+        token.m_value = builder.to_fly_string_without_validation();
+        token.m_representation = input_since(start_byte_offset);
         return token;
     };
@@ -990,7 +990,7 @@ ErrorOr<Token> Tokenizer::consume_string_token(u32 ending_code_point)
             // <bad-string-token>, and return it.
             reconsume_current_input_code_point();
             auto bad_string_token = create_new_token(Token::Type::BadString);
-            bad_string_token.m_representation = TRY(input_since(start_byte_offset));
+            bad_string_token.m_representation = input_since(start_byte_offset);
             return bad_string_token;
         }
@@ -1059,7 +1059,7 @@ start:
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-token
-ErrorOr<Token> Tokenizer::consume_a_token()
+Token Tokenizer::consume_a_token()
 {
     // This section describes how to consume a token from a stream of code points.
     // It will return a single token of any type.
@@ -1077,7 +1077,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         // Consume as much whitespace as possible. Return a <whitespace-token>.
         consume_as_much_whitespace_as_possible();
         auto token = create_new_token(Token::Type::Whitespace);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1107,16 +1107,16 @@ ErrorOr<Token> Tokenizer::consume_a_token()
             token.m_hash_type = Token::HashType::Id;
 
            // 3. Consume an ident sequence, and set the <hash-token>'s value to the returned string.
-            auto name = TRY(consume_an_ident_sequence());
+            auto name = consume_an_ident_sequence();
             token.m_value = move(name);
 
             // 4. Return the <hash-token>.
-            token.m_representation = TRY(input_since(start_byte_offset));
+            token.m_representation = input_since(start_byte_offset);
             return token;
         }
 
         // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+0027 APOSTROPHE (')
@@ -1131,7 +1131,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is left paren");
 
         // Return a <(-token>.
         Token token = create_new_token(Token::Type::OpenParen);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1140,7 +1140,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is right paren");
 
         // Return a <)-token>.
         Token token = create_new_token(Token::Type::CloseParen);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1155,7 +1155,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         }
 
         // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+002C COMMA (,)
@@ -1163,7 +1163,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is comma");
 
         // Return a <comma-token>.
         Token token = create_new_token(Token::Type::Comma);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1185,7 +1185,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
             (void)next_code_point();
 
             Token token = create_new_token(Token::Type::CDC);
-            token.m_representation = TRY(input_since(start_byte_offset));
+            token.m_representation = input_since(start_byte_offset);
             return token;
         }
@@ -1197,7 +1197,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         }
 
         // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+002E FULL STOP (.)
@@ -1211,7 +1211,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         }
 
         // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+003A COLON (:)
@@ -1219,7 +1219,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
        dbgln_if(CSS_TOKENIZER_DEBUG, "is colon");
 
         // Return a <colon-token>.
         Token token = create_new_token(Token::Type::Colon);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1228,7 +1228,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is semicolon");
 
         // Return a <semicolon-token>.
         Token token = create_new_token(Token::Type::Semicolon);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1244,12 +1244,12 @@ ErrorOr<Token> Tokenizer::consume_a_token()
            (void)next_code_point();
 
            Token token = create_new_token(Token::Type::CDO);
-            token.m_representation = TRY(input_since(start_byte_offset));
+            token.m_representation = input_since(start_byte_offset);
            return token;
        }
 
        // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
    }
 
    // U+0040 COMMERCIAL AT (@)
@@ -1258,12 +1258,12 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         // If the next 3 input code points would start an ident sequence, consume an ident sequence, create
         // an <at-keyword-token> with its value set to the returned value, and return it.
         if (would_start_an_ident_sequence(peek_triplet())) {
-            auto name = TRY(consume_an_ident_sequence());
-            return create_value_token(Token::Type::AtKeyword, move(name), TRY(input_since(start_byte_offset)));
+            auto name = consume_an_ident_sequence();
+            return create_value_token(Token::Type::AtKeyword, move(name), input_since(start_byte_offset));
         }
 
         // Otherwise, return a <delim-token> with its value set to the current input code point.
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+005B LEFT SQUARE BRACKET ([)
@@ -1271,7 +1271,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is open square");
 
         // Return a <[-token>.
         Token token = create_new_token(Token::Type::OpenSquare);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1288,7 +1288,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         // Otherwise, this is a parse error. Return a <delim-token> with its value set to the
         // current input code point.
         log_parse_error();
-        return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+        return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
     }
 
     // U+005D RIGHT SQUARE BRACKET (])
@@ -1296,7 +1296,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is closed square");
 
         // Return a <]-token>.
         Token token = create_new_token(Token::Type::CloseSquare);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1305,7 +1305,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is open curly");
 
         // Return a <{-token>.
         Token token = create_new_token(Token::Type::OpenCurly);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1314,7 +1314,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
         dbgln_if(CSS_TOKENIZER_DEBUG, "is closed curly");
 
         // Return a <}-token>.
         Token token = create_new_token(Token::Type::CloseCurly);
-        token.m_representation = TRY(input_since(start_byte_offset));
+        token.m_representation = input_since(start_byte_offset);
         return token;
     }
@@ -1343,7 +1343,7 @@ ErrorOr<Token> Tokenizer::consume_a_token()
     // anything else
     dbgln_if(CSS_TOKENIZER_DEBUG, "is delimiter");
 
     // Return a <delim-token> with its value set to the current input code point.
-    return create_value_token(Token::Type::Delim, input, TRY(input_since(start_byte_offset)));
+    return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset));
 }
 
 size_t Tokenizer::current_byte_offset() const
@@ -1351,9 +1351,9 @@ size_t Tokenizer::current_byte_offset() const
     return m_utf8_iterator.ptr() - m_utf8_view.bytes();
 }
 
-ErrorOr<String> Tokenizer::input_since(size_t offset) const
+String Tokenizer::input_since(size_t offset) const
 {
-    return m_decoded_input.substring_from_byte_offset_with_shared_superstring(offset, current_byte_offset() - offset);
+    return MUST(m_decoded_input.substring_from_byte_offset_with_shared_superstring(offset, current_byte_offset() - offset));
 }
 
 }
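
One non-mechanical detail in the diff above: string and URL token values are now built with builder.to_fly_string_without_validation() instead of TRY(FlyString::from_utf8(builder.string_view())). Skipping UTF-8 validation is sound here because filter_code_points() already decoded and filtered the input into well-formed UTF-8, and the tokenizer only ever appends whole code points. A sketch of that invariant, with a hypothetical helper name:

#include <AK/FlyString.h>
#include <AK/Span.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>

// Hypothetical helper: append_code_point() always emits a well-formed
// UTF-8 sequence, so the builder's buffer needs no re-validation.
static FlyString fly_string_from_code_points(ReadonlySpan<u32> code_points)
{
    StringBuilder builder;
    for (u32 code_point : code_points)
        builder.append_code_point(code_point);
    return builder.to_fly_string_without_validation();
}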

@@ -59,17 +59,17 @@ public:
 class Tokenizer {
 public:
-    static ErrorOr<Vector<Token>> tokenize(StringView input, StringView encoding);
+    static Vector<Token> tokenize(StringView input, StringView encoding);
 
     [[nodiscard]] static Token create_eof_token();
 
 private:
     explicit Tokenizer(String decoded_input);
 
-    [[nodiscard]] ErrorOr<Vector<Token>> tokenize();
+    [[nodiscard]] Vector<Token> tokenize();
 
     size_t current_byte_offset() const;
-    ErrorOr<String> input_since(size_t offset) const;
+    String input_since(size_t offset) const;
 
     [[nodiscard]] u32 next_code_point();
     [[nodiscard]] u32 peek_code_point(size_t offset = 0) const;
@@ -82,15 +82,15 @@ private:
     [[nodiscard]] static Token create_new_token(Token::Type);
     [[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value, String&& representation);
     [[nodiscard]] static Token create_value_token(Token::Type, u32 value, String&& representation);
-    [[nodiscard]] ErrorOr<Token> consume_a_token();
-    [[nodiscard]] ErrorOr<Token> consume_string_token(u32 ending_code_point);
-    [[nodiscard]] ErrorOr<Token> consume_a_numeric_token();
-    [[nodiscard]] ErrorOr<Token> consume_an_ident_like_token();
+    [[nodiscard]] Token consume_a_token();
+    [[nodiscard]] Token consume_string_token(u32 ending_code_point);
+    [[nodiscard]] Token consume_a_numeric_token();
+    [[nodiscard]] Token consume_an_ident_like_token();
     [[nodiscard]] Number consume_a_number();
     [[nodiscard]] double convert_a_string_to_a_number(StringView);
-    [[nodiscard]] ErrorOr<FlyString> consume_an_ident_sequence();
+    [[nodiscard]] FlyString consume_an_ident_sequence();
     [[nodiscard]] u32 consume_escaped_code_point();
-    [[nodiscard]] ErrorOr<Token> consume_a_url_token();
+    [[nodiscard]] Token consume_a_url_token();
     void consume_the_remnants_of_a_bad_url();
     void consume_comments();
     void consume_as_much_whitespace_as_possible();

@@ -45,7 +45,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
             false);
     };
 
-    auto tokens = CSS::Parser::Tokenizer::tokenize(text, "utf-8"sv).release_value_but_fixme_should_propagate_errors();
+    auto tokens = CSS::Parser::Tokenizer::tokenize(text, "utf-8"sv);
     for (auto const& token : tokens) {
         if (token.is(Parser::Token::Type::EndOfFile))
             break;
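
With tokenize() now infallible, call sites like the syntax highlighter above shed their unwrapping ceremony (release_value_but_fixme_should_propagate_errors()) entirely. A hypothetical caller, for illustration:

#include <AK/StringView.h>
#include <LibWeb/CSS/Parser/Tokenizer.h>

// Hypothetical helper: tokenize a stylesheet and count its tokens.
static size_t count_css_tokens(StringView css_source)
{
    auto tokens = Web::CSS::Parser::Tokenizer::tokenize(css_source, "utf-8"sv);
    // The vector always ends with an EndOfFile token; exclude it.
    return tokens.size() - 1;
}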