LibWeb/CSS: Construct all CSS Tokens in a consistent way

Add `create_foo()` static methods for the missing Token::Types, and use
them in the Tokenizer. This means we slightly deviate from the spec now:
it says "create foo token... set its bar to 32", but we now just wait
and construct the Token fully-formed. But those cases are short so it
should still be clear what we're doing.

This makes it possible to construct all kinds of Token elsewhere, such
as for testing purposes.
This commit is contained in:
Sam Atkins 2025-07-09 12:59:31 +01:00 committed by Tim Ledbetter
parent 57dd85e4ac
commit d5bee680b0
Notes: github-actions[bot] 2025-07-09 14:06:20 +00:00
4 changed files with 203 additions and 188 deletions

View file

@ -1,15 +1,141 @@
/* /*
* Copyright (c) 2020-2021, the SerenityOS developers. * Copyright (c) 2020-2021, the SerenityOS developers.
* Copyright (c) 2022-2023, Sam Atkins <atkinssj@serenityos.org> * Copyright (c) 2022-2025, Sam Atkins <sam@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/GenericShorthands.h>
#include <LibWeb/CSS/Parser/Token.h> #include <LibWeb/CSS/Parser/Token.h>
#include <LibWeb/CSS/Serialize.h> #include <LibWeb/CSS/Serialize.h>
namespace Web::CSS::Parser { namespace Web::CSS::Parser {
Token Token::create(Type type, String original_source_text)
{
    // Generic factory for token types that carry no payload beyond the type
    // itself. Types with extra data (value, number, hash type, ...) must go
    // through their dedicated create_foo() factory; this VERIFY enforces that.
    VERIFY(first_is_one_of(type,
        Type::Invalid,
        Type::EndOfFile,
        Type::BadString,
        Type::BadUrl,
        Type::CDO,
        Type::CDC,
        Type::Colon,
        Type::Semicolon,
        Type::Comma,
        Type::OpenSquare,
        Type::CloseSquare,
        Type::OpenParen,
        Type::CloseParen,
        Type::OpenCurly,
        Type::CloseCurly));

    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_type = type;
    return token;
}
Token Token::create_ident(FlyString ident, String original_source_text)
{
    // Build a fully-formed <ident-token> from its value and the source text it came from.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(ident);
    token.m_type = Type::Ident;
    return token;
}
Token Token::create_function(FlyString name, String original_source_text)
{
    // Build a fully-formed <function-token> holding the function's name.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(name);
    token.m_type = Type::Function;
    return token;
}
Token Token::create_at_keyword(FlyString name, String original_source_text)
{
    // Build a fully-formed <at-keyword-token> holding the keyword's name (without the '@').
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(name);
    token.m_type = Type::AtKeyword;
    return token;
}
Token Token::create_hash(FlyString value, HashType hash_type, String original_source_text)
{
    // Build a fully-formed <hash-token>; hash_type records whether the value
    // parsed as an "id" or is "unrestricted".
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_hash_type = hash_type;
    token.m_value = move(value);
    token.m_type = Type::Hash;
    return token;
}
Token Token::create_string(FlyString value, String original_source_text)
{
    // Build a fully-formed <string-token> holding the (unquoted) string value.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(value);
    token.m_type = Type::String;
    return token;
}
Token Token::create_url(FlyString url, String original_source_text)
{
    // Build a fully-formed <url-token> holding the URL's text.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(url);
    token.m_type = Type::Url;
    return token;
}
Token Token::create_delim(u32 delim, String original_source_text)
{
    // Build a fully-formed <delim-token>. The single code point is stored
    // stringified in m_value rather than as a raw u32.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = String::from_code_point(delim);
    token.m_type = Type::Delim;
    return token;
}
Token Token::create_number(Number value, String original_source_text)
{
    // Build a fully-formed <number-token>; Number carries both the numeric
    // value and its type flag (integer/number).
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_number_value = value;
    token.m_type = Type::Number;
    return token;
}
Token Token::create_percentage(Number value, String original_source_text)
{
    // Build a fully-formed <percentage-token> from its numeric value.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_number_value = value;
    token.m_type = Type::Percentage;
    return token;
}
Token Token::create_dimension(Number value, FlyString unit, String original_source_text)
{
    // Build a fully-formed <dimension-token>. The unit shares m_value with the
    // other string-carrying token types, keeping Token small.
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_value = move(unit);
    token.m_number_value = value;
    token.m_type = Type::Dimension;
    return token;
}
Token Token::create_whitespace(String original_source_text)
{
    // Build a <whitespace-token>; only the original source text is kept
    // (this also covers comments preserved as whitespace).
    Token token;
    token.m_original_source_text = move(original_source_text);
    token.m_type = Type::Whitespace;
    return token;
}
String Token::to_string() const String Token::to_string() const
{ {
StringBuilder builder; StringBuilder builder;
@ -213,4 +339,10 @@ StringView Token::bracket_mirror_string() const
return ""sv; return ""sv;
} }
void Token::set_position_range(Badge<Tokenizer>, Position start, Position end)
{
    // Badge-gated so only the Tokenizer can stamp a token with its
    // start/end positions after construction.
    m_end_position = end;
    m_start_position = start;
}
} }

View file

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2020-2021, the SerenityOS developers. * Copyright (c) 2020-2021, the SerenityOS developers.
* Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org> * Copyright (c) 2021-2025, Sam Atkins <sam@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -9,14 +9,13 @@
#include <AK/FlyString.h> #include <AK/FlyString.h>
#include <LibWeb/CSS/Number.h> #include <LibWeb/CSS/Number.h>
#include <LibWeb/Forward.h>
namespace Web::CSS::Parser { namespace Web::CSS::Parser {
class Token { class Token {
friend class Tokenizer;
public: public:
enum class Type { enum class Type : u8 {
Invalid, Invalid,
EndOfFile, EndOfFile,
Ident, Ident,
@ -45,7 +44,7 @@ public:
CloseCurly CloseCurly
}; };
enum class HashType { enum class HashType : u8 {
Id, Id,
Unrestricted, Unrestricted,
}; };
@ -55,6 +54,25 @@ public:
size_t column { 0 }; size_t column { 0 };
}; };
// Use this only to create types that don't have their own create_foo() methods below.
static Token create(Type, String original_source_text = {});
static Token create_ident(FlyString ident, String original_source_text = {});
static Token create_function(FlyString name, String original_source_text = {});
static Token create_at_keyword(FlyString name, String original_source_text = {});
static Token create_hash(FlyString value, HashType hash_type, String original_source_text = {});
static Token create_string(FlyString value, String original_source_text = {});
static Token create_url(FlyString url, String original_source_text = {});
static Token create_delim(u32 delim, String original_source_text = {});
static Token create_number(Number value, String original_source_text = {});
static Token create_percentage(Number value, String original_source_text = {});
static Token create_dimension(Number value, FlyString unit, String original_source_text = {});
static Token create_dimension(double value, FlyString unit, String original_source_text = {})
{
return create_dimension(Number { Number::Type::Number, value }, move(unit), move(original_source_text));
}
static Token create_whitespace(String original_source_text = {});
Type type() const { return m_type; } Type type() const { return m_type; }
bool is(Type type) const { return m_type == type; } bool is(Type type) const { return m_type == type; }
@ -149,55 +167,7 @@ public:
String const& original_source_text() const { return m_original_source_text; } String const& original_source_text() const { return m_original_source_text; }
Position const& start_position() const { return m_start_position; } Position const& start_position() const { return m_start_position; }
Position const& end_position() const { return m_end_position; } Position const& end_position() const { return m_end_position; }
void set_position_range(Badge<Tokenizer>, Position start, Position end);
static Token create_string(FlyString str)
{
Token token;
token.m_type = Type::String;
token.m_value = move(str);
return token;
}
static Token create_number(double value, Number::Type number_type)
{
Token token;
token.m_type = Type::Number;
token.m_number_value = Number(number_type, value);
return token;
}
static Token create_percentage(double value)
{
Token token;
token.m_type = Type::Percentage;
token.m_number_value = Number(Number::Type::Number, value);
return token;
}
static Token create_dimension(double value, FlyString unit)
{
Token token;
token.m_type = Type::Dimension;
token.m_number_value = Number(Number::Type::Number, value);
token.m_value = move(unit);
return token;
}
static Token create_ident(FlyString ident)
{
Token token;
token.m_type = Type::Ident;
token.m_value = move(ident);
return token;
}
static Token create_url(FlyString url)
{
Token token;
token.m_type = Type::Url;
token.m_value = move(url);
return token;
}
private: private:
Type m_type { Type::Invalid }; Type m_type { Type::Invalid };

View file

@ -231,8 +231,7 @@ Vector<Token> Tokenizer::tokenize()
for (;;) { for (;;) {
auto token_start = m_position; auto token_start = m_position;
auto token = consume_a_token(); auto token = consume_a_token();
token.m_start_position = token_start; token.set_position_range({}, token_start, m_position);
token.m_end_position = m_position;
tokens.append(token); tokens.append(token);
if (token.is(Token::Type::EndOfFile)) { if (token.is(Token::Type::EndOfFile)) {
@ -320,32 +319,9 @@ U32Triplet Tokenizer::start_of_input_stream_triplet()
return triplet; return triplet;
} }
Token Tokenizer::create_new_token(Token::Type type)
{
Token token = {};
token.m_type = type;
return token;
}
Token Tokenizer::create_eof_token() Token Tokenizer::create_eof_token()
{ {
return create_new_token(Token::Type::EndOfFile); return Token::create(Token::Type::EndOfFile);
}
Token Tokenizer::create_value_token(Token::Type type, FlyString&& value, String&& representation)
{
auto token = create_new_token(type);
token.m_value = move(value);
token.m_original_source_text = move(representation);
return token;
}
Token Tokenizer::create_value_token(Token::Type type, u32 value, String&& representation)
{
auto token = create_new_token(type);
token.m_value = String::from_code_point(value);
token.m_original_source_text = move(representation);
return token;
} }
// https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point // https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
@ -430,7 +406,7 @@ Token Tokenizer::consume_an_ident_like_token()
// <function-token> with its value set to string and return it. // <function-token> with its value set to string and return it.
auto next_two = peek_twin(); auto next_two = peek_twin();
if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) { if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) {
return create_value_token(Token::Type::Function, move(string), input_since(start_byte_offset)); return Token::create_function(move(string), input_since(start_byte_offset));
} }
// Otherwise, consume a url token, and return it. // Otherwise, consume a url token, and return it.
@ -442,11 +418,11 @@ Token Tokenizer::consume_an_ident_like_token()
(void)next_code_point(); (void)next_code_point();
// Create a <function-token> with its value set to string and return it. // Create a <function-token> with its value set to string and return it.
return create_value_token(Token::Type::Function, move(string), input_since(start_byte_offset)); return Token::create_function(move(string), input_since(start_byte_offset));
} }
// Otherwise, create an <ident-token> with its value set to string and return it. // Otherwise, create an <ident-token> with its value set to string and return it.
return create_value_token(Token::Type::Ident, move(string), input_since(start_byte_offset)); return Token::create_ident(move(string), input_since(start_byte_offset));
} }
// https://www.w3.org/TR/css-syntax-3/#consume-number // https://www.w3.org/TR/css-syntax-3/#consume-number
@ -613,18 +589,11 @@ Token Tokenizer::consume_a_url_token()
// 1. Initially create a <url-token> with its value set to the empty string. // 1. Initially create a <url-token> with its value set to the empty string.
auto start_byte_offset = current_byte_offset(); auto start_byte_offset = current_byte_offset();
auto token = create_new_token(Token::Type::Url);
StringBuilder builder; StringBuilder builder;
// 2. Consume as much whitespace as possible. // 2. Consume as much whitespace as possible.
consume_as_much_whitespace_as_possible(); consume_as_much_whitespace_as_possible();
auto make_token = [&]() -> Token {
token.m_value = builder.to_fly_string_without_validation();
token.m_original_source_text = input_since(start_byte_offset);
return token;
};
// 3. Repeatedly consume the next input code point from the stream: // 3. Repeatedly consume the next input code point from the stream:
for (;;) { for (;;) {
auto input = next_code_point(); auto input = next_code_point();
@ -632,14 +601,14 @@ Token Tokenizer::consume_a_url_token()
// U+0029 RIGHT PARENTHESIS ()) // U+0029 RIGHT PARENTHESIS ())
if (is_right_paren(input)) { if (is_right_paren(input)) {
// Return the <url-token>. // Return the <url-token>.
return make_token(); return Token::create_url(builder.to_fly_string_without_validation(), input_since(start_byte_offset));
} }
// EOF // EOF
if (is_eof(input)) { if (is_eof(input)) {
// This is a parse error. Return the <url-token>. // This is a parse error. Return the <url-token>.
log_parse_error(); log_parse_error();
return make_token(); return Token::create_url(builder.to_fly_string_without_validation(), input_since(start_byte_offset));
} }
// whitespace // whitespace
@ -653,20 +622,18 @@ Token Tokenizer::consume_a_url_token()
if (is_right_paren(input)) { if (is_right_paren(input)) {
(void)next_code_point(); (void)next_code_point();
return make_token(); return Token::create_url(builder.to_fly_string_without_validation(), input_since(start_byte_offset));
} }
if (is_eof(input)) { if (is_eof(input)) {
(void)next_code_point(); (void)next_code_point();
log_parse_error(); log_parse_error();
return make_token(); return Token::create_url(builder.to_fly_string_without_validation(), input_since(start_byte_offset));
} }
// otherwise, consume the remnants of a bad url, create a <bad-url-token>, and return it. // otherwise, consume the remnants of a bad url, create a <bad-url-token>, and return it.
consume_the_remnants_of_a_bad_url(); consume_the_remnants_of_a_bad_url();
auto bad_url_token = create_new_token(Token::Type::BadUrl); return Token::create(Token::Type::BadUrl, input_since(start_byte_offset));
bad_url_token.m_original_source_text = input_since(start_byte_offset);
return bad_url_token;
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
@ -677,9 +644,7 @@ Token Tokenizer::consume_a_url_token()
// This is a parse error. Consume the remnants of a bad url, create a <bad-url-token>, and return it. // This is a parse error. Consume the remnants of a bad url, create a <bad-url-token>, and return it.
log_parse_error(); log_parse_error();
consume_the_remnants_of_a_bad_url(); consume_the_remnants_of_a_bad_url();
auto bad_url_token = create_new_token(Token::Type::BadUrl); return Token::create(Token::Type::BadUrl, input_since(start_byte_offset));
bad_url_token.m_original_source_text = input_since(start_byte_offset);
return bad_url_token;
} }
// U+005C REVERSE SOLIDUS (\) // U+005C REVERSE SOLIDUS (\)
@ -694,9 +659,7 @@ Token Tokenizer::consume_a_url_token()
log_parse_error(); log_parse_error();
// Consume the remnants of a bad url, create a <bad-url-token>, and return it. // Consume the remnants of a bad url, create a <bad-url-token>, and return it.
consume_the_remnants_of_a_bad_url(); consume_the_remnants_of_a_bad_url();
auto bad_url_token = create_new_token(Token::Type::BadUrl); return Token::create(Token::Type::BadUrl, input_since(start_byte_offset));
bad_url_token.m_original_source_text = input_since(start_byte_offset);
return bad_url_token;
} }
} }
@ -766,18 +729,14 @@ Token Tokenizer::consume_a_numeric_token()
if (would_start_an_ident_sequence(peek_triplet())) { if (would_start_an_ident_sequence(peek_triplet())) {
// 1. Create a <dimension-token> with the same value and type flag as number, // 1. Create a <dimension-token> with the same value and type flag as number,
// and a unit set initially to the empty string. // and a unit set initially to the empty string.
auto token = create_new_token(Token::Type::Dimension);
token.m_number_value = number;
// 2. Consume an ident sequence. Set the <dimension-token>'s unit to the returned value. // 2. Consume an ident sequence. Set the <dimension-token>'s unit to the returned value.
auto unit = consume_an_ident_sequence(); auto unit = consume_an_ident_sequence();
VERIFY(!unit.is_empty()); VERIFY(!unit.is_empty());
// NOTE: We intentionally store this in the `value`, to save space. // NOTE: We intentionally store this in the `value`, to save space.
token.m_value = move(unit);
// 3. Return the <dimension-token>. // 3. Return the <dimension-token>.
token.m_original_source_text = input_since(start_byte_offset); return Token::create_dimension(number, move(unit), input_since(start_byte_offset));
return token;
} }
// Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it. // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
@ -785,17 +744,11 @@ Token Tokenizer::consume_a_numeric_token()
(void)next_code_point(); (void)next_code_point();
// Create a <percentage-token> with the same value as number, and return it. // Create a <percentage-token> with the same value as number, and return it.
auto token = create_new_token(Token::Type::Percentage); return Token::create_percentage(number, input_since(start_byte_offset));
token.m_number_value = number;
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// Otherwise, create a <number-token> with the same value and type flag as number, and return it. // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
auto token = create_new_token(Token::Type::Number); return Token::create_number(number, input_since(start_byte_offset));
token.m_number_value = number;
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// https://www.w3.org/TR/css-syntax-3/#starts-with-a-number // https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
@ -920,28 +873,21 @@ Token Tokenizer::consume_string_token(u32 ending_code_point)
// Initially create a <string-token> with its value set to the empty string. // Initially create a <string-token> with its value set to the empty string.
auto original_source_text_start_byte_offset_including_quotation_mark = current_byte_offset() - 1; auto original_source_text_start_byte_offset_including_quotation_mark = current_byte_offset() - 1;
auto token = create_new_token(Token::Type::String);
StringBuilder builder; StringBuilder builder;
auto make_token = [&]() -> Token {
token.m_value = builder.to_fly_string_without_validation();
token.m_original_source_text = input_since(original_source_text_start_byte_offset_including_quotation_mark);
return token;
};
// Repeatedly consume the next input code point from the stream: // Repeatedly consume the next input code point from the stream:
for (;;) { for (;;) {
auto input = next_code_point(); auto input = next_code_point();
// ending code point // ending code point
if (input == ending_code_point) if (input == ending_code_point)
return make_token(); return Token::create_string(builder.to_fly_string_without_validation(), input_since(original_source_text_start_byte_offset_including_quotation_mark));
// EOF // EOF
if (is_eof(input)) { if (is_eof(input)) {
// This is a parse error. Return the <string-token>. // This is a parse error. Return the <string-token>.
log_parse_error(); log_parse_error();
return make_token(); return Token::create_string(builder.to_fly_string_without_validation(), input_since(original_source_text_start_byte_offset_including_quotation_mark));
} }
// newline // newline
@ -949,9 +895,7 @@ Token Tokenizer::consume_string_token(u32 ending_code_point)
// This is a parse error. Reconsume the current input code point, create a // This is a parse error. Reconsume the current input code point, create a
// <bad-string-token>, and return it. // <bad-string-token>, and return it.
reconsume_current_input_code_point(); reconsume_current_input_code_point();
auto bad_string_token = create_new_token(Token::Type::BadString); return Token::create(Token::Type::BadString, input_since(original_source_text_start_byte_offset_including_quotation_mark));
bad_string_token.m_original_source_text = input_since(original_source_text_start_byte_offset_including_quotation_mark);
return bad_string_token;
} }
// U+005C REVERSE SOLIDUS (\) // U+005C REVERSE SOLIDUS (\)
@ -1031,11 +975,8 @@ Token Tokenizer::consume_a_token()
// AD-HOC: Preserve comments as whitespace tokens, for serializing custom properties. // AD-HOC: Preserve comments as whitespace tokens, for serializing custom properties.
auto after_comments_byte_offset = current_byte_offset(); auto after_comments_byte_offset = current_byte_offset();
if (after_comments_byte_offset != start_byte_offset) { if (after_comments_byte_offset != start_byte_offset)
auto token = create_new_token(Token::Type::Whitespace); return Token::create_whitespace(input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
}
// Consume the next input code point. // Consume the next input code point.
auto input = next_code_point(); auto input = next_code_point();
@ -1045,9 +986,7 @@ Token Tokenizer::consume_a_token()
dbgln_if(CSS_TOKENIZER_DEBUG, "is whitespace"); dbgln_if(CSS_TOKENIZER_DEBUG, "is whitespace");
// Consume as much whitespace as possible. Return a <whitespace-token>. // Consume as much whitespace as possible. Return a <whitespace-token>.
consume_as_much_whitespace_as_possible(); consume_as_much_whitespace_as_possible();
auto token = create_new_token(Token::Type::Whitespace); return Token::create_whitespace(input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
@ -1068,24 +1007,22 @@ Token Tokenizer::consume_a_token()
if (is_ident_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) { if (is_ident_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
// 1. Create a <hash-token>. // 1. Create a <hash-token>.
auto token = create_new_token(Token::Type::Hash);
// 2. If the next 3 input code points would start an ident sequence, set the <hash-token>'s // 2. If the next 3 input code points would start an ident sequence, set the <hash-token>'s
// type flag to "id". // type flag to "id".
auto hash_type = Token::HashType::Unrestricted;
if (would_start_an_ident_sequence(peek_triplet())) if (would_start_an_ident_sequence(peek_triplet()))
token.m_hash_type = Token::HashType::Id; hash_type = Token::HashType::Id;
// 3. Consume an ident sequence, and set the <hash-token>'s value to the returned string. // 3. Consume an ident sequence, and set the <hash-token>'s value to the returned string.
auto name = consume_an_ident_sequence(); auto value = consume_an_ident_sequence();
token.m_value = move(name);
// 4. Return the <hash-token>. // 4. Return the <hash-token>.
token.m_original_source_text = input_since(start_byte_offset); return Token::create_hash(move(value), hash_type, input_since(start_byte_offset));
return token;
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
@ -1099,18 +1036,14 @@ Token Tokenizer::consume_a_token()
if (is_left_paren(input)) { if (is_left_paren(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is left paren"); dbgln_if(CSS_TOKENIZER_DEBUG, "is left paren");
// Return a <(-token>. // Return a <(-token>.
Token token = create_new_token(Token::Type::OpenParen); return Token::create(Token::Type::OpenParen, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+0029 RIGHT PARENTHESIS ()) // U+0029 RIGHT PARENTHESIS ())
if (is_right_paren(input)) { if (is_right_paren(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is right paren"); dbgln_if(CSS_TOKENIZER_DEBUG, "is right paren");
// Return a <)-token>. // Return a <)-token>.
Token token = create_new_token(Token::Type::CloseParen); return Token::create(Token::Type::CloseParen, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+002B PLUS SIGN (+) // U+002B PLUS SIGN (+)
@ -1124,16 +1057,14 @@ Token Tokenizer::consume_a_token()
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+002C COMMA (,) // U+002C COMMA (,)
if (is_comma(input)) { if (is_comma(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is comma"); dbgln_if(CSS_TOKENIZER_DEBUG, "is comma");
// Return a <comma-token>. // Return a <comma-token>.
Token token = create_new_token(Token::Type::Comma); return Token::create(Token::Type::Comma, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+002D HYPHEN-MINUS (-) // U+002D HYPHEN-MINUS (-)
@ -1153,9 +1084,7 @@ Token Tokenizer::consume_a_token()
(void)next_code_point(); (void)next_code_point();
(void)next_code_point(); (void)next_code_point();
Token token = create_new_token(Token::Type::CDC); return Token::create(Token::Type::CDC, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// Otherwise, if the input stream starts with an identifier, reconsume the current // Otherwise, if the input stream starts with an identifier, reconsume the current
@ -1166,7 +1095,7 @@ Token Tokenizer::consume_a_token()
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+002E FULL STOP (.) // U+002E FULL STOP (.)
@ -1180,25 +1109,21 @@ Token Tokenizer::consume_a_token()
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+003A COLON (:) // U+003A COLON (:)
if (is_colon(input)) { if (is_colon(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is colon"); dbgln_if(CSS_TOKENIZER_DEBUG, "is colon");
// Return a <colon-token>. // Return a <colon-token>.
Token token = create_new_token(Token::Type::Colon); return Token::create(Token::Type::Colon, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+003B SEMICOLON (;) // U+003B SEMICOLON (;)
if (is_semicolon(input)) { if (is_semicolon(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is semicolon"); dbgln_if(CSS_TOKENIZER_DEBUG, "is semicolon");
// Return a <semicolon-token>. // Return a <semicolon-token>.
Token token = create_new_token(Token::Type::Semicolon); return Token::create(Token::Type::Semicolon, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+003C LESS-THAN SIGN (<) // U+003C LESS-THAN SIGN (<)
@ -1212,13 +1137,11 @@ Token Tokenizer::consume_a_token()
(void)next_code_point(); (void)next_code_point();
(void)next_code_point(); (void)next_code_point();
Token token = create_new_token(Token::Type::CDO); return Token::create(Token::Type::CDO, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+0040 COMMERCIAL AT (@) // U+0040 COMMERCIAL AT (@)
@ -1227,21 +1150,20 @@ Token Tokenizer::consume_a_token()
// If the next 3 input code points would start an ident sequence, consume an ident sequence, create // If the next 3 input code points would start an ident sequence, consume an ident sequence, create
// an <at-keyword-token> with its value set to the returned value, and return it. // an <at-keyword-token> with its value set to the returned value, and return it.
if (would_start_an_ident_sequence(peek_triplet())) { if (would_start_an_ident_sequence(peek_triplet())) {
// FIXME: Do we need to set this to ascii lowercase?
auto name = consume_an_ident_sequence().to_ascii_lowercase(); auto name = consume_an_ident_sequence().to_ascii_lowercase();
return create_value_token(Token::Type::AtKeyword, move(name), input_since(start_byte_offset)); return Token::create_at_keyword(move(name), input_since(start_byte_offset));
} }
// Otherwise, return a <delim-token> with its value set to the current input code point. // Otherwise, return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+005B LEFT SQUARE BRACKET ([) // U+005B LEFT SQUARE BRACKET ([)
if (is_open_square_bracket(input)) { if (is_open_square_bracket(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is open square"); dbgln_if(CSS_TOKENIZER_DEBUG, "is open square");
// Return a <[-token>. // Return a <[-token>.
Token token = create_new_token(Token::Type::OpenSquare); return Token::create(Token::Type::OpenSquare, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+005C REVERSE SOLIDUS (\) // U+005C REVERSE SOLIDUS (\)
@ -1257,34 +1179,28 @@ Token Tokenizer::consume_a_token()
// Otherwise, this is a parse error. Return a <delim-token> with its value set to the // Otherwise, this is a parse error. Return a <delim-token> with its value set to the
// current input code point. // current input code point.
log_parse_error(); log_parse_error();
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
// U+005D RIGHT SQUARE BRACKET (]) // U+005D RIGHT SQUARE BRACKET (])
if (is_closed_square_bracket(input)) { if (is_closed_square_bracket(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is closed square"); dbgln_if(CSS_TOKENIZER_DEBUG, "is closed square");
// Return a <]-token>. // Return a <]-token>.
Token token = create_new_token(Token::Type::CloseSquare); return Token::create(Token::Type::CloseSquare, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+007B LEFT CURLY BRACKET ({) // U+007B LEFT CURLY BRACKET ({)
if (is_open_curly_bracket(input)) { if (is_open_curly_bracket(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is open curly"); dbgln_if(CSS_TOKENIZER_DEBUG, "is open curly");
// Return a <{-token>. // Return a <{-token>.
Token token = create_new_token(Token::Type::OpenCurly); return Token::create(Token::Type::OpenCurly, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// U+007D RIGHT CURLY BRACKET (}) // U+007D RIGHT CURLY BRACKET (})
if (is_closed_curly_bracket(input)) { if (is_closed_curly_bracket(input)) {
dbgln_if(CSS_TOKENIZER_DEBUG, "is closed curly"); dbgln_if(CSS_TOKENIZER_DEBUG, "is closed curly");
// Return a <}-token>. // Return a <}-token>.
Token token = create_new_token(Token::Type::CloseCurly); return Token::create(Token::Type::CloseCurly, input_since(start_byte_offset));
token.m_original_source_text = input_since(start_byte_offset);
return token;
} }
// digit // digit
@ -1312,7 +1228,7 @@ Token Tokenizer::consume_a_token()
// anything else // anything else
dbgln_if(CSS_TOKENIZER_DEBUG, "is delimiter"); dbgln_if(CSS_TOKENIZER_DEBUG, "is delimiter");
// Return a <delim-token> with its value set to the current input code point. // Return a <delim-token> with its value set to the current input code point.
return create_value_token(Token::Type::Delim, input, input_since(start_byte_offset)); return Token::create_delim(input, input_since(start_byte_offset));
} }
size_t Tokenizer::current_byte_offset() const size_t Tokenizer::current_byte_offset() const

View file

@ -79,9 +79,6 @@ private:
[[nodiscard]] U32Twin start_of_input_stream_twin(); [[nodiscard]] U32Twin start_of_input_stream_twin();
[[nodiscard]] U32Triplet start_of_input_stream_triplet(); [[nodiscard]] U32Triplet start_of_input_stream_triplet();
[[nodiscard]] static Token create_new_token(Token::Type);
[[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value, String&& representation);
[[nodiscard]] static Token create_value_token(Token::Type, u32 value, String&& representation);
[[nodiscard]] Token consume_a_token(); [[nodiscard]] Token consume_a_token();
[[nodiscard]] Token consume_string_token(u32 ending_code_point); [[nodiscard]] Token consume_string_token(u32 ending_code_point);
[[nodiscard]] Token consume_a_numeric_token(); [[nodiscard]] Token consume_a_numeric_token();