diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index 7173b7e7af6..a69eb0c928a 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -146,6 +146,8 @@ set(SOURCES CSS/Parser/RuleContext.cpp CSS/Parser/RuleParsing.cpp CSS/Parser/SelectorParsing.cpp + CSS/Parser/Syntax.cpp + CSS/Parser/SyntaxParsing.cpp CSS/Parser/Token.cpp CSS/Parser/Tokenizer.cpp CSS/Parser/Types.cpp diff --git a/Libraries/LibWeb/CSS/Parser/Syntax.cpp b/Libraries/LibWeb/CSS/Parser/Syntax.cpp new file mode 100644 index 00000000000..e111ef13dfc --- /dev/null +++ b/Libraries/LibWeb/CSS/Parser/Syntax.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace Web::CSS::Parser { /* Implements to_string() (serialization) and dump() (indented debug output) for every SyntaxNode subclass. */ + +String SyntaxNode::dump() const +{ + StringBuilder builder; + dump(builder, 0); /* the root node is dumped at indent 0 */ + return builder.to_string_without_validation(); +} + +UniversalSyntaxNode::UniversalSyntaxNode() + : SyntaxNode(NodeType::Universal) +{ +} + +UniversalSyntaxNode::~UniversalSyntaxNode() = default; + +String UniversalSyntaxNode::to_string() const +{ + return "*"_string; +} + +void UniversalSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}Universal\n", "", indent); /* NOTE(review): "{: >{}}" applied to "" presumably pads to `indent` columns, i.e. emits the indentation; confirm against AK::Format */ +} + +TypeSyntaxNode::TypeSyntaxNode(FlyString type_name) + : SyntaxNode(NodeType::Type) + , m_type_name(move(type_name)) +{ +} + +TypeSyntaxNode::~TypeSyntaxNode() = default; + +String TypeSyntaxNode::to_string() const +{ + return MUST(String::formatted("<{}>", m_type_name)); /* the type name wrapped in '<' and '>' */ +} + +void TypeSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}Type: {}\n", "", indent, m_type_name); +} + +IdentSyntaxNode::IdentSyntaxNode(FlyString ident) + : SyntaxNode(NodeType::Ident) + , m_ident(move(ident)) +{ +} + +IdentSyntaxNode::~IdentSyntaxNode() = default; + +String IdentSyntaxNode::to_string() const +{ + return serialize_an_identifier(m_ident); /* NOTE(review): helper is defined outside this patch; presumably applies CSS identifier escaping; confirm */ +} + +void 
IdentSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}Ident: {}\n", "", indent, m_ident); +} + +MultiplierSyntaxNode::MultiplierSyntaxNode(NonnullOwnPtr child) + : SyntaxNode(NodeType::Multiplier) + , m_child(move(child)) +{ +} + +MultiplierSyntaxNode::~MultiplierSyntaxNode() = default; + +String MultiplierSyntaxNode::to_string() const +{ + return MUST(String::formatted("{}+", m_child->to_string())); /* the child's serialization followed by '+' */ +} + +void MultiplierSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}Multiplier:\n", "", indent); + m_child->dump(builder, indent + 2); /* the child is dumped with an indent two larger than this node's */ +} + +CommaSeparatedMultiplierSyntaxNode::CommaSeparatedMultiplierSyntaxNode(NonnullOwnPtr child) + : SyntaxNode(NodeType::CommaSeparatedMultiplier) + , m_child(move(child)) +{ +} + +CommaSeparatedMultiplierSyntaxNode::~CommaSeparatedMultiplierSyntaxNode() = default; + +String CommaSeparatedMultiplierSyntaxNode::to_string() const +{ + return MUST(String::formatted("{}#", m_child->to_string())); /* the child's serialization followed by '#' */ +} + +void CommaSeparatedMultiplierSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}CommaSeparatedMultiplier:\n", "", indent); + m_child->dump(builder, indent + 2); /* the child is dumped with an indent two larger than this node's */ +} + +AlternativesSyntaxNode::AlternativesSyntaxNode(Vector> children) + : SyntaxNode(NodeType::Alternatives) + , m_children(move(children)) +{ +} + +AlternativesSyntaxNode::~AlternativesSyntaxNode() = default; + +String AlternativesSyntaxNode::to_string() const +{ + StringBuilder builder; + + bool first = true; + for (auto const& child : m_children) { + if (first) { + first = false; + } else { + builder.append(" | "sv); /* the separator goes between alternatives only, never before the first one */ + } + builder.append(child->to_string()); + } + + return builder.to_string_without_validation(); +} + +void AlternativesSyntaxNode::dump(StringBuilder& builder, int indent) const +{ + builder.appendff("{: >{}}Alternatives:\n", "", indent); + for (auto const& child : m_children) + child->dump(builder, indent + 2); /* every alternative is dumped with an indent two larger than this node's */ +} + +} diff --git 
a/Libraries/LibWeb/CSS/Parser/Syntax.h b/Libraries/LibWeb/CSS/Parser/Syntax.h new file mode 100644 index 00000000000..2bd499dc9eb --- /dev/null +++ b/Libraries/LibWeb/CSS/Parser/Syntax.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace Web::CSS::Parser { + +class SyntaxNode { /* Abstract base class of the syntax tree; type() reports the NodeType that the concrete subclass passed to the constructor. */ +public: + enum class NodeType : u8 { + Universal, + Ident, + Type, + Multiplier, + CommaSeparatedMultiplier, + Alternatives, + }; + + NodeType type() const { return m_type; } + + virtual ~SyntaxNode() = default; + virtual String to_string() const = 0; + virtual void dump(StringBuilder&, int indent) const = 0; + String dump() const; /* convenience overload: dumps this node into a fresh StringBuilder starting at indent 0 and returns the result */ + +protected: + SyntaxNode(NodeType type) + : m_type(type) + { + } + +private: + NodeType m_type; +}; + +// '*'; to_string() serializes it as "*" +class UniversalSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create() + { + return adopt_own(*new UniversalSyntaxNode()); + } + + virtual ~UniversalSyntaxNode() override; + virtual String to_string() const override; + virtual void dump(StringBuilder&, int indent) const override; + +private: + UniversalSyntaxNode(); +}; + +// 'foo', a keyword identifier; to_string() passes it through serialize_an_identifier() +class IdentSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create(FlyString ident) + { + return adopt_own(*new IdentSyntaxNode(move(ident))); + } + + virtual ~IdentSyntaxNode() override; + FlyString const& ident() const { return m_ident; } + + virtual String to_string() const override; + virtual void dump(StringBuilder&, int indent) const override; + +private: + IdentSyntaxNode(FlyString); + FlyString m_ident; +}; + +// '<foo>', a reference to a type by name; to_string() wraps the type name in '<' and '>' +class TypeSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create(FlyString type_name) + { + return adopt_own(*new TypeSyntaxNode(move(type_name))); + } + + virtual ~TypeSyntaxNode() override; + FlyString const& type_name() const { return m_type_name; } + + virtual String to_string() const override; + virtual 
void dump(StringBuilder&, int indent) const override; + +private: + TypeSyntaxNode(FlyString type_name); + FlyString m_type_name; +}; + +// '+' multiplier applied to a single child; to_string() appends '+' to the child's serialization +class MultiplierSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create(NonnullOwnPtr child) + { + return adopt_own(*new MultiplierSyntaxNode(move(child))); + } + + virtual ~MultiplierSyntaxNode() override; + SyntaxNode const& child() const { return *m_child; } + + virtual String to_string() const override; + virtual void dump(StringBuilder&, int indent) const override; + +private: + MultiplierSyntaxNode(NonnullOwnPtr); + NonnullOwnPtr m_child; +}; + +// '#' multiplier applied to a single child; to_string() appends '#' to the child's serialization +class CommaSeparatedMultiplierSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create(NonnullOwnPtr child) + { + return adopt_own(*new CommaSeparatedMultiplierSyntaxNode(move(child))); + } + + virtual ~CommaSeparatedMultiplierSyntaxNode() override; + SyntaxNode const& child() const { return *m_child; } + + virtual String to_string() const override; + virtual void dump(StringBuilder&, int indent) const override; + +private: + CommaSeparatedMultiplierSyntaxNode(NonnullOwnPtr); + NonnullOwnPtr m_child; +}; + +// Options separated by '|'; to_string() joins the children's serializations with " | " +class AlternativesSyntaxNode final : public SyntaxNode { +public: + static NonnullOwnPtr create(Vector> children) + { + return adopt_own(*new AlternativesSyntaxNode(move(children))); + } + + virtual ~AlternativesSyntaxNode() override; + ReadonlySpan> children() const { return m_children; } + + virtual String to_string() const override; + virtual void dump(StringBuilder&, int indent) const override; + +private: + AlternativesSyntaxNode(Vector>); + Vector> m_children; +}; + +} diff --git a/Libraries/LibWeb/CSS/Parser/SyntaxParsing.cpp b/Libraries/LibWeb/CSS/Parser/SyntaxParsing.cpp new file mode 100644 index 00000000000..8a8f0fe8dd6 --- /dev/null +++ b/Libraries/LibWeb/CSS/Parser/SyntaxParsing.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: 
BSD-2-Clause + */ + +#include +#include +#include +#include +#include + +namespace Web::CSS::Parser { + +static OwnPtr parse_syntax_single_component(TokenStream& tokens) +{ + // <syntax-single-component> = '<' <syntax-type-name> '>' | <ident> + // <syntax-type-name> = angle | color | custom-ident | image | integer + // | length | length-percentage | number + // | percentage | resolution | string | time + // | url | transform-function + + auto transaction = tokens.begin_transaction(); + tokens.discard_whitespace(); + + // <ident> + if (tokens.next_token().is(Token::Type::Ident)) { + auto ident = tokens.consume_a_token().token().ident(); + transaction.commit(); + return IdentSyntaxNode::create(move(ident)); + } + + // '<' <syntax-type-name> '>' + if (tokens.next_token().is_delim('<')) { + tokens.discard_a_token(); // '<' + auto const& type_name = tokens.consume_a_token(); + auto const& end_token = tokens.consume_a_token(); + + if (end_token.is_delim('>') + && type_name.is(Token::Type::Ident) + && first_is_one_of(type_name.token().ident(), "angle"sv, + "color"sv, + "custom-ident"sv, + "image"sv, + "integer"sv, + "length"sv, + "length-percentage"sv, + "number"sv, + "percentage"sv, + "resolution"sv, + "string"sv, + "time"sv, + "url"sv, + "transform-function"sv)) { + transaction.commit(); + return TypeSyntaxNode::create(type_name.token().ident()); + } + } + + return nullptr; +} + +static Optional parse_syntax_multiplier(TokenStream& tokens) +{ + // <syntax-multiplier> = [ '#' | '+' ] (no whitespace is skipped first: the multiplier must directly follow its component) + auto transaction = tokens.begin_transaction(); + + auto const& delim = tokens.consume_a_token(); // bound by reference so the component value is not copied + if (delim.is_delim('#') || delim.is_delim('+')) { + transaction.commit(); + return delim.token().delim(); + } + + return {}; +} + +static OwnPtr parse_syntax_component(TokenStream& tokens) +{ + // <syntax-component> = <syntax-single-component> <syntax-multiplier>? 
+ // | '<' transform-list '>' + + auto transaction = tokens.begin_transaction(); + + tokens.discard_whitespace(); + + // '<' transform-list '>' + if (tokens.next_token().is_delim('<')) { + auto transform_list_transaction = transaction.create_child(); + tokens.discard_a_token(); // '<' + auto& ident_token = tokens.consume_a_token(); + auto& end_token = tokens.consume_a_token(); + + if (ident_token.is_ident("transform-list"sv) && end_token.is_delim('>')) { + transform_list_transaction.commit(); // NOTE(review): only the child transaction is committed on this path; presumably that also commits the parent. Confirm. + return TypeSyntaxNode::create("transform-list"_fly_string); + } + } + + // <syntax-single-component> <syntax-multiplier>? + auto syntax_single_component = parse_syntax_single_component(tokens); + if (!syntax_single_component) + return nullptr; + + auto multiplier = parse_syntax_multiplier(tokens); + if (!multiplier.has_value()) { + transaction.commit(); + return syntax_single_component.release_nonnull(); + } + + switch (multiplier.value()) { + case '#': + transaction.commit(); + return CommaSeparatedMultiplierSyntaxNode::create(syntax_single_component.release_nonnull()); + case '+': + transaction.commit(); + return MultiplierSyntaxNode::create(syntax_single_component.release_nonnull()); + default: + return nullptr; + } +} + +static Optional parse_syntax_combinator(TokenStream& tokens) +{ + // <syntax-combinator> = '|' + auto transaction = tokens.begin_transaction(); + tokens.discard_whitespace(); + + auto const& delim = tokens.consume_a_token(); // bound by reference so the component value is not copied + if (delim.is_delim('|')) { + transaction.commit(); + return delim.token().delim(); + } + + return {}; +} + +// https://drafts.csswg.org/css-values-5/#typedef-syntax (parses the component values as a <syntax>; returns nullptr when they do not match) +OwnPtr parse_as_syntax(Vector const& component_values) +{ + // <syntax> = '*' | <syntax-component> [ <syntax-combinator> <syntax-component> ]* | <syntax-string> + // <syntax-component> = <syntax-single-component> <syntax-multiplier>? 
+ // | '<' transform-list '>' + // <syntax-single-component> = '<' <syntax-type-name> '>' | <ident> + // <syntax-type-name> = angle | color | custom-ident | image | integer + // | length | length-percentage | number + // | percentage | resolution | string | time + // | url | transform-function + // <syntax-combinator> = '|' + // <syntax-multiplier> = [ '#' | '+' ] + // + // <syntax-string> = <string> + // FIXME: Eventually, extend this to also parse *any* CSS grammar, not just for the <syntax> type. + + TokenStream tokens { component_values }; + tokens.discard_whitespace(); + + // '*' + if (tokens.next_token().is_delim('*')) { + tokens.discard_a_token(); // '*' + tokens.discard_whitespace(); + if (tokens.has_next_token()) + return nullptr; + return UniversalSyntaxNode::create(); + } + + // <syntax-string> = <string> + // A <syntax-string> is a <string> whose value successfully parses as a <syntax>, and represents the same value as + // that <syntax> would. + // NB: For now, this is the only time a string is allowed in a <syntax>. + if (tokens.next_token().is(Token::Type::String)) { + auto string = tokens.consume_a_token().token().string(); + tokens.discard_whitespace(); + if (tokens.has_next_token()) + return nullptr; + + auto child_component_values = Parser::create(ParsingParams {}, string).parse_as_list_of_component_values(); + return parse_as_syntax(child_component_values); + } + + // <syntax-component> [ <syntax-combinator> <syntax-component> ]* + auto first = parse_syntax_component(tokens); + if (!first) + return nullptr; + Vector> syntax_components; + syntax_components.append(first.release_nonnull()); + + tokens.discard_whitespace(); + while (tokens.has_next_token()) { + auto combinator = parse_syntax_combinator(tokens); + tokens.discard_whitespace(); + auto component = parse_syntax_component(tokens); + tokens.discard_whitespace(); + if (!combinator.has_value() || !component) { + dbgln("Failed parsing syntax portion, combinator = `{}`, component = `{}`", combinator, component); + return nullptr; + } + + // FIXME: Make this logic smarter once we have more than one type of combinator. + // For now, assume we're always making an AlternativesSyntaxNode. 
+ VERIFY(combinator == '|'); + + syntax_components.append(component.release_nonnull()); + } + + if (syntax_components.size() == 1) + return syntax_components.take_first(); + return AlternativesSyntaxNode::create(move(syntax_components)); +} + +} diff --git a/Libraries/LibWeb/CSS/Parser/SyntaxParsing.h b/Libraries/LibWeb/CSS/Parser/SyntaxParsing.h new file mode 100644 index 00000000000..5c071ff6af1 --- /dev/null +++ b/Libraries/LibWeb/CSS/Parser/SyntaxParsing.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Web::CSS::Parser { + +OwnPtr parse_as_syntax(Vector const&); + +} diff --git a/Libraries/LibWeb/Forward.h b/Libraries/LibWeb/Forward.h index 68c06207715..ab66db9cc31 100644 --- a/Libraries/LibWeb/Forward.h +++ b/Libraries/LibWeb/Forward.h @@ -343,6 +343,7 @@ namespace Web::CSS::Parser { class ComponentValue; class GuardedSubstitutionContexts; class Parser; +class SyntaxNode; class Token; class Tokenizer; diff --git a/Tests/LibWeb/CMakeLists.txt b/Tests/LibWeb/CMakeLists.txt index 521ac1d7236..20059375c2e 100644 --- a/Tests/LibWeb/CMakeLists.txt +++ b/Tests/LibWeb/CMakeLists.txt @@ -2,6 +2,7 @@ set(TEST_SOURCES TestCSSIDSpeed.cpp TestCSSInheritedProperty.cpp TestCSSPixels.cpp + TestCSSSyntaxParser.cpp TestCSSTokenStream.cpp TestFetchInfrastructure.cpp TestFetchURL.cpp diff --git a/Tests/LibWeb/TestCSSSyntaxParser.cpp b/Tests/LibWeb/TestCSSSyntaxParser.cpp new file mode 100644 index 00000000000..d3c96acb8d2 --- /dev/null +++ b/Tests/LibWeb/TestCSSSyntaxParser.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include + +namespace Web::CSS::Parser { + +static void compare_parsed_syntax_dump_to_string(Vector const& syntax_values, StringView expected) +{ + auto syntax = parse_as_syntax(syntax_values); + EXPECT(syntax != nullptr); + if (syntax) + 
EXPECT_EQ(syntax->dump(), expected); +} + +static void expect_dumps_equal(Vector const& lhs_values, Vector const& rhs_values) +{ + auto lhs = parse_as_syntax(lhs_values); + auto rhs = parse_as_syntax(rhs_values); + EXPECT(lhs != nullptr); + EXPECT(rhs != nullptr); + if (lhs && rhs) + EXPECT_EQ(lhs->dump(), rhs->dump()); +} + +#define TYPE_TOKENS(name) Token::create_delim('<'), Token::create_ident(name ""_fly_string), Token::create_delim('>') + +TEST_CASE(single_universal) +{ + compare_parsed_syntax_dump_to_string(Vector { Token::create_delim('*') }, "Universal\n"sv); +} + +TEST_CASE(single_ident) +{ + compare_parsed_syntax_dump_to_string(Vector { Token::create_ident("thing"_fly_string) }, "Ident: thing\n"sv); +} + +TEST_CASE(single_type) +{ + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("angle") }, "Type: angle\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("color") }, "Type: color\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("custom-ident") }, "Type: custom-ident\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("image") }, "Type: image\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("integer") }, "Type: integer\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("length") }, "Type: length\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("length-percentage") }, "Type: length-percentage\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("number") }, "Type: number\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("percentage") }, "Type: percentage\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("resolution") }, "Type: resolution\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("string") }, "Type: string\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("time") }, "Type: time\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("url") }, "Type: 
url\n"sv); + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("transform-function") }, "Type: transform-function\n"sv); +} + +TEST_CASE(multiple_keywords) +{ + compare_parsed_syntax_dump_to_string(Vector { + Token::create_ident("well"_fly_string), + Token::create_delim('|'), + Token::create_ident("hello"_fly_string), + Token::create_delim('|'), + Token::create_ident("friends"_fly_string) }, + R"~~~(Alternatives: + Ident: well + Ident: hello + Ident: friends +)~~~"sv); +} + +TEST_CASE(repeated_type) +{ + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("number"), Token::create_delim('+') }, + R"~~~(Multiplier: + Type: number +)~~~"sv); +} + +TEST_CASE(repeated_with_commas) +{ + compare_parsed_syntax_dump_to_string(Vector { TYPE_TOKENS("number"), Token::create_delim('#') }, + R"~~~(CommaSeparatedMultiplier: + Type: number +)~~~"sv); +} + +TEST_CASE(complex) +{ + compare_parsed_syntax_dump_to_string(Vector { + Token::create_ident("well"_fly_string), + Token::create_delim('|'), + TYPE_TOKENS("number"), Token::create_delim('+'), + Token::create_delim('|'), + TYPE_TOKENS("string"), Token::create_delim('#') }, + R"~~~(Alternatives: + Ident: well + Multiplier: + Type: number + CommaSeparatedMultiplier: + Type: string +)~~~"sv); +} + +TEST_CASE(syntax_string) +{ + // A single string token's contents are parsed as if it was unquoted + + expect_dumps_equal(Vector { TYPE_TOKENS("number") }, Vector { Token::create_string("<number>"_fly_string) }); + + expect_dumps_equal(Vector { + Token::create_ident("well"_fly_string), + Token::create_delim('|'), + TYPE_TOKENS("number"), Token::create_delim('+'), + Token::create_delim('|'), + TYPE_TOKENS("string"), Token::create_delim('#') }, + Vector { Token::create_string("well | <number>+ | <string>#"_fly_string) }); +} + +TEST_CASE(invalid) +{ + // Empty + EXPECT(!parse_as_syntax(Vector {})); + EXPECT(!parse_as_syntax(Vector { Token::create_whitespace() })); + EXPECT(!parse_as_syntax(Vector { Token::create(Token::Type::EndOfFile) })); + 
EXPECT(!parse_as_syntax(Vector { Token::create(Token::Type::Invalid) })); + + // Incomplete + EXPECT(!parse_as_syntax(Vector { Token::create_delim('<'), Token::create_ident("number"_fly_string) })); + EXPECT(!parse_as_syntax(Vector { Token::create_ident("thing"_fly_string), Token::create_delim('|') })); + + // '*' is only allowed on its own + EXPECT(!parse_as_syntax(Vector { Token::create_delim('*'), Token::create_delim('|'), Token::create_delim('*') })); + + // <transform-list> cannot have multipliers + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("transform-list"), Token::create_delim('+') })); + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("transform-list"), Token::create_delim('#') })); + + // For <syntax-type-name>, only predefined types are allowed + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("woozle") })); + + // <syntax> doesn't allow multiple types/keywords without a combinator + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("number"), Token::create_whitespace(), TYPE_TOKENS("integer") })); + EXPECT(!parse_as_syntax(Vector { Token::create_ident("thingy"_fly_string), Token::create_whitespace(), Token::create_ident("whatsit"_fly_string) })); + + // Whitespace isn't allowed between a type and its multiplier + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("number"), Token::create_whitespace(), Token::create_delim('+') })); + EXPECT(!parse_as_syntax(Vector { TYPE_TOKENS("number"), Token::create_whitespace(), Token::create_delim('#') })); +} + +}