From fd45c53c11d0703b44102dcafbee83a3092d1fe2 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Wed, 2 Apr 2025 17:05:48 +0100 Subject: [PATCH] LibWeb: Parse descriptors as style values, using the JSON data The goal here is to do something a bit smarter with the parsing here than we do for properties. Instead of the JSON saying "here are the values, and here are the keywords, and we can have up to 3", here we place the syntax in the JSON directly (though currently broken up as one string per option) and then we attempt to parse each one in sequence. It's something we'll need eventually for `@property` among other things. ...However, in this first pass, I've gone with the simplest option of hard-coding the types instead of figuring them out properly. So there's a PositivePercentage type and a UnicodeRangeTokens type, instead of properly implementing the grammar for those in a generic way. --- Libraries/LibWeb/CMakeLists.txt | 2 + Libraries/LibWeb/CSS/Descriptor.cpp | 14 ++ Libraries/LibWeb/CSS/Descriptor.h | 22 +++ .../LibWeb/CSS/Parser/DescriptorParsing.cpp | 134 ++++++++++++++++++ Libraries/LibWeb/CSS/Parser/Helpers.cpp | 14 ++ Libraries/LibWeb/CSS/Parser/Parser.cpp | 45 +++++- Libraries/LibWeb/CSS/Parser/Parser.h | 15 +- Libraries/LibWeb/CSS/Parser/ValueParsing.cpp | 12 +- .../LibWeb/GenerateCSSDescriptors.cpp | 105 ++++++++++++++ 9 files changed, 356 insertions(+), 7 deletions(-) create mode 100644 Libraries/LibWeb/CSS/Descriptor.cpp create mode 100644 Libraries/LibWeb/CSS/Descriptor.h create mode 100644 Libraries/LibWeb/CSS/Parser/DescriptorParsing.cpp diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index 4106c282759..1d2f4009fae 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -89,6 +89,7 @@ set(SOURCES CSS/CSSTransition.cpp CSS/CascadedProperties.cpp CSS/ComputedProperties.cpp + CSS/Descriptor.cpp CSS/Display.cpp CSS/EdgeRect.cpp CSS/Fetch.cpp @@ -108,6 +109,7 @@ set(SOURCES CSS/MediaQueryListEvent.cpp CSS/Number.cpp CSS/Parser/ComponentValue.cpp + CSS/Parser/DescriptorParsing.cpp CSS/Parser/GradientParsing.cpp CSS/Parser/Helpers.cpp CSS/Parser/MediaParsing.cpp diff --git a/Libraries/LibWeb/CSS/Descriptor.cpp b/Libraries/LibWeb/CSS/Descriptor.cpp new file mode 100644 index 00000000000..8fb4b107060 --- /dev/null +++ b/Libraries/LibWeb/CSS/Descriptor.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace Web::CSS { + +Descriptor::~Descriptor() = default; + +} diff --git a/Libraries/LibWeb/CSS/Descriptor.h b/Libraries/LibWeb/CSS/Descriptor.h new file mode 100644 index 00000000000..82639c80977 --- /dev/null +++ b/Libraries/LibWeb/CSS/Descriptor.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Web::CSS { + +struct Descriptor { + ~Descriptor(); + + DescriptorID descriptor_id; + NonnullRefPtr value; +}; + +} diff --git a/Libraries/LibWeb/CSS/Parser/DescriptorParsing.cpp b/Libraries/LibWeb/CSS/Parser/DescriptorParsing.cpp new file mode 100644 index 00000000000..b4b1545571b --- /dev/null +++ b/Libraries/LibWeb/CSS/Parser/DescriptorParsing.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2025, Sam Atkins + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace Web::CSS::Parser { + +Parser::ParseErrorOr> Parser::parse_descriptor_value(AtRuleID at_rule_id, DescriptorID descriptor_id, TokenStream& unprocessed_tokens) +{ + if (!at_rule_supports_descriptor(at_rule_id, descriptor_id)) { + dbgln_if(CSS_PARSER_DEBUG, "Unsupported descriptor '{}' in '{}'", to_string(descriptor_id), to_string(at_rule_id)); + return ParseError::SyntaxError; + } + + auto context_guard = push_temporary_value_parsing_context(DescriptorContext { at_rule_id, descriptor_id }); + + Vector component_values; + while (unprocessed_tokens.has_next_token()) { + if (unprocessed_tokens.peek_token().is(Token::Type::Semicolon)) + break; + + // FIXME: Stop removing whitespace here. It's just for compatibility with the property-parsing code. + auto const& token = unprocessed_tokens.consume_a_token(); + if (token.is(Token::Type::Whitespace)) + continue; + + component_values.append(token); + } + + TokenStream tokens { component_values }; + auto metadata = get_descriptor_metadata(at_rule_id, descriptor_id); + for (auto const& option : metadata.syntax) { + auto transaction = tokens.begin_transaction(); + auto parsed_style_value = option.visit( + [&](Keyword keyword) { + return parse_all_as_single_keyword_value(tokens, keyword); + }, + [&](PropertyID property_id) -> RefPtr { + auto value_for_property = parse_css_value_for_property(property_id, tokens); + if (!value_for_property) + return nullptr; + // Descriptors don't accept the following, which properties do: + // - CSS-wide keywords + // - Shorthands + // - Arbitrary substitution functions (so, UnresolvedStyleValue) + if (value_for_property->is_css_wide_keyword() || value_for_property->is_shorthand() || value_for_property->is_unresolved()) + return nullptr; + return value_for_property.release_nonnull(); + }, + [&](DescriptorMetadata::ValueType value_type) -> RefPtr { + switch (value_type) { + case DescriptorMetadata::ValueType::FamilyName: + return parse_family_name_value(tokens); + case DescriptorMetadata::ValueType::FontSrcList: + return parse_comma_separated_value_list(tokens, [this](auto& tokens) -> RefPtr { + return parse_font_source_value(tokens); + }); + case DescriptorMetadata::ValueType::OptionalDeclarationValue: { + // FIXME: This is for an @property's initial value. Figure out what this should actually do once we need it. + StringBuilder initial_value_sb; + while (tokens.has_next_token()) + initial_value_sb.append(tokens.consume_a_token().to_string()); + return StringStyleValue::create(initial_value_sb.to_fly_string_without_validation()); + } + case DescriptorMetadata::ValueType::PositivePercentage: + if (auto percentage_value = parse_percentage_value(tokens)) { + if (percentage_value->is_percentage()) { + if (percentage_value->as_percentage().value() < 0) + return nullptr; + return percentage_value.release_nonnull(); + } + // All calculations in descriptors must be resolvable at parse-time. + if (percentage_value->is_calculated()) { + auto percentage = percentage_value->as_calculated().resolve_percentage({}); + if (percentage.has_value() && percentage->value() >= 0) + return PercentageStyleValue::create(percentage.release_value()); + return nullptr; + } + } + return nullptr; + case DescriptorMetadata::ValueType::String: + return parse_string_value(tokens); + case DescriptorMetadata::ValueType::UnicodeRangeTokens: + return parse_comma_separated_value_list(tokens, [this](auto& tokens) -> RefPtr { + return parse_unicode_range_value(tokens); + }); + } + return nullptr; + }); + if (!parsed_style_value || tokens.has_next_token()) + continue; + transaction.commit(); + return parsed_style_value.release_nonnull(); + } + + if constexpr (CSS_PARSER_DEBUG) { + dbgln("Failed to parse descriptor '{}' in '{}'", to_string(descriptor_id), to_string(at_rule_id)); + tokens.dump_all_tokens(); + } + + return ParseError::SyntaxError; +} + +Optional Parser::convert_to_descriptor(AtRuleID at_rule_id, Declaration const& declaration) +{ + auto descriptor_id = descriptor_id_from_string(at_rule_id, declaration.name); + if (!descriptor_id.has_value()) + return {}; + + auto value_token_stream = TokenStream(declaration.value); + auto value = parse_descriptor_value(at_rule_id, descriptor_id.value(), value_token_stream); + if (value.is_error()) { + if (value.error() == ParseError::SyntaxError) { + if constexpr (CSS_PARSER_DEBUG) { + dbgln("Unable to parse value for CSS @{} descriptor '{}'.", to_string(at_rule_id), declaration.name); + value_token_stream.dump_all_tokens(); + } + } + return {}; + } + + return Descriptor { *descriptor_id, value.release_value() }; +} + +} diff --git a/Libraries/LibWeb/CSS/Parser/Helpers.cpp b/Libraries/LibWeb/CSS/Parser/Helpers.cpp index 6d11fb205b5..cfc3e5487d3 100644 --- a/Libraries/LibWeb/CSS/Parser/Helpers.cpp +++ b/Libraries/LibWeb/CSS/Parser/Helpers.cpp @@ -64,6 +64,13 @@ CSS::Parser::Parser::PropertiesAndCustomProperties parse_css_style_attribute(CSS return CSS::Parser::Parser::create(context, css).parse_as_style_attribute(); } +Vector parse_css_list_of_descriptors(CSS::Parser::ParsingParams const& parsing_params, CSS::AtRuleID at_rule_id, StringView css) +{ + if (css.is_empty()) + return {}; + return CSS::Parser::Parser::create(parsing_params, css).parse_as_list_of_descriptors(at_rule_id); +} + RefPtr parse_css_value(CSS::Parser::ParsingParams const& context, StringView string, CSS::PropertyID property_id) { if (string.is_empty()) @@ -71,6 +78,13 @@ RefPtr parse_css_value(CSS::Parser::ParsingParams const& con return CSS::Parser::Parser::create(context, string).parse_as_css_value(property_id); } +RefPtr parse_css_descriptor(CSS::Parser::ParsingParams const& parsing_params, CSS::AtRuleID at_rule_id, CSS::DescriptorID descriptor_id, StringView string) +{ + if (string.is_empty()) + return nullptr; + return CSS::Parser::Parser::create(parsing_params, string).parse_as_descriptor_value(at_rule_id, descriptor_id); +} + CSS::CSSRule* parse_css_rule(CSS::Parser::ParsingParams const& context, StringView css_text) { return CSS::Parser::Parser::create(context, css_text).parse_as_css_rule(); diff --git a/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Libraries/LibWeb/CSS/Parser/Parser.cpp index 77451073a78..8dcfc1e49c3 100644 --- a/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -1350,6 +1350,36 @@ Parser::PropertiesAndCustomProperties Parser::parse_as_style_attribute() return properties; } +Vector Parser::parse_as_list_of_descriptors(AtRuleID at_rule_id) +{ + auto context_type = [at_rule_id] { + switch (at_rule_id) { + case AtRuleID::FontFace: + return ContextType::AtFontFace; + case AtRuleID::Property: + return ContextType::AtProperty; + } + VERIFY_NOT_REACHED(); + }(); + + m_rule_context.append(context_type); + auto declarations_and_at_rules = parse_a_blocks_contents(m_token_stream); + m_rule_context.take_last(); + + Vector descriptors; + for (auto const& rule_or_list : declarations_and_at_rules) { + if (rule_or_list.has()) + continue; + + auto& declarations = rule_or_list.get>(); + for (auto const& declaration : declarations) { + if (auto descriptor = convert_to_descriptor(at_rule_id, declaration); descriptor.has_value()) + descriptors.append(descriptor.release_value()); + } + } + return descriptors; +} + bool Parser::is_valid_in_the_current_context(Declaration const&) const { // TODO: Determine if this *particular* declaration is valid here, not just declarations in general. @@ -1574,9 +1604,10 @@ bool Parser::context_allows_quirky_length() const for (auto i = 1u; i < m_value_context.size() && unitless_length_allowed; i++) { unitless_length_allowed = m_value_context[i].visit( [](PropertyID const& property_id) { return property_has_quirk(property_id, Quirk::UnitlessLength); }, - [top_level_property](Parser::FunctionContext const& function_context) { + [top_level_property](FunctionContext const& function_context) { return function_context.name == "rect"sv && top_level_property == PropertyID::Clip; - }); + }, + [](DescriptorContext const&) { return false; }); } return unitless_length_allowed; @@ -1602,6 +1633,16 @@ RefPtr Parser::parse_as_css_value(PropertyID property_id) return parsed_value.release_value(); } +RefPtr Parser::parse_as_descriptor_value(AtRuleID at_rule_id, DescriptorID descriptor_id) +{ + auto component_values = parse_a_list_of_component_values(m_token_stream); + auto tokens = TokenStream(component_values); + auto parsed_value = parse_descriptor_value(at_rule_id, descriptor_id, tokens); + if (parsed_value.is_error()) + return nullptr; + return parsed_value.release_value(); +} + // https://html.spec.whatwg.org/multipage/images.html#parsing-a-sizes-attribute LengthOrCalculated Parser::parse_as_sizes_attribute(DOM::Element const& element, HTML::HTMLImageElement const* img) { diff --git a/Libraries/LibWeb/CSS/Parser/Parser.h b/Libraries/LibWeb/CSS/Parser/Parser.h index 765a3e38fcb..8d4f32f93d1 100644 --- a/Libraries/LibWeb/CSS/Parser/Parser.h +++ b/Libraries/LibWeb/CSS/Parser/Parser.h @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -94,6 +96,7 @@ public: HashMap custom_properties; }; PropertiesAndCustomProperties parse_as_style_attribute(); + Vector parse_as_list_of_descriptors(AtRuleID); CSSRule* parse_as_css_rule(); Optional parse_as_supports_condition(); @@ -116,6 +119,7 @@ public: RefPtr parse_as_supports(); RefPtr parse_as_css_value(PropertyID); + RefPtr parse_as_descriptor_value(AtRuleID, DescriptorID); Optional parse_as_component_value(); @@ -248,6 +252,8 @@ private: GC::Ref convert_to_style_declaration(Vector const&); Optional convert_to_style_property(Declaration const&); + Optional convert_to_descriptor(AtRuleID, Declaration const&); + Optional parse_dimension(ComponentValue const&); Optional parse_angle(TokenStream&); Optional parse_angle_percentage(TokenStream&); @@ -294,6 +300,7 @@ private: RefPtr parse_radial_gradient_function(TokenStream&); ParseErrorOr> parse_css_value(PropertyID, TokenStream&, Optional original_source_text = {}); + ParseErrorOr> parse_descriptor_value(AtRuleID, DescriptorID, TokenStream&); RefPtr parse_css_value_for_property(PropertyID, TokenStream&); struct PropertyAndValue { PropertyID property; @@ -482,7 +489,11 @@ private: struct FunctionContext { StringView name; }; - using ValueParsingContext = Variant; + struct DescriptorContext { + AtRuleID at_rule; + DescriptorID descriptor; + }; + using ValueParsingContext = Variant; Vector m_value_context; auto push_temporary_value_parsing_context(ValueParsingContext&& context) { @@ -515,7 +526,9 @@ namespace Web { CSS::CSSStyleSheet* parse_css_stylesheet(CSS::Parser::ParsingParams const&, StringView, Optional location = {}, Vector> = {}); CSS::Parser::Parser::PropertiesAndCustomProperties parse_css_style_attribute(CSS::Parser::ParsingParams const&, StringView); +Vector parse_css_list_of_descriptors(CSS::Parser::ParsingParams const&, CSS::AtRuleID, StringView); RefPtr parse_css_value(CSS::Parser::ParsingParams const&, StringView, CSS::PropertyID property_id = CSS::PropertyID::Invalid); +RefPtr parse_css_descriptor(CSS::Parser::ParsingParams const&, CSS::AtRuleID, CSS::DescriptorID, StringView); Optional parse_selector(CSS::Parser::ParsingParams const&, StringView); Optional parse_selector_for_nested_style_rule(CSS::Parser::ParsingParams const&, StringView); Optional parse_pseudo_element_selector(CSS::Parser::ParsingParams const&, StringView); diff --git a/Libraries/LibWeb/CSS/Parser/ValueParsing.cpp b/Libraries/LibWeb/CSS/Parser/ValueParsing.cpp index ddfd5903b14..3a8c9c075a2 100644 --- a/Libraries/LibWeb/CSS/Parser/ValueParsing.cpp +++ b/Libraries/LibWeb/CSS/Parser/ValueParsing.cpp @@ -1795,14 +1795,14 @@ RefPtr Parser::parse_color_value(TokenStream& tok if (!m_value_context.is_empty()) { quirky_color_allowed = m_value_context.first().visit( [](PropertyID const& property_id) { return property_has_quirk(property_id, Quirk::HashlessHexColor); }, - [](FunctionContext const&) { return false; }); + [](FunctionContext const&) { return false; }, + [](DescriptorContext const&) { return false; }); } for (auto i = 1u; i < m_value_context.size() && quirky_color_allowed; i++) { quirky_color_allowed = m_value_context[i].visit( [](PropertyID const& property_id) { return property_has_quirk(property_id, Quirk::UnitlessLength); }, - [](FunctionContext const&) { - return false; - }); + [](FunctionContext const&) { return false; }, + [](DescriptorContext const&) { return false; }); } if (quirky_color_allowed) { // NOTE: This algorithm is no longer in the spec, since the concept got moved and renamed. However, it works, @@ -3355,6 +3355,10 @@ RefPtr Parser::parse_calculated_value(ComponentValue const& compo } // FIXME: Add other functions that provide a context for resolving values return {}; + }, + [](DescriptorContext const&) -> Optional { + // FIXME: If any descriptors have `<*-percentage>` or `` types, add them here. + return CalculationContext {}; }); if (maybe_context.has_value()) { context = maybe_context.release_value(); diff --git a/Meta/Lagom/Tools/CodeGenerators/LibWeb/GenerateCSSDescriptors.cpp b/Meta/Lagom/Tools/CodeGenerators/LibWeb/GenerateCSSDescriptors.cpp index d1238f94af6..9adcb26edc7 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibWeb/GenerateCSSDescriptors.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibWeb/GenerateCSSDescriptors.cpp @@ -82,6 +82,8 @@ ErrorOr generate_header_file(JsonObject const& at_rules_data, Core::File& #include #include #include +#include +#include namespace Web::CSS { @@ -111,6 +113,22 @@ Optional descriptor_id_from_string(AtRuleID, StringView); FlyString to_string(DescriptorID); bool at_rule_supports_descriptor(AtRuleID, DescriptorID); + +struct DescriptorMetadata { + enum class ValueType { + // FIXME: Parse the grammar instead of hard-coding all the options! + FamilyName, + FontSrcList, + OptionalDeclarationValue, + PositivePercentage, + String, + UnicodeRangeTokens, + }; + Vector> syntax; +}; + +DescriptorMetadata get_descriptor_metadata(AtRuleID, DescriptorID); + } )~~~"); @@ -248,6 +266,93 @@ bool at_rule_supports_descriptor(AtRuleID at_rule_id, DescriptorID descriptor_id VERIFY_NOT_REACHED(); } +DescriptorMetadata get_descriptor_metadata(AtRuleID at_rule_id, DescriptorID descriptor_id) +{ + switch (at_rule_id) { +)~~~"); + + at_rules_data.for_each_member([&](auto const& at_rule_name, JsonValue const& value) { + auto const& at_rule = value.as_object(); + + auto at_rule_generator = generator.fork(); + at_rule_generator.set("at_rule:titlecase", title_casify(at_rule_name)); + at_rule_generator.append(R"~~~( + case AtRuleID::@at_rule:titlecase@: + switch (descriptor_id) { +)~~~"); + + auto const& descriptors = at_rule.get_object("descriptors"sv).value(); + descriptors.for_each_member([&](auto const& descriptor_name, JsonValue const& descriptor_value) { + auto const& descriptor = descriptor_value.as_object(); + if (is_legacy_alias(descriptor)) + return; + + auto descriptor_generator = at_rule_generator.fork(); + descriptor_generator.set("descriptor:titlecase", title_casify(descriptor_name)); + descriptor_generator.append(R"~~~( + case DescriptorID::@descriptor:titlecase@: { + DescriptorMetadata metadata; +)~~~"); + auto const& syntax = descriptor.get_array("syntax"sv).value(); + for (auto const& entry : syntax.values()) { + auto option_generator = descriptor_generator.fork(); + auto const& syntax_string = entry.as_string(); + + if (syntax_string.starts_with_bytes("<'"sv)) { + // Property + option_generator.set("property:titlecase"sv, title_casify(MUST(syntax_string.substring_from_byte_offset_with_shared_superstring(2, syntax_string.byte_count() - 4)))); + option_generator.append(R"~~~( + metadata.syntax.empend(PropertyID::@property:titlecase@); +)~~~"); + } else if (syntax_string.starts_with('<')) { + // Value type + // FIXME: Actually parse the grammar, instead of hard-coding the options! + auto value_type = [&syntax_string] { + if (syntax_string == ""sv) + return "FamilyName"_string; + if (syntax_string == ""sv) + return "FontSrcList"_string; + if (syntax_string == "?"sv) + return "OptionalDeclarationValue"_string; + if (syntax_string == ""sv) + return "PositivePercentage"_string; + if (syntax_string == ""sv) + return "String"_string; + if (syntax_string == "#"sv) + return "UnicodeRangeTokens"_string; + VERIFY_NOT_REACHED(); + }(); + option_generator.set("value_type"sv, value_type); + option_generator.append(R"~~~( + metadata.syntax.empend(DescriptorMetadata::ValueType::@value_type@); +)~~~"); + + } else { + // Keyword + option_generator.set("keyword:titlecase"sv, title_casify(syntax_string)); + option_generator.append(R"~~~( + metadata.syntax.empend(Keyword::@keyword:titlecase@); +)~~~"); + } + } + descriptor_generator.append(R"~~~( + return metadata; + } +)~~~"); + }); + + at_rule_generator.append(R"~~~( + default: + VERIFY_NOT_REACHED(); + } +)~~~"); + }); + + generator.append(R"~~~( + } + VERIFY_NOT_REACHED(); +} + } )~~~");