From 6e6507c8c5fd8edff302d80bceb93f7f9a0a1852 Mon Sep 17 00:00:00 2001 From: Glenn Skrzypczak Date: Tue, 22 Jul 2025 00:58:28 +0200 Subject: [PATCH] LibWeb/HTML: Sanitize email input with multiple attribute This implements the missing part of the value sanitization algorithm for email inputs with the multiple attribute. --- Libraries/LibWeb/HTML/FormAssociatedElement.h | 4 +- Libraries/LibWeb/HTML/HTMLButtonElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLButtonElement.h | 2 +- Libraries/LibWeb/HTML/HTMLImageElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLImageElement.h | 2 +- Libraries/LibWeb/HTML/HTMLInputElement.cpp | 58 +++++++++++++------ Libraries/LibWeb/HTML/HTMLInputElement.h | 2 +- Libraries/LibWeb/HTML/HTMLObjectElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLObjectElement.h | 2 +- Libraries/LibWeb/HTML/HTMLOutputElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLOutputElement.h | 2 +- Libraries/LibWeb/HTML/HTMLSelectElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLSelectElement.h | 2 +- Libraries/LibWeb/HTML/HTMLTextAreaElement.cpp | 2 +- Libraries/LibWeb/HTML/HTMLTextAreaElement.h | 2 +- ...rm-validation-validity-patternMismatch.txt | 12 ++-- .../forms/the-input-element/email.txt | 9 ++- 17 files changed, 66 insertions(+), 43 deletions(-) diff --git a/Libraries/LibWeb/HTML/FormAssociatedElement.h b/Libraries/LibWeb/HTML/FormAssociatedElement.h index 6586417a893..31fafa8a920 100644 --- a/Libraries/LibWeb/HTML/FormAssociatedElement.h +++ b/Libraries/LibWeb/HTML/FormAssociatedElement.h @@ -56,7 +56,7 @@ private: { \ ElementBaseClass::attribute_changed(name, old_value, value, namespace_); \ form_node_attribute_changed(name, value); \ - form_associated_element_attribute_changed(name, value, namespace_); \ + form_associated_element_attribute_changed(name, old_value, value, namespace_); \ } // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#selection-direction @@ -152,7 +152,7 @@ protected: virtual void form_associated_element_was_inserted() { } virtual 
void form_associated_element_was_removed(DOM::Node*) { } virtual void form_associated_element_was_moved(GC::Ptr) { } - virtual void form_associated_element_attribute_changed(FlyString const&, Optional const&, Optional const&) { } + virtual void form_associated_element_attribute_changed(FlyString const&, Optional const&, Optional const&, Optional const&) { } void form_node_was_inserted(); void form_node_was_removed(); diff --git a/Libraries/LibWeb/HTML/HTMLButtonElement.cpp b/Libraries/LibWeb/HTML/HTMLButtonElement.cpp index c9694c432b7..e5b896e13bb 100644 --- a/Libraries/LibWeb/HTML/HTMLButtonElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLButtonElement.cpp @@ -80,7 +80,7 @@ WebIDL::ExceptionOr HTMLButtonElement::set_type_for_bindings(String const& return set_attribute(HTML::AttributeNames::type, type); } -void HTMLButtonElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const& namespace_) +void HTMLButtonElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const& value, Optional const& namespace_) { PopoverInvokerElement::associated_attribute_changed(name, value, namespace_); } diff --git a/Libraries/LibWeb/HTML/HTMLButtonElement.h b/Libraries/LibWeb/HTML/HTMLButtonElement.h index acab0caa8ab..a857cdb7a2d 100644 --- a/Libraries/LibWeb/HTML/HTMLButtonElement.h +++ b/Libraries/LibWeb/HTML/HTMLButtonElement.h @@ -42,7 +42,7 @@ public: String type_for_bindings() const; WebIDL::ExceptionOr set_type_for_bindings(String const&); - virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const& namespace_) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; bool will_validate(); diff --git a/Libraries/LibWeb/HTML/HTMLImageElement.cpp b/Libraries/LibWeb/HTML/HTMLImageElement.cpp index 
c87ad091ea6..4a4034cf76d 100644 --- a/Libraries/LibWeb/HTML/HTMLImageElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLImageElement.cpp @@ -134,7 +134,7 @@ void HTMLImageElement::apply_presentational_hints(GC::Ref const& value, Optional const&) +void HTMLImageElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const& value, Optional const&) { if (name == HTML::AttributeNames::crossorigin) { m_cors_setting = cors_setting_attribute_from_keyword(value); diff --git a/Libraries/LibWeb/HTML/HTMLImageElement.h b/Libraries/LibWeb/HTML/HTMLImageElement.h index dd62fc0e3ec..6bfa1172989 100644 --- a/Libraries/LibWeb/HTML/HTMLImageElement.h +++ b/Libraries/LibWeb/HTML/HTMLImageElement.h @@ -38,7 +38,7 @@ class HTMLImageElement final public: virtual ~HTMLImageElement() override; - virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const&) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; Optional alternative_text() const override { diff --git a/Libraries/LibWeb/HTML/HTMLInputElement.cpp b/Libraries/LibWeb/HTML/HTMLInputElement.cpp index 0d71f593353..89d7785a42c 100644 --- a/Libraries/LibWeb/HTML/HTMLInputElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLInputElement.cpp @@ -1380,7 +1380,7 @@ void HTMLInputElement::did_lose_focus() commit_pending_changes(); } -void HTMLInputElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const& namespace_) +void HTMLInputElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) { PopoverInvokerElement::associated_attribute_changed(name, value, namespace_); @@ -1434,6 +1434,9 @@ void HTMLInputElement::form_associated_element_attribute_changed(FlyString const } else if (name == 
HTML::AttributeNames::maxlength) { handle_maxlength_attribute(); } else if (name == HTML::AttributeNames::multiple) { + if (type_state() == TypeAttributeState::Email && old_value.has_value() != value.has_value()) { + m_value = value_sanitization_algorithm(m_value); + } update_shadow_tree(); } } @@ -1657,18 +1660,34 @@ String HTMLInputElement::value_sanitization_algorithm(String const& value) const } return MUST(value.trim(Infra::ASCII_WHITESPACE)); } else if (type_state() == HTMLInputElement::TypeAttributeState::Email) { - // https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email):value-sanitization-algorithm - // FIXME: handle the `multiple` attribute - // Strip newlines from the value, then strip leading and trailing ASCII whitespace from the value. - if (value.bytes_as_string_view().contains('\r') || value.bytes_as_string_view().contains('\n')) { - StringBuilder builder; - for (auto c : value.bytes_as_string_view()) { - if (c != '\r' && c != '\n') - builder.append(c); + if (!has_attribute(AttributeNames::multiple)) { + // https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email):value-sanitization-algorithm + // Strip newlines from the value, then strip leading and trailing ASCII whitespace from the value. + if (value.bytes_as_string_view().contains('\r') || value.bytes_as_string_view().contains('\n')) { + StringBuilder builder; + for (auto c : value.bytes_as_string_view()) { + if (c != '\r' && c != '\n') + builder.append(c); + } + return MUST(String::from_utf8(builder.string_view().trim_whitespace())); } - return MUST(String::from_utf8(builder.string_view().trim(Infra::ASCII_WHITESPACE))); + return MUST(value.trim_ascii_whitespace()); } - return MUST(value.trim(Infra::ASCII_WHITESPACE)); + + // https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email):value-sanitization-algorithm-2 + // 1. 
Split on commas the element's value, strip leading and trailing ASCII whitespace from each resulting token, if any, + // and let the element's values be the (possibly empty) resulting list of (possibly empty) tokens, maintaining the original order. + Vector values {}; + for (auto const& token : MUST(value.split(',', SplitBehavior::KeepEmpty))) { + values.append(MUST(token.trim_ascii_whitespace())); + } + + // 2. Set the element's value to the result of concatenating the element's values, separating each value + // from the next by a single U+002C COMMA character (,), maintaining the list's order. + StringBuilder builder; + builder.join(',', values); + return MUST(builder.to_string()); + } else if (type_state() == HTMLInputElement::TypeAttributeState::Number) { // https://html.spec.whatwg.org/multipage/input.html#number-state-(type=number):value-sanitization-algorithm // If the value of the element is not a valid floating-point number, then set it @@ -3320,9 +3339,9 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const // type attribute's current state, and the element has a compiled pattern regular expression but that regular expression does not match the element's value, then the element is // suffering from a pattern mismatch. - // FIXME: If the element's value is not the empty string, and the element's multiple attribute is specified and applies to the input element, - // and the element has a compiled pattern regular expression but that regular expression does not match each of the element's values, - // then the element is suffering from a pattern mismatch. + // If the element's value is not the empty string, and the element's multiple attribute is specified and applies to the input element, + // and the element has a compiled pattern regular expression but that regular expression does not match each of the element's values, + // then the element is suffering from a pattern mismatch. 
if (!pattern_applies()) return false; @@ -3331,13 +3350,18 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const if (value.is_empty()) return false; - if (has_attribute(HTML::AttributeNames::multiple) && multiple_applies()) - return false; - auto regexp_object = compiled_pattern_regular_expression(); if (!regexp_object.has_value()) return false; + if (has_attribute(HTML::AttributeNames::multiple) && multiple_applies()) { + VERIFY(type_state() == HTMLInputElement::TypeAttributeState::Email); + + return AK::any_of(MUST(value.split(',')), [&regexp_object](auto const& value) { + return !regexp_object->match(value).success; + }); + } + return !regexp_object->match(value).success; } diff --git a/Libraries/LibWeb/HTML/HTMLInputElement.h b/Libraries/LibWeb/HTML/HTMLInputElement.h index f5b501fac8a..d735845ab7f 100644 --- a/Libraries/LibWeb/HTML/HTMLInputElement.h +++ b/Libraries/LibWeb/HTML/HTMLInputElement.h @@ -195,7 +195,7 @@ public: virtual void clear_algorithm() override; virtual void form_associated_element_was_inserted() override; - virtual void form_associated_element_attribute_changed(FlyString const&, Optional const&, Optional const&) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; virtual WebIDL::ExceptionOr cloned(Node&, bool) const override; diff --git a/Libraries/LibWeb/HTML/HTMLObjectElement.cpp b/Libraries/LibWeb/HTML/HTMLObjectElement.cpp index 5d4d275da19..7101af651a2 100644 --- a/Libraries/LibWeb/HTML/HTMLObjectElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLObjectElement.cpp @@ -91,7 +91,7 @@ bool HTMLObjectElement::will_validate() return false; } -void HTMLObjectElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const&) +void HTMLObjectElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const&, Optional const&) { // 
https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element // Whenever one of the following conditions occur: diff --git a/Libraries/LibWeb/HTML/HTMLObjectElement.h b/Libraries/LibWeb/HTML/HTMLObjectElement.h index dec641cb66e..9e48452649e 100644 --- a/Libraries/LibWeb/HTML/HTMLObjectElement.h +++ b/Libraries/LibWeb/HTML/HTMLObjectElement.h @@ -33,7 +33,7 @@ class HTMLObjectElement final public: virtual ~HTMLObjectElement() override; - virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const& namespace_) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; virtual void form_associated_element_was_removed(DOM::Node*) override; String data() const; diff --git a/Libraries/LibWeb/HTML/HTMLOutputElement.cpp b/Libraries/LibWeb/HTML/HTMLOutputElement.cpp index 8bd23784b4e..55139ee2828 100644 --- a/Libraries/LibWeb/HTML/HTMLOutputElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLOutputElement.cpp @@ -32,7 +32,7 @@ void HTMLOutputElement::visit_edges(Cell::Visitor& visitor) visitor.visit(m_html_for); } -void HTMLOutputElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const&) +void HTMLOutputElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const& value, Optional const&) { if (name == HTML::AttributeNames::for_) { if (m_html_for) diff --git a/Libraries/LibWeb/HTML/HTMLOutputElement.h b/Libraries/LibWeb/HTML/HTMLOutputElement.h index 9c2f7834e62..4bf5c4a47c0 100644 --- a/Libraries/LibWeb/HTML/HTMLOutputElement.h +++ b/Libraries/LibWeb/HTML/HTMLOutputElement.h @@ -65,7 +65,7 @@ private: virtual void initialize(JS::Realm&) override; virtual void visit_edges(Cell::Visitor& visitor) override; - virtual void form_associated_element_attribute_changed(FlyString const& name, 
Optional const& value, Optional const&) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; GC::Ptr m_html_for; diff --git a/Libraries/LibWeb/HTML/HTMLSelectElement.cpp b/Libraries/LibWeb/HTML/HTMLSelectElement.cpp index 8c7503cc876..d172924ffaf 100644 --- a/Libraries/LibWeb/HTML/HTMLSelectElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLSelectElement.cpp @@ -574,7 +574,7 @@ void HTMLSelectElement::form_associated_element_was_inserted() create_shadow_tree_if_needed(); } -void HTMLSelectElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const&) +void HTMLSelectElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const& value, Optional const&) { if (name == HTML::AttributeNames::multiple) { // If the multiple attribute is absent then update the selectedness of the option elements. 
diff --git a/Libraries/LibWeb/HTML/HTMLSelectElement.h b/Libraries/LibWeb/HTML/HTMLSelectElement.h index 8f7aad95e65..859e85dfe29 100644 --- a/Libraries/LibWeb/HTML/HTMLSelectElement.h +++ b/Libraries/LibWeb/HTML/HTMLSelectElement.h @@ -98,7 +98,7 @@ public: virtual void activation_behavior(DOM::Event const&) override; virtual void form_associated_element_was_inserted() override; - virtual void form_associated_element_attribute_changed(FlyString const&, Optional const&, Optional const&) override; + virtual void form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; void did_select_item(Optional const& id); diff --git a/Libraries/LibWeb/HTML/HTMLTextAreaElement.cpp b/Libraries/LibWeb/HTML/HTMLTextAreaElement.cpp index 48b248dc8d4..1a6f3d2ad48 100644 --- a/Libraries/LibWeb/HTML/HTMLTextAreaElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLTextAreaElement.cpp @@ -438,7 +438,7 @@ void HTMLTextAreaElement::children_changed(ChildrenChangedMetadata const* metada } } -void HTMLTextAreaElement::form_associated_element_attribute_changed(FlyString const& name, Optional const& value, Optional const&) +void HTMLTextAreaElement::form_associated_element_attribute_changed(FlyString const& name, Optional const&, Optional const& value, Optional const&) { if (name == HTML::AttributeNames::placeholder) { if (m_placeholder_text_node) diff --git a/Libraries/LibWeb/HTML/HTMLTextAreaElement.h b/Libraries/LibWeb/HTML/HTMLTextAreaElement.h index 55258ceb4e4..491849821f9 100644 --- a/Libraries/LibWeb/HTML/HTMLTextAreaElement.h +++ b/Libraries/LibWeb/HTML/HTMLTextAreaElement.h @@ -70,7 +70,7 @@ public: virtual WebIDL::ExceptionOr cloned(Node&, bool) const override; virtual void form_associated_element_was_inserted() override; - virtual void form_associated_element_attribute_changed(FlyString const&, Optional const&, Optional const&) override; + virtual void 
form_associated_element_attribute_changed(FlyString const& name, Optional const& old_value, Optional const& value, Optional const& namespace_) override; virtual void children_changed(ChildrenChangedMetadata const*) override; diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/constraints/form-validation-validity-patternMismatch.txt b/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/constraints/form-validation-validity-patternMismatch.txt index 6b413ae42ff..c24cc7007e6 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/constraints/form-validation-validity-patternMismatch.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/constraints/form-validation-validity-patternMismatch.txt @@ -2,8 +2,8 @@ Harness status: OK Found 85 tests -69 Pass -16 Fail +71 Pass +14 Fail Pass [INPUT in TEXT status] The pattern attribute is not set Pass [INPUT in TEXT status] The value attibute is empty string Pass [INPUT in TEXT status] The value attribute matches the pattern attribute @@ -80,12 +80,12 @@ Pass [INPUT in EMAIL status] The pattern attribute is not set, if multiple is pr Pass [INPUT in EMAIL status] The value attibute is empty string, if multiple is present Pass [INPUT in EMAIL status] The value attribute matches the pattern attribute, if multiple is present Pass [INPUT in EMAIL status] The value(ABC) in unicode attribute matches the pattern attribute, if multiple is present -Fail [INPUT in EMAIL status] The value attribute mismatches the pattern attribute, if multiple is present -Fail [INPUT in EMAIL status] The value attribute mismatches the pattern attribute even when a subset matches, if multiple is present +Pass [INPUT in EMAIL status] The value attribute mismatches the pattern attribute, if multiple is present +Pass [INPUT in EMAIL status] The value attribute mismatches the pattern attribute even when a subset matches, if multiple is present Pass [INPUT in EMAIL status] Invalid regular expression gets 
ignored, if multiple is present Pass [INPUT in EMAIL status] Invalid `v` regular expression gets ignored, if multiple is present Pass [INPUT in EMAIL status] The pattern attribute tries to escape a group, if multiple is present -Pass [INPUT in EMAIL status] The pattern attribute uses Unicode features, if multiple is present +Fail [INPUT in EMAIL status] The pattern attribute uses Unicode features, if multiple is present Pass [INPUT in EMAIL status] The value attribute matches JavaScript-specific regular expression, if multiple is present Fail [INPUT in EMAIL status] The value attribute mismatches JavaScript-specific regular expression, if multiple is present -Fail [INPUT in EMAIL status] Commas should be stripped from regex input, if multiple is present \ No newline at end of file +Pass [INPUT in EMAIL status] Commas should be stripped from regex input, if multiple is present \ No newline at end of file diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/the-input-element/email.txt b/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/the-input-element/email.txt index 1a23847fce4..cf2af122a67 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/the-input-element/email.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/html/semantics/forms/the-input-element/email.txt @@ -2,13 +2,12 @@ Harness status: OK Found 8 tests -5 Pass -3 Fail +8 Pass Pass single_email doesn't have the multiple attribute Pass value should be sanitized: strip line breaks Pass Email address validity -Fail When the multiple attribute is removed, the user agent must run the value sanitization algorithm +Pass When the multiple attribute is removed, the user agent must run the value sanitization algorithm Pass multiple_email has the multiple attribute -Fail run the value sanitization algorithm after setting a new value +Pass run the value sanitization algorithm after setting a new value Pass valid value is a set of valid email addresses separated by a 
single ',' -Fail When the multiple attribute is set, the user agent must run the value sanitization algorithm \ No newline at end of file +Pass When the multiple attribute is set, the user agent must run the value sanitization algorithm \ No newline at end of file