LibWeb: Port node text content to UTF-16

This commit is contained in:
Timothy Flynn 2025-07-28 09:46:06 -04:00 committed by Jelle Raaijmakers
commit 5c561c1a53
Notes: github-actions[bot] 2025-07-28 16:32:35 +00:00
35 changed files with 147 additions and 123 deletions

View file

@ -1004,13 +1004,13 @@ String Document::title() const
// element that is a child of the document element.
if (auto const* document_element = this->document_element(); is<SVG::SVGElement>(document_element)) {
if (auto const* title_element = document_element->first_child_of_type<SVG::SVGTitleElement>())
value = title_element->child_text_content();
value = title_element->child_text_content().to_utf8_but_should_be_ported_to_utf16();
}
// 2. Otherwise, let value be the child text content of the title element, or the empty string if the title element
// is null.
else if (auto title_element = this->title_element()) {
value = title_element->text_content().value_or(String {});
value = title_element->text_content().value_or({}).to_utf8_but_should_be_ported_to_utf16();
}
// 3. Strip and collapse ASCII whitespace in value.
@ -1045,7 +1045,7 @@ WebIDL::ExceptionOr<void> Document::set_title(String const& title)
}
// 3. String replace all with the given value within element.
element->string_replace_all(title);
element->string_replace_all(Utf16String::from_utf8(title));
}
// -> If the document element is in the HTML namespace
@ -1074,7 +1074,7 @@ WebIDL::ExceptionOr<void> Document::set_title(String const& title)
}
// 4. String replace all with the given value within element.
element->string_replace_all(title);
element->string_replace_all(Utf16String::from_utf8(title));
}
// -> Otherwise

View file

@ -309,7 +309,7 @@ static WebIDL::ExceptionOr<GC::Ref<DOM::Document>> load_media_document(HTML::Nav
img {
background-color: #fff;
}
)~~~"_string);
)~~~"_utf16);
TRY(document->head()->append_child(style_element));
auto url_string = document->url_string();

View file

@ -165,18 +165,20 @@ Optional<String> Node::alternative_text() const
}
// https://dom.spec.whatwg.org/#concept-descendant-text-content
String Node::descendant_text_content() const
Utf16String Node::descendant_text_content() const
{
StringBuilder builder;
StringBuilder builder(StringBuilder::Mode::UTF16);
for_each_in_subtree_of_type<Text>([&](auto& text_node) {
builder.append(text_node.data());
return TraversalDecision::Continue;
});
return builder.to_string_without_validation();
return builder.to_utf16_string_without_validation();
}
// https://dom.spec.whatwg.org/#dom-node-textcontent
Optional<String> Node::text_content() const
Optional<Utf16String> Node::text_content() const
{
// The textContent getter steps are to return the following, switching on the interface this implements:
@ -185,23 +187,23 @@ Optional<String> Node::text_content() const
return descendant_text_content();
// If CharacterData, return this's data.
if (is<CharacterData>(this))
return static_cast<CharacterData const&>(*this).data().to_utf8_but_should_be_ported_to_utf16();
if (auto const* character_data = as_if<CharacterData>(*this))
return character_data->data();
// If Attr node, return this's value.
if (is<Attr>(*this))
return static_cast<Attr const&>(*this).value();
if (auto const* attribute = as_if<Attr>(*this))
return Utf16String::from_utf8(attribute->value());
// Otherwise, return null
return {};
}
// https://dom.spec.whatwg.org/#ref-for-dom-node-textcontent%E2%91%A0
void Node::set_text_content(Optional<String> const& maybe_content)
void Node::set_text_content(Optional<Utf16String> const& maybe_content)
{
// The textContent setter steps are to, if the given value is null, act as if it was the empty string instead,
// and then do as described below, switching on the interface this implements:
auto content = maybe_content.value_or(String {});
auto content = maybe_content.value_or({});
// If DocumentFragment or Element, string replace all with the given value within this.
if (is<DocumentFragment>(this) || is<Element>(this)) {
@ -213,17 +215,13 @@ void Node::set_text_content(Optional<String> const& maybe_content)
}
// If CharacterData, replace data with node this, offset 0, count this's length, and data the given value.
else if (is<CharacterData>(this)) {
auto* character_data_node = as<CharacterData>(this);
character_data_node->set_data(Utf16String::from_utf8(content));
// FIXME: CharacterData::set_data is not spec compliant. Make this match the spec when set_data becomes spec compliant.
// Do note that this will make this function able to throw an exception.
else if (auto* character_data = as_if<CharacterData>(*this)) {
MUST(character_data->replace_data(0, character_data->length_in_utf16_code_units(), content));
}
// If Attr, set an existing attribute value with this and the given value.
if (is<Attr>(*this)) {
static_cast<Attr&>(*this).set_value(content);
else if (auto* attribute = as_if<Attr>(*this)) {
attribute->set_value(content.to_utf8_but_should_be_ported_to_utf16());
}
// Otherwise, do nothing.
@ -524,21 +522,21 @@ void Node::invalidate_style(StyleInvalidationReason reason, Vector<CSS::Invalida
document().style_invalidator().add_pending_invalidation(*this, move(invalidation_set), options.invalidate_elements_that_use_css_custom_properties);
}
String Node::child_text_content() const
Utf16String Node::child_text_content() const
{
if (!is<ParentNode>(*this))
return String {};
auto const* parent_node = as_if<ParentNode>(*this);
if (!parent_node)
return {};
StringBuilder builder;
as<ParentNode>(*this).for_each_child([&](auto& child) {
if (is<Text>(child)) {
auto maybe_content = as<Text>(child).text_content();
if (maybe_content.has_value())
builder.append(maybe_content.value());
}
StringBuilder builder(StringBuilder::Mode::UTF16);
parent_node->for_each_child_of_type<Text>([&](auto const& child) {
if (auto content = child.text_content(); content.has_value())
builder.append(*content);
return IterationDecision::Continue;
});
return MUST(builder.to_string());
return builder.to_utf16_string();
}
// https://dom.spec.whatwg.org/#concept-shadow-including-root
@ -2045,14 +2043,14 @@ void Node::replace_all(GC::Ptr<Node> node)
}
// https://dom.spec.whatwg.org/#string-replace-all
void Node::string_replace_all(String const& string)
void Node::string_replace_all(Utf16String string)
{
// 1. Let node be null.
GC::Ptr<Node> node;
// 2. If string is not the empty string, then set node to a new Text node whose data is string and node document is parent's node document.
if (!string.is_empty())
node = realm().create<Text>(document(), Utf16String::from_utf8(string));
node = realm().create<Text>(document(), move(string));
// 3. Replace all with node within parent.
replace_all(node);
@ -2862,7 +2860,7 @@ ErrorOr<String> Node::name_or_description(NameOrDescription target, Document con
// If the current node has at least one direct child title element, select the appropriate title based on
// the language rules for the SVG specification, and return the title text alternative as a flat string.
element->for_each_child_of_type<SVG::SVGTitleElement>([&](SVG::SVGTitleElement const& title) mutable {
title_element_text = title.text_content();
title_element_text = title.text_content().map([](auto const& title) { return title.to_utf8_but_should_be_ported_to_utf16(); });
return IterationDecision::Break;
});
if (title_element_text.has_value())
@ -2879,7 +2877,7 @@ ErrorOr<String> Node::name_or_description(NameOrDescription target, Document con
// then use the subtree of the first such element.
if (is<HTML::HTMLTableElement>(*element))
if (auto& table = (const_cast<HTML::HTMLTableElement&>(static_cast<HTML::HTMLTableElement const&>(*element))); table.caption())
return table.caption()->text_content().release_value();
return table.caption()->text_content()->to_utf8_but_should_be_ported_to_utf16();
// https://w3c.github.io/html-aam/#fieldset-element-accessible-name-computation
// 2. If the accessible name is still empty, then: if the fieldset element has a child that is a legend element,
@ -2888,7 +2886,7 @@ ErrorOr<String> Node::name_or_description(NameOrDescription target, Document con
Optional<String> legend;
auto& fieldset = (const_cast<HTML::HTMLFieldSetElement&>(static_cast<HTML::HTMLFieldSetElement const&>(*element)));
fieldset.for_each_child_of_type<HTML::HTMLLegendElement>([&](HTML::HTMLLegendElement const& element) mutable {
legend = element.text_content().release_value();
legend = element.text_content()->to_utf8_but_should_be_ported_to_utf16();
return IterationDecision::Break;
});
if (legend.has_value())
@ -3010,7 +3008,7 @@ ErrorOr<String> Node::name_or_description(NameOrDescription target, Document con
if (is_text() && (!parent_element() || (parent_element()->is_referenced() || !parent_element()->is_hidden() || !parent_element()->has_hidden_ancestor() || parent_element()->has_referenced_and_hidden_ancestor()))) {
if (layout_node() && layout_node()->is_text_node())
return as<Layout::TextNode>(layout_node())->text_for_rendering().to_utf8_but_should_be_ported_to_utf16();
return text_content().release_value();
return text_content()->to_utf8_but_should_be_ported_to_utf16();
}
// H. Otherwise, if the current node is a descendant of an element whose Accessible Name or Accessible Description

View file

@ -268,9 +268,9 @@ public:
virtual Optional<String> alternative_text() const;
String descendant_text_content() const;
Optional<String> text_content() const;
void set_text_content(Optional<String> const&);
Utf16String descendant_text_content() const;
Optional<Utf16String> text_content() const;
void set_text_content(Optional<Utf16String> const&);
WebIDL::ExceptionOr<void> normalize();
@ -288,7 +288,7 @@ public:
const HTML::HTMLElement* enclosing_html_element() const;
const HTML::HTMLElement* enclosing_html_element_with_attribute(FlyString const&) const;
String child_text_content() const;
Utf16String child_text_content() const;
Node& shadow_including_root();
Node const& shadow_including_root() const
@ -399,7 +399,7 @@ public:
WebIDL::ExceptionOr<void> unsafely_set_html(Element&, StringView);
void replace_all(GC::Ptr<Node>);
void string_replace_all(String const&);
void string_replace_all(Utf16String);
bool is_same_node(Node const*) const;
bool is_equal_node(Node const*) const;

View file

@ -38,7 +38,7 @@ interface Node : EventTarget {
// FIXME: [LegacyNullToEmptyString] is not allowed on nullable types as per the Web IDL spec.
// However, we only apply it to setters, so this works as a stop gap.
// Replace this with something like a special cased [LegacyNullToEmptyString].
[LegacyNullToEmptyString, CEReactions] attribute DOMString? textContent;
[LegacyNullToEmptyString, CEReactions] attribute Utf16DOMString? textContent;
[CEReactions] undefined normalize();
[ImplementedAs=clone_node_binding, CEReactions] Node cloneNode(optional boolean deep = false);

View file

@ -52,7 +52,7 @@ void StyleElementUtils::update_a_style_block(DOM::Element& style_element)
return;
// 5. If the Should element's inline behavior be blocked by Content Security Policy? algorithm returns "Blocked" when executed upon the style element, "style", and the style element's child text content, then return. [CSP]
if (ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(style_element.realm(), style_element, ContentSecurityPolicy::Directives::Directive::InlineType::Style, style_element.child_text_content()) == ContentSecurityPolicy::Directives::Directive::Result::Blocked)
if (ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(style_element.realm(), style_element, ContentSecurityPolicy::Directives::Directive::InlineType::Style, style_element.child_text_content().to_utf8_but_should_be_ported_to_utf16()) == ContentSecurityPolicy::Directives::Directive::Result::Blocked)
return;
// 6. Create a CSS style sheet with the following properties:
@ -78,7 +78,7 @@ void StyleElementUtils::update_a_style_block(DOM::Element& style_element)
// Left uninitialized.
m_style_sheet_list = style_element.document_or_shadow_root_style_sheets();
m_associated_css_style_sheet = m_style_sheet_list->create_a_css_style_sheet(
style_element.text_content().value_or(String {}),
style_element.text_content().value_or({}).to_utf8_but_should_be_ported_to_utf16(),
"text/css"_string,
&style_element,
style_element.attribute(HTML::AttributeNames::media).value_or({}),

View file

@ -383,11 +383,10 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
if (is<DOM::Text>(*start_node) && start_offset != 0) {
auto parent_white_space_collapse = resolved_keyword(*start_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
// FIXME: Find a way to get code points directly from the UTF-8 string
auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
auto offset_minus_one_code_point = start_node_data.code_point_at(start_offset - 1);
auto start_node_data = start_node->text_content().release_value();
auto offset_minus_one_code_unit = start_node_data.code_unit_at(start_offset - 1);
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_point == 0x20 || offset_minus_one_code_point == 0xA0)) {
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_unit == 0x20 || offset_minus_one_code_unit == 0xA0)) {
--start_offset;
continue;
}
@ -436,22 +435,21 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
if (is<DOM::Text>(*end_node) && end_offset != end_node->length()) {
auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
// FIXME: Find a way to get code points directly from the UTF-8 string
auto end_node_data = Utf16String::from_utf8(*end_node->text_content());
auto offset_code_point = end_node_data.code_point_at(end_offset);
auto end_node_data = end_node->text_content().release_value();
auto offset_code_unit = end_node_data.code_unit_at(end_offset);
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_point == 0x20 || offset_code_point == 0xA0)) {
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_unit == 0x20 || offset_code_unit == 0xA0)) {
// 1. If fix collapsed space is true, and collapse spaces is true, and the end offsetth
// code unit of end node's data is a space (0x0020): call deleteData(end offset, 1)
// on end node, then continue this loop from the beginning.
if (fix_collapsed_space && collapse_spaces && offset_code_point == 0x20) {
if (fix_collapsed_space && collapse_spaces && offset_code_unit == 0x20) {
MUST(static_cast<DOM::CharacterData&>(*end_node).delete_data(end_offset, 1));
continue;
}
// 2. Set collapse spaces to true if the end offsetth code unit of end node's data is a
// space (0x0020), false otherwise.
collapse_spaces = offset_code_point == 0x20;
collapse_spaces = offset_code_unit == 0x20;
// 3. Add one to end offset.
++end_offset;
@ -500,7 +498,7 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
// AD-HOC: Use the white-space-collapse longhand instead of "white-space" shorthand: https://github.com/w3c/editing/issues/486.
if (is<DOM::Text>(*end_node) && end_offset == end_node->length() && precedes_a_line_break(end_node)) {
auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
if (parent_white_space_collapse != CSS::Keyword::Preserve && end_node->text_content().value().ends_with_bytes(" "sv)) {
if (parent_white_space_collapse != CSS::Keyword::Preserve && end_node->text_content()->ends_with(" "sv)) {
// 1. Subtract one from end offset.
--end_offset;
@ -560,10 +558,10 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
replacement_whitespace_view = replacement_whitespace_view.substring_view(1);
// 2. If element is not the same as the start offsetth code unit of start node's data:
auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
auto start_node_code_point = start_node_data.code_point_at(start_offset);
auto start_node_data = start_node->text_content().release_value();
auto start_node_code_unit = start_node_data.code_unit_at(start_offset);
if (element != start_node_code_point) {
if (element != start_node_code_unit) {
// 1. Call insertData(start offset, element) on start node.
auto& start_node_character_data = static_cast<DOM::CharacterData&>(*start_node);
MUST(start_node_character_data.insert_data(start_offset, Utf16String::from_code_point(element)));
@ -1695,7 +1693,7 @@ bool is_allowed_child_of_node(Variant<GC::Ref<DOM::Node>, FlyString> child, Vari
HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr);
if (parent_is_table_like && is<DOM::Text>(child_node.ptr())) {
auto child_text_content = child_node->text_content().release_value();
if (!all_of(child_text_content.bytes_as_string_view(), Infra::is_ascii_whitespace))
if (!all_of(child_text_content, Infra::is_ascii_whitespace))
return false;
}
@ -2907,7 +2905,8 @@ void normalize_sublists_in_node(GC::Ref<DOM::Node> item)
// 2. If child is an ol or ul, or new item is null and child is a Text node whose data
// consists of zero or more space characters:
auto child_text = child->text_content();
auto text_is_all_whitespace = child_text.has_value() && all_of(child_text.value().bytes_as_string_view(), Infra::is_ascii_whitespace);
auto text_is_all_whitespace = child_text.has_value() && all_of(*child_text, Infra::is_ascii_whitespace);
if ((is<HTML::HTMLOListElement>(*child) || is<HTML::HTMLUListElement>(*child))
|| (!new_item && is<DOM::Text>(*child) && text_is_all_whitespace)) {
// 1. Set new item to null.

View file

@ -164,14 +164,14 @@ GC::Ref<DOM::DOMTokenList> HTMLAnchorElement::rel_list()
}
// https://html.spec.whatwg.org/multipage/text-level-semantics.html#dom-a-text
String HTMLAnchorElement::text() const
Utf16String HTMLAnchorElement::text() const
{
// The text attribute's getter must return this element's descendant text content.
return descendant_text_content();
}
// https://html.spec.whatwg.org/multipage/text-level-semantics.html#dom-a-text
void HTMLAnchorElement::set_text(String const& text)
void HTMLAnchorElement::set_text(Utf16String const& text)
{
// The text attribute's setter must string replace all with the given value within this element.
string_replace_all(text);

View file

@ -26,8 +26,8 @@ public:
GC::Ref<DOM::DOMTokenList> rel_list();
String text() const;
void set_text(String const&);
Utf16String text() const;
void set_text(Utf16String const&);
// ^EventTarget
// https://html.spec.whatwg.org/multipage/interaction.html#the-tabindex-attribute:the-a-element

View file

@ -16,7 +16,7 @@ interface HTMLAnchorElement : HTMLElement {
[CEReactions, Reflect] attribute DOMString hreflang;
[CEReactions, Reflect] attribute DOMString type;
[CEReactions] attribute DOMString text;
[CEReactions] attribute Utf16DOMString text;
[CEReactions, Reflect=referrerpolicy, Enumerated=ReferrerPolicy] attribute DOMString referrerPolicy;

View file

@ -252,7 +252,7 @@ WebIDL::ExceptionOr<void> HTMLDetailsElement::create_shadow_tree_if_needed()
:host([open]) summary {
list-style-type: disclosure-open;
}
)~~~"_string);
)~~~"_utf16);
MUST(shadow_root->append_child(style));
m_summary_slot = static_cast<HTML::HTMLSlotElement&>(*summary_slot);

View file

@ -389,7 +389,7 @@ String HTMLElement::get_the_text_steps()
// 1. If element is not being rendered or if the user agent is a non-CSS user agent, then return element's descendant text content.
document().update_layout(DOM::UpdateLayoutReason::HTMLElementGetTheTextSteps);
if (!layout_node())
return descendant_text_content();
return descendant_text_content().to_utf8_but_should_be_ported_to_utf16();
// 2. Let results be a new empty list.
Vector<Variant<String, RequiredLineBreakCount>> results;

View file

@ -1203,15 +1203,15 @@ void HTMLInputElement::update_file_input_shadow_tree()
return;
auto files_label = has_attribute(HTML::AttributeNames::multiple) ? "files"sv : "file"sv;
m_file_button->set_text_content(MUST(String::formatted("Select {}...", files_label)));
m_file_button->set_text_content(Utf16String::formatted("Select {}...", files_label));
if (m_selected_files && m_selected_files->length() > 0) {
if (m_selected_files->length() == 1)
m_file_label->set_text_content(m_selected_files->item(0)->name());
m_file_label->set_text_content(Utf16String::from_utf8(m_selected_files->item(0)->name()));
else
m_file_label->set_text_content(MUST(String::formatted("{} files selected.", m_selected_files->length())));
m_file_label->set_text_content(Utf16String::formatted("{} files selected.", m_selected_files->length()));
} else {
m_file_label->set_text_content(MUST(String::formatted("No {} selected.", files_label)));
m_file_label->set_text_content(Utf16String::formatted("No {} selected.", files_label));
}
}

View file

@ -93,7 +93,9 @@ Utf16String HTMLOptionElement::value() const
{
// The value of an option element is the value of the value content attribute, if there is one.
// ...or, if there is not, the value of the element's text IDL attribute.
return Utf16String::from_utf8(attribute(HTML::AttributeNames::value).value_or(text()));
if (auto value = attribute(HTML::AttributeNames::value); value.has_value())
return Utf16String::from_utf8(*value);
return text();
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-option-value
@ -121,8 +123,7 @@ String HTMLOptionElement::label() const
// must return that attribute's value; otherwise, it must return the element's label.
if (auto label = attribute(HTML::AttributeNames::label); label.has_value())
return label.release_value();
return text();
return text().to_utf8_but_should_be_ported_to_utf16();
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-option-label
@ -133,9 +134,9 @@ void HTMLOptionElement::set_label(String const& label)
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-option-text
String HTMLOptionElement::text() const
Utf16String HTMLOptionElement::text() const
{
StringBuilder builder;
StringBuilder builder(StringBuilder::Mode::UTF16);
// Concatenation of data of all the Text node descendants of the option element, in tree order,
// excluding any that are descendants of descendants of the option element that are themselves
@ -146,11 +147,11 @@ String HTMLOptionElement::text() const
});
// Return the result of stripping and collapsing ASCII whitespace from the above concatenation.
return MUST(Infra::strip_and_collapse_whitespace(builder.string_view()));
return Infra::strip_and_collapse_whitespace(builder.to_utf16_string());
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-option-text
void HTMLOptionElement::set_text(String const& text)
void HTMLOptionElement::set_text(Utf16String const& text)
{
string_replace_all(text);
// Note: this causes children_changed() to be called, which will update the <select>'s label

View file

@ -26,8 +26,8 @@ public:
Utf16String value() const;
WebIDL::ExceptionOr<void> set_value(Utf16String const&);
String text() const;
void set_text(String const&);
Utf16String text() const;
void set_text(Utf16String const&);
[[nodiscard]] String label() const;
void set_label(String const&);

View file

@ -12,6 +12,6 @@ interface HTMLOptionElement : HTMLElement {
attribute boolean selected;
[CEReactions] attribute Utf16DOMString value;
[CEReactions] attribute DOMString text;
[CEReactions] attribute Utf16DOMString text;
readonly attribute long index;
};

View file

@ -50,7 +50,7 @@ GC::Ref<DOM::DOMTokenList> HTMLOutputElement::html_for()
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-output-defaultvalue
String HTMLOutputElement::default_value() const
Utf16String HTMLOutputElement::default_value() const
{
// 1. If this element's default value override is non-null, then return it.
if (m_default_value_override.has_value())
@ -61,7 +61,7 @@ String HTMLOutputElement::default_value() const
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-output-defaultvalue
void HTMLOutputElement::set_default_value(String const& default_value)
void HTMLOutputElement::set_default_value(Utf16String const& default_value)
{
// 1. If this's default value override is null, then string replace all with the given value within this and return.
if (!m_default_value_override.has_value()) {
@ -77,7 +77,7 @@ void HTMLOutputElement::set_default_value(String const& default_value)
Utf16String HTMLOutputElement::value() const
{
// The value getter steps are to return this's descendant text content.
return Utf16String::from_utf8(descendant_text_content());
return descendant_text_content();
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-output-value
@ -87,7 +87,7 @@ void HTMLOutputElement::set_value(Utf16String const& value)
m_default_value_override = default_value();
// 2. String replace all with the given value within this.
string_replace_all(value.to_utf8_but_should_be_ported_to_utf16());
string_replace_all(value);
}
// https://html.spec.whatwg.org/multipage/form-elements.html#the-output-element:concept-form-reset-control

View file

@ -31,8 +31,8 @@ public:
return output;
}
String default_value() const;
void set_default_value(String const&);
Utf16String default_value() const;
void set_default_value(Utf16String const&);
Utf16String value() const override;
void set_value(Utf16String const&);
@ -69,7 +69,7 @@ private:
GC::Ptr<DOM::DOMTokenList> m_html_for;
Optional<String> m_default_value_override {};
Optional<Utf16String> m_default_value_override;
};
}

View file

@ -12,7 +12,7 @@ interface HTMLOutputElement : HTMLElement {
[CEReactions, Reflect] attribute DOMString name;
readonly attribute DOMString type;
[CEReactions] attribute DOMString defaultValue;
[CEReactions] attribute Utf16DOMString defaultValue;
[CEReactions] attribute Utf16DOMString value;
readonly attribute boolean willValidate;

View file

@ -188,6 +188,7 @@ void HTMLScriptElement::prepare_script()
// 5. Let source text be el's child text content.
auto source_text = child_text_content();
auto source_text_utf8 = source_text.to_utf8_but_should_be_ported_to_utf16();
// 6. If el has no src attribute, and source text is the empty string, then return.
if (!has_attribute(HTML::AttributeNames::src) && source_text.is_empty()) {
@ -278,7 +279,7 @@ void HTMLScriptElement::prepare_script()
// 19. If el does not have a src content attribute, and the Should element's inline behavior be blocked by Content Security Policy?
// algorithm returns "Blocked" when given el, "script", and source text, then return. [CSP]
if (!has_attribute(AttributeNames::src)
&& ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(realm(), *this, ContentSecurityPolicy::Directives::Directive::InlineType::Script, source_text) == ContentSecurityPolicy::Directives::Directive::Result::Blocked) {
&& ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(realm(), *this, ContentSecurityPolicy::Directives::Directive::InlineType::Script, source_text_utf8) == ContentSecurityPolicy::Directives::Directive::Result::Blocked) {
dbgln("HTMLScriptElement: Refusing to run inline script because it violates the Content Security Policy.");
return;
}
@ -452,7 +453,7 @@ void HTMLScriptElement::prepare_script()
if (m_script_type == ScriptType::Classic) {
// 1. Let script be the result of creating a classic script using source text, settings object's realm, base URL, and options.
// FIXME: Pass options.
auto script = ClassicScript::create(m_document->url().to_byte_string(), source_text, settings_object.realm(), base_url, m_source_line_number);
auto script = ClassicScript::create(m_document->url().to_byte_string(), source_text_utf8, settings_object.realm(), base_url, m_source_line_number);
// 2. Mark as ready el given script.
mark_as_ready(Result(move(script)));

View file

@ -57,8 +57,8 @@ public:
void unmark_as_already_started(Badge<DOM::Range>);
void unmark_as_parser_inserted(Badge<DOM::Range>);
String text() { return child_text_content(); }
void set_text(String const& text) { string_replace_all(text); }
Utf16String text() { return child_text_content(); }
void set_text(Utf16String const& text) { string_replace_all(text); }
[[nodiscard]] bool async() const;
void set_async(bool);

View file

@ -17,8 +17,8 @@ interface HTMLScriptElement : HTMLElement {
[CEReactions, Reflect=referrerpolicy, Enumerated=ReferrerPolicy] attribute DOMString referrerPolicy;
[CEReactions, Reflect] attribute DOMString integrity;
[CEReactions, Enumerated=FetchPriorityAttribute, Reflect=fetchpriority] attribute DOMString fetchPriority;
[CEReactions] attribute DOMString text;
[CEReactions] attribute Utf16DOMString text;
static boolean supports(DOMString type);

View file

@ -632,7 +632,7 @@ void HTMLSelectElement::update_inner_text_element()
// Update inner text element to the label of the selected option
for (auto const& option_element : m_cached_list_of_options) {
if (option_element->selected()) {
m_inner_text_element->set_text_content(MUST(Infra::strip_and_collapse_whitespace(option_element->label())));
m_inner_text_element->set_text_content(Infra::strip_and_collapse_whitespace(Utf16String::from_utf8(option_element->label())));
return;
}
}

View file

@ -114,10 +114,10 @@ void HTMLTextAreaElement::reset_algorithm()
m_user_validity = false;
m_dirty_value = false;
// and the raw value to its child text content.
set_raw_value(Utf16String::from_utf8(child_text_content()));
set_raw_value(child_text_content());
if (m_text_node) {
m_text_node->set_text_content(m_raw_value.to_utf8_but_should_be_ported_to_utf16());
m_text_node->set_text_content(m_raw_value);
update_placeholder_visibility();
}
}
@ -129,7 +129,7 @@ void HTMLTextAreaElement::clear_algorithm()
m_dirty_value = false;
// and set the raw value of element to an empty string.
set_raw_value(Utf16String::from_utf8(child_text_content()));
set_raw_value(child_text_content());
// Unlike their associated reset algorithms, changes made to form controls as part of these algorithms do count as
// changes caused by the user (and thus, e.g. do cause input events to fire).
@ -155,14 +155,14 @@ void HTMLTextAreaElement::form_associated_element_was_inserted()
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-textarea-defaultvalue
String HTMLTextAreaElement::default_value() const
Utf16String HTMLTextAreaElement::default_value() const
{
// The defaultValue attribute's getter must return the element's child text content.
return child_text_content();
}
// https://html.spec.whatwg.org/multipage/form-elements.html#dom-textarea-defaultvalue
void HTMLTextAreaElement::set_default_value(String const& default_value)
void HTMLTextAreaElement::set_default_value(Utf16String const& default_value)
{
// The defaultValue attribute's setter must string replace all with the given value within this element.
string_replace_all(default_value);
@ -379,7 +379,7 @@ void HTMLTextAreaElement::create_shadow_tree_if_needed()
handle_readonly_attribute(attribute(HTML::AttributeNames::readonly));
// NOTE: If `children_changed()` was called before now, `m_raw_value` will hold the text content.
// Otherwise, it will get filled in whenever that does get called.
m_text_node->set_text_content(m_raw_value.to_utf8_but_should_be_ported_to_utf16());
m_text_node->set_text_content(m_raw_value);
handle_maxlength_attribute();
MUST(m_inner_text_element->append_child(*m_text_node));
@ -430,9 +430,9 @@ void HTMLTextAreaElement::children_changed(ChildrenChangedMetadata const* metada
// The children changed steps for textarea elements must, if the element's dirty value flag is false,
// set the element's raw value to its child text content.
if (!m_dirty_value) {
set_raw_value(Utf16String::from_utf8(child_text_content()));
set_raw_value(child_text_content());
if (m_text_node)
m_text_node->set_text_content(m_raw_value.to_utf8_but_should_be_ported_to_utf16());
m_text_node->set_text_content(m_raw_value);
update_placeholder_visibility();
}
}

View file

@ -77,8 +77,8 @@ public:
// https://www.w3.org/TR/html-aria/#el-textarea
virtual Optional<ARIA::Role> default_role() const override { return ARIA::Role::textbox; }
// Accessors for the textarea's defaultValue attribute.
// NOTE(review): the String pair and the Utf16String pair below appear to be the before/after forms of the same
// two declarations - confirm; the Utf16String pair matches the neighbouring value()/set_value() declarations.
String default_value() const;
void set_default_value(String const&);
Utf16String default_value() const;
void set_default_value(Utf16String const&);
Utf16String value() const override;
void set_value(Utf16String const&);

View file

@ -22,7 +22,7 @@ interface HTMLTextAreaElement : HTMLElement {
[CEReactions, Reflect] attribute DOMString wrap;
readonly attribute DOMString type;
[CEReactions] attribute DOMString defaultValue;
[CEReactions] attribute Utf16DOMString defaultValue;
[LegacyNullToEmptyString] attribute Utf16DOMString value;
readonly attribute unsigned long textLength;

View file

@ -37,14 +37,14 @@ void HTMLTitleElement::children_changed(ChildrenChangedMetadata const* metadata)
}
// https://html.spec.whatwg.org/multipage/semantics.html#dom-title-text
// Getter for the title element's text attribute.
// NOTE(review): the two signature lines below look like the before/after forms of the same definition in this
// change (String, then Utf16String) - confirm; the Utf16String form is the one that agrees with the
// `Utf16String text() const;` declaration.
String HTMLTitleElement::text() const
Utf16String HTMLTitleElement::text() const
{
// The text attribute's getter must return this title element's child text content.
// The value from child_text_content() is returned directly; no conversion happens at this call site.
return child_text_content();
}
// https://html.spec.whatwg.org/multipage/semantics.html#dom-title-text
void HTMLTitleElement::set_text(String const& value)
void HTMLTitleElement::set_text(Utf16String const& value)
{
// The text attribute's setter must string replace all with the given value within this title element.
string_replace_all(value);

View file

@ -17,8 +17,8 @@ class HTMLTitleElement final : public HTMLElement {
public:
virtual ~HTMLTitleElement() override;
// Accessors for the title element's text attribute: text() reads it, set_text() takes the replacement value.
// NOTE(review): the String pair and the Utf16String pair below appear to be the before/after forms of the same
// two declarations - confirm before relying on either spelling.
String text() const;
void set_text(String const& value);
Utf16String text() const;
void set_text(Utf16String const& value);
private:
HTMLTitleElement(DOM::Document&, DOM::QualifiedName);

View file

@ -6,6 +6,6 @@ interface HTMLTitleElement : HTMLElement {
[HTMLConstructor] constructor();
[CEReactions] attribute DOMString text;
[CEReactions] attribute Utf16DOMString text;
};

View file

@ -11,7 +11,8 @@
namespace Web::Infra {
// The ASCII whitespace characters (tab, line feed, form feed, carriage return, space) as a string view.
// NOTE(review): the two ASCII_WHITESPACE lines appear to be the before/after forms of one definition; the second
// only adds `inline` and keeps the same value - confirm.
constexpr auto ASCII_WHITESPACE = "\t\n\f\r "sv;
constexpr inline auto ASCII_WHITESPACE = "\t\n\f\r "sv;
// The same five characters as an array of u32 code points; the Utf16String overload of
// strip_and_collapse_whitespace() passes this to Utf16String::contains_any_of().
constexpr inline auto ASCII_WHITESPACE_CODE_POINTS = to_array<u32>({ '\t', '\n', '\f', '\r', ' ' });
// https://infra.spec.whatwg.org/#ascii-whitespace
constexpr bool is_ascii_whitespace(u32 code_point)

View file

@ -85,6 +85,29 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
return String::from_utf8(builder.string_view().trim(Infra::ASCII_WHITESPACE));
}
// https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
Utf16String strip_and_collapse_whitespace(Utf16String const& string)
{
    // An input with no ASCII whitespace at all has nothing to collapse or strip, so it is returned unchanged
    // without building a new string.
    bool const has_ascii_whitespace = string.contains_any_of(Infra::ASCII_WHITESPACE_CODE_POINTS);
    if (!has_ascii_whitespace)
        return string;

    // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a
    // single U+0020 SPACE code point.
    StringBuilder collapsed(StringBuilder::Mode::UTF16);

    for (auto code_point : string) {
        if (!Infra::is_ascii_whitespace(code_point)) {
            collapsed.append_code_point(code_point);
            continue;
        }

        // Only the first whitespace code point of a run emits a separator; the rest of the run finds the
        // trailing space already in the builder and is dropped.
        bool const run_already_collapsed = collapsed.utf16_string_view().ends_with(" "sv);
        if (!run_already_collapsed)
            collapsed.append(' ');
    }

    // ...and then remove any leading and trailing ASCII whitespace from that string.
    auto const stripped = collapsed.utf16_string_view().trim(Infra::ASCII_WHITESPACE);
    return Utf16String::from_utf16(stripped);
}
// https://infra.spec.whatwg.org/#code-unit-prefix
bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
{

View file

@ -16,6 +16,7 @@ namespace Web::Infra {
String normalize_newlines(String const&);
Utf16String normalize_newlines(Utf16String const&);
// https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
// StringView flavour: the result is wrapped in ErrorOr, so callers must handle a possible error.
ErrorOr<String> strip_and_collapse_whitespace(StringView string);
// Utf16String flavour of the same algorithm: returns the Utf16String directly, with no ErrorOr wrapper.
Utf16String strip_and_collapse_whitespace(Utf16String const& string);
bool is_code_unit_prefix(StringView potential_prefix, StringView input);
ErrorOr<String> convert_to_scalar_value_string(StringView string);
ByteBuffer isomorphic_encode(StringView input);

View file

@ -143,7 +143,7 @@ bool SVGElement::should_include_in_accessibility_tree() const
bool has_title_or_desc = false;
auto role = role_from_role_attribute_value();
for_each_child_of_type<SVGElement>([&has_title_or_desc](auto& child) {
if ((is<SVGTitleElement>(child) || is<SVGDescElement>(child)) && !child.text_content()->trim_ascii_whitespace().value().is_empty()) {
if ((is<SVGTitleElement>(child) || is<SVGDescElement>(child)) && !child.text_content()->utf16_view().trim_ascii_whitespace().is_empty()) {
has_title_or_desc = true;
return IterationDecision::Break;
}
@ -152,9 +152,9 @@ bool SVGElement::should_include_in_accessibility_tree() const
// https://w3c.github.io/svg-aam/#include_elements
// TODO: Add support for the SVG tabindex attribute, and include a check for it here.
return has_title_or_desc
|| (aria_label().has_value() && !aria_label().value().trim_ascii_whitespace().value().is_empty())
|| (aria_labelled_by().has_value() && !aria_labelled_by().value().trim_ascii_whitespace().value().is_empty())
|| (aria_described_by().has_value() && !aria_described_by().value().trim_ascii_whitespace().value().is_empty())
|| (aria_label().has_value() && !aria_label()->bytes_as_string_view().trim_whitespace().is_empty())
|| (aria_labelled_by().has_value() && !aria_labelled_by()->bytes_as_string_view().trim_whitespace().is_empty())
|| (aria_described_by().has_value() && !aria_described_by()->bytes_as_string_view().trim_whitespace().is_empty())
|| (role.has_value() && ARIA::is_abstract_role(role.value()) && role != ARIA::Role::none && role != ARIA::Role::presentation);
}

View file

@ -152,7 +152,7 @@ void SVGScriptElement::process_the_script_element()
} else {
// Inline script content
script_content = child_text_content();
script_content = child_text_content().to_utf8_but_should_be_ported_to_utf16();
if (script_content.is_empty())
return;
}

View file

@ -520,7 +520,7 @@ String element_rendered_text(DOM::Node& node)
// FIXME: The spec does not define how to get the element's rendered text, other than to do exactly as Selenium does.
// This implementation is not sufficient, as we must also at least consider the shadow DOM.
if (!is<HTML::HTMLElement>(node))
return node.text_content().value_or(String {});
return node.text_content().value_or({}).to_utf8_but_should_be_ported_to_utf16();
auto& element = static_cast<HTML::HTMLElement&>(node);
return element.inner_text();