LibWeb: Handle empty string in lang attribute

If the attribute value is the empty string (`lang=""`), the language
is set to unknown. A `lang` attribute higher up in the document tree
is then no longer applied to the content of that element.
This commit is contained in:
Piotr 2024-10-31 11:36:41 +01:00 committed by Sam Atkins
parent ff39f6cec5
commit e2613090ed
Notes: github-actions[bot] 2024-11-04 12:07:26 +00:00
5 changed files with 90 additions and 30 deletions

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
    <!--
        Every div inside .test is 50px wide by default. The :lang(ko) rule widens #box to
        100px, but only while #box is still considered to be Korean content.
    -->
    <style type="text/css">
        .test div { width: 50px; }
        #box:lang(ko) { width: 100px; }
    </style>
</head>
<body>
    <!-- The wrapper declares Korean; the inner box resets it with an empty lang attribute. -->
    <div class="test" lang="ko">
        <div id="box" lang="">Test</div>
    </div>
</body>
<!-- asyncTest() and println() are not defined in this file; presumably they come from ../include.js. -->
<script src="../include.js"></script>
<script>
    asyncTest((finish) => {
        // A width of 50 means the :lang(ko) rule did not match, i.e. the ancestor's
        // language was not applied to the box.
        const box = document.getElementById("box");
        const message = box.offsetWidth == 50
            ? "OK"
            : "FAIL. If an element contains a lang attribute with an empty value, the value of a lang attribute higher up the document tree will no longer be applied to the content of that element.";
        println(message);
        finish();
    });
</script>
</html>

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html lang="ko">
<head>
    <!--
        Every div inside .test is 50px wide by default. The :lang(ko) rule widens #box to
        100px, but only while #box is still considered to be Korean content.
    -->
    <style type='text/css'>
        .test div { width: 50px; }
        #box:lang(ko) { width: 100px; }
    </style>
</head>
<body>
    <!-- Korean is declared on the root html element; the box resets it with an empty lang attribute. -->
    <div class="test">
        <div id="box" lang="">Test</div>
    </div>
</body>
<!-- asyncTest() and println() are not defined in this file; presumably they come from ../include.js. -->
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // A width of 50 means the :lang(ko) rule did not match, i.e. the language declared
        // on the html element was not applied to the box.
        if (document.getElementById('box').offsetWidth == 50) {
            println("OK");
        } else {
            // The failure text describes this document: there is no meta Content-Language
            // element here, only a lang attribute on the html element.
            println("FAIL. If an element contains a lang attribute with an empty value, the language declared by the lang attribute on the html element will no longer be applied to the content of that element.");
        }
        done();
    });
</script>
</html>

View file

@ -2905,40 +2905,48 @@ void Element::inherit_counters()
// https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
// Returns the language of this element, or an empty Optional when the language is unknown.
// An empty attribute value (e.g. lang="") means "explicitly unknown": it is reported as an
// empty Optional, and because descendants resolve their language by calling lang() on their
// parent element, it also stops a language declared further up the tree from being applied.
Optional<String> Element::lang() const
{
    // Runs the spec's lookup steps and returns the raw result, which may be an empty string.
    auto attempt_to_determine_lang_attribute = [&]() -> Optional<String> {
        // 1. If the node is an element that has a lang attribute in the XML namespace set
        //      Use the value of that attribute.
        auto maybe_xml_lang = get_attribute_ns(Namespace::XML, HTML::AttributeNames::lang);
        if (maybe_xml_lang.has_value())
            return maybe_xml_lang.release_value();

        // 2. If the node is an HTML element or an element in the SVG namespace, and it has a lang in no namespace attribute set
        //      Use the value of that attribute.
        if (is_html_element() || namespace_uri() == Namespace::SVG) {
            auto maybe_lang = get_attribute(HTML::AttributeNames::lang);
            if (maybe_lang.has_value())
                return maybe_lang.release_value();
        }

        // 3. If the node's parent is a shadow root
        //      Use the language of that shadow root's host.
        // NOTE(review): the parent is obtained via parent_element() here. If a shadow root is
        //               never returned by parent_element(), this branch cannot be taken and the
        //               lookup falls through to step 4 - confirm against the DOM node API.
        if (auto const* parent = parent_element()) {
            if (parent->is_shadow_root())
                return parent->shadow_root()->host()->lang();
        }

        // 4. If the node's parent element is not null
        //      Use the language of that parent element.
        if (auto const* parent = parent_element())
            return parent->lang();

        // 5. Otherwise
        //      - If there is a pragma-set default language set, then that is the language of the node.
        //      - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
        //        if any, must be used as the final fallback language instead.
        //      - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,
        //        the language of the node is unknown, and the corresponding language tag is the empty string.
        // Default locale sounds like a reasonable fallback here.
        return {};
    };

    // If the resulting value is the empty string, then it must be interpreted as meaning that the language of the node is explicitly unknown.
    auto maybe_lang = attempt_to_determine_lang_attribute();
    if (!maybe_lang.has_value() || maybe_lang->is_empty())
        return {};
    return maybe_lang.release_value();
}
}