LibWeb/HTML: Allow more characters in custom element names

Corresponds to 78d2678789

Also import a related test.
This commit is contained in:
Sam Atkins 2025-07-08 14:38:42 +01:00 committed by Tim Ledbetter
parent af17f38bbf
commit b05fe0127b
Notes: github-actions[bot] 2025-07-08 16:10:51 +00:00
4 changed files with 190 additions and 50 deletions

View file

@ -1,42 +1,53 @@
/*
* Copyright (c) 2023, Srikavin Ramkumar <me@srikavin.me>
* Copyright (c) 2025, Sam Atkins <sam@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringView.h>
#include <AK/Utf8View.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/CustomElements/CustomElementName.h>
namespace Web::HTML {
// https://html.spec.whatwg.org/multipage/custom-elements.html#custom-elements-core-concepts:prod-pcenchar
// Returns true when `code_point` falls inside one of the inclusive code point
// ranges accepted by the PCENChar production, false otherwise.
static bool is_pcen_char(u32 code_point)
{
    struct InclusiveRange {
        u32 first;
        u32 last;
    };

    // Ranges are listed in ascending order; single characters are ranges of length one.
    static constexpr InclusiveRange allowed_ranges[] = {
        { 0x2d, 0x2e },       // '-' and '.'
        { 0x30, 0x39 },       // '0'..'9'
        { 0x5f, 0x5f },       // '_'
        { 0x61, 0x7a },       // 'a'..'z'
        { 0xb7, 0xb7 },
        { 0xc0, 0xd6 },
        { 0xd8, 0xf6 },
        { 0xf8, 0x37d },
        { 0x37f, 0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
        { 0x10000, 0xeffff },
    };

    for (auto const& range : allowed_ranges) {
        if (range.first <= code_point && code_point <= range.last)
            return true;
    }
    return false;
}
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
bool is_valid_custom_element_name(StringView name)
bool is_valid_custom_element_name(String const& name)
{
// name must not be any of the following:
// A string name is a valid custom element name if all of the following are true:
// - name is a valid element local name;
if (!DOM::is_valid_element_local_name(name))
return false;
// - name's 0th code point is an ASCII lower alpha;
auto code_points = Utf8View { name };
if (auto first = code_points.begin(); first.done() || !is_ascii_lower_alpha(*first))
return false;
// - name does not contain any ASCII upper alphas;
// - name contains a U+002D (-); and
bool contains_ascii_upper_alpha = false;
bool contains_hyphen = false;
for (auto code_point : code_points) {
if (is_ascii_upper_alpha(code_point)) {
contains_ascii_upper_alpha = true;
break;
}
if (code_point == '-')
contains_hyphen = true;
}
if (contains_ascii_upper_alpha || !contains_hyphen)
return false;
// - name is not one of the following:
// - "annotation-xml"
// - "color-profile"
// - "font-face"
// - "font-face-src"
// - "font-face-uri"
// - "font-face-format"
// - "font-face-name"
// - "missing-glyph"
if (name.is_one_of(
"annotation-xml"sv,
"color-profile"sv,
@ -49,28 +60,7 @@ bool is_valid_custom_element_name(StringView name)
return false;
}
// name must match the PotentialCustomElementName production:
// PotentialCustomElementName ::=
// [a-z] (PCENChar)* '-' (PCENChar)*
auto code_points = Utf8View { name };
auto it = code_points.begin();
if (code_points.is_empty() || *it < 'a' || *it > 'z')
return false;
++it;
bool found_hyphen = false;
for (; it != code_points.end(); ++it) {
if (!is_pcen_char(*it))
return false;
if (*it == '-')
found_hyphen = true;
}
return found_hyphen;
return true;
}
}