LibWeb/HTML: Allow more characters in custom element names

Corresponds to 78d2678789

And import a related test.
This commit is contained in:
Sam Atkins 2025-07-08 14:38:42 +01:00 committed by Tim Ledbetter
commit b05fe0127b
Notes: github-actions[bot] 2025-07-08 16:10:51 +00:00
4 changed files with 190 additions and 50 deletions

View file

@ -1,42 +1,53 @@
/*
* Copyright (c) 2023, Srikavin Ramkumar <me@srikavin.me>
* Copyright (c) 2025, Sam Atkins <sam@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringView.h>
#include <AK/Utf8View.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/CustomElements/CustomElementName.h>
namespace Web::HTML {
// https://html.spec.whatwg.org/multipage/custom-elements.html#custom-elements-core-concepts:prod-pcenchar
// Returns true when code_point falls inside one of the inclusive ranges in the table below.
// Characters that are allowed on their own are written as ranges whose ends coincide.
static bool is_pcen_char(u32 code_point)
{
    struct InclusiveRange {
        u32 first;
        u32 last;
    };

    static constexpr InclusiveRange accepted_ranges[] = {
        { 0x2d, 0x2d }, // '-'
        { 0x2e, 0x2e }, // '.'
        { 0x30, 0x39 }, // '0' - '9'
        { 0x5f, 0x5f }, // '_'
        { 0x61, 0x7a }, // 'a' - 'z'
        { 0xb7, 0xb7 },
        { 0xc0, 0xd6 },
        { 0xd8, 0xf6 },
        { 0xf8, 0x37d },
        { 0x37f, 0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
        { 0x10000, 0xeffff },
    };

    for (auto const& range : accepted_ranges) {
        if (code_point >= range.first && code_point <= range.last)
            return true;
    }
    return false;
}
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
bool is_valid_custom_element_name(StringView name)
bool is_valid_custom_element_name(String const& name)
{
// name must not be any of the following:
// A string name is a valid custom element name if all of the following are true:
// - name is a valid element local name;
if (!DOM::is_valid_element_local_name(name))
return false;
// - name's 0th code point is an ASCII lower alpha;
auto code_points = Utf8View { name };
if (auto first = code_points.begin(); first.done() || !is_ascii_lower_alpha(*first))
return false;
// - name does not contain any ASCII upper alphas;
// - name contains a U+002D (-); and
bool contains_ascii_upper_alpha = false;
bool contains_hyphen = false;
for (auto code_point : code_points) {
if (is_ascii_upper_alpha(code_point)) {
contains_ascii_upper_alpha = true;
break;
}
if (code_point == '-')
contains_hyphen = true;
}
if (contains_ascii_upper_alpha || !contains_hyphen)
return false;
// - name is not one of the following:
// - "annotation-xml"
// - "color-profile"
// - "font-face"
// - "font-face-src"
// - "font-face-uri"
// - "font-face-format"
// - "font-face-name"
// - "missing-glyph"
if (name.is_one_of(
"annotation-xml"sv,
"color-profile"sv,
@ -49,28 +60,7 @@ bool is_valid_custom_element_name(StringView name)
return false;
}
// name must match the PotentialCustomElementName production:
// PotentialCustomElementName ::=
// [a-z] (PCENChar)* '-' (PCENChar)*
auto code_points = Utf8View { name };
auto it = code_points.begin();
if (code_points.is_empty() || *it < 'a' || *it > 'z')
return false;
++it;
bool found_hyphen = false;
for (; it != code_points.end(); ++it) {
if (!is_pcen_char(*it))
return false;
if (*it == '-')
found_hyphen = true;
}
return found_hyphen;
return true;
}
}

View file

@ -1,15 +1,21 @@
/*
* Copyright (c) 2023, Srikavin Ramkumar <me@srikavin.me>
* Copyright (c) 2025, Sam Atkins <sam@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/StringView.h>
#include <AK/String.h>
namespace Web::HTML {
bool is_valid_custom_element_name(StringView name);
bool is_valid_custom_element_name(String const& name);
// Convenience overload for FlyString callers: converts the name with to_string()
// and validates the resulting string.
inline bool is_valid_custom_element_name(FlyString const& name)
{
    auto const name_as_string = name.to_string();
    return is_valid_custom_element_name(name_as_string);
}
}

View file

@ -0,0 +1,6 @@
Harness status: OK
Found 1 tests
1 Pass
Pass valid-custom-element-names

View file

@ -0,0 +1,138 @@
<!DOCTYPE html>
<link rel=author href="mailto:jarhar@chromium.org">
<link rel=help href="https://github.com/whatwg/html/pull/7991">
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script>
// True when codePoint lies in U+0061 (a) through U+007A (z), inclusive.
function isAsciiLowerAlpha(codePoint) {
  const LOWER_A = 0x61;
  const LOWER_Z = 0x7A;
  return LOWER_A <= codePoint && codePoint <= LOWER_Z;
}
// True when codePoint lies in U+0041 (A) through U+005A (Z), inclusive.
function isAsciiUpperAlpha(codePoint) {
  const UPPER_A = 0x41;
  const UPPER_Z = 0x5A;
  return UPPER_A <= codePoint && codePoint <= UPPER_Z;
}
// True when codePoint is an ASCII letter of either case.
function isAsciiAlpha(codePoint) {
  if (isAsciiLowerAlpha(codePoint)) {
    return true;
  }
  return isAsciiUpperAlpha(codePoint);
}
// True when codePoint lies in U+0030 (0) through U+0039 (9), inclusive.
function isAsciiDigit(codePoint) {
  const DIGIT_ZERO = 0x30;
  const DIGIT_NINE = 0x39;
  return DIGIT_ZERO <= codePoint && codePoint <= DIGIT_NINE;
}
// True when codePoint equals one of U+0009, U+000A, U+000C, U+000D or U+0020.
function isAsciiWhitespace(codePoint) {
  const whitespaceCodePoints = [0x9, 0xA, 0xC, 0xD, 0x20];
  // Loose equality is kept on purpose so the comparison semantics stay unchanged.
  return whitespaceCodePoints.some(candidate => codePoint == candidate);
}
// Formats str for failure messages: the JSON list of its code points followed by the string itself in quotes.
function debugString(str) {
  const codePointList = [...str].map(character => character.codePointAt(0));
  return `code points: ${JSON.stringify(codePointList)}, string: "${str}"`;
}
// Names expected to be accepted by customElements.define(); generated names are appended to this list later in the script.
const validCustomElementNames = [
'annotation-xml-custom',
];
// Names expected to be rejected: the empty string plus the eight hyphenated names listed here.
// Generated names are appended to this list later in the script.
const invalidCustomElementNames = [
'',
'annotation-xml',
'color-profile',
'font-face',
'font-face-src',
'font-face-uri',
'font-face-format',
'font-face-name',
'missing-glyph',
];
// Code points used to build candidate names: three values at or above U+10000,
// followed by every code point from 0x00 through 0x7F in ascending order.
const testCodePoints = [0x1F171, 0x1F196, 0x10000];
testCodePoints.push(...Array.from({ length: 0x80 }, (_, index) => index));
// Matches a whole string in one of two shapes:
//   1. an ASCII letter followed by any characters other than NUL, tab, LF, FF, CR, space, '/' or '>'; or
//   2. ':', '_' or a code point in U+0080..U+10FFFF, followed by any run of ASCII letters, digits,
//      '-', '.', ':', '_' or code points in U+0080..U+10FFFF.
const elementLocalNameRegex = /^(?:[A-Za-z][^\0\t\n\f\r\u0020/>]*|[:_\u0080-\u{10FFFF}][A-Za-z0-9-.:_\u0080-\u{10FFFF}]*)$/u;
// Reference check used to sort generated strings into the valid / invalid lists.
// A string passes when it is non-empty, contains '-', starts with an ASCII lowercase letter,
// contains no ASCII uppercase letter, and matches elementLocalNameRegex.
function isValidCustomElementName(str) {
  if (!str.length || !str.includes('-')) {
    return false;
  }
  const codePoints = [...str].map(character => character.codePointAt(0));
  if (!isAsciiLowerAlpha(codePoints[0])) {
    return false;
  }
  if (codePoints.some(codePoint => isAsciiUpperAlpha(codePoint))) {
    return false;
  }
  return elementLocalNameRegex.test(str);
}
// Builds one candidate name from a prefix and a single code point, so that the
// different element-local-name branches and the '-' requirement all get exercised.
//   separatorMode 0: prefix + character
//   separatorMode 1: prefix + '-' + character
//   separatorMode 2: prefix + character + '-element'
// Any other separatorMode yields undefined.
function createStringWithSeparatorMode(codePoint, prefix, separatorMode) {
  const character = String.fromCodePoint(codePoint);
  if (separatorMode == 0) {
    return `${prefix}${character}`;
  }
  if (separatorMode == 1) {
    return `${prefix}-${character}`;
  }
  if (separatorMode == 2) {
    return `${prefix}${character}-element`;
  }
  return undefined;
}
// Generate every prefix / code point / separator-mode combination and append each
// resulting string to the list that matches isValidCustomElementName's verdict.
for (const prefix of ['', 'a', 'A', ' ', '\0']) {
  for (const codePoint of testCodePoints) {
    for (const separatorMode of [0, 1, 2]) {
      const candidate = createStringWithSeparatorMode(codePoint, prefix, separatorMode);
      const targetList = isValidCustomElementName(candidate)
        ? validCustomElementNames
        : invalidCustomElementNames;
      targetList.push(candidate);
    }
  }
}
// Counter advanced once per createElementClass() call.
let nextClassNumber = 1;
// Returns a newly created function on every call; the test calls this before each
// customElements.define() so each definition attempt receives its own constructor.
function createElementClass() {
// NOTE(review): `name` is computed (which advances nextClassNumber) but is not used afterwards.
const name = `CustomElement${nextClassNumber++}`;
const newClass = function() {};
// NOTE(review): this assigns the HTMLElement constructor itself, not HTMLElement.prototype — confirm this is intentional.
newClass.prototype = HTMLElement;
return newClass;
}
// Single async test covering both lists:
//  - every entry of validCustomElementNames must be definable, and whenDefined() must resolve;
//  - every entry of invalidCustomElementNames must make define() throw a SyntaxError and
//    whenDefined() reject with a SyntaxError.
promise_test(async t => {
for (const validName of validCustomElementNames) {
try {
const newClass = createElementClass();
customElements.define(validName, newClass);
await customElements.whenDefined(validName);
} catch (error) {
// Any exception or rejection for a supposedly valid name fails the test, with the name's code points in the message.
assert_unreached(`Custom element name should have been valid but threw error: ${debugString(validName)} ${error.toString()}`);
}
}
for (const invalidName of invalidCustomElementNames) {
const newClass = createElementClass();
// define() is expected to throw synchronously.
assert_throws_dom(
'SyntaxError',
() => customElements.define(invalidName, newClass),
`customElements.define should have thrown for invalid name: ${debugString(invalidName)}`);
// whenDefined() is expected to return a promise that rejects.
await promise_rejects_dom(t, 'SyntaxError',
customElements.whenDefined(invalidName),
`customElements.whenDefined should have thrown for invalid name: ${debugString(invalidName)}`);
}
});
</script>