LibJS+LibWeb+WebContent: Port JS::PropertyKey to UTF-16

This has quite a lot of fall out. But the majority of it is just type or
UDL substitution, where the changes just fall through to other function
calls.

By changing property key storage to UTF-16, the main affected areas are:
* NativeFunction names must now be UTF-16
* Bytecode identifiers must now be UTF-16
* Module/binding names must now be UTF-16
This commit is contained in:
Timothy Flynn 2025-08-02 19:27:29 -04:00 committed by Tim Flynn
commit 0efa98a57a
Notes: github-actions[bot] 2025-08-05 11:08:30 +00:00
131 changed files with 766 additions and 726 deletions

View file

@ -225,7 +225,7 @@ ThrowCompletionOr<Realm*> get_function_realm(VM& vm, FunctionObject const& funct
}
// 8.5.2.1 InitializeBoundName ( name, value, environment ), https://tc39.es/ecma262/#sec-initializeboundname
ThrowCompletionOr<void> initialize_bound_name(VM& vm, FlyString const& name, Value value, Environment* environment)
ThrowCompletionOr<void> initialize_bound_name(VM& vm, Utf16FlyString const& name, Value value, Environment* environment)
{
// 1. If environment is not undefined, then
if (environment) {
@ -252,7 +252,7 @@ ThrowCompletionOr<void> initialize_bound_name(VM& vm, FlyString const& name, Val
bool is_compatible_property_descriptor(bool extensible, PropertyDescriptor const& descriptor, Optional<PropertyDescriptor> const& current)
{
// 1. Return ValidateAndApplyPropertyDescriptor(undefined, "", Extensible, Desc, Current).
return validate_and_apply_property_descriptor(nullptr, FlyString {}, extensible, descriptor, current);
return validate_and_apply_property_descriptor(nullptr, Utf16FlyString {}, extensible, descriptor, current);
}
// 10.1.6.3 ValidateAndApplyPropertyDescriptor ( O, P, extensible, Desc, current ), https://tc39.es/ecma262/#sec-validateandapplypropertydescriptor
@ -701,7 +701,7 @@ ThrowCompletionOr<Value> perform_eval(VM& vm, Value x, CallerMode strict_caller,
if (executable_result.is_error())
return vm.throw_completion<InternalError>(ErrorType::NotImplemented, TRY_OR_THROW_OOM(vm, executable_result.error().to_string()));
auto executable = executable_result.release_value();
executable->name = "eval"_fly_string;
executable->name = "eval"_utf16_fly_string;
if (Bytecode::g_dump_bytecode)
executable->dump();
@ -770,9 +770,11 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// a. If varEnv is a global Environment Record, then
if (global_var_environment) {
// i. For each element name of varNames, do
TRY(program.for_each_var_declared_identifier([&](auto const& identifier) -> ThrowCompletionOr<void> {
TRY(program.for_each_var_declared_identifier([&](Identifier const& identifier) -> ThrowCompletionOr<void> {
auto name = Utf16FlyString::from_utf8(identifier.string());
// 1. If varEnv.HasLexicalDeclaration(name) is true, throw a SyntaxError exception.
if (global_var_environment->has_lexical_declaration(identifier.string()))
if (global_var_environment->has_lexical_declaration(name))
return vm.throw_completion<SyntaxError>(ErrorType::TopLevelVariableAlreadyDeclared, identifier.string());
// 2. NOTE: eval will not create a global var declaration that would be shadowed by a global lexical declaration.
@ -790,8 +792,9 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
if (!is<ObjectEnvironment>(*this_environment)) {
// 1. NOTE: The environment of with statements cannot contain any lexical declaration so it doesn't need to be checked for var/let hoisting conflicts.
// 2. For each element name of varNames, do
TRY(program.for_each_var_declared_identifier([&](auto const& identifier) -> ThrowCompletionOr<void> {
auto const& name = identifier.string();
TRY(program.for_each_var_declared_identifier([&](Identifier const& identifier) -> ThrowCompletionOr<void> {
auto name = Utf16FlyString::from_utf8(identifier.string());
// a. If ! thisEnv.HasBinding(name) is true, then
if (MUST(this_environment->has_binding(name))) {
// i. Throw a SyntaxError exception.
@ -824,10 +827,12 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
Vector<FunctionDeclaration const&> functions_to_initialize;
// 9. Let declaredFunctionNames be a new empty List.
HashTable<FlyString> declared_function_names;
HashTable<Utf16FlyString> declared_function_names;
// 10. For each element d of varDeclarations, in reverse List order, do
TRY(program.for_each_var_function_declaration_in_reverse_order([&](FunctionDeclaration const& function) -> ThrowCompletionOr<void> {
auto function_name = Utf16FlyString::from_utf8(function.name());
// a. If d is neither a VariableDeclaration nor a ForBinding nor a BindingIdentifier, then
// i. Assert: d is either a FunctionDeclaration, a GeneratorDeclaration, an AsyncFunctionDeclaration, or an AsyncGeneratorDeclaration.
// Note: This is done by for_each_var_function_declaration_in_reverse_order.
@ -835,18 +840,17 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// ii. NOTE: If there are multiple function declarations for the same name, the last declaration is used.
// iii. Let fn be the sole element of the BoundNames of d.
// iv. If fn is not an element of declaredFunctionNames, then
if (declared_function_names.set(function.name()) != AK::HashSetResult::InsertedNewEntry)
if (declared_function_names.set(function_name) != AK::HashSetResult::InsertedNewEntry)
return {};
// 1. If varEnv is a global Environment Record, then
if (global_var_environment) {
// a. Let fnDefinable be ? varEnv.CanDeclareGlobalFunction(fn).
auto function_definable = TRY(global_var_environment->can_declare_global_function(function.name()));
auto function_definable = TRY(global_var_environment->can_declare_global_function(function_name));
// b. If fnDefinable is false, throw a TypeError exception.
if (!function_definable)
return vm.throw_completion<TypeError>(ErrorType::CannotDeclareGlobalFunction, function.name());
return vm.throw_completion<TypeError>(ErrorType::CannotDeclareGlobalFunction, function_name);
}
// 2. Append fn to declaredFunctionNames.
@ -865,12 +869,12 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
if (!strict) {
// a. Let declaredFunctionOrVarNames be the list-concatenation of declaredFunctionNames and declaredVarNames.
// The spec here uses 'declaredVarNames' but that has not been declared yet.
HashTable<FlyString> hoisted_functions;
HashTable<Utf16FlyString> hoisted_functions;
// b. For each FunctionDeclaration f that is directly contained in the StatementList of a Block, CaseClause, or DefaultClause Contained within body, do
TRY(program.for_each_function_hoistable_with_annexB_extension([&](FunctionDeclaration& function_declaration) -> ThrowCompletionOr<void> {
// i. Let F be StringValue of the BindingIdentifier of f.
auto function_name = function_declaration.name();
auto function_name = Utf16FlyString::from_utf8(function_declaration.name());
// ii. If replacing the FunctionDeclaration f with a VariableStatement that has F as a BindingIdentifier would not produce any Early Errors for body, then
// Note: This is checked during parsing and for_each_function_hoistable_with_annexB_extension so it always passes here.
@ -956,7 +960,7 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
}
// 12. Let declaredVarNames be a new empty List.
HashTable<FlyString> declared_var_names;
HashTable<Utf16FlyString> declared_var_names;
// 13. For each element d of varDeclarations, do
TRY(program.for_each_var_scoped_variable_declaration([&](VariableDeclaration const& declaration) {
@ -964,8 +968,8 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// Note: This is handled by for_each_var_scoped_variable_declaration.
// i. For each String vn of the BoundNames of d, do
return declaration.for_each_bound_identifier([&](auto const& identifier) -> ThrowCompletionOr<void> {
auto const& name = identifier.string();
return declaration.for_each_bound_identifier([&](Identifier const& identifier) -> ThrowCompletionOr<void> {
auto name = Utf16FlyString::from_utf8(identifier.string());
// 1. If vn is not an element of declaredFunctionNames, then
if (!declared_function_names.contains(name)) {
@ -995,8 +999,8 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// a. NOTE: Lexically declared names are only instantiated here but not initialized.
// b. For each element dn of the BoundNames of d, do
return declaration.for_each_bound_identifier([&](auto const& identifier) -> ThrowCompletionOr<void> {
auto const& name = identifier.string();
return declaration.for_each_bound_identifier([&](Identifier const& identifier) -> ThrowCompletionOr<void> {
auto name = Utf16FlyString::from_utf8(identifier.string());
// i. If IsConstantDeclaration of d is true, then
if (declaration.is_constant_declaration()) {
@ -1016,12 +1020,14 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// NOTE: We iterate in reverse order since we appended the functions
// instead of prepending. We append because prepending is much slower
// and we only use the created vector here.
for (auto& declaration : functions_to_initialize.in_reverse()) {
for (auto const& declaration : functions_to_initialize.in_reverse()) {
auto declaration_name = Utf16FlyString::from_utf8(declaration.name());
// a. Let fn be the sole element of the BoundNames of f.
// b. Let fo be InstantiateFunctionObject of f with arguments lexEnv and privateEnv.
auto function = ECMAScriptFunctionObject::create_from_function_node(
declaration,
declaration.name(),
declaration_name,
realm,
lexical_environment,
private_environment);
@ -1029,32 +1035,31 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, Program const& pr
// c. If varEnv is a global Environment Record, then
if (global_var_environment) {
// i. Perform ? varEnv.CreateGlobalFunctionBinding(fn, fo, true).
TRY(global_var_environment->create_global_function_binding(declaration.name(), function, true));
TRY(global_var_environment->create_global_function_binding(declaration_name, function, true));
}
// d. Else,
else {
// i. Let bindingExists be ! varEnv.HasBinding(fn).
auto binding_exists = MUST(variable_environment->has_binding(declaration.name()));
auto binding_exists = MUST(variable_environment->has_binding(declaration_name));
// ii. If bindingExists is false, then
if (!binding_exists) {
// 1. NOTE: The following invocation cannot return an abrupt completion because of the validation preceding step 14.
// 2. Perform ! varEnv.CreateMutableBinding(fn, true).
MUST(variable_environment->create_mutable_binding(vm, declaration.name(), true));
MUST(variable_environment->create_mutable_binding(vm, declaration_name, true));
// 3. Perform ! varEnv.InitializeBinding(fn, fo, normal).
MUST(variable_environment->initialize_binding(vm, declaration.name(), function, Environment::InitializeBindingHint::Normal));
MUST(variable_environment->initialize_binding(vm, declaration_name, function, Environment::InitializeBindingHint::Normal));
}
// iii. Else,
else {
// 1. Perform ! varEnv.SetMutableBinding(fn, fo, false).
MUST(variable_environment->set_mutable_binding(vm, declaration.name(), function, false));
MUST(variable_environment->set_mutable_binding(vm, declaration_name, function, false));
}
}
}
// 18. For each String vn of declaredVarNames, do
for (auto& var_name : declared_var_names) {
// a. If varEnv is a global Environment Record, then
if (global_var_environment) {
@ -1165,15 +1170,15 @@ Object* create_mapped_arguments_object(VM& vm, FunctionObject& function, Nonnull
// and getter/setter behavior itself without extra GC allocations.
// 17. Let mappedNames be a new empty List.
HashTable<FlyString> seen_names;
Vector<FlyString> mapped_names;
HashTable<Utf16FlyString> seen_names;
Vector<Utf16FlyString> mapped_names;
// 18. Set index to numberOfParameters - 1.
// 19. Repeat, while index ≥ 0,
VERIFY(formals->size() <= NumericLimits<i32>::max());
for (i32 index = static_cast<i32>(formals->size()) - 1; index >= 0; --index) {
// a. Let name be parameterNames[index].
auto const& name = formals->parameters()[index].binding.get<NonnullRefPtr<Identifier const>>()->string();
auto name = Utf16FlyString::from_utf8(formals->parameters()[index].binding.get<NonnullRefPtr<Identifier const>>()->string());
// b. If name is not an element of mappedNames, then
if (seen_names.contains(name))
@ -1229,22 +1234,23 @@ CanonicalIndex canonical_numeric_index_string(PropertyKey const& property_key, C
// already covered it with the is_number() == true path.
if (argument.is_empty())
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
u32 current_index = 0;
auto const* characters = argument.bytes_as_string_view().characters_without_null_termination();
auto const length = argument.bytes_as_string_view().length();
if (characters[current_index] == '-') {
if (argument.code_unit_at(current_index) == '-') {
current_index++;
if (current_index == length)
if (current_index == argument.length_in_code_units())
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
}
if (characters[current_index] == '0') {
if (argument.code_unit_at(current_index) == '0') {
current_index++;
if (current_index == length)
if (current_index == argument.length_in_code_units())
return CanonicalIndex(CanonicalIndex::Type::Numeric, 0);
if (characters[current_index] != '.')
if (argument.code_unit_at(current_index) != '.')
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
current_index++;
if (current_index == length)
if (current_index == argument.length_in_code_units())
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
}
@ -1253,11 +1259,11 @@ CanonicalIndex canonical_numeric_index_string(PropertyKey const& property_key, C
return CanonicalIndex(CanonicalIndex::Type::Numeric, 0);
// Short circuit any string that doesn't start with digits
if (char first_non_zero = characters[current_index]; first_non_zero < '0' || first_non_zero > '9')
if (auto first_non_zero = argument.code_unit_at(current_index); first_non_zero < '0' || first_non_zero > '9')
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
// 2. Let n be ! ToNumber(argument).
auto maybe_double = argument.bytes_as_string_view().to_number<double>(AK::TrimWhitespace::No);
auto maybe_double = argument.to_number<double>(AK::TrimWhitespace::No);
if (!maybe_double.has_value())
return CanonicalIndex(CanonicalIndex::Type::Undefined, 0);
@ -1417,7 +1423,7 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
// 2. Let groupName be the substring of templateRemainder from 2 to gtPos.
auto group_name_view = template_remainder.substring_view(2, *greater_than_position - 2);
auto group_name = MUST(group_name_view.to_utf8());
auto group_name = Utf16String::from_utf16(group_name_view);
// 3. Assert: namedCaptures is an Object.
VERIFY(named_captures.is_object());