LibJS+LibWeb: Port interned bytecode strings to UTF-16

This was almost a no-op, except that we also intern JS exception messages,
so the bulk of this patch is porting those exception messages to UTF-16.
This commit is contained in:
Timothy Flynn 2025-08-07 19:31:52 -04:00 committed by Jelle Raaijmakers
commit 70db474cf0
Notes: github-actions[bot] 2025-08-14 08:28:16 +00:00
162 changed files with 1405 additions and 1422 deletions

View file

@ -444,8 +444,8 @@ Bytecode::CodeGenerationErrorOr<Optional<ScopedOperand>> StringLiteral::generate
Bytecode::CodeGenerationErrorOr<Optional<ScopedOperand>> RegExpLiteral::generate_bytecode(Bytecode::Generator& generator, Optional<ScopedOperand> preferred_dst) const
{
Bytecode::Generator::SourceLocationScope scope(generator, *this);
auto source_index = generator.intern_string(m_pattern.to_utf8_but_should_be_ported_to_utf16());
auto flags_index = generator.intern_string(m_flags.to_utf8_but_should_be_ported_to_utf16());
auto source_index = generator.intern_string(m_pattern);
auto flags_index = generator.intern_string(m_flags);
auto regex_index = generator.intern_regex(Bytecode::ParsedRegex {
.regex = m_parsed_regex,
.pattern = m_parsed_pattern,
@ -1770,7 +1770,7 @@ Bytecode::CodeGenerationErrorOr<Optional<ScopedOperand>> CallExpression::generat
Optional<Bytecode::StringTableIndex> expression_string_index;
if (auto expression_string = this->expression_string(); expression_string.has_value())
expression_string_index = generator.intern_string(expression_string->to_utf8_but_should_be_ported_to_utf16());
expression_string_index = generator.intern_string(expression_string.release_value());
bool has_spread = any_of(arguments(), [](auto& argument) { return argument.is_spread; });
auto dst = choose_dst(generator, preferred_dst);

View file

@ -98,7 +98,7 @@ public:
Optional<IdentifierTableIndex> length_identifier;
String const& get_string(StringTableIndex index) const { return string_table->get(index); }
Utf16String const& get_string(StringTableIndex index) const { return string_table->get(index); }
Utf16FlyString const& get_identifier(IdentifierTableIndex index) const { return identifier_table->get(index); }
Optional<Utf16FlyString const&> get_identifier(Optional<IdentifierTableIndex> const& index) const

View file

@ -202,7 +202,7 @@ public:
return m_current_basic_block->is_terminated();
}
StringTableIndex intern_string(String string)
StringTableIndex intern_string(Utf16String string)
{
return m_string_table->insert(move(string));
}

View file

@ -2264,8 +2264,8 @@ void NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const
new_regexp(
interpreter.vm(),
interpreter.current_executable().regex_table->get(m_regex_index),
Utf16String::from_utf8(interpreter.current_executable().get_string(m_source_index)),
Utf16String::from_utf8(interpreter.current_executable().get_string(m_flags_index))));
interpreter.current_executable().get_string(m_source_index),
interpreter.current_executable().get_string(m_flags_index)));
}
#define JS_DEFINE_NEW_BUILTIN_ERROR_OP(ErrorName) \

View file

@ -8,13 +8,13 @@
namespace JS::Bytecode {
StringTableIndex StringTable::insert(String string)
StringTableIndex StringTable::insert(Utf16String string)
{
m_strings.append(move(string));
return { static_cast<u32>(m_strings.size() - 1) };
}
String const& StringTable::get(StringTableIndex index) const
Utf16String const& StringTable::get(StringTableIndex index) const
{
return m_strings[index.value];
}

View file

@ -7,7 +7,7 @@
#pragma once
#include <AK/DistinctNumeric.h>
#include <AK/String.h>
#include <AK/Utf16String.h>
#include <AK/Vector.h>
namespace JS::Bytecode {
@ -25,13 +25,13 @@ class StringTable {
public:
StringTable() = default;
StringTableIndex insert(String);
String const& get(StringTableIndex) const;
StringTableIndex insert(Utf16String);
Utf16String const& get(StringTableIndex) const;
void dump() const;
bool is_empty() const { return m_strings.is_empty(); }
private:
Vector<String> m_strings;
Vector<Utf16String> m_strings;
};
}