LibJS: Port RegExp flags and patterns to UTF-16

This commit is contained in:
Timothy Flynn 2025-08-06 11:28:18 -04:00 committed by Tim Flynn
commit 62d85dd90a
Notes: github-actions[bot] 2025-08-13 13:57:14 +00:00
8 changed files with 55 additions and 46 deletions

View file

@ -444,8 +444,8 @@ Bytecode::CodeGenerationErrorOr<Optional<ScopedOperand>> StringLiteral::generate
Bytecode::CodeGenerationErrorOr<Optional<ScopedOperand>> RegExpLiteral::generate_bytecode(Bytecode::Generator& generator, Optional<ScopedOperand> preferred_dst) const
{
Bytecode::Generator::SourceLocationScope scope(generator, *this);
auto source_index = generator.intern_string(m_pattern);
auto flags_index = generator.intern_string(m_flags);
auto source_index = generator.intern_string(m_pattern.to_utf8_but_should_be_ported_to_utf16());
auto flags_index = generator.intern_string(m_flags.to_utf8_but_should_be_ported_to_utf16());
auto regex_index = generator.intern_regex(Bytecode::ParsedRegex {
.regex = m_parsed_regex,
.pattern = m_parsed_pattern,

View file

@ -1542,7 +1542,7 @@ inline ThrowCompletionOr<CalleeAndThis> get_callee_and_this_from_environment(Byt
}
// 13.2.7.3 Runtime Semantics: Evaluation, https://tc39.es/ecma262/#sec-regular-expression-literals-runtime-semantics-evaluation
inline Value new_regexp(VM& vm, ParsedRegex const& parsed_regex, String const& pattern, String const& flags)
inline Value new_regexp(VM& vm, ParsedRegex const& parsed_regex, Utf16String pattern, Utf16String flags)
{
// 1. Let pattern be CodePointsToString(BodyText of RegularExpressionLiteral).
// 2. Let flags be CodePointsToString(FlagText of RegularExpressionLiteral).
@ -1551,7 +1551,7 @@ inline Value new_regexp(VM& vm, ParsedRegex const& parsed_regex, String const& p
auto& realm = *vm.current_realm();
Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern.to_byte_string(), parsed_regex.flags);
// NOTE: We bypass RegExpCreate and subsequently RegExpAlloc as an optimization to use the already parsed values.
auto regexp_object = RegExpObject::create(realm, move(regex), pattern, flags);
auto regexp_object = RegExpObject::create(realm, move(regex), move(pattern), move(flags));
// RegExpAlloc has these two steps from the 'Legacy RegExp features' proposal.
regexp_object->set_realm(realm);
// We don't need to check 'If SameValue(newTarget, thisRealm.[[Intrinsics]].[[%RegExp%]]) is true'
@ -2264,8 +2264,8 @@ void NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const
new_regexp(
interpreter.vm(),
interpreter.current_executable().regex_table->get(m_regex_index),
interpreter.current_executable().get_string(m_source_index),
interpreter.current_executable().get_string(m_flags_index)));
Utf16String::from_utf8(interpreter.current_executable().get_string(m_source_index)),
Utf16String::from_utf8(interpreter.current_executable().get_string(m_flags_index))));
}
#define JS_DEFINE_NEW_BUILTIN_ERROR_OP(ErrorName) \