mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-23 08:30:50 +00:00
LibJS: Port the JS lexer and parser to UTF-16
This ports the lexer to UTF-16 and deals with the immediate fallout up to the AST. The AST will be dealt with in upcoming commits. The lexer will still accept UTF-8 strings as input, and will transcode them to UTF-16 for lexing. This doesn't actually incur a new allocation, as we were already converting the input StringView to a ByteString for each lexer. One immediate logical benefit here is that we do not need to know off- hand how many UTF-8 bytes some special code points occupy. They all happen to be a single UTF-16 code unit. So instead of advancing the lexer by 3 positions in some cases, we can just always advance by 1.
This commit is contained in:
parent
eb74781a2d
commit
00182a2405
Notes:
github-actions[bot]
2025-08-13 13:57:27 +00:00
Author: https://github.com/trflynn89
Commit: 00182a2405
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
14 changed files with 467 additions and 474 deletions
|
@ -199,8 +199,10 @@ static ErrorOr<bool> parse_and_run(JS::Realm& realm, StringView source, StringVi
|
|||
if (!s_as_module) {
|
||||
auto script_or_error = JS::Script::parse(source, realm, source_name);
|
||||
if (script_or_error.is_error()) {
|
||||
auto utf16_source = Utf16String::from_utf8(source);
|
||||
|
||||
auto error = script_or_error.error()[0];
|
||||
auto hint = error.source_location_hint(source);
|
||||
auto hint = error.source_location_hint(utf16_source);
|
||||
if (!hint.is_empty())
|
||||
outln("{}", hint);
|
||||
|
||||
|
@ -213,8 +215,10 @@ static ErrorOr<bool> parse_and_run(JS::Realm& realm, StringView source, StringVi
|
|||
} else {
|
||||
auto module_or_error = JS::SourceTextModule::parse(source, realm, source_name);
|
||||
if (module_or_error.is_error()) {
|
||||
auto utf16_source = Utf16String::from_utf8(source);
|
||||
|
||||
auto error = module_or_error.error()[0];
|
||||
auto hint = error.source_location_hint(source);
|
||||
auto hint = error.source_location_hint(utf16_source);
|
||||
if (!hint.is_empty())
|
||||
outln("{}", hint);
|
||||
|
||||
|
@ -619,7 +623,7 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
JS::Lexer lexer(line);
|
||||
bool indenters_starting_line = true;
|
||||
for (JS::Token token = lexer.next(); token.type() != JS::TokenType::Eof; token = lexer.next()) {
|
||||
auto length = Utf8View { token.value() }.length();
|
||||
auto length = token.value().length_in_code_units();
|
||||
auto start = token.offset();
|
||||
auto end = start + length;
|
||||
if (indenters_starting_line) {
|
||||
|
@ -680,8 +684,8 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
CompleteProperty,
|
||||
} mode { Initial };
|
||||
|
||||
FlyString variable_name;
|
||||
FlyString property_name;
|
||||
Utf16FlyString variable_name;
|
||||
Utf16FlyString property_name;
|
||||
|
||||
// we're only going to complete either
|
||||
// - <N>
|
||||
|
@ -727,11 +731,11 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
}
|
||||
}
|
||||
|
||||
bool last_token_has_trivia = js_token.trivia().length() > 0;
|
||||
bool last_token_has_trivia = !js_token.trivia().is_empty();
|
||||
|
||||
if (mode == CompleteNullProperty) {
|
||||
mode = CompleteProperty;
|
||||
property_name = ""_fly_string;
|
||||
property_name = Utf16FlyString {};
|
||||
last_token_has_trivia = false; // <name> <dot> [tab] is sensible to complete.
|
||||
}
|
||||
|
||||
|
@ -740,17 +744,18 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
|
||||
Vector<Line::CompletionSuggestion> results;
|
||||
|
||||
Function<void(JS::Shape const&, StringView)> list_all_properties = [&results, &list_all_properties](JS::Shape const& shape, auto property_pattern) {
|
||||
Function<void(JS::Shape const&, Utf16FlyString const&)> list_all_properties = [&results, &list_all_properties](JS::Shape const& shape, Utf16FlyString const& property_pattern) {
|
||||
for (auto const& descriptor : shape.property_table()) {
|
||||
if (!descriptor.key.is_string())
|
||||
continue;
|
||||
|
||||
auto key = descriptor.key.as_string().to_utf16_string().to_utf8_but_should_be_ported_to_utf16();
|
||||
if (key.bytes_as_string_view().starts_with(property_pattern)) {
|
||||
Line::CompletionSuggestion completion { key, Line::CompletionSuggestion::ForSearch };
|
||||
auto key = descriptor.key.as_string().to_utf16_string();
|
||||
|
||||
if (key.starts_with(property_pattern.view())) {
|
||||
Line::CompletionSuggestion completion { key.to_utf8_but_should_be_ported_to_utf16(), Line::CompletionSuggestion::ForSearch };
|
||||
if (!results.contains_slow(completion)) { // hide duplicates
|
||||
results.append(key.to_byte_string());
|
||||
results.last().invariant_offset = property_pattern.length();
|
||||
results.last().invariant_offset = property_pattern.length_in_code_units();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -759,11 +764,9 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
}
|
||||
};
|
||||
|
||||
auto variable_name_utf16 = Utf16FlyString::from_utf8(variable_name);
|
||||
|
||||
switch (mode) {
|
||||
case CompleteProperty: {
|
||||
auto reference_or_error = g_vm->resolve_binding(variable_name_utf16, &global_environment);
|
||||
auto reference_or_error = g_vm->resolve_binding(variable_name, &global_environment);
|
||||
if (reference_or_error.is_error())
|
||||
return {};
|
||||
auto value_or_error = reference_or_error.value().get_value(*g_vm);
|
||||
|
@ -785,9 +788,9 @@ static ErrorOr<int> run_repl(bool gc_on_every_allocation, bool syntax_highlight)
|
|||
list_all_properties(variable.shape(), variable_name);
|
||||
|
||||
for (auto const& name : global_environment.declarative_record().bindings()) {
|
||||
if (name.view().starts_with(variable_name_utf16.view())) {
|
||||
results.empend(name.view().to_utf8_but_should_be_ported_to_utf16().to_byte_string());
|
||||
results.last().invariant_offset = variable_name.bytes().size();
|
||||
if (name.view().starts_with(variable_name.view())) {
|
||||
results.empend(MUST(name.view().to_byte_string()));
|
||||
results.last().invariant_offset = variable_name.length_in_code_units();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue