mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-16 21:20:18 +00:00
LibJS: Port the JS lexer and parser to UTF-16
This ports the lexer to UTF-16 and deals with the immediate fallout up to the AST. The AST will be dealt with in upcoming commits. The lexer will still accept UTF-8 strings as input, and will transcode them to UTF-16 for lexing. This doesn't actually incur a new allocation, as we were already converting the input StringView to a ByteString for each lexer. One immediate logical benefit here is that we do not need to know off-hand how many UTF-8 bytes some special code points occupy. They all happen to be a single UTF-16 code unit. So instead of advancing the lexer by 3 positions in some cases, we can just always advance by 1.
This commit is contained in:
parent
eb74781a2d
commit
00182a2405
Notes:
github-actions[bot]
2025-08-13 13:57:27 +00:00
Author: https://github.com/trflynn89
Commit: 00182a2405
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
14 changed files with 467 additions and 474 deletions
|
@ -67,7 +67,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
|
|||
position.set_column(position.column() + 1);
|
||||
};
|
||||
|
||||
auto append_token = [&](Utf8View str, Token const& token, bool is_trivia) {
|
||||
auto append_token = [&](Utf16View const& str, Token const& token, bool is_trivia) {
|
||||
if (str.is_empty())
|
||||
return;
|
||||
|
||||
|
@ -100,10 +100,10 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
|
|||
|
||||
bool was_eof = false;
|
||||
for (auto token = lexer.next(); !was_eof; token = lexer.next()) {
|
||||
append_token(Utf8View(token.trivia()), token, true);
|
||||
append_token(token.trivia(), token, true);
|
||||
|
||||
auto token_start_position = position;
|
||||
append_token(Utf8View(token.value()), token, false);
|
||||
append_token(token.value(), token, false);
|
||||
|
||||
if (token.type() == TokenType::Eof)
|
||||
was_eof = true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue