LibJS: Make JS lexer handle 2 and 3 character tokens faster

By checking the lengths and then looking directly at the bytes, the
generated code becomes a lot nicer.

This gives a 1.23x speedup when parsing the JS from x.com.
This commit is contained in:
Andreas Kling 2024-09-02 10:46:49 +02:00 committed by Andreas Kling
commit 93a4d7395f
Notes: github-actions[bot] 2024-09-02 13:23:58 +00:00

View file

@ -20,76 +20,173 @@ HashMap<DeprecatedFlyString, TokenType> Lexer::s_keywords;
// Classifies a two-byte punctuator such as "=>" or "+=".
//
// Returns the matching TokenType, or TokenType::Invalid when `view` is not
// exactly two bytes long or does not spell a known two-character token.
static constexpr TokenType parse_two_char_token(StringView view)
{
    // Reject every other length up front so the direct reads of bytes[0] and
    // bytes[1] below stay in bounds.
    if (view.length() != 2)
        return TokenType::Invalid;

    // Dispatch on the first byte, then on the second, instead of comparing the
    // whole view against each candidate string in turn.
    auto const* bytes = view.bytes().data();
    switch (bytes[0]) {
    case '=':
        switch (bytes[1]) {
        case '>':
            return TokenType::Arrow;
        case '=':
            return TokenType::EqualsEquals;
        default:
            return TokenType::Invalid;
        }
    case '+':
        switch (bytes[1]) {
        case '=':
            return TokenType::PlusEquals;
        case '+':
            return TokenType::PlusPlus;
        default:
            return TokenType::Invalid;
        }
    case '-':
        switch (bytes[1]) {
        case '=':
            return TokenType::MinusEquals;
        case '-':
            return TokenType::MinusMinus;
        default:
            return TokenType::Invalid;
        }
    case '*':
        switch (bytes[1]) {
        case '=':
            return TokenType::AsteriskEquals;
        case '*':
            return TokenType::DoubleAsterisk;
        default:
            return TokenType::Invalid;
        }
    case '/':
        switch (bytes[1]) {
        case '=':
            return TokenType::SlashEquals;
        default:
            return TokenType::Invalid;
        }
    case '%':
        switch (bytes[1]) {
        case '=':
            return TokenType::PercentEquals;
        default:
            return TokenType::Invalid;
        }
    case '&':
        switch (bytes[1]) {
        case '=':
            return TokenType::AmpersandEquals;
        case '&':
            return TokenType::DoubleAmpersand;
        default:
            return TokenType::Invalid;
        }
    case '|':
        switch (bytes[1]) {
        case '=':
            return TokenType::PipeEquals;
        case '|':
            return TokenType::DoublePipe;
        default:
            return TokenType::Invalid;
        }
    case '^':
        switch (bytes[1]) {
        case '=':
            return TokenType::CaretEquals;
        default:
            return TokenType::Invalid;
        }
    case '<':
        switch (bytes[1]) {
        case '=':
            return TokenType::LessThanEquals;
        case '<':
            return TokenType::ShiftLeft;
        default:
            return TokenType::Invalid;
        }
    case '>':
        switch (bytes[1]) {
        case '=':
            return TokenType::GreaterThanEquals;
        case '>':
            return TokenType::ShiftRight;
        default:
            return TokenType::Invalid;
        }
    case '?':
        switch (bytes[1]) {
        case '?':
            return TokenType::DoubleQuestionMark;
        case '.':
            return TokenType::QuestionMarkPeriod;
        default:
            return TokenType::Invalid;
        }
    case '!':
        switch (bytes[1]) {
        case '=':
            return TokenType::ExclamationMarkEquals;
        default:
            return TokenType::Invalid;
        }
    default:
        return TokenType::Invalid;
    }
}
// Classifies a three-byte punctuator such as "===" or ">>>".
//
// Returns the matching TokenType, or TokenType::Invalid when `view` is not
// exactly three bytes long or does not spell a known three-character token.
static constexpr TokenType parse_three_char_token(StringView view)
{
    // Reject every other length up front so the direct reads of bytes[0..2]
    // below stay in bounds.
    if (view.length() != 3)
        return TokenType::Invalid;

    // Dispatch on the first byte; each case then checks the remaining two
    // bytes directly rather than comparing against whole candidate strings.
    auto const* bytes = view.bytes().data();
    switch (bytes[0]) {
    case '<':
        if (bytes[1] == '<' && bytes[2] == '=')
            return TokenType::ShiftLeftEquals;
        return TokenType::Invalid;
    case '>':
        // '>' is the only first byte that starts two different tokens.
        if (bytes[1] == '>' && bytes[2] == '=')
            return TokenType::ShiftRightEquals;
        if (bytes[1] == '>' && bytes[2] == '>')
            return TokenType::UnsignedShiftRight;
        return TokenType::Invalid;
    case '=':
        if (bytes[1] == '=' && bytes[2] == '=')
            return TokenType::EqualsEqualsEquals;
        return TokenType::Invalid;
    case '!':
        if (bytes[1] == '=' && bytes[2] == '=')
            return TokenType::ExclamationMarkEqualsEquals;
        return TokenType::Invalid;
    case '.':
        if (bytes[1] == '.' && bytes[2] == '.')
            return TokenType::TripleDot;
        return TokenType::Invalid;
    case '*':
        if (bytes[1] == '*' && bytes[2] == '=')
            return TokenType::DoubleAsteriskEquals;
        return TokenType::Invalid;
    case '&':
        if (bytes[1] == '&' && bytes[2] == '=')
            return TokenType::DoubleAmpersandEquals;
        return TokenType::Invalid;
    case '|':
        if (bytes[1] == '|' && bytes[2] == '=')
            return TokenType::DoublePipeEquals;
        return TokenType::Invalid;
    case '?':
        if (bytes[1] == '?' && bytes[2] == '=')
            return TokenType::DoubleQuestionMarkEquals;
        return TokenType::Invalid;
    default:
        return TokenType::Invalid;
    }
}
static consteval Array<TokenType, 256> make_single_char_tokens_array()