LibJS: Keep the lookahead lexer alive after parsing its next token

Currently, the lexer holds a ByteString, which is always heap-allocated.
When we create a copy of the lexer for the lookahead token, that token
will outlive the lexer copy. The token holds a couple of string views
into the lexer's source string. This is fine for now, because the source
string will be kept alive by the original lexer.

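But if the lexer were to hold a String or Utf16String, short strings
would be stored inline in the string object itself due to SSO (short
string optimization). Because the lookahead lexer copy is a local
variable, that inline storage would live on the stack. Thus the token
would hold views into released stack data. We need to keep the lookahead
lexer alive to prevent UAF (use-after-free) on views into its source
string.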
This commit is contained in:
Timothy Flynn 2025-08-07 12:35:16 -04:00 committed by Tim Flynn
commit eb74781a2d
Notes: github-actions[bot] 2025-08-13 13:57:35 +00:00
2 changed files with 7 additions and 12 deletions

View file

@ -4266,18 +4266,12 @@ bool Parser::match_declaration(AllowUsingDeclaration allow_using) const
|| type == TokenType::Let;
}
Token Parser::next_token(size_t steps) const
Token Parser::next_token() const
{
Lexer lookahead_lexer = m_state.lexer;
Token lookahead_token;
while (steps > 0) {
lookahead_token = lookahead_lexer.next();
steps--;
}
return lookahead_token;
// We need to keep the lookahead lexer alive to prevent UAF on the lookahead token, as the token may hold a view
// into a short string stored on the stack.
m_state.lookahead_lexer = m_state.lexer;
return m_state.lookahead_lexer->next();
}
bool Parser::try_match_let_declaration() const

View file

@ -257,7 +257,7 @@ private:
RefPtr<BindingPattern const> synthesize_binding_pattern(Expression const& expression);
Token next_token(size_t steps = 1) const;
Token next_token() const;
void check_identifier_name_for_assignment_validity(FlyString const&, bool force_strict = false);
@ -302,6 +302,7 @@ private:
struct ParserState {
Lexer lexer;
mutable Optional<Lexer> lookahead_lexer;
Token current_token;
bool previous_token_was_period { false };
Vector<ParserError> errors;