mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-13 13:02:28 +00:00
LibJS: Extract some JS lexer helpers to free functions
We will need these in an upcoming proposal.
This commit is contained in:
parent
3f461b96df
commit
bd1009f3c1
Notes:
github-actions[bot]
2024-12-05 12:57:21 +00:00
Author: https://github.com/trflynn89
Commit: bd1009f3c1
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2773
Reviewed-by: https://github.com/gmta
2 changed files with 45 additions and 6 deletions
|
@ -481,13 +481,13 @@ bool Lexer::is_eof() const
|
||||||
|
|
||||||
// Reports whether the lexer's current position holds a line terminator.
// '\n' and '\r' are accepted directly from m_current_char; any other character
// must pass is_unicode_character() before the full code point is examined.
// NOTE(review): this span looks like a side-by-side diff, so most lines occur twice
// (before/after). The "after" body finishes by delegating to JS::is_line_terminator(),
// the "before" body compares against LINE_SEPARATOR / PARAGRAPH_SEPARATOR itself.
ALWAYS_INLINE bool Lexer::is_line_terminator() const
|
ALWAYS_INLINE bool Lexer::is_line_terminator() const
|
||||||
{
|
{
|
||||||
|
// OPTIMIZATION: Fast-path for ASCII characters.
|
||||||
if (m_current_char == '\n' || m_current_char == '\r')
|
if (m_current_char == '\n' || m_current_char == '\r')
|
||||||
return true;
|
return true;
|
||||||
// Anything that is not a multi-byte character cannot be one of the remaining terminators.
if (!is_unicode_character())
|
if (!is_unicode_character())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto code_point = current_code_point();
|
return JS::is_line_terminator(current_code_point());
|
||||||
return code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE bool Lexer::is_unicode_character() const
|
ALWAYS_INLINE bool Lexer::is_unicode_character() const
|
||||||
|
@ -511,14 +511,13 @@ ALWAYS_INLINE u32 Lexer::current_code_point() const
|
||||||
|
|
||||||
// Reports whether the lexer's current position holds whitespace.
// is_ascii_space(m_current_char) short-circuits to true; any other character
// must pass is_unicode_character() before the full code point is examined.
// NOTE(review): this span looks like a side-by-side diff, so most lines occur twice
// (before/after). The "after" body finishes by delegating to JS::is_whitespace(),
// the "before" body performs the NO_BREAK_SPACE / ZERO_WIDTH_NO_BREAK_SPACE /
// space-separator checks inline.
bool Lexer::is_whitespace() const
|
bool Lexer::is_whitespace() const
|
||||||
{
|
{
|
||||||
|
// OPTIMIZATION: Fast-path for ASCII characters.
|
||||||
if (is_ascii_space(m_current_char))
|
if (is_ascii_space(m_current_char))
|
||||||
return true;
|
return true;
|
||||||
// Anything that is not a multi-byte character has already been ruled out above.
if (!is_unicode_character())
|
if (!is_unicode_character())
|
||||||
return false;
|
return false;
|
||||||
auto code_point = current_code_point();
|
|
||||||
if (code_point == NO_BREAK_SPACE || code_point == ZERO_WIDTH_NO_BREAK_SPACE)
|
return JS::is_whitespace(current_code_point());
|
||||||
return true;
|
|
||||||
return Unicode::code_point_has_space_separator_general_category(code_point);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnicodeEscapeSequence :: https://tc39.es/ecma262/#prod-UnicodeEscapeSequence
|
// UnicodeEscapeSequence :: https://tc39.es/ecma262/#prod-UnicodeEscapeSequence
|
||||||
|
@ -1059,4 +1058,40 @@ TokenType Lexer::consume_regex_literal()
|
||||||
return TokenType::UnterminatedRegexLiteral;
|
return TokenType::UnterminatedRegexLiteral;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://tc39.es/ecma262/#prod-SyntaxCharacter
// Returns true when `code_point` is one of the fourteen characters held in the
// `syntax_characters` view below, and false for every other code point.
|
||||||
|
bool is_syntax_character(u32 code_point)
|
||||||
|
{
|
||||||
|
// SyntaxCharacter :: one of
|
||||||
|
// ^ $ \ . * + ? ( ) [ ] { } |
|
||||||
|
// The "\\" escape in the literal is a single backslash code point.
static constexpr Utf8View syntax_characters { "^$\\.*+?()[]{}|"sv };
|
||||||
|
return syntax_characters.contains(code_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://tc39.es/ecma262/#prod-WhiteSpace
// Returns true when `code_point` passes is_ascii_space(), equals NO_BREAK_SPACE or
// ZERO_WIDTH_NO_BREAK_SPACE, or is reported by
// Unicode::code_point_has_space_separator_general_category(); false otherwise.
// NOTE(review): if is_ascii_space() also accepts '\n' and '\r', this predicate is
// broader than the WhiteSpace production listed below (those two are LineTerminators)
// — confirm that callers expect this.
|
||||||
|
bool is_whitespace(u32 code_point)
|
||||||
|
{
|
||||||
|
// WhiteSpace ::
|
||||||
|
// <TAB>
|
||||||
|
// <VT>
|
||||||
|
// <FF>
|
||||||
|
// <ZWNBSP>
|
||||||
|
// <USP>
|
||||||
|
if (is_ascii_space(code_point))
|
||||||
|
return true;
|
||||||
|
// Two named code points are checked explicitly before the general-category lookup.
if (code_point == NO_BREAK_SPACE || code_point == ZERO_WIDTH_NO_BREAK_SPACE)
|
||||||
|
return true;
|
||||||
|
return Unicode::code_point_has_space_separator_general_category(code_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://tc39.es/ecma262/#prod-LineTerminator
// Returns true when `code_point` is '\n', '\r', LINE_SEPARATOR or PARAGRAPH_SEPARATOR,
// and false for every other code point.
// NOTE(review): the two named constants are defined outside this view; presumably
// U+2028 and U+2029 to match <LS>/<PS> below — confirm.
|
||||||
|
bool is_line_terminator(u32 code_point)
|
||||||
|
{
|
||||||
|
// LineTerminator ::
|
||||||
|
// <LF>
|
||||||
|
// <CR>
|
||||||
|
// <LS>
|
||||||
|
// <PS>
|
||||||
|
return code_point == '\n' || code_point == '\r' || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,4 +91,8 @@ private:
|
||||||
RefPtr<ParsedIdentifiers> m_parsed_identifiers;
|
RefPtr<ParsedIdentifiers> m_parsed_identifiers;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool is_syntax_character(u32 code_point);
|
||||||
|
bool is_whitespace(u32 code_point);
|
||||||
|
bool is_line_terminator(u32 code_point);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue