mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-16 04:59:23 +00:00
This ports the lexer to UTF-16 and deals with the immediate fallout up to the AST. The AST will be dealt with in upcoming commits. The lexer will still accept UTF-8 strings as input, and will transcode them to UTF-16 for lexing. This doesn't actually incur a new allocation, as we were already converting the input StringView to a ByteString for each lexer. One immediate logical benefit here is that we do not need to know offhand how many UTF-8 bytes some special code points occupy. They all happen to be a single UTF-16 code unit. So instead of advancing the lexer by 3 positions in some cases, we can just always advance by 1.
47 lines
1.6 KiB
C++
47 lines
1.6 KiB
C++
/*
|
|
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
|
* Copyright (c) 2021-2022, David Tuin <davidot@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/StringView.h>
|
|
#include <AK/Vector.h>
|
|
#include <LibJS/ParserError.h>
|
|
#include <LibJS/Token.h>
|
|
|
|
namespace JS {
|
|
|
|
// Renders this error as a String.
// With a position: "<message> (line: <line>, column: <column>)".
// Without a position: the message on its own.
String ParserError::to_string() const
{
    if (position.has_value()) {
        auto const& location = position.value();
        return MUST(String::formatted("{} (line: {}, column: {})", message, location.line, location.column));
    }

    return message;
}
|
|
|
|
// ByteString counterpart of to_string(): the message, followed by
// " (line: <line>, column: <column>)" when a position is available.
ByteString ParserError::to_byte_string() const
{
    bool const has_location = position.has_value();

    return has_location
        ? ByteString::formatted("{} (line: {}, column: {})", message, position.value().line, position.value().column)
        : message.to_byte_string();
}
|
|
|
|
// Builds a two-line hint for this error: first the text of the source line the error is on,
// then a run of `spacer` characters followed by `indicator` at the reported column.
//
// Returns an empty ByteString when the error carries no position, or when the recorded line
// number does not refer to a line of `source` (line 0, or past the last line).
ByteString ParserError::source_location_hint(Utf16View const& source, char spacer, char indicator) const
{
    if (!position.has_value())
        return {};

    auto const& location = position.value();

    // We need to modify the source to match what the lexer considers one line - normalizing
    // line terminators to \n is easier than splitting using all different LT characters.
    auto source_string = source.replace("\r\n"sv, "\n"sv, ReplaceMode::All).replace("\r"sv, "\n"sv, ReplaceMode::All).replace(LINE_SEPARATOR, "\n"sv, ReplaceMode::All).replace(PARAGRAPH_SEPARATOR, "\n"sv, ReplaceMode::All);
    auto lines = source_string.split_view('\n', SplitBehavior::KeepEmpty);

    // The line is looked up as `line - 1`, so line 0 or a line beyond the end of `source`
    // has no text to show; bail out instead of indexing out of range.
    if (location.line == 0 || location.line > lines.size())
        return {};

    // The indicator goes under column `column`, i.e. after `column - 1` spacers. Clamp at zero so
    // that a column of 0 does not wrap around (assuming an unsigned column) into a huge count.
    auto const spacer_count = location.column == 0 ? 0 : location.column - 1;

    StringBuilder builder;
    builder.append(lines[location.line - 1]);
    builder.append('\n');
    for (size_t i = 0; i < spacer_count; ++i)
        builder.append(spacer);
    builder.append(indicator);
    return builder.to_byte_string();
}
|
|
|
|
}
|