mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-16 04:59:23 +00:00
This ports the lexer to UTF-16 and deals with the immediate fallout up to the AST. The AST will be dealt with in upcoming commits. The lexer will still accept UTF-8 strings as input, and will transcode them to UTF-16 for lexing. This doesn't actually incur a new allocation, as we were already converting the input StringView to a ByteString for each lexer. One immediate logical benefit here is that we do not need to know off- hand how many UTF-8 bytes some special code points occupy. They all happen to be a single UTF-16 code unit. So instead of advancing the lexer by 3 positions in some cases, we can just always advance by 1.
28 lines
639 B
C++
28 lines
639 B
C++
/*
|
|
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
|
* Copyright (c) 2021-2022, David Tuin <davidot@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/ByteString.h>
|
|
#include <AK/Error.h>
|
|
#include <AK/Optional.h>
|
|
#include <AK/String.h>
|
|
#include <LibJS/Export.h>
|
|
#include <LibJS/SourceRange.h>
|
|
|
|
namespace JS {
|
|
|
|
struct JS_API ParserError {
|
|
String message;
|
|
Optional<Position> position;
|
|
|
|
String to_string() const;
|
|
ByteString to_byte_string() const;
|
|
ByteString source_location_hint(Utf16View const& source, char spacer = ' ', char indicator = '^') const;
|
|
};
|
|
|
|
}
|