mirror of
				https://github.com/LadybirdBrowser/ladybird.git
				synced 2025-10-25 09:30:01 +00:00 
			
		
		
		
	This ports the lexer to UTF-16 and deals with the immediate fallout up to the AST. The AST will be dealt with in upcoming commits. The lexer will still accept UTF-8 strings as input, and will transcode them to UTF-16 for lexing. This doesn't actually incur a new allocation, as we were already converting the input StringView to a ByteString for each lexer. One immediate logical benefit here is that we do not need to know offhand how many UTF-8 bytes some special code points occupy. They all happen to be a single UTF-16 code unit. So instead of advancing the lexer by 3 positions in some cases, we can just always advance by 1.
		
			
				
	
	
		
			28 lines
		
	
	
	
		
			639 B
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			28 lines
		
	
	
	
		
			639 B
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
 | |
|  * Copyright (c) 2021-2022, David Tuin <davidot@serenityos.org>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include <AK/ByteString.h>
 | |
| #include <AK/Error.h>
 | |
| #include <AK/Optional.h>
 | |
| #include <AK/String.h>
 | |
| #include <LibJS/Export.h>
 | |
| #include <LibJS/SourceRange.h>
 | |
| 
 | |
| namespace JS {
 | |
| 
 | |
| struct JS_API ParserError { // Describes a parse error: a message plus an optional source position.
 | |
|     String message; // Error message text.
 | |
|     Optional<Position> position; // Optional, so an error may carry no position at all.
 | |
| 
 | |
|     String to_string() const; // Renders this error as a String (defined outside this header).
 | |
|     ByteString to_byte_string() const; // ByteString counterpart of to_string(); presumably the same text, confirm against the definition.
 | |
|     ByteString source_location_hint(Utf16View const& source, char spacer = ' ', char indicator = '^') const; // Takes the source as a UTF-16 view; presumably pads with `spacer` and marks the error column with `indicator`. TODO: confirm against the definition.
 | |
| };
 | |
| 
 | |
| }
 |