mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-12 22:22:55 +00:00
LibWeb: Make the new HTML parser parse input as UTF-8
We already convert the input to UTF-8 before starting the tokenizer, so all this patch had to do was switch the tokenizer to use a Utf8View for its input (and to emit 32-bit codepoints).
This commit is contained in:
parent
23dad305e9
commit
b6288163f1
Notes:
sideshowbarker
2024-07-19 05:49:51 +09:00
Author: https://github.com/awesomekling
Commit: b6288163f1
3 changed files with 75 additions and 49 deletions
|
@ -29,6 +29,7 @@
|
|||
#include <AK/Queue.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
#include <LibWeb/Parser/HTMLToken.h>
|
||||
|
||||
|
@ -170,6 +171,10 @@ private:
|
|||
StringView m_input;
|
||||
size_t m_cursor { 0 };
|
||||
|
||||
Utf8View m_utf8_view;
|
||||
AK::Utf8CodepointIterator m_utf8_iterator;
|
||||
AK::Utf8CodepointIterator m_prev_utf8_iterator;
|
||||
|
||||
HTMLToken m_current_token;
|
||||
|
||||
HTMLToken m_last_emitted_start_tag;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue