mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-06 17:11:51 +00:00
LibWeb: Let HTMLTokenizer walk over code points instead of UTF-8
Instead of using UTF-8 iterators to traverse the HTMLTokenizer input stream one code point at a time, we now do a one-shot conversion up front from the input encoding to a Vector<u32> of Unicode code points. This simplifies the tokenizer logic somewhat, and ends up being faster as well, so win-win. 1.02x speedup on Speedometer 2.1.
This commit is contained in:
parent
7cccdb3bcf
commit
263b125782
Notes:
github-actions[bot]
2025-05-10 23:14:16 +00:00
Author: https://github.com/awesomekling
Commit: 263b125782
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/4687
3 changed files with 53 additions and 54 deletions
|
@@ -268,7 +268,7 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point
|
|||
void HTMLParser::run(const URL::URL& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
|
||||
{
|
||||
m_document->set_url(url);
|
||||
- m_document->set_source(MUST(String::from_byte_string(m_tokenizer.source())));
|
||||
+ m_document->set_source(m_tokenizer.source());
|
||||
run(stop_at_insertion_point);
|
||||
the_end(*m_document, this);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue