From 4b1deb6fe12b4c7f0434a415b5f203e9ebf08e06 Mon Sep 17 00:00:00 2001 From: Gingeh <39150378+Gingeh@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:55:46 +1100 Subject: [PATCH] LibWeb: Don't skip filtering when CSS contains null or surrogates --- Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 12 ++--- .../css/css-syntax/input-preprocessing.txt | Bin 0 -> 423 bytes .../css/css-syntax/input-preprocessing.html | 46 ++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt create mode 100644 Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html diff --git a/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 443fc93af50..083c823d405 100644 --- a/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -204,15 +204,15 @@ Vector Tokenizer::tokenize(StringView input, StringView encoding) auto decoded_input = MUST(decoder->to_utf8(input)); - // OPTIMIZATION: If the input doesn't contain any CR or FF, we can skip the filtering - bool const contains_cr_or_ff = [&] { - for (auto byte : decoded_input.bytes()) { - if (byte == '\r' || byte == '\f') + // OPTIMIZATION: If the input doesn't contain any filterable characters, we can skip the filtering + bool const contains_filterable = [&] { + for (auto code_point : decoded_input.code_points()) { + if (code_point == '\r' || code_point == '\f' || code_point == 0x00 || is_unicode_surrogate(code_point)) return true; } return false; }(); - if (!contains_cr_or_ff) { + if (!contains_filterable) { return decoded_input; } @@ -242,7 +242,7 @@ Vector Tokenizer::tokenize(StringView input, StringView encoding) } else if (code_point == '\f') { builder.append('\n'); // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { + } else if (code_point == 0x00 || is_unicode_surrogate(code_point)) { builder.append_code_point(REPLACEMENT_CHARACTER); } else { builder.append_code_point(code_point); diff --git a/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt b/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt new file mode 100644 index 0000000000000000000000000000000000000000..17dd1f4da87a149abc0f9bc36d5aa7bc2d6053c1 GIT binary patch literal 423 zcmWG8&CN|Ls^sGGNG!@rEiP6lE=epYEw)nd_vYdXN-ZkQpoNi8lZ z=HfCmPzXpYF6MGcElJGGDdq}FEiTO|;S5PFE>ZAH%uVI=O)V}?Oiu+V;#5k@&u36l zNJ>r4&rK~>0CC^%->byQ1(jvU&qtPhzkhFjK3tX|4Mq0-{=Fapm|kRKk`y~=of1R|$VmwQp}7&N2FW61TcENKb08$D HN|?(5-xZKa literal 0 HcmV?d00001 diff --git a/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html b/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html new file mode 100644 index 00000000000..4db4a32d6c3 --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html @@ -0,0 +1,46 @@ + +Input Preprocessing + + + + + + + +