From c954d0be274f95d1920b0209a3e553751f3335c4 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Mon, 10 Feb 2025 15:32:10 +1300 Subject: [PATCH] LibWeb/DOM: Add missing UTF-8 decode without BOM on fragment ID We were previously crashing instead of using the replacement character on invalid bytes. --- Libraries/LibWeb/DOM/Document.cpp | 4 +- .../fragment-and-encoding-2.txt | 8 +++ .../fragment-and-encoding.txt | 9 ++++ .../scroll-frag-non-utf8-encoded-document.txt | 6 +++ .../fragment-and-encoding-2.html | 41 +++++++++++++++ .../fragment-and-encoding.html | 50 +++++++++++++++++++ ...scroll-frag-non-utf8-encoded-document.html | 21 ++++++++ 7 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.txt create mode 100644 Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.txt create mode 100644 Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.txt create mode 100644 Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.html create mode 100644 Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.html create mode 100644 Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.html diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp index 9ac5a55a2ab..db2863f78ed 100644 --- a/Libraries/LibWeb/DOM/Document.cpp +++ b/Libraries/LibWeb/DOM/Document.cpp @@ -2451,10 +2451,10 @@ Document::IndicatedPart Document::determine_the_indicated_part() const // 5. Let fragmentBytes be the result of percent-decoding fragment. // 6. Let decodedFragment be the result of running UTF-8 decode without BOM on fragmentBytes. - auto decoded_fragment = URL::percent_decode(*fragment); + auto decoded_fragment = String::from_utf8_with_replacement_character(URL::percent_decode(*fragment), String::WithBOMHandling::No); // 7. Set potentialIndicatedElement to the result of finding a potential indicated element given document and decodedFragment. - potential_indicated_element = find_a_potential_indicated_element(MUST(FlyString::from_deprecated_fly_string(decoded_fragment))); + potential_indicated_element = find_a_potential_indicated_element(decoded_fragment); // 8. If potentialIndicatedElement is not null, then return potentialIndicatedElement. if (potential_indicated_element) diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.txt b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.txt new file mode 100644 index 00000000000..c4a93376169 --- /dev/null +++ b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.txt @@ -0,0 +1,8 @@ +Harness status: OK + +Found 3 tests + +3 Pass +Pass Invalid percent-encoded UTF-8 byte should decode as U+FFFD +Pass Percent-encoded UTF-8 BOM followed by invalid UTF-8 byte should decode as U+FEFF U+FFFD +Pass Percent-encoded UTF-8 byte sequence for U+FFFD should decode as U+FFFD \ No newline at end of file diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.txt b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.txt new file mode 100644 index 00000000000..d2b4aec2c60 --- /dev/null +++ b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.txt @@ -0,0 +1,9 @@ +Harness status: OK + +Found 4 tests + +4 Pass +Pass U+00FF should find U+00FF +Pass Percent-encoded UTF-8 BOM should find U+FEFF as BOM is not stripped when decoding +Pass %FF should not find U+00FF as decoding it gives U+FFFD +Pass Valid UTF-8 + invalid UTF-8 should not be matched to the utf8-decoded former + the isomorphic-decoded latter \ No newline at end of file diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.txt b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.txt new file mode 100644 index 00000000000..6a1fa7b4911 --- /dev/null +++ b/Tests/LibWeb/Text/expected/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.txt @@ -0,0 +1,6 @@ +Harness status: OK + +Found 1 tests + +1 Pass +Pass Fragment Navigation: fragment id should not be found in non UTF8 document \ No newline at end of file diff --git a/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.html b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.html new file mode 100644 index 00000000000..446eecc10be --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding-2.html @@ -0,0 +1,41 @@ + + +Fragment navigation: encoding + + +
+
+
+
+ diff --git a/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.html b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.html new file mode 100644 index 00000000000..52a84ce4b8e --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/fragment-and-encoding.html @@ -0,0 +1,50 @@ + + +Fragment navigation: encoding + + +
+
+
+
+
+ diff --git a/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.html b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.html new file mode 100644 index 00000000000..8a39fd34f7a --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/html/browsers/browsing-the-web/scroll-to-fragid/scroll-frag-non-utf8-encoded-document.html @@ -0,0 +1,21 @@ + +Fragment Navigation: fragment id should not be found in non UTF8 document + + + + + +
+
+
+