mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-20 19:45:12 +00:00
LibWeb/DOM: Add missing UTF-8 decode without BOM on fragment ID
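Previously, a fragment whose percent-decoded bytes were not valid UTF-8 caused a crash; the bytes are now decoded as UTF-8 without BOM handling, with invalid bytes replaced by U+FFFD (the replacement character).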
This commit is contained in:
parent
67f435975b
commit
c954d0be27
Notes:
github-actions[bot]
2025-02-10 09:49:08 +00:00
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/c954d0be274 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3525 Reviewed-by: https://github.com/tcl3 ✅
7 changed files with 137 additions and 2 deletions
|
@ -2451,10 +2451,10 @@ Document::IndicatedPart Document::determine_the_indicated_part() const
|
|||
|
||||
// 5. Let fragmentBytes be the result of percent-decoding fragment.
|
||||
// 6. Let decodedFragment be the result of running UTF-8 decode without BOM on fragmentBytes.
|
||||
auto decoded_fragment = URL::percent_decode(*fragment);
|
||||
auto decoded_fragment = String::from_utf8_with_replacement_character(URL::percent_decode(*fragment), String::WithBOMHandling::No);
|
||||
|
||||
// 7. Set potentialIndicatedElement to the result of finding a potential indicated element given document and decodedFragment.
|
||||
potential_indicated_element = find_a_potential_indicated_element(MUST(FlyString::from_deprecated_fly_string(decoded_fragment)));
|
||||
potential_indicated_element = find_a_potential_indicated_element(decoded_fragment);
|
||||
|
||||
// 8. If potentialIndicatedElement is not null, then return potentialIndicatedElement.
|
||||
if (potential_indicated_element)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
Harness status: OK
|
||||
|
||||
Found 3 tests
|
||||
|
||||
3 Pass
|
||||
Pass Invalid percent-encoded UTF-8 byte should decode as U+FFFD
|
||||
Pass Percent-encoded UTF-8 BOM followed by invalid UTF-8 byte should decode as U+FEFF U+FFFD
|
||||
Pass Percent-encoded UTF-8 byte sequence for U+FFFD should decode as U+FFFD
|
|
@ -0,0 +1,9 @@
|
|||
Harness status: OK
|
||||
|
||||
Found 4 tests
|
||||
|
||||
4 Pass
|
||||
Pass U+00FF should find U+00FF
|
||||
Pass Percent-encoded UTF-8 BOM should find U+FEFF as BOM is not stripped when decoding
|
||||
Pass %FF should not find U+00FF as decoding it gives U+FFFD
|
||||
Pass Valid UTF-8 + invalid UTF-8 should not be matched to the utf8-decoded former + the isomorphic-decoded latter
|
|
@ -0,0 +1,6 @@
|
|||
Harness status: OK
|
||||
|
||||
Found 1 tests
|
||||
|
||||
1 Pass
|
||||
Pass Fragment Navigation: fragment id should not be found in non UTF8 document
|
|
@ -0,0 +1,41 @@
|
|||
<!doctype html>
|
||||
<meta charset=windows-1252>
|
||||
<title>Fragment navigation: encoding</title>
|
||||
<script src="../../../../resources/testharness.js"></script>
|
||||
<script src="../../../../resources/testharnessreport.js"></script>
|
||||
<div id="log"></div>
|
||||
<div style=height:10000px></div>
|
||||
<div id=&#xFFFD;></div>
|
||||
<div id=&#xFEFF;&#xFFFD;></div>
|
||||
<script>
|
||||
function goToTop() {
|
||||
location.hash = "top";
|
||||
assert_equals(self.scrollY, 0, "#top");
|
||||
}
|
||||
|
||||
test(() => {
|
||||
assert_equals(location.hash, "", "Page must be loaded with no hash");
|
||||
|
||||
location.hash = "%C2";
|
||||
assert_equals(location.hash, "#%C2");
|
||||
assert_greater_than(self.scrollY, 1000, "#%C2");
|
||||
}, "Invalid percent-encoded UTF-8 byte should decode as U+FFFD");
|
||||
|
||||
test(() => {
|
||||
goToTop();
|
||||
|
||||
location.hash = "%EF%BB%BF%C2";
|
||||
assert_equals(location.hash, "#%EF%BB%BF%C2");
|
||||
assert_greater_than(self.scrollY, 1000, "#%EF%BB%BF%C2");
|
||||
}, "Percent-encoded UTF-8 BOM followed by invalid UTF-8 byte should decode as U+FEFF U+FFFD");
|
||||
|
||||
test(() => {
|
||||
goToTop();
|
||||
|
||||
location.hash = "%EF%BF%BD";
|
||||
assert_equals(location.hash, "#%EF%BF%BD");
|
||||
assert_greater_than(self.scrollY, 1000, "#%EF%BF%BD");
|
||||
|
||||
goToTop();
|
||||
}, "Percent-encoded UTF-8 byte sequence for U+FFFD should decode as U+FFFD");
|
||||
</script>
|
|
@ -0,0 +1,50 @@
|
|||
<!doctype html>
|
||||
<meta charset=windows-1252>
|
||||
<title>Fragment navigation: encoding</title>
|
||||
<script src="../../../../resources/testharness.js"></script>
|
||||
<script src="../../../../resources/testharnessreport.js"></script>
|
||||
<div id="log"></div>
|
||||
<div style=height:10000px></div>
|
||||
<div id=&#xFF;></div>
|
||||
<div id=&#xFEFF;></div>
|
||||
<div id=&#x2661;&#xFF;><div>
|
||||
<script>
|
||||
function goToTop() {
|
||||
location.hash = "top";
|
||||
assert_equals(self.scrollY, 0, "#top");
|
||||
}
|
||||
|
||||
test(() => {
|
||||
assert_equals(location.hash, "", "Page must be loaded with no hash");
|
||||
|
||||
location.hash = "\u00FF";
|
||||
assert_equals(location.hash, "#%C3%BF");
|
||||
assert_greater_than(self.scrollY, 1000, "#%C3%BF");
|
||||
}, "U+00FF should find U+00FF");
|
||||
|
||||
test(() => {
|
||||
goToTop();
|
||||
|
||||
location.hash = "%EF%BB%BF";
|
||||
assert_greater_than(self.scrollY, 1000, "#%EF%BB%BF");
|
||||
}, "Percent-encoded UTF-8 BOM should find U+FEFF as BOM is not stripped when decoding");
|
||||
|
||||
test(() => {
|
||||
goToTop();
|
||||
|
||||
location.hash = "%FF";
|
||||
assert_equals(self.scrollY, 0, "#%FF");
|
||||
}, "%FF should not find U+00FF as decoding it gives U+FFFD");
|
||||
|
||||
test(() => {
|
||||
goToTop();
|
||||
|
||||
// U+2661 in UTF-8 + %FF.
|
||||
// Chrome had an issue that the following fragment was decoded as U+2661 U+00FF.
|
||||
// https://github.com/whatwg/html/pull/3111
|
||||
location.hash = "%E2%99%A1%FF";
|
||||
assert_equals(self.scrollY, 0, "%E2%99%A1%FF");
|
||||
|
||||
goToTop();
|
||||
}, "Valid UTF-8 + invalid UTF-8 should not be matched to the utf8-decoded former + the isomorphic-decoded latter");
|
||||
</script>
|
|
@ -0,0 +1,21 @@
|
|||
<!doctype html>
|
||||
<title>Fragment Navigation: fragment id should not be found in non UTF8 document</title>
|
||||
<meta name=timeout content=long>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=gbk"/>
|
||||
<script src="../../../../resources/testharness.js"></script>
|
||||
<script src="../../../../resources/testharnessreport.js"></script>
|
||||
<body>
|
||||
<div></div>
|
||||
<div id="塯" style="position:absolute; top:100px;"></div>
|
||||
<div style="height:200vh;"></div>
|
||||
<script>
|
||||
async_test(test => {
|
||||
assert_equals(document.characterSet, "GBK", "Document should be GBK encoded");
|
||||
assert_equals(location.hash, "", "Page must be loaded with no hash");
|
||||
location.hash = '%89g';
|
||||
test.step_timeout(() => {
|
||||
assert_equals( document.scrollingElement.scrollTop, 0 );
|
||||
test.done();
|
||||
}, 1);
|
||||
});
|
||||
</script>
|
Loading…
Add table
Reference in a new issue