mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 20:59:16 +00:00
LibWeb: Check charset UTF-16LE/BE separately for UTF-8 conversion
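Previously, when following the specification's step for converting the detected charset to UTF-8, the charset name was compared against the single literal string "UTF-16BE/LE", which never matches a real encoding name. The specification's "UTF-16BE/LE" is shorthand for two distinct encodings, so the charsets "UTF-16BE" and "UTF-16LE" must each be checked separately. Co-authored-by: Jelle Raaijmakers <jelle@ladybird.org>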
This commit is contained in:
parent
436f3f99a1
commit
f672c57ca7
Notes:
github-actions[bot]
2025-02-24 13:52:43 +00:00
Author: https://github.com/Jaycadox 🔰
Commit: f672c57ca7
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3124
Reviewed-by: https://github.com/gmta ✅
3 changed files with 33 additions and 15 deletions
|
@ -311,7 +311,8 @@ Optional<ByteString> run_prescan_byte_stream_algorithm(DOM::Document& document,
|
||||||
|
|
||||||
if (!need_pragma.has_value() || (need_pragma.value() && !got_pragma) || !charset.has_value())
|
if (!need_pragma.has_value() || (need_pragma.value() && !got_pragma) || !charset.has_value())
|
||||||
continue;
|
continue;
|
||||||
if (charset.value() == "UTF-16BE/LE")
|
// https://encoding.spec.whatwg.org/#common-infrastructure-for-utf-16be-and-utf-16le
|
||||||
|
if (charset.value() == "UTF-16BE" || charset.value() == "UTF-16LE")
|
||||||
return "UTF-8";
|
return "UTF-8";
|
||||||
else if (charset.value() == "x-user-defined")
|
else if (charset.value() == "x-user-defined")
|
||||||
return "windows-1252";
|
return "windows-1252";
|
||||||
|
|
|
@ -1 +1,4 @@
|
||||||
PASS: UTF-8
|
Encoding: utf-16be mapped to characterSet: UTF-8
|
||||||
|
Encoding: utf-16le mapped to characterSet: UTF-8
|
||||||
|
Encoding: utf-8 mapped to characterSet: UTF-8
|
||||||
|
Encoding: x-user-defined mapped to characterSet: windows-1252
|
||||||
|
|
|
@ -2,23 +2,37 @@
|
||||||
<script src="include.js"></script>
|
<script src="include.js"></script>
|
||||||
<script>
|
<script>
|
||||||
asyncTest(async (done) => {
|
asyncTest(async (done) => {
|
||||||
const httpServer = httpTestServer();
|
const encodings = ['utf-8', 'utf-16be', 'utf-16le', 'x-user-defined'];
|
||||||
const url = await httpServer.createEcho("GET", "/document-computed-mimetype-test", {
|
|
||||||
status: 200,
|
|
||||||
headers: {
|
|
||||||
"Access-Control-Allow-Origin": "*",
|
|
||||||
},
|
|
||||||
body: `<!doctype html><meta charset="UTF-8"><script>parent.postMessage(document.characterSet, "*")<\/script>`,
|
|
||||||
});
|
|
||||||
|
|
||||||
const frame = document.createElement('iframe');
|
let receivedMessages = [];
|
||||||
frame.src = url;
|
const dumpMessages = () => {
|
||||||
|
receivedMessages.sort((a, b) => a.encoding.localeCompare(b.encoding));
|
||||||
|
for (const receivedMessage of receivedMessages) {
|
||||||
|
println(`Encoding: ${receivedMessage.encoding} mapped to characterSet: ${receivedMessage.characterSet}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
addEventListener("message", (event) => {
|
addEventListener("message", (event) => {
|
||||||
println("PASS: " + event.data);
|
receivedMessages.push(event.data);
|
||||||
done();
|
if (receivedMessages.length == encodings.length) {
|
||||||
|
dumpMessages();
|
||||||
|
done();
|
||||||
|
}
|
||||||
}, false);
|
}, false);
|
||||||
|
|
||||||
document.body.appendChild(frame);
|
const httpServer = httpTestServer();
|
||||||
|
for (let encoding of encodings) {
|
||||||
|
const url = await httpServer.createEcho("GET", `/document-computed-mimetype-test-${encoding}`, {
|
||||||
|
status: 200,
|
||||||
|
headers: {
|
||||||
|
"Access-Control-Allow-Origin": "*",
|
||||||
|
},
|
||||||
|
body: `<!doctype html><meta charset="${encoding}"><script>parent.postMessage({"encoding": "${encoding}", "characterSet": document.characterSet}, "*")<\/script>`,
|
||||||
|
});
|
||||||
|
|
||||||
|
const frame = document.createElement("iframe");
|
||||||
|
frame.src = url;
|
||||||
|
document.body.appendChild(frame);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue