mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 04:39:06 +00:00
LibWeb: Check charset UTF-16LE/BE separately for UTF-8 conversion
Previously, when following the standard's step that converts certain charsets to UTF-8, the code compared the charset against the single name "UTF-16BE/LE". In reality, "UTF-16BE" and "UTF-16LE" are two distinct charsets, so each must be checked separately. Co-authored-by: Jelle Raaijmakers <jelle@ladybird.org>
This commit is contained in:
parent
436f3f99a1
commit
f672c57ca7
Notes:
github-actions[bot]
2025-02-24 13:52:43 +00:00
Author: https://github.com/Jaycadox 🔰
Commit: f672c57ca7
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3124
Reviewed-by: https://github.com/gmta ✅
3 changed files with 33 additions and 15 deletions
|
@ -311,7 +311,8 @@ Optional<ByteString> run_prescan_byte_stream_algorithm(DOM::Document& document,
|
|||
|
||||
if (!need_pragma.has_value() || (need_pragma.value() && !got_pragma) || !charset.has_value())
|
||||
continue;
|
||||
if (charset.value() == "UTF-16BE/LE")
|
||||
// https://encoding.spec.whatwg.org/#common-infrastructure-for-utf-16be-and-utf-16le
|
||||
if (charset.value() == "UTF-16BE" || charset.value() == "UTF-16LE")
|
||||
return "UTF-8";
|
||||
else if (charset.value() == "x-user-defined")
|
||||
return "windows-1252";
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
PASS: UTF-8
|
||||
Encoding: utf-16be mapped to characterSet: UTF-8
|
||||
Encoding: utf-16le mapped to characterSet: UTF-8
|
||||
Encoding: utf-8 mapped to characterSet: UTF-8
|
||||
Encoding: x-user-defined mapped to characterSet: windows-1252
|
||||
|
|
|
@ -2,23 +2,37 @@
|
|||
<script src="include.js"></script>
|
||||
<script>
|
||||
asyncTest(async (done) => {
|
||||
const encodings = ['utf-8', 'utf-16be', 'utf-16le', 'x-user-defined'];
|
||||
|
||||
let receivedMessages = [];
|
||||
const dumpMessages = () => {
|
||||
receivedMessages.sort((a, b) => a.encoding.localeCompare(b.encoding));
|
||||
for (const receivedMessage of receivedMessages) {
|
||||
println(`Encoding: ${receivedMessage.encoding} mapped to characterSet: ${receivedMessage.characterSet}`);
|
||||
}
|
||||
};
|
||||
|
||||
addEventListener("message", (event) => {
|
||||
receivedMessages.push(event.data);
|
||||
if (receivedMessages.length == encodings.length) {
|
||||
dumpMessages();
|
||||
done();
|
||||
}
|
||||
}, false);
|
||||
|
||||
const httpServer = httpTestServer();
|
||||
const url = await httpServer.createEcho("GET", "/document-computed-mimetype-test", {
|
||||
for (let encoding of encodings) {
|
||||
const url = await httpServer.createEcho("GET", `/document-computed-mimetype-test-${encoding}`, {
|
||||
status: 200,
|
||||
headers: {
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
},
|
||||
body: `<!doctype html><meta charset="UTF-8"><script>parent.postMessage(document.characterSet, "*")<\/script>`,
|
||||
body: `<!doctype html><meta charset="${encoding}"><script>parent.postMessage({"encoding": "${encoding}", "characterSet": document.characterSet}, "*")<\/script>`,
|
||||
});
|
||||
|
||||
const frame = document.createElement('iframe');
|
||||
const frame = document.createElement("iframe");
|
||||
frame.src = url;
|
||||
|
||||
addEventListener("message", (event) => {
|
||||
println("PASS: " + event.data);
|
||||
done();
|
||||
}, false);
|
||||
|
||||
document.body.appendChild(frame);
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue