mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 11:49:44 +00:00
HTMLEncodingDetection: Use MIME type in encoding sniffing
Also add proper spec comments. This fixes at least one previously failing WPT test: https://wpt.live/encoding/single-byte-decoder.window.html?document
This commit is contained in:
parent
dae6200c1d
commit
c1a14f66ad
Notes:
github-actions[bot]
2024-10-12 14:15:30 +00:00
Author: https://github.com/0x4261756D
Commit: c1a14f66ad
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1707
3 changed files with 67 additions and 28 deletions
|
@ -146,14 +146,14 @@ static WebIDL::ExceptionOr<JS::NonnullGCPtr<DOM::Document>> load_xml_document(HT
|
|||
if (auto maybe_encoding = type.parameters().get("charset"sv); maybe_encoding.has_value())
|
||||
content_encoding = maybe_encoding.value();
|
||||
|
||||
auto process_body = JS::create_heap_function(document->heap(), [document, url = navigation_params.response->url().value(), content_encoding = move(content_encoding)](ByteBuffer data) {
|
||||
auto process_body = JS::create_heap_function(document->heap(), [document, url = navigation_params.response->url().value(), content_encoding = move(content_encoding), mime = type](ByteBuffer data) {
|
||||
Optional<TextCodec::Decoder&> decoder;
|
||||
// The actual HTTP headers and other metadata, not the headers as mutated or implied by the algorithms given in this specification,
|
||||
// are the ones that must be used when determining the character encoding according to the rules given in the above specifications.
|
||||
if (content_encoding.has_value())
|
||||
decoder = TextCodec::decoder_for(*content_encoding);
|
||||
if (!decoder.has_value()) {
|
||||
auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
|
||||
auto encoding = HTML::run_encoding_sniffing_algorithm(document, data, mime);
|
||||
decoder = TextCodec::decoder_for(encoding);
|
||||
}
|
||||
VERIFY(decoder.has_value());
|
||||
|
@ -226,8 +226,8 @@ static WebIDL::ExceptionOr<JS::NonnullGCPtr<DOM::Document>> load_text_document(H
|
|||
// document's relevant global object to have the parser to process the implied EOF character, which eventually causes a
|
||||
// load event to be fired.
|
||||
// FIXME: Parse as we receive the document data, instead of waiting for the whole document to be fetched first.
|
||||
auto process_body = JS::create_heap_function(document->heap(), [document, url = navigation_params.response->url().value()](ByteBuffer data) {
|
||||
auto encoding = run_encoding_sniffing_algorithm(document, data);
|
||||
auto process_body = JS::create_heap_function(document->heap(), [document, url = navigation_params.response->url().value(), mime = type](ByteBuffer data) {
|
||||
auto encoding = run_encoding_sniffing_algorithm(document, data, mime);
|
||||
dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding);
|
||||
|
||||
auto parser = HTML::HTMLParser::create_for_scripting(document);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue