mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 11:49:44 +00:00
LibWeb: Implement encoding sniffing algorithm
This patch implements the HTML specification's "encoding sniffing algorithm", which is used when no encoding can be obtained from the Content-Type header (either because it doesn't contain a charset=... value or because the file has not been opened via HTTP, as with local files). It also modifies the creator of the HTMLDocumentParser to use the new HTMLDocumentParser::create_with_uncertain_encoding static method, which runs the encoding sniffing algorithm before instantiating the parser. This now allows us to load local HTML pages (or remote pages without a charset specified in the 'Content-Type' header) with a non-UTF-8 encoding such as 'windows-1252'. This would previously crash the browser. :^)
This commit is contained in:
parent
67a9ebc817
commit
f808279769
Notes:
sideshowbarker
2024-07-18 17:48:35 +09:00
Author: https://github.com/MaxWipfli
Commit: f808279769
Pull-request: https://github.com/SerenityOS/serenity/pull/7055
Issue: https://github.com/SerenityOS/serenity/issues/6910
Reviewed-by: https://github.com/Dexesttp
Reviewed-by: https://github.com/awesomekling
Reviewed-by: https://github.com/linusg
6 changed files with 261 additions and 2 deletions
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/String.h>
|
||||
#include <LibWeb/DOM/Attribute.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Prescan helper taking the input buffer and the current read position (both read-only).
// NOTE(review): going by the name, returns true when prescanning must stop at `position`
// (presumably because the end of `input` has been reached) - confirm against the definition.
bool prescan_should_abort(const ByteBuffer& input, const size_t& position);
|
||||
// Predicate over a single input byte.
// NOTE(review): going by the name, true for whitespace bytes and '/'; the exact set of bytes
// treated as whitespace is decided by the definition, which is not visible here - confirm.
bool prescan_is_whitespace_or_slash(const u8& byte);
|
||||
// Prescan helper that takes `position` by mutable reference, so the caller's position may be
// changed by the call.
// NOTE(review): presumably advances `position` past whitespace and '/' bytes in `input`; the
// meaning of the bool result is not visible from this declaration - confirm against the definition.
bool prescan_skip_whitespace_and_slashes(const ByteBuffer& input, size_t& position);
|
||||
// Prescan helper that yields an optional Attribute and takes `position` by mutable reference,
// so the caller's position may be changed by the call.
// NOTE(review): presumably parses one attribute from `input` starting at `position` and returns
// an empty Optional when none can be read - confirm against the definition.
Optional<Attribute> prescan_get_attribute(const ByteBuffer& input, size_t& position);
|
||||
// Runs the byte stream prescan over `input` and yields an optional String.
// NOTE(review): presumably the String is the name of the encoding discovered by the prescan,
// with an empty Optional meaning nothing was found - confirm against the definition.
Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input);
|
||||
// Runs the HTML specification's encoding sniffing algorithm over `input` and returns a String.
// Unlike the prescan entry point, the result is not optional, so a value is always produced.
// NOTE(review): presumably the String is the name of the detected encoding, with some default
// used when nothing can be determined - confirm against the definition.
String run_encoding_sniffing_algorithm(const ByteBuffer& input);
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue