mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-02 09:18:52 +00:00
LibWeb: Handle iso-8859-1 web content a little bit better
We now look at the HTTP response headers for a Content-Type header and try to parse it if present to find the text encoding. If the text encoding is iso-8859-1, we turn all non-ASCII characters into question marks. This makes Swedish Google load on my machine! :^)
This commit is contained in:
parent
eb6e35a1be
commit
f3676ebef5
Notes:
sideshowbarker
2024-07-19 07:00:27 +09:00
Author: https://github.com/awesomekling
Commit: f3676ebef5
3 changed files with 44 additions and 6 deletions
|
@ -383,16 +383,37 @@ static bool parse_html_document(const StringView& html, Document& document, Pare
|
|||
return true;
|
||||
}
|
||||
|
||||
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& html)
|
||||
// Converts `input`, whose bytes are encoded as `encoding`, into a UTF-8 String.
//
// Supported encoding names (matched exactly, lowercase):
//   - "utf-8":      the input is copied through unchanged.
//   - "iso-8859-1": each byte is transcoded to its UTF-8 representation.
//
// Any other encoding name is logged and hits ASSERT_NOT_REACHED().
String to_utf8(const StringView& input, const String& encoding)
{
    String output;
    if (encoding == "utf-8") {
        output = input;
    } else if (encoding == "iso-8859-1") {
        // ISO-8859-1 byte values are identical to Unicode code points
        // U+0000..U+00FF, so bytes below 0x80 pass through unchanged and
        // every other byte becomes a two-byte UTF-8 sequence. This keeps
        // non-ASCII characters (e.g. Swedish "å", "ä", "ö") intact rather
        // than replacing them with '?'.
        StringBuilder builder(input.length());
        for (size_t i = 0; i < input.length(); ++i) {
            u8 ch = input[i];
            if (ch < 0x80) {
                builder.append(static_cast<char>(ch));
            } else {
                // 110xxxxx 10xxxxxx, where the payload is the 8-bit code point.
                builder.append(static_cast<char>(0xc0 | (ch >> 6)));
                builder.append(static_cast<char>(0x80 | (ch & 0x3f)));
            }
        }
        output = builder.to_string();
    } else {
        dbg() << "Unknown encoding " << encoding;
        ASSERT_NOT_REACHED();
    }
    return output;
}
|
||||
|
||||
// Parses `raw_html` into a new DocumentFragment belonging to `document`.
//
// `raw_html` holds the undecoded bytes and `encoding` names their text
// encoding; the bytes are run through to_utf8() before being parsed.
// Returns nullptr if parsing fails.
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
{
    auto fragment = adopt(*new DocumentFragment(document));
    // Parse the UTF-8 converted markup; the old call that passed `html`
    // directly is gone because that parameter no longer exists.
    if (!parse_html_document(to_utf8(raw_html, encoding), document, *fragment))
        return nullptr;
    return fragment;
}
|
||||
|
||||
RefPtr<Document> parse_html_document(const StringView& html, const URL& url)
|
||||
RefPtr<Document> parse_html_document(const StringView& raw_html, const URL& url, const String& encoding)
|
||||
{
|
||||
String html = to_utf8(raw_html, encoding);
|
||||
|
||||
auto document = adopt(*new Document(url));
|
||||
document->set_source(html);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue