diff --git a/Libraries/LibWeb/HtmlView.cpp b/Libraries/LibWeb/HtmlView.cpp
index b74b5c85530..0a1cb337ada 100644
--- a/Libraries/LibWeb/HtmlView.cpp
+++ b/Libraries/LibWeb/HtmlView.cpp
@@ -343,6 +343,15 @@ static RefPtr create_image_document(const ByteBuffer& data, const URL&
return document;
}
+// Extracts the character encoding named by the "charset" parameter of a
+// Content-Type header value, e.g. "text/html; charset=ISO-8859-1".
+//
+// The returned name is lower-cased. Surrounding whitespace and quotes are
+// removed and any parameters following the charset (separated by ';') are
+// ignored. Returns "utf-8" when no charset parameter is present or when its
+// value is empty.
+String encoding_from_content_type(const String& content_type)
+{
+    // Parameter names are case-insensitive and the result is lower-cased
+    // anyway, so normalize once up front and search the normalized copy.
+    String lowered = content_type.to_lowercase();
+
+    auto offset = lowered.index_of("charset=");
+    if (!offset.has_value())
+        return "utf-8";
+
+    // 8 == strlen("charset="); the value starts right after the '='.
+    size_t start = offset.value() + 8;
+
+    // The value runs until the next parameter separator or the end of the header.
+    size_t end = start;
+    while (end < lowered.length() && lowered[end] != ';')
+        ++end;
+
+    // Drop optional whitespace and quoting, e.g. charset="utf-8".
+    auto is_padding = [](char ch) {
+        return ch == ' ' || ch == '\t' || ch == '"' || ch == '\'';
+    };
+    while (start < end && is_padding(lowered[start]))
+        ++start;
+    while (end > start && is_padding(lowered[end - 1]))
+        --end;
+
+    if (start == end)
+        return "utf-8";
+
+    return lowered.substring(start, end - start);
+}
+
void HtmlView::load(const URL& url)
{
dbg() << "HtmlView::load: " << url.to_string();
@@ -370,7 +379,15 @@ void HtmlView::load(const URL& url)
if (url.path().ends_with(".png") || url.path().ends_with(".gif")) {
document = create_image_document(data, url);
} else {
- document = parse_html_document(data, url);
+ String encoding = "utf-8";
+
+ auto content_type = response_headers.get("Content-Type");
+ if (content_type.has_value()) {
+ encoding = encoding_from_content_type(content_type.value());
+ dbg() << "I think this content has encoding '" << encoding << "'";
+ }
+
+ document = parse_html_document(data, url, encoding);
}
ASSERT(document);
set_document(document);
diff --git a/Libraries/LibWeb/Parser/HTMLParser.cpp b/Libraries/LibWeb/Parser/HTMLParser.cpp
index 25eaa237fdf..ca46b8de53c 100644
--- a/Libraries/LibWeb/Parser/HTMLParser.cpp
+++ b/Libraries/LibWeb/Parser/HTMLParser.cpp
@@ -383,16 +383,37 @@ static bool parse_html_document(const StringView& html, Document& document, Pare
return true;
}
-RefPtr parse_html_fragment(Document& document, const StringView& html)
+// Converts `input`, whose bytes are encoded as `encoding`, into a UTF-8 String.
+//
+// Recognized encodings (compared exactly, so callers pass lower-case names):
+//   "utf-8"      - the input is copied unchanged.
+//   "iso-8859-1" - every byte is transcoded to the UTF-8 form of the code
+//                  point with the same numeric value.
+// Any other name is logged and the input is copied unchanged. The encoding
+// name typically originates from a server-supplied header, so an unexpected
+// value must not bring the process down.
+String to_utf8(const StringView& input, const String& encoding)
+{
+    String output;
+    if (encoding == "utf-8") {
+        output = input;
+    } else if (encoding == "iso-8859-1") {
+        StringBuilder builder(input.length());
+        for (size_t i = 0; i < input.length(); ++i) {
+            u8 ch = input[i];
+            if (ch < 0x80) {
+                // ASCII is identical in both encodings.
+                builder.append(static_cast<char>(ch));
+            } else {
+                // U+0080..U+00FF encode as two bytes: 110000xx 10xxxxxx.
+                builder.append(static_cast<char>(0xc0 | (ch >> 6)));
+                builder.append(static_cast<char>(0x80 | (ch & 0x3f)));
+            }
+        }
+        output = builder.to_string();
+    } else {
+        // Degrade gracefully instead of asserting on unknown encodings.
+        dbg() << "Unknown encoding " << encoding << ", treating content as utf-8";
+        output = input;
+    }
+    return output;
+}
+
+// Parses `raw_html` into a new DocumentFragment constructed for `document`.
+// The raw input is converted from `encoding` to UTF-8 with to_utf8() before
+// it is handed to the internal parse_html_document() overload.
+// Returns nullptr if that parse reports failure, otherwise the fragment.
+RefPtr parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
{
auto fragment = adopt(*new DocumentFragment(document));
- if (!parse_html_document(html, document, *fragment))
+ if (!parse_html_document(to_utf8(raw_html, encoding), document, *fragment))
return nullptr;
return fragment;
}
-RefPtr parse_html_document(const StringView& html, const URL& url)
+RefPtr parse_html_document(const StringView& raw_html, const URL& url, const String& encoding)
{
+ String html = to_utf8(raw_html, encoding);
+
auto document = adopt(*new Document(url));
document->set_source(html);
diff --git a/Libraries/LibWeb/Parser/HTMLParser.h b/Libraries/LibWeb/Parser/HTMLParser.h
index 60d234f87c1..da60ddcca1d 100644
--- a/Libraries/LibWeb/Parser/HTMLParser.h
+++ b/Libraries/LibWeb/Parser/HTMLParser.h
@@ -33,7 +33,7 @@ namespace Web {
class DocumentFragment;
-RefPtr parse_html_document(const StringView&, const URL& = URL());
-RefPtr parse_html_fragment(Document&, const StringView&);
+// Parses raw HTML input into a new Document created for the given URL.
+// `encoding` names the character encoding of the input; it defaults to "utf-8".
+RefPtr parse_html_document(const StringView&, const URL& = URL(), const String& encoding = "utf-8");
+// Parses raw HTML input into a DocumentFragment for the given Document.
+// `encoding` names the character encoding of the input; it defaults to "utf-8".
+RefPtr parse_html_fragment(Document&, const StringView&, const String& encoding = "utf-8");
}