diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index 2be0e708968..f752410f958 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -806,8 +806,7 @@ ErrorOr Parser::percent_encode_after_encoding(TextCodec::Encoder& encode } // https://url.spec.whatwg.org/#concept-basic-url-parser -// NOTE: This parser assumes a UTF-8 encoding. -URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Optional url, Optional state_override) +URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Optional url, Optional state_override, Optional encoding) { dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input); @@ -855,7 +854,11 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt State state = state_override.value_or(State::SchemeStart); // 5. Set encoding to the result of getting an output encoding from encoding. - auto encoder = TextCodec::encoder_for("utf-8"sv); + Optional encoder = {}; + if (encoding.has_value()) + encoder = TextCodec::encoder_for(TextCodec::get_output_encoding(*encoding)); + if (!encoder.has_value()) + encoder = TextCodec::encoder_for("utf-8"sv); VERIFY(encoder.has_value()); // 6. Let buffer be the empty string. diff --git a/Userland/Libraries/LibURL/Parser.h b/Userland/Libraries/LibURL/Parser.h index 6f4d04e4ef9..0cfdd07ceed 100644 --- a/Userland/Libraries/LibURL/Parser.h +++ b/Userland/Libraries/LibURL/Parser.h @@ -58,7 +58,7 @@ public: } // https://url.spec.whatwg.org/#concept-basic-url-parser - static URL basic_parse(StringView input, Optional const& base_url = {}, Optional url = {}, Optional state_override = {}); + static URL basic_parse(StringView input, Optional const& base_url = {}, Optional url = {}, Optional state_override = {}, Optional encoding = {}); // https://url.spec.whatwg.org/#string-percent-encode-after-encoding static ErrorOr percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false); diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index 1f2dbb1b696..ce4ae6157db 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -1034,7 +1034,7 @@ URL::URL Document::parse_url(StringView url) const auto base_url = this->base_url(); // 2. Return the result of applying the URL parser to url, with baseURL. - return DOMURL::parse(url, base_url); + return DOMURL::parse(url, base_url, Optional { m_encoding }); } void Document::set_needs_layout() diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp index 8c6b01040b7..fb86cb0508a 100644 --- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -585,12 +585,12 @@ void strip_trailing_spaces_from_an_opaque_path(DOMURL& url) } // https://url.spec.whatwg.org/#concept-url-parser -URL::URL parse(StringView input, Optional const& base_url) +URL::URL parse(StringView input, Optional const& base_url, Optional encoding) { // FIXME: We should probably have an extended version of URL::URL for LibWeb instead of standalone functions like this. // 1. Let url be the result of running the basic URL parser on input with base and encoding. - auto url = URL::Parser::basic_parse(input, base_url); + auto url = URL::Parser::basic_parse(input, base_url, {}, {}, encoding); // 2. If url is failure, return failure. if (!url.is_valid()) diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h index a25c9e431a6..f4cd7ff09cf 100644 --- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h +++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h @@ -99,6 +99,6 @@ bool host_is_domain(URL::Host const&); void strip_trailing_spaces_from_an_opaque_path(DOMURL& url); // https://url.spec.whatwg.org/#concept-url-parser -URL::URL parse(StringView input, Optional const& base_url = {}); +URL::URL parse(StringView input, Optional const& base_url = {}, Optional encoding = {}); }