LibURL: honour the encoding override in the query state, always use UTF-8 for fragments.

Tests/LibURL/TestURL.cpp
  * query_with_non_ascii now goes through URL::Parser::basic_parse and gains a
    case that passes "shift_jis" as the encoding and expects the query
    "shift_jis=%26%2310003%3B".
  * New fragment_with_non_ascii test expects the fragment "%E2%9C%93" both with
    the default arguments and with "shift_jis" passed as the encoding.

Userland/Libraries/LibURL/Parser.cpp
  * Query state: step 1 is no longer a FIXME; the encoder is replaced with the
    "utf-8" encoder when the URL is not special or its scheme is "ws"/"wss".
  * Fragment state: the buffer is percent-encoded with the "utf-8" encoder
    instead of the caller-supplied encoder.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Leading whitespace inside the hunks was reconstructed assuming
4-space indentation -- confirm against the target tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp
index 12d1f8ab684..072c92f64d6 100644
--- a/Tests/LibURL/TestURL.cpp
+++ b/Tests/LibURL/TestURL.cpp
@@ -341,11 +341,38 @@ TEST_CASE(unicode)
 
 TEST_CASE(query_with_non_ascii)
 {
-    URL::URL url { "http://example.com/?utf8=✓"sv };
-    EXPECT(url.is_valid());
-    EXPECT_EQ(url.serialize_path(), "/"sv);
-    EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
-    EXPECT(!url.fragment().has_value());
+    {
+        URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url.serialize_path(), "/"sv);
+        EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
+        EXPECT(!url.fragment().has_value());
+    }
+    {
+        URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url.serialize_path(), "/"sv);
+        EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B");
+        EXPECT(!url.fragment().has_value());
+    }
+}
+
+TEST_CASE(fragment_with_non_ascii)
+{
+    {
+        URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url.serialize_path(), "/"sv);
+        EXPECT(!url.query().has_value());
+        EXPECT_EQ(url.fragment(), "%E2%9C%93");
+    }
+    {
+        URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url.serialize_path(), "/"sv);
+        EXPECT(!url.query().has_value());
+        EXPECT_EQ(url.fragment(), "%E2%9C%93");
+    }
 }
 
 TEST_CASE(complete_file_url_with_base)
diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp
index f05e7861d27..01f6f821d63 100644
--- a/Userland/Libraries/LibURL/Parser.cpp
+++ b/Userland/Libraries/LibURL/Parser.cpp
@@ -1688,10 +1688,12 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
             break;
         // -> query state, https://url.spec.whatwg.org/#query-state
         case State::Query:
-            // FIXME: 1. If encoding is not UTF-8 and one of the following is true:
-            //           * url is not special
-            //           * url’s scheme is "ws" or "wss"
-            //           then set encoding to UTF-8.
+            // 1. If encoding is not UTF-8 and one of the following is true:
+            //    * url is not special
+            //    * url’s scheme is "ws" or "wss"
+            //    then set encoding to UTF-8.
+            if (!url->is_special() || url->m_data->scheme == "ws" || url->m_data->scheme == "wss")
+                encoder = TextCodec::encoder_for("utf-8"sv);
 
             // 2. If one of the following is true:
             //    * state override is not given and c is U+0023 (#)
@@ -1746,7 +1748,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
                 // NOTE: The percent-encode is done on EOF on the entire buffer.
                 buffer.append_code_point(code_point);
             } else {
-                url->m_data->fragment = percent_encode_after_encoding(*encoder, buffer.string_view(), PercentEncodeSet::Fragment);
+                url->m_data->fragment = percent_encode_after_encoding(*TextCodec::encoder_for("utf-8"sv), buffer.string_view(), PercentEncodeSet::Fragment);
                 buffer.clear();
             }
             break;