From d56da8cf9a0298d3e6e3ee8984f60d4a3217183b Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Thu, 15 Aug 2024 20:40:10 +1200 Subject: [PATCH] LibWeb: Sort URLSearchParams using UTF-16 code units We were previously sorting using code points which could give the wrong result for certain inputs. Fixes the last two failing tests on: https://wpt.live/url/urlsearchparams-sort.any.html --- ...rl-search-params-sort-utf16-code-units.txt | 4 ++++ ...l-search-params-sort-utf16-code-units.html | 17 ++++++++++++++ .../LibWeb/DOMURL/URLSearchParams.cpp | 23 ++++++++----------- 3 files changed, 31 insertions(+), 13 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt create mode 100644 Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html diff --git a/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt b/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt new file mode 100644 index 00000000000..26e63107b37 --- /dev/null +++ b/Tests/LibWeb/Text/expected/URL/url-search-params-sort-utf16-code-units.txt @@ -0,0 +1,4 @@ +%EF%BF%BC=&%EF%BF%BD=x&%EF%BF%BD=a +'\ufffc' => '' +'\ufffd' => '\u0078' +'\ufffd' => '\u0061' diff --git a/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html b/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html new file mode 100644 index 00000000000..1101024bcfa --- /dev/null +++ b/Tests/LibWeb/Text/input/URL/url-search-params-sort-utf16-code-units.html @@ -0,0 +1,17 @@ + + diff --git a/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp b/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp index 5a386928d0f..3a78274ae66 100644 --- a/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp +++ b/Userland/Libraries/LibWeb/DOMURL/URLSearchParams.cpp @@ -325,26 +325,23 @@ void URLSearchParams::set(String const& name, String const& value) update(); } +// https://url.spec.whatwg.org/#dom-urlsearchparams-sort void URLSearchParams::sort() { // 1. Sort all name-value pairs, if any, by their names. Sorting must be done by comparison of code units. The relative order between name-value pairs with equal names must be preserved. insertion_sort(m_list, [](auto& a, auto& b) { - Utf8View a_code_points { a.name }; - Utf8View b_code_points { b.name }; + // FIXME: There should be a way to do this without converting to utf16 + auto a_utf16 = MUST(utf8_to_utf16(a.name)); + auto b_utf16 = MUST(utf8_to_utf16(b.name)); - if (a_code_points.starts_with(b_code_points)) - return false; - if (b_code_points.starts_with(a_code_points)) - return true; + auto common_length = min(a_utf16.size(), b_utf16.size()); - for (auto k = a_code_points.begin(), l = b_code_points.begin(); - k != a_code_points.end() && l != b_code_points.end(); - ++k, ++l) { - if (*k != *l) { - return *k < *l; - } + for (size_t position = 0; position < common_length; ++position) { + if (a_utf16[position] != b_utf16[position]) + return a_utf16[position] < b_utf16[position]; } - VERIFY_NOT_REACHED(); + + return a_utf16.size() < b_utf16.size(); }); // 2. Update this.