From 0bf565b97fc3b4d63d8cded2d17737f7190ca5fb Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 2 Aug 2025 19:12:16 -0400 Subject: [PATCH] AK: Allow comparing UTF-16 strings to UTF-8 strings Before now, you could compare a Utf16View to a StringView, but it would only be valid if the StringView were ASCII. When porting code to UTF-16, it will be handy to have a code point-aware implementation for non-ASCII StringViews. --- AK/Utf16View.h | 17 ++++++++++++++++- Tests/AK/TestUtf16String.cpp | 2 +- Tests/AK/TestUtf16View.cpp | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 715a66582e3..1b78520c644 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -20,6 +20,7 @@ #include #include #include +#include #include namespace AK { @@ -232,7 +233,21 @@ public: { if (has_ascii_storage()) return StringView { m_string.ascii, length_in_code_units() } == other; - return *this == Utf16View { other.characters_without_null_termination(), other.length() }; + + if (other.is_ascii()) + return *this == Utf16View { other.characters_without_null_termination(), other.length() }; + + Utf8View other_utf8 { other }; + + auto this_it = begin(); + auto other_it = other_utf8.begin(); + + for (; this_it != end() && other_it != other_utf8.end(); ++this_it, ++other_it) { + if (*this_it != *other_it) + return false; + } + + return this_it == end() && other_it == other_utf8.end(); } [[nodiscard]] constexpr bool equals_ignoring_case(Utf16View const& other) const diff --git a/Tests/AK/TestUtf16String.cpp b/Tests/AK/TestUtf16String.cpp index c5b5ba0e69b..1fee98ae332 100644 --- a/Tests/AK/TestUtf16String.cpp +++ b/Tests/AK/TestUtf16String.cpp @@ -1070,7 +1070,7 @@ TEST_CASE(equals_ascii) test("ababababab"sv, "😀😀😀😀😀"_utf16); // Non-ASCII string comparison. - EXPECT_NE("😀"sv, "😀"_utf16); + EXPECT_EQ("😀"sv, "😀"_utf16); } TEST_CASE(equals_ignoring_ascii_case) diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp index 00137a8ea49..fe1803259f8 100644 --- a/Tests/AK/TestUtf16View.cpp +++ b/Tests/AK/TestUtf16View.cpp @@ -442,6 +442,24 @@ TEST_CASE(to_ascii_titlecase) EXPECT_EQ(u"foo 😀 bar"sv.to_ascii_titlecase(), u"Foo 😀 Bar"sv); } +TEST_CASE(equals_utf8) +{ + EXPECT_EQ(u""sv, ""sv); + + EXPECT_EQ(u"foo bar"sv, "foo bar"sv); + EXPECT_NE(u"foo bar"sv, "foo ba"sv); + EXPECT_NE(u"foo bar"sv, "foo"sv); + EXPECT_NE(u"foo bar"sv, ""sv); + + EXPECT_EQ(u"foo 😀 bar"sv, "foo 😀 bar"sv); + EXPECT_NE(u"foo 😀 bar"sv, "foo 😀"sv); + EXPECT_NE(u"foo 😀 bar"sv, "foo"sv); + EXPECT_NE(u"foo 😀 bar"sv, ""sv); + + EXPECT_NE(u"foo 😀 bar"sv, "foo 😂 bar"sv); + EXPECT_NE(u"foo 😂 bar"sv, "foo 😀 bar"sv); +} + TEST_CASE(equals_ignoring_case) { auto string1 = Utf16String::from_utf8("foobar"sv);