AK: Allow comparing UTF-16 strings to UTF-8 strings

Previously, you could compare a Utf16View to a StringView, but the result
was only valid if the StringView were ASCII. When porting code to UTF-16,
it will be handy to have a code point-aware implementation for non-ASCII
StringViews.
This commit is contained in:
Timothy Flynn 2025-08-02 19:12:16 -04:00 committed by Tim Flynn
commit 0bf565b97f
Notes: github-actions[bot] 2025-08-05 11:08:55 +00:00
3 changed files with 35 additions and 2 deletions

View file

@ -20,6 +20,7 @@
#include <AK/Traits.h>
#include <AK/Types.h>
#include <AK/UnicodeUtils.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
namespace AK {
@ -232,7 +233,21 @@ public:
{
if (has_ascii_storage())
return StringView { m_string.ascii, length_in_code_units() } == other;
if (other.is_ascii())
return *this == Utf16View { other.characters_without_null_termination(), other.length() };
Utf8View other_utf8 { other };
auto this_it = begin();
auto other_it = other_utf8.begin();
for (; this_it != end() && other_it != other_utf8.end(); ++this_it, ++other_it) {
if (*this_it != *other_it)
return false;
}
return this_it == end() && other_it == other_utf8.end();
}
[[nodiscard]] constexpr bool equals_ignoring_case(Utf16View const& other) const

View file

@ -1070,7 +1070,7 @@ TEST_CASE(equals_ascii)
test("ababababab"sv, "😀😀😀😀😀"_utf16);
// Non-ASCII string comparison.
EXPECT_NE("😀"sv, "😀"_utf16);
EXPECT_EQ("😀"sv, "😀"_utf16);
}
TEST_CASE(equals_ignoring_ascii_case)

View file

@ -442,6 +442,24 @@ TEST_CASE(to_ascii_titlecase)
EXPECT_EQ(u"foo 😀 bar"sv.to_ascii_titlecase(), u"Foo 😀 Bar"sv);
}
// Exercises equality between a UTF-16 view (left operand) and a StringView
// holding UTF-8 text (right operand), covering ASCII-only text, BMP non-ASCII
// text, and text outside the BMP.
TEST_CASE(equals_utf8)
{
    // ASCII-only operands on both sides.
    EXPECT_EQ(u""sv, ""sv);

    EXPECT_EQ(u"foo bar"sv, "foo bar"sv);
    EXPECT_NE(u"foo bar"sv, "foo ba"sv);
    EXPECT_NE(u"foo bar"sv, "foo"sv);
    EXPECT_NE(u"foo bar"sv, ""sv);

    // U+1F600 is a surrogate pair in UTF-16 and a four-byte sequence in UTF-8.
    EXPECT_EQ(u"foo 😀 bar"sv, "foo 😀 bar"sv);
    EXPECT_NE(u"foo 😀 bar"sv, "foo 😀"sv);
    EXPECT_NE(u"foo 😀 bar"sv, "foo"sv);
    EXPECT_NE(u"foo 😀 bar"sv, ""sv);

    // Same length in code points, but a different code point in the middle.
    EXPECT_NE(u"foo 😀 bar"sv, "foo 😂 bar"sv);
    EXPECT_NE(u"foo 😂 bar"sv, "foo 😀 bar"sv);

    // The UTF-16 operand is a strict prefix of (or shorter than) a non-ASCII UTF-8
    // operand, so the left side runs out of code points first.
    EXPECT_NE(u""sv, "😀"sv);
    EXPECT_NE(u"foo"sv, "foo 😀 bar"sv);
    EXPECT_NE(u"foo 😀"sv, "foo 😀 bar"sv);

    // U+20AC is a single UTF-16 code unit but a three-byte UTF-8 sequence, so the
    // code unit counts differ even though the code points match.
    EXPECT_EQ(u"foo € bar"sv, "foo € bar"sv);
    EXPECT_NE(u"foo € bar"sv, "foo $ bar"sv);
    EXPECT_NE(u"foo € bar"sv, "foo 😀 bar"sv);
    EXPECT_NE(u"foo 😀 bar"sv, "foo € bar"sv);
}
TEST_CASE(equals_ignoring_case)
{
auto string1 = Utf16String::from_utf8("foobar"sv);