mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-24 18:02:20 +00:00
The underlying storage used during string formatting is StringBuilder. To support UTF-16 strings, this patch allows callers to specify a mode during StringBuilder construction. The default mode is UTF-8, for which StringBuilder remains unchanged. In UTF-16 mode, we treat the StringBuilder's internal ByteBuffer as a series of u16 code units. Appending a single character will append 2 bytes for that character (cast to a char16_t). Appending a StringView will transcode the string to UTF-16. Utf16String also gains the same memory optimization that we added for String, where we hand-off the underlying buffer to Utf16String to avoid having to re-allocate. In the future, we may want to further optimize for ASCII strings. For example, we could defer committing to the u16-esque storage until we see a non-ASCII code point.
590 lines
21 KiB
C++
590 lines
21 KiB
C++
/*
|
|
* Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <LibTest/TestCase.h>
|
|
|
|
#include <AK/Array.h>
|
|
#include <AK/CharacterTypes.h>
|
|
#include <AK/Enumerate.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <AK/Utf16String.h>
|
|
#include <AK/Utf32View.h>
|
|
|
|
// Produces a second Utf16String with the same contents as `string` by going through from_utf8() / from_utf16()
// on a view of its storage, instead of using the copy constructor or copy assignment operator.
static Utf16String make_copy(Utf16String const& string)
{
    if (string.has_ascii_storage())
        return Utf16String::from_utf8(string.ascii_view());

    return Utf16String::from_utf16(string.utf16_view());
}
|
|
|
|
// A default-constructed Utf16String is expected to be empty, ASCII, and held in short ASCII storage.
TEST_CASE(empty_string)
{
    Utf16String empty {};

    EXPECT(empty.is_empty());
    EXPECT(empty.is_ascii());

    // Storage kind.
    EXPECT(!empty.has_long_ascii_storage());
    EXPECT(empty.has_short_ascii_storage());

    // Lengths and contents.
    EXPECT_EQ(empty.length_in_code_units(), 0uz);
    EXPECT_EQ(empty.length_in_code_points(), 0uz);
    EXPECT_EQ(empty.ascii_view(), StringView {});
}
|
|
|
|
// Utf16String::from_utf8(): checks the storage kind, lengths, and contents produced for ASCII input, non-ASCII
// input, and input containing an encoded lone surrogate.
TEST_CASE(from_utf8)
{
    // ASCII input for which short ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf8("hello!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 6uz);
        EXPECT_EQ(utf16.length_in_code_points(), 6uz);
        EXPECT_EQ(utf16.ascii_view(), "hello!"sv);
    }

    // ASCII input for which long ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf8("hello there!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 12uz);
        EXPECT_EQ(utf16.length_in_code_points(), 12uz);
        EXPECT_EQ(utf16.ascii_view(), "hello there!"sv);
    }

    // A single code point that takes two UTF-16 code units.
    {
        auto utf16 = Utf16String::from_utf8("😀"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 2uz);
        EXPECT_EQ(utf16.length_in_code_points(), 1uz);
        EXPECT_EQ(utf16.utf16_view(), u"😀"sv);
    }

    // ASCII text surrounding a two-code-unit code point.
    {
        auto utf16 = Utf16String::from_utf8("hello 😀 there!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 15uz);
        EXPECT_EQ(utf16.length_in_code_points(), 14uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello 😀 there!"sv);
    }

    // The bytes ED A0 80 encode the lone high surrogate U+D800; it is expected to come through as the single
    // code unit 0xD800.
    {
        auto utf16 = Utf16String::from_utf8("hello \xed\xa0\x80!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xd800!"sv);
    }

    // The bytes ED B0 80 encode the lone low surrogate U+DC00; it is expected to come through as the single
    // code unit 0xDC00.
    {
        auto utf16 = Utf16String::from_utf8("hello \xed\xb0\x80!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xdc00!"sv);
    }
}
|
|
|
|
// Utf16String::from_utf16(): checks the storage kind, lengths, and contents produced for ASCII input, non-ASCII
// input, and input containing a lone surrogate code unit.
TEST_CASE(from_utf16)
{
    // ASCII input for which short ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf16(u"hello!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 6uz);
        EXPECT_EQ(utf16.length_in_code_points(), 6uz);
        EXPECT_EQ(utf16.ascii_view(), "hello!"sv);
    }

    // ASCII input for which long ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf16(u"hello there!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 12uz);
        EXPECT_EQ(utf16.length_in_code_points(), 12uz);
        EXPECT_EQ(utf16.ascii_view(), "hello there!"sv);
    }

    // A single code point that takes two UTF-16 code units.
    {
        auto utf16 = Utf16String::from_utf16(u"😀"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 2uz);
        EXPECT_EQ(utf16.length_in_code_points(), 1uz);
        EXPECT_EQ(utf16.utf16_view(), u"😀"sv);
    }

    // ASCII text surrounding a two-code-unit code point.
    {
        auto utf16 = Utf16String::from_utf16(u"hello 😀 there!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 15uz);
        EXPECT_EQ(utf16.length_in_code_points(), 14uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello 😀 there!"sv);
    }

    // A lone high surrogate (0xD800) is expected to be kept as-is and counted as one code point.
    {
        auto utf16 = Utf16String::from_utf16(u"hello \xd800!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xd800!"sv);
    }

    // A lone low surrogate (0xDC00) is expected to be kept as-is and counted as one code point.
    {
        auto utf16 = Utf16String::from_utf16(u"hello \xdc00!"sv);
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xdc00!"sv);
    }
}
|
|
|
|
// Utf16String::from_utf32(): checks the storage kind, lengths, and contents produced for ASCII input, non-ASCII
// input, and input containing a surrogate code point.
TEST_CASE(from_utf32)
{
    // Number of char32_t elements before the terminating zero of a U"" literal.
    auto count_code_points = [](char32_t const* literal) {
        size_t length = 0;
        while (literal[length] != U'\0')
            ++length;
        return length;
    };

    // Wraps a zero-terminated U"" literal in a Utf32View (terminator excluded).
    auto to_utf32_view = [&](char32_t const* literal) {
        auto const* code_points = reinterpret_cast<u32 const*>(literal);
        return Utf32View { code_points, count_code_points(literal) };
    };

    // ASCII input for which short ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"hello!"));
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 6uz);
        EXPECT_EQ(utf16.length_in_code_points(), 6uz);
        EXPECT_EQ(utf16.ascii_view(), "hello!"sv);
    }

    // ASCII input for which long ASCII storage is expected.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"hello there!"));
        EXPECT(!utf16.is_empty());
        EXPECT(utf16.is_ascii());
        EXPECT(utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 12uz);
        EXPECT_EQ(utf16.length_in_code_points(), 12uz);
        EXPECT_EQ(utf16.ascii_view(), "hello there!"sv);
    }

    // A single code point that takes two UTF-16 code units.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"😀"));
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 2uz);
        EXPECT_EQ(utf16.length_in_code_points(), 1uz);
        EXPECT_EQ(utf16.utf16_view(), u"😀"sv);
    }

    // ASCII text surrounding a two-code-unit code point.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"hello 😀 there!"));
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 15uz);
        EXPECT_EQ(utf16.length_in_code_points(), 14uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello 😀 there!"sv);
    }

    // The high-surrogate code point 0xD800 is expected to become the single code unit 0xD800.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"hello \xd800!"));
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xd800!"sv);
    }

    // The low-surrogate code point 0xDC00 is expected to become the single code unit 0xDC00.
    {
        auto utf16 = Utf16String::from_utf32(to_utf32_view(U"hello \xdc00!"));
        EXPECT(!utf16.is_empty());
        EXPECT(!utf16.is_ascii());
        EXPECT(!utf16.has_long_ascii_storage());
        EXPECT(!utf16.has_short_ascii_storage());
        EXPECT_EQ(utf16.length_in_code_units(), 8uz);
        EXPECT_EQ(utf16.length_in_code_points(), 8uz);
        EXPECT_EQ(utf16.utf16_view(), u"hello \xdc00!"sv);
    }
}
|
|
|
|
// Covers the Utf16String factories formatted(), number(), and join(), checking the storage kind, lengths, and
// contents of each result.
TEST_CASE(formatted)
{
    // formatted() with a single integer argument.
    {
        auto result = Utf16String::formatted("{}", 42);
        EXPECT(!result.is_empty());
        EXPECT(result.is_ascii());
        EXPECT(!result.has_long_ascii_storage());
        EXPECT(result.has_short_ascii_storage());
        EXPECT_EQ(result.length_in_code_units(), 2uz);
        EXPECT_EQ(result.length_in_code_points(), 2uz);
        EXPECT_EQ(result, u"42"sv);
    }

    // number() with the same integer.
    {
        auto result = Utf16String::number(42);
        EXPECT(!result.is_empty());
        EXPECT(result.is_ascii());
        EXPECT(!result.has_long_ascii_storage());
        EXPECT(result.has_short_ascii_storage());
        EXPECT_EQ(result.length_in_code_units(), 2uz);
        EXPECT_EQ(result.length_in_code_points(), 2uz);
        EXPECT_EQ(result, u"42"sv);
    }

    // formatted() mixing a StringView argument, a Utf16View argument, and a floating point argument.
    {
        auto result = Utf16String::formatted("whf {} {} {}!", "😀"sv, Utf16View { u"🍕"sv }, 3.14);
        EXPECT(!result.is_empty());
        EXPECT(!result.is_ascii());
        EXPECT(!result.has_long_ascii_storage());
        EXPECT(!result.has_short_ascii_storage());
        EXPECT_EQ(result.length_in_code_units(), 15uz);
        EXPECT_EQ(result.length_in_code_points(), 13uz);
        EXPECT_EQ(result, u"whf 😀 🍕 3.14!"sv);
    }

    // join() over ASCII-only segments: six 26-unit segments plus five 2-unit separators.
    {
        Array parts {
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
        };

        auto result = Utf16String::join(u"--"sv, parts);
        EXPECT(!result.is_empty());
        EXPECT(result.is_ascii());
        EXPECT(result.has_long_ascii_storage());
        EXPECT(!result.has_short_ascii_storage());
        EXPECT_EQ(result.length_in_code_units(), 166uz);
        EXPECT_EQ(result.length_in_code_points(), 166uz);
        EXPECT_EQ(result, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
    }

    // join() where two of the segments are surrogate pairs (one spelled with escapes, one as a literal emoji).
    {
        Array parts {
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"\xd83d\xde00"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"🍕"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
        };

        auto result = Utf16String::join(u"--"sv, parts);
        EXPECT(!result.is_empty());
        EXPECT(!result.is_ascii());
        EXPECT(!result.has_long_ascii_storage());
        EXPECT(!result.has_short_ascii_storage());
        EXPECT_EQ(result.length_in_code_units(), 174uz);
        EXPECT_EQ(result.length_in_code_points(), 172uz);
        EXPECT_EQ(result, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
    }
}
|
|
|
|
// Copy construction and copy assignment must yield an equal string and leave the source unchanged.
TEST_CASE(copy_operations)
{
    auto check_copies = [](Utf16String const& source) {
        // Reference value obtained via make_copy(), i.e. without the copy operations under test.
        auto expected = make_copy(source);

        // Copy constructor.
        Utf16String copy_constructed(source);

        EXPECT_EQ(source, expected);
        EXPECT_EQ(source, copy_constructed);

        // Copy assignment.
        Utf16String copy_assigned;
        copy_assigned = source;

        EXPECT_EQ(source, expected);
        EXPECT_EQ(source, copy_assigned);
    };

    check_copies({});
    check_copies("hello"_utf16);
    check_copies("hello there general!"_utf16);
    check_copies("hello 😀 there!"_utf16);
}
|
|
|
|
// Move construction and move assignment must transfer the contents and leave the moved-from string empty.
TEST_CASE(move_operations)
{
    auto check_moves = [](Utf16String source) {
        // Reference value captured before anything is moved.
        auto expected = make_copy(source);

        // Move constructor.
        Utf16String move_constructed(move(source));

        EXPECT(source.is_empty());
        EXPECT_EQ(source, Utf16String {});
        EXPECT_EQ(move_constructed, expected);

        // Move assignment.
        Utf16String move_assigned;
        move_assigned = move(move_constructed);

        EXPECT(move_constructed.is_empty());
        EXPECT_EQ(move_constructed, Utf16String {});
        EXPECT_EQ(move_assigned, expected);
    };

    check_moves({});
    check_moves("hello"_utf16);
    check_moves("hello there general!"_utf16);
    check_moves("hello 😀 there!"_utf16);
}
|
|
|
|
// Equality and inequality between Utf16String values, and between a Utf16String and a Utf16View, in both
// operand orders.
TEST_CASE(equals)
{
    auto check = [](Utf16String const& subject, Utf16String const& different) {
        auto duplicate = make_copy(subject);

        // The Utf16View comparisons below only run when the subject reports long UTF-16 storage.
        bool const compare_views = subject.has_long_utf16_storage();

        // String vs. string, equal.
        EXPECT_EQ(subject, subject);
        EXPECT_EQ(subject, duplicate);
        EXPECT_EQ(duplicate, subject);
        EXPECT_EQ(duplicate, duplicate);

        if (compare_views) {
            // String vs. view, equal.
            EXPECT_EQ(subject, subject.utf16_view());
            EXPECT_EQ(subject, duplicate.utf16_view());
            EXPECT_EQ(duplicate, subject.utf16_view());
            EXPECT_EQ(duplicate, duplicate.utf16_view());

            // View vs. string, equal.
            EXPECT_EQ(subject.utf16_view(), subject);
            EXPECT_EQ(subject.utf16_view(), duplicate);
            EXPECT_EQ(duplicate.utf16_view(), subject);
            EXPECT_EQ(duplicate.utf16_view(), duplicate);
        }

        // String vs. string, not equal.
        EXPECT_NE(subject, different);
        EXPECT_NE(duplicate, different);
        EXPECT_NE(different, subject);
        EXPECT_NE(different, duplicate);

        if (compare_views) {
            // String vs. view, not equal.
            EXPECT_NE(subject, different.utf16_view());
            EXPECT_NE(duplicate, different.utf16_view());
            EXPECT_NE(different, subject.utf16_view());
            EXPECT_NE(different, duplicate.utf16_view());

            // View vs. string, not equal.
            EXPECT_NE(subject.utf16_view(), different);
            EXPECT_NE(duplicate.utf16_view(), different);
            EXPECT_NE(different.utf16_view(), subject);
            EXPECT_NE(different.utf16_view(), duplicate);
        }
    };

    // Short (empty) ASCII string comparison.
    check(Utf16String {}, "hello"_utf16);

    // Short ASCII string comparison.
    check("hello"_utf16, "there"_utf16);

    // Short and long ASCII string comparison.
    check("hello"_utf16, "hello there general!"_utf16);

    // Long ASCII string comparison.
    check("hello there!"_utf16, "hello there general!"_utf16);

    // UTF-16 string comparison.
    check("😀"_utf16, "hello 😀"_utf16);

    // Short ASCII and UTF-16 string comparison.
    check("hello"_utf16, "😀"_utf16);

    // Short ASCII and UTF-16 string of same code unit length comparison.
    check("ab"_utf16, "😀"_utf16);

    // Long ASCII and UTF-16 string comparison.
    check("hello there general!"_utf16, "😀"_utf16);

    // Long ASCII and UTF-16 string of same code unit length comparison.
    check("ababababab"_utf16, "😀😀😀😀😀"_utf16);
}
|
|
|
|
// Equality and inequality between a StringView and a Utf16String, in both operand orders.
TEST_CASE(equals_ascii)
{
    auto check = [](StringView ascii, Utf16String const& different) {
        auto converted = Utf16String::from_utf8(ascii);

        // The view must compare equal to the string built from it.
        EXPECT_EQ(ascii, converted);
        EXPECT_EQ(converted, ascii);

        // The view must compare unequal to the unrelated string.
        EXPECT_NE(ascii, different);
        EXPECT_NE(different, ascii);
    };

    // Short (empty) ASCII string comparison.
    check({}, "hello"_utf16);

    // Short ASCII string comparison.
    check("hello"sv, "there"_utf16);

    // Short and long ASCII string comparison.
    check("hello"sv, "hello there general!"_utf16);

    // Long ASCII string comparison.
    check("hello there!"sv, "hello there general!"_utf16);

    // Short ASCII and UTF-16 string comparison.
    check("hello"sv, "😀"_utf16);

    // Short ASCII and UTF-16 string of same code unit length comparison.
    check("ab"sv, "😀"_utf16);

    // Long ASCII and UTF-16 string comparison.
    check("hello there general!"sv, "😀"_utf16);

    // Long ASCII and UTF-16 string of same code unit length comparison.
    check("ababababab"sv, "😀😀😀😀😀"_utf16);

    // Non-ASCII string comparison.
    EXPECT_NE("😀"sv, "😀"_utf16);
}
|
|
|
|
// equals_ignoring_ascii_case() against another Utf16String and against a Utf16View, using a variant of the
// input in which every code point at an even index has been passed through to_ascii_uppercase().
TEST_CASE(equals_ignoring_ascii_case)
{
    auto check = [](Utf16String const& subject, Utf16String const& different) {
        // Build the mixed-case variant of `subject`.
        StringBuilder mixed_case;
        for (auto [index, code_point] : enumerate(subject)) {
            if (index % 2 == 0)
                mixed_case.append_code_point(to_ascii_uppercase(code_point));
            else
                mixed_case.append_code_point(code_point);
        }

        auto variant = Utf16String::from_utf8(mixed_case.string_view());

        // The Utf16View comparisons below only run when the subject reports long UTF-16 storage.
        bool const compare_views = subject.has_long_utf16_storage();

        // String vs. string, equal ignoring case.
        EXPECT(subject.equals_ignoring_ascii_case(subject));
        EXPECT(subject.equals_ignoring_ascii_case(variant));
        EXPECT(variant.equals_ignoring_ascii_case(subject));
        EXPECT(variant.equals_ignoring_ascii_case(variant));

        if (compare_views) {
            // String vs. view, equal ignoring case.
            EXPECT(subject.equals_ignoring_ascii_case(subject.utf16_view()));
            EXPECT(subject.equals_ignoring_ascii_case(variant.utf16_view()));
            EXPECT(variant.equals_ignoring_ascii_case(subject.utf16_view()));
            EXPECT(variant.equals_ignoring_ascii_case(variant.utf16_view()));
        }

        // String vs. string, not equal.
        EXPECT(!subject.equals_ignoring_ascii_case(different));
        EXPECT(!variant.equals_ignoring_ascii_case(different));
        EXPECT(!different.equals_ignoring_ascii_case(subject));
        EXPECT(!different.equals_ignoring_ascii_case(variant));

        if (compare_views) {
            // String vs. view, not equal.
            EXPECT(!subject.equals_ignoring_ascii_case(different.utf16_view()));
            EXPECT(!variant.equals_ignoring_ascii_case(different.utf16_view()));
            EXPECT(!different.equals_ignoring_ascii_case(subject.utf16_view()));
            EXPECT(!different.equals_ignoring_ascii_case(variant.utf16_view()));
        }
    };

    // Short (empty) ASCII string comparison.
    check(Utf16String {}, "hello"_utf16);

    // Short ASCII string comparison.
    check("hello"_utf16, "there"_utf16);

    // Short and long ASCII string comparison.
    check("hello"_utf16, "hello there general!"_utf16);

    // Long ASCII string comparison.
    check("hello there!"_utf16, "hello there general!"_utf16);

    // UTF-16 string comparison.
    check("😀"_utf16, "hello 😀"_utf16);

    // Short ASCII and UTF-16 string comparison.
    check("hello"_utf16, "😀"_utf16);

    // Short ASCII and UTF-16 string of same code unit length comparison.
    check("ab"_utf16, "😀"_utf16);

    // Long ASCII and UTF-16 string comparison.
    check("hello there general!"_utf16, "😀"_utf16);

    // Long ASCII and UTF-16 string of same code unit length comparison.
    check("ababababab"_utf16, "😀😀😀😀😀"_utf16);
}
|
|
|
|
// Range-based iteration over a Utf16String must yield the expected code points, and using the end iterator
// (dereference or increment) is expected to crash.
TEST_CASE(iteration)
{
    auto check = [](Utf16String const& string, ReadonlySpan<u32> code_points) {
        EXPECT_EQ(string.length_in_code_points(), code_points.size());

        for (auto [index, actual] : enumerate(string)) {
            // With no expected code points, every produced value is a failure.
            if (code_points.size() == 0) {
                FAIL("Iterating an empty UTF-16 string should not produce any values");
                continue;
            }

            EXPECT_EQ(actual, code_points[index]);
        }

        auto past_the_end = string.end();
        EXPECT_DEATH("Dereferencing a UTF-16 iterator which is at its end", *past_the_end);
        EXPECT_DEATH("Incrementing a UTF-16 iterator which is at its end", ++past_the_end);
    };

    check({}, {});
    check("hello"_utf16, { { 'h', 'e', 'l', 'l', 'o' } });
    check("hello there general!"_utf16, { { 'h', 'e', 'l', 'l', 'o', ' ', 't', 'h', 'e', 'r', 'e', ' ', 'g', 'e', 'n', 'e', 'r', 'a', 'l', '!' } });
    check("😀"_utf16, { { 0x1f600 } });
    check("hello 😀 there!"_utf16, { { 'h', 'e', 'l', 'l', 'o', ' ', 0x1f600, ' ', 't', 'h', 'e', 'r', 'e', '!' } });
}
|
|
|
|
// Utf16String::code_unit_at() must agree with Utf16View::code_unit_at() at every code unit index.
TEST_CASE(code_unit_at)
{
    auto check = [](Utf16View const& source, size_t expected_code_units) {
        auto converted = Utf16String::from_utf16(source);
        EXPECT_EQ(converted.length_in_code_units(), expected_code_units);

        for (size_t index = 0; index < expected_code_units; ++index)
            EXPECT_EQ(converted.code_unit_at(index), source.code_unit_at(index));
    };

    check({}, 0);
    check(u"hello"sv, 5);
    check(u"hello there general!"sv, 20);
    check(u"😀"sv, 2);
    check(u"hello 😀 there!"sv, 15);
}
|
|
|
|
// Utf16String::code_point_at() must agree with Utf16View::code_point_at() at every code unit index.
TEST_CASE(code_point_at)
{
    auto check = [](Utf16View const& source, size_t expected_code_points) {
        auto converted = Utf16String::from_utf16(source);
        EXPECT_EQ(converted.length_in_code_points(), expected_code_points);

        // The loop is bounded by the code unit length, not by the code point count checked above.
        for (size_t index = 0; index < converted.length_in_code_units(); ++index)
            EXPECT_EQ(converted.code_point_at(index), source.code_point_at(index));
    };

    check({}, 0);
    check(u"hello"sv, 5);
    check(u"hello there general!"sv, 20);
    check(u"😀"sv, 1);
    check(u"hello 😀 there!"sv, 14);
}
|