mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 04:39:06 +00:00
AK: Add a method to compute UTF-16 length from a UTF-8 string
This commit is contained in:
parent
743c71faa7
commit
7a17c654d2
Notes:
github-actions[bot]
2024-07-31 09:56:36 +00:00
Author: https://github.com/trflynn89
Commit: 7a17c654d2
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/901
3 changed files with 19 additions and 0 deletions
|
@ -129,6 +129,15 @@ ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point, Endianness
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Computes how many UTF-16 code units the given UTF-8 string occupies once converted.
// An empty view (which includes a null view) yields 0 without calling into simdutf.
size_t utf16_code_unit_length_from_utf8(StringView string)
{
    // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string.
    if (!string.is_empty()) {
        auto const* utf8_bytes = string.characters_without_null_termination();
        auto const utf8_byte_count = string.length();

        return simdutf::utf16_length_from_utf8(utf8_bytes, utf8_byte_count);
    }

    return 0;
}
|
||||||
|
|
||||||
bool Utf16View::is_high_surrogate(u16 code_unit)
|
bool Utf16View::is_high_surrogate(u16 code_unit)
|
||||||
{
|
{
|
||||||
return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);
|
return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);
|
||||||
|
|
|
@ -26,6 +26,8 @@ ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&, Endianness = Endianness::Host)
|
||||||
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&, Endianness = Endianness::Host);
|
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&, Endianness = Endianness::Host);
|
||||||
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32, Endianness = Endianness::Host);
|
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32, Endianness = Endianness::Host);
|
||||||
|
|
||||||
|
// Returns the number of UTF-16 code units required to represent the given UTF-8 string.
// An empty view yields 0.
size_t utf16_code_unit_length_from_utf8(StringView);
|
||||||
|
|
||||||
class Utf16View;
|
class Utf16View;
|
||||||
|
|
||||||
class Utf16CodePointIterator {
|
class Utf16CodePointIterator {
|
||||||
|
|
|
@ -89,6 +89,14 @@ TEST_CASE(decode_utf16)
|
||||||
EXPECT_EQ(i, expected.size());
|
EXPECT_EQ(i, expected.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks AK::utf16_code_unit_length_from_utf8 against hand-counted UTF-16 code unit totals.
TEST_CASE(utf16_code_unit_length_from_utf8)
|
||||||
|
{
|
||||||
|
// Empty input is answered with zero code units.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8(""sv), 0uz);
|
||||||
|
// ASCII: one code unit per character.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("abc"sv), 3uz);
|
||||||
|
// A single emoji is expected to count as two code units.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("😀"sv), 2uz);
|
||||||
|
// Mixed Cyrillic, Greek and Japanese text with one emoji in the middle.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv), 39uz);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(null_view)
|
TEST_CASE(null_view)
|
||||||
{
|
{
|
||||||
Utf16View view;
|
Utf16View view;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue