AK: Implement UTF-16 string-to-number conversions

Author: https://github.com/trflynn89 Commit: d40e3af697 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388 Reviewed-by: https://github.com/shannonbooth ✅
2025-08-09 09:39:39 +00:00 · 2025-06-27 13:56:17 -04:00 · 2025-06-27 13:56:17 -04:00 · d40e3af697 · 2025-07-18 16:47:05 +00:00
commit d40e3af697
parent 6e0290ecaa
6 changed files with 163 additions and 45 deletions
--- a/AK/StringConversions.cpp
+++ b/AK/StringConversions.cpp
@ -7,6 +7,7 @@

 #include <AK/StringConversions.h>
 #include <AK/StringView.h>
+#include <AK/Utf16View.h>

 #include <fast_float/fast_float.h>

@ -29,27 +30,22 @@ namespace AK {
    __ENUMERATE_TYPE(float)        \
    __ENUMERATE_TYPE(double)

-template<Arithmetic T>
-Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base)
+template<typename CharType, Arithmetic ValueType> // Shared parsing core: reads the first number from [string, string + length) in the given base.
+static constexpr Optional<ParseFirstNumberResult<ValueType>> from_chars(CharType const* string, size_t length, int base)
 {
-    if (trim_whitespace == TrimWhitespace::Yes)
-        string = StringUtils::trim_whitespace(string, TrimMode::Both);
+    ValueType value { 0 };

-    auto const* begin = string.characters_without_null_termination();
-    auto const* end = begin + string.length();
-    T value { 0 };
-
-    fast_float::parse_options_t<char> options;
+    fast_float::parse_options_t<CharType> options; // Parameterized on the code unit type so char and char16_t callers share this function.
    options.base = base;
    options.format |= fast_float::chars_format::no_infnan;

-    if constexpr (IsSigned<T> || IsFloatingPoint<T>) {
+    if constexpr (IsSigned<ValueType> || IsFloatingPoint<ValueType>) { // Only signed and floating-point targets get the allow_leading_plus flag.
        options.format |= fast_float::chars_format::allow_leading_plus;
    }

-    auto result = fast_float::from_chars_advanced(begin, end, value, options);
+    auto result = fast_float::from_chars_advanced(string, string + length, value, options);

-    if constexpr (IsFloatingPoint<T>) {
+    if constexpr (IsFloatingPoint<ValueType>) { // An out-of-range float that came back as infinity or zero is accepted rather than reported as an error.
        if (result.ec == std::errc::result_out_of_range && (__builtin_isinf(value) || value == 0))
            result.ec = {};
    }
@ -57,7 +53,26 @@ Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWh
    if (result.ec != std::errc {})
        return {};

-    return ParseFirstNumberResult { value, static_cast<size_t>(result.ptr - begin) };
+    return ParseFirstNumberResult { value, static_cast<size_t>(result.ptr - string) }; // Second field: distance from the start of the input to the parser's end pointer.
+}
+
+template<Arithmetic T> // StringView front end for from_chars: optionally trims both ends, then parses the first number.
+Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base)
+{
+    if (trim_whitespace == TrimWhitespace::Yes) // Trimming happens before parsing, so the parsed-character count is relative to the trimmed start.
+        string = StringUtils::trim_whitespace(string, TrimMode::Both);
+
+    return from_chars<char, T>(string.characters_without_null_termination(), string.length(), base);
+}
+
+template<Arithmetic T> // Utf16View overload of parse_first_number.
+Optional<ParseFirstNumberResult<T>> parse_first_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base)
+{
+    if (string.has_ascii_storage()) // ASCII-backed views are forwarded, via bytes(), to the byte-based overload.
+        return parse_first_number<T>(string.bytes(), trim_whitespace, base);
+
+    auto trimmed_string = trim_whitespace == TrimWhitespace::Yes ? string.trim_whitespace() : string; // Otherwise trim if requested and parse the char16_t code units directly.
+    return from_chars<char16_t, T>(trimmed_string.utf16_span().data(), trimmed_string.length_in_code_units(), base);
 }

 #define __ENUMERATE_TYPE(type) \
@ -65,6 +80,11 @@ Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWh
 ENUMERATE_ARITHMETIC_TYPES
 #undef __ENUMERATE_TYPE

+#define __ENUMERATE_TYPE(type) \
+    template Optional<ParseFirstNumberResult<type>> parse_first_number(Utf16View const&, TrimWhitespace, int);
+ENUMERATE_ARITHMETIC_TYPES // Explicit instantiations of the Utf16View overload, one per enumerated arithmetic type.
+#undef __ENUMERATE_TYPE
+
 template<Arithmetic T>
 Optional<T> parse_number(StringView string, TrimWhitespace trim_whitespace, int base)
 {
@ -81,20 +101,54 @@ Optional<T> parse_number(StringView string, TrimWhitespace trim_whitespace, int
    return result->value;
 }

+template<Arithmetic T> // Utf16View overload: succeeds only if the entire (optionally trimmed) view is one number.
+Optional<T> parse_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base)
+{
+    if (string.has_ascii_storage()) // ASCII-backed views are forwarded, via bytes(), to the byte-based overload.
+        return parse_number<T>(string.bytes(), trim_whitespace, base);
+
+    auto trimmed_string = trim_whitespace == TrimWhitespace::Yes ? string.trim_whitespace() : string; // Trim once here so the length check below is made against the same view that was parsed.
+
+    auto result = parse_first_number<T>(trimmed_string, TrimWhitespace::No, base);
+    if (!result.has_value())
+        return {};
+
+    if (result->characters_parsed != trimmed_string.length_in_code_units()) // Reject input with code units left over after the number.
+        return {};
+
+    return result->value;
+}
+
 #define __ENUMERATE_TYPE(type) \
    template Optional<type> parse_number(StringView, TrimWhitespace, int);
 ENUMERATE_ARITHMETIC_TYPES
 #undef __ENUMERATE_TYPE

+#define __ENUMERATE_TYPE(type) \
+    template Optional<type> parse_number(Utf16View const&, TrimWhitespace, int);
+ENUMERATE_ARITHMETIC_TYPES // Explicit instantiations of the Utf16View overload, one per enumerated arithmetic type.
+#undef __ENUMERATE_TYPE
+
 template<Integral T>
 Optional<T> parse_hexadecimal_number(StringView string, TrimWhitespace trim_whitespace)
 {
    return parse_number<T>(string, trim_whitespace, 16);
 }

+template<Integral T> // Utf16View overload: parse_number with the base fixed to 16.
+Optional<T> parse_hexadecimal_number(Utf16View const& string, TrimWhitespace trim_whitespace)
+{
+    return parse_number<T>(string, trim_whitespace, 16);
+}
+
 #define __ENUMERATE_TYPE(type) \
    template Optional<type> parse_hexadecimal_number(StringView, TrimWhitespace);
 ENUMERATE_INTEGRAL_TYPES
 #undef __ENUMERATE_TYPE

+#define __ENUMERATE_TYPE(type) \
+    template Optional<type> parse_hexadecimal_number(Utf16View const&, TrimWhitespace);
+ENUMERATE_INTEGRAL_TYPES // Explicit instantiations of the Utf16View overload, one per enumerated integral type.
+#undef __ENUMERATE_TYPE
+
 }
--- a/AK/StringConversions.h
+++ b/AK/StringConversions.h
@ -22,10 +22,19 @@ struct ParseFirstNumberResult {
 template<Arithmetic T>
 Optional<ParseFirstNumberResult<T>> parse_first_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10);

+template<Arithmetic T> // UTF-16 counterpart of the StringView overload; trims whitespace and uses base 10 unless told otherwise.
+Optional<ParseFirstNumberResult<T>> parse_first_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
+
 template<Arithmetic T>
 Optional<T> parse_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10);

+template<Arithmetic T> // UTF-16 counterpart of the StringView overload; trims whitespace and uses base 10 unless told otherwise.
+Optional<T> parse_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
+
 template<Integral T>
 Optional<T> parse_hexadecimal_number(StringView, TrimWhitespace = TrimWhitespace::Yes);

+template<Integral T> // UTF-16 counterpart of the StringView overload; trims whitespace by default.
+Optional<T> parse_hexadecimal_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes);
+
 }
--- a/AK/Utf16FlyString.h
+++ b/AK/Utf16FlyString.h
@ -78,6 +78,12 @@ public:
        return view().to_ascii_titlecase();
    }

+    template<Arithmetic T> // Number conversion for fly strings; delegates to m_data.to_number<T>().
+    ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
+    {
+        return m_data.to_number<T>(trim_whitespace);
+    }
+
    ALWAYS_INLINE Utf16FlyString& operator=(Utf16String const& string)
    {
        *this = Utf16FlyString { string };
--- a/AK/Utf16StringBase.h
+++ b/AK/Utf16StringBase.h
@ -88,6 +88,12 @@ public:
    StringView ascii_view() const&& = delete;
    Utf16View utf16_view() const&& = delete;

+    template<Arithmetic T> // Number conversion for the string; delegates to utf16_view().to_number<T>().
+    ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
+    {
+        return utf16_view().to_number<T>(trim_whitespace);
+    }
+
    ALWAYS_INLINE Utf16StringBase& operator=(Utf16StringBase const& other)
    {
        if (&other != this) {
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@ -14,6 +14,7 @@
 #include <AK/Optional.h>
 #include <AK/Span.h>
 #include <AK/String.h>
+#include <AK/StringConversions.h>
 #include <AK/StringHash.h>
 #include <AK/Traits.h>
 #include <AK/Types.h>
@ -210,6 +211,14 @@ public:
        return { m_string.utf16, length_in_code_units() };
    }

+    template<Arithmetic T> // Converts the view to a number through parse_number<T>, leaving the base at parse_number's default.
+    ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
+    {
+        if (has_ascii_storage()) // ASCII-backed views call the byte-based overload directly.
+            return parse_number<T>(bytes(), trim_whitespace);
+        return parse_number<T>(*this, trim_whitespace);
+    }
+
    [[nodiscard]] constexpr bool operator==(Utf16View const& other) const
    {
        if (length_in_code_units() != other.length_in_code_units())
--- a/Tests/AK/TestStringConversions.cpp
+++ b/Tests/AK/TestStringConversions.cpp
@ -7,13 +7,16 @@
 #include <LibTest/TestCase.h>

 #include <AK/StringConversions.h>
+#include <AK/Utf16View.h>

-static double parse_complete_double(StringView view)
+template<typename ViewType> // Takes whichever view type the caller passes; overload resolution on AK::parse_number picks the implementation.
+static double parse_complete_double(ViewType const& view)
 {
    return AK::parse_number<double>(view, TrimWhitespace::No).release_value();
 }

-static float parse_complete_float(StringView view)
+template<typename ViewType> // Takes whichever view type the caller passes; overload resolution on AK::parse_number picks the implementation.
+static float parse_complete_float(ViewType const& view)
 {
    return AK::parse_number<float>(view, TrimWhitespace::No).release_value();
 }
@ -25,18 +28,27 @@ TEST_CASE(simple_cases)
    do {                                                                                \
        EXPECT_EQ(static_cast<double>(value), parse_complete_double(#value##sv));       \
        EXPECT_EQ(-static_cast<double>(value), parse_complete_double("-" #value##sv));  \
+                                                                                        \
+        EXPECT_EQ(static_cast<double>(value), parse_complete_double(u"" #value##sv));   \
+        EXPECT_EQ(-static_cast<double>(value), parse_complete_double(u"-" #value##sv)); \
    } while (false)

 #define DOES_PARSE_FLOAT_LIKE_CPP(value)                                                \
    do {                                                                                \
        float val = parse_complete_float(#value##sv);                                   \
        EXPECT_EQ(static_cast<float>(value##f), val);                                   \
        EXPECT_EQ(-static_cast<float>(value##f), parse_complete_float("-" #value##sv)); \
+        /* Repeat both checks with UTF-16 (u-prefixed) string literals. */              \
+        val = parse_complete_float(u"" #value##sv);                                     \
+        EXPECT_EQ(static_cast<float>(value##f), val);                                   \
+        EXPECT_EQ(-static_cast<float>(value##f), parse_complete_float(u"-" #value##sv)); \
    } while (false)

 #define DOES_PARSE_FLOAT_AND_DOUBLE_LIKE_CPP(value) \
+    do {                                            \
        DOES_PARSE_DOUBLE_LIKE_CPP(value);          \
-    DOES_PARSE_FLOAT_LIKE_CPP(value);
+        DOES_PARSE_FLOAT_LIKE_CPP(value);           \
+    } while (false) // Runs the double and float checks as a single statement.

    DOES_PARSE_DOUBLE_LIKE_CPP(2.22507385850720138309e-308);

@ -216,6 +228,9 @@ TEST_CASE(simple_cases)
    do {                                                                                   \
        EXPECT_EQ(static_cast<double>(value##.), parse_complete_double(#value##sv));       \
        EXPECT_EQ(-static_cast<double>(value##.), parse_complete_double("-" #value##sv));  \
+                                                                                           \
+        EXPECT_EQ(static_cast<double>(value##.), parse_complete_double(u"" #value##sv));   \
+        EXPECT_EQ(-static_cast<double>(value##.), parse_complete_double(u"-" #value##sv)); \
    } while (false)

    DOES_PARSE_INT_LIKE_VALUE_LIKE_CPP(0);
@ -231,7 +246,10 @@ TEST_CASE(simple_cases)
    EXPECT_EQ(0., parse_complete_double("2.4703282292062327208828439643411068618252990130716238221279284125033775363510437593264991818081799618989828234772285886546332835517796989819938739800539093906315035659515570226392290858392449105184435931802849936536152500319370457678249219365623669863658480757001585769269903706311928279558551332927834338409351978015531246597263579574622766465272827220056374006485499977096599470454020828166226237857393450736339007967761930577506740176324673600968951340535537458516661134223766678604162159680461914467291840300530057530849048765391711386591646239524912623653881879636239373280423891018672348497668235089863388587925628302755995657524455507255189313690836254779186948667994968324049705821028513185451396213837722826145437693412532098591327667236328124999e-324"sv));

 #define EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(expected_val, str)                                      \
-    EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(str##sv)));
+    do {                                                                                          \
+        EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(str##sv)));    \
+        EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(u##str##sv))); \
+    } while (false) // Compares the 64-bit patterns for both the plain and the u-prefixed literal.

    EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(0., "1e-324");
    EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-1e-324");
@ -241,12 +259,21 @@ TEST_CASE(simple_cases)
    EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-.0e10");

 #define EXPECT_TO_PARSE_TO_INFINITY(str)                                                           \
+    do {                                                                                           \
        EXPECT_EQ(__builtin_huge_val(), parse_complete_double(str##sv));                           \
        EXPECT_EQ(__builtin_huge_val(), parse_complete_double("+" str##sv));                       \
        EXPECT_EQ(-__builtin_huge_val(), parse_complete_double("-" str##sv));                      \
        EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(str##sv));       \
        EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float("+" str##sv));   \
-    EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float("-" str##sv))
+        EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float("-" str##sv));  \
+                                                                                                   \
+        EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u##str##sv));                        \
+        EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u"+" str##sv));                      \
+        EXPECT_EQ(-__builtin_huge_val(), parse_complete_double(u"-" str##sv));                     \
+        EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(u##str##sv));    \
+        EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(u"+" str##sv));  \
+        EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float(u"-" str##sv)); \
+    } while (false) // Unsigned, "+" and "-" forms are checked for double and float, with plain and u-prefixed literals.

    EXPECT_TO_PARSE_TO_INFINITY("123.456e789");
    EXPECT_TO_PARSE_TO_INFINITY("123456.456789e789");
@ -267,6 +294,11 @@ TEST_CASE(partial_parse_stops_at_right_spot)
        auto result = AK::parse_first_number<double>(string_value##sv);                            \
        VERIFY(result.has_value());                                                                \
        EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
+        EXPECT_EQ(result->characters_parsed, chars_parsed##uz);                                    \
+                                                                                                   \
+        result = AK::parse_first_number<double>(u##string_value##sv);                              \
+        VERIFY(result.has_value());                                                                \
+        EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
        EXPECT_EQ(result->characters_parsed, chars_parsed##uz);                                    \
    } while (false)

@ -287,8 +319,8 @@ TEST_CASE(invalid_parse)
 {
 #define EXPECT_PARSE_TO_FAIL(string_value)                                        \
    do {                                                                          \
-        auto result = AK::parse_first_number<double>(string_value##sv); \
-        EXPECT(!result.has_value());                                    \
+        EXPECT(!AK::parse_first_number<double>(string_value##sv).has_value());    \
+        EXPECT(!AK::parse_first_number<double>(u##string_value##sv).has_value()); \
    } while (false)

    EXPECT_PARSE_TO_FAIL("");
@ -331,6 +363,10 @@ TEST_CASE(detect_out_of_range_values)
    do {                                                                                           \
        auto result = AK::parse_first_number<double>(string_value##sv);                            \
        VERIFY(result.has_value());                                                                \
+        EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
+                                                                                                   \
+        result = AK::parse_first_number<double>(u##string_value##sv);                              \
+        VERIFY(result.has_value());                                                                \
        EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
    } while (false)

@ -340,15 +376,13 @@ TEST_CASE(detect_out_of_range_values)
    EXPECT_PARSE_TO_HAVE_ERROR("-10e10000", -INFINITY);
 }

-static bool parse_completely_passes(StringView view)
-{
-    return AK::parse_number<double>(view, TrimWhitespace::No).has_value();
-}
-
 TEST_CASE(parse_completely_must_be_just_floating_point)
 {
-#define EXPECT_PARSE_COMPLETELY_TO_FAIL(value) \
-    EXPECT(!parse_completely_passes(value##sv))
+#define EXPECT_PARSE_COMPLETELY_TO_FAIL(string_value)                                           \
+    do {                                                                                        \
+        EXPECT(!AK::parse_number<double>(string_value##sv, TrimWhitespace::No).has_value());    \
+        EXPECT(!AK::parse_number<double>(u##string_value##sv, TrimWhitespace::No).has_value()); \
+    } while (false) // Both the plain and the u-prefixed literal must fail a full parse with trimming disabled.

    EXPECT_PARSE_COMPLETELY_TO_FAIL("");
    EXPECT_PARSE_COMPLETELY_TO_FAIL("-");