diff --git a/AK/StringConversions.cpp b/AK/StringConversions.cpp index 5cc82b918f8..c9dfeaa214c 100644 --- a/AK/StringConversions.cpp +++ b/AK/StringConversions.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -29,27 +30,22 @@ namespace AK { __ENUMERATE_TYPE(float) \ __ENUMERATE_TYPE(double) -template -Optional> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base) +template +static constexpr Optional> from_chars(CharType const* string, size_t length, int base) { - if (trim_whitespace == TrimWhitespace::Yes) - string = StringUtils::trim_whitespace(string, TrimMode::Both); + ValueType value { 0 }; - auto const* begin = string.characters_without_null_termination(); - auto const* end = begin + string.length(); - T value { 0 }; - - fast_float::parse_options_t options; + fast_float::parse_options_t options; options.base = base; options.format |= fast_float::chars_format::no_infnan; - if constexpr (IsSigned || IsFloatingPoint) { + if constexpr (IsSigned || IsFloatingPoint) { options.format |= fast_float::chars_format::allow_leading_plus; } - auto result = fast_float::from_chars_advanced(begin, end, value, options); + auto result = fast_float::from_chars_advanced(string, string + length, value, options); - if constexpr (IsFloatingPoint) { + if constexpr (IsFloatingPoint) { if (result.ec == std::errc::result_out_of_range && (__builtin_isinf(value) || value == 0)) result.ec = {}; } @@ -57,7 +53,26 @@ Optional> parse_first_number(StringView string, TrimWh if (result.ec != std::errc {}) return {}; - return ParseFirstNumberResult { value, static_cast(result.ptr - begin) }; + return ParseFirstNumberResult { value, static_cast(result.ptr - string) }; +} + +template +Optional> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base) +{ + if (trim_whitespace == TrimWhitespace::Yes) + string = StringUtils::trim_whitespace(string, TrimMode::Both); + + return from_chars(string.characters_without_null_termination(), 
string.length(), base); +} + +template +Optional> parse_first_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base) +{ + if (string.has_ascii_storage()) + return parse_first_number(string.bytes(), trim_whitespace, base); + + auto trimmed_string = trim_whitespace == TrimWhitespace::Yes ? string.trim_whitespace() : string; + return from_chars(trimmed_string.utf16_span().data(), trimmed_string.length_in_code_units(), base); } #define __ENUMERATE_TYPE(type) \ @@ -65,6 +80,11 @@ Optional> parse_first_number(StringView string, TrimWh ENUMERATE_ARITHMETIC_TYPES #undef __ENUMERATE_TYPE +#define __ENUMERATE_TYPE(type) \ + template Optional> parse_first_number(Utf16View const&, TrimWhitespace, int); +ENUMERATE_ARITHMETIC_TYPES +#undef __ENUMERATE_TYPE + template Optional parse_number(StringView string, TrimWhitespace trim_whitespace, int base) { @@ -81,20 +101,54 @@ Optional parse_number(StringView string, TrimWhitespace trim_whitespace, int return result->value; } +template +Optional parse_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base) +{ + if (string.has_ascii_storage()) + return parse_number(string.bytes(), trim_whitespace, base); + + auto trimmed_string = trim_whitespace == TrimWhitespace::Yes ? 
string.trim_whitespace() : string; + + auto result = parse_first_number(trimmed_string, TrimWhitespace::No, base); + if (!result.has_value()) + return {}; + + if (result->characters_parsed != trimmed_string.length_in_code_units()) + return {}; + + return result->value; +} + #define __ENUMERATE_TYPE(type) \ template Optional parse_number(StringView, TrimWhitespace, int); ENUMERATE_ARITHMETIC_TYPES #undef __ENUMERATE_TYPE +#define __ENUMERATE_TYPE(type) \ + template Optional parse_number(Utf16View const&, TrimWhitespace, int); +ENUMERATE_ARITHMETIC_TYPES +#undef __ENUMERATE_TYPE + template Optional parse_hexadecimal_number(StringView string, TrimWhitespace trim_whitespace) { return parse_number(string, trim_whitespace, 16); } +template +Optional parse_hexadecimal_number(Utf16View const& string, TrimWhitespace trim_whitespace) +{ + return parse_number(string, trim_whitespace, 16); +} + #define __ENUMERATE_TYPE(type) \ template Optional parse_hexadecimal_number(StringView, TrimWhitespace); ENUMERATE_INTEGRAL_TYPES #undef __ENUMERATE_TYPE +#define __ENUMERATE_TYPE(type) \ + template Optional parse_hexadecimal_number(Utf16View const&, TrimWhitespace); +ENUMERATE_INTEGRAL_TYPES +#undef __ENUMERATE_TYPE + } diff --git a/AK/StringConversions.h b/AK/StringConversions.h index 72073511cac..49387e601cd 100644 --- a/AK/StringConversions.h +++ b/AK/StringConversions.h @@ -22,10 +22,19 @@ struct ParseFirstNumberResult { template Optional> parse_first_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10); +template +Optional> parse_first_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10); + template Optional parse_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10); +template +Optional parse_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10); + template Optional parse_hexadecimal_number(StringView, TrimWhitespace = TrimWhitespace::Yes); +template +Optional parse_hexadecimal_number(Utf16View 
const&, TrimWhitespace = TrimWhitespace::Yes); + } diff --git a/AK/Utf16FlyString.h b/AK/Utf16FlyString.h index da3881bca4c..aea83e3d849 100644 --- a/AK/Utf16FlyString.h +++ b/AK/Utf16FlyString.h @@ -78,6 +78,12 @@ public: return view().to_ascii_titlecase(); } + template + ALWAYS_INLINE Optional to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const + { + return m_data.to_number(trim_whitespace); + } + ALWAYS_INLINE Utf16FlyString& operator=(Utf16String const& string) { *this = Utf16FlyString { string }; diff --git a/AK/Utf16StringBase.h b/AK/Utf16StringBase.h index 33d07ce7f5f..ef57b607710 100644 --- a/AK/Utf16StringBase.h +++ b/AK/Utf16StringBase.h @@ -88,6 +88,12 @@ public: StringView ascii_view() const&& = delete; Utf16View utf16_view() const&& = delete; + template + ALWAYS_INLINE Optional to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const + { + return utf16_view().to_number(trim_whitespace); + } + ALWAYS_INLINE Utf16StringBase& operator=(Utf16StringBase const& other) { if (&other != this) { diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 730611b1f97..49996a5bf5f 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -210,6 +211,14 @@ public: return { m_string.utf16, length_in_code_units() }; } + template + ALWAYS_INLINE Optional to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const + { + if (has_ascii_storage()) + return parse_number(bytes(), trim_whitespace); + return parse_number(*this, trim_whitespace); + } + [[nodiscard]] constexpr bool operator==(Utf16View const& other) const { if (length_in_code_units() != other.length_in_code_units()) diff --git a/Tests/AK/TestStringConversions.cpp b/Tests/AK/TestStringConversions.cpp index 06a21ca8f98..7f0c9415153 100644 --- a/Tests/AK/TestStringConversions.cpp +++ b/Tests/AK/TestStringConversions.cpp @@ -7,13 +7,16 @@ #include #include +#include -static double 
parse_complete_double(StringView view) +template +static double parse_complete_double(ViewType const& view) { return AK::parse_number(view, TrimWhitespace::No).release_value(); } -static float parse_complete_float(StringView view) +template +static float parse_complete_float(ViewType const& view) { return AK::parse_number(view, TrimWhitespace::No).release_value(); } @@ -21,22 +24,31 @@ static float parse_complete_float(StringView view) TEST_CASE(simple_cases) { -#define DOES_PARSE_DOUBLE_LIKE_CPP(value) \ - do { \ - EXPECT_EQ(static_cast(value), parse_complete_double(#value##sv)); \ - EXPECT_EQ(-static_cast(value), parse_complete_double("-" #value##sv)); \ +#define DOES_PARSE_DOUBLE_LIKE_CPP(value) \ + do { \ + EXPECT_EQ(static_cast(value), parse_complete_double(#value##sv)); \ + EXPECT_EQ(-static_cast(value), parse_complete_double("-" #value##sv)); \ + \ + EXPECT_EQ(static_cast(value), parse_complete_double(u"" #value##sv)); \ + EXPECT_EQ(-static_cast(value), parse_complete_double(u"-" #value##sv)); \ } while (false) #define DOES_PARSE_FLOAT_LIKE_CPP(value) \ do { \ float val = parse_complete_float(#value##sv); \ EXPECT_EQ(static_cast(value##f), val); \ EXPECT_EQ(-static_cast(value##f), parse_complete_float("-" #value##sv)); \ + \ + val = parse_complete_float(u"" #value##sv); \ + EXPECT_EQ(static_cast(value##f), val); \ + EXPECT_EQ(-static_cast(value##f), parse_complete_float(u"-" #value##sv)); \ } while (false) #define DOES_PARSE_FLOAT_AND_DOUBLE_LIKE_CPP(value) \ - DOES_PARSE_DOUBLE_LIKE_CPP(value); \ - DOES_PARSE_FLOAT_LIKE_CPP(value); + do { \ + DOES_PARSE_DOUBLE_LIKE_CPP(value); \ + DOES_PARSE_FLOAT_LIKE_CPP(value); \ + } while (false) DOES_PARSE_DOUBLE_LIKE_CPP(2.22507385850720138309e-308); @@ -212,10 +224,13 @@ TEST_CASE(simple_cases) DOES_PARSE_FLOAT_AND_DOUBLE_LIKE_CPP(8.589934335999999523162841796875e+09); DOES_PARSE_FLOAT_AND_DOUBLE_LIKE_CPP(0.09289376810193062); -#define DOES_PARSE_INT_LIKE_VALUE_LIKE_CPP(value) \ - do { \ - 
EXPECT_EQ(static_cast(value##.), parse_complete_double(#value##sv)); \ - EXPECT_EQ(-static_cast(value##.), parse_complete_double("-" #value##sv)); \ +#define DOES_PARSE_INT_LIKE_VALUE_LIKE_CPP(value) \ + do { \ + EXPECT_EQ(static_cast(value##.), parse_complete_double(#value##sv)); \ + EXPECT_EQ(-static_cast(value##.), parse_complete_double("-" #value##sv)); \ + \ + EXPECT_EQ(static_cast(value##.), parse_complete_double(u"" #value##sv)); \ + EXPECT_EQ(-static_cast(value##.), parse_complete_double(u"-" #value##sv)); \ } while (false) DOES_PARSE_INT_LIKE_VALUE_LIKE_CPP(0); @@ -230,8 +245,11 @@ TEST_CASE(simple_cases) EXPECT_EQ(0., parse_complete_double("2.4703282292062327208828439643411068618252990130716238221279284125033775363510437593264991818081799618989828234772285886546332835517796989819938739800539093906315035659515570226392290858392449105184435931802849936536152500319370457678249219365623669863658480757001585769269903706311928279558551332927834338409351978015531246597263579574622766465272827220056374006485499977096599470454020828166226237857393450736339007967761930577506740176324673600968951340535537458516661134223766678604162159680461914467291840300530057530849048765391711386591646239524912623653881879636239373280423891018672348497668235089863388587925628302755995657524455507255189313690836254779186948667994968324049705821028513185451396213837722826145437693412532098591327667236328125e-324"sv)); EXPECT_EQ(0., 
parse_complete_double("2.4703282292062327208828439643411068618252990130716238221279284125033775363510437593264991818081799618989828234772285886546332835517796989819938739800539093906315035659515570226392290858392449105184435931802849936536152500319370457678249219365623669863658480757001585769269903706311928279558551332927834338409351978015531246597263579574622766465272827220056374006485499977096599470454020828166226237857393450736339007967761930577506740176324673600968951340535537458516661134223766678604162159680461914467291840300530057530849048765391711386591646239524912623653881879636239373280423891018672348497668235089863388587925628302755995657524455507255189313690836254779186948667994968324049705821028513185451396213837722826145437693412532098591327667236328124999e-324"sv)); -#define EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(expected_val, str) \ - EXPECT_EQ(bit_cast(expected_val), bit_cast(parse_complete_double(str##sv))); +#define EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(expected_val, str) \ + do { \ + EXPECT_EQ(bit_cast(expected_val), bit_cast(parse_complete_double(str##sv))); \ + EXPECT_EQ(bit_cast(expected_val), bit_cast(parse_complete_double(u##str##sv))); \ + } while (false) EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(0., "1e-324"); EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-1e-324"); @@ -240,13 +258,22 @@ TEST_CASE(simple_cases) EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(0., "+.0e10"); EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-.0e10"); -#define EXPECT_TO_PARSE_TO_INFINITY(str) \ - EXPECT_EQ(__builtin_huge_val(), parse_complete_double(str##sv)); \ - EXPECT_EQ(__builtin_huge_val(), parse_complete_double("+" str##sv)); \ - EXPECT_EQ(-__builtin_huge_val(), parse_complete_double("-" str##sv)); \ - EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float(str##sv)); \ - EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float("+" str##sv)); \ - EXPECT_EQ(static_cast(-__builtin_huge_valf()), parse_complete_float("-" str##sv)) +#define EXPECT_TO_PARSE_TO_INFINITY(str) \ + do { \ 
+ EXPECT_EQ(__builtin_huge_val(), parse_complete_double(str##sv)); \ + EXPECT_EQ(__builtin_huge_val(), parse_complete_double("+" str##sv)); \ + EXPECT_EQ(-__builtin_huge_val(), parse_complete_double("-" str##sv)); \ + EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float(str##sv)); \ + EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float("+" str##sv)); \ + EXPECT_EQ(static_cast(-__builtin_huge_valf()), parse_complete_float("-" str##sv)); \ + \ + EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u##str##sv)); \ + EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u"+" str##sv)); \ + EXPECT_EQ(-__builtin_huge_val(), parse_complete_double(u"-" str##sv)); \ + EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float(u##str##sv)); \ + EXPECT_EQ(static_cast(__builtin_huge_valf()), parse_complete_float(u"+" str##sv)); \ + EXPECT_EQ(static_cast(-__builtin_huge_valf()), parse_complete_float(u"-" str##sv)); \ + } while (false) EXPECT_TO_PARSE_TO_INFINITY("123.456e789"); EXPECT_TO_PARSE_TO_INFINITY("123456.456789e789"); @@ -267,6 +294,11 @@ TEST_CASE(partial_parse_stops_at_right_spot) auto result = AK::parse_first_number(string_value##sv); \ VERIFY(result.has_value()); \ EXPECT_EQ(bit_cast(result->value), bit_cast(static_cast(double_value))); \ + EXPECT_EQ(result->characters_parsed, chars_parsed##uz); \ + \ + result = AK::parse_first_number(u##string_value##sv); \ + VERIFY(result.has_value()); \ + EXPECT_EQ(bit_cast(result->value), bit_cast(static_cast(double_value))); \ EXPECT_EQ(result->characters_parsed, chars_parsed##uz); \ } while (false) @@ -285,10 +317,10 @@ TEST_CASE(partial_parse_stops_at_right_spot) TEST_CASE(invalid_parse) { -#define EXPECT_PARSE_TO_FAIL(string_value) \ - do { \ - auto result = AK::parse_first_number(string_value##sv); \ - EXPECT(!result.has_value()); \ +#define EXPECT_PARSE_TO_FAIL(string_value) \ + do { \ + EXPECT(!AK::parse_first_number(string_value##sv).has_value()); \ + 
EXPECT(!AK::parse_first_number(u##string_value##sv).has_value()); \ } while (false) EXPECT_PARSE_TO_FAIL(""); @@ -331,6 +363,10 @@ TEST_CASE(detect_out_of_range_values) do { \ auto result = AK::parse_first_number(string_value##sv); \ VERIFY(result.has_value()); \ + EXPECT_EQ(bit_cast(result->value), bit_cast(static_cast(double_value))); \ + \ + result = AK::parse_first_number(u##string_value##sv); \ + VERIFY(result.has_value()); \ EXPECT_EQ(bit_cast(result->value), bit_cast(static_cast(double_value))); \ } while (false) @@ -340,15 +376,13 @@ TEST_CASE(detect_out_of_range_values) EXPECT_PARSE_TO_HAVE_ERROR("-10e10000", -INFINITY); } -static bool parse_completely_passes(StringView view) -{ - return AK::parse_number(view, TrimWhitespace::No).has_value(); -} - TEST_CASE(parse_completely_must_be_just_floating_point) { -#define EXPECT_PARSE_COMPLETELY_TO_FAIL(value) \ - EXPECT(!parse_completely_passes(value##sv)) +#define EXPECT_PARSE_COMPLETELY_TO_FAIL(string_value) \ + do { \ + EXPECT(!AK::parse_number(string_value##sv, TrimWhitespace::No).has_value()); \ + EXPECT(!AK::parse_number(u##string_value##sv, TrimWhitespace::No).has_value()); \ + } while (false) EXPECT_PARSE_COMPLETELY_TO_FAIL(""); EXPECT_PARSE_COMPLETELY_TO_FAIL("-");