AK: Implement UTF-16 string-to-number conversions

This commit is contained in:
Timothy Flynn 2025-06-27 13:56:17 -04:00 committed by Tim Flynn
commit d40e3af697
Notes: github-actions[bot] 2025-07-18 16:47:05 +00:00
6 changed files with 163 additions and 45 deletions

View file

@ -7,6 +7,7 @@
#include <AK/StringConversions.h>
#include <AK/StringView.h>
#include <AK/Utf16View.h>
#include <fast_float/fast_float.h>
@ -29,27 +30,22 @@ namespace AK {
__ENUMERATE_TYPE(float) \
__ENUMERATE_TYPE(double)
template<Arithmetic T>
Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base)
template<typename CharType, Arithmetic ValueType>
static constexpr Optional<ParseFirstNumberResult<ValueType>> from_chars(CharType const* string, size_t length, int base)
{
if (trim_whitespace == TrimWhitespace::Yes)
string = StringUtils::trim_whitespace(string, TrimMode::Both);
ValueType value { 0 };
auto const* begin = string.characters_without_null_termination();
auto const* end = begin + string.length();
T value { 0 };
fast_float::parse_options_t<char> options;
fast_float::parse_options_t<CharType> options;
options.base = base;
options.format |= fast_float::chars_format::no_infnan;
if constexpr (IsSigned<T> || IsFloatingPoint<T>) {
if constexpr (IsSigned<ValueType> || IsFloatingPoint<ValueType>) {
options.format |= fast_float::chars_format::allow_leading_plus;
}
auto result = fast_float::from_chars_advanced(begin, end, value, options);
auto result = fast_float::from_chars_advanced(string, string + length, value, options);
if constexpr (IsFloatingPoint<T>) {
if constexpr (IsFloatingPoint<ValueType>) {
if (result.ec == std::errc::result_out_of_range && (__builtin_isinf(value) || value == 0))
result.ec = {};
}
@ -57,7 +53,26 @@ Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWh
if (result.ec != std::errc {})
return {};
return ParseFirstNumberResult { value, static_cast<size_t>(result.ptr - begin) };
return ParseFirstNumberResult { value, static_cast<size_t>(result.ptr - string) };
}
// Parses the number found at the beginning of an 8-bit `string`, interpreting digits in `base`.
// When `trim_whitespace` is TrimWhitespace::Yes, whitespace is stripped from both ends before parsing.
// The actual conversion is delegated to from_chars<char, T>(); its result is returned unchanged.
template<Arithmetic T>
Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWhitespace trim_whitespace, int base)
{
    auto const input = trim_whitespace == TrimWhitespace::Yes
        ? StringUtils::trim_whitespace(string, TrimMode::Both)
        : string;

    return from_chars<char, T>(input.characters_without_null_termination(), input.length(), base);
}
// UTF-16 counterpart of parse_first_number(StringView, ...).
// Views backed by ASCII storage are forwarded to the 8-bit overload via their raw bytes. All other
// views are parsed code unit by code unit through from_chars<char16_t, T>(), after optionally
// trimming whitespace.
template<Arithmetic T>
Optional<ParseFirstNumberResult<T>> parse_first_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base)
{
    if (string.has_ascii_storage())
        return parse_first_number<T>(string.bytes(), trim_whitespace, base);

    if (trim_whitespace == TrimWhitespace::Yes) {
        auto trimmed = string.trim_whitespace();
        return from_chars<char16_t, T>(trimmed.utf16_span().data(), trimmed.length_in_code_units(), base);
    }

    auto untrimmed = string;
    return from_chars<char16_t, T>(untrimmed.utf16_span().data(), untrimmed.length_in_code_units(), base);
}
#define __ENUMERATE_TYPE(type) \
@ -65,6 +80,11 @@ Optional<ParseFirstNumberResult<T>> parse_first_number(StringView string, TrimWh
ENUMERATE_ARITHMETIC_TYPES
#undef __ENUMERATE_TYPE
// Explicitly instantiate the Utf16View overload of parse_first_number() once for every type that
// ENUMERATE_ARITHMETIC_TYPES lists. NOTE(review): presumably needed because the template is only
// declared in the header and defined in this file - confirm against the build.
#define __ENUMERATE_TYPE(type) \
template Optional<ParseFirstNumberResult<type>> parse_first_number(Utf16View const&, TrimWhitespace, int);
ENUMERATE_ARITHMETIC_TYPES
#undef __ENUMERATE_TYPE
template<Arithmetic T>
Optional<T> parse_number(StringView string, TrimWhitespace trim_whitespace, int base)
{
@ -81,20 +101,54 @@ Optional<T> parse_number(StringView string, TrimWhitespace trim_whitespace, int
return result->value;
}
// Parses `string` as a single number in `base`, requiring that the whole (optionally trimmed) view
// is consumed. Returns an empty Optional if nothing could be parsed or if any code units remain
// after the number. ASCII-backed views are forwarded to the 8-bit overload.
template<Arithmetic T>
Optional<T> parse_number(Utf16View const& string, TrimWhitespace trim_whitespace, int base)
{
    if (string.has_ascii_storage())
        return parse_number<T>(string.bytes(), trim_whitespace, base);

    // Trim here rather than in parse_first_number() so the consumed length can be compared against
    // the length of exactly the view that was parsed.
    auto view = trim_whitespace == TrimWhitespace::Yes ? string.trim_whitespace() : string;
    auto parsed = parse_first_number<T>(view, TrimWhitespace::No, base);

    bool const consumed_everything = parsed.has_value()
        && parsed->characters_parsed == view.length_in_code_units();
    if (!consumed_everything)
        return {};

    return parsed->value;
}
// Explicit instantiations of parse_number() for every type in ENUMERATE_ARITHMETIC_TYPES:
// first the StringView overload, then the Utf16View overload.
#define __ENUMERATE_TYPE(type) \
template Optional<type> parse_number(StringView, TrimWhitespace, int);
ENUMERATE_ARITHMETIC_TYPES
#undef __ENUMERATE_TYPE
#define __ENUMERATE_TYPE(type) \
template Optional<type> parse_number(Utf16View const&, TrimWhitespace, int);
ENUMERATE_ARITHMETIC_TYPES
#undef __ENUMERATE_TYPE
// Parses an 8-bit `string` as a base-16 integer by delegating to parse_number().
template<Integral T>
Optional<T> parse_hexadecimal_number(StringView string, TrimWhitespace trim_whitespace)
{
    constexpr int hexadecimal_base = 16;
    return parse_number<T>(string, trim_whitespace, hexadecimal_base);
}
// Parses a UTF-16 `string` as a base-16 integer by delegating to parse_number().
template<Integral T>
Optional<T> parse_hexadecimal_number(Utf16View const& string, TrimWhitespace trim_whitespace)
{
    constexpr int hexadecimal_base = 16;
    return parse_number<T>(string, trim_whitespace, hexadecimal_base);
}
// Explicit instantiations of parse_hexadecimal_number() for every type in ENUMERATE_INTEGRAL_TYPES:
// first the StringView overload, then the Utf16View overload.
#define __ENUMERATE_TYPE(type) \
template Optional<type> parse_hexadecimal_number(StringView, TrimWhitespace);
ENUMERATE_INTEGRAL_TYPES
#undef __ENUMERATE_TYPE
#define __ENUMERATE_TYPE(type) \
template Optional<type> parse_hexadecimal_number(Utf16View const&, TrimWhitespace);
ENUMERATE_INTEGRAL_TYPES
#undef __ENUMERATE_TYPE
}

View file

@ -22,10 +22,19 @@ struct ParseFirstNumberResult {
// Parses a number from the start of an 8-bit string. On success the result carries the parsed
// value and the number of characters consumed. Whitespace is trimmed and base 10 is used unless
// the caller says otherwise.
template<Arithmetic T>
Optional<ParseFirstNumberResult<T>> parse_first_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
// UTF-16 overload of parse_first_number(); same defaults as the StringView overload.
template<Arithmetic T>
Optional<ParseFirstNumberResult<T>> parse_first_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
// Parses an 8-bit string as a number and returns just the value.
// NOTE(review): presumably rejects trailing characters like the Utf16View overload - confirm in the definition.
template<Arithmetic T>
Optional<T> parse_number(StringView, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
// Parses a UTF-16 string as a number; returns an empty Optional unless the entire (optionally
// trimmed) string is consumed by the number.
template<Arithmetic T>
Optional<T> parse_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes, int base = 10);
// Convenience wrapper: parse_number() with base 16 on an 8-bit string.
template<Integral T>
Optional<T> parse_hexadecimal_number(StringView, TrimWhitespace = TrimWhitespace::Yes);
// Convenience wrapper: parse_number() with base 16 on a UTF-16 string.
template<Integral T>
Optional<T> parse_hexadecimal_number(Utf16View const&, TrimWhitespace = TrimWhitespace::Yes);
}

View file

@ -78,6 +78,12 @@ public:
return view().to_ascii_titlecase();
}
// Converts this string to a number of type T by forwarding to m_data.to_number<T>().
// Whitespace trimming is enabled by default; the result is whatever the underlying call returns.
template<Arithmetic T>
ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
{
return m_data.to_number<T>(trim_whitespace);
}
ALWAYS_INLINE Utf16FlyString& operator=(Utf16String const& string)
{
*this = Utf16FlyString { string };

View file

@ -88,6 +88,12 @@ public:
StringView ascii_view() const&& = delete;
Utf16View utf16_view() const&& = delete;
// Converts this string to a number of type T by forwarding to Utf16View::to_number<T>() on a view
// of this string. Whitespace trimming is enabled by default.
template<Arithmetic T>
ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
{
return utf16_view().to_number<T>(trim_whitespace);
}
ALWAYS_INLINE Utf16StringBase& operator=(Utf16StringBase const& other)
{
if (&other != this) {

View file

@ -14,6 +14,7 @@
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/String.h>
#include <AK/StringConversions.h>
#include <AK/StringHash.h>
#include <AK/Traits.h>
#include <AK/Types.h>
@ -210,6 +211,14 @@ public:
return { m_string.utf16, length_in_code_units() };
}
// Converts this view to a number of type T using parse_number() with its default base.
// ASCII-backed views are parsed directly from their bytes; all others go through the UTF-16 overload.
template<Arithmetic T>
ALWAYS_INLINE Optional<T> to_number(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const
{
    return has_ascii_storage()
        ? parse_number<T>(bytes(), trim_whitespace)
        : parse_number<T>(*this, trim_whitespace);
}
[[nodiscard]] constexpr bool operator==(Utf16View const& other) const
{
if (length_in_code_units() != other.length_in_code_units())

View file

@ -7,13 +7,16 @@
#include <LibTest/TestCase.h>
#include <AK/StringConversions.h>
#include <AK/Utf16View.h>
static double parse_complete_double(StringView view)
template<typename ViewType>
static double parse_complete_double(ViewType const& view)
{
return AK::parse_number<double>(view, TrimWhitespace::No).release_value();
}
static float parse_complete_float(StringView view)
template<typename ViewType>
static float parse_complete_float(ViewType const& view)
{
return AK::parse_number<float>(view, TrimWhitespace::No).release_value();
}
@ -25,18 +28,27 @@ TEST_CASE(simple_cases)
do { \
EXPECT_EQ(static_cast<double>(value), parse_complete_double(#value##sv)); \
EXPECT_EQ(-static_cast<double>(value), parse_complete_double("-" #value##sv)); \
\
EXPECT_EQ(static_cast<double>(value), parse_complete_double(u"" #value##sv)); \
EXPECT_EQ(-static_cast<double>(value), parse_complete_double(u"-" #value##sv)); \
} while (false)
// Checks that parsing the textual spelling of `value` yields the same float the compiler produces
// for the literal `value##f`, for both the positive and the negated spelling.
// The first half feeds 8-bit string views to parse_complete_float(); the second half repeats the
// same two checks with UTF-16 views (u"" prefix) so the UTF-16 parsing path is exercised as well.
#define DOES_PARSE_FLOAT_LIKE_CPP(value) \
    do { \
        float val = parse_complete_float(#value##sv); \
        EXPECT_EQ(static_cast<float>(value##f), val); \
        EXPECT_EQ(-static_cast<float>(value##f), parse_complete_float("-" #value##sv)); \
        \
        val = parse_complete_float(u"" #value##sv); \
        EXPECT_EQ(static_cast<float>(value##f), val); \
        /* The negated case must use a UTF-16 literal too; a narrow "-" would re-test the 8-bit path. */ \
        EXPECT_EQ(-static_cast<float>(value##f), parse_complete_float(u"-" #value##sv)); \
    } while (false)
#define DOES_PARSE_FLOAT_AND_DOUBLE_LIKE_CPP(value) \
do { \
DOES_PARSE_DOUBLE_LIKE_CPP(value); \
DOES_PARSE_FLOAT_LIKE_CPP(value);
DOES_PARSE_FLOAT_LIKE_CPP(value); \
} while (false)
DOES_PARSE_DOUBLE_LIKE_CPP(2.22507385850720138309e-308);
@ -216,6 +228,9 @@ TEST_CASE(simple_cases)
do { \
EXPECT_EQ(static_cast<double>(value##.), parse_complete_double(#value##sv)); \
EXPECT_EQ(-static_cast<double>(value##.), parse_complete_double("-" #value##sv)); \
\
EXPECT_EQ(static_cast<double>(value##.), parse_complete_double(u"" #value##sv)); \
EXPECT_EQ(-static_cast<double>(value##.), parse_complete_double(u"-" #value##sv)); \
} while (false)
DOES_PARSE_INT_LIKE_VALUE_LIKE_CPP(0);
@ -231,7 +246,10 @@ TEST_CASE(simple_cases)
EXPECT_EQ(0., parse_complete_double("2.4703282292062327208828439643411068618252990130716238221279284125033775363510437593264991818081799618989828234772285886546332835517796989819938739800539093906315035659515570226392290858392449105184435931802849936536152500319370457678249219365623669863658480757001585769269903706311928279558551332927834338409351978015531246597263579574622766465272827220056374006485499977096599470454020828166226237857393450736339007967761930577506740176324673600968951340535537458516661134223766678604162159680461914467291840300530057530849048765391711386591646239524912623653881879636239373280423891018672348497668235089863388587925628302755995657524455507255189313690836254779186948667994968324049705821028513185451396213837722826145437693412532098591327667236328124999e-324"sv));
#define EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(expected_val, str) \
EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(str##sv)));
do { \
EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(str##sv))); \
EXPECT_EQ(bit_cast<u64>(expected_val), bit_cast<u64>(parse_complete_double(u##str##sv))); \
} while (false)
EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(0., "1e-324");
EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-1e-324");
@ -241,12 +259,21 @@ TEST_CASE(simple_cases)
EXPECT_TO_PARSE_TO_VALUE_EQUAL_TO(-0., "-.0e10");
#define EXPECT_TO_PARSE_TO_INFINITY(str) \
do { \
EXPECT_EQ(__builtin_huge_val(), parse_complete_double(str##sv)); \
EXPECT_EQ(__builtin_huge_val(), parse_complete_double("+" str##sv)); \
EXPECT_EQ(-__builtin_huge_val(), parse_complete_double("-" str##sv)); \
EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(str##sv)); \
EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float("+" str##sv)); \
EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float("-" str##sv))
EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float("-" str##sv)); \
\
EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u##str##sv)); \
EXPECT_EQ(__builtin_huge_val(), parse_complete_double(u"+" str##sv)); \
EXPECT_EQ(-__builtin_huge_val(), parse_complete_double(u"-" str##sv)); \
EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(u##str##sv)); \
EXPECT_EQ(static_cast<float>(__builtin_huge_valf()), parse_complete_float(u"+" str##sv)); \
EXPECT_EQ(static_cast<float>(-__builtin_huge_valf()), parse_complete_float(u"-" str##sv)); \
} while (false)
EXPECT_TO_PARSE_TO_INFINITY("123.456e789");
EXPECT_TO_PARSE_TO_INFINITY("123456.456789e789");
@ -267,6 +294,11 @@ TEST_CASE(partial_parse_stops_at_right_spot)
auto result = AK::parse_first_number<double>(string_value##sv); \
VERIFY(result.has_value()); \
EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
EXPECT_EQ(result->characters_parsed, chars_parsed##uz); \
\
result = AK::parse_first_number<double>(u##string_value##sv); \
VERIFY(result.has_value()); \
EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
EXPECT_EQ(result->characters_parsed, chars_parsed##uz); \
} while (false)
@ -287,8 +319,8 @@ TEST_CASE(invalid_parse)
{
#define EXPECT_PARSE_TO_FAIL(string_value) \
do { \
auto result = AK::parse_first_number<double>(string_value##sv); \
EXPECT(!result.has_value()); \
EXPECT(!AK::parse_first_number<double>(string_value##sv).has_value()); \
EXPECT(!AK::parse_first_number<double>(u##string_value##sv).has_value()); \
} while (false)
EXPECT_PARSE_TO_FAIL("");
@ -331,6 +363,10 @@ TEST_CASE(detect_out_of_range_values)
do { \
auto result = AK::parse_first_number<double>(string_value##sv); \
VERIFY(result.has_value()); \
EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
\
result = AK::parse_first_number<double>(u##string_value##sv); \
VERIFY(result.has_value()); \
EXPECT_EQ(bit_cast<u64>(result->value), bit_cast<u64>(static_cast<double>(double_value))); \
} while (false)
@ -340,15 +376,13 @@ TEST_CASE(detect_out_of_range_values)
EXPECT_PARSE_TO_HAVE_ERROR("-10e10000", -INFINITY);
}
static bool parse_completely_passes(StringView view)
{
return AK::parse_number<double>(view, TrimWhitespace::No).has_value();
}
TEST_CASE(parse_completely_must_be_just_floating_point)
{
#define EXPECT_PARSE_COMPLETELY_TO_FAIL(value) \
EXPECT(!parse_completely_passes(value##sv))
#define EXPECT_PARSE_COMPLETELY_TO_FAIL(string_value) \
do { \
EXPECT(!AK::parse_number<double>(string_value##sv, TrimWhitespace::No).has_value()); \
EXPECT(!AK::parse_number<double>(u##string_value##sv, TrimWhitespace::No).has_value()); \
} while (false)
EXPECT_PARSE_COMPLETELY_TO_FAIL("");
EXPECT_PARSE_COMPLETELY_TO_FAIL("-");