From d02b763cd6d061c417f3c98ab6bed4eb45e49673 Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Sun, 1 Dec 2024 00:04:32 +0000 Subject: [PATCH] LibWeb: Add `parse_integer_digits` methods The rules for parsing integers don't specify an upper bound on the value that can be returned, so the `parse_integer_digits` method can be used to check whether the given arbitrarily-large StringView is valid according to these rules. The `parse_integer` and `parse_non_negative_integer` methods would fail for values larger than 2147483647 when they shouldn't have. --- Libraries/LibWeb/HTML/Numbers.cpp | 49 +++++++++++++++++++++++-------- Libraries/LibWeb/HTML/Numbers.h | 2 ++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/Libraries/LibWeb/HTML/Numbers.cpp b/Libraries/LibWeb/HTML/Numbers.cpp index 1b22418d449..0fb0821abae 100644 --- a/Libraries/LibWeb/HTML/Numbers.cpp +++ b/Libraries/LibWeb/HTML/Numbers.cpp @@ -12,7 +12,7 @@ namespace Web::HTML { // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-integers -Optional<i32> parse_integer(StringView string) +Optional<StringView> parse_integer_digits(StringView string) { // 1. Let input be the string being parsed. // 2. Let position be a pointer into input, initially pointing at the start of the string. @@ -26,7 +26,7 @@ Optional<i32> parse_integer(StringView string) // 5. If position is past the end of input, return an error. if (lexer.is_eof()) { - return {}; + return OptionalNone {}; } // 6. If the character indicated by position (the first character) is a U+002D HYPHEN-MINUS character (-): @@ -40,23 +40,33 @@ Optional<i32> parse_integer(StringView string) // 7. If the character indicated by position is not an ASCII digit, then return an error. if (!lexer.next_is(is_ascii_digit)) { - return {}; + return OptionalNone {}; } // 8. Collect a sequence of code points that are ASCII digits from input given position, and interpret the resulting sequence as a base-ten integer. Let value be that integer. 
+ // NOTE: Integer conversion is performed by the caller. lexer.consume_while(is_ascii_digit); size_t end_index = lexer.tell(); auto digits = lexer.input().substring_view(start_index, end_index - start_index); - auto optional_value = AK::StringUtils::convert_to_int(digits); // 9. If sign is "positive", return value, otherwise return the result of subtracting value from zero. // NOTE: Skipped, see comment on step 6. - return optional_value; + return digits; +} + +// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-integers +Optional<i32> parse_integer(StringView string) +{ + auto optional_digits = parse_integer_digits(string); + if (!optional_digits.has_value()) + return {}; + + return optional_digits->to_number<i32>(TrimWhitespace::No); } // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-negative-integers -Optional<u32> parse_non_negative_integer(StringView string) +Optional<StringView> parse_non_negative_integer_digits(StringView string) { // 1. Let input be the string being parsed. // 2. Let value be the result of parsing input using the rules for parsing integers. @@ -64,19 +74,32 @@ Optional<u32> parse_non_negative_integer(StringView string) // NOTE: Because we call `parse_integer`, we parse all integers as signed. If we need the extra // size that an unsigned integer offers, then this would need to be improved. That said, // I don't think we need to support such large integers at the moment. - auto optional_value = parse_integer(string); + auto optional_integer_digits = parse_integer_digits(string); // 3. If value is an error, return an error. - if (!optional_value.has_value()) { - return {}; - } + if (!optional_integer_digits.has_value()) + return OptionalNone {}; // 4. If value is less than zero, return an error. 
- if (optional_value.value() < 0) { - return {}; - } + if (optional_integer_digits->length() > 1 && optional_integer_digits->starts_with('-') && optional_integer_digits->bytes().at(1) != '0') + return OptionalNone {}; // 5. Return value. + // NOTE: Integer conversion is performed by the caller. + return optional_integer_digits; +} + +// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-negative-integers +Optional<u32> parse_non_negative_integer(StringView string) +{ + auto optional_digits = parse_non_negative_integer_digits(string); + if (!optional_digits.has_value()) + return {}; + + auto optional_value = optional_digits->to_number<i64>(TrimWhitespace::No); + if (!optional_value.has_value() || *optional_value > NumericLimits<i32>::max()) + return {}; + return static_cast<u32>(optional_value.value()); } diff --git a/Libraries/LibWeb/HTML/Numbers.h b/Libraries/LibWeb/HTML/Numbers.h index a3b608e859a..52b57b367c2 100644 --- a/Libraries/LibWeb/HTML/Numbers.h +++ b/Libraries/LibWeb/HTML/Numbers.h @@ -14,8 +14,10 @@ namespace Web::HTML { Optional<i32> parse_integer(StringView string); +Optional<StringView> parse_integer_digits(StringView string); Optional<u32> parse_non_negative_integer(StringView string); +Optional<StringView> parse_non_negative_integer_digits(StringView string); Optional<double> parse_floating_point_number(StringView string);