mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-06 16:19:40 +00:00
LibWeb: Port the FormAssociatedElement value to UTF-16
Some checks are pending
CI / macOS, arm64, Sanitizer, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
Some checks are pending
CI / macOS, arm64, Sanitizer, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
This porting effort makes it pretty clear we will want a UTF-16-aware GenericLexer. But for now, we can actually make ASCII assumptions about what we are parsing, and act accordingly.
This commit is contained in:
parent
e7b08cf291
commit
c8888609f4
Notes:
github-actions[bot]
2025-07-28 10:26:29 +00:00
Author: https://github.com/trflynn89
Commit: c8888609f4
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5610
30 changed files with 444 additions and 333 deletions
|
@ -33,30 +33,33 @@ u32 week_number_of_the_last_day(u64 year)
|
|||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-week-string
|
||||
bool is_valid_week_string(StringView value)
|
||||
bool is_valid_week_string(Utf16View const& value)
|
||||
{
|
||||
// A string is a valid week string representing a week-year year and week week if it consists of the following components in the given order:
|
||||
|
||||
// 1. Four or more ASCII digits, representing year, where year > 0
|
||||
// 2. A U+002D HYPHEN-MINUS character (-)
|
||||
// 3. A U+0057 LATIN CAPITAL LETTER W character (W)
|
||||
// 4. Two ASCII digits, representing the week week, in the range 1 ≤ week ≤ maxweek, where maxweek is the week number of the last day of week-year year
|
||||
// 4. Two ASCII digits, representing the week week, in the range 1 ≤ week ≤ maxweek, where maxweek is the week number
|
||||
// of the last day of week-year year
|
||||
auto parts = value.split_view('-', SplitBehavior::KeepEmpty);
|
||||
if (parts.size() != 2)
|
||||
return false;
|
||||
if (parts[0].length() < 4)
|
||||
|
||||
if (parts[0].length_in_code_units() < 4)
|
||||
return false;
|
||||
if (parts[1].length_in_code_units() != 3)
|
||||
return false;
|
||||
|
||||
for (auto digit : parts[0])
|
||||
if (!is_ascii_digit(digit))
|
||||
return false;
|
||||
if (parts[1].length() != 3)
|
||||
return false;
|
||||
|
||||
if (!parts[1].starts_with('W'))
|
||||
if (!parts[1].starts_with("W"sv))
|
||||
return false;
|
||||
if (!is_ascii_digit(parts[1][1]))
|
||||
if (!is_ascii_digit(parts[1].code_unit_at(1)))
|
||||
return false;
|
||||
if (!is_ascii_digit(parts[1][2]))
|
||||
if (!is_ascii_digit(parts[1].code_unit_at(2)))
|
||||
return false;
|
||||
|
||||
u64 year = 0;
|
||||
|
@ -64,13 +67,13 @@ bool is_valid_week_string(StringView value)
|
|||
year *= 10;
|
||||
year += parse_ascii_digit(d);
|
||||
}
|
||||
auto week = (parse_ascii_digit(parts[1][1]) * 10) + parse_ascii_digit(parts[1][2]);
|
||||
|
||||
auto week = (parse_ascii_digit(parts[1].code_unit_at(1)) * 10) + parse_ascii_digit(parts[1].code_unit_at(2));
|
||||
return week >= 1 && week <= week_number_of_the_last_day(year);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-month-string
|
||||
bool is_valid_month_string(StringView value)
|
||||
bool is_valid_month_string(Utf16View const& value)
|
||||
{
|
||||
// A string is a valid month string representing a year year and month month if it consists of the following components in the given order:
|
||||
|
||||
|
@ -82,40 +85,42 @@ bool is_valid_month_string(StringView value)
|
|||
if (parts.size() != 2)
|
||||
return false;
|
||||
|
||||
if (parts[0].length() < 4)
|
||||
if (parts[0].length_in_code_units() < 4)
|
||||
return false;
|
||||
if (parts[1].length_in_code_units() != 2)
|
||||
return false;
|
||||
|
||||
for (auto digit : parts[0])
|
||||
if (!is_ascii_digit(digit))
|
||||
return false;
|
||||
|
||||
if (parts[1].length() != 2)
|
||||
if (!is_ascii_digit(parts[1].code_unit_at(0)))
|
||||
return false;
|
||||
if (!is_ascii_digit(parts[1].code_unit_at(1)))
|
||||
return false;
|
||||
|
||||
if (!is_ascii_digit(parts[1][0]))
|
||||
return false;
|
||||
if (!is_ascii_digit(parts[1][1]))
|
||||
return false;
|
||||
|
||||
auto month = (parse_ascii_digit(parts[1][0]) * 10) + parse_ascii_digit(parts[1][1]);
|
||||
auto month = (parse_ascii_digit(parts[1].code_unit_at(0)) * 10) + parse_ascii_digit(parts[1].code_unit_at(1));
|
||||
return month >= 1 && month <= 12;
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-date-string
|
||||
bool is_valid_date_string(StringView value)
|
||||
bool is_valid_date_string(Utf16View const& value)
|
||||
{
|
||||
// A string is a valid date string representing a year year, month month, and day day if it consists of the following components in the given order:
|
||||
|
||||
// 1. A valid month string, representing year and month
|
||||
// 2. A U+002D HYPHEN-MINUS character (-)
|
||||
// 3. Two ASCII digits, representing day, in the range 1 ≤ day ≤ maxday where maxday is the number of days in the month month and year year
|
||||
// 3. Two ASCII digits, representing day, in the range 1 ≤ day ≤ maxday where maxday is the number of days in the
|
||||
// month month and year year
|
||||
auto parts = value.split_view('-', SplitBehavior::KeepEmpty);
|
||||
if (parts.size() != 3)
|
||||
return false;
|
||||
|
||||
if (!is_valid_month_string(ByteString::formatted("{}-{}", parts[0], parts[1])))
|
||||
auto month_string = value.substring_view(0, parts[0].length_in_code_units() + 1 + parts[1].length_in_code_units());
|
||||
if (!is_valid_month_string(month_string))
|
||||
return false;
|
||||
|
||||
if (parts[2].length() != 2)
|
||||
if (parts[2].length_in_code_units() != 2)
|
||||
return false;
|
||||
|
||||
i64 year = 0;
|
||||
|
@ -123,18 +128,20 @@ bool is_valid_date_string(StringView value)
|
|||
year *= 10;
|
||||
year += parse_ascii_digit(d);
|
||||
}
|
||||
auto month = (parse_ascii_digit(parts[1][0]) * 10) + parse_ascii_digit(parts[1][1]);
|
||||
i64 day = (parse_ascii_digit(parts[2][0]) * 10) + parse_ascii_digit(parts[2][1]);
|
||||
|
||||
auto month = (parse_ascii_digit(parts[1].code_unit_at(0)) * 10) + parse_ascii_digit(parts[1].code_unit_at(1));
|
||||
i64 day = (parse_ascii_digit(parts[2].code_unit_at(0)) * 10) + parse_ascii_digit(parts[2].code_unit_at(1));
|
||||
|
||||
return day >= 1 && day <= AK::days_in_month(year, month);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-local-date-and-time-string
|
||||
bool is_valid_local_date_and_time_string(StringView value)
|
||||
bool is_valid_local_date_and_time_string(Utf16View const& value)
|
||||
{
|
||||
auto parts_split_by_T = value.split_view('T', SplitBehavior::KeepEmpty);
|
||||
if (parts_split_by_T.size() == 2)
|
||||
return is_valid_date_string(parts_split_by_T[0]) && is_valid_time_string(parts_split_by_T[1]);
|
||||
|
||||
auto parts_split_by_space = value.split_view(' ', SplitBehavior::KeepEmpty);
|
||||
if (parts_split_by_space.size() == 2)
|
||||
return is_valid_date_string(parts_split_by_space[0]) && is_valid_time_string(parts_split_by_space[1]);
|
||||
|
@ -143,11 +150,11 @@ bool is_valid_local_date_and_time_string(StringView value)
|
|||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-normalised-local-date-and-time-string
|
||||
String normalize_local_date_and_time_string(String const& value)
|
||||
Utf16String normalize_local_date_and_time_string(Utf16String const& value)
|
||||
{
|
||||
if (auto spaces = value.count(" "sv); spaces > 0) {
|
||||
VERIFY(spaces == 1);
|
||||
return MUST(value.replace(" "sv, "T"sv, ReplaceMode::FirstOnly));
|
||||
return value.replace(" "sv, "T"sv, ReplaceMode::FirstOnly);
|
||||
}
|
||||
|
||||
VERIFY(value.count("T"sv) == 1);
|
||||
|
@ -155,7 +162,7 @@ String normalize_local_date_and_time_string(String const& value)
|
|||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-time-string
|
||||
bool is_valid_time_string(StringView value)
|
||||
bool is_valid_time_string(Utf16View const& value)
|
||||
{
|
||||
// A string is a valid time string representing an hour hour, a minute minute, and a second second if it consists of the following components in the given order:
|
||||
|
||||
|
@ -163,49 +170,59 @@ bool is_valid_time_string(StringView value)
|
|||
// 2. A U+003A COLON character (:)
|
||||
// 3. Two ASCII digits, representing minute, in the range 0 ≤ minute ≤ 59
|
||||
// 4. If second is nonzero, or optionally if second is zero:
|
||||
// 1. A U+003A COLON character (:)
|
||||
// 2. Two ASCII digits, representing the integer part of second, in the range 0 ≤ s ≤ 59
|
||||
// 3. If second is not an integer, or optionally if second is an integer:
|
||||
// 1. A U+002E FULL STOP character (.)
|
||||
// 2. One, two, or three ASCII digits, representing the fractional part of second
|
||||
// 1. A U+003A COLON character (:)
|
||||
// 2. Two ASCII digits, representing the integer part of second, in the range 0 ≤ s ≤ 59
|
||||
// 3. If second is not an integer, or optionally if second is an integer:
|
||||
// 1. A U+002E FULL STOP character (.)
|
||||
// 2. One, two, or three ASCII digits, representing the fractional part of second
|
||||
auto parts = value.split_view(':', SplitBehavior::KeepEmpty);
|
||||
if (parts.size() != 2 && parts.size() != 3)
|
||||
return false;
|
||||
if (parts[0].length() != 2)
|
||||
|
||||
if (parts[0].length_in_code_units() != 2)
|
||||
return false;
|
||||
if (!(is_ascii_digit(parts[0][0]) && is_ascii_digit(parts[0][1])))
|
||||
if (parts[1].length_in_code_units() != 2)
|
||||
return false;
|
||||
auto hour = (parse_ascii_digit(parts[0][0]) * 10) + parse_ascii_digit(parts[0][1]);
|
||||
|
||||
if (!is_ascii_digit(parts[0].code_unit_at(0)) || !is_ascii_digit(parts[0].code_unit_at(1)))
|
||||
return false;
|
||||
|
||||
auto hour = (parse_ascii_digit(parts[0].code_unit_at(0)) * 10) + parse_ascii_digit(parts[0].code_unit_at(1));
|
||||
if (hour > 23)
|
||||
return false;
|
||||
if (parts[1].length() != 2)
|
||||
|
||||
if (!is_ascii_digit(parts[1].code_unit_at(0)) || !is_ascii_digit(parts[1].code_unit_at(1)))
|
||||
return false;
|
||||
if (!(is_ascii_digit(parts[1][0]) && is_ascii_digit(parts[1][1])))
|
||||
return false;
|
||||
auto minute = (parse_ascii_digit(parts[1][0]) * 10) + parse_ascii_digit(parts[1][1]);
|
||||
|
||||
auto minute = (parse_ascii_digit(parts[1].code_unit_at(0)) * 10) + parse_ascii_digit(parts[1].code_unit_at(1));
|
||||
if (minute > 59)
|
||||
return false;
|
||||
if (parts.size() == 2)
|
||||
return true;
|
||||
|
||||
if (parts[2].length() < 2)
|
||||
return false;
|
||||
if (!(is_ascii_digit(parts[2][0]) && is_ascii_digit(parts[2][1])))
|
||||
return false;
|
||||
auto second = (parse_ascii_digit(parts[2][0]) * 10) + parse_ascii_digit(parts[2][1]);
|
||||
if (second > 59)
|
||||
return false;
|
||||
if (parts[2].length() == 2)
|
||||
return true;
|
||||
auto second_parts = parts[2].split_view('.', SplitBehavior::KeepEmpty);
|
||||
if (second_parts.size() != 2)
|
||||
return false;
|
||||
if (second_parts[1].length() < 1 || second_parts[1].length() > 3)
|
||||
return false;
|
||||
for (auto digit : second_parts[1])
|
||||
if (!is_ascii_digit(digit))
|
||||
if (parts.size() == 3) {
|
||||
if (parts[2].length_in_code_units() < 2)
|
||||
return false;
|
||||
|
||||
if (!is_ascii_digit(parts[2].code_unit_at(0)) || !is_ascii_digit(parts[2].code_unit_at(1)))
|
||||
return false;
|
||||
|
||||
auto second = (parse_ascii_digit(parts[2].code_unit_at(0)) * 10) + parse_ascii_digit(parts[2].code_unit_at(1));
|
||||
if (second > 59)
|
||||
return false;
|
||||
|
||||
if (parts[2].length_in_code_units() > 2) {
|
||||
auto fractional = parts[2].split_view('.', SplitBehavior::KeepEmpty);
|
||||
if (fractional.size() != 2)
|
||||
return false;
|
||||
|
||||
if (fractional[1].length_in_code_units() < 1 || fractional[1].length_in_code_units() > 3)
|
||||
return false;
|
||||
|
||||
for (auto digit : fractional[1])
|
||||
if (!is_ascii_digit(digit))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue