mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 12:49:19 +00:00
AK: Allow treating UTF-16 views with lonely surrogates as valid
Much of the web requires us to allow lone (unpaired) surrogates in UTF-16 data. The default behavior of disallowing such code units is unchanged in this commit; it will be changed in an upcoming commit.
This commit is contained in:
parent
d978a582a0
commit
2abc955ca9
Notes:
github-actions[bot]
2025-07-03 13:53:41 +00:00
Author: https://github.com/trflynn89
Commit: 2abc955ca9
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5228
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/shannonbooth
3 changed files with 84 additions and 38 deletions
|
@ -307,16 +307,31 @@ bool Utf16View::is_code_unit_less_than(Utf16View const& other) const
|
|||
return a.size() < b.size();
|
||||
}
|
||||
|
||||
bool Utf16View::validate() const
|
||||
bool Utf16View::validate(AllowInvalidCodeUnits allow_invalid_code_units) const
|
||||
{
|
||||
return simdutf::validate_utf16(char_data(), length_in_code_units());
|
||||
size_t valid_code_units = 0;
|
||||
return validate(valid_code_units, allow_invalid_code_units);
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units) const
|
||||
bool Utf16View::validate(size_t& valid_code_units, AllowInvalidCodeUnits allow_invalid_code_units) const
|
||||
{
|
||||
auto result = simdutf::validate_utf16_with_errors(char_data(), length_in_code_units());
|
||||
valid_code_units = result.count;
|
||||
return result.error == simdutf::SUCCESS;
|
||||
auto view = *this;
|
||||
valid_code_units = 0;
|
||||
|
||||
while (!view.is_empty()) {
|
||||
auto result = simdutf::validate_utf16_with_errors(view.char_data(), view.length_in_code_units());
|
||||
valid_code_units += result.count;
|
||||
|
||||
if (result.error == simdutf::SUCCESS)
|
||||
return true;
|
||||
if (allow_invalid_code_units == AllowInvalidCodeUnits::No || result.error != simdutf::SURROGATE)
|
||||
return false;
|
||||
|
||||
view = view.substring_view(result.count + 1);
|
||||
++valid_code_units;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t Utf16View::calculate_length_in_code_points() const
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue