mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 09:39:39 +00:00
AK+Everywhere: Allow lonely UTF-16 surrogates by default
The web allows lonely (unpaired) UTF-16 surrogates in strings by default. Let's have our string APIs reflect this, so that callers don't have to pass an "allow lonely surrogates" option all over the place.
This commit is contained in:
parent
86b1c78c1a
commit
9fc3e72db2
Notes:
github-actions[bot]
2025-07-03 13:53:17 +00:00
Author: https://github.com/trflynn89
Commit: 9fc3e72db2
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5228
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/shannonbooth
14 changed files with 74 additions and 77 deletions
|
@ -185,16 +185,16 @@ Utf8View Utf8View::trim(Utf8View const& characters, TrimMode mode) const
|
|||
return substring_view(substring_start, substring_length);
|
||||
}
|
||||
|
||||
bool Utf8View::validate(size_t& valid_bytes, AllowSurrogates allow_surrogates) const
|
||||
bool Utf8View::validate(size_t& valid_bytes, AllowLonelySurrogates allow_lonely_surrogates) const
|
||||
{
|
||||
auto result = simdutf::validate_utf8_with_errors(m_string.characters_without_null_termination(), m_string.length());
|
||||
valid_bytes = result.count;
|
||||
|
||||
if (result.error == simdutf::SURROGATE && allow_surrogates == AllowSurrogates::Yes) {
|
||||
if (result.error == simdutf::SURROGATE && allow_lonely_surrogates == AllowLonelySurrogates::Yes) {
|
||||
valid_bytes += 3; // All surrogates have a UTF-8 byte length of 3.
|
||||
|
||||
size_t substring_valid_bytes = 0;
|
||||
auto is_valid = substring_view(valid_bytes).validate(substring_valid_bytes, allow_surrogates);
|
||||
auto is_valid = substring_view(valid_bytes).validate(substring_valid_bytes, allow_lonely_surrogates);
|
||||
|
||||
valid_bytes += substring_valid_bytes;
|
||||
return is_valid;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue