mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-28 12:18:56 +00:00
AK+LibJS: Extract some UTF-16 helpers for use in an outside class
An upcoming Utf16String will need access to these helpers. Let's make them publicly available.
This commit is contained in:
parent
b6dc5050d2
commit
66006d3812
Notes:
github-actions[bot]
2025-07-03 13:54:12 +00:00
Author: https://github.com/trflynn89
Commit: 66006d3812
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5228
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/shannonbooth
10 changed files with 121 additions and 86 deletions
|
@@ -9,13 +9,14 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "Parser.h"
|
||||
#include <AK/Array.h>
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/HashTable.h>
|
||||
#include <AK/ScopeGuard.h>
|
||||
#include <AK/StdLibExtras.h>
|
||||
#include <AK/TemporaryChange.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <LibJS/Parser.h>
|
||||
#include <LibJS/Runtime/RegExpObject.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
|
||||
|
@@ -4601,7 +4602,7 @@ FlyString Parser::consume_string_value()
|
|||
Utf8View view { value.bytes_as_string_view().substring_view(value.bytes().size() - 3) };
|
||||
VERIFY(view.length() <= 3);
|
||||
auto codepoint = *view.begin();
|
||||
if (Utf16View::is_high_surrogate(codepoint)) {
|
||||
if (AK::UnicodeUtils::is_utf16_high_surrogate(codepoint)) {
|
||||
syntax_error("StringValue ending with unpaired high surrogate"_string);
|
||||
VERIFY(view.length() == 1);
|
||||
}
|
||||
|
|
|
@@ -8,6 +8,7 @@
|
|||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibJS/Runtime/AbstractOperations.h>
|
||||
|
@@ -308,7 +309,7 @@ void RopeString::resolve(EncodingPreference preference) const
|
|||
auto high_surrogate = *Utf8View(previous_string_as_utf8.substring_view(previous_string_as_utf8.length() - 3)).begin();
|
||||
auto low_surrogate = *Utf8View(current_string_as_utf8).begin();
|
||||
|
||||
if (!Utf16View::is_high_surrogate(high_surrogate) || !Utf16View::is_low_surrogate(low_surrogate)) {
|
||||
if (!AK::UnicodeUtils::is_utf16_high_surrogate(high_surrogate) || !AK::UnicodeUtils::is_utf16_low_surrogate(low_surrogate)) {
|
||||
builder.append(current_string_as_utf8);
|
||||
previous = current;
|
||||
continue;
|
||||
|
@@ -316,7 +317,7 @@ void RopeString::resolve(EncodingPreference preference) const
|
|||
|
||||
// Remove 3 bytes from the builder and replace them with the UTF-8 encoded code point.
|
||||
builder.trim(3);
|
||||
builder.append_code_point(Utf16View::decode_surrogate_pair(high_surrogate, low_surrogate));
|
||||
builder.append_code_point(AK::UnicodeUtils::decode_utf16_surrogate_pair(high_surrogate, low_surrogate));
|
||||
|
||||
// Append the remaining part of the current string.
|
||||
builder.append(current_string_as_utf8.substring_view(3));
|
||||
|
|
|
@@ -5,8 +5,8 @@
|
|||
*/
|
||||
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <AK/Utf32View.h>
|
||||
#include <LibJS/Runtime/AbstractOperations.h>
|
||||
#include <LibJS/Runtime/Array.h>
|
||||
#include <LibJS/Runtime/Error.h>
|
||||
|
@@ -129,7 +129,9 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
|
|||
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
|
||||
|
||||
// d. Set result to the string-concatenation of result and UTF16EncodeCodePoint(ℝ(nextCP)).
|
||||
MUST(code_point_to_utf16(string, static_cast<u32>(code_point)));
|
||||
(void)AK::UnicodeUtils::code_point_to_utf16(static_cast<u32>(code_point), [&](auto code_unit) {
|
||||
string.append(code_unit);
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Assert: If codePoints is empty, then result is the empty String.
|
||||
|
|
|
@@ -8,6 +8,7 @@
|
|||
#include <AK/Checked.h>
|
||||
#include <AK/Function.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <LibGC/Heap.h>
|
||||
#include <LibJS/Runtime/AbstractOperations.h>
|
||||
|
@@ -121,7 +122,7 @@ CodePoint code_point_at(Utf16View const& string, size_t position)
|
|||
}
|
||||
|
||||
// 6. If first is a trailing surrogate or position + 1 = size, then
|
||||
if (Utf16View::is_low_surrogate(first) || (position + 1 == string.length_in_code_units())) {
|
||||
if (AK::UnicodeUtils::is_utf16_low_surrogate(first) || (position + 1 == string.length_in_code_units())) {
|
||||
// a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }.
|
||||
return { true, code_point, 1 };
|
||||
}
|
||||
|
@@ -130,13 +131,13 @@ CodePoint code_point_at(Utf16View const& string, size_t position)
|
|||
auto second = string.code_unit_at(position + 1);
|
||||
|
||||
// 8. If second is not a trailing surrogate, then
|
||||
if (!Utf16View::is_low_surrogate(second)) {
|
||||
if (!AK::UnicodeUtils::is_utf16_low_surrogate(second)) {
|
||||
// a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }.
|
||||
return { true, code_point, 1 };
|
||||
}
|
||||
|
||||
// 9. Set cp to UTF16SurrogatePairToCodePoint(first, second).
|
||||
code_point = Utf16View::decode_surrogate_pair(first, second);
|
||||
code_point = AK::UnicodeUtils::decode_utf16_surrogate_pair(first, second);
|
||||
|
||||
// 10. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 2, [[IsUnpairedSurrogate]]: false }.
|
||||
return { false, code_point, 2 };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue