mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 09:39:39 +00:00
LibRegex: Rename a couple of RegexStringView methods for clarity
Renames: `operator[]` -> `code_point_at`; `code_unit_at` -> `unicode_aware_code_point_at`. Note that `unicode_aware_code_point_at` returns either a code point or a code unit, depending on the Unicode flag.
This commit is contained in:
parent
2dfcc4c307
commit
81fc8ab8cc
Notes:
github-actions[bot]
2025-07-21 21:45:38 +00:00
Author: https://github.com/trflynn89
Commit: 81fc8ab8cc
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5550
Reviewed-by: https://github.com/alimpfard ✅
3 changed files with 24 additions and 24 deletions
|
@ -116,7 +116,7 @@ static void advance_string_position(MatchState& state, RegexStringView view, Opt
|
||||||
|
|
||||||
if (view.unicode()) {
|
if (view.unicode()) {
|
||||||
if (!code_point.has_value() && (state.string_position_in_code_units < view.length_in_code_units()))
|
if (!code_point.has_value() && (state.string_position_in_code_units < view.length_in_code_units()))
|
||||||
code_point = view[state.string_position_in_code_units];
|
code_point = view.code_point_at(state.string_position_in_code_units);
|
||||||
if (code_point.has_value())
|
if (code_point.has_value())
|
||||||
state.string_position_in_code_units += view.length_of_code_point(*code_point);
|
state.string_position_in_code_units += view.length_of_code_point(*code_point);
|
||||||
} else {
|
} else {
|
||||||
|
@ -282,7 +282,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
|
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
|
||||||
auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
|
auto input_view = input.view.substring_view(state.string_position - 1, 1).code_point_at(0);
|
||||||
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
|
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,14 +304,14 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& in
|
||||||
auto isword = [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; };
|
auto isword = [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; };
|
||||||
auto is_word_boundary = [&] {
|
auto is_word_boundary = [&] {
|
||||||
if (state.string_position == input.view.length()) {
|
if (state.string_position == input.view.length()) {
|
||||||
return (state.string_position > 0 && isword(input.view[state.string_position_in_code_units - 1]));
|
return (state.string_position > 0 && isword(input.view.code_point_at(state.string_position_in_code_units - 1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state.string_position == 0) {
|
if (state.string_position == 0) {
|
||||||
return (isword(input.view[0]));
|
return (isword(input.view.code_point_at(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return !!(isword(input.view[state.string_position_in_code_units]) ^ isword(input.view[state.string_position_in_code_units - 1]));
|
return !!(isword(input.view.code_point_at(state.string_position_in_code_units)) ^ isword(input.view.code_point_at(state.string_position_in_code_units - 1)));
|
||||||
};
|
};
|
||||||
switch (type()) {
|
switch (type()) {
|
||||||
case BoundaryCheckType::Word: {
|
case BoundaryCheckType::Word: {
|
||||||
|
@ -335,7 +335,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
|
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
|
||||||
auto input_view = input.view.substring_view(state.string_position, 1)[0];
|
auto input_view = input.view.substring_view(state.string_position, 1).code_point_at(0);
|
||||||
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
|
return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -488,7 +488,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
if (input.view.length() <= state.string_position)
|
if (input.view.length() <= state.string_position)
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
auto input_view = input.view.substring_view(state.string_position, 1)[0];
|
auto input_view = input.view.substring_view(state.string_position, 1).code_point_at(0);
|
||||||
auto is_equivalent_to_newline = input_view == '\n'
|
auto is_equivalent_to_newline = input_view == '\n'
|
||||||
|| (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)
|
|| (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)
|
||||||
? (input_view == '\r' || input_view == LineSeparator || input_view == ParagraphSeparator)
|
? (input_view == '\r' || input_view == LineSeparator || input_view == ParagraphSeparator)
|
||||||
|
@ -531,7 +531,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
auto character_class = (CharClass)m_bytecode->at(offset++);
|
auto character_class = (CharClass)m_bytecode->at(offset++);
|
||||||
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
|
auto ch = input.view.unicode_aware_code_point_at(state.string_position_in_code_units);
|
||||||
|
|
||||||
compare_character_class(input, state, character_class, ch, current_inversion_state(), inverse_matched);
|
compare_character_class(input, state, character_class, ch, current_inversion_state(), inverse_matched);
|
||||||
break;
|
break;
|
||||||
|
@ -548,7 +548,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
offset += count_insensitive;
|
offset += count_insensitive;
|
||||||
|
|
||||||
bool const insensitive = input.regex_options & AllFlags::Insensitive;
|
bool const insensitive = input.regex_options & AllFlags::Insensitive;
|
||||||
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
|
auto ch = input.view.unicode_aware_code_point_at(state.string_position_in_code_units);
|
||||||
|
|
||||||
if (insensitive)
|
if (insensitive)
|
||||||
ch = to_ascii_lowercase(ch);
|
ch = to_ascii_lowercase(ch);
|
||||||
|
@ -578,7 +578,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
|
|
||||||
auto from = value.from;
|
auto from = value.from;
|
||||||
auto to = value.to;
|
auto to = value.to;
|
||||||
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
|
auto ch = input.view.unicode_aware_code_point_at(state.string_position_in_code_units);
|
||||||
|
|
||||||
compare_character_range(input, state, from, to, ch, current_inversion_state(), inverse_matched);
|
compare_character_range(input, state, from, to, ch, current_inversion_state(), inverse_matched);
|
||||||
break;
|
break;
|
||||||
|
@ -711,8 +711,8 @@ ALWAYS_INLINE void OpCode_Compare::compare_char(MatchInput const& input, MatchSt
|
||||||
|
|
||||||
// FIXME: Figure out how to do this if unicode() without performing a substring split first.
|
// FIXME: Figure out how to do this if unicode() without performing a substring split first.
|
||||||
auto input_view = input.view.unicode()
|
auto input_view = input.view.unicode()
|
||||||
? input.view.substring_view(state.string_position, 1)[0]
|
? input.view.substring_view(state.string_position, 1).code_point_at(0)
|
||||||
: input.view.code_unit_at(state.string_position_in_code_units);
|
: input.view.unicode_aware_code_point_at(state.string_position_in_code_units);
|
||||||
|
|
||||||
bool equal;
|
bool equal;
|
||||||
if (input.regex_options & AllFlags::Insensitive) {
|
if (input.regex_options & AllFlags::Insensitive) {
|
||||||
|
@ -753,7 +753,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(MatchInput const& input, Match
|
||||||
|
|
||||||
if (str.length() == 1) {
|
if (str.length() == 1) {
|
||||||
auto inverse_matched = false;
|
auto inverse_matched = false;
|
||||||
compare_char(input, state, str[0], false, inverse_matched);
|
compare_char(input, state, str.code_point_at(0), false, inverse_matched);
|
||||||
return !inverse_matched;
|
return !inverse_matched;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -843,7 +843,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_property(MatchInput const& input, Mat
|
||||||
if (state.string_position == input.view.length())
|
if (state.string_position == input.view.length())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u32 code_point = input.view[state.string_position_in_code_units];
|
u32 code_point = input.view.code_point_at(state.string_position_in_code_units);
|
||||||
bool equal = Unicode::code_point_has_property(code_point, property);
|
bool equal = Unicode::code_point_has_property(code_point, property);
|
||||||
|
|
||||||
if (equal) {
|
if (equal) {
|
||||||
|
@ -859,7 +859,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_general_category(MatchInput const& in
|
||||||
if (state.string_position == input.view.length())
|
if (state.string_position == input.view.length())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u32 code_point = input.view[state.string_position_in_code_units];
|
u32 code_point = input.view.code_point_at(state.string_position_in_code_units);
|
||||||
bool equal = Unicode::code_point_has_general_category(code_point, general_category);
|
bool equal = Unicode::code_point_has_general_category(code_point, general_category);
|
||||||
|
|
||||||
if (equal) {
|
if (equal) {
|
||||||
|
@ -875,7 +875,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_script(MatchInput const& input, Match
|
||||||
if (state.string_position == input.view.length())
|
if (state.string_position == input.view.length())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u32 code_point = input.view[state.string_position_in_code_units];
|
u32 code_point = input.view.code_point_at(state.string_position_in_code_units);
|
||||||
bool equal = Unicode::code_point_has_script(code_point, script);
|
bool equal = Unicode::code_point_has_script(code_point, script);
|
||||||
|
|
||||||
if (equal) {
|
if (equal) {
|
||||||
|
@ -891,7 +891,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_script_extension(MatchInput const& in
|
||||||
if (state.string_position == input.view.length())
|
if (state.string_position == input.view.length())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u32 code_point = input.view[state.string_position_in_code_units];
|
u32 code_point = input.view.code_point_at(state.string_position_in_code_units);
|
||||||
bool equal = Unicode::code_point_has_script_extension(code_point, script);
|
bool equal = Unicode::code_point_has_script_extension(code_point, script);
|
||||||
|
|
||||||
if (equal) {
|
if (equal) {
|
||||||
|
|
|
@ -201,25 +201,25 @@ public:
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: index must always be the code unit offset to return.
|
u32 code_point_at(size_t code_unit_index) const
|
||||||
u32 operator[](size_t index) const
|
|
||||||
{
|
{
|
||||||
return m_view.visit(
|
return m_view.visit(
|
||||||
[&](StringView view) -> u32 {
|
[&](StringView view) -> u32 {
|
||||||
auto ch = view[index];
|
auto ch = view[code_unit_index];
|
||||||
if constexpr (IsSigned<char>) {
|
if constexpr (IsSigned<char>) {
|
||||||
if (ch < 0)
|
if (ch < 0)
|
||||||
return 256u + ch;
|
return 256u + ch;
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[&](Utf16View const& view) -> u32 { return view.code_point_at(index); });
|
[&](Utf16View const& view) -> u32 { return view.code_point_at(code_unit_index); });
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 code_unit_at(size_t code_unit_index) const
|
// Returns the code point at the code unit offset if the Unicode flag is set. Otherwise, returns the code unit.
|
||||||
|
u32 unicode_aware_code_point_at(size_t code_unit_index) const
|
||||||
{
|
{
|
||||||
if (unicode())
|
if (unicode())
|
||||||
return operator[](code_unit_index);
|
return code_point_at(code_unit_index);
|
||||||
|
|
||||||
return m_view.visit(
|
return m_view.visit(
|
||||||
[&](StringView view) -> u32 {
|
[&](StringView view) -> u32 {
|
||||||
|
|
|
@ -291,7 +291,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
auto const insensitive = input.regex_options.has_flag_set(AllFlags::Insensitive);
|
auto const insensitive = input.regex_options.has_flag_set(AllFlags::Insensitive);
|
||||||
if (auto& starting_ranges = m_pattern->parser_result.optimization_data.starting_ranges; !starting_ranges.is_empty()) {
|
if (auto& starting_ranges = m_pattern->parser_result.optimization_data.starting_ranges; !starting_ranges.is_empty()) {
|
||||||
auto ranges = insensitive ? m_pattern->parser_result.optimization_data.starting_ranges_insensitive.span() : starting_ranges.span();
|
auto ranges = insensitive ? m_pattern->parser_result.optimization_data.starting_ranges_insensitive.span() : starting_ranges.span();
|
||||||
auto ch = input.view.code_unit_at(view_index);
|
auto ch = input.view.unicode_aware_code_point_at(view_index);
|
||||||
if (insensitive)
|
if (insensitive)
|
||||||
ch = to_ascii_lowercase(ch);
|
ch = to_ascii_lowercase(ch);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue