mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-29 20:29:18 +00:00
LibRegex: Account for uppercase characters in insensitive patterns
This commit is contained in:
parent
31e8189f9f
commit
5b45223d5f
Notes:
github-actions[bot]
2025-07-12 09:27:33 +00:00
Author: https://github.com/alimpfard
Commit: 5b45223d5f
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5403
Reviewed-by: https://github.com/gmta ✅
6 changed files with 73 additions and 31 deletions
|
@ -223,8 +223,11 @@ void Regex<Parser>::fill_optimization_data(BasicBlockList const& blocks)
|
|||
if (!compares.char_classes.is_empty() || !compares.negated_char_classes.is_empty() || !compares.negated_ranges.is_empty())
|
||||
return;
|
||||
|
||||
for (auto it = compares.ranges.begin(); it != compares.ranges.end(); ++it)
|
||||
for (auto it = compares.ranges.begin(); it != compares.ranges.end(); ++it) {
|
||||
parser_result.optimization_data.starting_ranges.append({ it.key(), *it });
|
||||
parser_result.optimization_data.starting_ranges_insensitive.append({ to_ascii_lowercase(it.key()), to_ascii_lowercase(*it) });
|
||||
quick_sort(parser_result.optimization_data.starting_ranges_insensitive, [](CharRange a, CharRange b) { return a.from < b.from; });
|
||||
}
|
||||
return;
|
||||
}
|
||||
case OpCodeId::CheckBegin:
|
||||
|
@ -1863,10 +1866,13 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
|||
auto append_table = [&](auto& table) {
|
||||
++argument_count;
|
||||
arguments.append(to_underlying(CharacterCompareType::LookupTable));
|
||||
auto size_index = arguments.size();
|
||||
auto sensitive_size_index = arguments.size();
|
||||
auto insensitive_size_index = sensitive_size_index + 1;
|
||||
arguments.append(0);
|
||||
arguments.append(0);
|
||||
|
||||
Optional<CharRange> active_range;
|
||||
size_t range_count = 0;
|
||||
Vector<ByteCodeValueType> range_data;
|
||||
for (auto& range : table) {
|
||||
if (!active_range.has_value()) {
|
||||
active_range = range;
|
||||
|
@ -1876,16 +1882,25 @@ void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndVa
|
|||
if (range.from <= active_range->to + 1 && range.to + 1 >= active_range->from) {
|
||||
active_range = CharRange { min(range.from, active_range->from), max(range.to, active_range->to) };
|
||||
} else {
|
||||
++range_count;
|
||||
arguments.append(active_range.release_value());
|
||||
range_data.append(active_range.release_value());
|
||||
active_range = range;
|
||||
}
|
||||
}
|
||||
if (active_range.has_value()) {
|
||||
++range_count;
|
||||
arguments.append(active_range.release_value());
|
||||
if (active_range.has_value())
|
||||
range_data.append(active_range.release_value());
|
||||
arguments.extend(range_data);
|
||||
arguments[sensitive_size_index] = range_data.size();
|
||||
|
||||
if (!all_of(range_data, [](CharRange range) { return range.from == to_ascii_lowercase(range.from) && range.to == to_ascii_lowercase(range.to); })) {
|
||||
Vector<ByteCodeValueType> insensitive_data;
|
||||
insensitive_data.ensure_capacity(range_data.size());
|
||||
for (CharRange range : range_data)
|
||||
insensitive_data.append(CharRange { to_ascii_lowercase(range.from), to_ascii_lowercase(range.to) });
|
||||
quick_sort(insensitive_data, [](CharRange a, CharRange b) { return a.from < b.from; });
|
||||
|
||||
arguments.extend(insensitive_data);
|
||||
arguments[insensitive_size_index] = insensitive_data.size();
|
||||
}
|
||||
arguments[size_index] = range_count;
|
||||
};
|
||||
|
||||
auto contains_regular_table = !table.is_empty();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue