mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 20:59:16 +00:00
LibRegex: Don't repeat the same fork again
If some state has already been tried, skip over it as it would never lead to a match regardless. This fixes performance/memory issues in cases like /(a+)+b/.exec("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") or /(a|a?)+b/... Fixes #2622.
This commit is contained in:
parent
7ceeb85ba7
commit
cce000d57c
Notes:
github-actions[bot]
2025-01-17 09:35:15 +00:00
Author: https://github.com/alimpfard
Commit: cce000d57c
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3261
3 changed files with 66 additions and 9 deletions
|
@@ -561,6 +561,33 @@ struct MatchState {
|
|||
COWVector<Vector<Match>> capture_group_matches;
|
||||
COWVector<u64> repetition_marks;
|
||||
Vector<u64, 64> checkpoints;
|
||||
|
||||
// For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions.
|
||||
// For the full range, zero collisions were found in 8 million random samples.
|
||||
u64 u64_hash() const
|
||||
{
|
||||
u64 hash = 0xcbf29ce484222325;
|
||||
auto combine = [&hash](auto value) {
|
||||
hash ^= value + 0x9e3779b97f4a7c15 + (hash << 6) + (hash >> 2);
|
||||
};
|
||||
auto combine_vector = [&hash](auto const& vector) {
|
||||
for (auto& value : vector) {
|
||||
hash ^= value;
|
||||
hash *= 0x100000001b3;
|
||||
}
|
||||
};
|
||||
|
||||
combine(string_position_before_match);
|
||||
combine(string_position);
|
||||
combine(string_position_in_code_units);
|
||||
combine(instruction_position);
|
||||
combine(fork_at_position);
|
||||
combine(initiating_fork.value_or(0) + initiating_fork.has_value());
|
||||
combine_vector(repetition_marks);
|
||||
combine_vector(checkpoints);
|
||||
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue