LibRegex: Avoid hashing the state hashes again

We already had a really nice hash that had a single issue; this commit
fixes that and makes it *the* hash for the hash table, so we avoid
double-hashing and creating long chains.
This is an easy 10% perf gain.
This commit is contained in:
Ali Mohammad Pur 2025-04-18 12:28:45 +02:00
parent a688902782
commit 8312aa9ee6
2 changed files with 12 additions and 4 deletions

View file

@ -418,7 +418,8 @@ struct MatchState {
auto combine = [&hash](auto value) {
hash ^= value + 0x9e3779b97f4a7c15 + (hash << 6) + (hash >> 2);
};
auto combine_vector = [&hash](auto const& vector) {
auto combine_vector = [&hash](auto const& vector, auto tag) {
hash ^= tag * (vector.size() + 1);
for (auto& value : vector) {
hash ^= value;
hash *= 0x100000001b3;
@ -431,8 +432,8 @@ struct MatchState {
combine(instruction_position);
combine(fork_at_position);
combine(initiating_fork.value_or(0) + initiating_fork.has_value());
combine_vector(repetition_marks);
combine_vector(checkpoints);
combine_vector(repetition_marks, 0xbeefbeefbeefbeef);
combine_vector(checkpoints, 0xfacefacefaceface);
return hash;
}

View file

@ -467,11 +467,18 @@ private:
Node* m_last { nullptr };
};
// Hash traits for u64 keys that are themselves already hash values (and so are
// expected to be well distributed). Instead of hashing such a key a second
// time, hash() only XORs the key's upper 32 bits into the key and returns the
// result narrowed to `unsigned`.
struct SufficientlyUniformValueTraits : DefaultTraits<u64> {
    static constexpr unsigned hash(u64 value)
    {
        // Fold the high half onto the low half so the high bits still
        // influence the value that survives the narrowing below.
        u64 const upper_half = value >> 32;
        return static_cast<unsigned>(upper_half ^ value);
    }
};
template<class Parser>
bool Matcher<Parser>::execute(MatchInput const& input, MatchState& state, size_t& operations) const
{
BumpAllocatedLinkedList<MatchState> states_to_try_next;
HashTable<u64> seen_state_hashes;
HashTable<u64, SufficientlyUniformValueTraits> seen_state_hashes;
#if REGEX_DEBUG
size_t recursion_level = 0;
#endif