LibRegex: Correctly calculate the target for Repeat in table alts

Fixes a bunch of websites breaking because we now verify jump offsets by
trying to remove 0-offset jumps.
This has been broken for a good while; it was just rare to see Repeat
inside alternatives that lent themselves well to tree alts.
This commit is contained in:
Ali Mohammad Pur 2025-04-24 08:22:44 +02:00 committed by Andrew Kaster
commit fca1d33fec
Notes: github-actions[bot] 2025-04-24 07:18:25 +00:00
2 changed files with 11 additions and 1 deletions

View file

@ -898,6 +898,7 @@ void Regex<Parser>::rewrite_with_useless_jumps_removed()
auto target_old = is_repeat ? i.old_ip - old_off : i.old_ip + i.size + old_off;
if (!new_ip.contains(target_old)) {
dbgln("Target {} not found in new_ip (in {})", target_old, i.old_ip);
dbgln("Pattern: {}", pattern_value);
RegexDebug dbg;
dbg.print_bytecode(*this);
}
@ -1585,7 +1586,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
}
ssize_t target_value = *target_ip - patch_location - patch_size;
if (should_negate)
target_value = -target_value + 2; // from -1 to +1.
target_value = -target_value - opcode.size();
target[patch_location] = static_cast<ByteCodeValueType>(target_value);
} else {
patch_locations.append({ QualifiedIP { alternative_index, intended_jump_ip }, patch_location });

View file

@ -1285,3 +1285,12 @@ TEST_CASE(mismatching_brackets)
EXPECT_EQ(re.parser_result.error, regex::Error::MismatchingBracket);
}
}
// Regression test: building this pattern used to crash when the repeat offset
// was miscalculated during table reconstruction of alternatives.
TEST_CASE(optimizer_repeat_offset)
{
    // The pattern places a bounded repeat ({0,2}) inside one branch of an alternation.
    auto const crashing_pattern = "\\/?\\??#?([\\/?#]|[\\uD800-\\uDBFF]|%[c-f][0-9a-f](%[89ab][0-9a-f]){0,2}(%[89ab]?)?|%[0-9a-f]?)$"sv;

    // Successfully constructing the regex is the whole check; there is nothing further to assert.
    Regex<ECMA262> re(crashing_pattern);
}