diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index fdb95e73bd2..4a4738cad7c 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -1372,34 +1372,48 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives auto opcode_bytes = alternative_bytes.slice(state.instruction_position, opcode.size()); switch (opcode.opcode_id()) { - case OpCodeId::Jump: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + case OpCodeId::Jump: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::JumpNonEmpty: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + } + case OpCodeId::JumpNonEmpty: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::ForkJump: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + } + case OpCodeId::ForkJump: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::ForkStay: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + } + case OpCodeId::ForkStay: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::ForkReplaceJump: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + } + case OpCodeId::ForkReplaceJump: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::ForkReplaceStay: - incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); - has_any_backwards_jump |= static_cast(opcode).offset() < 0; + } + case OpCodeId::ForkReplaceStay: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(cast_opcode.offset() + cast_opcode.size() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= cast_opcode.offset() < 0; break; - case OpCodeId::Repeat: - incoming_jump_edges.ensure(state.instruction_position - static_cast(opcode).offset()).append({ opcode_bytes }); + } + case OpCodeId::Repeat: { + auto const& cast_opcode = static_cast(opcode); + incoming_jump_edges.ensure(state.instruction_position - cast_opcode.offset()).append({ opcode_bytes }); has_any_backwards_jump = true; break; + } default: break; } diff --git a/Tests/LibRegex/TestRegex.cpp b/Tests/LibRegex/TestRegex.cpp index bf8b19a39f3..50bd974f349 100644 --- a/Tests/LibRegex/TestRegex.cpp +++ b/Tests/LibRegex/TestRegex.cpp @@ -1367,3 +1367,15 @@ TEST_CASE(zero_width_backreference) EXPECT_EQ(result.capture_group_matches.first()[0].view.to_byte_string(), ""sv); } } + +TEST_CASE(account_for_opcode_size_calculating_incoming_jump_edges) +{ + { + // The optimizer should not optimize the initial ForkStay for these alternatives as they are jumped to from different locations. + Regex re(".*a|.*b", ECMAScriptFlags::Global); + auto result = re.match("aa"sv); + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.matches.size(), 1u); + EXPECT_EQ(result.matches.first().view.to_byte_string(), "aa"sv); + } +}