LibRegex: Add some more debugging info to bytecode block ranges

These were getting difficult to differentiate; now each one gets a
comment on where it came from to aid with future debugging.
This commit is contained in:
Ali Mohammad Pur 2024-12-11 10:55:38 +01:00 committed by Ali Mohammad Pur
commit 4a8d3e35a3
Notes: github-actions[bot] 2024-12-13 09:01:28 +00:00
2 changed files with 10 additions and 9 deletions

View file

@ -25,6 +25,7 @@ namespace Detail {
struct Block { struct Block {
size_t start; size_t start;
size_t end; size_t end;
StringView comment { "N/A"sv };
}; };
} }

View file

@ -59,18 +59,18 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
auto& op = static_cast<T const&>(opcode); auto& op = static_cast<T const&>(opcode);
ssize_t jump_offset = op.size() + op.offset(); ssize_t jump_offset = op.size() + op.offset();
if (jump_offset >= 0) { if (jump_offset >= 0) {
block_boundaries.append({ end_of_last_block, state.instruction_position }); block_boundaries.append({ end_of_last_block, state.instruction_position, "Jump ahead"sv });
end_of_last_block = state.instruction_position + opcode.size(); end_of_last_block = state.instruction_position + opcode.size();
} else { } else {
// This op jumps back, see if that's within this "block". // This op jumps back, see if that's within this "block".
if (jump_offset + state.instruction_position > end_of_last_block) { if (jump_offset + state.instruction_position > end_of_last_block) {
// Split the block! // Split the block!
block_boundaries.append({ end_of_last_block, jump_offset + state.instruction_position }); block_boundaries.append({ end_of_last_block, jump_offset + state.instruction_position, "Jump back 1"sv });
block_boundaries.append({ jump_offset + state.instruction_position, state.instruction_position }); block_boundaries.append({ jump_offset + state.instruction_position, state.instruction_position, "Jump back 2"sv });
end_of_last_block = state.instruction_position + opcode.size(); end_of_last_block = state.instruction_position + opcode.size();
} else { } else {
// Nope, it's just a jump to another block // Nope, it's just a jump to another block
block_boundaries.append({ end_of_last_block, state.instruction_position }); block_boundaries.append({ end_of_last_block, state.instruction_position, "Jump"sv });
end_of_last_block = state.instruction_position + opcode.size(); end_of_last_block = state.instruction_position + opcode.size();
} }
} }
@ -92,15 +92,15 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
check_jump.template operator()<OpCode_ForkStay>(opcode); check_jump.template operator()<OpCode_ForkStay>(opcode);
break; break;
case OpCodeId::FailForks: case OpCodeId::FailForks:
block_boundaries.append({ end_of_last_block, state.instruction_position }); block_boundaries.append({ end_of_last_block, state.instruction_position, "FailForks"sv });
end_of_last_block = state.instruction_position + opcode.size(); end_of_last_block = state.instruction_position + opcode.size();
break; break;
case OpCodeId::Repeat: { case OpCodeId::Repeat: {
// Repeat produces two blocks, one containing its repeated expr, and one after that. // Repeat produces two blocks, one containing its repeated expr, and one after that.
auto repeat_start = state.instruction_position - static_cast<OpCode_Repeat const&>(opcode).offset(); auto repeat_start = state.instruction_position - static_cast<OpCode_Repeat const&>(opcode).offset();
if (repeat_start > end_of_last_block) if (repeat_start > end_of_last_block)
block_boundaries.append({ end_of_last_block, repeat_start }); block_boundaries.append({ end_of_last_block, repeat_start, "Repeat"sv });
block_boundaries.append({ repeat_start, state.instruction_position }); block_boundaries.append({ repeat_start, state.instruction_position, "Repeat after"sv });
end_of_last_block = state.instruction_position + opcode.size(); end_of_last_block = state.instruction_position + opcode.size();
break; break;
} }
@ -116,7 +116,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
} }
if (end_of_last_block < bytecode_size) if (end_of_last_block < bytecode_size)
block_boundaries.append({ end_of_last_block, bytecode_size }); block_boundaries.append({ end_of_last_block, bytecode_size, "End"sv });
quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; }); quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; });
@ -664,7 +664,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
RegexDebug dbg; RegexDebug dbg;
dbg.print_bytecode(*this); dbg.print_bytecode(*this);
for (auto const& block : basic_blocks) for (auto const& block : basic_blocks)
dbgln("block from {} to {}", block.start, block.end); dbgln("block from {} to {} (comment: {})", block.start, block.end, block.comment);
} }
// A pattern such as: // A pattern such as: