diff --git a/Libraries/LibRegex/RegexByteCode.h b/Libraries/LibRegex/RegexByteCode.h index 322c4c210a5..4be198117a6 100644 --- a/Libraries/LibRegex/RegexByteCode.h +++ b/Libraries/LibRegex/RegexByteCode.h @@ -261,6 +261,7 @@ public: } FlyString get_string(size_t index) const { return m_string_table.get(index); } + auto const& string_table() const { return m_string_table; } void last_chunk() const = delete; void first_chunk() const = delete; @@ -279,6 +280,8 @@ public: } m_string_table.m_table.set(entry.key, entry.value); } + for (auto const& entry : other.m_string_table.m_inverse_table) + m_string_table.m_inverse_table.set(entry.key, entry.value); } } diff --git a/Libraries/LibRegex/RegexDebug.h b/Libraries/LibRegex/RegexDebug.h index e5a8be10bbd..f8494263716 100644 --- a/Libraries/LibRegex/RegexDebug.h +++ b/Libraries/LibRegex/RegexDebug.h @@ -51,6 +51,13 @@ public: state.instruction_position += opcode.size(); } + out(m_file, "String Table:\n"); + for (auto const& entry : bytecode.string_table().m_table) + outln(m_file, "+ {} -> {:x}\n", entry.key, entry.value); + out(m_file, "Reverse String Table:\n"); + for (auto const& entry : bytecode.string_table().m_inverse_table) + outln(m_file, "+ {:x} -> {}\n", entry.key, entry.value); + fflush(m_file); } diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index 5a8d083fea8..a519c616bac 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -999,11 +999,10 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives if (alternatives.size() == 0) return; - target.merge_string_tables_from(alternatives); - if (alternatives.size() == 1) return target.extend(move(alternatives[0])); + target.merge_string_tables_from(alternatives); if (all_of(alternatives, [](auto& x) { return x.is_empty(); })) return; diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index e4a94e916f2..eb4c8ba1a55 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -743,6 +743,8 @@ TEST_CASE(ECMA262_match) { "(?=)(?=\\d)"sv, "smart"sv, false }, // Backrefs are cleared after lookaheads, the indices should be checked before lookup. { "(?!(b))\\1"sv, "a"sv, false }, + // String table merge bug: inverse map should be merged regardless of available direct mappings. + { "((?a)|(?b))"sv, "aa"sv, false }, }; for (auto& test : tests) {