LibRegex: Remove the StringCopyMatches mode

This mode made a lot of incorrect assumptions about string lifetimes,
and instead of fixing it, let's just remove it and tweak the few unit
tests that used it.
This commit is contained in:
Andreas Kling 2025-03-24 20:12:52 +00:00
parent 41387c6b8d
commit a1c8e30ecc
6 changed files with 14 additions and 29 deletions

View file

@ -387,13 +387,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
VERIFY(start_position + length <= input.view.length());
auto view = input.view.substring_view(start_position, length);
if (input.regex_options & AllFlags::StringCopyMatches) {
match = { view.to_byte_string(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
} else {
match = { view, input.line, start_position, input.global_offset + start_position }; // take view to original string
}
match = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
return ExecutionResult::Continue;
}
@ -414,11 +408,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
auto view = input.view.substring_view(start_position, length);
if (input.regex_options & AllFlags::StringCopyMatches) {
match = { view.to_byte_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
} else {
match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string
}
match = { view, name(), input.line, start_position, input.global_offset + start_position };
return ExecutionResult::Continue;
}

View file

@ -40,7 +40,6 @@ enum __RegexAllFlags {
__Regex_MatchNotBeginOfLine = __Regex_Global << 6, // Pattern is not forced to ^ -> search in whole string!
__Regex_MatchNotEndOfLine = __Regex_Global << 7, // Don't Force the dollar sign, $, to always match end of the string, instead of end of the line. This option is ignored if the Multiline-flag is set
__Regex_SkipSubExprResults = __Regex_Global << 8, // Do not return sub expressions in the result
__Regex_StringCopyMatches = __Regex_Global << 9, // Do explicitly copy results into new allocated string instead of StringView to original string.
__Regex_SingleLine = __Regex_Global << 10, // Dot matches newline characters
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.

View file

@ -59,7 +59,8 @@ public:
{
}
explicit RegexStringView(ByteString&&) = delete;
RegexStringView(ByteString&&) = delete;
RegexStringView(String&&) = delete;
bool is_string_view() const
{

View file

@ -197,11 +197,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
state.matches.empend();
VERIFY(start_position + state.string_position - start_position <= input.view.length());
if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) {
state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_byte_string(), input.line, start_position, input.global_offset + start_position };
} else { // let the view point to the original string ...
state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position };
}
state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position };
};
#if REGEX_DEBUG

View file

@ -25,7 +25,6 @@ enum class AllFlags {
MatchNotBeginOfLine = __Regex_MatchNotBeginOfLine, // Pattern is not forced to ^ -> search in whole string!
MatchNotEndOfLine = __Regex_MatchNotEndOfLine, // Don't Force the dollar sign, $, to always match end of the string, instead of end of the line. This option is ignored if the Multiline-flag is set
SkipSubExprResults = __Regex_SkipSubExprResults, // Do not return sub expressions in the result
StringCopyMatches = __Regex_StringCopyMatches, // Do explicitly copy results into new allocated string instead of StringView to original string.
SingleLine = __Regex_SingleLine, // Dot matches newline characters
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
@ -53,7 +52,6 @@ enum class PosixFlags : FlagsUnderlyingType {
SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches,
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
};
enum class ECMAScriptFlags : FlagsUnderlyingType {
@ -67,7 +65,6 @@ enum class ECMAScriptFlags : FlagsUnderlyingType {
SingleLine = (FlagsUnderlyingType)AllFlags::SingleLine,
Sticky = (FlagsUnderlyingType)AllFlags::Sticky,
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
StringCopyMatches = (FlagsUnderlyingType)AllFlags::StringCopyMatches,
UnicodeSets = (FlagsUnderlyingType)AllFlags::UnicodeSets,
BrowserExtended = (FlagsUnderlyingType)AllFlags::Internal_BrowserExtended,
};

View file

@ -264,10 +264,10 @@ TEST_CASE(char_utf8)
TEST_CASE(catch_all_newline)
{
Regex<PosixExtended> re("^.*$", PosixFlags::Multiline | PosixFlags::StringCopyMatches);
Regex<PosixExtended> re("^.*$", PosixFlags::Multiline);
RegexResult result;
auto lambda = [&result, &re]() {
ByteString aaa = "Hello World\nTest\n1234\n";
ByteString aaa = "Hello World\nTest\n1234\n";
auto lambda = [&]() {
result = match(aaa, re);
EXPECT_EQ(result.success, true);
};
@ -297,7 +297,7 @@ TEST_CASE(catch_all_newline_2)
{
Regex<PosixExtended> re("^.*$");
RegexResult result;
result = match("Hello World\nTest\n1234\n"sv, re, PosixFlags::Multiline | PosixFlags::StringCopyMatches);
result = match("Hello World\nTest\n1234\n"sv, re, PosixFlags::Multiline);
EXPECT_EQ(result.success, true);
EXPECT_EQ(result.count, 3u);
EXPECT_EQ(result.matches.at(0).view, "Hello World");
@ -314,7 +314,7 @@ TEST_CASE(match_all_character_class)
{
Regex<PosixExtended> re("[[:alpha:]]");
ByteString str = "[Window]\nOpacity=255\nAudibleBeep=0\n";
RegexResult result = match(str, re, PosixFlags::Global | PosixFlags::StringCopyMatches);
RegexResult result = match(str, re, PosixFlags::Global);
EXPECT_EQ(result.success, true);
EXPECT_EQ(result.count, 24u);
@ -1005,7 +1005,8 @@ TEST_CASE(case_insensitive_match)
TEST_CASE(extremely_long_fork_chain)
{
Regex<ECMA262> re("(?:aa)*");
auto result = re.match(ByteString::repeated('a', 1000));
auto input = ByteString::repeated('a', 1000);
auto result = re.match(input);
EXPECT_EQ(result.success, true);
}
@ -1048,7 +1049,8 @@ BENCHMARK_CASE(fork_performance)
}
{
Regex<ECMA262> re("^(a|a?)+$");
auto result = re.match(ByteString::formatted("{}b", g_lots_of_a_s.substring_view(0, 100)));
auto input = ByteString::formatted("{}b", g_lots_of_a_s.substring_view(0, 100));
auto result = re.match(input);
EXPECT_EQ(result.success, false);
}
}