LibRegex: Add a basic Regex<...>::replace()

This commit is contained in:
AnotherTest 2020-12-03 18:55:36 +03:30 committed by Andreas Kling
parent c85eaadb48
commit f12c98b29f
Notes: sideshowbarker 2024-07-19 00:58:55 +09:00
2 changed files with 75 additions and 0 deletions

View file

@ -37,6 +37,7 @@
#include <AK/Types.h>
#include <AK/Utf32View.h>
#include <AK/Vector.h>
#include <ctype.h>
#include <stdio.h>
@ -115,6 +116,47 @@ public:
return matcher->match(views, regex_options);
}
// Runs the matcher over `view` and returns a copy of the input in which each reported
// match is replaced by the expansion of `replacement_pattern`.
//
// Escape sequences recognised inside `replacement_pattern`:
//   - two consecutive backslashes produce one literal backslash;
//   - a backslash followed by a decimal number N, with 1 <= N <= n_capture_groups,
//     produces the text of capture group N of the match currently being replaced;
//   - any other backslash sequence (no digits, N == 0, N out of range, or a number that
//     does not fit in an unsigned int) is copied to the output verbatim.
//
// Returns a default-constructed String if there is no matcher or the pattern failed to
// parse, and the unmodified input if the match was unsuccessful.
String replace(const RegexStringView view, const StringView& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
    if (!matcher || parser_result.error != Error::NoError)
        return {};

    StringBuilder builder;
    size_t start_offset = 0;
    RegexResult result = matcher->match(view, regex_options);
    if (!result.success)
        return view.to_string();

    for (size_t i = 0; i < result.matches.size(); ++i) {
        auto& match = result.matches[i];

        // Copy the untouched text between the end of the previous match and this one.
        builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
        start_offset = match.global_offset + match.view.length();

        // The replacement pattern is re-expanded for every match, since group
        // references resolve against the capture groups of that specific match.
        GenericLexer lexer(replacement_pattern);
        while (!lexer.is_eof()) {
            if (!lexer.consume_specific('\\')) {
                // Plain text: copy everything up to the next backslash.
                builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
                continue;
            }

            if (lexer.consume_specific('\\')) {
                builder.append('\\');
                continue;
            }

            // Explicit ASCII range check instead of isdigit(): passing a negative char
            // value (any byte >= 0x80 where char is signed) to isdigit() is undefined.
            auto number = lexer.consume_while([](char ch) { return ch >= '0' && ch <= '9'; });
            auto index = number.to_uint();

            // Group references are 1-based. Index 0 must be rejected here, otherwise
            // `index - 1` wraps around and indexes far past the end of the group list.
            bool is_valid_group_reference = index.has_value() && index.value() > 0 && result.n_capture_groups >= index.value();
            if (is_valid_group_reference)
                builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
            else
                builder.appendff("\\{}", number);
        }
    }

    // Copy whatever follows the last match.
    builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
    return builder.to_string();
}
// FIXME: Add a replace() overload that takes a const Vector<RegexStringView>.
RegexResult search(const RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)

View file

@ -563,4 +563,37 @@ TEST_CASE(ECMA262_match)
}
}
TEST_CASE(replace)
{
    // One table row per scenario: compile `pattern` with `options`, run replace() on
    // `subject` using `replacement`, and compare the outcome against `expected`.
    struct ReplaceCase {
        const char* pattern;
        const char* replacement;
        const char* subject;
        const char* expected;
        ECMAScriptFlags options {};
    };

    constexpr ReplaceCase cases[] {
        // Subject does not match: it comes back unchanged.
        { "foo(.+)", "aaa", "test", "test" },
        // "\1" expands to the first capture group.
        { "foo(.+)", "test\\1", "foobar", "testbar" },
        // "\2" names a group that does not exist and is emitted literally.
        { "foo(.+)", "\\2\\1", "foobar", "\\2bar" },
        // An escaped backslash followed by a group reference.
        { "foo(.+)", "\\\\\\1", "foobar", "\\bar" },
        // Two matches in one subject, each expanded with its own capture group.
        { "foo(.)", "a\\1", "fooxfooy", "axay", ECMAScriptFlags::Multiline },
    };

    for (const auto& test : cases) {
        Regex<ECMA262> re(test.pattern, test.options);

#ifdef REGEX_DEBUG
        // Dump the compiled program for this pattern when regex debugging is enabled.
        dbg() << "\n";
        RegexDebug printer(stderr);
        printer.print_raw_bytecode(re);
        printer.print_header();
        printer.print_bytecode(re);
        dbg() << "\n";
#endif

        // The pattern must compile cleanly before the replacement result is meaningful.
        EXPECT_EQ(re.parser_result.error, Error::NoError);
        EXPECT_EQ(re.replace(test.subject, test.replacement), test.expected);
    }
}
TEST_MAIN(Regex)