LibRegex: Treat the UnicodeSets flag as Unicode

Fixes regular expressions that use the `v` (unicodeSets) flag, e.g. /.../v, not being interpreted as Unicode patterns.
This commit is contained in:
Ali Mohammad Pur 2025-02-28 12:29:44 +01:00 committed by Tim Flynn
commit ea3b7efd91
Notes: github-actions[bot] 2025-02-28 19:32:42 +00:00
3 changed files with 7 additions and 2 deletions

View file

@ -72,3 +72,8 @@ test("regexp that always matches stops matching if it's past the end of the stri
expect("whf".match(re)).toEqual(["", "", "", ""]);
expect(re.lastIndex).toBe(0);
});
test("v flag should enable unicode mode", () => {
    // With the `v` flag, `\u{...}` in the pattern source must be read as a
    // code point escape, so the pattern matches "a" followed by U+10FFFF.
    const pattern = new RegExp("a\\u{10FFFF}", "v");
    const matched = pattern.test("a\u{10FFFF}");
    expect(matched).toBe(true);
});

View file

@ -160,7 +160,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
input.start_offset = m_pattern->start_offset;
size_t lines_to_skip = 0;
bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode);
bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode) || input.regex_options.has_flag_set(AllFlags::UnicodeSets);
for (auto const& view : views)
const_cast<RegexStringView&>(view).set_unicode(unicode);

View file

@ -627,7 +627,7 @@ bool Regex<Parser>::attempt_rewrite_entire_match_as_substring_search(BasicBlockL
auto& bytecode = parser_result.bytecode;
auto is_unicode = parser_result.options.has_flag_set(AllFlags::Unicode);
auto is_unicode = parser_result.options.has_flag_set(AllFlags::Unicode) || parser_result.options.has_flag_set(AllFlags::UnicodeSets);
// We have a single basic block, let's see if it's a series of character or string compares.
StringBuilder final_string;