diff --git a/Libraries/LibJS/Runtime/RegExpObject.cpp b/Libraries/LibJS/Runtime/RegExpObject.cpp
index 0d87d35146d..57caf5bfc39 100644
--- a/Libraries/LibJS/Runtime/RegExpObject.cpp
+++ b/Libraries/LibJS/Runtime/RegExpObject.cpp
@@ -96,19 +96,10 @@ ErrorOr parse_regex_pattern(StringView pattern,
     auto utf16_pattern = Utf16String::from_utf8(pattern);
     StringBuilder builder;
 
-    // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
-    // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
+    // FIXME: We need to escape multi-byte code units for LibRegex to parse since the lexer there doesn't handle unicode.
     auto previous_code_unit_was_backslash = false;
-    for (size_t i = 0; i < utf16_pattern.length_in_code_units();) {
-        if (unicode || unicode_sets) {
-            auto code_point = code_point_at(utf16_pattern, i);
-            builder.append_code_point(code_point.code_point);
-            i += code_point.code_unit_count;
-            continue;
-        }
-
+    for (size_t i = 0; i < utf16_pattern.length_in_code_units(); ++i) {
         u16 code_unit = utf16_pattern.code_unit_at(i);
-        ++i;
 
         if (code_unit > 0x7f) {
             // Incorrectly escaping this code unit will result in a wildly different regex than intended
diff --git a/Libraries/LibJS/Tests/builtins/RegExp/RegExp.js b/Libraries/LibJS/Tests/builtins/RegExp/RegExp.js
index 51e4471b490..5223b1db4eb 100644
--- a/Libraries/LibJS/Tests/builtins/RegExp/RegExp.js
+++ b/Libraries/LibJS/Tests/builtins/RegExp/RegExp.js
@@ -81,3 +81,23 @@ test("v flag should enable unicode mode", () => {
 test("parsing a large bytestring shouldn't crash", () => {
     RegExp(new Uint8Array(0x40000));
 });
+
+test("Unicode non-ASCII matching", () => {
+    // Each case runs `match.match(pattern)` and compares the result with `expected`;
+    // `null` means the pattern is expected not to match at all.
+    const cases = [
+        { pattern: /é/u, match: "é", expected: ["é"] },
+        { pattern: /é/, match: "é", expected: ["é"] },
+        { pattern: /\u{61}/u, match: "a", expected: ["a"] },
+        // Without the u flag, \u{61} is not a code point escape, so it must not match "a".
+        { pattern: /\u{61}/, match: "a", expected: null },
+        { pattern: /😄/u, match: "😄", expected: ["😄"] },
+        // "\ud83d" is only the high surrogate of 😄; the full emoji pattern must not match it in either mode.
+        { pattern: /😄/u, match: "\ud83d", expected: null },
+        { pattern: /😄/, match: "\ud83d", expected: null },
+    ];
+    for (const testCase of cases) {
+        const result = testCase.match.match(testCase.pattern);
+        expect(result).toEqual(testCase.expected);
+    }
+});