mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-04 10:18:51 +00:00
LibRegex: Explicitly check if a character falls into a table-based range
Previously, for a regex such as /[a-sy-z]/i, we would incorrectly think the character "u" fell into the range "a-s". Neither of the out-of-range conditions "u > s && U > s" or "u < a && U < a" would be true: in ASCII, uppercase "U" sorts before both "a" and "s", so the first conjunction fails, and lowercase "u" is not less than "a", so the second fails as well. As a result, the lookup fell back to assuming the character is in the range. Instead, first explicitly check if either case variant of the character falls into the range, rather than checking if it falls outside the range. If the explicit checks fail, then we know the character is outside the range.
This commit is contained in:
parent
27f5a18ce6
commit
48cb15283a
Notes:
sideshowbarker
2024-07-17 07:38:34 +09:00
Author: https://github.com/trflynn89
Commit: 48cb15283a
Pull-request: https://github.com/SerenityOS/serenity/pull/15070
Reviewed-by: https://github.com/alimpfard ✅
Reviewed-by: https://github.com/linusg ✅
2 changed files with 11 additions and 5 deletions
|
@ -690,7 +690,10 @@ TEST_CASE(ECMA262_match)
|
||||||
{ "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
|
{ "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
|
||||||
{ "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
|
{ "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
|
||||||
{ "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
|
{ "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
|
||||||
{ "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive } // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
|
{ "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive }, // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
|
||||||
|
{ "^[a-sy-z]$"sv, "b"sv, true, ECMAScriptFlags::Insensitive },
|
||||||
|
{ "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
|
||||||
|
{ "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
|
|
@ -557,11 +557,14 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
upper_case_needle = to_ascii_uppercase(needle);
|
upper_case_needle = to_ascii_uppercase(needle);
|
||||||
lower_case_needle = to_ascii_lowercase(needle);
|
lower_case_needle = to_ascii_lowercase(needle);
|
||||||
}
|
}
|
||||||
if (lower_case_needle > range.to && upper_case_needle > range.to)
|
|
||||||
|
if (lower_case_needle >= range.from && lower_case_needle <= range.to)
|
||||||
|
return 0;
|
||||||
|
if (upper_case_needle >= range.from && upper_case_needle <= range.to)
|
||||||
|
return 0;
|
||||||
|
if (lower_case_needle > range.to || upper_case_needle > range.to)
|
||||||
return 1;
|
return 1;
|
||||||
if (lower_case_needle < range.from && upper_case_needle < range.from)
|
return -1;
|
||||||
return -1;
|
|
||||||
return 0;
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (matching_range) {
|
if (matching_range) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue