LibRegex: Compare code units (not code points) in non-Unicode char range

This commit is contained in:
Timothy Flynn 2025-07-21 12:05:24 -04:00 committed by Ali Mohammad Pur
commit 2dfcc4c307
Notes: github-actions[bot] 2025-07-21 21:45:44 +00:00
10 changed files with 1045 additions and 1034 deletions

View file

@ -531,7 +531,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
return ExecutionResult::Failed_ExecuteLowPrioForks;
auto character_class = (CharClass)m_bytecode->at(offset++);
auto ch = input.view[state.string_position_in_code_units];
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
compare_character_class(input, state, character_class, ch, current_inversion_state(), inverse_matched);
break;
@ -548,8 +548,8 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
offset += count_insensitive;
bool const insensitive = input.regex_options & AllFlags::Insensitive;
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
auto ch = input.view[state.string_position_in_code_units];
if (insensitive)
ch = to_ascii_lowercase(ch);
@ -578,7 +578,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
auto from = value.from;
auto to = value.to;
auto ch = input.view[state.string_position_in_code_units];
auto ch = input.view.code_unit_at(state.string_position_in_code_units);
compare_character_range(input, state, from, to, ch, current_inversion_state(), inverse_matched);
break;

View file

@ -818,6 +818,17 @@ TEST_CASE(ECMA262_unicode_match)
{ "[\\ufb06]"sv, "\ufb05"sv, false, ECMAScriptFlags::Unicode },
{ "[\\ufb05]"sv, "\ufb06"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
{ "[\\ufb06]"sv, "\ufb05"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
// https://github.com/LadybirdBrowser/ladybird/issues/5549
{ "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "😀"sv, true },
{ "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
{ "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "a"sv, false },
{ "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "a"sv, false, ECMAScriptFlags::Unicode },
{
"\\ud83c[\\udffb-\\udfff](?=\\ud83c[\\udffb-\\udfff])|(?:[^\\ud800-\\udfff][\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]?|[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff]|[\\ud800-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?(?:\\u200d(?:[^\\ud800-\\udfff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?)*"sv,
"😀"sv,
true,
}
};
for (auto& test : tests) {

View file

@ -28,30 +28,30 @@ Pass encodeInto() into ArrayBuffer with A and destination length 10, offset 0, f
Pass encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler random
Pass encodeInto() into ArrayBuffer with A and destination length 10, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler random
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 0, filler random
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06 and destination length 4, offset 4, filler random
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 0, filler random
Pass encodeInto() into ArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with <EFBFBD><EFBFBD><EFBFBD>U+df06A and destination length 3, offset 4, filler random
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler random
Pass encodeInto() into ArrayBuffer with 𝌆 and destination length 4, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler random
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0

File diff suppressed because it is too large Load diff

View file

@ -236,7 +236,7 @@ Pass Parsing origin: <http://> against <http
Pass Parsing origin: <http://./> against <about:blank>
Pass Parsing origin: <http://../> against <about:blank>
Pass Parsing origin: <h://.> against <about:blank>
Pass Parsing origin: <http://foo:<EFBFBD><EFBFBD><EFBFBD>U+dca9@example.com/bar> against <http://other.com/>
Pass Parsing origin: <http://foo:💩@example.com/bar> against <http://other.com/>
Pass Parsing origin: <#> against <test:test>
Pass Parsing origin: <#x> against <mailto:x@x.com>
Pass Parsing origin: <#x> against <about:blank>
@ -405,4 +405,4 @@ Pass Parsing origin: <non-special:\\opaque\path> against <about:blank>
Pass Parsing origin: <non-special:\/opaque> against <about:blank>
Pass Parsing origin: <non-special:/\path> against <about:blank>
Pass Parsing origin: <non-special://host/a\b> against <about:blank>
Pass Parsing origin: <http://example.com//U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿> against <about:blank>
Pass Parsing origin: <http://example.com//U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿> against <about:blank>

View file

@ -334,7 +334,7 @@ Pass Parsing: <http://[::1.]> against <http://other.com/>
Pass Parsing: <http://[::.1]> against <http://other.com/>
Pass Parsing: <http://[::%31]> against <http://other.com/>
Pass Parsing: <http://%5B::1]> against <http://other.com/>
Pass Parsing: <http://foo:<EFBFBD><EFBFBD><EFBFBD>U+dca9@example.com/bar> against <http://other.com/>
Pass Parsing: <http://foo:💩@example.com/bar> against <http://other.com/>
Pass Parsing: <#> against <test:test>
Pass Parsing: <#x> against <mailto:x@x.com>
Pass Parsing: <#x> against <data:,>
@ -731,7 +731,7 @@ Pass Parsing: <a!@$*=/foo.html> against <file:///some/dir/bar.html>
Pass Parsing: <a1234567890-+.:foo/bar> against <http://example.com/dir/file>
Pass Parsing: <file://a­b/p> without base
Pass Parsing: <file://a%C2%ADb/p> without base
Pass Parsing: <file://loC<EFBFBD><EFBFBD><EFBFBD>U+dc00<30><30><EFBFBD>U+dc0b<30><62><EFBFBD>U+dc07<30><37><EFBFBD>U+dc28<32><38><EFBFBD>U+dc2c<32><63><EFBFBD>U+dc2d/usr/bin> without base
Pass Parsing: <file://loC𝐀𝐋𝐇𝐨𝐬𝐭/usr/bin> without base
Pass Parsing: <file://­/p> without base
Pass Parsing: <file://%C2%AD/p> without base
Pass Parsing: <file://xn--/p> without base
@ -787,7 +787,7 @@ Pass Parsing: <http://foo.09..> without base
Pass Parsing: <http://0999999999999999999/> without base
Pass Parsing: <http://foo.0x> without base
Pass Parsing: <http://foo.0XFfFfFfFfFfFfFfFfFfAcE123> without base
Pass Parsing: <http://<EFBFBD><EFBFBD><EFBFBD>U+dca9.123/> without base
Pass Parsing: <http://💩.123/> without base
Pass Parsing: <https://y> without base
Pass Parsing: <https://x/y> without base
Pass Parsing: <https://x/?y> without base
@ -885,4 +885,4 @@ Pass Parsing: <non-special:\/opaque> without base
Pass Parsing: <non-special:/\path> without base
Pass Parsing: <non-special://host\a> without base
Pass Parsing: <non-special://host/a\b> without base
Pass Parsing: <http://example.com//U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿> without base
Pass Parsing: <http://example.com//U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿> without base

View file

@ -236,7 +236,7 @@ Pass Origin parsing: <http://> against <http
Pass Origin parsing: <http://./> without base
Pass Origin parsing: <http://../> without base
Pass Origin parsing: <h://.> without base
Pass Origin parsing: <http://foo:<EFBFBD><EFBFBD><EFBFBD>U+dca9@example.com/bar> against <http://other.com/>
Pass Origin parsing: <http://foo:💩@example.com/bar> against <http://other.com/>
Pass Origin parsing: <#> against <test:test>
Pass Origin parsing: <#x> against <mailto:x@x.com>
Pass Origin parsing: <#x> against <data:,>
@ -406,4 +406,4 @@ Pass Origin parsing: <non-special:\\opaque\path> without base
Pass Origin parsing: <non-special:\/opaque> without base
Pass Origin parsing: <non-special:/\path> without base
Pass Origin parsing: <non-special://host/a\b> without base
Pass Origin parsing: <http://example.com//U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD>U+dffeU+dfff﷐﷏﷯ﷰ￾￿> without base
Pass Origin parsing: <http://example.com//U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿??U+d800<30><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD>U+dfff﷐﷏﷯ﷰ￾￿> without base

View file

@ -7,8 +7,8 @@ Pass Parse and sort: z=b&a=b&z=a&a=a
Pass URL parse and sort: z=b&a=b&z=a&a=a
Pass Parse and sort: <20>=x&&<26>=a
Pass URL parse and sort: <20>=x&&<26>=a
Pass Parse and sort: ffi&<EFBFBD><EFBFBD><EFBFBD>U+df08
Pass URL parse and sort: ffi&<EFBFBD><EFBFBD><EFBFBD>U+df08
Pass Parse and sort: ffi&🌈
Pass URL parse and sort: ffi&🌈
Pass Parse and sort: é&e<>&é
Pass URL parse and sort: é&e<>&é
Pass Parse and sort: z=z&a=a&z=y&a=b&z=x&a=c&z=w&a=d&z=v&a=e&z=u&a=f&z=t&a=g
@ -17,6 +17,6 @@ Pass Parse and sort: bbb&bb&aaa&aa=x&aa=y
Pass URL parse and sort: bbb&bb&aaa&aa=x&aa=y
Pass Parse and sort: z=z&=f&=t&=x
Pass URL parse and sort: z=z&=f&=t&=x
Pass Parse and sort: a<EFBFBD><EFBFBD><EFBFBD>U+df08&a<><61><EFBFBD>U+dca9
Pass URL parse and sort: a<EFBFBD><EFBFBD><EFBFBD>U+df08&a<><61><EFBFBD>U+dca9
Pass Parse and sort: a🌈&a💩
Pass URL parse and sort: a🌈&a💩
Pass Sorting non-existent params removes ? from URL

View file

@ -13,7 +13,7 @@ Pass Serialize &
Pass Serialize *-._
Pass Serialize %
Pass Serialize \0
Pass Serialize <EFBFBD><EFBFBD><EFBFBD>U+dca9
Pass Serialize 💩
Pass URLSearchParams.toString
Pass URLSearchParams connected to URL
Pass URLSearchParams must not do newline normalization

View file

@ -159,14 +159,14 @@ Pass Pattern: [{"password":"café"}] Inputs: [{"password":"café"}]
Pass Pattern: [{"password":"caf%c3%a9"}] Inputs: [{"password":"café"}]
Pass Pattern: [{"hostname":"xn--caf-dma.com"}] Inputs: [{"hostname":"café.com"}]
Pass Pattern: [{"hostname":"café.com"}] Inputs: [{"hostname":"café.com"}]
Pass Pattern: ["http://<EFBFBD><EFBFBD><EFBFBD>U+deb2.com/"] Inputs: ["http://<2F><><EFBFBD>U+deb2.com/"]
Pass Pattern: ["http://🚲.com/"] Inputs: ["http://🚲.com/"]
Pass Pattern: ["http://\ud83d \udeb2"] Inputs: undefined
Pass Pattern: [{"hostname":"\ud83d \udeb2"}] Inputs: undefined
Pass Pattern: [{"pathname":"\ud83d \udeb2"}] Inputs: []
Pass Pattern: [{"pathname":":\ud83d \udeb2"}] Inputs: undefined
Pass Pattern: [{"pathname":":a<EFBFBD><EFBFBD><EFBFBD>U+dd00b"}] Inputs: []
Pass Pattern: [{"pathname":"test/:a<EFBFBD><EFBFBD><EFBFBD>U+dc50b"}] Inputs: [{"pathname":"test/foo"}]
Pass Pattern: [{"pathname":":<EFBFBD><EFBFBD><EFBFBD>U+deb2"}] Inputs: undefined
Pass Pattern: [{"pathname":":a󠄀b"}] Inputs: []
Pass Pattern: [{"pathname":"test/:a𐑐b"}] Inputs: [{"pathname":"test/foo"}]
Pass Pattern: [{"pathname":":🚲"}] Inputs: undefined
Pass Pattern: [{"port":""}] Inputs: [{"protocol":"http","port":"80"}]
Pass Pattern: [{"protocol":"http","port":"80"}] Inputs: [{"protocol":"http","port":"80"}]
Pass Pattern: [{"protocol":"http","port":"80{20}?"}] Inputs: [{"protocol":"http","port":"80"}]