mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-21 20:15:17 +00:00
LibRegex: Remove unused Utf8View/Utf32View support in RegexStringView
This commit is contained in:
parent
b1a189acfa
commit
96f1f15ad6
Notes:
github-actions[bot]
2025-04-16 08:06:14 +00:00
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/96f1f15ad6e Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/4370
2 changed files with 15 additions and 187 deletions
|
@ -14,7 +14,6 @@
|
|||
#include <AK/COWVector.h>
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/MemMem.h>
|
||||
#include <AK/RedBlackTree.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf16View.h>
|
||||
|
@ -29,11 +28,6 @@ class RegexStringView {
|
|||
public:
|
||||
RegexStringView() = default;
|
||||
|
||||
RegexStringView(ByteString const& string)
|
||||
: m_view(string.view())
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(String const& string)
|
||||
: m_view(string.bytes_as_string_view())
|
||||
{
|
||||
|
@ -44,22 +38,11 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
RegexStringView(Utf32View view)
|
||||
: m_view(view)
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(Utf16View view)
|
||||
: m_view(view)
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(Utf8View view)
|
||||
: m_view(view)
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(ByteString&&) = delete;
|
||||
RegexStringView(String&&) = delete;
|
||||
|
||||
bool is_string_view() const
|
||||
|
@ -72,21 +55,11 @@ public:
|
|||
return m_view.get<StringView>();
|
||||
}
|
||||
|
||||
Utf32View const& u32_view() const
|
||||
{
|
||||
return m_view.get<Utf32View>();
|
||||
}
|
||||
|
||||
Utf16View const& u16_view() const
|
||||
{
|
||||
return m_view.get<Utf16View>();
|
||||
}
|
||||
|
||||
Utf8View const& u8_view() const
|
||||
{
|
||||
return m_view.get<Utf8View>();
|
||||
}
|
||||
|
||||
bool unicode() const { return m_unicode; }
|
||||
void set_unicode(bool unicode) { m_unicode = unicode; }
|
||||
|
||||
|
@ -115,14 +88,12 @@ public:
|
|||
{
|
||||
return m_view.visit(
|
||||
[](Utf16View const& view) { return view.length_in_code_units(); },
|
||||
[](Utf8View const& view) { return view.byte_length(); },
|
||||
[](auto const& view) { return view.length(); });
|
||||
}
|
||||
|
||||
size_t length_of_code_point(u32 code_point) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[](Utf32View const&) { return 1; },
|
||||
[&](Utf16View const&) {
|
||||
if (code_point < 0x10000)
|
||||
return 1;
|
||||
|
@ -159,9 +130,6 @@ public:
|
|||
optional_string_storage = builder.to_byte_string();
|
||||
return RegexStringView { T { *optional_string_storage } };
|
||||
},
|
||||
[&](Utf32View) {
|
||||
return RegexStringView { Utf32View { data.data(), data.size() } };
|
||||
},
|
||||
[&](Utf16View) {
|
||||
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
|
||||
return RegexStringView { Utf16View { optional_utf16_storage } };
|
||||
|
@ -181,24 +149,6 @@ public:
|
|||
new_views.empend(view);
|
||||
return new_views;
|
||||
},
|
||||
[](Utf32View view) {
|
||||
if (view.is_empty())
|
||||
return Vector<RegexStringView> { view };
|
||||
|
||||
Vector<RegexStringView> views;
|
||||
u32 newline = '\n';
|
||||
while (!view.is_empty()) {
|
||||
auto position = AK::memmem_optional(view.code_points(), view.length() * sizeof(u32), &newline, sizeof(u32));
|
||||
if (!position.has_value())
|
||||
break;
|
||||
auto offset = position.value() / sizeof(u32);
|
||||
views.empend(view.substring_view(0, offset));
|
||||
view = view.substring_view(offset + 1, view.length() - offset - 1);
|
||||
}
|
||||
if (!view.is_empty())
|
||||
views.empend(view);
|
||||
return views;
|
||||
},
|
||||
[](Utf16View view) {
|
||||
if (view.is_empty())
|
||||
return Vector<RegexStringView> { view };
|
||||
|
@ -216,34 +166,6 @@ public:
|
|||
if (!view.is_empty())
|
||||
views.empend(view);
|
||||
return views;
|
||||
},
|
||||
[](Utf8View const& view) {
|
||||
if (view.is_empty())
|
||||
return Vector<RegexStringView> { view };
|
||||
|
||||
Vector<RegexStringView> views;
|
||||
auto it = view.begin();
|
||||
auto previous_newline_position_it = it;
|
||||
for (;;) {
|
||||
if (*it == '\n') {
|
||||
auto previous_offset = view.byte_offset_of(previous_newline_position_it);
|
||||
auto new_offset = view.byte_offset_of(it);
|
||||
auto slice = view.substring_view(previous_offset, new_offset - previous_offset);
|
||||
views.empend(slice);
|
||||
++it;
|
||||
previous_newline_position_it = it;
|
||||
}
|
||||
if (it.done())
|
||||
break;
|
||||
++it;
|
||||
}
|
||||
if (it != previous_newline_position_it) {
|
||||
auto previous_offset = view.byte_offset_of(previous_newline_position_it);
|
||||
auto new_offset = view.byte_offset_of(it);
|
||||
auto slice = view.substring_view(previous_offset, new_offset - previous_offset);
|
||||
views.empend(slice);
|
||||
}
|
||||
return views;
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -252,8 +174,7 @@ public:
|
|||
if (unicode()) {
|
||||
auto view = m_view.visit(
|
||||
[&](auto view) { return RegexStringView { view.substring_view(offset, length) }; },
|
||||
[&](Utf16View const& view) { return RegexStringView { view.unicode_substring_view(offset, length) }; },
|
||||
[&](Utf8View const& view) { return RegexStringView { view.unicode_substring_view(offset, length) }; });
|
||||
[&](Utf16View const& view) { return RegexStringView { view.unicode_substring_view(offset, length) }; });
|
||||
|
||||
view.set_unicode(unicode());
|
||||
return view;
|
||||
|
@ -302,13 +223,7 @@ public:
|
|||
return ch;
|
||||
}
|
||||
},
|
||||
[&](Utf32View const& view) -> u32 { return view[index]; },
|
||||
[&](Utf16View const& view) -> u32 { return view.code_point_at(index); },
|
||||
[&](Utf8View const& view) -> u32 {
|
||||
auto it = view.iterator_at_byte_offset(index);
|
||||
VERIFY(it != view.end());
|
||||
return *it;
|
||||
});
|
||||
[&](Utf16View const& view) -> u32 { return view.code_point_at(index); });
|
||||
}
|
||||
|
||||
u32 code_unit_at(size_t code_unit_index) const
|
||||
|
@ -325,13 +240,7 @@ public:
|
|||
return ch;
|
||||
}
|
||||
},
|
||||
[&](Utf32View const& view) -> u32 { return view[code_unit_index]; },
|
||||
[&](Utf16View const& view) -> u32 { return view.code_unit_at(code_unit_index); },
|
||||
[&](Utf8View const& view) -> u32 {
|
||||
auto it = view.iterator_at_byte_offset(code_unit_index);
|
||||
VERIFY(it != view.end());
|
||||
return *it;
|
||||
});
|
||||
[&](Utf16View const& view) -> u32 { return view.code_unit_at(code_unit_index); });
|
||||
}
|
||||
|
||||
size_t code_unit_offset_of(size_t code_point_index) const
|
||||
|
@ -341,71 +250,32 @@ public:
|
|||
Utf8View utf8_view { view };
|
||||
return utf8_view.byte_offset_of(code_point_index);
|
||||
},
|
||||
[&](Utf32View const&) -> u32 { return code_point_index; },
|
||||
[&](Utf16View const& view) -> u32 {
|
||||
return view.code_unit_offset_of(code_point_index);
|
||||
},
|
||||
[&](Utf8View const& view) -> u32 {
|
||||
return view.byte_offset_of(code_point_index);
|
||||
});
|
||||
}
|
||||
|
||||
bool operator==(char const* cstring) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) { return to_byte_string() == cstring; },
|
||||
[&](Utf16View) { return to_byte_string() == cstring; },
|
||||
[&](Utf8View const& view) { return view.as_string() == cstring; },
|
||||
[&](StringView view) { return view == cstring; });
|
||||
}
|
||||
|
||||
bool operator==(ByteString const& string) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) { return to_byte_string() == string; },
|
||||
[&](Utf16View) { return to_byte_string() == string; },
|
||||
[&](Utf8View const& view) { return view.as_string() == string; },
|
||||
[&](StringView view) { return view == string; });
|
||||
}
|
||||
|
||||
bool operator==(StringView string) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) { return to_byte_string() == string; },
|
||||
[&](Utf16View) { return to_byte_string() == string; },
|
||||
[&](Utf8View const& view) { return view.as_string() == string; },
|
||||
[&](StringView view) { return view == string; });
|
||||
}
|
||||
|
||||
bool operator==(Utf32View const& other) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View view) {
|
||||
return view.length() == other.length() && __builtin_memcmp(view.code_points(), other.code_points(), view.length() * sizeof(u32)) == 0;
|
||||
},
|
||||
[&](Utf16View) { return to_byte_string() == RegexStringView { other }.to_byte_string(); },
|
||||
[&](Utf8View const& view) { return view.as_string() == RegexStringView { other }.to_byte_string(); },
|
||||
[&](StringView view) { return view == RegexStringView { other }.to_byte_string(); });
|
||||
}
|
||||
|
||||
bool operator==(Utf16View const& other) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) { return to_byte_string() == RegexStringView { other }.to_byte_string(); },
|
||||
[&](Utf16View const& view) { return view == other; },
|
||||
[&](Utf8View const& view) { return view.as_string() == RegexStringView { other }.to_byte_string(); },
|
||||
[&](StringView view) { return view == RegexStringView { other }.to_byte_string(); });
|
||||
}
|
||||
|
||||
bool operator==(Utf8View const& other) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) { return to_byte_string() == other.as_string(); },
|
||||
[&](Utf16View) { return to_byte_string() == other.as_string(); },
|
||||
[&](Utf8View const& view) { return view.as_string() == other.as_string(); },
|
||||
[&](StringView view) { return other.as_string() == view; });
|
||||
}
|
||||
|
||||
bool equals(RegexStringView other) const
|
||||
{
|
||||
return other.m_view.visit([this](auto const& view) { return operator==(view); });
|
||||
|
@ -431,48 +301,15 @@ public:
|
|||
bool starts_with(StringView str) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View) -> bool {
|
||||
TODO();
|
||||
},
|
||||
[&](Utf16View) -> bool {
|
||||
TODO();
|
||||
},
|
||||
[&](Utf8View const& view) { return view.as_string().starts_with(str); },
|
||||
[&](StringView view) { return view.starts_with(str); });
|
||||
}
|
||||
|
||||
bool starts_with(Utf32View const& str) const
|
||||
{
|
||||
return m_view.visit(
|
||||
[&](Utf32View view) -> bool {
|
||||
if (str.length() > view.length())
|
||||
return false;
|
||||
if (str.length() == view.length())
|
||||
return operator==(str);
|
||||
for (size_t i = 0; i < str.length(); ++i) {
|
||||
if (str.at(i) != view.at(i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
},
|
||||
[&](Utf16View) -> bool { TODO(); },
|
||||
[&](Utf8View const& view) {
|
||||
auto it = view.begin();
|
||||
for (auto code_point : str) {
|
||||
if (it.done())
|
||||
return false;
|
||||
if (code_point != *it)
|
||||
return false;
|
||||
++it;
|
||||
}
|
||||
return true;
|
||||
},
|
||||
[&](StringView) -> bool { TODO(); });
|
||||
}
|
||||
|
||||
private:
|
||||
Variant<StringView, Utf8View, Utf16View, Utf32View> m_view { StringView {} };
|
||||
bool m_unicode { false };
|
||||
[[no_unique_address]] Variant<StringView, Utf16View> m_view { StringView {} };
|
||||
[[no_unique_address]] bool m_unicode { false };
|
||||
};
|
||||
|
||||
class Match final {
|
||||
|
|
|
@ -253,20 +253,11 @@ TEST_CASE(catch_all_again)
|
|||
EXPECT_EQ(has_match("Hello World"sv, re), true);
|
||||
}
|
||||
|
||||
TEST_CASE(char_utf8)
|
||||
{
|
||||
Regex<PosixExtended> re("😀");
|
||||
RegexResult result;
|
||||
|
||||
EXPECT_EQ((result = match(Utf8View { "Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界"sv }, re, PosixFlags::Global)).success, true);
|
||||
EXPECT_EQ(result.count, 2u);
|
||||
}
|
||||
|
||||
TEST_CASE(catch_all_newline)
|
||||
{
|
||||
Regex<PosixExtended> re("^.*$", PosixFlags::Multiline);
|
||||
RegexResult result;
|
||||
ByteString aaa = "Hello World\nTest\n1234\n";
|
||||
String aaa = "Hello World\nTest\n1234\n"_string;
|
||||
auto lambda = [&]() {
|
||||
result = match(aaa, re);
|
||||
EXPECT_EQ(result.success, true);
|
||||
|
@ -283,7 +274,7 @@ TEST_CASE(catch_all_newline_view)
|
|||
Regex<PosixExtended> re("^.*$", PosixFlags::Multiline);
|
||||
RegexResult result;
|
||||
|
||||
ByteString aaa = "Hello World\nTest\n1234\n";
|
||||
String aaa = "Hello World\nTest\n1234\n"_string;
|
||||
result = match(aaa, re);
|
||||
EXPECT_EQ(result.success, true);
|
||||
EXPECT_EQ(result.count, 3u);
|
||||
|
@ -313,7 +304,7 @@ TEST_CASE(catch_all_newline_2)
|
|||
TEST_CASE(match_all_character_class)
|
||||
{
|
||||
Regex<PosixExtended> re("[[:alpha:]]");
|
||||
ByteString str = "[Window]\nOpacity=255\nAudibleBeep=0\n";
|
||||
String str = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
|
||||
RegexResult result = match(str, re, PosixFlags::Global);
|
||||
|
||||
EXPECT_EQ(result.success, true);
|
||||
|
@ -326,7 +317,7 @@ TEST_CASE(match_all_character_class)
|
|||
TEST_CASE(match_character_class_with_assertion)
|
||||
{
|
||||
Regex<PosixExtended> re("[[:alpha:]]+$");
|
||||
ByteString str = "abcdef";
|
||||
String str = "abcdef"_string;
|
||||
RegexResult result = match(str, re);
|
||||
|
||||
EXPECT_EQ(result.success, true);
|
||||
|
@ -421,7 +412,7 @@ TEST_CASE(named_capture_group)
|
|||
regex_dbg.print_bytecode(re);
|
||||
}
|
||||
|
||||
ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
|
||||
String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
|
||||
EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true);
|
||||
EXPECT_EQ(result.count, 2u);
|
||||
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
|
||||
|
@ -444,7 +435,7 @@ TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
|
|||
regex_dbg.print_bytecode(re);
|
||||
}
|
||||
|
||||
ByteString haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n";
|
||||
String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
|
||||
EXPECT_EQ(re.search(haystack, result, ECMAScriptFlags::Multiline), true);
|
||||
EXPECT_EQ(result.count, 2u);
|
||||
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
|
||||
|
@ -1009,7 +1000,7 @@ TEST_CASE(case_insensitive_match)
|
|||
TEST_CASE(extremely_long_fork_chain)
|
||||
{
|
||||
Regex<ECMA262> re("(?:aa)*");
|
||||
auto input = ByteString::repeated('a', 1000);
|
||||
auto input = MUST(String::repeated('a', 1000));
|
||||
auto result = re.match(input);
|
||||
EXPECT_EQ(result.success, true);
|
||||
}
|
||||
|
@ -1037,7 +1028,7 @@ TEST_CASE(theoretically_infinite_loop)
|
|||
}
|
||||
}
|
||||
|
||||
static auto g_lots_of_a_s = ByteString::repeated('a', 10'000'000);
|
||||
static auto g_lots_of_a_s = String::repeated('a', 10'000'000).release_value();
|
||||
|
||||
BENCHMARK_CASE(fork_performance)
|
||||
{
|
||||
|
@ -1048,12 +1039,12 @@ BENCHMARK_CASE(fork_performance)
|
|||
}
|
||||
{
|
||||
Regex<ECMA262> re("(a+)+b");
|
||||
auto result = re.match(g_lots_of_a_s.substring_view(0, 100));
|
||||
auto result = re.match(g_lots_of_a_s.bytes_as_string_view().substring_view(0, 100));
|
||||
EXPECT_EQ(result.success, false);
|
||||
}
|
||||
{
|
||||
Regex<ECMA262> re("^(a|a?)+$");
|
||||
auto input = ByteString::formatted("{}b", g_lots_of_a_s.substring_view(0, 100));
|
||||
auto input = MUST(String::formatted("{}b", g_lots_of_a_s.bytes_as_string_view().substring_view(0, 100)));
|
||||
auto result = re.match(input);
|
||||
EXPECT_EQ(result.success, false);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue