mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-24 13:35:12 +00:00
LibRegex: Fix greedy/reluctant modifiers in PosixExtendedParser
Also fixes the issue with assertions causing early termination when they fail.
This commit is contained in:
parent
45e5661296
commit
92ea9ed4a5
Notes:
sideshowbarker
2024-07-19 01:14:53 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/92ea9ed4a53 Pull-request: https://github.com/SerenityOS/serenity/pull/4103 Reviewed-by: https://github.com/linusg ✅
5 changed files with 42 additions and 26 deletions
|
@ -188,26 +188,26 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(const MatchInput&, MatchS
|
|||
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
||||
{
|
||||
if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||
return ExecutionResult::Failed;
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||
|| (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||
|| (0 == state.string_position && (input.regex_options & AllFlags::Global)))
|
||||
return ExecutionResult::Continue;
|
||||
|
||||
return ExecutionResult::Failed;
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
||||
{
|
||||
if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine))
|
||||
return ExecutionResult::Failed;
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine))
|
||||
|| (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
|
||||
return ExecutionResult::Succeeded;
|
||||
return ExecutionResult::Continue;
|
||||
|
||||
return ExecutionResult::Failed;
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(const MatchInput& input, MatchState& state, MatchOutput& output) const
|
||||
|
|
|
@ -204,7 +204,7 @@ public:
|
|||
void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
|
||||
{
|
||||
|
||||
// FORKSTAY _ALT
|
||||
// FORKJUMP _ALT
|
||||
// REGEXP ALT1
|
||||
// JUMP _END
|
||||
// LABEL _ALT
|
||||
|
@ -266,12 +266,12 @@ public:
|
|||
{
|
||||
// LABEL _START = -bytecode_to_repeat.size()
|
||||
// REGEXP
|
||||
// FORKJUMP _START (FORKSTAY -> Greedy)
|
||||
// FORKSTAY _START (FORKJUMP -> Greedy)
|
||||
|
||||
if (greedy)
|
||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||
else
|
||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
else
|
||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||
|
||||
bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
|
||||
}
|
||||
|
@ -279,7 +279,7 @@ public:
|
|||
void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
|
||||
{
|
||||
// LABEL _START
|
||||
// FORKSTAY _END (FORKJUMP -> Greedy)
|
||||
// FORKJUMP _END (FORKSTAY -> Greedy)
|
||||
// REGEXP
|
||||
// JUMP _START
|
||||
// LABEL _END
|
||||
|
@ -288,9 +288,9 @@ public:
|
|||
ByteCode bytecode;
|
||||
|
||||
if (greedy)
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
else
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||
else
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
|
||||
bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
|
||||
|
||||
|
@ -306,15 +306,15 @@ public:
|
|||
|
||||
void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
|
||||
{
|
||||
// FORKSTAY _END (FORKJUMP -> Greedy)
|
||||
// FORKJUMP _END (FORKSTAY -> Greedy)
|
||||
// REGEXP
|
||||
// LABEL _END
|
||||
ByteCode bytecode;
|
||||
|
||||
if (greedy)
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
else
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||
else
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
|
||||
bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
|
||||
|
||||
|
|
|
@ -202,23 +202,23 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
|
|||
} else if (match(TokenType::Plus)) {
|
||||
consume();
|
||||
|
||||
bool greedy = match(TokenType::Questionmark);
|
||||
if (greedy)
|
||||
bool nongreedy = match(TokenType::Questionmark);
|
||||
if (nongreedy)
|
||||
consume();
|
||||
|
||||
// Note: don't touch match_length_minimum, it's already correct
|
||||
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
|
||||
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, !nongreedy);
|
||||
return !has_error();
|
||||
|
||||
} else if (match(TokenType::Asterisk)) {
|
||||
consume();
|
||||
match_length_minimum = 0;
|
||||
|
||||
bool greedy = match(TokenType::Questionmark);
|
||||
if (greedy)
|
||||
bool nongreedy = match(TokenType::Questionmark);
|
||||
if (nongreedy)
|
||||
consume();
|
||||
|
||||
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, greedy);
|
||||
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, !nongreedy);
|
||||
|
||||
return !has_error();
|
||||
|
||||
|
@ -226,11 +226,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
|
|||
consume();
|
||||
match_length_minimum = 0;
|
||||
|
||||
bool greedy = match(TokenType::Questionmark);
|
||||
if (greedy)
|
||||
bool nongreedy = match(TokenType::Questionmark);
|
||||
if (nongreedy)
|
||||
consume();
|
||||
|
||||
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, greedy);
|
||||
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, !nongreedy);
|
||||
return !has_error();
|
||||
}
|
||||
|
||||
|
|
|
@ -121,9 +121,15 @@ protected:
|
|||
class PosixExtendedParser final : public Parser {
|
||||
public:
|
||||
explicit PosixExtendedParser(Lexer& lexer)
|
||||
: Parser(lexer) {};
|
||||
: Parser(lexer)
|
||||
{
|
||||
}
|
||||
|
||||
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
|
||||
: Parser(lexer, regex_options.value_or({})) {};
|
||||
: Parser(lexer, regex_options.value_or({}))
|
||||
{
|
||||
}
|
||||
|
||||
~PosixExtendedParser() = default;
|
||||
|
||||
private:
|
||||
|
|
|
@ -334,6 +334,16 @@ TEST_CASE(match_all_character_class)
|
|||
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
|
||||
}
|
||||
|
||||
TEST_CASE(match_character_class_with_assertion)
|
||||
{
|
||||
Regex<PosixExtended> re("[[:alpha:]]+$");
|
||||
String str = "abcdef";
|
||||
RegexResult result = match(str, re);
|
||||
|
||||
EXPECT_EQ(result.success, true);
|
||||
EXPECT_EQ(result.count, 1u);
|
||||
}
|
||||
|
||||
TEST_CASE(example_for_git_commit)
|
||||
{
|
||||
Regex<PosixExtended> re("^.*$");
|
||||
|
|
Loading…
Add table
Reference in a new issue