AK: Cache all the line positions in LineTrackingLexer

Also updates a LibWeb text test that used to report the wrong line
number.
This commit is contained in:
Ali Mohammad Pur 2024-10-10 09:58:31 +02:00 committed by Tim Ledbetter
commit 02b50d463b
Notes: github-actions[bot] 2024-10-10 22:54:58 +00:00
4 changed files with 42 additions and 31 deletions

View file

@ -175,27 +175,32 @@ ErrorOr<T> GenericLexer::consume_decimal_integer()
// NOTE(review): this hunk appears to be a side-by-side diff collapsed onto
// single lines: on each line the pre-commit text comes first and the
// post-commit text follows it (unchanged lines therefore appear twice).
//
// position_for(index) returns the Position { offset, line, column } that
// corresponds to the offset `index` into m_input.
//
// Pre-commit (left) version: keeps a single mutable m_cached_position and
// moves it to `index`, scanning forward one character at a time when the
// cached offset is at or before `index`, and walking it backwards otherwise.
//
// Post-commit (right) version: records line start offsets in
// m_line_start_positions, extending that table only as far as needed to cover
// `index`, then derives line and column from the table entry chosen for
// `index` plus m_first_line_start_position.
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
{ {
auto& [cached_index, cached_line, cached_column] = m_cached_position; // Sad case: we have no idea where the nearest newline is, so we have to
// scan ahead a bit.
// Left: forward case, the cached offset is at or before index.
// Right: keep discovering line starts while index lies beyond the last known one.
if (cached_index <= index) { while (index > m_largest_known_line_start_position) {
for (size_t i = cached_index; i < index; ++i) { auto next_newline = m_input.find('\n', m_largest_known_line_start_position);
if (m_input[i] == '\n') if (!next_newline.has_value()) {
++cached_line, cached_column = 0; // No more newlines, add the end of the input as a line start to avoid searching again.
// NOTE(review): the end-of-input entry is keyed at m_input.length(), so a
// lookup for index == m_input.length() would seem to resolve to that entry
// (a new line, column offset 0) even when the input does not end with a
// newline -- confirm this is intended.
else m_line_start_positions->insert(m_input.length(), m_line_start_positions->size());
++cached_column; m_largest_known_line_start_position = m_input.length();
} break;
} else {
// Left only, through `cached_column = index - current_line_start;`: index is
// behind the cached offset, so subtract the newlines found between index and
// the cached offset and recompute the column.
auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
cached_line -= lines_backtracked;
if (lines_backtracked == 0) {
cached_column -= cached_index - index;
} else {
auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0);
cached_column = index - current_line_start;
} }
// Right only: a newline was found; the offset just after it is stored as a
// line start whose value is the table size at the time of insertion.
m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size());
m_largest_known_line_start_position = next_newline.value() + 1;
} }
cached_index = index; // We should always have at least the first line start position.
// Left: return the updated cache.
// Right: pick the table entry for index (presumably the one with the largest
// key not above index -- verify against RedBlackTree); its key is used as the
// line start offset and its value as the line index.
return m_cached_position; auto previous_line_it = m_line_start_positions->find_largest_not_above_iterator(index);
auto previous_line_index = previous_line_it.key();
auto line = *previous_line_it;
auto column = index - previous_line_index;
if (line == 0) {
// First line, take into account the start position.
column += m_first_line_start_position.column;
}
line += m_first_line_start_position.line;
return { index, line, column };
} }
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>(); template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();

View file

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <AK/NonnullOwnPtr.h>
#include <AK/RedBlackTree.h>
#include <AK/Result.h> #include <AK/Result.h>
#include <AK/String.h> #include <AK/String.h>
#include <AK/StringView.h> #include <AK/StringView.h>
@ -227,8 +229,6 @@ protected:
class LineTrackingLexer : public GenericLexer { class LineTrackingLexer : public GenericLexer {
public: public:
using GenericLexer::GenericLexer;
struct Position { struct Position {
size_t offset { 0 }; size_t offset { 0 };
size_t line { 0 }; size_t line { 0 };
@ -237,20 +237,27 @@ public:
// NOTE(review): side-by-side diff collapsed onto single lines; on each line
// the pre-commit text comes first and the post-commit text follows it.
//
// Pre-commit (left) constructor: copies start_position.line and
// start_position.column into m_cached_position and has an empty body.
//
// Post-commit (right) constructor: stores start_position in
// m_first_line_start_position, allocates the line start table, records
// offset 0 as line index 0, and records the offset just after the first '\n'
// (or input.length() when there is no '\n') as line index 1. That offset also
// becomes m_largest_known_line_start_position.
LineTrackingLexer(StringView input, Position start_position) LineTrackingLexer(StringView input, Position start_position)
: GenericLexer(input) : GenericLexer(input)
, m_cached_position { , m_first_line_start_position(start_position)
.line = start_position.line, , m_line_start_positions(make<RedBlackTree<size_t, size_t>>())
.column = start_position.column, {
} m_line_start_positions->insert(0, 0);
auto first_newline = input.find('\n').map([](auto x) { return x + 1; }).value_or(input.length());
m_line_start_positions->insert(first_newline, 1);
m_largest_known_line_start_position = first_newline;
}
// Post-commit only: single-argument constructor that delegates to the one
// above with the start position { 0, 1, 1 } (presumably offset 0, line 1,
// column 1 -- the full Position definition is not visible here; confirm).
LineTrackingLexer(StringView input)
: LineTrackingLexer(input, { 0, 1, 1 })
{ {
} }
// NOTE(review): side-by-side diff collapsed onto single lines; on each line
// the pre-commit text comes first and the post-commit text follows it.
//
// Pre-commit only: accessors that let a caller save and later restore the
// cached position. They have no post-commit counterpart on these lines.
Position cached_position() const { return m_cached_position; }
void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
// Present on both sides: position lookup for an arbitrary offset, and for the
// lexer's current offset (m_index).
Position position_for(size_t) const; Position position_for(size_t) const;
Position current_position() const { return position_for(m_index); } Position current_position() const { return position_for(m_index); }
protected: protected:
// Left: the mutable cached position used by the pre-commit position_for().
// Right: the start position passed to the constructor.
mutable Position m_cached_position; Position m_first_line_start_position;
// Post-commit only: the line start table and the largest line start offset
// recorded so far. Both are mutable, and the const position_for() writes to them.
mutable NonnullOwnPtr<RedBlackTree<size_t, size_t>> m_line_start_positions; // offset -> line index
mutable size_t m_largest_known_line_start_position { 0 };
}; };
constexpr auto is_any_of(StringView values) constexpr auto is_any_of(StringView values)

View file

@ -1,3 +1,3 @@
Got load event Got load event
[object HTMLDocument] [object HTMLDocument]
Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59) Failed to parse XML document: Expected '>' at line: 2, col: 20 (offset 59)

View file

@ -147,9 +147,8 @@ private:
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current()) [[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
{ {
return ArmedScopeGuard { return ArmedScopeGuard {
[this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] { [this, position = m_lexer.tell(), location] {
m_lexer.retreat(m_lexer.tell() - position); m_lexer.retreat(m_lexer.tell() - position);
m_lexer.restore_cached_offset(cached_position);
(void)location; (void)location;
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All)); dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
} }