From 02b50d463b174e5d525c7ab8ce8dd173d550de28 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Thu, 10 Oct 2024 09:58:31 +0200 Subject: [PATCH] AK: Cache all the line positions in LineTrackingLexer Also updates a LibWeb text test that used to report the wrong line number. --- AK/GenericLexer.cpp | 43 +++++++++++-------- AK/GenericLexer.h | 25 +++++++---- Tests/LibWeb/Text/expected/XML/error-page.txt | 2 +- Userland/Libraries/LibXML/Parser/Parser.h | 3 +- 4 files changed, 42 insertions(+), 31 deletions(-) diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp index dabeba3705f..3df80d37444 100644 --- a/AK/GenericLexer.cpp +++ b/AK/GenericLexer.cpp @@ -175,27 +175,32 @@ ErrorOr GenericLexer::consume_decimal_integer() LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const { - auto& [cached_index, cached_line, cached_column] = m_cached_position; - - if (cached_index <= index) { - for (size_t i = cached_index; i < index; ++i) { - if (m_input[i] == '\n') - ++cached_line, cached_column = 0; - else - ++cached_column; - } - } else { - auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n'); - cached_line -= lines_backtracked; - if (lines_backtracked == 0) { - cached_column -= cached_index - index; - } else { - auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0); - cached_column = index - current_line_start; + // Sad case: we have no idea where the nearest newline is, so we have to + // scan ahead a bit. + while (index > m_largest_known_line_start_position) { + auto next_newline = m_input.find('\n', m_largest_known_line_start_position); + if (!next_newline.has_value()) { + // No more newlines, add the end of the input as a line start to avoid searching again. + m_line_start_positions->insert(m_input.length(), m_line_start_positions->size()); + m_largest_known_line_start_position = m_input.length(); + break; } + m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size()); + m_largest_known_line_start_position = next_newline.value() + 1; } - cached_index = index; - return m_cached_position; + // We should always have at least the first line start position. + auto previous_line_it = m_line_start_positions->find_largest_not_above_iterator(index); + auto previous_line_index = previous_line_it.key(); + + auto line = *previous_line_it; + auto column = index - previous_line_index; + if (line == 0) { + // First line, take into account the start position. + column += m_first_line_start_position.column; + } + + line += m_first_line_start_position.line; + return { index, line, column }; } template ErrorOr GenericLexer::consume_decimal_integer(); diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h index 8466f33bc69..f6271b3eed3 100644 --- a/AK/GenericLexer.h +++ b/AK/GenericLexer.h @@ -6,6 +6,8 @@ #pragma once +#include +#include #include #include #include @@ -227,8 +229,6 @@ protected: class LineTrackingLexer : public GenericLexer { public: - using GenericLexer::GenericLexer; - struct Position { size_t offset { 0 }; size_t line { 0 }; @@ -237,20 +237,27 @@ public: LineTrackingLexer(StringView input, Position start_position) : GenericLexer(input) - , m_cached_position { - .line = start_position.line, - .column = start_position.column, - } + , m_first_line_start_position(start_position) + , m_line_start_positions(make>()) + { + m_line_start_positions->insert(0, 0); + auto first_newline = input.find('\n').map([](auto x) { return x + 1; }).value_or(input.length()); + m_line_start_positions->insert(first_newline, 1); + m_largest_known_line_start_position = first_newline; + } + + LineTrackingLexer(StringView input) + : LineTrackingLexer(input, { 0, 1, 1 }) { } - Position cached_position() const { return m_cached_position; } - void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; } Position position_for(size_t) const; Position current_position() const { return position_for(m_index); } protected: - mutable Position m_cached_position; + Position m_first_line_start_position; + mutable NonnullOwnPtr> m_line_start_positions; // offset -> line index + mutable size_t m_largest_known_line_start_position { 0 }; }; constexpr auto is_any_of(StringView values) diff --git a/Tests/LibWeb/Text/expected/XML/error-page.txt b/Tests/LibWeb/Text/expected/XML/error-page.txt index eb655d2a42a..ccdfafd1902 100644 --- a/Tests/LibWeb/Text/expected/XML/error-page.txt +++ b/Tests/LibWeb/Text/expected/XML/error-page.txt @@ -1,3 +1,3 @@ Got load event [object HTMLDocument] -Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59) +Failed to parse XML document: Expected '>' at line: 2, col: 20 (offset 59) diff --git a/Userland/Libraries/LibXML/Parser/Parser.h b/Userland/Libraries/LibXML/Parser/Parser.h index 95e0f62b511..7ff25368f66 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.h +++ b/Userland/Libraries/LibXML/Parser/Parser.h @@ -147,9 +147,8 @@ private: [[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current()) { return ArmedScopeGuard { - [this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] { + [this, position = m_lexer.tell(), location] { m_lexer.retreat(m_lexer.tell() - position); - m_lexer.restore_cached_offset(cached_position); (void)location; dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All)); }