mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-02 06:09:08 +00:00
AK: Cache all the line positions in LineTrackingLexer
Also updates a LibWeb text test that used to report the wrong line number.
This commit is contained in:
parent
e5f87eb12b
commit
02b50d463b
Notes:
github-actions[bot]
2024-10-10 22:54:58 +00:00
Author: https://github.com/alimpfard
Commit: 02b50d463b
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1710
Reviewed-by: https://github.com/DanShaders ✅
Reviewed-by: https://github.com/tcl3 ✅
4 changed files with 42 additions and 31 deletions
|
@@ -175,27 +175,32 @@ ErrorOr<T> GenericLexer::consume_decimal_integer()
|
||||||
|
|
||||||
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
||||||
{
|
{
|
||||||
auto& [cached_index, cached_line, cached_column] = m_cached_position;
|
// Sad case: we have no idea where the nearest newline is, so we have to
|
||||||
|
// scan ahead a bit.
|
||||||
if (cached_index <= index) {
|
while (index > m_largest_known_line_start_position) {
|
||||||
for (size_t i = cached_index; i < index; ++i) {
|
auto next_newline = m_input.find('\n', m_largest_known_line_start_position);
|
||||||
if (m_input[i] == '\n')
|
if (!next_newline.has_value()) {
|
||||||
++cached_line, cached_column = 0;
|
// No more newlines, add the end of the input as a line start to avoid searching again.
|
||||||
else
|
m_line_start_positions->insert(m_input.length(), m_line_start_positions->size());
|
||||||
++cached_column;
|
m_largest_known_line_start_position = m_input.length();
|
||||||
}
|
break;
|
||||||
} else {
|
|
||||||
auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
|
|
||||||
cached_line -= lines_backtracked;
|
|
||||||
if (lines_backtracked == 0) {
|
|
||||||
cached_column -= cached_index - index;
|
|
||||||
} else {
|
|
||||||
auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0);
|
|
||||||
cached_column = index - current_line_start;
|
|
||||||
}
|
}
|
||||||
|
m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size());
|
||||||
|
m_largest_known_line_start_position = next_newline.value() + 1;
|
||||||
}
|
}
|
||||||
cached_index = index;
|
// We should always have at least the first line start position.
|
||||||
return m_cached_position;
|
auto previous_line_it = m_line_start_positions->find_largest_not_above_iterator(index);
|
||||||
|
auto previous_line_index = previous_line_it.key();
|
||||||
|
|
||||||
|
auto line = *previous_line_it;
|
||||||
|
auto column = index - previous_line_index;
|
||||||
|
if (line == 0) {
|
||||||
|
// First line, take into account the start position.
|
||||||
|
column += m_first_line_start_position.column;
|
||||||
|
}
|
||||||
|
|
||||||
|
line += m_first_line_start_position.line;
|
||||||
|
return { index, line, column };
|
||||||
}
|
}
|
||||||
|
|
||||||
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
||||||
|
|
|
@@ -6,6 +6,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/NonnullOwnPtr.h>
|
||||||
|
#include <AK/RedBlackTree.h>
|
||||||
#include <AK/Result.h>
|
#include <AK/Result.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
@@ -227,8 +229,6 @@ protected:
|
||||||
|
|
||||||
class LineTrackingLexer : public GenericLexer {
|
class LineTrackingLexer : public GenericLexer {
|
||||||
public:
|
public:
|
||||||
using GenericLexer::GenericLexer;
|
|
||||||
|
|
||||||
struct Position {
|
struct Position {
|
||||||
size_t offset { 0 };
|
size_t offset { 0 };
|
||||||
size_t line { 0 };
|
size_t line { 0 };
|
||||||
|
@@ -237,20 +237,27 @@ public:
|
||||||
|
|
||||||
LineTrackingLexer(StringView input, Position start_position)
|
LineTrackingLexer(StringView input, Position start_position)
|
||||||
: GenericLexer(input)
|
: GenericLexer(input)
|
||||||
, m_cached_position {
|
, m_first_line_start_position(start_position)
|
||||||
.line = start_position.line,
|
, m_line_start_positions(make<RedBlackTree<size_t, size_t>>())
|
||||||
.column = start_position.column,
|
{
|
||||||
}
|
m_line_start_positions->insert(0, 0);
|
||||||
|
auto first_newline = input.find('\n').map([](auto x) { return x + 1; }).value_or(input.length());
|
||||||
|
m_line_start_positions->insert(first_newline, 1);
|
||||||
|
m_largest_known_line_start_position = first_newline;
|
||||||
|
}
|
||||||
|
|
||||||
|
LineTrackingLexer(StringView input)
|
||||||
|
: LineTrackingLexer(input, { 0, 1, 1 })
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
Position cached_position() const { return m_cached_position; }
|
|
||||||
void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
|
|
||||||
Position position_for(size_t) const;
|
Position position_for(size_t) const;
|
||||||
Position current_position() const { return position_for(m_index); }
|
Position current_position() const { return position_for(m_index); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
mutable Position m_cached_position;
|
Position m_first_line_start_position;
|
||||||
|
mutable NonnullOwnPtr<RedBlackTree<size_t, size_t>> m_line_start_positions; // offset -> line index
|
||||||
|
mutable size_t m_largest_known_line_start_position { 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr auto is_any_of(StringView values)
|
constexpr auto is_any_of(StringView values)
|
||||||
|
|
|
@@ -1,3 +1,3 @@
|
||||||
Got load event
|
Got load event
|
||||||
[object HTMLDocument]
|
[object HTMLDocument]
|
||||||
Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59)
|
Failed to parse XML document: Expected '>' at line: 2, col: 20 (offset 59)
|
||||||
|
|
|
@@ -147,9 +147,8 @@ private:
|
||||||
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
|
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
|
||||||
{
|
{
|
||||||
return ArmedScopeGuard {
|
return ArmedScopeGuard {
|
||||||
[this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] {
|
[this, position = m_lexer.tell(), location] {
|
||||||
m_lexer.retreat(m_lexer.tell() - position);
|
m_lexer.retreat(m_lexer.tell() - position);
|
||||||
m_lexer.restore_cached_offset(cached_position);
|
|
||||||
(void)location;
|
(void)location;
|
||||||
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
|
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue