mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-25 14:05:15 +00:00
The source code position cache was moved from a line based approach to a "chunk"-based approach to improve performance on large, minified JavaScript files with few lines, but this has had an adverse effect on _multi-line_ source files. Reintroduce some of the old behaviour by caching lines again, with some added sanity limits to avoid caching empty/overly small lines. Source code positions in files with few lines will still be cached less often, since minified JavaScript files can be assumed to be unusually large, and since stack traces for minified JavaScript are less useful as well. On WPT tests with large JavaScript dependencies like `css/css-masking/animations/clip-interpolation.html` this reduces the amount of time spent in `SourceCode::range_from_offsets` by as much as 99.98%, for the small small price of 80KB extra memory usage.
148 lines
4.9 KiB
C++
148 lines
4.9 KiB
C++
/*
|
|
* Copyright (c) 2022-2023, Andreas Kling <andreas@ladybird.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/BinarySearch.h>
|
|
#include <AK/Utf8View.h>
|
|
#include <LibJS/SourceCode.h>
|
|
#include <LibJS/SourceRange.h>
|
|
#include <LibJS/Token.h>
|
|
|
|
namespace JS {
|
|
|
|
NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
|
|
{
|
|
return adopt_ref(*new SourceCode(move(filename), move(code)));
|
|
}
|
|
|
|
SourceCode::SourceCode(String filename, String code)
|
|
: m_filename(move(filename))
|
|
, m_code(move(code))
|
|
{
|
|
}
|
|
|
|
String const& SourceCode::filename() const
|
|
{
|
|
return m_filename;
|
|
}
|
|
|
|
String const& SourceCode::code() const
|
|
{
|
|
return m_code;
|
|
}
|
|
|
|
void SourceCode::fill_position_cache() const
|
|
{
|
|
constexpr size_t predicted_mimimum_cached_positions = 8;
|
|
constexpr size_t minimum_distance_between_cached_positions = 32;
|
|
constexpr size_t maximum_distance_between_cached_positions = 8192;
|
|
|
|
if (m_code.is_empty())
|
|
return;
|
|
|
|
u32 previous_code_point = 0;
|
|
size_t line = 1;
|
|
size_t column = 1;
|
|
size_t offset_of_last_starting_point = 0;
|
|
m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions);
|
|
m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
|
|
|
|
Utf8View const view(m_code);
|
|
for (auto it = view.begin(); it != view.end(); ++it) {
|
|
u32 code_point = *it;
|
|
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
|
|
|
|
auto byte_offset = view.byte_offset_of(it);
|
|
|
|
bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r');
|
|
auto distance_between_cached_position = byte_offset - offset_of_last_starting_point;
|
|
|
|
if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) {
|
|
m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
|
|
offset_of_last_starting_point = byte_offset;
|
|
}
|
|
|
|
if (is_line_terminator) {
|
|
line += 1;
|
|
column = 1;
|
|
} else {
|
|
column += 1;
|
|
}
|
|
|
|
previous_code_point = code_point;
|
|
}
|
|
}
|
|
|
|
SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
|
|
{
|
|
// If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
|
|
if (m_code.is_empty())
|
|
return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
|
|
|
|
if (m_cached_positions.is_empty())
|
|
fill_position_cache();
|
|
|
|
Position current { .line = 1, .column = 1, .offset = 0 };
|
|
|
|
if (!m_cached_positions.is_empty()) {
|
|
Position const dummy;
|
|
size_t nearest_index = 0;
|
|
binary_search(m_cached_positions, dummy, &nearest_index,
|
|
[&](auto&, auto& starting_point) {
|
|
return start_offset - starting_point.offset;
|
|
});
|
|
|
|
current = m_cached_positions[nearest_index];
|
|
}
|
|
|
|
Optional<Position> start;
|
|
Optional<Position> end;
|
|
|
|
u32 previous_code_point = 0;
|
|
|
|
Utf8View const view(m_code);
|
|
for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
|
|
|
|
// If we're on or after the start offset, this is the start position.
|
|
if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
|
|
start = Position {
|
|
.line = current.line,
|
|
.column = current.column,
|
|
.offset = start_offset,
|
|
};
|
|
}
|
|
|
|
// If we're on or after the end offset, this is the end position.
|
|
if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
|
|
end = Position {
|
|
.line = current.line,
|
|
.column = current.column,
|
|
.offset = end_offset,
|
|
};
|
|
break;
|
|
}
|
|
|
|
u32 code_point = *it;
|
|
|
|
bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
|
|
previous_code_point = code_point;
|
|
|
|
if (is_line_terminator) {
|
|
current.line += 1;
|
|
current.column = 1;
|
|
continue;
|
|
}
|
|
current.column += 1;
|
|
}
|
|
|
|
// If we didn't find both a start and end position, just return 1,1-1,1.
|
|
// FIXME: This is a hack. Find a way to return the nicest possible values here.
|
|
if (!start.has_value() || !end.has_value())
|
|
return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
|
|
|
|
return SourceRange { *this, *start, *end };
|
|
}
|
|
|
|
}
|