mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-03 09:48:47 +00:00
The existing hunk data structure does not contain any way to easily store information about context surrounding the additions and removals in a hunk. While this does work fine for normal diffs (where there is never any surrounding context) this data structure is quite limiting for other use cases. Without support for surrounding context it is not possible to: * Add support for unified or context format to the diff utility to output surrounding context. * Be able to implement a patch utility that uses the surrounding context to reliably locate where to apply a patch when a hunk range does not apply perfectly. This patch changes Diff::Hunk such that its data structure more closely resembles a unified diff. Each line in a hunk is now either a change, removal, addition or context. Allowing hunks to have context inside of them exposes that HackStudio heavily relies on there being no context in the hunks that it uses for its git gutter implementation. The fix here is simple - ask git to produce us a diff that has no context in it!
137 lines
4.3 KiB
C++
137 lines
4.3 KiB
C++
/*
|
|
* Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
|
|
* Copyright (c) 2023, Shannon Booth <shannon.ml.booth@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include "Generator.h"
|
|
|
|
namespace Diff {
|
|
|
|
ErrorOr<Vector<Hunk>> from_text(StringView old_text, StringView new_text)
|
|
{
|
|
auto old_lines = old_text.lines();
|
|
auto new_lines = new_text.lines();
|
|
|
|
/**
|
|
* This is a simple implementation of the Longest Common Subsequence algorithm (over
|
|
* the lines of the text as opposed to the characters). A Dynamic programming approach
|
|
* is used here.
|
|
*/
|
|
|
|
enum class Direction {
|
|
Down, // Added a new line
|
|
Right, // Removed a line
|
|
Diagonal, // Line remained the same
|
|
};
|
|
|
|
// A single cell in the DP-matrix. Cell (i, j) represents the longest common
|
|
// sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
|
|
struct Cell {
|
|
size_t length;
|
|
Direction direction;
|
|
};
|
|
|
|
auto dp_matrix = Vector<Cell>();
|
|
TRY(dp_matrix.try_resize((old_lines.size() + 1) * (new_lines.size() + 1)));
|
|
|
|
auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
|
|
return dp_matrix[i + width * j];
|
|
};
|
|
|
|
// Initialize the first row and column
|
|
for (size_t i = 0; i <= old_lines.size(); ++i)
|
|
dp(i, new_lines.size()) = { 0, Direction::Right };
|
|
|
|
for (size_t j = 0; j <= new_lines.size(); ++j)
|
|
dp(old_lines.size(), 0) = { 0, Direction::Down };
|
|
|
|
// Fill in the rest of the DP table
|
|
for (int i = old_lines.size() - 1; i >= 0; --i) {
|
|
for (int j = new_lines.size() - 1; j >= 0; --j) {
|
|
if (old_lines[i] == new_lines[j]) {
|
|
dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
|
|
} else {
|
|
auto down = dp(i, j + 1).length;
|
|
auto right = dp(i + 1, j).length;
|
|
if (down > right)
|
|
dp(i, j) = { down, Direction::Down };
|
|
else
|
|
dp(i, j) = { right, Direction::Right };
|
|
}
|
|
}
|
|
}
|
|
|
|
Vector<Hunk> hunks;
|
|
Hunk cur_hunk;
|
|
bool in_hunk = false;
|
|
|
|
auto update_hunk = [&](size_t i, size_t j, Direction direction) -> ErrorOr<void> {
|
|
if (!in_hunk) {
|
|
HunkLocation location;
|
|
location.old_range.start_line = i;
|
|
location.new_range.start_line = j;
|
|
in_hunk = true;
|
|
cur_hunk = { location, {} };
|
|
}
|
|
|
|
if (direction == Direction::Down) {
|
|
TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
|
|
cur_hunk.location.new_range.number_of_lines++;
|
|
} else if (direction == Direction::Right) {
|
|
TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
|
|
cur_hunk.location.old_range.number_of_lines++;
|
|
}
|
|
|
|
return {};
|
|
};
|
|
|
|
auto flush_hunk = [&]() -> ErrorOr<void> {
|
|
if (in_hunk) {
|
|
// A file with no content has a zero indexed start line.
|
|
if (cur_hunk.location.new_range.start_line != 0 || cur_hunk.location.new_range.number_of_lines != 0)
|
|
cur_hunk.location.new_range.start_line++;
|
|
if (cur_hunk.location.old_range.start_line != 0 || cur_hunk.location.old_range.number_of_lines != 0)
|
|
cur_hunk.location.old_range.start_line++;
|
|
|
|
TRY(hunks.try_append(cur_hunk));
|
|
in_hunk = false;
|
|
}
|
|
|
|
return {};
|
|
};
|
|
|
|
size_t i = 0;
|
|
size_t j = 0;
|
|
|
|
while (i < old_lines.size() && j < new_lines.size()) {
|
|
auto& cell = dp(i, j);
|
|
if (cell.direction == Direction::Down) {
|
|
TRY(update_hunk(i, j, cell.direction));
|
|
++j;
|
|
} else if (cell.direction == Direction::Right) {
|
|
TRY(update_hunk(i, j, cell.direction));
|
|
++i;
|
|
} else {
|
|
++i;
|
|
++j;
|
|
TRY(flush_hunk());
|
|
}
|
|
}
|
|
|
|
while (i < old_lines.size()) {
|
|
TRY(update_hunk(i, new_lines.is_empty() ? 0 : new_lines.size() - 1, Direction::Right)); // Remove a line
|
|
++i;
|
|
}
|
|
while (j < new_lines.size()) {
|
|
TRY(update_hunk(old_lines.is_empty() ? 0 : old_lines.size() - 1, j, Direction::Down)); // Add a line
|
|
++j;
|
|
}
|
|
|
|
TRY(flush_hunk());
|
|
|
|
return hunks;
|
|
}
|
|
|
|
}
|