mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-28 20:29:03 +00:00
Utilities: Make uniq behavior consistent with coreutils
The main difference was that our implementation wrote the last line of each run of repeated adjacent lines instead of the first, whereas the spec says "The second and succeeding copies of repeated adjacent input lines shall not be written." Additionally, a mistake in the -f flag implementation caused the number of fields skipped to be one greater than requested.
This commit is contained in:
parent
1d932d3ebf
commit
342b358341
Notes:
sideshowbarker
2024-07-16 20:31:50 +09:00
Author: https://github.com/d-gaston
Commit: 342b358341
Pull-request: https://github.com/SerenityOS/serenity/pull/24083
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/tcl3 ✅
2 changed files with 54 additions and 12 deletions
|
@ -51,12 +51,50 @@ TEST_CASE(long_line)
|
|||
run_uniq({}, StringView { input }, StringView { expected_output });
|
||||
}
|
||||
|
||||
// Checks that two identical adjacent lines, each 65536 bytes long including
// the trailing newline, are expected to collapse into a single output line.
// NOTE(review): per the test name, the line length is meant to exceed the input
// stream's internal buffer; that buffer size is not visible here — confirm.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(line_longer_than_internal_stream_buffer)
|
||||
{
|
||||
// 131072 bytes: room for two lines of 65536 bytes each.
auto input = Array<u8, 131072> {};
|
||||
// 65536 bytes: room for exactly one such line.
auto expected_output = Array<u8, 65536> {};
|
||||
// Create two identical lines, each consisting of 65535 A's followed by a newline.
|
||||
input.fill('A');
|
||||
// Terminate the first line.
input[65535] = '\n';
|
||||
// Terminate the second line (last byte of the array).
input[131071] = '\n';
|
||||
|
||||
// The expected output is one copy of that line.
expected_output.fill('A');
|
||||
expected_output[65535] = '\n';
|
||||
|
||||
// No flags are passed.
run_uniq({}, StringView { input }, StringView { expected_output });
|
||||
}
|
||||
|
||||
// Checks the -i flag: adjacent lines that differ only in ASCII letter case are
// expected to collapse, and the line that is kept is the first one of the run.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(ignore_case_flag)
|
||||
{
|
||||
// Two lines differing only in case: only the first ("AAA") is expected.
run_uniq({ "-i" }, "AAA\nAaA\n"sv, "AAA\n"sv);
|
||||
// A run of three such lines: still only the first is expected.
run_uniq({ "-i" }, "AAA\naaa\nAaA\n"sv, "AAA\n"sv);
|
||||
}
|
||||
|
||||
// Checks the -d flag: the expected output contains only lines that occur more
// than once in a row, each written a single time.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(duplicate_flag)
|
||||
{
|
||||
// "AAA" is repeated and expected once; the single "BBB" is expected to be omitted.
run_uniq({ "-d" }, "AAA\nAAA\nBBB\n"sv, "AAA\n"sv);
|
||||
// Two repeated runs ("AAA", "BBB") are expected; the single trailing "CCC" is not.
run_uniq({ "-d" }, "AAA\nAAA\nBBB\nBBB\nCCC\n"sv, "AAA\nBBB\n"sv);
|
||||
}
|
||||
|
||||
// Checks the -sN flag, which excludes the first N characters of each line from
// the comparison.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(skip_chars_flag)
|
||||
{
|
||||
// Skipping one character leaves "AA" vs "aA", which differ: both lines are expected.
run_uniq({ "-s1" }, "AAA\nAaA\n"sv, "AAA\nAaA\n"sv);
|
||||
// Skipping two characters leaves "A" vs "A": only the first line is expected.
run_uniq({ "-s2" }, "AAA\nAaA\n"sv, "AAA\n"sv);
|
||||
// A skip count larger than the line length: the lines are expected to compare equal.
run_uniq({ "-s200" }, "AAA\nAaA\n"sv, "AAA\n"sv);
|
||||
}
|
||||
|
||||
// Checks the -fN flag, which excludes the first N whitespace-separated fields
// of each line from the comparison. The accompanying commit message states that
// the previous implementation skipped one field too many.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(skip_fields_flag)
|
||||
{
|
||||
// The lines differ only in their first field: only the first line is expected.
run_uniq({ "-f1" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv);
|
||||
// With one field skipped the second field ("a" vs "b") still differs: both lines are expected.
run_uniq({ "-f1" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n2 b AA\n"sv);
|
||||
// With two fields skipped only "AA" remains on each line: only the first line is expected.
run_uniq({ "-f2" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n"sv);
|
||||
// A skip count larger than the number of fields: the lines are expected to compare equal.
run_uniq({ "-f200" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv);
|
||||
}
|
||||
|
||||
// Checks the -c flag: each expected output line is prefixed with the number of
// times it occurred in a row, followed by a single space.
// NOTE(review): run_uniq is defined outside this view; presumably its arguments
// are (uniq arguments, input, expected output) — confirm.
TEST_CASE(count_flag)
|
||||
{
|
||||
// One run of two identical lines: expected count is 2.
run_uniq({ "-c" }, "AAA\nAAA\n"sv, "2 AAA\n"sv);
|
||||
// A run of two followed by a single line: expected counts are 2 and 1.
run_uniq({ "-c" }, "AAA\nAAA\nBBB\n"sv, "2 AAA\n1 BBB\n"sv);
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ static StringView skip(StringView line, unsigned char_skip_count, unsigned field
|
|||
if (is_ascii_space(c)) {
|
||||
in_field = false;
|
||||
field_index = i;
|
||||
if (++current_field > field_skip_count)
|
||||
if (++current_field >= field_skip_count)
|
||||
break;
|
||||
} else if (!in_field) {
|
||||
in_field = true;
|
||||
|
@ -83,8 +83,8 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
auto infile = TRY(Core::InputBufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read))));
|
||||
auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write));
|
||||
|
||||
// The count starts at 1 since each line will appear at least once.
|
||||
// Otherwise the -d and -c flags do not work as expected.
|
||||
// The count starts at 1 since each line appears at least once.
|
||||
// Otherwise the -d and -c flags are off by one.
|
||||
size_t count = 1;
|
||||
ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024));
|
||||
ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024));
|
||||
|
@ -92,28 +92,32 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
StringView previous = TRY(infile->read_line_with_resize(previous_buf));
|
||||
StringView previous_to_compare = skip(previous, skip_chars, skip_fields);
|
||||
|
||||
while (TRY(infile->can_read_line())) {
|
||||
|
||||
while (!infile->is_eof()) {
|
||||
StringView current = TRY(infile->read_line_with_resize(current_buf));
|
||||
|
||||
StringView current_to_compare = skip(current, skip_chars, skip_fields);
|
||||
bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
|
||||
if (!lines_equal) {
|
||||
TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
|
||||
count = 1;
|
||||
} else {
|
||||
|
||||
while (lines_equal && current.length() > 0) {
|
||||
// The docs say "The second and succeeding copies of repeated adjacent input
|
||||
// lines shall not be written", therefore keep reading lines while they match previous.
|
||||
// See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
|
||||
current = TRY(infile->read_line_with_resize(current_buf));
|
||||
current_to_compare = skip(current, skip_chars, skip_fields);
|
||||
lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
|
||||
count++;
|
||||
}
|
||||
|
||||
TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
|
||||
count = 1;
|
||||
|
||||
swap(current_buf, previous_buf);
|
||||
// The StringViews cannot be swapped since read_line_with_resize
|
||||
// The StringViews can't be swapped since read_line_with_resize
|
||||
// potentially changes the location of the buffers due to reallocation.
|
||||
// Instead create a new StringView of what was most recently read in.
|
||||
previous = StringView { previous_buf.span().trim(current.length()) };
|
||||
previous_to_compare = skip(previous, skip_chars, skip_fields);
|
||||
}
|
||||
|
||||
TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue