diff --git a/Tests/Utilities/TestUniq.cpp b/Tests/Utilities/TestUniq.cpp index 8dddfeb2d38..7b08fe3aa9c 100644 --- a/Tests/Utilities/TestUniq.cpp +++ b/Tests/Utilities/TestUniq.cpp @@ -51,12 +51,50 @@ TEST_CASE(long_line) run_uniq({}, StringView { input }, StringView { expected_output }); } +TEST_CASE(line_longer_than_internal_stream_buffer) +{ + auto input = Array {}; + auto expected_output = Array {}; + // Create two lines of 65535 A's and a newline. + input.fill('A'); + input[65535] = '\n'; + input[131071] = '\n'; + + expected_output.fill('A'); + expected_output[65535] = '\n'; + + run_uniq({}, StringView { input }, StringView { expected_output }); +} + +TEST_CASE(ignore_case_flag) +{ + run_uniq({ "-i" }, "AAA\nAaA\n"sv, "AAA\n"sv); + run_uniq({ "-i" }, "AAA\naaa\nAaA\n"sv, "AAA\n"sv); +} + TEST_CASE(duplicate_flag) { run_uniq({ "-d" }, "AAA\nAAA\nBBB\n"sv, "AAA\n"sv); + run_uniq({ "-d" }, "AAA\nAAA\nBBB\nBBB\nCCC\n"sv, "AAA\nBBB\n"sv); +} + +TEST_CASE(skip_chars_flag) +{ + run_uniq({ "-s1" }, "AAA\nAaA\n"sv, "AAA\nAaA\n"sv); + run_uniq({ "-s2" }, "AAA\nAaA\n"sv, "AAA\n"sv); + run_uniq({ "-s200" }, "AAA\nAaA\n"sv, "AAA\n"sv); +} + +TEST_CASE(skip_fields_flag) +{ + run_uniq({ "-f1" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv); + run_uniq({ "-f1" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n2 b AA\n"sv); + run_uniq({ "-f2" }, "1 a AA\n2 b AA\n"sv, "1 a AA\n"sv); + run_uniq({ "-f200" }, "1 AA\n2 AA\n"sv, "1 AA\n"sv); } TEST_CASE(count_flag) { run_uniq({ "-c" }, "AAA\nAAA\n"sv, "2 AAA\n"sv); + run_uniq({ "-c" }, "AAA\nAAA\nBBB\n"sv, "2 AAA\n1 BBB\n"sv); } diff --git a/Userland/Utilities/uniq.cpp b/Userland/Utilities/uniq.cpp index ac349a23d61..f2fd48231e0 100644 --- a/Userland/Utilities/uniq.cpp +++ b/Userland/Utilities/uniq.cpp @@ -37,7 +37,7 @@ static StringView skip(StringView line, unsigned char_skip_count, unsigned field if (is_ascii_space(c)) { in_field = false; field_index = i; - if (++current_field > field_skip_count) + if (++current_field >= field_skip_count) break; } else if (!in_field) { in_field = true; @@ -83,8 +83,8 @@ ErrorOr serenity_main(Main::Arguments arguments) auto infile = TRY(Core::InputBufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read)))); auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write)); - // The count starts at 1 since each line will appear at least once. - // Otherwise the -d and -c flags do not work as expected. + // The count starts at 1 since each line appears at least once. + // Otherwise the -d and -c flags are off by one. size_t count = 1; ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024)); ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024)); @@ -92,28 +92,32 @@ ErrorOr serenity_main(Main::Arguments arguments) StringView previous = TRY(infile->read_line_with_resize(previous_buf)); StringView previous_to_compare = skip(previous, skip_chars, skip_fields); - while (TRY(infile->can_read_line())) { - + while (!infile->is_eof()) { StringView current = TRY(infile->read_line_with_resize(current_buf)); StringView current_to_compare = skip(current, skip_chars, skip_fields); bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare; - if (!lines_equal) { - TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile)); - count = 1; - } else { + + while (lines_equal && current.length() > 0) { + // The docs say "The second and succeeding copies of repeated adjacent input + // lines shall not be written", therefore keep reading lines while they match previous. + // See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html + current = TRY(infile->read_line_with_resize(current_buf)); + current_to_compare = skip(current, skip_chars, skip_fields); + lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare; count++; } + TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile)); + count = 1; + swap(current_buf, previous_buf); - // The StringViews cannot be swapped since read_line_with_resize + // The StringViews can't be swapped since read_line_with_resize // potentially changes the location of the buffers due to reallocation. // Instead create a new StringView of what was most recently read in. previous = StringView { previous_buf.span().trim(current.length()) }; previous_to_compare = skip(previous, skip_chars, skip_fields); } - TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile)); - return 0; }