From 2baa7977a49311c2a5398b2e46aaacecd06875b5 Mon Sep 17 00:00:00 2001 From: devgianlu Date: Sat, 1 Mar 2025 17:38:52 +0100 Subject: [PATCH] LibCompress: Refactor gzip de/compressor using zlib --- Libraries/LibCompress/Gzip.cpp | 254 +----------------- Libraries/LibCompress/Gzip.h | 100 ++----- .../Compression/DecompressionStream.cpp | 2 +- Tests/LibCompress/TestGzip.cpp | 21 +- 4 files changed, 39 insertions(+), 338 deletions(-) diff --git a/Libraries/LibCompress/Gzip.cpp b/Libraries/LibCompress/Gzip.cpp index 29fea7edb32..1e34af83800 100644 --- a/Libraries/LibCompress/Gzip.cpp +++ b/Libraries/LibCompress/Gzip.cpp @@ -1,267 +1,39 @@ /* * Copyright (c) 2020-2022, the SerenityOS developers. * Copyright (c) 2021, Idan Horowitz + * Copyright (c) 2025, Altomani Gianluca * * SPDX-License-Identifier: BSD-2-Clause */ #include -#include -#include -#include -#include +#include namespace Compress { -bool GzipDecompressor::is_likely_compressed(ReadonlyBytes bytes) +ErrorOr> GzipDecompressor::create(MaybeOwned stream) { - return bytes.size() >= 2 && bytes[0] == gzip_magic_1 && bytes[1] == gzip_magic_2; -} - -bool BlockHeader::valid_magic_number() const -{ - return identification_1 == gzip_magic_1 && identification_2 == gzip_magic_2; -} - -bool BlockHeader::supported_by_implementation() const -{ - if (compression_method != 0x08) { - // RFC 1952 does not define any compression methods other than deflate. - return false; - } - - if (flags > Flags::MAX) { - // RFC 1952 does not define any more flags. - return false; - } - - return true; -} - -ErrorOr> GzipDecompressor::Member::construct(BlockHeader header, LittleEndianInputBitStream& stream) -{ - auto deflate_stream = TRY(DeflateDecompressor::construct(MaybeOwned(stream))); - return TRY(adopt_nonnull_own_or_enomem(new (nothrow) Member(header, move(deflate_stream)))); -} - -GzipDecompressor::Member::Member(BlockHeader header, NonnullOwnPtr stream) - : m_header(header) - , m_stream(move(stream)) -{ -} - -GzipDecompressor::GzipDecompressor(MaybeOwned stream) - : m_input_stream(make(move(stream))) -{ -} - -GzipDecompressor::~GzipDecompressor() -{ - m_current_member.clear(); -} - -ErrorOr GzipDecompressor::read_some(Bytes bytes) -{ - size_t total_read = 0; - while (total_read < bytes.size()) { - if (is_eof()) - break; - - auto slice = bytes.slice(total_read); - - if (m_current_member) { - auto current_slice = TRY(current_member().m_stream->read_some(slice)); - current_member().m_checksum.update(current_slice); - current_member().m_nread += current_slice.size(); - - if (current_slice.size() < slice.size()) { - u32 crc32 = TRY(m_input_stream->read_value>()); - u32 input_size = TRY(m_input_stream->read_value>()); - - if (crc32 != current_member().m_checksum.digest()) - return Error::from_string_literal("Stored CRC32 does not match the calculated CRC32 of the current member"); - - if (input_size != current_member().m_nread) - return Error::from_string_literal("Input size does not match the number of read bytes"); - - m_current_member.clear(); - - total_read += current_slice.size(); - continue; - } - - total_read += current_slice.size(); - continue; - } else { - auto current_partial_header_slice = Bytes { m_partial_header, sizeof(BlockHeader) }.slice(m_partial_header_offset); - auto current_partial_header_data = TRY(m_input_stream->read_some(current_partial_header_slice)); - m_partial_header_offset += current_partial_header_data.size(); - - if (is_eof()) - break; - - if (m_partial_header_offset < sizeof(BlockHeader)) { - break; // partial header read - } - m_partial_header_offset = 0; - - BlockHeader header = *(reinterpret_cast(m_partial_header)); - - if (!header.valid_magic_number()) - return Error::from_string_literal("Header does not have a valid magic number"); - - if (!header.supported_by_implementation()) - return Error::from_string_literal("Header is not supported by implementation"); - - if (header.flags & Flags::FEXTRA) { - u16 subfield_id = TRY(m_input_stream->read_value>()); - u16 length = TRY(m_input_stream->read_value>()); - TRY(m_input_stream->discard(length)); - (void)subfield_id; - } - - auto discard_string = [&]() -> ErrorOr { - char next_char; - do { - next_char = TRY(m_input_stream->read_value()); - } while (next_char); - - return {}; - }; - - if (header.flags & Flags::FNAME) - TRY(discard_string()); - - if (header.flags & Flags::FCOMMENT) - TRY(discard_string()); - - if (header.flags & Flags::FHCRC) { - u16 crc = TRY(m_input_stream->read_value>()); - // FIXME: we should probably verify this instead of just assuming it matches - (void)crc; - } - - m_current_member = TRY(Member::construct(header, *m_input_stream)); - continue; - } - } - return bytes.slice(0, total_read); -} - -ErrorOr> GzipDecompressor::describe_header(ReadonlyBytes bytes) -{ - if (bytes.size() < sizeof(BlockHeader)) - return OptionalNone {}; - - auto& header = *(reinterpret_cast(bytes.data())); - if (!header.valid_magic_number() || !header.supported_by_implementation()) - return OptionalNone {}; - - LittleEndian original_size = *reinterpret_cast(bytes.offset(bytes.size() - sizeof(u32))); - return TRY(String::formatted("last modified: {}, original size {}", Core::DateTime::from_timestamp(header.modification_time), (u32)original_size)); + auto buffer = TRY(AK::FixedArray::create(16 * 1024)); + auto zstream = TRY(GenericZlibDecompressor::new_z_stream(MAX_WBITS | 16)); + return adopt_nonnull_own_or_enomem(new (nothrow) GzipDecompressor(move(buffer), move(stream), zstream)); } ErrorOr GzipDecompressor::decompress_all(ReadonlyBytes bytes) { - auto memory_stream = TRY(try_make(bytes)); - auto gzip_stream = make(move(memory_stream)); - AllocatingMemoryStream output_stream; - - auto buffer = TRY(ByteBuffer::create_uninitialized(4096)); - while (!gzip_stream->is_eof()) { - auto const data = TRY(gzip_stream->read_some(buffer)); - TRY(output_stream.write_until_depleted(data)); - } - - auto output_buffer = TRY(ByteBuffer::create_uninitialized(output_stream.used_buffer_size())); - TRY(output_stream.read_until_filled(output_buffer)); - return output_buffer; + return ::Compress::decompress_all(bytes); } -bool GzipDecompressor::is_eof() const { return m_input_stream->is_eof(); } - -ErrorOr GzipDecompressor::write_some(ReadonlyBytes) +ErrorOr> GzipCompressor::create(MaybeOwned stream, GenericZlibCompressionLevel compression_level) { - return Error::from_errno(EBADF); + auto buffer = TRY(AK::FixedArray::create(16 * 1024)); + auto zstream = TRY(GenericZlibCompressor::new_z_stream(MAX_WBITS | 16, compression_level)); + return adopt_nonnull_own_or_enomem(new (nothrow) GzipCompressor(move(buffer), move(stream), zstream)); } -ErrorOr> GzipCompressor::create(MaybeOwned output_stream) +ErrorOr GzipCompressor::compress_all(ReadonlyBytes bytes, GenericZlibCompressionLevel compression_level) { - BlockHeader header; - header.identification_1 = 0x1f; - header.identification_2 = 0x8b; - header.compression_method = 0x08; - header.flags = 0; - header.modification_time = 0; - header.extra_flags = 3; // DEFLATE sets 2 for maximum compression and 4 for minimum compression - header.operating_system = 3; // unix - TRY(output_stream->write_until_depleted({ &header, sizeof(header) })); - - auto deflate_compressor = TRY(DeflateCompressor::construct(MaybeOwned(*output_stream))); - return adopt_own(*new GzipCompressor { move(output_stream), move(deflate_compressor) }); -} - -GzipCompressor::GzipCompressor(MaybeOwned output_stream, NonnullOwnPtr deflate_compressor) - : m_output_stream(move(output_stream)) - , m_deflate_compressor(move(deflate_compressor)) -{ -} - -ErrorOr GzipCompressor::read_some(Bytes) -{ - return Error::from_errno(EBADF); -} - -ErrorOr GzipCompressor::write_some(ReadonlyBytes bytes) -{ - VERIFY(!m_finished); - - TRY(m_deflate_compressor->write_until_depleted(bytes)); - m_total_bytes += bytes.size(); - m_crc32.update(bytes); - - return bytes.size(); -} - -ErrorOr GzipCompressor::finish() -{ - VERIFY(!m_finished); - m_finished = true; - - TRY(m_deflate_compressor->final_flush()); - TRY(m_output_stream->write_value>(m_crc32.digest())); - TRY(m_output_stream->write_value>(m_total_bytes)); - - return {}; -} - -bool GzipCompressor::is_eof() const -{ - return true; -} - -bool GzipCompressor::is_open() const -{ - return m_output_stream->is_open(); -} - -void GzipCompressor::close() -{ -} - -ErrorOr GzipCompressor::compress_all(ReadonlyBytes bytes) -{ - auto output_stream = TRY(try_make()); - auto gzip_stream = TRY(GzipCompressor::create(MaybeOwned { *output_stream })); - - TRY(gzip_stream->write_until_depleted(bytes)); - TRY(gzip_stream->finish()); - - auto buffer = TRY(ByteBuffer::create_uninitialized(output_stream->used_buffer_size())); - TRY(output_stream->read_until_filled(buffer.bytes())); - - return buffer; + return ::Compress::compress_all(bytes, compression_level); } } diff --git a/Libraries/LibCompress/Gzip.h b/Libraries/LibCompress/Gzip.h index 736a077fec8..c772620b782 100644 --- a/Libraries/LibCompress/Gzip.h +++ b/Libraries/LibCompress/Gzip.h @@ -1,109 +1,41 @@ /* * Copyright (c) 2020-2022, the SerenityOS developers. * Copyright (c) 2021, Idan Horowitz + * Copyright (c) 2025, Altomani Gianluca * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once -#include -#include -#include -#include -#include +#include namespace Compress { -constexpr u8 gzip_magic_1 = 0x1f; -constexpr u8 gzip_magic_2 = 0x8b; -struct [[gnu::packed]] BlockHeader { - u8 identification_1; - u8 identification_2; - u8 compression_method; - u8 flags; - LittleEndian modification_time; - u8 extra_flags; - u8 operating_system; - - bool valid_magic_number() const; - bool supported_by_implementation() const; -}; - -struct Flags { - static constexpr u8 FTEXT = 1 << 0; - static constexpr u8 FHCRC = 1 << 1; - static constexpr u8 FEXTRA = 1 << 2; - static constexpr u8 FNAME = 1 << 3; - static constexpr u8 FCOMMENT = 1 << 4; - - static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT; -}; - -class GzipDecompressor final : public Stream { +class GzipDecompressor final : public GenericZlibDecompressor { public: - GzipDecompressor(MaybeOwned); - ~GzipDecompressor(); - - virtual ErrorOr read_some(Bytes) override; - virtual ErrorOr write_some(ReadonlyBytes) override; - virtual bool is_eof() const override; - virtual bool is_open() const override { return true; } - virtual void close() override { } + bool is_likely_compressed(ReadonlyBytes bytes); + static ErrorOr> create(MaybeOwned); static ErrorOr decompress_all(ReadonlyBytes); - static ErrorOr> describe_header(ReadonlyBytes); - static bool is_likely_compressed(ReadonlyBytes bytes); - private: - class Member { - public: - static ErrorOr> construct(BlockHeader header, LittleEndianInputBitStream&); - - BlockHeader m_header; - NonnullOwnPtr m_stream; - Crypto::Checksum::CRC32 m_checksum; - size_t m_nread { 0 }; - - private: - Member(BlockHeader, NonnullOwnPtr); - }; - - Member const& current_member() const { return *m_current_member; } - Member& current_member() { return *m_current_member; } - - NonnullOwnPtr m_input_stream; - u8 m_partial_header[sizeof(BlockHeader)]; - size_t m_partial_header_offset { 0 }; - OwnPtr m_current_member {}; - - bool m_eof { false }; + GzipDecompressor(AK::FixedArray buffer, MaybeOwned stream, z_stream* zstream) + : GenericZlibDecompressor(move(buffer), move(stream), zstream) + { + } }; -class GzipCompressor final : public Stream { +class GzipCompressor final : public GenericZlibCompressor { public: - static ErrorOr> create(MaybeOwned); - - virtual ErrorOr read_some(Bytes) override; - virtual ErrorOr write_some(ReadonlyBytes) override; - virtual bool is_eof() const override; - virtual bool is_open() const override; - virtual void close() override; - - static ErrorOr compress_all(ReadonlyBytes bytes); - - ErrorOr finish(); + static ErrorOr> create(MaybeOwned, GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default); + static ErrorOr compress_all(ReadonlyBytes, GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default); private: - GzipCompressor(MaybeOwned, NonnullOwnPtr); - - MaybeOwned m_output_stream; - NonnullOwnPtr m_deflate_compressor; - - Crypto::Checksum::CRC32 m_crc32; - size_t m_total_bytes { 0 }; - bool m_finished { false }; + GzipCompressor(AK::FixedArray buffer, MaybeOwned stream, z_stream* zstream) + : GenericZlibCompressor(move(buffer), move(stream), zstream) + { + } }; } diff --git a/Libraries/LibWeb/Compression/DecompressionStream.cpp b/Libraries/LibWeb/Compression/DecompressionStream.cpp index 1a114527559..392a1f47920 100644 --- a/Libraries/LibWeb/Compression/DecompressionStream.cpp +++ b/Libraries/LibWeb/Compression/DecompressionStream.cpp @@ -36,7 +36,7 @@ WebIDL::ExceptionOr> DecompressionStream::construct case Bindings::CompressionFormat::DeflateRaw: return TRY(Compress::DeflateDecompressor::construct(make(move(input_stream)))); case Bindings::CompressionFormat::Gzip: - return make(move(input_stream)); + return TRY(Compress::GzipDecompressor::create((move(input_stream)))); } VERIFY_NOT_REACHED(); diff --git a/Tests/LibCompress/TestGzip.cpp b/Tests/LibCompress/TestGzip.cpp index dd787dc4540..5a76b2cee42 100644 --- a/Tests/LibCompress/TestGzip.cpp +++ b/Tests/LibCompress/TestGzip.cpp @@ -4,11 +4,8 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include - -#include -#include #include +#include TEST_CASE(gzip_decompress_simple) { @@ -20,8 +17,8 @@ TEST_CASE(gzip_decompress_simple) u8 const uncompressed[] = "word1 abc word2"; - auto const decompressed = Compress::GzipDecompressor::decompress_all(compressed); - EXPECT(decompressed.value().bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 })); + auto const decompressed = TRY_OR_FAIL(Compress::GzipDecompressor::decompress_all(compressed)); + EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 })); } TEST_CASE(gzip_decompress_multiple_members) @@ -36,8 +33,8 @@ TEST_CASE(gzip_decompress_multiple_members) u8 const uncompressed[] = "abcabcabcabc"; - auto const decompressed = Compress::GzipDecompressor::decompress_all(compressed); - EXPECT(decompressed.value().bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 })); + auto const decompressed = TRY_OR_FAIL(Compress::GzipDecompressor::decompress_all(compressed)); + EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 })); } TEST_CASE(gzip_decompress_zeroes) @@ -61,8 +58,8 @@ TEST_CASE(gzip_decompress_zeroes) Array const uncompressed = { 0 }; - auto const decompressed = Compress::GzipDecompressor::decompress_all(compressed); - EXPECT(uncompressed == decompressed.value().bytes()); + auto const decompressed = TRY_OR_FAIL(Compress::GzipDecompressor::decompress_all(compressed)); + EXPECT(uncompressed == decompressed.bytes()); } TEST_CASE(gzip_decompress_repeat_around_buffer) @@ -81,8 +78,8 @@ TEST_CASE(gzip_decompress_repeat_around_buffer) uncompressed.span().slice(0x0100, 0x7e00).fill(0); uncompressed.span().slice(0x7f00, 0x0100).fill(1); - auto const decompressed = Compress::GzipDecompressor::decompress_all(compressed); - EXPECT(uncompressed == decompressed.value().bytes()); + auto const decompressed = TRY_OR_FAIL(Compress::GzipDecompressor::decompress_all(compressed)); + EXPECT(uncompressed == decompressed.bytes()); } TEST_CASE(gzip_round_trip)