LibCompress: Refactor zlib de/compressor using zlib

This commit is contained in:
devgianlu 2025-03-01 17:37:59 +01:00 committed by Jelle Raaijmakers
commit dafbe32626
Notes: github-actions[bot] 2025-03-19 12:48:52 +00:00
4 changed files with 63 additions and 244 deletions

View file

@ -1,175 +1,38 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* Copyright (c) 2025, Altomani Gianluca <altomanigianluca@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/BitStream.h>
#include <AK/IntegralMath.h>
#include <AK/MemoryStream.h>
#include <AK/Span.h>
#include <AK/TypeCasts.h>
#include <AK/Types.h>
#include <LibCompress/Deflate.h>
#include <LibCompress/Zlib.h>
#include <zlib.h>
namespace Compress {
// Factory for ZlibDecompressor. Takes ownership of (or a reference to) `stream` via MaybeOwned and
// returns the new decompressor; failures from the buffer or z_stream creation propagate through TRY.
ErrorOr<NonnullOwnPtr<ZlibDecompressor>> ZlibDecompressor::create(MaybeOwned<Stream> stream)
{
// NOTE(review): as rendered here this return comes first, which would make the statements below
// unreachable. It looks like the pre-change line of the diff (single-argument constructor) — confirm
// against the actual file.
return adopt_nonnull_own_or_enomem(new (nothrow) ZlibDecompressor(move(stream)));
// 16 KiB byte buffer that is handed to the decompressor's constructor.
auto buffer = TRY(AK::FixedArray<u8>::create(16 * 1024));
// MAX_WBITS comes from <zlib.h>; presumably it selects the zlib-wrapped format with the largest
// window size — TODO confirm against GenericZlibDecompressor::new_z_stream.
auto zstream = TRY(GenericZlibDecompressor::new_z_stream(MAX_WBITS));
return adopt_nonnull_own_or_enomem(new (nothrow) ZlibDecompressor(move(buffer), move(stream), zstream));
}
ZlibDecompressor::ZlibDecompressor(MaybeOwned<Stream> stream)
: m_has_seen_header(false)
, m_stream(move(stream))
// One-shot convenience wrapper: decompresses `bytes` by delegating to the Compress::decompress_all
// template instantiated for ZlibDecompressor, and returns whatever that helper returns.
ErrorOr<ByteBuffer> ZlibDecompressor::decompress_all(ReadonlyBytes bytes)
{
return ::Compress::decompress_all<ZlibDecompressor>(bytes);
}
ErrorOr<Bytes> ZlibDecompressor::read_some(Bytes bytes)
ErrorOr<NonnullOwnPtr<ZlibCompressor>> ZlibCompressor::create(MaybeOwned<Stream> stream, GenericZlibCompressionLevel compression_level)
{
if (!m_has_seen_header) {
auto header = TRY(m_stream->read_value<ZlibHeader>());
if (header.compression_method != ZlibCompressionMethod::Deflate || header.compression_info > 7)
return Error::from_string_literal("Non-DEFLATE compression inside Zlib is not supported");
if (header.present_dictionary)
return Error::from_string_literal("Zlib compression with a pre-defined dictionary is currently not supported");
if (header.as_u16 % 31 != 0)
return Error::from_string_literal("Zlib error correction code does not match");
auto bit_stream = make<LittleEndianInputBitStream>(move(m_stream));
auto deflate_stream = TRY(Compress::DeflateDecompressor::construct(move(bit_stream)));
m_stream = move(deflate_stream);
m_has_seen_header = true;
}
return m_stream->read_some(bytes);
auto buffer = TRY(AK::FixedArray<u8>::create(16 * 1024));
auto zstream = TRY(GenericZlibCompressor::new_z_stream(MAX_WBITS, compression_level));
return adopt_nonnull_own_or_enomem(new (nothrow) ZlibCompressor(move(buffer), move(stream), zstream));
}
ErrorOr<size_t> ZlibDecompressor::write_some(ReadonlyBytes)
ErrorOr<ByteBuffer> ZlibCompressor::compress_all(ReadonlyBytes bytes, GenericZlibCompressionLevel compression_level)
{
return Error::from_errno(EBADF);
}
bool ZlibDecompressor::is_eof() const
{
return m_stream->is_eof();
}
bool ZlibDecompressor::is_open() const
{
return m_stream->is_open();
}
void ZlibDecompressor::close()
{
}
ErrorOr<NonnullOwnPtr<ZlibCompressor>> ZlibCompressor::construct(MaybeOwned<Stream> stream, ZlibCompressionLevel compression_level)
{
// Zlib only defines Deflate as a compression method.
auto compression_method = ZlibCompressionMethod::Deflate;
// FIXME: Find a way to compress with Deflate's "Best" compression level.
auto compressor_stream = TRY(DeflateCompressor::construct(MaybeOwned(*stream), static_cast<DeflateCompressor::CompressionLevel>(compression_level)));
auto zlib_compressor = TRY(adopt_nonnull_own_or_enomem(new (nothrow) ZlibCompressor(move(stream), move(compressor_stream))));
TRY(zlib_compressor->write_header(compression_method, compression_level));
return zlib_compressor;
}
ZlibCompressor::ZlibCompressor(MaybeOwned<Stream> stream, NonnullOwnPtr<Stream> compressor_stream)
: m_output_stream(move(stream))
, m_compressor(move(compressor_stream))
{
}
ZlibCompressor::~ZlibCompressor() = default;
ErrorOr<void> ZlibCompressor::write_header(ZlibCompressionMethod compression_method, ZlibCompressionLevel compression_level)
{
u8 compression_info = 0;
if (compression_method == ZlibCompressionMethod::Deflate) {
compression_info = AK::log2(DeflateCompressor::window_size) - 8;
VERIFY(compression_info <= 7);
}
ZlibHeader header {
.compression_method = compression_method,
.compression_info = compression_info,
.check_bits = 0,
.present_dictionary = false,
.compression_level = compression_level,
};
header.check_bits = 0b11111 - header.as_u16 % 31;
// FIXME: Support pre-defined dictionaries.
TRY(m_output_stream->write_value(header.as_u16));
return {};
}
ErrorOr<Bytes> ZlibCompressor::read_some(Bytes)
{
return Error::from_errno(EBADF);
}
ErrorOr<size_t> ZlibCompressor::write_some(ReadonlyBytes bytes)
{
VERIFY(!m_finished);
size_t n_written = TRY(m_compressor->write_some(bytes));
m_adler32_checksum.update(bytes.trim(n_written));
return n_written;
}
bool ZlibCompressor::is_eof() const
{
return false;
}
bool ZlibCompressor::is_open() const
{
return m_output_stream->is_open();
}
void ZlibCompressor::close()
{
}
ErrorOr<void> ZlibCompressor::finish()
{
VERIFY(!m_finished);
if (is<DeflateCompressor>(m_compressor.ptr()))
TRY(static_cast<DeflateCompressor*>(m_compressor.ptr())->final_flush());
NetworkOrdered<u32> adler_sum = m_adler32_checksum.digest();
TRY(m_output_stream->write_value(adler_sum));
m_finished = true;
return {};
}
ErrorOr<ByteBuffer> ZlibCompressor::compress_all(ReadonlyBytes bytes, ZlibCompressionLevel compression_level)
{
auto output_stream = TRY(try_make<AllocatingMemoryStream>());
auto zlib_stream = TRY(ZlibCompressor::construct(MaybeOwned<Stream>(*output_stream), compression_level));
TRY(zlib_stream->write_until_depleted(bytes));
TRY(zlib_stream->finish());
auto buffer = TRY(ByteBuffer::create_uninitialized(output_stream->used_buffer_size()));
TRY(output_stream->read_until_filled(buffer.bytes()));
return buffer;
return ::Compress::compress_all<ZlibCompressor>(bytes, compression_level);
}
}

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* Copyright (c) 2025, Altomani Gianluca <altomanigianluca@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -7,87 +8,34 @@
#pragma once
#include <AK/ByteBuffer.h>
#include <AK/Endian.h>
#include <AK/MaybeOwned.h>
#include <AK/Optional.h>
#include <AK/OwnPtr.h>
#include <AK/Span.h>
#include <AK/Stream.h>
#include <AK/Types.h>
#include <LibCrypto/Checksum/Adler32.h>
#include <LibCompress/GenericZlib.h>
namespace Compress {
enum class ZlibCompressionMethod : u8 {
Deflate = 8,
};
enum class ZlibCompressionLevel : u8 {
Fastest,
Fast,
Default,
Best,
};
struct ZlibHeader {
union {
struct {
ZlibCompressionMethod compression_method : 4;
u8 compression_info : 4;
u8 check_bits : 5;
bool present_dictionary : 1;
ZlibCompressionLevel compression_level : 2;
};
NetworkOrdered<u16> as_u16;
};
};
static_assert(sizeof(ZlibHeader) == sizeof(u16));
class ZlibDecompressor : public Stream {
class ZlibDecompressor final : public GenericZlibDecompressor {
public:
static ErrorOr<NonnullOwnPtr<ZlibDecompressor>> create(MaybeOwned<Stream>);
virtual ErrorOr<Bytes> read_some(Bytes) override;
virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
virtual bool is_eof() const override;
virtual bool is_open() const override;
virtual void close() override;
static ErrorOr<ByteBuffer> decompress_all(ReadonlyBytes);
private:
ZlibDecompressor(MaybeOwned<Stream>);
bool m_has_seen_header { false };
MaybeOwned<Stream> m_stream;
ZlibDecompressor(AK::FixedArray<u8> buffer, MaybeOwned<Stream> stream, z_stream* zstream)
: GenericZlibDecompressor(move(buffer), move(stream), zstream)
{
}
};
class ZlibCompressor : public Stream {
class ZlibCompressor final : public GenericZlibCompressor {
public:
static ErrorOr<NonnullOwnPtr<ZlibCompressor>> construct(MaybeOwned<Stream>, ZlibCompressionLevel = ZlibCompressionLevel::Default);
~ZlibCompressor();
virtual ErrorOr<Bytes> read_some(Bytes) override;
virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
virtual bool is_eof() const override;
virtual bool is_open() const override;
virtual void close() override;
ErrorOr<void> finish();
static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, ZlibCompressionLevel = ZlibCompressionLevel::Default);
static ErrorOr<NonnullOwnPtr<ZlibCompressor>> create(MaybeOwned<Stream>, GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default);
static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes, GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default);
private:
ZlibCompressor(MaybeOwned<Stream> stream, NonnullOwnPtr<Stream> compressor_stream);
ErrorOr<void> write_header(ZlibCompressionMethod, ZlibCompressionLevel);
bool m_finished { false };
MaybeOwned<Stream> m_output_stream;
NonnullOwnPtr<Stream> m_compressor;
Crypto::Checksum::Adler32 m_adler32_checksum;
ZlibCompressor(AK::FixedArray<u8> buffer, MaybeOwned<Stream> stream, z_stream* zstream)
: GenericZlibCompressor(move(buffer), move(stream), zstream)
{
}
};
}
template<>
struct AK::Traits<Compress::ZlibHeader> : public AK::DefaultTraits<Compress::ZlibHeader> {
static constexpr bool is_trivially_serializable() { return true; }
};

View file

@ -31,7 +31,7 @@ WebIDL::ExceptionOr<GC::Ref<CompressionStream>> CompressionStream::construct_imp
auto compressor = [&, input_stream = MaybeOwned<Stream> { *input_stream }]() mutable -> ErrorOr<Compressor> {
switch (format) {
case Bindings::CompressionFormat::Deflate:
return TRY(Compress::ZlibCompressor::construct(move(input_stream)));
return TRY(Compress::ZlibCompressor::create(move(input_stream)));
case Bindings::CompressionFormat::DeflateRaw:
return TRY(Compress::DeflateCompressor::construct(make<LittleEndianInputBitStream>(move(input_stream))));
case Bindings::CompressionFormat::Gzip:

View file

@ -4,11 +4,11 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <AK/Array.h>
#include <AK/ByteBuffer.h>
#include <AK/MaybeOwned.h>
#include <AK/MemoryStream.h>
#include <LibCompress/Zlib.h>
#include <LibTest/TestCase.h>
TEST_CASE(zlib_decompress_simple)
{
@ -21,9 +21,7 @@ TEST_CASE(zlib_decompress_simple)
u8 const uncompressed[] = "This is a simple text file :)";
auto stream = make<FixedMemoryStream>(compressed);
auto decompressor = TRY_OR_FAIL(Compress::ZlibDecompressor::create(move(stream)));
auto decompressed = TRY_OR_FAIL(decompressor->read_until_eof());
auto decompressed = TRY_OR_FAIL(Compress::ZlibDecompressor::decompress_all(compressed));
EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 }));
}
@ -46,31 +44,43 @@ TEST_CASE(zlib_decompress_stream)
EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 }));
}
TEST_CASE(zlib_compress_simple)
TEST_CASE(zlib_round_trip_simple_default)
{
// Note: This is just the output of our compression function from an arbitrary point in time.
// This test is intended to ensure that the compression doesn't change unintentionally,
// it does not make any guarantees for correctness.
Array<u8, 37> const compressed {
0x78, 0x9C, 0x0B, 0xC9, 0xC8, 0x2C, 0x56, 0xC8, 0x2C, 0x56, 0x48, 0x54,
0x28, 0xCE, 0xCC, 0x2D, 0xC8, 0x49, 0x55, 0x28, 0x49, 0xAD, 0x28, 0x51,
0x48, 0xCB, 0xCC, 0x49, 0x55, 0xB0, 0xD2, 0x04, 0x00, 0x99, 0x5E, 0x09,
0xE8
};
u8 const uncompressed[] = "This is a simple text file :)";
auto const freshly_pressed = Compress::ZlibCompressor::compress_all({ uncompressed, sizeof(uncompressed) - 1 });
EXPECT(freshly_pressed.value().bytes() == compressed.span());
auto const freshly_pressed = TRY_OR_FAIL(Compress::ZlibCompressor::compress_all({ uncompressed, sizeof(uncompressed) - 1 }, Compress::GenericZlibCompressionLevel::Default));
EXPECT(freshly_pressed.span().slice(0, 2) == ReadonlyBytes { { 0x78, 0x9C } });
auto const decompressed = TRY_OR_FAIL(Compress::ZlibDecompressor::decompress_all(freshly_pressed));
EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 }));
}
// Round-trip test at the Best compression level: compress a short string, check the first two
// output bytes, then decompress and compare against the original input.
TEST_CASE(zlib_round_trip_simple_best)
{
u8 const uncompressed[] = "This is a simple text file :)";
// `sizeof(uncompressed) - 1` excludes the string literal's trailing NUL from the input.
auto const freshly_pressed = TRY_OR_FAIL(Compress::ZlibCompressor::compress_all({ uncompressed, sizeof(uncompressed) - 1 }, Compress::GenericZlibCompressionLevel::Best));
// The first two bytes of the output are expected to be 0x78 0xDA for this level.
EXPECT(freshly_pressed.span().slice(0, 2) == ReadonlyBytes { { 0x78, 0xDA } });
auto const decompressed = TRY_OR_FAIL(Compress::ZlibDecompressor::decompress_all(freshly_pressed));
EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 }));
}
// Round-trip test at the Fastest compression level: compress a short string, check the first two
// output bytes, then decompress and compare against the original input.
TEST_CASE(zlib_round_trip_simple_fastest)
{
u8 const uncompressed[] = "This is a simple text file :)";
// `sizeof(uncompressed) - 1` excludes the string literal's trailing NUL from the input.
auto const freshly_pressed = TRY_OR_FAIL(Compress::ZlibCompressor::compress_all({ uncompressed, sizeof(uncompressed) - 1 }, Compress::GenericZlibCompressionLevel::Fastest));
// The first two bytes of the output are expected to be 0x78 0x01 for this level.
EXPECT(freshly_pressed.span().slice(0, 2) == ReadonlyBytes { { 0x78, 0x01 } });
auto const decompressed = TRY_OR_FAIL(Compress::ZlibDecompressor::decompress_all(freshly_pressed));
EXPECT(decompressed.bytes() == (ReadonlyBytes { uncompressed, sizeof(uncompressed) - 1 }));
}
TEST_CASE(zlib_decompress_with_missing_end_bits)
{
// This test case has been extracted from compressed PNG data of `/res/icons/16x16/app-masterword.png`.
// The decompression results have been confirmed using the `zlib-flate` tool.
// Note: It is unconfirmed whether there are actually bits missing.
// However, our decompressor implementation ends up in a weird state nonetheless.
Array<u8, 72> const compressed {
0x08, 0xD7, 0x63, 0x30, 0x86, 0x00, 0x01, 0x06, 0x23, 0x25, 0x30, 0x00,
@ -95,8 +105,6 @@ TEST_CASE(zlib_decompress_with_missing_end_bits)
0x44, 0x11, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
auto stream = make<FixedMemoryStream>(compressed);
auto decompressor = TRY_OR_FAIL(Compress::ZlibDecompressor::create(move(stream)));
auto decompressed = TRY_OR_FAIL(decompressor->read_until_eof());
auto decompressed = TRY_OR_FAIL(Compress::ZlibDecompressor::decompress_all(compressed));
EXPECT_EQ(decompressed.span(), uncompressed.span());
}