LibGfx+LibCompress: WebPWriter performance regression reduction

This moves both Gfx::CanonicalCode::write_symbol() and
Compress::CanonicalCode::write_symbol() inline.

It also adds `__attribute__((always_inline))` on the arguments
to visit() in the latter. (ALWAYS_INLINE doesn't work on lambdas.)

Numbers with `ministat`: I ran once:

    Build/lagom/bin/image -o test.bmp Base/res/wallpapers/sunset-retro.png

and then ran to bench:

    ~/src/hack/bench.py -n 20 -o bench_foo1.txt \
        Build/lagom/bin/image -o test.webp test.bmp

...and then `ministat bench_foo1.txt bench_foo2.txt` to compare.

The previous commit increased the time for this command by 38% compared
to the before state.

With this, it's an 8.6% regression. So still a regression, but a smaller
one.

Or, in other words, this commit reduces times by 21% compared to the
previous commit.

Numbers with hyperfine are similar -- with this on top of the previous
commit, this is a 7-11% regression, instead of an almost 50% regression.

(A local branch that changes how we compute CanonicalCodes so that we
actually compress a bit is perf-neutral since the image writing code
doesn't change.)

`hyperfine 'image -o test.webp test.bmp'`:
* Before:          23.7 ms ± 0.7 ms (116 runs)
* Previous commit: 33.2 ms ± 0.8 ms (82 runs)
* This commit:     25.5 ms ± 0.7 ms (102 runs)

`hyperfine 'animation -o wow.webp giphy.gif'`:
* Before:           85.5 ms ± 2.0 ms (34 runs)
* Previous commit: 127.7 ms ± 4.4 ms (22 runs)
* This commit:      95.3 ms ± 2.1 ms (31 runs)

`hyperfine 'animation -o wow.webp 7z7c.gif'`:
* Before:          12.6 ms ± 0.6 ms (198 runs)
* Previous commit: 16.5 ms ± 0.9 ms (153 runs)
* This commit:     13.5 ms ± 0.6 ms (186 runs)
This commit is contained in:
Nico Weber 2024-05-08 18:57:53 -04:00 committed by Tim Flynn
commit d294f150ed
Notes: sideshowbarker 2024-07-17 09:48:50 +09:00
4 changed files with 17 additions and 17 deletions

View file

@ -9,7 +9,6 @@
#include <AK/Assertions.h>
#include <AK/BinaryHeap.h>
#include <AK/BinarySearch.h>
#include <AK/BitStream.h>
#include <AK/MemoryStream.h>
#include <string.h>
@ -166,14 +165,6 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) cons
return Error::from_string_literal("Symbol exceeds maximum symbol number");
}
ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
{
auto code = symbol < m_bit_codes.size() ? m_bit_codes[symbol] : 0u;
auto length = symbol < m_bit_code_lengths.size() ? m_bit_code_lengths[symbol] : 0u;
TRY(stream.write_bits(code, length));
return {};
}
DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes)
: m_decompressor(decompressor)
, m_literal_codes(literal_codes)

View file

@ -7,6 +7,7 @@
#pragma once
#include <AK/BitStream.h>
#include <AK/ByteBuffer.h>
#include <AK/CircularBuffer.h>
#include <AK/Endian.h>
@ -51,6 +52,14 @@ private:
Vector<u16, 288> m_bit_code_lengths {};
};
ALWAYS_INLINE ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
{
auto code = symbol < m_bit_codes.size() ? m_bit_codes[symbol] : 0u;
auto length = symbol < m_bit_code_lengths.size() ? m_bit_code_lengths[symbol] : 0u;
TRY(stream.write_bits(code, length));
return {};
}
class DeflateDecompressor final : public Stream {
private:
class CompressedBlock {

View file

@ -32,12 +32,4 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& bit_stream)
[&bit_stream](Compress::CanonicalCode const& code) { return code.read_symbol(bit_stream); }));
}
ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& bit_stream, u32 symbol) const
{
TRY(m_code.visit(
[&](u32 single_code) -> ErrorOr<void> { VERIFY(symbol == single_code); return {};},
[&](Compress::CanonicalCode const& code) { return code.write_symbol(bit_stream, symbol); }));
return {};
}
}

View file

@ -39,6 +39,14 @@ private:
Variant<u32, Compress::CanonicalCode> m_code;
};
ALWAYS_INLINE ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& bit_stream, u32 symbol) const
{
TRY(m_code.visit(
[&](u32 single_code) __attribute__((always_inline))->ErrorOr<void> { VERIFY(symbol == single_code); return {}; },
[&](Compress::CanonicalCode const& code) __attribute__((always_inline)) { return code.write_symbol(bit_stream, symbol); }));
return {};
}
// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification#61_overview
// "From here on, we refer to this set as a prefix code group."
class PrefixCodeGroup {