From 7aa61ca49b8c222a0e1877ad8da83cd8197323f8 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Wed, 8 May 2024 08:57:53 -0400
Subject: [PATCH] LibGfx/WebP: Add CanonicalCode::write_symbol(), use it in
 writer

We still construct the code length codes manually, and now we also
construct a PrefixCodeGroup manually that assigns 8 bits to all
symbols (except for fully-opaque alpha channels, and for the
unused distance codes, like before). But now we use the CanonicalCodes
from that PrefixCodeGroup for writing.

No behavior change at all, the output is bit-for-bit identical to
before. But this is a step towards actually huffman-coding symbols.

This is however a pretty big perf regression. For
`image -o test.webp test.bmp` (where test.bmp is retro-sunset.png
re-encoded as bmp), time goes from 23.7 ms to 33.2 ms.

`animation -o wow.webp giphy.gif` goes from 85.5 ms to 127.7 ms.

`animation -o wow.webp 7z7c.gif` goes from 12.6 ms to 16.5 ms.
---
 .../ImageFormats/WebPSharedLossless.cpp       |  8 +++++
 .../LibGfx/ImageFormats/WebPSharedLossless.h  |  1 +
 .../ImageFormats/WebPWriterLossless.cpp       | 33 ++++++++++++-------
 3 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp
index 3b5b4928e0e..2545594d649 100644
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp
@@ -32,4 +32,12 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& bit_stream)
         [&bit_stream](Compress::CanonicalCode const& code) { return code.read_symbol(bit_stream); }));
 }
 
+ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& bit_stream, u32 symbol) const
+{
+    TRY(m_code.visit(
+        [&](u32 single_code) -> ErrorOr<void> { VERIFY(symbol == single_code); return {};},
+        [&](Compress::CanonicalCode const& code) { return code.write_symbol(bit_stream, symbol); }));
+    return {};
+}
+
 }
diff --git a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
index a49b9f14050..f6da1c84fb2 100644
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
@@ -23,6 +23,7 @@ public:
 
     static ErrorOr<CanonicalCode> from_bytes(ReadonlyBytes);
     ErrorOr<u32> read_symbol(LittleEndianInputBitStream&) const;
+    ErrorOr<void> write_symbol(LittleEndianOutputBitStream&, u32) const;
 
 private:
     explicit CanonicalCode(u32 single_symbol)
diff --git a/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp b/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp
index 9b7d5df8c89..d706f41dce2 100644
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp
@@ -12,6 +12,7 @@
 #include <AK/MemoryStream.h>
 #include <LibCompress/DeflateTables.h>
 #include <LibGfx/Bitmap.h>
+#include <LibGfx/ImageFormats/WebPSharedLossless.h>
 #include <LibGfx/ImageFormats/WebPWriterLossless.h>
 
 namespace Gfx {
@@ -25,7 +26,7 @@ static bool are_all_pixels_opaque(Bitmap const& bitmap)
     return true;
 }
 
-NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, bool all_pixels_are_opaque)
+NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, PrefixCodeGroup const& prefix_code_group)
 {
     // This is currently the hot loop. Keep performance in mind when you change it.
     for (ARGB32 pixel : bitmap) {
@@ -34,15 +35,10 @@ NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream&
         u8 g = pixel >> 8;
         u8 b = pixel;
 
-        // We wrote a huffman table that gives every symbol 8 bits. That means we can write the image data
-        // out uncompressed –- but we do need to reverse the bit order of the bytes.
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[g], 8u));
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[r], 8u));
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[b], 8u));
-
-        // If all pixels are opaque, we wrote a one-element huffman table for alpha, which needs 0 bits per element.
-        if (!all_pixels_are_opaque)
-            TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[a], 8u));
+        TRY(prefix_code_group[0].write_symbol(bit_stream, g));
+        TRY(prefix_code_group[1].write_symbol(bit_stream, r));
+        TRY(prefix_code_group[2].write_symbol(bit_stream, b));
+        TRY(prefix_code_group[3].write_symbol(bit_stream, a));
     }
     return {};
 }
@@ -92,6 +88,7 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
 
     bool all_pixels_are_opaque = are_all_pixels_opaque(bitmap);
 
+    PrefixCodeGroup prefix_code_group;
     int number_of_full_channels = all_pixels_are_opaque ? 3 : 4;
     for (int i = 0; i < number_of_full_channels; ++i) {
         TRY(bit_stream.write_bits(0u, 1u)); // Normal code length code.
@@ -119,6 +116,9 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
             TRY(bit_stream.write_bits(254u, 8u)); // max_symbol = 2 + 254
         }
 
+        auto bits_per_symbol = Array<u8, 256>::from_repeated_value(8);
+        prefix_code_group[i] = TRY(CanonicalCode::from_bytes(bits_per_symbol));
+
         // The code length codes only contain a single entry for '8'. WebP streams with a single element store 0 bits per element.
         // (This is different from deflate, which needs 1 bit per element.)
     }
@@ -129,16 +129,27 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
         TRY(bit_stream.write_bits(0u, 1u));   // num_symbols - 1
         TRY(bit_stream.write_bits(1u, 1u));   // is_first_8bits
         TRY(bit_stream.write_bits(255u, 8u)); // symbol0
+        Array<u8, 256> bits_per_symbol {};
+        // "When coding a single leaf node [...], all but one code length are zeros, and the single leaf node value
+        //  is marked with the length of 1 -- even when no bits are consumed when that single leaf node tree is used."
+        // CanonicalCode follows that convention too, even when describing simple code lengths.
+        bits_per_symbol[255] = 1;
+        prefix_code_group[3] = TRY(CanonicalCode::from_bytes(bits_per_symbol));
     }
 
     // For code #5, use a simple empty code, since we don't use this yet.
+    // "Note: Another special case is when all prefix code lengths are zeros (an empty prefix code). [...]
+    //  empty prefix codes can be coded as those containing a single symbol 0."
     TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code.
     TRY(bit_stream.write_bits(0u, 1u)); // num_symbols - 1
     TRY(bit_stream.write_bits(0u, 1u)); // is_first_8bits
     TRY(bit_stream.write_bits(0u, 1u)); // symbol0
+    Array<u8, 256> bits_per_symbol {};
+    bits_per_symbol[0] = 1; // See comment in `if (all_pixels_are_opaque)` block above.
+    prefix_code_group[4] = TRY(CanonicalCode::from_bytes(bits_per_symbol));
 
     // Image data.
-    TRY(write_image_data(bit_stream, bitmap, all_pixels_are_opaque));
+    TRY(write_image_data(bit_stream, bitmap, prefix_code_group));
 
     // FIXME: Make ~LittleEndianOutputBitStream do this, or make it VERIFY() that it has happened at least.
     TRY(bit_stream.align_to_byte_boundary());