mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-22 04:25:13 +00:00
LibVideo: Fast-path converting colors by only matrix coefficients
We don't need to run through the whole floating-point color converter for videos that use sRGB transfer characteristics and BT.709 color primaries. This commit adds a new templated inlining function to ColorConverter to do a very fast fixed-point YCbCr to RGB conversion. With the fast path, frame conversion times go from ~7.8ms down to ~3.7ms. The fast path can benefit a lot more from extra SIMD vector width, as well.
This commit is contained in:
parent
d6b867ba89
commit
b10da81c7c
Notes:
sideshowbarker
2024-07-17 00:47:29 +09:00
Author: https://github.com/Zaggy1024 Commit: https://github.com/SerenityOS/serenity/commit/b10da81c7c Pull-request: https://github.com/SerenityOS/serenity/pull/18437 Reviewed-by: https://github.com/nico Reviewed-by: https://github.com/trflynn89 ✅
3 changed files with 152 additions and 37 deletions
|
@ -14,14 +14,12 @@
|
|||
|
||||
namespace Video {
|
||||
|
||||
DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints cicp)
|
||||
DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp)
|
||||
{
|
||||
// We'll need to apply tonemapping for linear HDR values.
|
||||
bool should_tonemap = false;
|
||||
switch (cicp.transfer_characteristics()) {
|
||||
switch (input_cicp.transfer_characteristics()) {
|
||||
case TransferCharacteristics::SMPTE2084:
|
||||
should_tonemap = true;
|
||||
break;
|
||||
case TransferCharacteristics::HLG:
|
||||
should_tonemap = true;
|
||||
break;
|
||||
|
@ -34,7 +32,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
// float 0..1 range.
|
||||
// This can be done with a 3x3 scaling matrix.
|
||||
size_t maximum_value = (1u << bit_depth) - 1;
|
||||
float scale = 1.0 / maximum_value;
|
||||
float scale = 1.0f / maximum_value;
|
||||
FloatMatrix4x4 integer_scaling_matrix = {
|
||||
scale, 0.0f, 0.0f, 0.0f, // y
|
||||
0.0f, scale, 0.0f, 0.0f, // u
|
||||
|
@ -50,7 +48,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
float y_max;
|
||||
float uv_min;
|
||||
float uv_max;
|
||||
if (cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
|
||||
if (input_cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
|
||||
y_min = 16.0f / 255.0f;
|
||||
y_max = 235.0f / 255.0f;
|
||||
uv_min = y_min;
|
||||
|
@ -77,7 +75,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
FloatMatrix4x4 color_conversion_matrix;
|
||||
|
||||
// https://kdashg.github.io/misc/colors/from-coeffs.html
|
||||
switch (cicp.matrix_coefficients()) {
|
||||
switch (input_cicp.matrix_coefficients()) {
|
||||
case MatrixCoefficients::BT709:
|
||||
color_conversion_matrix = {
|
||||
1.0f, 0.0f, 0.78740f, 0.0f, // y
|
||||
|
@ -104,7 +102,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
};
|
||||
break;
|
||||
default:
|
||||
return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(cicp.matrix_coefficients()));
|
||||
return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(input_cicp.matrix_coefficients()));
|
||||
}
|
||||
|
||||
// 4. Apply the inverse transfer function to convert RGB values to the
|
||||
|
@ -113,23 +111,21 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
// up the conversion.
|
||||
auto to_linear_lookup_table = InterpolatedLookupTable<to_linear_size>::create(
|
||||
[&](float value) {
|
||||
return TransferCharacteristicsConversion::to_linear_luminance(value, cicp.transfer_characteristics());
|
||||
return TransferCharacteristicsConversion::to_linear_luminance(value, input_cicp.transfer_characteristics());
|
||||
});
|
||||
|
||||
// 5. Convert the RGB color to CIE XYZ coordinates using the input color
|
||||
// primaries and then to the output color primaries.
|
||||
// This is done with two 3x3 matrices that can be combined into one
|
||||
// matrix multiplication.
|
||||
ColorPrimaries output_cp = ColorPrimaries::BT709;
|
||||
FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(cicp.color_primaries(), output_cp));
|
||||
FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(input_cicp.color_primaries(), output_cicp.color_primaries()));
|
||||
|
||||
// 6. Apply the output transfer function. For HDR color spaces, this
|
||||
// should apply tonemapping as well.
|
||||
// Use a lookup table as with step 4.
|
||||
TransferCharacteristics output_tc = TransferCharacteristics::SRGB;
|
||||
auto to_non_linear_lookup_table = InterpolatedLookupTable<to_non_linear_size>::create(
|
||||
[&](float value) {
|
||||
return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_tc);
|
||||
return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_cicp.transfer_characteristics());
|
||||
});
|
||||
|
||||
// Expand color primaries matrix with identity elements.
|
||||
|
@ -152,10 +148,10 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
|
|||
1.0f, // w
|
||||
};
|
||||
|
||||
bool should_skip_color_remapping = output_cp == cicp.color_primaries() && output_tc == cicp.transfer_characteristics();
|
||||
bool should_skip_color_remapping = output_cicp.color_primaries() == input_cicp.color_primaries() && output_cicp.transfer_characteristics() == input_cicp.transfer_characteristics();
|
||||
FloatMatrix4x4 input_conversion_matrix = color_conversion_matrix * range_scaling_matrix * integer_scaling_matrix;
|
||||
|
||||
return ColorConverter(bit_depth, cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table);
|
||||
return ColorConverter(bit_depth, input_cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -104,10 +104,10 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
static DecoderErrorOr<ColorConverter> create(u8 bit_depth, CodingIndependentCodePoints cicp);
|
||||
static DecoderErrorOr<ColorConverter> create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp);
|
||||
|
||||
// Referencing https://en.wikipedia.org/wiki/YCbCr
|
||||
ALWAYS_INLINE Gfx::Color convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const
|
||||
ALWAYS_INLINE Gfx::Color convert_yuv(u16 y, u16 u, u16 v) const
|
||||
{
|
||||
auto max_zero = [](FloatVector4 vector) {
|
||||
return FloatVector4(max(0.0f, vector.x()), max(0.0f, vector.y()), max(0.0f, vector.z()), vector.w());
|
||||
|
@ -150,6 +150,99 @@ public:
|
|||
return Gfx::Color(r, g, b);
|
||||
}
|
||||
|
||||
// Fast conversion of 8-bit YUV to full-range RGB.
|
||||
template<MatrixCoefficients MC, VideoFullRangeFlag FR, Unsigned T>
|
||||
static ALWAYS_INLINE Gfx::Color convert_simple_yuv_to_rgb(T y_in, T u_in, T v_in)
|
||||
{
|
||||
static constexpr i32 bit_depth = 8;
|
||||
static constexpr i32 maximum_value = (1 << bit_depth) - 1;
|
||||
static constexpr i32 one = 1 << 14;
|
||||
static constexpr auto fraction = [](i32 numerator, i32 denominator) constexpr {
|
||||
auto temp = static_cast<i64>(numerator) * one;
|
||||
return static_cast<i32>(temp / denominator);
|
||||
};
|
||||
static constexpr auto coef = [](i32 hundred_thousandths) constexpr {
|
||||
return fraction(hundred_thousandths, 100'000);
|
||||
};
|
||||
static constexpr auto multiply = [](i32 a, i32 b) constexpr {
|
||||
return (a * b) / one;
|
||||
};
|
||||
|
||||
struct RangeFactors {
|
||||
i32 y_offset, y_scale;
|
||||
i32 uv_offset, uv_scale;
|
||||
};
|
||||
|
||||
constexpr auto range_factors = [] {
|
||||
RangeFactors range_factors;
|
||||
|
||||
i32 min = 0;
|
||||
i32 y_max = 255;
|
||||
i32 uv_max = 255;
|
||||
|
||||
if constexpr (FR == VideoFullRangeFlag::Studio) {
|
||||
min = 16;
|
||||
y_max = 235;
|
||||
uv_max = 240;
|
||||
}
|
||||
|
||||
range_factors.y_offset = -min * maximum_value / 255;
|
||||
range_factors.y_scale = fraction(255, y_max - min);
|
||||
range_factors.uv_offset = -((min + uv_max) * maximum_value) / (255 * 2);
|
||||
range_factors.uv_scale = fraction(255, uv_max - min) * 2;
|
||||
|
||||
range_factors.y_scale = multiply(range_factors.y_scale, fraction(255, maximum_value));
|
||||
range_factors.uv_scale = multiply(range_factors.uv_scale, fraction(255, maximum_value));
|
||||
|
||||
return range_factors;
|
||||
}();
|
||||
|
||||
i32 y = y_in + range_factors.y_offset;
|
||||
i32 u = u_in + range_factors.uv_offset;
|
||||
i32 v = v_in + range_factors.uv_offset;
|
||||
|
||||
i32 red;
|
||||
i32 green;
|
||||
i32 blue;
|
||||
|
||||
constexpr i32 y_scale = range_factors.y_scale;
|
||||
constexpr i32 uv_scale = range_factors.uv_scale;
|
||||
|
||||
// The equations below will have the following effects:
|
||||
// - Scale the Y, U and V values into the range 0...maximum_value*one for these fixed-point operations.
|
||||
// - Scale the values by the color range defined by VideoFullRangeFlag.
|
||||
// - Scale the U and V values by 2 to put them in the actual YCbCr coordinate space.
|
||||
// - Multiply by the YCbCr coefficients to convert to RGB.
|
||||
if constexpr (MC == MatrixCoefficients::BT709) {
|
||||
red = y * y_scale + v * multiply(coef(78740), uv_scale);
|
||||
green = y * y_scale + u * multiply(coef(-9366), uv_scale) + v * multiply(coef(-23406), uv_scale);
|
||||
blue = y * y_scale + u * multiply(coef(92780), uv_scale);
|
||||
}
|
||||
|
||||
if constexpr (MC == MatrixCoefficients::BT601) {
|
||||
red = y * y_scale + v * multiply(coef(70100), uv_scale);
|
||||
green = y * y_scale + u * multiply(coef(-17207), uv_scale) + v * multiply(coef(-35707), uv_scale);
|
||||
blue = y * y_scale + u * multiply(coef(88600), uv_scale);
|
||||
}
|
||||
|
||||
if constexpr (MC == MatrixCoefficients::BT2020ConstantLuminance) {
|
||||
red = y * y_scale + v * multiply(coef(73730), uv_scale);
|
||||
green = y * y_scale + u * multiply(coef(-8228), uv_scale) + v * multiply(coef(-28568), uv_scale);
|
||||
blue = y * y_scale + u * multiply(coef(94070), uv_scale);
|
||||
}
|
||||
|
||||
red = clamp(red, 0, maximum_value * one);
|
||||
green = clamp(green, 0, maximum_value * one);
|
||||
blue = clamp(blue, 0, maximum_value * one);
|
||||
|
||||
// This compiles down to a bit shift if maximum_value == 255
|
||||
red /= fraction(maximum_value, 255);
|
||||
green /= fraction(maximum_value, 255);
|
||||
blue /= fraction(maximum_value, 255);
|
||||
|
||||
return Gfx::Color(u8(red), u8(green), u8(blue));
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t to_linear_size = 64;
|
||||
static constexpr size_t to_non_linear_size = 64;
|
||||
|
|
|
@ -57,8 +57,8 @@ ALWAYS_INLINE void interpolate_row(u32 const row, u32 const width, u16 const* pl
|
|||
}
|
||||
}
|
||||
|
||||
template<u32 subsampling_horizontal, u32 subsampling_vertical>
|
||||
ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap(ColorConverter const& converter, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
|
||||
template<u32 subsampling_horizontal, u32 subsampling_vertical, typename Convert>
|
||||
ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_subsampled(Convert convert, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
|
||||
{
|
||||
VERIFY(bitmap.width() >= 0 && static_cast<u32>(bitmap.width()) == width);
|
||||
VERIFY(bitmap.height() >= 0 && static_cast<u32>(bitmap.height()) == height);
|
||||
|
@ -99,13 +99,13 @@ ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap(ColorConverter const& conve
|
|||
auto* scan_line_a = bitmap.scanline(static_cast<int>(row));
|
||||
|
||||
for (size_t column = 0; column < width; column++) {
|
||||
scan_line_a[column] = converter.convert_yuv_to_full_range_rgb(y_row_a[column], u_row_a[column], v_row_a[column]).value();
|
||||
scan_line_a[column] = convert(y_row_a[column], u_row_a[column], v_row_a[column]).value();
|
||||
}
|
||||
if constexpr (subsampling_vertical != 0) {
|
||||
auto const* y_row_b = &plane_y[static_cast<size_t>(row + 1) * width];
|
||||
auto* scan_line_b = bitmap.scanline(static_cast<int>(row + 1));
|
||||
for (size_t column = 0; column < width; column++) {
|
||||
scan_line_b[column] = converter.convert_yuv_to_full_range_rgb(y_row_b[column], u_row_b[column], v_row_b[column]).value();
|
||||
scan_line_b[column] = convert(y_row_b[column], u_row_b[column], v_row_b[column]).value();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap(ColorConverter const& conve
|
|||
auto const* y_row = &plane_y[static_cast<size_t>(height - 1) * width];
|
||||
auto* scan_line = bitmap.scanline(static_cast<int>(height - 1));
|
||||
for (size_t column = 0; column < width; column++) {
|
||||
scan_line[column] = converter.convert_yuv_to_full_range_rgb(y_row[column], u_row_a[column], v_row_a[column]).value();
|
||||
scan_line[column] = convert(y_row[column], u_row_a[column], v_row_a[column]).value();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -127,23 +127,49 @@ ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap(ColorConverter const& conve
|
|||
return {};
|
||||
}
|
||||
|
||||
template<u32 subsampling_horizontal, u32 subsampling_vertical>
|
||||
static ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_selecting_converter(CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
|
||||
{
|
||||
constexpr auto output_cicp = CodingIndependentCodePoints(ColorPrimaries::BT709, TransferCharacteristics::SRGB, MatrixCoefficients::BT709, VideoFullRangeFlag::Full);
|
||||
|
||||
if (bit_depth == 8 && cicp.transfer_characteristics() == output_cicp.transfer_characteristics() && cicp.color_primaries() == output_cicp.color_primaries() && cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
|
||||
switch (cicp.matrix_coefficients()) {
|
||||
case MatrixCoefficients::BT709:
|
||||
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT709, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
|
||||
case MatrixCoefficients::BT601:
|
||||
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT601, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
|
||||
case MatrixCoefficients::BT2020ConstantLuminance:
|
||||
case MatrixCoefficients::BT2020NonConstantLuminance:
|
||||
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT2020ConstantLuminance, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
auto converter = TRY(ColorConverter::create(bit_depth, cicp, output_cicp));
|
||||
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([&](u16 y, u16 u, u16 v) { return converter.convert_yuv(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
|
||||
}
|
||||
|
||||
// Maps the two runtime subsampling flags onto the matching compile-time instantiation of
// convert_to_bitmap_selecting_converter() and forwards every other argument unchanged.
static DecoderErrorOr<void> convert_to_bitmap_selecting_subsampling(bool subsampling_horizontal, bool subsampling_vertical, CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
{
    if (subsampling_horizontal) {
        // Horizontally subsampled chroma, with or without vertical subsampling.
        if (subsampling_vertical)
            return convert_to_bitmap_selecting_converter<true, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
        return convert_to_bitmap_selecting_converter<true, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
    }

    // Full horizontal chroma resolution from here on.
    if (subsampling_vertical)
        return convert_to_bitmap_selecting_converter<false, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
    return convert_to_bitmap_selecting_converter<false, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
}
|
||||
|
||||
DecoderErrorOr<void> SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap)
|
||||
{
|
||||
auto converter = TRY(ColorConverter::create(bit_depth(), cicp()));
|
||||
|
||||
if (m_subsampling_horizontal && m_subsampling_vertical) {
|
||||
return convert_to_bitmap<true, true>(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
|
||||
}
|
||||
|
||||
if (m_subsampling_horizontal && !m_subsampling_vertical) {
|
||||
return convert_to_bitmap<true, false>(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
|
||||
}
|
||||
|
||||
if (!m_subsampling_horizontal && m_subsampling_vertical) {
|
||||
return convert_to_bitmap<false, true>(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
|
||||
}
|
||||
|
||||
return convert_to_bitmap<false, false>(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
|
||||
return convert_to_bitmap_selecting_subsampling(m_subsampling_horizontal, m_subsampling_vertical, cicp(), bit_depth(), width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue