mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-19 19:15:26 +00:00
Add DXT1-5 decompression on ARM
Some checks are pending
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.0, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / Windows_Build (push) Waiting to run
Some checks are pending
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.0, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / Windows_Build (push) Waiting to run
This commit is contained in:
parent
1e01511ca0
commit
e72cb6801a
14 changed files with 312 additions and 44 deletions
170
3rdparty/bcdec/bcdec.hpp
vendored
Normal file
170
3rdparty/bcdec/bcdec.hpp
vendored
Normal file
|
@ -0,0 +1,170 @@
|
|||
// Based on https://github.com/iOrange/bcdec/blob/963c5e56b7a335e066cff7d16a3de75f4e8ad366/bcdec.h
|
||||
// provides functions to decompress blocks of BC compressed images
|
||||
//
|
||||
// ------------------------------------------------------------------------------
|
||||
//
|
||||
// MIT LICENSE
|
||||
// ===========
|
||||
// Copyright (c) 2022 Sergii Kudlai
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
|
||||
static void bcdec__color_block(const u8* compressedBlock, u8* dstColors, int destinationPitch, bool onlyOpaqueMode) {
|
||||
u16 c0, c1;
|
||||
u32 refColors[4]; /* 0xAABBGGRR */
|
||||
u32 colorIndices;
|
||||
u32 r0, g0, b0, r1, g1, b1, r, g, b;
|
||||
|
||||
c0 = *reinterpret_cast<const u16*>(compressedBlock);
|
||||
c1 = *(reinterpret_cast<const u16*>(compressedBlock) + 1);
|
||||
|
||||
/* Unpack 565 ref colors */
|
||||
r0 = (c0 >> 11) & 0x1F;
|
||||
g0 = (c0 >> 5) & 0x3F;
|
||||
b0 = c0 & 0x1F;
|
||||
|
||||
r1 = (c1 >> 11) & 0x1F;
|
||||
g1 = (c1 >> 5) & 0x3F;
|
||||
b1 = c1 & 0x1F;
|
||||
|
||||
/* Expand 565 ref colors to 888 */
|
||||
r = (r0 * 527 + 23) >> 6;
|
||||
g = (g0 * 259 + 33) >> 6;
|
||||
b = (b0 * 527 + 23) >> 6;
|
||||
refColors[0] = 0xFF000000 | (r << 16) | (g << 8) | b;
|
||||
|
||||
r = (r1 * 527 + 23) >> 6;
|
||||
g = (g1 * 259 + 33) >> 6;
|
||||
b = (b1 * 527 + 23) >> 6;
|
||||
refColors[1] = 0xFF000000 | (r << 16) | (g << 8) | b;
|
||||
|
||||
if (c0 > c1 || onlyOpaqueMode)
|
||||
{ /* Standard BC1 mode (also BC3 color block uses ONLY this mode) */
|
||||
/* color_2 = 2/3*color_0 + 1/3*color_1
|
||||
color_3 = 1/3*color_0 + 2/3*color_1 */
|
||||
r = ((2 * r0 + r1) * 351 + 61) >> 7;
|
||||
g = ((2 * g0 + g1) * 2763 + 1039) >> 11;
|
||||
b = ((2 * b0 + b1) * 351 + 61) >> 7;
|
||||
refColors[2] = 0xFF000000 | (r << 16) | (g << 8) | b;
|
||||
|
||||
r = ((r0 + r1 * 2) * 351 + 61) >> 7;
|
||||
g = ((g0 + g1 * 2) * 2763 + 1039) >> 11;
|
||||
b = ((b0 + b1 * 2) * 351 + 61) >> 7;
|
||||
refColors[3] = 0xFF000000 | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
else
|
||||
{ /* Quite rare BC1A mode */
|
||||
/* color_2 = 1/2*color_0 + 1/2*color_1;
|
||||
color_3 = 0; */
|
||||
r = ((r0 + r1) * 1053 + 125) >> 8;
|
||||
g = ((g0 + g1) * 4145 + 1019) >> 11;
|
||||
b = ((b0 + b1) * 1053 + 125) >> 8;
|
||||
refColors[2] = 0xFF000000 | (r << 16) | (g << 8) | b;
|
||||
|
||||
refColors[3] = 0x00000000;
|
||||
}
|
||||
|
||||
colorIndices = *reinterpret_cast<const u32*>(compressedBlock + 4);
|
||||
|
||||
/* Fill out the decompressed color block */
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
int idx = colorIndices & 0x03;
|
||||
*reinterpret_cast<u32*>(dstColors + j * 4) = refColors[idx];
|
||||
colorIndices >>= 2;
|
||||
}
|
||||
|
||||
dstColors += destinationPitch;
|
||||
}
|
||||
}
|
||||
|
||||
static void bcdec__sharp_alpha_block(const u16* alpha, u8* decompressed, int destinationPitch) {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
decompressed[j * 4] = ((alpha[i] >> (4 * j)) & 0x0F) * 17;
|
||||
}
|
||||
decompressed += destinationPitch;
|
||||
}
|
||||
}
|
||||
|
||||
static void bcdec__smooth_alpha_block(const u8* compressedBlock, u8* decompressed, int destinationPitch) {
|
||||
u8 alpha[8];
|
||||
u64 block = *reinterpret_cast<const u64*>(compressedBlock);
|
||||
u64 indices;
|
||||
|
||||
alpha[0] = block & 0xFF;
|
||||
alpha[1] = (block >> 8) & 0xFF;
|
||||
|
||||
if (alpha[0] > alpha[1])
|
||||
{
|
||||
/* 6 interpolated alpha values. */
|
||||
alpha[2] = (6 * alpha[0] + alpha[1]) / 7; /* 6/7*alpha_0 + 1/7*alpha_1 */
|
||||
alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; /* 5/7*alpha_0 + 2/7*alpha_1 */
|
||||
alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; /* 4/7*alpha_0 + 3/7*alpha_1 */
|
||||
alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; /* 3/7*alpha_0 + 4/7*alpha_1 */
|
||||
alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; /* 2/7*alpha_0 + 5/7*alpha_1 */
|
||||
alpha[7] = ( alpha[0] + 6 * alpha[1]) / 7; /* 1/7*alpha_0 + 6/7*alpha_1 */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* 4 interpolated alpha values. */
|
||||
alpha[2] = (4 * alpha[0] + alpha[1]) / 5; /* 4/5*alpha_0 + 1/5*alpha_1 */
|
||||
alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5; /* 3/5*alpha_0 + 2/5*alpha_1 */
|
||||
alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5; /* 2/5*alpha_0 + 3/5*alpha_1 */
|
||||
alpha[5] = ( alpha[0] + 4 * alpha[1]) / 5; /* 1/5*alpha_0 + 4/5*alpha_1 */
|
||||
alpha[6] = 0x00;
|
||||
alpha[7] = 0xFF;
|
||||
}
|
||||
|
||||
indices = block >> 16;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
decompressed[j * 4] = alpha[indices & 0x07];
|
||||
indices >>= 3;
|
||||
}
|
||||
decompressed += destinationPitch;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decodes one 8-byte BC1 block into a 4x4 tile of 32-bit pixels.
 *
 * @param compressedBlock   The 8 encoded bytes.
 * @param decompressedBlock First pixel of the destination tile.
 * @param destinationPitch  Distance in bytes between two consecutive destination rows.
 */
static inline void bcdec_bc1(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
    /* Not forcing opaque mode lets the colour decoder fall back to its
       three-colour + transparent palette when the endpoint order asks for it. */
    constexpr bool forceOpaquePalette = false;

    bcdec__color_block(compressedBlock, decompressedBlock, destinationPitch, forceOpaquePalette);
}
|
||||
|
||||
static inline void bcdec_bc2(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
|
||||
bcdec__color_block(compressedBlock + 8, decompressedBlock, destinationPitch, true);
|
||||
bcdec__sharp_alpha_block(reinterpret_cast<const u16*>(compressedBlock), decompressedBlock + 3, destinationPitch);
|
||||
}
|
||||
|
||||
static inline void bcdec_bc3(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
|
||||
bcdec__color_block(compressedBlock + 8, decompressedBlock, destinationPitch, true);
|
||||
bcdec__smooth_alpha_block(compressedBlock, decompressedBlock + 3, destinationPitch);
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
#include "TextureUtils.h"
|
||||
#include "../RSXThread.h"
|
||||
#include "../rsx_utils.h"
|
||||
#include "3rdparty/bcdec/bcdec.hpp"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
|
||||
|
@ -497,6 +498,63 @@ struct copy_rgb655_block_swizzled
|
|||
}
|
||||
};
|
||||
|
||||
struct copy_decoded_bc1_block
|
||||
{
|
||||
static void copy_mipmap_level(std::span<u32> dst, std::span<const u64> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||
{
|
||||
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
|
||||
for (u32 row = 0; row < row_count * depth; row++)
|
||||
{
|
||||
for (u32 col = 0; col < width_in_block; col++) {
|
||||
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
|
||||
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
|
||||
bcdec_bc1(compressedBlock, decompressedBlock, destinationPitch);
|
||||
}
|
||||
|
||||
src_offset += src_pitch_in_block;
|
||||
dst_offset += destinationPitch;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct copy_decoded_bc2_block
|
||||
{
|
||||
static void copy_mipmap_level(std::span<u32> dst, std::span<const u128> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||
{
|
||||
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
|
||||
for (u32 row = 0; row < row_count * depth; row++)
|
||||
{
|
||||
for (u32 col = 0; col < width_in_block; col++) {
|
||||
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
|
||||
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
|
||||
bcdec_bc2(compressedBlock, decompressedBlock, destinationPitch);
|
||||
}
|
||||
|
||||
src_offset += src_pitch_in_block;
|
||||
dst_offset += destinationPitch;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct copy_decoded_bc3_block
|
||||
{
|
||||
static void copy_mipmap_level(std::span<u32> dst, std::span<const u128> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||
{
|
||||
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
|
||||
for (u32 row = 0; row < row_count * depth; row++)
|
||||
{
|
||||
for (u32 col = 0; col < width_in_block; col++) {
|
||||
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
|
||||
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
|
||||
bcdec_bc3(compressedBlock, decompressedBlock, destinationPitch);
|
||||
}
|
||||
|
||||
src_offset += src_pitch_in_block;
|
||||
dst_offset += destinationPitch;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
/**
|
||||
|
@ -952,6 +1010,12 @@ namespace rsx
|
|||
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
|
||||
{
|
||||
if (!caps.supports_dxt)
|
||||
{
|
||||
copy_decoded_bc1_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u64>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
|
||||
break;
|
||||
}
|
||||
|
||||
const bool is_3d = depth > 1;
|
||||
const bool is_po2 = utils::is_power_of_2(src_layout.width_in_texel) && utils::is_power_of_2(src_layout.height_in_texel);
|
||||
|
||||
|
@ -981,8 +1045,22 @@ namespace rsx
|
|||
}
|
||||
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
|
||||
{
|
||||
if (!caps.supports_dxt)
|
||||
{
|
||||
copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u128>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
}
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
|
||||
{
|
||||
if (!caps.supports_dxt)
|
||||
{
|
||||
copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u128>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
|
||||
break;
|
||||
}
|
||||
|
||||
const bool is_3d = depth > 1;
|
||||
const bool is_po2 = utils::is_power_of_2(src_layout.width_in_texel) && utils::is_power_of_2(src_layout.height_in_texel);
|
||||
|
||||
|
@ -1094,7 +1172,7 @@ namespace rsx
|
|||
return result;
|
||||
}
|
||||
|
||||
bool is_compressed_host_format(u32 texture_format)
|
||||
bool is_compressed_host_format(const texture_uploader_capabilities& caps, u32 texture_format)
|
||||
{
|
||||
switch (texture_format)
|
||||
{
|
||||
|
@ -1129,7 +1207,7 @@ namespace rsx
|
|||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
|
||||
return true;
|
||||
return caps.supports_dxt;
|
||||
}
|
||||
fmt::throw_exception("Unknown format 0x%x", texture_format);
|
||||
}
|
||||
|
|
|
@ -227,6 +227,7 @@ namespace rsx
|
|||
bool supports_vtc_decoding;
|
||||
bool supports_hw_deswizzle;
|
||||
bool supports_zero_copy;
|
||||
bool supports_dxt;
|
||||
usz alignment;
|
||||
};
|
||||
|
||||
|
@ -252,7 +253,7 @@ namespace rsx
|
|||
u8 get_format_block_size_in_bytes(rsx::surface_color_format format);
|
||||
u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format);
|
||||
|
||||
bool is_compressed_host_format(u32 format); // Returns true for host-compressed formats (DXT)
|
||||
bool is_compressed_host_format(const texture_uploader_capabilities& caps, u32 format); // For the DXT1/DXT23/DXT45 formats returns caps.supports_dxt, i.e. true only when the host takes them in compressed form
|
||||
u8 get_format_sample_count(rsx::surface_antialiasing antialias);
|
||||
u32 get_max_depth_value(rsx::surface_depth_format2 format);
|
||||
bool is_depth_stencil_format(rsx::surface_depth_format2 format);
|
||||
|
|
|
@ -70,6 +70,7 @@ namespace gl
|
|||
|
||||
GLenum get_sized_internal_format(u32 texture_format)
|
||||
{
|
||||
const bool supports_dxt = get_driver_caps().EXT_texture_compression_s3tc_supported;
|
||||
switch (texture_format)
|
||||
{
|
||||
case CELL_GCM_TEXTURE_B8: return GL_R8;
|
||||
|
@ -92,9 +93,9 @@ namespace gl
|
|||
case CELL_GCM_TEXTURE_D1R5G5B5: return GL_BGR5_A1;
|
||||
case CELL_GCM_TEXTURE_D8R8G8B8: return GL_BGRA8;
|
||||
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return GL_RG16F;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT1_EXT : GL_BGRA8;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT3_EXT : GL_BGRA8;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT5_EXT : GL_BGRA8;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return GL_RG8;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8_SNORM;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_BGRA8;
|
||||
|
@ -105,6 +106,7 @@ namespace gl
|
|||
|
||||
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
|
||||
{
|
||||
const bool supports_dxt = get_driver_caps().EXT_texture_compression_s3tc_supported;
|
||||
switch (texture_format)
|
||||
{
|
||||
case CELL_GCM_TEXTURE_B8: return std::make_tuple(GL_RED, GL_UNSIGNED_BYTE);
|
||||
|
@ -127,9 +129,9 @@ namespace gl
|
|||
case CELL_GCM_TEXTURE_D1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
|
||||
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV);
|
||||
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return std::make_tuple(GL_RG, GL_HALF_FLOAT);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT1_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT3_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT5_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return std::make_tuple(GL_RG, GL_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_BYTE);
|
||||
|
@ -587,6 +589,7 @@ namespace gl
|
|||
.supports_vtc_decoding = false,
|
||||
.supports_hw_deswizzle = driver_caps.ARB_compute_shader_supported,
|
||||
.supports_zero_copy = false,
|
||||
.supports_dxt = driver_caps.EXT_texture_compression_s3tc_supported,
|
||||
.alignment = 4
|
||||
};
|
||||
|
||||
|
@ -596,7 +599,7 @@ namespace gl
|
|||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
if (rsx::is_compressed_host_format(format)) [[likely]]
|
||||
if (rsx::is_compressed_host_format(caps, format)) [[likely]]
|
||||
{
|
||||
caps.supports_vtc_decoding = driver_caps.vendor_NVIDIA;
|
||||
unpack_settings.apply();
|
||||
|
@ -687,13 +690,13 @@ namespace gl
|
|||
if (driver_caps.ARB_compute_shader_supported)
|
||||
{
|
||||
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
if (!rsx::is_compressed_host_format(format))
|
||||
{
|
||||
// Handle emulated compressed formats with host unpack (R8G8 compressed)
|
||||
row_pitch = std::max<u64>(row_pitch, dst->pitch());
|
||||
}
|
||||
|
||||
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
|
||||
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
|
||||
// Handle emulated compressed formats with host unpack (R8G8 compressed)
|
||||
row_pitch = std::max<u64>(row_pitch, dst->pitch());
|
||||
|
||||
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
|
||||
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
|
||||
|
||||
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
|
||||
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
|
||||
|
@ -815,7 +818,8 @@ namespace gl
|
|||
// Calculate staging buffer size
|
||||
rsx::simple_array<std::byte> data_upload_buf;
|
||||
|
||||
if (rsx::is_compressed_host_format(gcm_format))
|
||||
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
|
||||
if (rsx::is_compressed_host_format(caps, gcm_format))
|
||||
{
|
||||
const auto& desc = subresources_layout[0];
|
||||
const u32 texture_data_sz = desc.width_in_block * desc.height_in_block * desc.depth * rsx::get_format_block_size_in_bytes(gcm_format);
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace gl
|
|||
|
||||
void capabilities::initialize()
|
||||
{
|
||||
int find_count = 18;
|
||||
int find_count = 19;
|
||||
int ext_count = 0;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
||||
|
||||
|
@ -178,6 +178,13 @@ namespace gl
|
|||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_EXT_texture_compression_s3tc"))
|
||||
{
|
||||
EXT_texture_compression_s3tc_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Set GLSL version
|
||||
|
|
|
@ -41,6 +41,7 @@ namespace gl
|
|||
bool NV_depth_buffer_float_supported = false;
|
||||
bool NV_fragment_shader_barycentric_supported = false;
|
||||
bool ARB_shader_texture_image_samples = false;
|
||||
bool EXT_texture_compression_s3tc_supported = false;
|
||||
|
||||
bool vendor_INTEL = false; // has broken GLSL compiler
|
||||
bool vendor_AMD = false; // has broken ARB_multidraw
|
||||
|
|
|
@ -80,6 +80,7 @@ namespace vk
|
|||
case vk::driver_vendor::LAVAPIPE:
|
||||
case vk::driver_vendor::V3DV:
|
||||
case vk::driver_vendor::PANVK:
|
||||
case vk::driver_vendor::ARM_MALI:
|
||||
// TODO: Actually bench this. Using 32 for now to match other common configurations.
|
||||
case vk::driver_vendor::DOZEN:
|
||||
// Actual optimal size depends on the D3D device. Use 32 since it should work well on both AMD and NVIDIA
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
#include "VKFormats.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "vkutils/device.h"
|
||||
#include "vkutils/image.h"
|
||||
|
||||
|
@ -193,6 +194,7 @@ namespace vk
|
|||
|
||||
VkFormat get_compatible_sampler_format(const gpu_formats_support& support, u32 format)
|
||||
{
|
||||
const bool supports_dxt = vk::get_current_renderer()->get_texture_compression_bc_support();
|
||||
switch (format)
|
||||
{
|
||||
#ifndef __APPLE__
|
||||
|
@ -213,9 +215,9 @@ namespace vk
|
|||
#endif
|
||||
case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM;
|
||||
case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return supports_dxt ? VK_FORMAT_BC1_RGBA_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return supports_dxt ? VK_FORMAT_BC2_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return supports_dxt ? VK_FORMAT_BC3_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
|
||||
case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM;
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8: return support.d24_unorm_s8? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
|
|
|
@ -145,6 +145,9 @@ namespace vk
|
|||
case driver_vendor::PANVK:
|
||||
// Needs more testing
|
||||
break;
|
||||
case driver_vendor::ARM_MALI:
|
||||
// Needs more testing
|
||||
break;
|
||||
default:
|
||||
rsx_log.warning("Unsupported device: %s", gpu_name);
|
||||
}
|
||||
|
|
|
@ -188,6 +188,7 @@ namespace vk
|
|||
case driver_vendor::V3DV:
|
||||
case driver_vendor::HONEYKRISP:
|
||||
case driver_vendor::PANVK:
|
||||
case driver_vendor::ARM_MALI:
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -929,7 +929,7 @@ namespace vk
|
|||
return *pcmd;
|
||||
}
|
||||
|
||||
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout)
|
||||
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout, const rsx::texture_uploader_capabilities& caps)
|
||||
{
|
||||
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
|
@ -950,7 +950,7 @@ namespace vk
|
|||
|
||||
// We have row_pitch in source coordinates. But some formats have a software decode step which can affect this packing!
|
||||
// For such formats, the packed pitch on src does not match packed pitch on dst
|
||||
if (!rsx::is_compressed_host_format(format))
|
||||
if (!rsx::is_compressed_host_format(caps, format))
|
||||
{
|
||||
const auto host_texel_width = vk::get_format_texel_width(dst_image->format());
|
||||
const auto host_packed_pitch = host_texel_width * layout.width_in_texel;
|
||||
|
@ -977,7 +977,8 @@ namespace vk
|
|||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
|
||||
{
|
||||
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
|
||||
rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
|
||||
auto pdev = vk::get_current_renderer();
|
||||
rsx::texture_uploader_capabilities caps{ .supports_dxt = pdev->get_texture_compression_bc_support(), .alignment = heap_align };
|
||||
rsx::texture_memory_info opt{};
|
||||
bool check_caps = true;
|
||||
|
||||
|
@ -997,11 +998,11 @@ namespace vk
|
|||
|
||||
for (const rsx::subresource_layout &layout : subresource_layout)
|
||||
{
|
||||
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
|
||||
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout, caps);
|
||||
caps.alignment = row_pitch;
|
||||
|
||||
// Calculate estimated memory utilization for this subresource
|
||||
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
|
||||
image_linear_size = row_pitch * layout.depth * (rsx::is_compressed_host_format(caps, format) ? layout.height_in_block : layout.height_in_texel);
|
||||
|
||||
// Only do GPU-side conversion if occupancy is good
|
||||
if (check_caps)
|
||||
|
|
|
@ -55,7 +55,8 @@ namespace vk
|
|||
NVK,
|
||||
V3DV,
|
||||
HONEYKRISP,
|
||||
PANVK
|
||||
PANVK,
|
||||
ARM_MALI
|
||||
};
|
||||
|
||||
driver_vendor get_driver_vendor();
|
||||
|
|
|
@ -134,6 +134,10 @@ namespace vk
|
|||
// So far only AMD is known to remap image view and border color together. Mark as not required.
|
||||
custom_border_color_support.require_border_color_remap = get_driver_vendor() != driver_vendor::AMD;
|
||||
}
|
||||
|
||||
// v3dv and PanVK support BC1-BC3, which is all we require; they report textureCompressionBC as false only because they do not support every BC format.
|
||||
optional_features_support.texture_compression_bc = features.textureCompressionBC
|
||||
|| get_driver_vendor() == driver_vendor::V3DV || get_driver_vendor() == driver_vendor::PANVK;
|
||||
}
|
||||
|
||||
void physical_device::get_physical_device_properties(bool allow_extensions)
|
||||
|
@ -303,9 +307,13 @@ namespace vk
|
|||
}
|
||||
|
||||
if (gpu_name.find("Panfrost") != umax)
|
||||
{
|
||||
{ // e.g. "Mali-G610 (Panfrost)"
|
||||
return driver_vendor::PANVK;
|
||||
}
|
||||
else if (gpu_name.find("Mali") != umax)
|
||||
{ // e.g. "Mali-G610", hence "else"
|
||||
return driver_vendor::ARM_MALI;
|
||||
}
|
||||
|
||||
return driver_vendor::unknown;
|
||||
}
|
||||
|
@ -336,6 +344,8 @@ namespace vk
|
|||
return driver_vendor::HONEYKRISP;
|
||||
case VK_DRIVER_ID_MESA_PANVK:
|
||||
return driver_vendor::PANVK;
|
||||
case VK_DRIVER_ID_ARM_PROPRIETARY:
|
||||
return driver_vendor::ARM_MALI;
|
||||
default:
|
||||
// Mobile?
|
||||
return driver_vendor::unknown;
|
||||
|
@ -471,8 +481,7 @@ namespace vk
|
|||
// Enable hardware features manually
|
||||
// Currently we require:
|
||||
// 1. Anisotropic sampling
|
||||
// 2. DXT support
|
||||
// 3. Indexable storage buffers
|
||||
// 2. Indexable storage buffers
|
||||
VkPhysicalDeviceFeatures enabled_features{};
|
||||
if (pgpu->shader_types_support.allow_float16)
|
||||
{
|
||||
|
@ -566,7 +575,7 @@ namespace vk
|
|||
// enabled_features.shaderCullDistance = VK_TRUE; // Alt notation of clip distance
|
||||
|
||||
enabled_features.samplerAnisotropy = VK_TRUE;
|
||||
enabled_features.textureCompressionBC = VK_TRUE;
|
||||
enabled_features.textureCompressionBC = pgpu->optional_features_support.texture_compression_bc;
|
||||
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;
|
||||
|
||||
// Optionally disable unsupported stuff
|
||||
|
@ -659,19 +668,6 @@ namespace vk
|
|||
enabled_features.logicOp = VK_FALSE;
|
||||
}
|
||||
|
||||
if (!pgpu->features.textureCompressionBC && pgpu->get_driver_vendor() == driver_vendor::V3DV)
|
||||
{
|
||||
// v3dv supports BC1-BC3 which is all we require, support is reported as false since not all formats are supported
|
||||
rsx_log.error("Your GPU running on the V3DV driver does not support full texture block compression. Graphics may not render correctly.");
|
||||
enabled_features.textureCompressionBC = VK_FALSE;
|
||||
}
|
||||
|
||||
if (!pgpu->features.textureCompressionBC && pgpu->get_driver_vendor() == driver_vendor::PANVK)
|
||||
{
|
||||
rsx_log.error("Your GPU running on the PANVK driver does not support full texture block compression. Graphics may not render correctly.");
|
||||
enabled_features.textureCompressionBC = VK_FALSE;
|
||||
}
|
||||
|
||||
VkDeviceCreateInfo device = {};
|
||||
device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
device.pNext = nullptr;
|
||||
|
|
|
@ -92,6 +92,7 @@ namespace vk
|
|||
bool synchronization_2 = false;
|
||||
bool unrestricted_depth_range = false;
|
||||
bool extended_device_fault = false;
|
||||
bool texture_compression_bc = false;
|
||||
} optional_features_support;
|
||||
|
||||
friend class render_device;
|
||||
|
@ -190,6 +191,7 @@ namespace vk
|
|||
bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
|
||||
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
|
||||
bool get_extended_device_fault_support() const { return pgpu->optional_features_support.extended_device_fault; }
|
||||
bool get_texture_compression_bc_support() const { return pgpu->optional_features_support.texture_compression_bc; }
|
||||
|
||||
u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
|
||||
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }
|
||||
|
|
Loading…
Add table
Reference in a new issue