Add DXT1-5 decompression on ARM
Some checks are pending
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.0, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / Linux_Build (/rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.0, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / Windows_Build (push) Waiting to run

This commit is contained in:
Ivan Podogov 2025-02-10 18:32:01 +00:00 committed by kd-11
parent 1e01511ca0
commit e72cb6801a
14 changed files with 312 additions and 44 deletions

170
3rdparty/bcdec/bcdec.hpp vendored Normal file
View file

@ -0,0 +1,170 @@
// Based on https://github.com/iOrange/bcdec/blob/963c5e56b7a335e066cff7d16a3de75f4e8ad366/bcdec.h
// provides functions to decompress blocks of BC compressed images
//
// ------------------------------------------------------------------------------
//
// MIT LICENSE
// ===========
// Copyright (c) 2022 Sergii Kudlai
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// ------------------------------------------------------------------------------
#pragma once

#include <cstring>
#include <util/types.hpp>
// Decodes the 8-byte color portion of a BC1/BC3 block into a 4x4 grid of
// 0xAARRGGBB pixels (B,G,R,A byte order in little-endian memory).
//
// compressedBlock  - the 8-byte color block: two u16 RGB565 endpoints followed
//                    by a u32 of 2-bit palette indices
// dstColors        - first destination pixel
// destinationPitch - distance in BYTES between consecutive destination pixel rows
// onlyOpaqueMode   - true for the BC2/BC3 color block, which never selects the
//                    punch-through (BC1A) palette regardless of endpoint order
static void bcdec__color_block(const u8* compressedBlock, u8* dstColors, int destinationPitch, bool onlyOpaqueMode) {
	u16 c0, c1;
	u32 refColors[4]; /* 0xAARRGGBB */
	u32 colorIndices;
	u32 r0, g0, b0, r1, g1, b1, r, g, b;

	// Load endpoints with memcpy: compressedBlock is only byte-aligned, so a
	// reinterpret_cast'ed u16 read is UB and can fault on strict-alignment ARM.
	std::memcpy(&c0, compressedBlock, sizeof(c0));
	std::memcpy(&c1, compressedBlock + 2, sizeof(c1));
	/* Unpack 565 ref colors */
	r0 = (c0 >> 11) & 0x1F;
	g0 = (c0 >> 5) & 0x3F;
	b0 = c0 & 0x1F;
	r1 = (c1 >> 11) & 0x1F;
	g1 = (c1 >> 5) & 0x3F;
	b1 = c1 & 0x1F;
	/* Expand 565 ref colors to 888 (rounded fixed-point, maps 31->255 / 63->255) */
	r = (r0 * 527 + 23) >> 6;
	g = (g0 * 259 + 33) >> 6;
	b = (b0 * 527 + 23) >> 6;
	refColors[0] = 0xFF000000 | (r << 16) | (g << 8) | b;
	r = (r1 * 527 + 23) >> 6;
	g = (g1 * 259 + 33) >> 6;
	b = (b1 * 527 + 23) >> 6;
	refColors[1] = 0xFF000000 | (r << 16) | (g << 8) | b;
	if (c0 > c1 || onlyOpaqueMode)
	{ /* Standard BC1 mode (also BC3 color block uses ONLY this mode) */
		/* color_2 = 2/3*color_0 + 1/3*color_1
		   color_3 = 1/3*color_0 + 2/3*color_1 */
		r = ((2 * r0 + r1) * 351 + 61) >> 7;
		g = ((2 * g0 + g1) * 2763 + 1039) >> 11;
		b = ((2 * b0 + b1) * 351 + 61) >> 7;
		refColors[2] = 0xFF000000 | (r << 16) | (g << 8) | b;
		r = ((r0 + r1 * 2) * 351 + 61) >> 7;
		g = ((g0 + g1 * 2) * 2763 + 1039) >> 11;
		b = ((b0 + b1 * 2) * 351 + 61) >> 7;
		refColors[3] = 0xFF000000 | (r << 16) | (g << 8) | b;
	}
	else
	{ /* Quite rare BC1A mode */
		/* color_2 = 1/2*color_0 + 1/2*color_1;
		   color_3 = 0 (transparent black, the punch-through texel) */
		r = ((r0 + r1) * 1053 + 125) >> 8;
		g = ((g0 + g1) * 4145 + 1019) >> 11;
		b = ((b0 + b1) * 1053 + 125) >> 8;
		refColors[2] = 0xFF000000 | (r << 16) | (g << 8) | b;
		refColors[3] = 0x00000000;
	}

	// Same unaligned-safe load for the 32-bit index word.
	std::memcpy(&colorIndices, compressedBlock + 4, sizeof(colorIndices));
	/* Fill out the decompressed color block: 2 index bits per texel, row-major */
	for (int i = 0; i < 4; ++i)
	{
		for (int j = 0; j < 4; ++j)
		{
			int idx = colorIndices & 0x03;
			*reinterpret_cast<u32*>(dstColors + j * 4) = refColors[idx];
			colorIndices >>= 2;
		}
		dstColors += destinationPitch;
	}
}
// Expands the explicit 4-bit alpha values of a BC2 block.
// Each u16 in 'alpha' holds one row of four nibbles (low nibble first);
// a nibble n widens to 8 bits as n * 17 (== n * 255 / 15). One byte is
// written per texel at a stride of 4 bytes, i.e. into the A channel of
// the interleaved 0xAARRGGBB output.
static void bcdec__sharp_alpha_block(const u16* alpha, u8* decompressed, int destinationPitch) {
	u8* rowOut = decompressed;
	for (int row = 0; row < 4; ++row)
	{
		u16 nibbles = alpha[row];
		for (int col = 0; col < 4; ++col)
		{
			rowOut[col * 4] = static_cast<u8>((nibbles & 0x0F) * 17);
			nibbles >>= 4;
		}
		rowOut += destinationPitch;
	}
}
// Decodes the interpolated 8-byte alpha block of a BC3 (DXT5) texture.
// Two 8-bit alpha endpoints are followed by 16 3-bit palette indices;
// endpoint ordering selects between the 8-value and 6-value palettes.
// One byte is written per texel at a stride of 4 bytes (the A channel
// of the interleaved 0xAARRGGBB output); destinationPitch is in bytes.
static void bcdec__smooth_alpha_block(const u8* compressedBlock, u8* decompressed, int destinationPitch) {
	u8 alpha[8];
	u64 block;
	u64 indices;

	// memcpy instead of reinterpret_cast<const u64*>: the source is only
	// byte-aligned, and an unaligned u64 load is UB (faults on some ARM).
	std::memcpy(&block, compressedBlock, sizeof(block));
	alpha[0] = block & 0xFF;
	alpha[1] = (block >> 8) & 0xFF;
	if (alpha[0] > alpha[1])
	{
		/* 6 interpolated alpha values. */
		alpha[2] = (6 * alpha[0] + alpha[1]) / 7;     /* 6/7*alpha_0 + 1/7*alpha_1 */
		alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; /* 5/7*alpha_0 + 2/7*alpha_1 */
		alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; /* 4/7*alpha_0 + 3/7*alpha_1 */
		alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; /* 3/7*alpha_0 + 4/7*alpha_1 */
		alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; /* 2/7*alpha_0 + 5/7*alpha_1 */
		alpha[7] = (    alpha[0] + 6 * alpha[1]) / 7; /* 1/7*alpha_0 + 6/7*alpha_1 */
	}
	else
	{
		/* 4 interpolated alpha values, plus explicit 0x00 and 0xFF. */
		alpha[2] = (4 * alpha[0] + alpha[1]) / 5;     /* 4/5*alpha_0 + 1/5*alpha_1 */
		alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5; /* 3/5*alpha_0 + 2/5*alpha_1 */
		alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5; /* 2/5*alpha_0 + 3/5*alpha_1 */
		alpha[5] = (    alpha[0] + 4 * alpha[1]) / 5; /* 1/5*alpha_0 + 4/5*alpha_1 */
		alpha[6] = 0x00;
		alpha[7] = 0xFF;
	}

	// 48 index bits start after the two endpoint bytes; 3 bits per texel, row-major.
	indices = block >> 16;
	for (int i = 0; i < 4; ++i)
	{
		for (int j = 0; j < 4; ++j)
		{
			decompressed[j * 4] = alpha[indices & 0x07];
			indices >>= 3;
		}
		decompressed += destinationPitch;
	}
}
// Decompresses a single 8-byte BC1 (DXT1) block into a 4x4 grid of
// 0xAARRGGBB pixels. destinationPitch is the distance in bytes between
// consecutive output pixel rows.
static inline void bcdec_bc1(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
	// false: allow the punch-through (BC1A) palette when c0 <= c1.
	bcdec__color_block(compressedBlock, decompressedBlock, destinationPitch, false);
}
// Decompresses a single 16-byte BC2 (DXT2/3) block into a 4x4 grid of
// 0xAARRGGBB pixels. Layout: bytes 0-7 hold the explicit 4-bit alpha
// values, bytes 8-15 the BC1-style color block. destinationPitch is in bytes.
static inline void bcdec_bc2(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
	// true: BC2 color blocks never use the punch-through palette.
	bcdec__color_block(compressedBlock + 8, decompressedBlock, destinationPitch, true);
	// Alpha lands at byte offset 3 of each pixel (the A channel).
	// NOTE(review): the u16 cast assumes compressedBlock is 2-byte aligned -- confirm for strict-alignment targets.
	bcdec__sharp_alpha_block(reinterpret_cast<const u16*>(compressedBlock), decompressedBlock + 3, destinationPitch);
}
// Decompresses a single 16-byte BC3 (DXT4/5) block into a 4x4 grid of
// 0xAARRGGBB pixels. Layout: bytes 0-7 hold the interpolated alpha block,
// bytes 8-15 the BC1-style color block. destinationPitch is in bytes.
static inline void bcdec_bc3(const u8* compressedBlock, u8* decompressedBlock, int destinationPitch) {
	// true: BC3 color blocks never use the punch-through palette.
	bcdec__color_block(compressedBlock + 8, decompressedBlock, destinationPitch, true);
	// Alpha lands at byte offset 3 of each pixel (the A channel).
	bcdec__smooth_alpha_block(compressedBlock, decompressedBlock + 3, destinationPitch);
}

View file

@ -3,6 +3,7 @@
#include "TextureUtils.h"
#include "../RSXThread.h"
#include "../rsx_utils.h"
#include "3rdparty/bcdec/bcdec.hpp"
#include "util/asm.hpp"
@ -497,6 +498,63 @@ struct copy_rgb655_block_swizzled
}
};
struct copy_decoded_bc1_block
{
static void copy_mipmap_level(std::span<u32> dst, std::span<const u64> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
{
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
for (u32 row = 0; row < row_count * depth; row++)
{
for (u32 col = 0; col < width_in_block; col++) {
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
bcdec_bc1(compressedBlock, decompressedBlock, destinationPitch);
}
src_offset += src_pitch_in_block;
dst_offset += destinationPitch;
}
}
};
struct copy_decoded_bc2_block
{
static void copy_mipmap_level(std::span<u32> dst, std::span<const u128> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
{
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
for (u32 row = 0; row < row_count * depth; row++)
{
for (u32 col = 0; col < width_in_block; col++) {
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
bcdec_bc2(compressedBlock, decompressedBlock, destinationPitch);
}
src_offset += src_pitch_in_block;
dst_offset += destinationPitch;
}
}
};
struct copy_decoded_bc3_block
{
static void copy_mipmap_level(std::span<u32> dst, std::span<const u128> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
{
u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4;
for (u32 row = 0; row < row_count * depth; row++)
{
for (u32 col = 0; col < width_in_block; col++) {
const u8* compressedBlock = reinterpret_cast<const u8*>(&src[src_offset + col]);
u8* decompressedBlock = reinterpret_cast<u8*>(&dst[dst_offset + col * 4]);
bcdec_bc3(compressedBlock, decompressedBlock, destinationPitch);
}
src_offset += src_pitch_in_block;
dst_offset += destinationPitch;
}
}
};
namespace
{
/**
@ -952,6 +1010,12 @@ namespace rsx
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
{
if (!caps.supports_dxt)
{
copy_decoded_bc1_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u64>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
const bool is_3d = depth > 1;
const bool is_po2 = utils::is_power_of_2(src_layout.width_in_texel) && utils::is_power_of_2(src_layout.height_in_texel);
@ -981,8 +1045,22 @@ namespace rsx
}
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
{
if (!caps.supports_dxt)
{
copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u128>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
[[fallthrough]];
}
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
{
if (!caps.supports_dxt)
{
copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u128>(), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
const bool is_3d = depth > 1;
const bool is_po2 = utils::is_power_of_2(src_layout.width_in_texel) && utils::is_power_of_2(src_layout.height_in_texel);
@ -1094,7 +1172,7 @@ namespace rsx
return result;
}
bool is_compressed_host_format(u32 texture_format)
bool is_compressed_host_format(const texture_uploader_capabilities& caps, u32 texture_format)
{
switch (texture_format)
{
@ -1129,7 +1207,7 @@ namespace rsx
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
return true;
return caps.supports_dxt;
}
fmt::throw_exception("Unknown format 0x%x", texture_format);
}

View file

@ -227,6 +227,7 @@ namespace rsx
bool supports_vtc_decoding;
bool supports_hw_deswizzle;
bool supports_zero_copy;
bool supports_dxt;
usz alignment;
};
@ -252,7 +253,7 @@ namespace rsx
u8 get_format_block_size_in_bytes(rsx::surface_color_format format);
u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format);
bool is_compressed_host_format(u32 format); // Returns true for host-compressed formats (DXT)
bool is_compressed_host_format(const texture_uploader_capabilities& caps, u32 format); // Returns true for host-compressed formats (DXT)
u8 get_format_sample_count(rsx::surface_antialiasing antialias);
u32 get_max_depth_value(rsx::surface_depth_format2 format);
bool is_depth_stencil_format(rsx::surface_depth_format2 format);

View file

@ -70,6 +70,7 @@ namespace gl
GLenum get_sized_internal_format(u32 texture_format)
{
const bool supports_dxt = get_driver_caps().EXT_texture_compression_s3tc_supported;
switch (texture_format)
{
case CELL_GCM_TEXTURE_B8: return GL_R8;
@ -92,9 +93,9 @@ namespace gl
case CELL_GCM_TEXTURE_D1R5G5B5: return GL_BGR5_A1;
case CELL_GCM_TEXTURE_D8R8G8B8: return GL_BGRA8;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return GL_RG16F;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT1_EXT : GL_BGRA8;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT3_EXT : GL_BGRA8;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT5_EXT : GL_BGRA8;
case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return GL_RG8;
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8_SNORM;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_BGRA8;
@ -105,6 +106,7 @@ namespace gl
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
{
const bool supports_dxt = get_driver_caps().EXT_texture_compression_s3tc_supported;
switch (texture_format)
{
case CELL_GCM_TEXTURE_B8: return std::make_tuple(GL_RED, GL_UNSIGNED_BYTE);
@ -127,9 +129,9 @@ namespace gl
case CELL_GCM_TEXTURE_D1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV);
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return std::make_tuple(GL_RG, GL_HALF_FLOAT);
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT1_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT3_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(supports_dxt ? GL_COMPRESSED_RGBA_S3TC_DXT5_EXT : GL_BGRA, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return std::make_tuple(GL_RG, GL_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_BYTE);
@ -587,6 +589,7 @@ namespace gl
.supports_vtc_decoding = false,
.supports_hw_deswizzle = driver_caps.ARB_compute_shader_supported,
.supports_zero_copy = false,
.supports_dxt = driver_caps.EXT_texture_compression_s3tc_supported,
.alignment = 4
};
@ -596,7 +599,7 @@ namespace gl
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
if (rsx::is_compressed_host_format(format)) [[likely]]
if (rsx::is_compressed_host_format(caps, format)) [[likely]]
{
caps.supports_vtc_decoding = driver_caps.vendor_NVIDIA;
unpack_settings.apply();
@ -687,13 +690,13 @@ namespace gl
if (driver_caps.ARB_compute_shader_supported)
{
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
if (!rsx::is_compressed_host_format(format))
{
// Handle emulated compressed formats with host unpack (R8G8 compressed)
row_pitch = std::max<u64>(row_pitch, dst->pitch());
}
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
// Handle emulated compressed formats with host unpack (R8G8 compressed)
row_pitch = std::max<u64>(row_pitch, dst->pitch());
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
@ -815,7 +818,8 @@ namespace gl
// Calculate staging buffer size
rsx::simple_array<std::byte> data_upload_buf;
if (rsx::is_compressed_host_format(gcm_format))
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
if (rsx::is_compressed_host_format(caps, gcm_format))
{
const auto& desc = subresources_layout[0];
const u32 texture_data_sz = desc.width_in_block * desc.height_in_block * desc.depth * rsx::get_format_block_size_in_bytes(gcm_format);

View file

@ -33,7 +33,7 @@ namespace gl
void capabilities::initialize()
{
int find_count = 18;
int find_count = 19;
int ext_count = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
@ -178,6 +178,13 @@ namespace gl
find_count--;
continue;
}
if (check(ext_name, "GL_EXT_texture_compression_s3tc"))
{
EXT_texture_compression_s3tc_supported = true;
find_count--;
continue;
}
}
// Set GLSL version

View file

@ -41,6 +41,7 @@ namespace gl
bool NV_depth_buffer_float_supported = false;
bool NV_fragment_shader_barycentric_supported = false;
bool ARB_shader_texture_image_samples = false;
bool EXT_texture_compression_s3tc_supported = false;
bool vendor_INTEL = false; // has broken GLSL compiler
bool vendor_AMD = false; // has broken ARB_multidraw

View file

@ -80,6 +80,7 @@ namespace vk
case vk::driver_vendor::LAVAPIPE:
case vk::driver_vendor::V3DV:
case vk::driver_vendor::PANVK:
case vk::driver_vendor::ARM_MALI:
// TODO: Actually bench this. Using 32 for now to match other common configurations.
case vk::driver_vendor::DOZEN:
// Actual optimal size depends on the D3D device. Use 32 since it should work well on both AMD and NVIDIA

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "VKFormats.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include "vkutils/image.h"
@ -193,6 +194,7 @@ namespace vk
VkFormat get_compatible_sampler_format(const gpu_formats_support& support, u32 format)
{
const bool supports_dxt = vk::get_current_renderer()->get_texture_compression_bc_support();
switch (format)
{
#ifndef __APPLE__
@ -213,9 +215,9 @@ namespace vk
#endif
case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM;
case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return supports_dxt ? VK_FORMAT_BC1_RGBA_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return supports_dxt ? VK_FORMAT_BC2_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return supports_dxt ? VK_FORMAT_BC3_UNORM_BLOCK : VK_FORMAT_B8G8R8A8_UNORM;
case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM;
case CELL_GCM_TEXTURE_DEPTH24_D8: return support.d24_unorm_s8? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_D32_SFLOAT_S8_UINT;

View file

@ -145,6 +145,9 @@ namespace vk
case driver_vendor::PANVK:
// Needs more testing
break;
case driver_vendor::ARM_MALI:
// Needs more testing
break;
default:
rsx_log.warning("Unsupported device: %s", gpu_name);
}

View file

@ -188,6 +188,7 @@ namespace vk
case driver_vendor::V3DV:
case driver_vendor::HONEYKRISP:
case driver_vendor::PANVK:
case driver_vendor::ARM_MALI:
break;
}

View file

@ -929,7 +929,7 @@ namespace vk
return *pcmd;
}
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout)
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout, const rsx::texture_uploader_capabilities& caps)
{
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
@ -950,7 +950,7 @@ namespace vk
// We have row_pitch in source coordinates. But some formats have a software decode step which can affect this packing!
// For such formats, the packed pitch on src does not match packed pitch on dst
if (!rsx::is_compressed_host_format(format))
if (!rsx::is_compressed_host_format(caps, format))
{
const auto host_texel_width = vk::get_format_texel_width(dst_image->format());
const auto host_packed_pitch = host_texel_width * layout.width_in_texel;
@ -977,7 +977,8 @@ namespace vk
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
{
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
auto pdev = vk::get_current_renderer();
rsx::texture_uploader_capabilities caps{ .supports_dxt = pdev->get_texture_compression_bc_support(), .alignment = heap_align };
rsx::texture_memory_info opt{};
bool check_caps = true;
@ -997,11 +998,11 @@ namespace vk
for (const rsx::subresource_layout &layout : subresource_layout)
{
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout, caps);
caps.alignment = row_pitch;
// Calculate estimated memory utilization for this subresource
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
image_linear_size = row_pitch * layout.depth * (rsx::is_compressed_host_format(caps, format) ? layout.height_in_block : layout.height_in_texel);
// Only do GPU-side conversion if occupancy is good
if (check_caps)

View file

@ -55,7 +55,8 @@ namespace vk
NVK,
V3DV,
HONEYKRISP,
PANVK
PANVK,
ARM_MALI
};
driver_vendor get_driver_vendor();

View file

@ -134,6 +134,10 @@ namespace vk
// So far only AMD is known to remap image view and border color together. Mark as not required.
custom_border_color_support.require_border_color_remap = get_driver_vendor() != driver_vendor::AMD;
}
// v3dv and PanVK support BC1-BC3 which is all we require, support is reported as false since not all formats are supported
optional_features_support.texture_compression_bc = features.textureCompressionBC
|| get_driver_vendor() == driver_vendor::V3DV || get_driver_vendor() == driver_vendor::PANVK;
}
void physical_device::get_physical_device_properties(bool allow_extensions)
@ -303,9 +307,13 @@ namespace vk
}
if (gpu_name.find("Panfrost") != umax)
{
{ // e.g. "Mali-G610 (Panfrost)"
return driver_vendor::PANVK;
}
else if (gpu_name.find("Mali") != umax)
{ // e.g. "Mali-G610", hence "else"
return driver_vendor::ARM_MALI;
}
return driver_vendor::unknown;
}
@ -336,6 +344,8 @@ namespace vk
return driver_vendor::HONEYKRISP;
case VK_DRIVER_ID_MESA_PANVK:
return driver_vendor::PANVK;
case VK_DRIVER_ID_ARM_PROPRIETARY:
return driver_vendor::ARM_MALI;
default:
// Mobile?
return driver_vendor::unknown;
@ -471,8 +481,7 @@ namespace vk
// Enable hardware features manually
// Currently we require:
// 1. Anisotropic sampling
// 2. DXT support
// 3. Indexable storage buffers
// 2. Indexable storage buffers
VkPhysicalDeviceFeatures enabled_features{};
if (pgpu->shader_types_support.allow_float16)
{
@ -566,7 +575,7 @@ namespace vk
// enabled_features.shaderCullDistance = VK_TRUE; // Alt notation of clip distance
enabled_features.samplerAnisotropy = VK_TRUE;
enabled_features.textureCompressionBC = VK_TRUE;
enabled_features.textureCompressionBC = pgpu->optional_features_support.texture_compression_bc;
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;
// Optionally disable unsupported stuff
@ -659,19 +668,6 @@ namespace vk
enabled_features.logicOp = VK_FALSE;
}
if (!pgpu->features.textureCompressionBC && pgpu->get_driver_vendor() == driver_vendor::V3DV)
{
// v3dv supports BC1-BC3 which is all we require, support is reported as false since not all formats are supported
rsx_log.error("Your GPU running on the V3DV driver does not support full texture block compression. Graphics may not render correctly.");
enabled_features.textureCompressionBC = VK_FALSE;
}
if (!pgpu->features.textureCompressionBC && pgpu->get_driver_vendor() == driver_vendor::PANVK)
{
rsx_log.error("Your GPU running on the PANVK driver does not support full texture block compression. Graphics may not render correctly.");
enabled_features.textureCompressionBC = VK_FALSE;
}
VkDeviceCreateInfo device = {};
device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device.pNext = nullptr;

View file

@ -92,6 +92,7 @@ namespace vk
bool synchronization_2 = false;
bool unrestricted_depth_range = false;
bool extended_device_fault = false;
bool texture_compression_bc = false;
} optional_features_support;
friend class render_device;
@ -190,6 +191,7 @@ namespace vk
bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
bool get_extended_device_fault_support() const { return pgpu->optional_features_support.extended_device_fault; }
bool get_texture_compression_bc_support() const { return pgpu->optional_features_support.texture_compression_bc; }
u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }