shader_recompiler: More instructions and formats

This commit is contained in:
IndecisiveTurtle 2024-07-30 19:08:27 +03:00
parent c737ba7375
commit d5383c8c34
21 changed files with 167 additions and 50 deletions

View file

@ -559,7 +559,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
}
int PS4_SYSV_ABI sceNetEpollWait() {
LOG_ERROR(Lib_Net, "(STUBBED) called");
LOG_TRACE(Lib_Net, "(STUBBED) called");
return ORBIS_OK;
}

View file

@ -79,7 +79,7 @@ int PS4_SYSV_ABI sceNetCtlUnregisterCallbackV6() {
}
int PS4_SYSV_ABI sceNetCtlCheckCallback() {
LOG_ERROR(Lib_NetCtl, "(STUBBED) called");
LOG_TRACE(Lib_NetCtl, "(STUBBED) called");
return ORBIS_OK;
}

View file

@ -870,7 +870,7 @@ int PS4_SYSV_ABI sceNpAsmTerminate() {
}
int PS4_SYSV_ABI sceNpCheckCallback() {
LOG_ERROR(Lib_NpManager, "(STUBBED) called");
LOG_TRACE(Lib_NpManager, "(STUBBED) called");
return ORBIS_OK;
}
@ -3510,4 +3510,4 @@ void RegisterlibSceNpManager(Core::Loader::SymbolsResolver* sym) {
sceNpUnregisterStateCallbackForToolkit);
};
} // namespace Libraries::NpManager
} // namespace Libraries::NpManager

View file

@ -106,8 +106,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
const auto type = ctx.info.images[handle & 0xFFFF].type;
const Id zero = ctx.u32_zero_value;
const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa &&
type != AmdGpu::ImageType::Buffer};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa};
const auto query{[&](Id type) {
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image);

View file

@ -49,7 +49,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
DefineInterfaces(program);
DefineBuffers(info);
DefineImagesAndSamplers(info);
DefineSharedMemory(info);
DefineSharedMemory();
}
EmitContext::~EmitContext() = default;
@ -399,7 +399,12 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R11fG11fB10f;
}
UNREACHABLE();
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rgba32f;
}
UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}",
image.GetDataFmt(), image.GetNumberFmt());
}
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
@ -419,8 +424,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
case AmdGpu::ImageType::Buffer:
throw NotImplementedException("Image buffer");
default:
break;
}
@ -478,10 +481,14 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
}
}
void EmitContext::DefineSharedMemory(const Info& info) {
if (info.shared_memory_size == 0) {
void EmitContext::DefineSharedMemory() {
static constexpr size_t DefaultSharedMemSize = 16_KB;
if (!info.uses_shared) {
return;
}
if (info.shared_memory_size == 0) {
info.shared_memory_size = DefaultSharedMemSize;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};

View file

@ -232,7 +232,7 @@ private:
void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info);
void DefineSharedMemory(const Info& info);
void DefineSharedMemory();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};

View file

@ -2779,11 +2779,11 @@ constexpr std::array<InstFormat, 256> InstructionFormatDS = {{
// 60 = DS_READ_U16
{InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
// 61 = DS_CONSUME
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 62 = DS_APPEND
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 63 = DS_ORDERED_COUNT
{InstClass::GdsOrdCnt, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},

View file

@ -37,6 +37,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_CMP(ConditionOp::EQ, false, inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_GT_U32:
return S_CMP(ConditionOp::GT, false, inst);
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:

View file

@ -135,6 +135,7 @@ public:
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MIN3_F32(const GcnInst& inst);
void V_MIN3_I32(const GcnInst& inst);
void V_MADMK_F32(const GcnInst& inst);
void V_CUBEMA_F32(const GcnInst& inst);
void V_CUBESC_F32(const GcnInst& inst);

View file

@ -205,6 +205,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_MIN_F32(inst, false);
case Opcode::V_MIN3_F32:
return V_MIN3_F32(inst);
case Opcode::V_MIN3_I32:
return V_MIN3_I32(inst);
case Opcode::V_MIN_LEGACY_F32:
return V_MIN_F32(inst, true);
case Opcode::V_MADMK_F32:
@ -580,6 +582,13 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}
void Translator::V_MIN3_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
}
void Translator::V_MADMK_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};

View file

@ -1,8 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <algorithm>
#include <deque>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
@ -435,8 +434,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}
} else {
const u32 stride = buffer.GetStride();
//ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
// stride);
ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
stride);
}
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
@ -484,7 +483,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const auto tsharp = TrackSharp(tsharp_handle);
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
ASSERT(image.GetType() != AmdGpu::ImageType::Buffer);
if (image.GetType() == AmdGpu::ImageType::Invalid) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f)));
return;
}
u32 image_binding = descriptors.Add(ImageResource{
.sgpr_base = tsharp.sgpr_base,
.dword_offset = tsharp.dword_offset,
@ -495,30 +498,31 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
});
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
if (has_sampler) {
u32 sampler_binding{};
const u32 sampler_binding = [&] {
if (!has_sampler) {
return 0U;
}
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
sampler_binding = descriptors.Add(SamplerResource{
return descriptors.Add(SamplerResource{
.sgpr_base = std::numeric_limits<u32>::max(),
.dword_offset = 0,
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
});
} else {
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud);
sampler_binding = descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
}
image_binding |= (sampler_binding << 16);
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud);
return descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
}();
image_binding |= (sampler_binding << 16);
// Patch image handle
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};

View file

@ -20,11 +20,19 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::LoadSharedU8:
case IR::Opcode::WriteSharedU8:
info.uses_shared_u8 = true;
info.uses_shared = true;
break;
case IR::Opcode::LoadSharedS16:
case IR::Opcode::LoadSharedU16:
case IR::Opcode::WriteSharedU16:
info.uses_shared_u16 = true;
info.uses_shared = true;
break;
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
info.uses_shared = true;
break;
case IR::Opcode::ConvertF32F16:
case IR::Opcode::BitCastF16U16:

View file

@ -178,6 +178,7 @@ struct Info {
bool has_image_gather{};
bool has_image_query{};
bool uses_group_quad{};
bool uses_shared{};
bool uses_shared_u8{};
bool uses_shared_u16{};
bool uses_fp16{};

View file

@ -7,6 +7,77 @@
namespace AmdGpu {
/// Returns the enumerator name of `fmt` as text, e.g. DataFormat::Format8_8 -> "Format8_8".
///
/// A value that matches none of the enumerators listed below yields "Unknown" instead of
/// asserting. This function is what fmt::formatter<AmdGpu::DataFormat> prints, and that
/// formatter is used while building "unknown/unsupported format" diagnostics; asserting here
/// would abort in the middle of formatting and lose the diagnostic that was being produced.
std::string_view NameOf(DataFormat fmt) {
    switch (fmt) {
    case DataFormat::FormatInvalid:
        return "FormatInvalid";
    case DataFormat::Format8:
        return "Format8";
    case DataFormat::Format16:
        return "Format16";
    case DataFormat::Format8_8:
        return "Format8_8";
    case DataFormat::Format32:
        return "Format32";
    case DataFormat::Format16_16:
        return "Format16_16";
    case DataFormat::Format10_11_11:
        return "Format10_11_11";
    case DataFormat::Format11_11_10:
        return "Format11_11_10";
    case DataFormat::Format10_10_10_2:
        return "Format10_10_10_2";
    case DataFormat::Format2_10_10_10:
        return "Format2_10_10_10";
    case DataFormat::Format8_8_8_8:
        return "Format8_8_8_8";
    case DataFormat::Format32_32:
        return "Format32_32";
    case DataFormat::Format16_16_16_16:
        return "Format16_16_16_16";
    case DataFormat::Format32_32_32:
        return "Format32_32_32";
    case DataFormat::Format32_32_32_32:
        return "Format32_32_32_32";
    case DataFormat::Format5_6_5:
        return "Format5_6_5";
    case DataFormat::Format1_5_5_5:
        return "Format1_5_5_5";
    case DataFormat::Format5_5_5_1:
        return "Format5_5_5_1";
    case DataFormat::Format4_4_4_4:
        return "Format4_4_4_4";
    case DataFormat::Format8_24:
        return "Format8_24";
    case DataFormat::Format24_8:
        return "Format24_8";
    case DataFormat::FormatX24_8_32:
        return "FormatX24_8_32";
    case DataFormat::FormatGB_GR:
        return "FormatGB_GR";
    case DataFormat::FormatBG_RG:
        return "FormatBG_RG";
    case DataFormat::Format5_9_9_9:
        return "Format5_9_9_9";
    case DataFormat::FormatBc1:
        return "FormatBc1";
    case DataFormat::FormatBc2:
        return "FormatBc2";
    case DataFormat::FormatBc3:
        return "FormatBc3";
    case DataFormat::FormatBc4:
        return "FormatBc4";
    case DataFormat::FormatBc5:
        return "FormatBc5";
    case DataFormat::FormatBc6:
        return "FormatBc6";
    case DataFormat::FormatBc7:
        return "FormatBc7";
    default:
        // Not a named enumerator: report it as text so the caller's message still gets emitted.
        return "Unknown";
    }
}
std::string_view NameOf(NumberFormat fmt) {
switch (fmt) {
case NumberFormat::Unorm:

View file

@ -61,6 +61,7 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
int NumComponents(DataFormat format);
@ -70,6 +71,16 @@ s32 ComponentOffset(DataFormat format, u32 comp);
} // namespace AmdGpu
/// fmt integration for AmdGpu::DataFormat: a "{}" placeholder prints the text returned by
/// AmdGpu::NameOf for the value.
template <>
struct fmt::formatter<AmdGpu::DataFormat> {
    // No format-spec options are handled; the parse position is returned unchanged.
    constexpr auto parse(format_parse_context& parse_ctx) {
        return parse_ctx.begin();
    }

    // The value parameter is deliberately not called `fmt`, so it does not hide the `fmt`
    // namespace that is qualified through in the body.
    auto format(AmdGpu::DataFormat data_format, format_context& out_ctx) const {
        const auto name = AmdGpu::NameOf(data_format);
        return fmt::format_to(out_ctx.out(), "{}", name);
    }
};
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
constexpr auto parse(format_parse_context& ctx) {

View file

@ -75,7 +75,7 @@ struct Buffer {
static_assert(sizeof(Buffer) == 16); // 128bits
enum class ImageType : u64 {
Buffer = 0,
Invalid = 0,
Color1D = 8,
Color2D = 9,
Color3D = 10,
@ -88,8 +88,8 @@ enum class ImageType : u64 {
constexpr std::string_view NameOf(ImageType type) {
switch (type) {
case ImageType::Buffer:
return "Buffer";
case ImageType::Invalid:
return "Invalid";
case ImageType::Color1D:
return "Color1D";
case ImageType::Color2D:

View file

@ -337,6 +337,8 @@ std::span<const vk::Format> GetAllFormats() {
vk::Format::eR32Sfloat,
vk::Format::eR32Sint,
vk::Format::eR32Uint,
vk::Format::eBc6HUfloatBlock,
vk::Format::eR16G16Unorm,
};
return formats;
}
@ -527,6 +529,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR8G8B8A8Snorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc6 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc6HUfloatBlock;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View file

@ -302,14 +302,6 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
if (compute_key == 0xa71733ca || compute_key == 0xa55ad01d) {
return nullptr;
}
if (compute_key == 4248155022) {
printf("test\n");
}
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);

View file

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 4_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}

View file

@ -35,6 +35,8 @@ struct ImageViewInfo {
struct Image;
constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0};
struct ImageView {
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
ImageId image_id, std::optional<vk::ImageUsageFlags> usage_override = {});

View file

@ -183,6 +183,10 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
}
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
if (info.guest_address == 0) [[unlikely]] {
return slot_image_views[NULL_IMAGE_VIEW_ID];
}
const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id];
auto& usage = image.info.usage;