shader_recompiler: Stub readlane/writelane only for non-compute

This commit is contained in:
IndecisiveTurtle 2024-07-30 02:10:00 +03:00
parent f81e30e0e1
commit 9ca91a1f15
13 changed files with 129 additions and 64 deletions

View file

@ -15,7 +15,7 @@ static u32 screenWidth = 1280;
static u32 screenHeight = 720;
static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select
static std::string logFilter;
static std::string logType = "sync";
static std::string logType = "async";
static bool isDebugDump = false;
static bool isLibc = true;
static bool isShowSplash = false;

View file

@ -208,7 +208,6 @@ public:
} else {
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
}
std::fflush(stdout);
}
private:

View file

@ -5,6 +5,31 @@
namespace Shader::Gcn {
/// Routes a data-share instruction to the routine that translates it.
/// DS_READ / DS_WRITE take (bit_size, is_signed, is_pair, inst). Any opcode
/// without a handler here is reported through LogMissingOpcode.
void Translator::EmitDataShare(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::DS_SWIZZLE_B32:
        DS_SWIZZLE_B32(inst);
        break;

    // Loads: single and paired, 32 and 64 bit.
    case Opcode::DS_READ_B32:
        DS_READ(32, false, false, inst);
        break;
    case Opcode::DS_READ_B64:
        DS_READ(64, false, false, inst);
        break;
    case Opcode::DS_READ2_B32:
        DS_READ(32, false, true, inst);
        break;
    case Opcode::DS_READ2_B64:
        DS_READ(64, false, true, inst);
        break;

    // Stores: single and paired, 32 and 64 bit.
    case Opcode::DS_WRITE_B32:
        DS_WRITE(32, false, false, inst);
        break;
    case Opcode::DS_WRITE_B64:
        DS_WRITE(64, false, false, inst);
        break;
    case Opcode::DS_WRITE2_B32:
        DS_WRITE(32, false, true, inst);
        break;
    case Opcode::DS_WRITE2_B64:
        DS_WRITE(64, false, true, inst);
        break;

    default:
        LogMissingOpcode(inst);
        break;
    }
}
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
const u8 offset0 = inst.control.ds.offset0;
const u8 offset1 = inst.control.ds.offset1;
@ -86,29 +111,14 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
void Translator::EmitDataShare(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::DS_SWIZZLE_B32:
return DS_SWIZZLE_B32(inst);
case Opcode::DS_READ_B32:
return DS_READ(32, false, false, inst);
case Opcode::DS_READ_B64:
return DS_READ(64, false, false, inst);
case Opcode::DS_READ2_B32:
return DS_READ(32, false, true, inst);
case Opcode::DS_READ2_B64:
return DS_READ(64, false, true, inst);
case Opcode::DS_WRITE_B32:
return DS_WRITE(32, false, false, inst);
case Opcode::DS_WRITE_B64:
return DS_WRITE(64, false, false, inst);
case Opcode::DS_WRITE2_B32:
return DS_WRITE(32, false, true, inst);
case Opcode::DS_WRITE2_B64:
return DS_WRITE(64, false, true, inst);
default:
info.translation_failed = true;
}
/// Stub translation of V_READLANE_B32: the first source operand is forwarded
/// to the first destination operand unchanged. Asserts that the shader being
/// translated is not a compute shader.
void Translator::V_READLANE_B32(const GcnInst& inst) {
    ASSERT(info.stage != Stage::Compute);
    const auto forwarded = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], forwarded);
}
/// Stub translation of V_WRITELANE_B32: the first source operand is forwarded
/// to the first destination operand unchanged. Asserts that the shader being
/// translated is not a compute shader.
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
    ASSERT(info.stage != Stage::Compute);
    const auto forwarded = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], forwarded);
}
} // namespace Shader::Gcn

View file

@ -83,7 +83,7 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
case Opcode::S_WQM_B64:
break;
default:
info.translation_failed = true;
LogMissingOpcode(inst);
}
}

View file

@ -7,6 +7,29 @@ namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
/// Routes a scalar-memory instruction to its load routine, passing the number
/// of dwords encoded in the opcode name. Any opcode without a handler here is
/// reported through LogMissingOpcode.
void Translator::EmitScalarMemory(const GcnInst& inst) {
    switch (inst.opcode) {
    // S_LOAD_DWORDXn
    case Opcode::S_LOAD_DWORDX4:
        S_LOAD_DWORD(4, inst);
        break;
    case Opcode::S_LOAD_DWORDX8:
        S_LOAD_DWORD(8, inst);
        break;
    case Opcode::S_LOAD_DWORDX16:
        S_LOAD_DWORD(16, inst);
        break;

    // S_BUFFER_LOAD_DWORD[Xn]
    case Opcode::S_BUFFER_LOAD_DWORD:
        S_BUFFER_LOAD_DWORD(1, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX2:
        S_BUFFER_LOAD_DWORD(2, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX4:
        S_BUFFER_LOAD_DWORD(4, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX8:
        S_BUFFER_LOAD_DWORD(8, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX16:
        S_BUFFER_LOAD_DWORD(16, inst);
        break;

    default:
        LogMissingOpcode(inst);
        break;
    }
}
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const u32 dword_offset = [&] -> u32 {
@ -49,27 +72,4 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
}
void Translator::EmitScalarMemory(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_LOAD_DWORDX4:
return S_LOAD_DWORD(4, inst);
case Opcode::S_LOAD_DWORDX8:
return S_LOAD_DWORD(8, inst);
case Opcode::S_LOAD_DWORDX16:
return S_LOAD_DWORD(16, inst);
case Opcode::S_BUFFER_LOAD_DWORD:
return S_BUFFER_LOAD_DWORD(1, inst);
case Opcode::S_BUFFER_LOAD_DWORDX2:
return S_BUFFER_LOAD_DWORD(2, inst);
case Opcode::S_BUFFER_LOAD_DWORDX4:
return S_BUFFER_LOAD_DWORD(4, inst);
case Opcode::S_BUFFER_LOAD_DWORDX8:
return S_BUFFER_LOAD_DWORD(8, inst);
case Opcode::S_BUFFER_LOAD_DWORDX16:
return S_BUFFER_LOAD_DWORD(16, inst);
default:
info.translation_failed = true;
}
}
} // namespace Shader::Gcn

View file

@ -479,6 +479,14 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
}
}
/// Reports an instruction that has no translation and marks the shader as failed.
///
/// Logs the opcode's enumerator name, its numeric value and the instruction
/// category, then sets info.translation_failed.
///
/// @param inst The instruction whose opcode could not be translated.
void Translator::LogMissingOpcode(const GcnInst& inst) {
    // The original also computed an unused local copy of the numeric opcode;
    // it is dropped here and the log arguments are unchanged.
    LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
              magic_enum::enum_name(inst.opcode), u32(inst.opcode),
              magic_enum::enum_name(inst.category));
    info.translation_failed = true;
}
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
@ -523,12 +531,6 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
default:
UNREACHABLE();
}
if (info.translation_failed) {
const u32 opcode = u32(inst.opcode);
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})",
magic_enum::enum_name(inst.opcode), u32(inst.opcode));
}
}
}

View file

@ -185,6 +185,8 @@ public:
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst);
void S_BARRIER();
// MIMG
@ -204,9 +206,12 @@ private:
void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
void LogMissingOpcode(const GcnInst& inst);
private:
IR::IREmitter ir;
Info& info;
bool opcode_missing = false;
};
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);

View file

@ -159,6 +159,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CMP_NE_U64(inst);
case Opcode::V_READFIRSTLANE_B32:
return V_READFIRSTLANE_B32(inst);
case Opcode::V_READLANE_B32:
return V_READLANE_B32(inst);
case Opcode::V_WRITELANE_B32:
return V_WRITELANE_B32(inst);
case Opcode::V_MAD_F32:
return V_MAD_F32(inst);
@ -285,7 +289,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_CMPX_TRU_U32:
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
default:
info.translation_failed = true;
LogMissingOpcode(inst);
}
}

View file

@ -88,7 +88,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, inst);
default:
info.translation_failed = true;
LogMissingOpcode(inst);
}
}

View file

@ -60,9 +60,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
std::fflush(stdout);
LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
return program;
}

View file

@ -179,6 +179,40 @@ struct Image {
return base_address << 8;
}
/// Packs the four destination-select fields into a single value:
/// X at bit 0, Y at bit 3, Z at bit 6 and W at bit 9.
u32 DstSelect() const {
    u32 packed = dst_sel_x;
    packed |= dst_sel_y << 3;
    packed |= dst_sel_z << 6;
    packed |= dst_sel_w << 9;
    return packed;
}
/// Maps one destination-select value to a printable character:
/// 0 -> '0', 1 -> '1', 4..7 -> 'R', 'G', 'B', 'A'.
/// Any other value hits UNREACHABLE().
static char SelectComp(u32 sel) {
    if (sel == 0) {
        return '0';
    }
    if (sel == 1) {
        return '1';
    }
    if (sel >= 4 && sel <= 7) {
        constexpr char channels[] = {'R', 'G', 'B', 'A'};
        return channels[sel - 4];
    }
    UNREACHABLE();
}
std::string DstSelectName() const {
std::string result = "[";
u32 dst_sel = DstSelect();
for (u32 i = 0; i < 4; i++) {
result += SelectComp(dst_sel & 7);
dst_sel >>= 3;
}
result += ']';
return result;
}
// Returns the stored `pitch` field incremented by one.
// NOTE(review): presumably the descriptor stores the pitch minus one; confirm
// against the field's definition, which is outside this view.
u32 Pitch() const {
return pitch + 1;
}

View file

@ -47,6 +47,13 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
}
}
/// Tells whether a packed destination select (3 bits per channel, X lowest)
/// equals the one accepted pattern for a format with `num_components`
/// channels: selector 4, 5, 6, 7 for the first, second, third and fourth
/// present channel, and 0 for every channel beyond `num_components`.
/// Returns false for any component count outside 1..4.
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
    switch (num_components) {
    case 1:
        return dst_sel == 0b100;
    case 2:
        return dst_sel == 0b101100;
    case 3:
        return dst_sel == 0b110101100;
    case 4:
        return dst_sel == 0b111110101100;
    default:
        return false;
    }
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
: is_storage{is_storage} {
type = ConvertImageViewType(image.GetType());
@ -60,8 +67,13 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
// Check for unfortunate case of storage images being swizzled
if (is_storage && (mapping != vk::ComponentMapping{})) {
LOG_ERROR(Render_Vulkan, "Storage image requires swizzling");
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
if (is_storage && !IsIdentityMapping(image.DstSelect(), num_comps)) {
if (num_comps == 4) {
printf("bad\n");
}
LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}",
num_comps, image.DstSelectName());
mapping = vk::ComponentMapping{};
}
}

View file

@ -149,7 +149,7 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
image_id = slot_images.insert(instance, scheduler, info);
RegisterImage(image_id);
} else {
image_id = image_ids[0];
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
}
Image& image = slot_images[image_id];
@ -188,7 +188,8 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
auto& usage = image.info.usage;
if (view_info.is_storage) {
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderRead |
vk::AccessFlagBits::eShaderWrite);
usage.storage = true;
} else {
const auto new_layout = image.info.IsDepthStencil()