shader_recompiler: Typed buffers as integers

This commit is contained in:
IndecisiveTurtle 2024-09-06 15:37:39 +03:00
parent d2ff0a8a2a
commit 122a87dd23
7 changed files with 77 additions and 103 deletions

View file

@ -218,12 +218,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
}
// Thin wrapper: passes ctx, inst, handle and address straight to EmitLoadBufferF32 and
// returns its result.
// NOTE(review): EmitLoadBufferU32 is defined a second time further down in this listing
// (forwarding to EmitLoadBufferU32xN<1>), so this version is presumably the removed side of
// the diff; the +/- markers are not visible here, so confirm before relying on it.
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferF32(ctx, inst, handle, address);
}
template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
@ -242,20 +238,20 @@ static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
}
}
// Fixed-width buffer load wrappers. Each one forwards ctx, handle and address to the xN
// template helper, with the template argument (1-4) being the only difference between them;
// the IR::Inst* parameter is unnamed and unused.
// NOTE(review): every wrapper below shows two signature lines and two return lines, one
// spelled F32 and one spelled U32. That matches a diff rendered without its +/- markers
// (F32 line replaced by the U32 line), not compilable code - confirm which side is current.
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<1>(ctx, handle, address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<1>(ctx, handle, address);
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<2>(ctx, handle, address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<2>(ctx, handle, address);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<3>(ctx, handle, address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<3>(ctx, handle, address);
}
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<4>(ctx, handle, address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<4>(ctx, handle, address);
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -271,7 +267,7 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
}
template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
@ -283,29 +279,25 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr =
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i);
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i));
}
}
}
// F32-named buffer store wrappers. Each forwards ctx, handle, address and value to
// EmitStoreBufferF32xN with template argument N = 1..4; the `inst` parameter is not used.
// NOTE(review): U32-named equivalents follow directly after these in the listing, so these
// four are presumably the removed side of the diff - confirm against the real hunk markers.
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<2>(ctx, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<4>(ctx, handle, address, value);
}
// U32-named buffer store wrappers. Each forwards ctx, handle, address and value to the xN
// store helper with template argument N = 1..4; the `inst` parameter is not used.
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
// NOTE(review): two calls are shown here, first to the F32xN helper and then to the U32xN
// helper. This is most likely the old and new line of one diff hunk rather than an
// intentional double store - confirm.
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
EmitStoreBufferU32xN<1>(ctx, handle, address, value);
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<2>(ctx, handle, address, value);
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<4>(ctx, handle, address, value);
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View file

@ -64,24 +64,16 @@ void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
// Buffer load/store emitter declarations. Loads take (ctx, inst, handle, address) and return
// an Id; stores take an additional `value` argument and return void.
// NOTE(review): F32, FormatF32x2/x3/x4 and U32 variants are listed together, and
// EmitLoadBufferFormatF32 / EmitStoreBufferFormatF32 are each declared twice. That is
// consistent with removed and added diff lines being shown without +/- markers - confirm
// which set is current before using this list as the header's contents.
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);

View file

@ -384,11 +384,11 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value});
ir.SetVectorReg(dst_reg, IR::U32{value});
return;
}
for (u32 i = 0; i < num_dwords; i++) {
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
}
}
@ -452,21 +452,21 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::VectorReg src_reg{inst.src[1].code};
switch (num_dwords) {
case 1:
value = ir.GetVectorReg<IR::F32>(src_reg);
value = ir.GetVectorReg(src_reg);
break;
case 2:
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
ir.GetVectorReg<IR::F32>(src_reg + 1));
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg),
ir.GetVectorReg(src_reg + 1));
break;
case 3:
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
ir.GetVectorReg<IR::F32>(src_reg + 1),
ir.GetVectorReg<IR::F32>(src_reg + 2));
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg),
ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg(src_reg + 2));
break;
case 4:
value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(src_reg), ir.GetVectorReg<IR::F32>(src_reg + 1),
ir.GetVectorReg<IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 3));
ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
break;
}
const IR::Value handle =

View file

@ -321,13 +321,13 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
BufferInstInfo info) {
switch (num_dwords) {
case 1:
return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address);
return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address);
case 2:
return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address);
return Inst(Opcode::LoadBufferU32x2, Flags{info}, handle, address);
case 3:
return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address);
return Inst(Opcode::LoadBufferU32x3, Flags{info}, handle, address);
case 4:
return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address);
return Inst(Opcode::LoadBufferU32x4, Flags{info}, handle, address);
default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
}
@ -341,17 +341,16 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
const Value& data, BufferInstInfo info) {
switch (num_dwords) {
case 1:
Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32,
Flags{info}, handle, address, data);
Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data);
break;
case 2:
Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
Inst(Opcode::StoreBufferU32x2, Flags{info}, handle, address, data);
break;
case 3:
Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
Inst(Opcode::StoreBufferU32x3, Flags{info}, handle, address, data);
break;
case 4:
Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
Inst(Opcode::StoreBufferU32x4, Flags{info}, handle, address, data);
break;
default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);

View file

@ -51,12 +51,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::Discard:
case Opcode::DiscardCond:
case Opcode::SetAttribute:
case Opcode::StoreBufferF32:
case Opcode::StoreBufferF32x2:
case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::StoreBufferU32:
case Opcode::StoreBufferU32x2:
case Opcode::StoreBufferU32x3:
case Opcode::StoreBufferU32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicUMin32:

View file

@ -76,21 +76,19 @@ OPCODE(UndefU32, U32,
OPCODE(UndefU64, U64, )
// Buffer operations
// Each entry reads OPCODE(name, result type, argument types...). Loads take two Opaque
// arguments; stores take the same two plus a data operand and have result type Void.
// NOTE(review): F32 and U32 spellings of the same load/store opcodes are both listed, and
// LoadBufferFormatF32 / StoreBufferFormatF32 each appear twice - presumably removed and added
// diff lines shown without +/- markers. Confirm which set is current.
OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, )
OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, )
OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
// NOTE(review): this entry gives the data operand as U32x4, while the other
// StoreBufferFormatF32 line above uses F32x4 and LoadBufferFormatF32 returns F32x4. Verify
// that the operand type change on a "FormatF32" opcode is intended.
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, )
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@ -100,7 +98,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )

View file

@ -3,7 +3,6 @@
#include <algorithm>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
return true;
default:
return IsBufferAtomic(inst);
@ -55,11 +53,10 @@ bool IsBufferStore(const IR::Inst& inst) {
bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::LoadBufferU32:
case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
case IR::Opcode::ReadConstBuffer:
return true;
default:
@ -97,18 +94,15 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
return IR::Type::F32;
case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSwap32:
return IR::Type::U32;