fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails

This commit is contained in:
microsoftv 2024-08-17 12:33:08 -04:00
parent e006f3a6af
commit 6f9787e733
6 changed files with 46 additions and 36 deletions

View file

@ -121,10 +121,12 @@ void Translator::DS_ADD_U32(const GcnInst& inst) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::U32 value = ir.SharedAtomicIAdd(addr_offset, data);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
SetDst(inst.dst[0], value);
ir.SetVectorReg(dst_reg, IR::U32{original_val});
}
void Translator::DS_MIN_U32(const GcnInst& inst) {
@ -132,10 +134,12 @@ void Translator::DS_MIN_U32(const GcnInst& inst) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false);
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
SetDst(inst.dst[0], value);
ir.SetVectorReg(dst_reg, IR::U32{original_val});
}
void Translator::DS_MAX_U32(const GcnInst& inst) {
@ -143,10 +147,12 @@ void Translator::DS_MAX_U32(const GcnInst& inst) {
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false);
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
SetDst(inst.dst[0], value);
ir.SetVectorReg(dst_reg, IR::U32{original_val});
}
void Translator::S_BARRIER() {

View file

@ -458,7 +458,7 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst)
info.inst_offset.Assign(mubuf.offset);
info.offset_enable.Assign(mubuf.offen);
// Get vdata value(s)
// Get vdata value
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
// Get address of vdata
@ -469,18 +469,13 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst)
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
// Get current srsrc value (incorrect)
IR::U32 prev_val = ir.GetScalarReg(srsrc);
// Apply atomic op
// derefs srsrc buffer and adds vdata value to it
const IR::U32 new_vdata = IR::U32{ir.BufferAtomicIAdd(handle, address, vdata_val, info)};
// derefs srsrc buffer and adds vdata value to it, then returns
const IR::Value original_val = ir.BufferAtomicIAdd(handle, address, vdata_val, info);
if (mubuf.glc) {
ir.SetVectorReg(vdata, prev_val);
ir.SetVectorReg(vdata, IR::U32{original_val});
}
return;
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {

View file

@ -286,26 +286,23 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
}
}
U32U64 IREmitter::SharedAtomicIAdd(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
}
switch (a.Type()) {
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicIAdd32, a, b);
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
default:
ThrowInvalidType(a.Type());
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicIMin(const U32& a, const U32& b, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, a, b)
: Inst<U32>(Opcode::SharedAtomicUMin32, a, b);
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
}
U32 IREmitter::SharedAtomicIMax(const U32& a, const U32& b, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, a, b)
: Inst<U32>(Opcode::SharedAtomicUMax32, a, b);
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
}
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {

View file

@ -84,9 +84,9 @@ public:
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 SharedAtomicIMin(const U32& a, const U32& b, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& a, const U32& b, bool is_signed);
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);

View file

@ -96,11 +96,11 @@ OPCODE(StoreBufferFormatF32x4, Void, Opaq
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, Opaque, Opaque, Opaque, Opaque )
OPCODE(BufferAtomicSMin32, U32, U32, U32, )
OPCODE(BufferAtomicUMin32, U32, U32, U32, )
OPCODE(BufferAtomicSMax32, U32, U32, U32, )
OPCODE(BufferAtomicUMax32, U32, U32, U32, )
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )

View file

@ -42,6 +42,17 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicSMax32:
case IR::Opcode::BufferAtomicUMax32:
case IR::Opcode::BufferAtomicInc32:
case IR::Opcode::BufferAtomicDec32:
case IR::Opcode::BufferAtomicAnd32:
case IR::Opcode::BufferAtomicOr32:
case IR::Opcode::BufferAtomicXor32:
case IR::Opcode::BufferAtomicExchange32:
return true;
default:
return false;
@ -108,6 +119,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::BufferAtomicIAdd32:
return IR::Type::U32;
default:
UNREACHABLE();