mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 12:04:45 +00:00
fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails
This commit is contained in:
parent
e006f3a6af
commit
6f9787e733
6 changed files with 46 additions and 36 deletions
|
@ -121,10 +121,12 @@ void Translator::DS_ADD_U32(const GcnInst& inst) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
||||
const IR::U32 value = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
|
||||
|
||||
SetDst(inst.dst[0], value);
|
||||
ir.SetVectorReg(dst_reg, IR::U32{original_val});
|
||||
}
|
||||
|
||||
void Translator::DS_MIN_U32(const GcnInst& inst) {
|
||||
|
@ -132,10 +134,12 @@ void Translator::DS_MIN_U32(const GcnInst& inst) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
||||
const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
|
||||
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
|
||||
|
||||
SetDst(inst.dst[0], value);
|
||||
ir.SetVectorReg(dst_reg, IR::U32{original_val});
|
||||
}
|
||||
|
||||
void Translator::DS_MAX_U32(const GcnInst& inst) {
|
||||
|
@ -143,10 +147,12 @@ void Translator::DS_MAX_U32(const GcnInst& inst) {
|
|||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
||||
const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||
// const IR::Value original_val = ir.LoadShared(32, false, addr_offset);
|
||||
|
||||
SetDst(inst.dst[0], value);
|
||||
ir.SetVectorReg(dst_reg, IR::U32{original_val});
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
|
|
|
@ -458,7 +458,7 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst)
|
|||
info.inst_offset.Assign(mubuf.offset);
|
||||
info.offset_enable.Assign(mubuf.offen);
|
||||
|
||||
// Get vdata value(s)
|
||||
// Get vdata value
|
||||
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
|
||||
|
||||
// Get address of vdata
|
||||
|
@ -469,18 +469,13 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst)
|
|||
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
|
||||
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
|
||||
|
||||
// Get current srsrc value (incorrect)
|
||||
IR::U32 prev_val = ir.GetScalarReg(srsrc);
|
||||
|
||||
// Apply atomic op
|
||||
// derefs srsrc buffer and adds vdata value to it
|
||||
const IR::U32 new_vdata = IR::U32{ir.BufferAtomicIAdd(handle, address, vdata_val, info)};
|
||||
// derefs srsrc buffer and adds vdata value to it, then returns the original (pre-add) value
|
||||
const IR::Value original_val = ir.BufferAtomicIAdd(handle, address, vdata_val, info);
|
||||
|
||||
if (mubuf.glc) {
|
||||
ir.SetVectorReg(vdata, prev_val);
|
||||
ir.SetVectorReg(vdata, IR::U32{original_val});
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
|
|
|
@ -286,26 +286,23 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
|||
}
|
||||
}
|
||||
|
||||
U32U64 IREmitter::SharedAtomicIAdd(const U32U64& a, const U32U64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
}
|
||||
switch (a.Type()) {
|
||||
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, a, b);
|
||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
||||
default:
|
||||
ThrowInvalidType(a.Type());
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMin(const U32& a, const U32& b, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, a, b)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMin32, a, b);
|
||||
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMax(const U32& a, const U32& b, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, a, b)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMax32, a, b);
|
||||
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
|
||||
}
|
||||
|
||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||
|
|
|
@ -84,9 +84,9 @@ public:
|
|||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& a, const U32& b, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& a, const U32& b, bool is_signed);
|
||||
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
|
|
|
@ -96,11 +96,11 @@ OPCODE(StoreBufferFormatF32x4, Void, Opaq
|
|||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
|
||||
// Buffer atomic operations
|
||||
OPCODE(BufferAtomicIAdd32, Opaque, Opaque, Opaque, Opaque )
|
||||
OPCODE(BufferAtomicSMin32, U32, U32, U32, )
|
||||
OPCODE(BufferAtomicUMin32, U32, U32, U32, )
|
||||
OPCODE(BufferAtomicSMax32, U32, U32, U32, )
|
||||
OPCODE(BufferAtomicUMax32, U32, U32, U32, )
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
|
|
|
@ -42,6 +42,17 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::StoreBufferFormatF32x3:
|
||||
case IR::Opcode::StoreBufferFormatF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
case IR::Opcode::BufferAtomicSMin32:
|
||||
case IR::Opcode::BufferAtomicUMin32:
|
||||
case IR::Opcode::BufferAtomicSMax32:
|
||||
case IR::Opcode::BufferAtomicUMax32:
|
||||
case IR::Opcode::BufferAtomicInc32:
|
||||
case IR::Opcode::BufferAtomicDec32:
|
||||
case IR::Opcode::BufferAtomicAnd32:
|
||||
case IR::Opcode::BufferAtomicOr32:
|
||||
case IR::Opcode::BufferAtomicXor32:
|
||||
case IR::Opcode::BufferAtomicExchange32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -108,6 +119,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
|||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
return IR::Type::U32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
|
Loading…
Add table
Reference in a new issue