mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 12:04:45 +00:00
BUFFER_ATOMIC | DS_MINMAX_U32
- Emission of BufferAtomicU32 - Addition of Buffer opcodes to IR - Translator for BUFFER_ATOMIC Opcode - Translators for DS_MAXMIN_U32 Opcodes
This commit is contained in:
parent
bb159eafb9
commit
1ff5c65586
8 changed files with 276 additions and 0 deletions
|
@ -12,6 +12,19 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
|
|||
return {scope, semantics};
|
||||
}
|
||||
|
||||
|
||||
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
address =
|
||||
ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index =
|
||||
ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const Id pointer{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||
}
|
||||
|
||||
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
|
@ -21,6 +34,52 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
|
||||
// TODO: This is not yet implemented
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
|
||||
// TODO: This is not yet implemented
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
|
||||
}
|
||||
|
|
|
@ -77,6 +77,17 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
|
|
|
@ -25,6 +25,10 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||
return DS_WRITE(32, false, true, inst);
|
||||
case Opcode::DS_WRITE2_B64:
|
||||
return DS_WRITE(64, false, true, inst);
|
||||
case Opcode::DS_MAX_U32:
|
||||
return DS_MAX_U32(inst);
|
||||
case Opcode::DS_MIN_U32:
|
||||
return DS_MIN_U32(inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -110,6 +114,36 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MAX_U32(const GcnInst& inst) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
|
||||
const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
|
||||
const IR::U32 new_value = ir.UMax(old_value, data);
|
||||
ir.WriteShared(32, new_value, addr_offset);
|
||||
|
||||
if (inst.dst[0].type != ScalarType::Undefined) {
|
||||
SetDst(inst.dst[0], old_value);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MIN_U32(const GcnInst& inst) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
|
||||
const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
|
||||
const IR::U32 new_value = ir.UMin(old_value, data);
|
||||
ir.WriteShared(32, new_value, addr_offset);
|
||||
|
||||
if (inst.dst[0].type != ScalarType::Undefined) {
|
||||
SetDst(inst.dst[0], old_value);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
|
|
@ -187,6 +187,7 @@ public:
|
|||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
@ -196,6 +197,8 @@ public:
|
|||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_MAX_U32(const GcnInst& inst);
|
||||
void DS_MIN_U32(const GcnInst& inst);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
|
|
|
@ -80,6 +80,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
return BUFFER_STORE_FORMAT(1, false, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_XY:
|
||||
case Opcode::BUFFER_STORE_DWORDX2:
|
||||
return BUFFER_STORE_FORMAT(2, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
|
@ -87,6 +88,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
return BUFFER_STORE_FORMAT(4, false, inst);
|
||||
case Opcode::BUFFER_ATOMIC_ADD:
|
||||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -413,6 +416,64 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
|||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
}
|
||||
|
||||
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
IR::VectorReg src_reg{inst.src[1].code};
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
|
||||
const IR::Value address = [&]() -> IR::Value {
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1));
|
||||
}
|
||||
if (mtbuf.idxen || mtbuf.offen) {
|
||||
return ir.GetVectorReg(addr_reg);
|
||||
}
|
||||
return IR::Value{};
|
||||
}();
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.GetVectorReg(src_reg);
|
||||
|
||||
const IR::Value result = [&] {
|
||||
switch (op) {
|
||||
case AtomicOp::Swap:
|
||||
return ir.BufferAtomicExchange(handle, address, value, info);
|
||||
case AtomicOp::Add:
|
||||
return ir.BufferAtomicIAdd(handle, address, value, info);
|
||||
case AtomicOp::Smin:
|
||||
return ir.BufferAtomicIMin(handle, address, value, true, info);
|
||||
case AtomicOp::Umin:
|
||||
return ir.BufferAtomicUMin(handle, address, value, info);
|
||||
case AtomicOp::Smax:
|
||||
return ir.BufferAtomicIMax(handle, address, value, true, info);
|
||||
case AtomicOp::Umax:
|
||||
return ir.BufferAtomicUMax(handle, address, value, info);
|
||||
case AtomicOp::And:
|
||||
return ir.BufferAtomicAnd(handle, address, value, info);
|
||||
case AtomicOp::Or:
|
||||
return ir.BufferAtomicOr(handle, address, value, info);
|
||||
case AtomicOp::Xor:
|
||||
return ir.BufferAtomicXor(handle, address, value, info);
|
||||
case AtomicOp::Inc:
|
||||
return ir.BufferAtomicInc(handle, address, value, info);
|
||||
case AtomicOp::Dec:
|
||||
return ir.BufferAtomicDec(handle, address, value, info);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
const IR::U32F32 c_result = static_cast<IR::U32F32>(result);
|
||||
ir.SetVectorReg(src_reg, c_result);
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
|
|
@ -357,6 +357,73 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
|
|||
}
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicSMin(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicUMin(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
|
||||
bool is_signed, BufferInstInfo info) {
|
||||
return is_signed ? BufferAtomicSMin(handle, address, value, info)
|
||||
: BufferAtomicUMin(handle, address, value, info);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicSMax(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicUMax(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
|
||||
bool is_signed, BufferInstInfo info) {
|
||||
return is_signed ? BufferAtomicSMax(handle, address, value, info)
|
||||
: BufferAtomicUMax(handle, address, value, info);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
U32 IREmitter::LaneId() {
|
||||
return Inst<U32>(Opcode::LaneId);
|
||||
}
|
||||
|
|
|
@ -94,6 +94,34 @@ public:
|
|||
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
|
||||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicSMin(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicUMin(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
|
||||
const Value& value, bool is_signed, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicSMax(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicUMax(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
|
||||
const Value& value, bool is_signed, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address,
|
||||
const Value& value,
|
||||
BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||
|
|
|
@ -90,6 +90,19 @@ OPCODE(StoreBufferF32x3, Void, Opaq
|
|||
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
|
||||
// Buffer atomic operations
|
||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
|
||||
|
|
Loading…
Add table
Reference in a new issue