resource tracking for buffer atomic

This commit is contained in:
microsoftv 2024-08-17 13:58:25 -04:00
parent 16cf666e1d
commit 5fe7b4b8ae
5 changed files with 76 additions and 56 deletions

View file

@ -23,13 +23,9 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
// Get srsrc buffer
auto& buffer = ctx.buffers[handle];
// Get address of vdata by vaddr + buffer offset
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
// Get first index of data (4-aligned indices, addr >> 2)
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
// Get pointer to first data value in buffer using index
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);

View file

@ -187,7 +187,7 @@ public:
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst);
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
// Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst);

View file

@ -107,7 +107,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(1, AtomicOp::Add, inst);
return BUFFER_ATOMIC(AtomicOp::Add, inst);
default:
LogMissingOpcode(inst);
}
@ -439,40 +439,54 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
}
}
void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst) {
// Get controls for mubuf-specific instructions
// TODO: U64
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mubuf = inst.control.mubuf;
// Get vaddr register
const IR::VectorReg vaddr{inst.src[0].code};
// Get vdata register
const IR::VectorReg vdata{inst.src[1].code};
// Get srsrc register
const IR::ScalarReg srsrc{inst.src[2].code * 4};
// Get offset value from soffset register
const IR::U32 soffset{GetSrc(inst.src[3])}; // TODO: Use this maybe?
const IR::U32 soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
// Setup instruction info from controls
IR::BufferInstInfo info{};
info.index_enable.Assign(mubuf.idxen);
info.inst_offset.Assign(mubuf.offset);
info.offset_enable.Assign(mubuf.offen);
// Get vdata value
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
// Get address of vdata
const IR::U32 address = ir.GetVectorReg(vaddr);
// Construct srsrc SGPRs
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
// Apply atomic op
// derefs srsrc buffer and adds vdata value to it
// then returns original srsrc buffer value
const IR::Value original_val = ir.BufferAtomicIAdd(handle, address, vdata_val, info);
const IR::Value original_val = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicExchange(handle, address, vdata_val, info);
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
case AtomicOp::Smin:
return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
case AtomicOp::Umin:
return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
case AtomicOp::Smax:
return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
case AtomicOp::Umax:
return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
case AtomicOp::And:
return ir.BufferAtomicAnd(handle, address, vdata_val, info);
case AtomicOp::Or:
return ir.BufferAtomicOr(handle, address, vdata_val, info);
case AtomicOp::Xor:
return ir.BufferAtomicXor(handle, address, vdata_val, info);
case AtomicOp::Inc:
return ir.BufferAtomicInc(handle, address, vdata_val, info);
case AtomicOp::Dec:
return ir.BufferAtomicDec(handle, address, vdata_val, info);
default:
UNREACHABLE();
}
}();
if (mubuf.glc) {
ir.SetVectorReg(vdata, IR::U32{original_val});

View file

@ -102,10 +102,12 @@ public:
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, const Value& a,
bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, const Value& a,
bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
const Value& value, bool is_signed,
BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
const Value& value, bool is_signed,
BufferInstInfo info);
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,

View file

@ -20,6 +20,42 @@ struct SharpLocation {
auto operator<=>(const SharpLocation&) const = default;
};
// Reports whether the opcode of `inst` is one of the 32-bit buffer atomic
// opcodes enumerated below. Any other opcode yields false.
bool IsBufferAtomic(const IR::Inst& inst) {
    const auto opcode = inst.GetOpcode();
    bool is_atomic = false;
    switch (opcode) {
    // Arithmetic and min/max atomics
    case IR::Opcode::BufferAtomicIAdd32:
    case IR::Opcode::BufferAtomicSMin32:
    case IR::Opcode::BufferAtomicUMin32:
    case IR::Opcode::BufferAtomicSMax32:
    case IR::Opcode::BufferAtomicUMax32:
    case IR::Opcode::BufferAtomicInc32:
    case IR::Opcode::BufferAtomicDec32:
    // Bitwise atomics
    case IR::Opcode::BufferAtomicAnd32:
    case IR::Opcode::BufferAtomicOr32:
    case IR::Opcode::BufferAtomicXor32:
    // Exchange
    case IR::Opcode::BufferAtomicExchange32:
        is_atomic = true;
        break;
    default:
        break;
    }
    return is_atomic;
}
// Reports whether `inst` writes to a buffer: true for every StoreBuffer*
// opcode listed below, and otherwise whatever IsBufferAtomic(inst) reports.
bool IsBufferStore(const IR::Inst& inst) {
    const auto opcode = inst.GetOpcode();
    switch (opcode) {
    // Raw float stores
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    // Formatted float stores
    case IR::Opcode::StoreBufferFormatF32:
    case IR::Opcode::StoreBufferFormatF32x2:
    case IR::Opcode::StoreBufferFormatF32x3:
    case IR::Opcode::StoreBufferFormatF32x4:
    // Integer store
    case IR::Opcode::StoreBufferU32:
        return true;
    default:
        break;
    }
    // Not a plain store: defer to the atomic check.
    return IsBufferAtomic(inst);
}
bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
@ -42,20 +78,9 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicSMax32:
case IR::Opcode::BufferAtomicUMax32:
case IR::Opcode::BufferAtomicInc32:
case IR::Opcode::BufferAtomicDec32:
case IR::Opcode::BufferAtomicAnd32:
case IR::Opcode::BufferAtomicOr32:
case IR::Opcode::BufferAtomicXor32:
case IR::Opcode::BufferAtomicExchange32:
return true;
default:
return false;
return IsBufferStore(inst);
}
}
@ -126,23 +151,6 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
}
}
// Reports whether the opcode of `inst` is one of the StoreBuffer* opcodes
// enumerated below. Any other opcode yields false.
bool IsBufferStore(const IR::Inst& inst) {
    const auto opcode = inst.GetOpcode();
    bool is_store = false;
    switch (opcode) {
    // Raw float stores
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    // Formatted float stores
    case IR::Opcode::StoreBufferFormatF32:
    case IR::Opcode::StoreBufferFormatF32x2:
    case IR::Opcode::StoreBufferFormatF32x3:
    case IR::Opcode::StoreBufferFormatF32x4:
    // Integer store
    case IR::Opcode::StoreBufferU32:
        is_store = true;
        break;
    default:
        break;
    }
    return is_store;
}
bool IsImageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageSampleExplicitLod: