BUFFER_ATOMIC | DS_MINMAX_U32

- Emission of BufferAtomicU32
- Addition of Buffer opcodes to IR
- Translator for BUFFER_ATOMIC Opcode
- Translators for DS_MAXMIN_U32 Opcodes
This commit is contained in:
microsoftv 2024-08-14 01:58:58 -04:00
parent bb159eafb9
commit 1ff5c65586
8 changed files with 276 additions and 0 deletions

View file

@ -12,6 +12,19 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
return {scope, semantics};
}
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
auto& buffer = ctx.buffers[handle];
address =
ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index =
ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const Id pointer{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
@ -21,6 +34,52 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
}
} // Anonymous namespace
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr);
}
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
}
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}

View file

@ -77,6 +77,17 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);

View file

@ -25,6 +25,10 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_WRITE(32, false, true, inst);
case Opcode::DS_WRITE2_B64:
return DS_WRITE(64, false, true, inst);
case Opcode::DS_MAX_U32:
return DS_MAX_U32(inst);
case Opcode::DS_MIN_U32:
return DS_MIN_U32(inst);
default:
LogMissingOpcode(inst);
}
@ -110,6 +114,36 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
}
}
void Translator::DS_MAX_U32(const GcnInst& inst) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
const IR::U32 new_value = ir.UMax(old_value, data);
ir.WriteShared(32, new_value, addr_offset);
if (inst.dst[0].type != ScalarType::Undefined) {
SetDst(inst.dst[0], old_value);
}
}
void Translator::DS_MIN_U32(const GcnInst& inst) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
const IR::U32 new_value = ir.UMin(old_value, data);
ir.WriteShared(32, new_value, addr_offset);
if (inst.dst[0].type != ScalarType::Undefined) {
SetDst(inst.dst[0], old_value);
}
}
void Translator::S_BARRIER() {
ir.Barrier();
}

View file

@ -187,6 +187,7 @@ public:
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
// Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst);
@ -196,6 +197,8 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_MAX_U32(const GcnInst& inst);
void DS_MIN_U32(const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst);

View file

@ -80,6 +80,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::BUFFER_STORE_FORMAT_X:
case Opcode::BUFFER_STORE_DWORD:
return BUFFER_STORE_FORMAT(1, false, inst);
case Opcode::BUFFER_STORE_FORMAT_XY:
case Opcode::BUFFER_STORE_DWORDX2:
return BUFFER_STORE_FORMAT(2, false, inst);
case Opcode::BUFFER_STORE_DWORDX3:
@ -87,6 +88,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::BUFFER_STORE_FORMAT_XYZW:
case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, inst);
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(AtomicOp::Add, inst);
default:
LogMissingOpcode(inst);
}
@ -413,6 +416,64 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.StoreBuffer(num_dwords, handle, address, value, info);
}
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf;
IR::VectorReg src_reg{inst.src[1].code};
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value address = [&]() -> IR::Value {
if (mtbuf.idxen && mtbuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1));
}
if (mtbuf.idxen || mtbuf.offen) {
return ir.GetVectorReg(addr_reg);
}
return IR::Value{};
}();
IR::BufferInstInfo info{};
info.index_enable.Assign(mtbuf.idxen);
info.offset_enable.Assign(mtbuf.offen);
info.inst_offset.Assign(mtbuf.offset);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.GetVectorReg(src_reg);
const IR::Value result = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicExchange(handle, address, value, info);
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, value, info);
case AtomicOp::Smin:
return ir.BufferAtomicIMin(handle, address, value, true, info);
case AtomicOp::Umin:
return ir.BufferAtomicUMin(handle, address, value, info);
case AtomicOp::Smax:
return ir.BufferAtomicIMax(handle, address, value, true, info);
case AtomicOp::Umax:
return ir.BufferAtomicUMax(handle, address, value, info);
case AtomicOp::And:
return ir.BufferAtomicAnd(handle, address, value, info);
case AtomicOp::Or:
return ir.BufferAtomicOr(handle, address, value, info);
case AtomicOp::Xor:
return ir.BufferAtomicXor(handle, address, value, info);
case AtomicOp::Inc:
return ir.BufferAtomicInc(handle, address, value, info);
case AtomicOp::Dec:
return ir.BufferAtomicDec(handle, address, value, info);
default:
UNREACHABLE();
}
}();
const IR::U32F32 c_result = static_cast<IR::U32F32>(result);
ir.SetVectorReg(src_reg, c_result);
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code};

View file

@ -357,6 +357,73 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
}
}
Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicSMin(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicUMin(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) {
return is_signed ? BufferAtomicSMin(handle, address, value, info)
: BufferAtomicUMin(handle, address, value, info);
}
Value IREmitter::BufferAtomicSMax(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicUMax(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) {
return is_signed ? BufferAtomicSMax(handle, address, value, info)
: BufferAtomicUMax(handle, address, value, info);
}
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
}
U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId);
}

View file

@ -94,6 +94,34 @@ public:
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSMin(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicUMin(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSMax(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicUMax(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address,
const Value& value,
BufferInstInfo info);
[[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);

View file

@ -90,6 +90,19 @@ OPCODE(StoreBufferF32x3, Void, Opaq
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )