Implementing DS_SUB_U32, DS_INC_U32, DS_DEC_U32, DS_WRITE_SRC2_B32, DS_WRITE_SRC2_B64.

This commit is contained in:
Dmugetsu 2025-04-16 13:09:34 -06:00 committed by Dmugetsu
parent 62a4182aca
commit 9c1827cb45
2 changed files with 68 additions and 0 deletions

View file

@ -13,6 +13,18 @@ void Translator::EmitDataShare(const GcnInst& inst) {
// DS
case Opcode::DS_ADD_U32:
return DS_ADD_U32(inst, false);
case Opcode::DS_SUB_U32:
return DS_SUB_U32(inst, false);
case Opcode::DS_INC_U32:
return DS_INC_U32(inst, false);
case Opcode::DS_DEC_U32:
return DS_DEC_U32(inst, false);
case Opcode::DS_WRITE_SRC2_B32:
return DS_WRITE_SRC2_B32(inst, true);
case Opcode::DS_WRITE_SRC2_B64:
return DS_WRITE_SRC2_B64(inst, true);
case Opcode::DS_SUB_RTN_U32:
return DS_SUB_U32(inst, true);
case Opcode::DS_MIN_I32:
return DS_MIN_U32(inst, true, false);
case Opcode::DS_MAX_I32:
@ -228,6 +240,57 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
}
void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, ir.Imm32(1));
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, ir.Imm32(-1));
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
// Translates DS_WRITE_SRC2_B32 as a 32-bit shared-memory store of src[1] to
// src[0] + ((offset1 << 8) + offset0).
//
// WriteShared is called as (bit_size, value, address). The previous code passed the
// address and the value in the opposite order, which stored the address as data at
// the location named by the data register.
//
// `rtn` is accepted for signature parity with the other DS helpers and is not used.
// NOTE(review): the GCN ISA describes the *_SRC2 writes as taking their source from
// LDS rather than from a data register - confirm a plain register store is acceptable.
void Translator::DS_WRITE_SRC2_B32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const u32 offset = (u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0);
    const IR::U32 addr_offset = ir.IAdd(addr, ir.Imm32(offset));
    ir.WriteShared(32, data, addr_offset);
}
void Translator::DS_WRITE_SRC2_B64(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data0{GetSrc(inst.src[1])};
const IR::U32 data1{GetSrc(inst.src[2])};
const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
const IR::U32 addr_offset = ir.IAdd(addr, ir.Imm32(offset));
ir.WriteShared(64, ir.CompositeConstruct(data0, data1), addr_offset);
}
// Translates DS_SUB_U32 / DS_SUB_RTN_U32: atomically subtracts src[1] from the shared
// word at src[0] + ((offset1 << 8) + offset0). When `rtn` is set, the value returned
// by the atomic is written to inst.dst[0].
//
// Only an atomic add is used here, so the subtraction is expressed as an atomic add of
// the two's-complement negation (0 - data); with 32-bit wrap-around this is equivalent
// to subtracting `data`. The previous code added `data` unmodified, i.e. it behaved
// like DS_ADD_U32.
void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset =
        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    // mem + (0 - data) == mem - data (mod 2^32)
    const IR::U32 negated_data = ir.ISub(ir.Imm32(0u), data);
    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, negated_data);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) {
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};

View file

@ -275,6 +275,11 @@ public:
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
void DS_APPEND(const GcnInst& inst);
void DS_CONSUME(const GcnInst& inst);
// DS atomic / write translators. For the SUB/INC/DEC helpers, `rtn` selects whether
// the value returned by the atomic is written to inst.dst[0]; the WRITE_SRC2 helpers
// accept the flag but do not read it.
void DS_SUB_U32(const GcnInst& inst, bool rtn);
void DS_INC_U32(const GcnInst& inst, bool rtn);
void DS_DEC_U32(const GcnInst& inst, bool rtn);
void DS_WRITE_SRC2_B32(const GcnInst& inst, bool rtn);
void DS_WRITE_SRC2_B64(const GcnInst& inst, bool rtn);
// Buffer Memory
// MUBUF / MTBUF