Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-04-21 20:14:45 +00:00)
Sort opcodes by their indices. Group them too when applicable
This commit is contained in:
parent dcf245b814
commit 9512696980

7 changed files with 1675 additions and 1596 deletions
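For context, a condensed sketch of the ordering convention this commit applies to each Emit* dispatcher, built only from handler and opcode names that appear in the diff below; it is an illustration of the grouping, not a verbatim excerpt of the patch. Cases are grouped by instruction encoding (SOP2, SOPK, SOP1, SOPC, SOPP, and so on) and listed by opcode index within each group:

// Illustrative only: group cases by encoding, then order by opcode index.
void Translator::EmitScalarAlu(const GcnInst& inst) {
    switch (inst.opcode) {
        // SOP2
    case Opcode::S_ADD_U32:
        return S_ADD_U32(inst);
    case Opcode::S_SUB_U32:
        return S_SUB_U32(inst);
        // SOP1
    case Opcode::S_MOV_B32:
        return S_MOV(inst);
    default:
        LogMissingOpcode(inst);
    }
}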

@@ -7,53 +7,121 @@ namespace Shader::Gcn {

void Translator::EmitDataShare(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::DS_SWIZZLE_B32:
        return DS_SWIZZLE_B32(inst);
    case Opcode::DS_READ_B32:
        return DS_READ(32, false, false, false, inst);
    case Opcode::DS_READ2ST64_B32:
        return DS_READ(32, false, true, true, inst);
    case Opcode::DS_READ_B64:
        return DS_READ(64, false, false, false, inst);
    case Opcode::DS_READ2_B32:
        return DS_READ(32, false, true, false, inst);
    case Opcode::DS_READ2_B64:
        return DS_READ(64, false, true, false, inst);
    case Opcode::DS_WRITE_B32:
        return DS_WRITE(32, false, false, false, inst);
    case Opcode::DS_WRITE2ST64_B32:
        return DS_WRITE(32, false, true, true, inst);
    case Opcode::DS_WRITE_B64:
        return DS_WRITE(64, false, false, false, inst);
    case Opcode::DS_WRITE2_B32:
        return DS_WRITE(32, false, true, false, inst);
    case Opcode::DS_WRITE2_B64:
        return DS_WRITE(64, false, true, false, inst);
    // DS
    case Opcode::DS_ADD_U32:
        return DS_ADD_U32(inst, false);
    case Opcode::DS_MIN_U32:
        return DS_MIN_U32(inst, false, false);
    case Opcode::DS_MIN_I32:
        return DS_MIN_U32(inst, true, false);
    case Opcode::DS_MAX_U32:
        return DS_MAX_U32(inst, false, false);
    case Opcode::DS_MAX_I32:
        return DS_MAX_U32(inst, true, false);
    case Opcode::DS_MIN_U32:
        return DS_MIN_U32(inst, false, false);
    case Opcode::DS_MAX_U32:
        return DS_MAX_U32(inst, false, false);
    case Opcode::DS_WRITE_B32:
        return DS_WRITE(32, false, false, false, inst);
    case Opcode::DS_WRITE2_B32:
        return DS_WRITE(32, false, true, false, inst);
    case Opcode::DS_WRITE2ST64_B32:
        return DS_WRITE(32, false, true, true, inst);
    case Opcode::DS_ADD_RTN_U32:
        return DS_ADD_U32(inst, true);
    case Opcode::DS_MIN_RTN_U32:
        return DS_MIN_U32(inst, false, true);
    case Opcode::DS_MAX_RTN_U32:
        return DS_MAX_U32(inst, false, true);
    case Opcode::DS_APPEND:
        return DS_APPEND(inst);
    case Opcode::DS_SWIZZLE_B32:
        return DS_SWIZZLE_B32(inst);
    case Opcode::DS_READ_B32:
        return DS_READ(32, false, false, false, inst);
    case Opcode::DS_READ2_B32:
        return DS_READ(32, false, true, false, inst);
    case Opcode::DS_READ2ST64_B32:
        return DS_READ(32, false, true, true, inst);
    case Opcode::DS_CONSUME:
        return DS_CONSUME(inst);
    case Opcode::DS_APPEND:
        return DS_APPEND(inst);
    case Opcode::DS_WRITE_B64:
        return DS_WRITE(64, false, false, false, inst);
    case Opcode::DS_WRITE2_B64:
        return DS_WRITE(64, false, true, false, inst);
    case Opcode::DS_READ_B64:
        return DS_READ(64, false, false, false, inst);
    case Opcode::DS_READ2_B64:
        return DS_READ(64, false, true, false, inst);
    default:
        LogMissingOpcode(inst);
    }
}

void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
                          const GcnInst& inst) {
    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
    const IR::VectorReg data0{inst.src[1].code};
    const IR::VectorReg data1{inst.src[2].code};
    if (is_pair) {
        const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
                addr0);
        }
        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
                addr1);
        }
    } else if (bit_size == 64) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
        const IR::Value data =
            ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
        ir.WriteShared(bit_size, data, addr0);
    } else {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
        ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
    }
}

void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
    const u8 offset0 = inst.control.ds.offset0;
    const u8 offset1 = inst.control.ds.offset1;

@@ -102,101 +170,11 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
    }
}

void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
                          const GcnInst& inst) {
    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
    const IR::VectorReg data0{inst.src[1].code};
    const IR::VectorReg data1{inst.src[2].code};
    if (is_pair) {
        const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
                addr0);
        }
        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
                addr1);
        }
    } else if (bit_size == 64) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
        const IR::Value data =
            ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
        ir.WriteShared(bit_size, data, addr0);
    } else {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
        ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
    }
}

void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}

void Translator::S_BARRIER() {
    ir.Barrier();
}

void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
    const IR::ScalarReg dst{inst.dst[0].code};
    const IR::U32 value{GetSrc(inst.src[0])};

    if (info.stage != Stage::Compute) {
        SetDst(inst.dst[0], value);
    } else {
        SetDst(inst.dst[0], ir.ReadFirstLane(value));
    }
}

void Translator::V_READLANE_B32(const GcnInst& inst) {
    const IR::ScalarReg dst{inst.dst[0].code};
    const IR::U32 value{GetSrc(inst.src[0])};
    const IR::U32 lane{GetSrc(inst.src[1])};
    ir.SetScalarReg(dst, ir.ReadLane(value, lane));
}

void Translator::V_WRITELANE_B32(const GcnInst& inst) {
    const IR::VectorReg dst{inst.dst[0].code};
    const IR::U32 value{GetSrc(inst.src[0])};
    const IR::U32 lane{GetSrc(inst.src[1])};
    const IR::U32 old_value{GetSrc(inst.dst[0])};
    ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
void Translator::DS_CONSUME(const GcnInst& inst) {
    const u32 inst_offset = inst.control.ds.offset0;
    const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
    const IR::U32 prev = ir.DataConsume(gds_offset);
    SetDst(inst.dst[0], prev);
}

void Translator::DS_APPEND(const GcnInst& inst) {

@@ -206,11 +184,4 @@ void Translator::DS_APPEND(const GcnInst& inst) {
    SetDst(inst.dst[0], prev);
}

void Translator::DS_CONSUME(const GcnInst& inst) {
    const u32 inst_offset = inst.control.ds.offset0;
    const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
    const IR::U32 prev = ir.DataConsume(gds_offset);
    SetDst(inst.dst[0], prev);
}

} // namespace Shader::Gcn

@@ -17,79 +17,81 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
    }
    default:
        switch (inst.opcode) {
        case Opcode::S_MOV_B32:
            return S_MOV(inst);
        case Opcode::S_MUL_I32:
            return S_MUL_I32(inst);
        case Opcode::S_AND_SAVEEXEC_B64:
            return S_AND_SAVEEXEC_B64(inst);
        case Opcode::S_MOV_B64:
            return S_MOV_B64(inst);
        case Opcode::S_OR_B64:
            return S_OR_B64(NegateMode::None, false, inst);
        case Opcode::S_NOR_B64:
            return S_OR_B64(NegateMode::Result, false, inst);
        case Opcode::S_XOR_B64:
            return S_OR_B64(NegateMode::None, true, inst);
        case Opcode::S_XNOR_B64:
            return S_OR_B64(NegateMode::Result, true, inst);
        case Opcode::S_ORN2_B64:
            return S_OR_B64(NegateMode::Src1, false, inst);
        case Opcode::S_AND_B64:
            return S_AND_B64(NegateMode::None, inst);
        case Opcode::S_NAND_B64:
            return S_AND_B64(NegateMode::Result, inst);
        case Opcode::S_ANDN2_B64:
            return S_AND_B64(NegateMode::Src1, inst);
        case Opcode::S_NOT_B64:
            return S_NOT_B64(inst);
        // SOP2
        case Opcode::S_ADD_U32:
            return S_ADD_U32(inst);
        case Opcode::S_SUB_U32:
            return S_SUB_U32(inst);
        case Opcode::S_ADD_I32:
            return S_ADD_I32(inst);
        case Opcode::S_AND_B32:
            return S_AND_B32(NegateMode::None, inst);
        case Opcode::S_NAND_B32:
            return S_AND_B32(NegateMode::Result, inst);
        case Opcode::S_ANDN2_B32:
            return S_AND_B32(NegateMode::Src1, inst);
        case Opcode::S_ASHR_I32:
            return S_ASHR_I32(inst);
        case Opcode::S_OR_B32:
            return S_OR_B32(inst);
        case Opcode::S_XOR_B32:
            return S_XOR_B32(inst);
        case Opcode::S_LSHL_B32:
            return S_LSHL_B32(inst);
        case Opcode::S_LSHR_B32:
            return S_LSHR_B32(inst);
        case Opcode::S_SUB_I32:
            return S_SUB_U32(inst);
        case Opcode::S_ADDC_U32:
            return S_ADDC_U32(inst);
        case Opcode::S_MIN_I32:
            return S_MIN_U32(true, inst);
        case Opcode::S_MIN_U32:
            return S_MIN_U32(false, inst);
        case Opcode::S_MAX_I32:
            return S_MAX_U32(true, inst);
        case Opcode::S_MAX_U32:
            return S_MAX_U32(false, inst);
        case Opcode::S_CSELECT_B32:
            return S_CSELECT_B32(inst);
        case Opcode::S_CSELECT_B64:
            return S_CSELECT_B64(inst);
        case Opcode::S_BFE_U32:
            return S_BFE_U32(inst);
        case Opcode::S_AND_B32:
            return S_AND_B32(NegateMode::None, inst);
        case Opcode::S_OR_B32:
            return S_OR_B32(inst);
        case Opcode::S_OR_B64:
            return S_OR_B64(NegateMode::None, false, inst);
        case Opcode::S_XOR_B32:
            return S_XOR_B32(inst);
        case Opcode::S_XOR_B64:
            return S_OR_B64(NegateMode::None, true, inst);
        case Opcode::S_ANDN2_B32:
            return S_AND_B32(NegateMode::Src1, inst);
        case Opcode::S_ANDN2_B64:
            return S_AND_B64(NegateMode::Src1, inst);
        case Opcode::S_ORN2_B64:
            return S_OR_B64(NegateMode::Src1, false, inst);
        case Opcode::S_NAND_B32:
            return S_AND_B32(NegateMode::Result, inst);
        case Opcode::S_NAND_B64:
            return S_AND_B64(NegateMode::Result, inst);
        case Opcode::S_NOR_B64:
            return S_OR_B64(NegateMode::Result, false, inst);
        case Opcode::S_XNOR_B64:
            return S_OR_B64(NegateMode::Result, true, inst);
        case Opcode::S_LSHL_B32:
            return S_LSHL_B32(inst);
        case Opcode::S_LSHR_B32:
            return S_LSHR_B32(inst);
        case Opcode::S_ASHR_I32:
            return S_ASHR_I32(inst);
        case Opcode::S_BFM_B32:
            return S_BFM_B32(inst);
        case Opcode::S_BREV_B32:
            return S_BREV_B32(inst);
        case Opcode::S_ADD_U32:
            return S_ADD_U32(inst);
        case Opcode::S_ADDC_U32:
            return S_ADDC_U32(inst);
        case Opcode::S_SUB_U32:
        case Opcode::S_SUB_I32:
            return S_SUB_U32(inst);
        case Opcode::S_MIN_U32:
            return S_MIN_U32(false, inst);
        case Opcode::S_MIN_I32:
            return S_MIN_U32(true, inst);
        case Opcode::S_MAX_U32:
            return S_MAX_U32(false, inst);
        case Opcode::S_MAX_I32:
            return S_MAX_U32(true, inst);
        case Opcode::S_MUL_I32:
            return S_MUL_I32(inst);
        case Opcode::S_BFE_U32:
            return S_BFE_U32(inst);
        case Opcode::S_ABSDIFF_I32:
            return S_ABSDIFF_I32(inst);

        // SOP1
        case Opcode::S_MOV_B32:
            return S_MOV(inst);
        case Opcode::S_MOV_B64:
            return S_MOV_B64(inst);
        case Opcode::S_NOT_B64:
            return S_NOT_B64(inst);
        case Opcode::S_WQM_B64:
            break;
        case Opcode::S_BREV_B32:
            return S_BREV_B32(inst);
        case Opcode::S_AND_SAVEEXEC_B64:
            return S_AND_SAVEEXEC_B64(inst);
        default:
            LogMissingOpcode(inst);
        }

@@ -99,6 +101,7 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {

void Translator::EmitSOPC(const GcnInst& inst) {
    switch (inst.opcode) {
    // SOPC
    case Opcode::S_CMP_EQ_I32:
        return S_CMP(ConditionOp::EQ, true, inst);
    case Opcode::S_CMP_LG_I32:

@@ -131,6 +134,7 @@ void Translator::EmitSOPC(const GcnInst& inst) {

void Translator::EmitSOPK(const GcnInst& inst) {
    switch (inst.opcode) {
    // SOPK
    case Opcode::S_MOVK_I32:
        return S_MOVK(inst);

@ -169,169 +173,78 @@ void Translator::EmitSOPK(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_MOVK(const GcnInst& inst) {
|
||||
const auto simm16 = inst.control.sopk.simm;
|
||||
if (simm16 & (1 << 15)) {
|
||||
// TODO: need to verify the case of imm sign extension
|
||||
UNREACHABLE();
|
||||
}
|
||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||
// SOP2
|
||||
|
||||
void Translator::S_ADD_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_ADDK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
void Translator::S_SUB_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.ISub(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_MULK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
void Translator::S_ADD_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(src0, src1));
|
||||
// TODO: Overflow flag
|
||||
}
|
||||
|
||||
void Translator::S_MOV(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
|
||||
}
|
||||
|
||||
void Translator::S_MUL_I32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
|
||||
void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.IMin(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
||||
void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 lhs = GetSrc(inst.src[0]);
|
||||
const IR::U32 rhs = GetSrc(inst.src[1]);
|
||||
const IR::U1 result = [&] {
|
||||
switch (cond) {
|
||||
case ConditionOp::EQ:
|
||||
return ir.IEqual(lhs, rhs);
|
||||
case ConditionOp::LG:
|
||||
return ir.INotEqual(lhs, rhs);
|
||||
case ConditionOp::GT:
|
||||
return ir.IGreaterThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::GE:
|
||||
return ir.IGreaterThanEqual(lhs, rhs, is_signed);
|
||||
case ConditionOp::LT:
|
||||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
ir.SetScc(result);
|
||||
void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.IMax(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
||||
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
const IR::U32 lhs = GetSrc(inst.dst[0]);
|
||||
const IR::U32 rhs = ir.Imm32(simm16);
|
||||
const IR::U1 result = [&] {
|
||||
switch (cond) {
|
||||
case ConditionOp::EQ:
|
||||
return ir.IEqual(lhs, rhs);
|
||||
case ConditionOp::LG:
|
||||
return ir.INotEqual(lhs, rhs);
|
||||
case ConditionOp::GT:
|
||||
return ir.IGreaterThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::GE:
|
||||
return ir.IGreaterThanEqual(lhs, rhs, is_signed);
|
||||
case ConditionOp::LT:
|
||||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
ir.SetScc(result);
|
||||
void Translator::S_CSELECT_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
// SGPR we have a special IR opcode for SPGRs that act as thread masks.
|
||||
const IR::U1 exec{ir.GetExec()};
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(exec);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Update EXEC.
|
||||
const IR::U1 result = ir.LogicalAnd(exec, src);
|
||||
ir.SetExec(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
void Translator::S_CSELECT_B64(const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(src);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(src);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
IR::U1 src1{get_src(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.LogicalNot(src1);
|
||||
}
|
||||
IR::U1 result = is_xor ? ir.LogicalXor(src0, src1) : ir.LogicalOr(src0, src1);
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ir.SetScc(result);
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)};
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
|
@ -344,6 +257,20 @@ void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_AND_B32(NegateMode negate, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
IR::U32 src1{GetSrc(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.BitwiseNot(src1);
|
||||
}
|
||||
IR::U32 result{ir.BitwiseAnd(src0, src1)};
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.BitwiseNot(result);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
|
@ -382,35 +309,6 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_ADD_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(src0, src1));
|
||||
// TODO: Overflow flag
|
||||
}
|
||||
|
||||
void Translator::S_AND_B32(NegateMode negate, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
IR::U32 src1{GetSrc(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.BitwiseNot(src1);
|
||||
}
|
||||
IR::U32 result{ir.BitwiseAnd(src0, src1)};
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.BitwiseNot(result);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_ASHR_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.ShiftRightArithmetic(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_OR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
|
@ -419,46 +317,30 @@ void Translator::S_OR_B32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_XOR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.BitwiseXor(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_LSHR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.ShiftRightLogical(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_CSELECT_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
|
||||
}
|
||||
|
||||
void Translator::S_CSELECT_B64(const GcnInst& inst) {
|
||||
void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)};
|
||||
IR::U1 src1{get_src(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.LogicalNot(src1);
|
||||
}
|
||||
IR::U1 result = is_xor ? ir.LogicalXor(src0, src1) : ir.LogicalOr(src0, src1);
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
|
@ -471,12 +353,10 @@ void Translator::S_CSELECT_B64(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||
void Translator::S_XOR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
|
||||
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
|
||||
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
|
||||
const IR::U32 result{ir.BitwiseXor(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
@ -489,6 +369,22 @@ void Translator::S_LSHL_B32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_LSHR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.ShiftRightLogical(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_ASHR_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.ShiftRightArithmetic(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_BFM_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{ir.BitwiseAnd(GetSrc(inst.src[0]), ir.Imm32(0x1F))};
|
||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
||||
|
@ -496,6 +392,110 @@ void Translator::S_BFM_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
|
||||
}
|
||||
|
||||
void Translator::S_MUL_I32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
|
||||
}
|
||||
|
||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
|
||||
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
|
||||
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.IAbs(ir.ISub(src0, src1))};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
// SOPK
|
||||
|
||||
void Translator::S_MOVK(const GcnInst& inst) {
|
||||
const auto simm16 = inst.control.sopk.simm;
|
||||
if (simm16 & (1 << 15)) {
|
||||
// TODO: need to verify the case of imm sign extension
|
||||
UNREACHABLE();
|
||||
}
|
||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||
}
|
||||
|
||||
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
const IR::U32 lhs = GetSrc(inst.dst[0]);
|
||||
const IR::U32 rhs = ir.Imm32(simm16);
|
||||
const IR::U1 result = [&] {
|
||||
switch (cond) {
|
||||
case ConditionOp::EQ:
|
||||
return ir.IEqual(lhs, rhs);
|
||||
case ConditionOp::LG:
|
||||
return ir.INotEqual(lhs, rhs);
|
||||
case ConditionOp::GT:
|
||||
return ir.IGreaterThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::GE:
|
||||
return ir.IGreaterThanEqual(lhs, rhs, is_signed);
|
||||
case ConditionOp::LT:
|
||||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_ADDK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
}
|
||||
|
||||
void Translator::S_MULK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
}
|
||||
|
||||
// SOP1
|
||||
|
||||
void Translator::S_MOV(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
}
|
||||
|
||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(src);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(src);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_NOT_B64(const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
|
@ -528,22 +528,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_ADD_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_SUB_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.ISub(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
|
@ -552,35 +536,69 @@ void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
|||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
// SGPR we have a special IR opcode for SPGRs that act as thread masks.
|
||||
const IR::U1 exec{ir.GetExec()};
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(exec);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Update EXEC.
|
||||
const IR::U1 result = ir.LogicalAnd(exec, src);
|
||||
ir.SetExec(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.IMax(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
// SOPC
|
||||
|
||||
void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 lhs = GetSrc(inst.src[0]);
|
||||
const IR::U32 rhs = GetSrc(inst.src[1]);
|
||||
const IR::U1 result = [&] {
|
||||
switch (cond) {
|
||||
case ConditionOp::EQ:
|
||||
return ir.IEqual(lhs, rhs);
|
||||
case ConditionOp::LG:
|
||||
return ir.INotEqual(lhs, rhs);
|
||||
case ConditionOp::GT:
|
||||
return ir.IGreaterThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::GE:
|
||||
return ir.IGreaterThanEqual(lhs, rhs, is_signed);
|
||||
case ConditionOp::LT:
|
||||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.IMin(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
// SOPP
|
||||
|
||||
void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.IAbs(ir.ISub(src0, src1))};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn

@@ -9,6 +9,7 @@ static constexpr u32 SQ_SRC_LITERAL = 0xFF;

void Translator::EmitScalarMemory(const GcnInst& inst) {
    switch (inst.opcode) {
    // SMRD
    case Opcode::S_LOAD_DWORDX4:
        return S_LOAD_DWORD(4, inst);
    case Opcode::S_LOAD_DWORDX8:

@@ -30,6 +31,8 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
    }
}

// SMRD

void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
    const auto& smrd = inst.control.smrd;
    const u32 dword_offset = [&] -> u32 {

@ -61,169 +61,191 @@ public:
|
|||
// Instruction categories
|
||||
void EmitPrologue();
|
||||
void EmitFetch(const GcnInst& inst);
|
||||
void EmitDataShare(const GcnInst& inst);
|
||||
void EmitVectorInterpolation(const GcnInst& inst);
|
||||
void EmitScalarMemory(const GcnInst& inst);
|
||||
void EmitVectorMemory(const GcnInst& inst);
|
||||
void EmitExport(const GcnInst& inst);
|
||||
void EmitFlowControl(u32 pc, const GcnInst& inst);
|
||||
|
||||
void EmitScalarAlu(const GcnInst& inst);
|
||||
void EmitScalarMemory(const GcnInst& inst);
|
||||
void EmitVectorAlu(const GcnInst& inst);
|
||||
void EmitVectorInterpolation(const GcnInst& inst);
|
||||
void EmitDataShare(const GcnInst& inst);
|
||||
void EmitVectorMemory(const GcnInst& inst);
|
||||
|
||||
// Instruction encodings
|
||||
void EmitSOPC(const GcnInst& inst);
|
||||
void EmitSOPK(const GcnInst& inst);
|
||||
|
||||
// Scalar ALU
|
||||
void S_MOVK(const GcnInst& inst);
|
||||
void S_MOV(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
|
||||
void S_MOV_B64(const GcnInst& inst);
|
||||
void S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst);
|
||||
void S_AND_B64(NegateMode negate, const GcnInst& inst);
|
||||
void S_ADD_I32(const GcnInst& inst);
|
||||
void S_AND_B32(NegateMode negate, const GcnInst& inst);
|
||||
void S_ASHR_I32(const GcnInst& inst);
|
||||
void S_OR_B32(const GcnInst& inst);
|
||||
void S_XOR_B32(const GcnInst& inst);
|
||||
void S_LSHR_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B64(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
void S_LSHL_B32(const GcnInst& inst);
|
||||
void S_BFM_B32(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
// SOP2
|
||||
void S_ADD_U32(const GcnInst& inst);
|
||||
void S_SUB_U32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_ADD_I32(const GcnInst& inst);
|
||||
void S_ADDC_U32(const GcnInst& inst);
|
||||
void S_MULK_I32(const GcnInst& inst);
|
||||
void S_ADDK_I32(const GcnInst& inst);
|
||||
void S_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void S_MIN_U32(bool is_signed, const GcnInst& inst);
|
||||
void S_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void S_CSELECT_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B64(const GcnInst& inst);
|
||||
void S_AND_B32(NegateMode negate, const GcnInst& inst);
|
||||
void S_AND_B64(NegateMode negate, const GcnInst& inst);
|
||||
void S_OR_B32(const GcnInst& inst);
|
||||
void S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst);
|
||||
void S_XOR_B32(const GcnInst& inst);
|
||||
void S_LSHL_B32(const GcnInst& inst);
|
||||
void S_LSHR_B32(const GcnInst& inst);
|
||||
void S_ASHR_I32(const GcnInst& inst);
|
||||
void S_BFM_B32(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
void S_ABSDIFF_I32(const GcnInst& inst);
|
||||
|
||||
// SOPK
|
||||
void S_MOVK(const GcnInst& inst);
|
||||
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_ADDK_I32(const GcnInst& inst);
|
||||
void S_MULK_I32(const GcnInst& inst);
|
||||
|
||||
// SOP1
|
||||
void S_MOV(const GcnInst& inst);
|
||||
void S_MOV_B64(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
|
||||
|
||||
// SOPC
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
|
||||
// SOPP
|
||||
void S_BARRIER();
|
||||
|
||||
// Scalar Memory
|
||||
// SMRD
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
void S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
||||
// Vector ALU
|
||||
void V_MOV(const GcnInst& inst);
|
||||
void V_SAD(const GcnInst& inst);
|
||||
void V_MAC_F32(const GcnInst& inst);
|
||||
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
|
||||
void V_CVT_F32_F16(const GcnInst& inst);
|
||||
void V_CVT_F16_F32(const GcnInst& inst);
|
||||
void V_MUL_F32(const GcnInst& inst);
|
||||
// VOP2
|
||||
void V_CNDMASK_B32(const GcnInst& inst);
|
||||
void V_OR_B32(bool is_xor, const GcnInst& inst);
|
||||
void V_AND_B32(const GcnInst& inst);
|
||||
void V_LSHLREV_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
void V_ADD_F32(const GcnInst& inst);
|
||||
void V_SUB_F32(const GcnInst& inst);
|
||||
void V_SUBREV_F32(const GcnInst& inst);
|
||||
void V_MUL_F32(const GcnInst& inst);
|
||||
void V_MUL_I32_I24(const GcnInst& inst);
|
||||
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
|
||||
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MIN_U32(const GcnInst& inst);
|
||||
void V_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_LSHR_B32(const GcnInst& inst);
|
||||
void V_LSHRREV_B32(const GcnInst& inst);
|
||||
void V_ASHR_I32(const GcnInst& inst);
|
||||
void V_ASHRREV_I32(const GcnInst& inst);
|
||||
void V_LSHL_B32(const GcnInst& inst);
|
||||
void V_LSHL_B64(const GcnInst& inst);
|
||||
void V_LSHLREV_B32(const GcnInst& inst);
|
||||
void V_AND_B32(const GcnInst& inst);
|
||||
void V_OR_B32(bool is_xor, const GcnInst& inst);
|
||||
void V_BFM_B32(const GcnInst& inst);
|
||||
void V_MAC_F32(const GcnInst& inst);
|
||||
void V_MADMK_F32(const GcnInst& inst);
|
||||
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
|
||||
void V_ADD_I32(const GcnInst& inst);
|
||||
void V_SUB_I32(const GcnInst& inst);
|
||||
void V_SUBREV_I32(const GcnInst& inst);
|
||||
void V_ADDC_U32(const GcnInst& inst);
|
||||
void V_LDEXP_F32(const GcnInst& inst);
|
||||
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
|
||||
|
||||
// VOP1
|
||||
void V_MOV(const GcnInst& inst);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_CVT_F32_I32(const GcnInst& inst);
|
||||
void V_CVT_F32_U32(const GcnInst& inst);
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_FRACT_F32(const GcnInst& inst);
|
||||
void V_ADD_F32(const GcnInst& inst);
|
||||
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
||||
void V_MED3_F32(const GcnInst& inst);
|
||||
void V_MED3_I32(const GcnInst& inst);
|
||||
void V_FLOOR_F32(const GcnInst& inst);
|
||||
void V_SUB_F32(const GcnInst& inst);
|
||||
void V_RCP_F32(const GcnInst& inst);
|
||||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
|
||||
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
|
||||
void V_MAX_F64(const GcnInst& inst);
|
||||
void V_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_RSQ_F32(const GcnInst& inst);
|
||||
void V_SIN_F32(const GcnInst& inst);
|
||||
void V_LOG_F32(const GcnInst& inst);
|
||||
void V_EXP_F32(const GcnInst& inst);
|
||||
void V_SQRT_F32(const GcnInst& inst);
|
||||
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
|
||||
void V_MIN3_F32(const GcnInst& inst);
|
||||
void V_MIN3_I32(const GcnInst& inst);
|
||||
void V_MADMK_F32(const GcnInst& inst);
|
||||
void V_CUBEMA_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
void V_CUBETC_F32(const GcnInst& inst);
|
||||
void V_CUBEID_F32(const GcnInst& inst);
|
||||
void V_CVT_U32_F32(const GcnInst& inst);
|
||||
void V_SUBREV_F32(const GcnInst& inst);
|
||||
void V_SUBREV_I32(const GcnInst& inst);
|
||||
void V_MAD_U64_U32(const GcnInst& inst);
|
||||
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
|
||||
void V_LSHRREV_B32(const GcnInst& inst);
|
||||
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_SAD_U32(const GcnInst& inst);
|
||||
void V_BFE_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
|
||||
void V_MUL_I32_I24(const GcnInst& inst);
|
||||
void V_SUB_I32(const GcnInst& inst);
|
||||
void V_LSHR_B32(const GcnInst& inst);
|
||||
void V_ASHRREV_I32(const GcnInst& inst);
|
||||
void V_ASHR_I32(const GcnInst& inst);
|
||||
void V_MAD_U32_U24(const GcnInst& inst);
|
||||
void V_RNDNE_F32(const GcnInst& inst);
|
||||
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||
void V_COS_F32(const GcnInst& inst);
|
||||
void V_MAX3_F32(const GcnInst& inst);
|
||||
void V_MAX3_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_CVT_I32_F32(const GcnInst& inst);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
void V_CVT_F16_F32(const GcnInst& inst);
|
||||
void V_CVT_F32_F16(const GcnInst& inst);
|
||||
void V_CVT_FLR_I32_F32(const GcnInst& inst);
|
||||
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
||||
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||
void V_FRACT_F32(const GcnInst& inst);
|
||||
void V_TRUNC_F32(const GcnInst& inst);
|
||||
void V_CEIL_F32(const GcnInst& inst);
|
||||
void V_MIN_U32(const GcnInst& inst);
|
||||
void V_CMP_NE_U64(const GcnInst& inst);
|
||||
void V_BFI_B32(const GcnInst& inst);
|
||||
void V_RNDNE_F32(const GcnInst& inst);
|
||||
void V_FLOOR_F32(const GcnInst& inst);
|
||||
void V_EXP_F32(const GcnInst& inst);
|
||||
void V_LOG_F32(const GcnInst& inst);
|
||||
void V_RCP_F32(const GcnInst& inst);
|
||||
void V_RSQ_F32(const GcnInst& inst);
|
||||
void V_SQRT_F32(const GcnInst& inst);
|
||||
void V_SIN_F32(const GcnInst& inst);
|
||||
void V_COS_F32(const GcnInst& inst);
|
||||
void V_NOT_B32(const GcnInst& inst);
|
||||
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||
void V_BFREV_B32(const GcnInst& inst);
|
||||
void V_LDEXP_F32(const GcnInst& inst);
|
||||
void V_CVT_FLR_I32_F32(const GcnInst& inst);
|
||||
void V_CMP_CLASS_F32(const GcnInst& inst);
|
||||
void V_FFBL_B32(const GcnInst& inst);
|
||||
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
|
||||
void V_BFM_B32(const GcnInst& inst);
|
||||
void V_FFBH_U32(const GcnInst& inst);
|
||||
void V_MOVRELS_B32(const GcnInst& inst);
|
||||
void V_FFBL_B32(const GcnInst& inst);
|
||||
void V_MOVRELD_B32(const GcnInst& inst);
|
||||
void V_MOVRELS_B32(const GcnInst& inst);
|
||||
void V_MOVRELSD_B32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
// VOPC
|
||||
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
|
||||
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
|
||||
void V_CMP_NE_U64(const GcnInst& inst);
|
||||
void V_CMP_CLASS_F32(const GcnInst& inst);
|
||||
|
||||
// VOP3a
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
|
||||
void V_MAD_U32_U24(const GcnInst& inst);
|
||||
void V_CUBEID_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
void V_CUBETC_F32(const GcnInst& inst);
|
||||
void V_CUBEMA_F32(const GcnInst& inst);
|
||||
void V_BFE_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_BFI_B32(const GcnInst& inst);
|
||||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_MIN3_F32(const GcnInst& inst);
|
||||
void V_MIN3_I32(const GcnInst& inst);
|
||||
void V_MAX3_F32(const GcnInst& inst);
|
||||
void V_MAX3_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_MED3_F32(const GcnInst& inst);
|
||||
void V_MED3_I32(const GcnInst& inst);
|
||||
void V_SAD(const GcnInst& inst);
|
||||
void V_SAD_U32(const GcnInst& inst);
|
||||
void V_LSHL_B64(const GcnInst& inst);
|
||||
void V_MAX_F64(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_MAD_U64_U32(const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
// VINTRP
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
void V_INTERP_MOV_F32(const GcnInst& inst);
|
||||
|
||||
// Data share
|
||||
// DS
|
||||
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_APPEND(const GcnInst& inst);
|
||||
void DS_CONSUME(const GcnInst& inst);
|
||||
|
||||
// Buffer Memory
|
||||
// MUBUF / MTBUF
|
||||
void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst);
|
||||
void BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst);
|
||||
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
void V_INTERP_MOV_F32(const GcnInst& inst);
|
||||
|
||||
// Data share
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
void DS_APPEND(const GcnInst& inst);
|
||||
void DS_CONSUME(const GcnInst& inst);
|
||||
void S_BARRIER();
|
||||
|
||||
// Image Memory
|
||||
// MIMG
|
||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||
|
@ -241,6 +263,7 @@ private:
|
|||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
|
||||
|
||||
// Vector ALU Helprers
|
||||
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
|
||||
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);

File diff suppressed because it is too large

@@ -5,6 +5,22 @@

namespace Shader::Gcn {

void Translator::EmitVectorInterpolation(const GcnInst& inst) {
    switch (inst.opcode) {
    // VINTRP
    case Opcode::V_INTERP_P1_F32:
        return;
    case Opcode::V_INTERP_P2_F32:
        return V_INTERP_P2_F32(inst);
    case Opcode::V_INTERP_MOV_F32:
        return V_INTERP_MOV_F32(inst);
    default:
        LogMissingOpcode(inst);
    }
}

// VINTRP

void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
    const IR::VectorReg dst_reg{inst.dst[0].code};
    auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);

@@ -19,17 +35,4 @@ void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
    ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

void Translator::EmitVectorInterpolation(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::V_INTERP_P1_F32:
        return;
    case Opcode::V_INTERP_P2_F32:
        return V_INTERP_P2_F32(inst);
    case Opcode::V_INTERP_MOV_F32:
        return V_INTERP_MOV_F32(inst);
    default:
        LogMissingOpcode(inst);
    }
}

} // namespace Shader::Gcn

@ -7,56 +7,7 @@ namespace Shader::Gcn {

void Translator::EmitVectorMemory(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::IMAGE_SAMPLE_LZ_O:
    case Opcode::IMAGE_SAMPLE_O:
    case Opcode::IMAGE_SAMPLE_C:
    case Opcode::IMAGE_SAMPLE_C_LZ:
    case Opcode::IMAGE_SAMPLE_LZ:
    case Opcode::IMAGE_SAMPLE:
    case Opcode::IMAGE_SAMPLE_L:
    case Opcode::IMAGE_SAMPLE_L_O:
    case Opcode::IMAGE_SAMPLE_C_O:
    case Opcode::IMAGE_SAMPLE_B:
    case Opcode::IMAGE_SAMPLE_C_LZ_O:
    case Opcode::IMAGE_SAMPLE_D:
    case Opcode::IMAGE_SAMPLE_CD:
        return IMAGE_SAMPLE(inst);
    case Opcode::IMAGE_GATHER4_LZ:
    case Opcode::IMAGE_GATHER4_C:
    case Opcode::IMAGE_GATHER4_C_LZ:
    case Opcode::IMAGE_GATHER4_LZ_O:
        return IMAGE_GATHER(inst);
    case Opcode::IMAGE_ATOMIC_ADD:
        return IMAGE_ATOMIC(AtomicOp::Add, inst);
    case Opcode::IMAGE_ATOMIC_AND:
        return IMAGE_ATOMIC(AtomicOp::And, inst);
    case Opcode::IMAGE_ATOMIC_OR:
        return IMAGE_ATOMIC(AtomicOp::Or, inst);
    case Opcode::IMAGE_ATOMIC_XOR:
        return IMAGE_ATOMIC(AtomicOp::Xor, inst);
    case Opcode::IMAGE_ATOMIC_UMAX:
        return IMAGE_ATOMIC(AtomicOp::Umax, inst);
    case Opcode::IMAGE_ATOMIC_SMAX:
        return IMAGE_ATOMIC(AtomicOp::Smax, inst);
    case Opcode::IMAGE_ATOMIC_UMIN:
        return IMAGE_ATOMIC(AtomicOp::Umin, inst);
    case Opcode::IMAGE_ATOMIC_SMIN:
        return IMAGE_ATOMIC(AtomicOp::Smin, inst);
    case Opcode::IMAGE_ATOMIC_INC:
        return IMAGE_ATOMIC(AtomicOp::Inc, inst);
    case Opcode::IMAGE_ATOMIC_DEC:
        return IMAGE_ATOMIC(AtomicOp::Dec, inst);
    case Opcode::IMAGE_GET_LOD:
        return IMAGE_GET_LOD(inst);
    case Opcode::IMAGE_STORE:
        return IMAGE_STORE(inst);
    case Opcode::IMAGE_LOAD_MIP:
        return IMAGE_LOAD(true, inst);
    case Opcode::IMAGE_LOAD:
        return IMAGE_LOAD(false, inst);
    case Opcode::IMAGE_GET_RESINFO:
        return IMAGE_GET_RESINFO(inst);

    // MTBUF / MUBUF
    // Buffer load operations
    case Opcode::TBUFFER_LOAD_FORMAT_X:
        return BUFFER_LOAD(1, true, inst);

@ -137,11 +88,335 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
    case Opcode::BUFFER_ATOMIC_DEC:
        return BUFFER_ATOMIC(AtomicOp::Dec, inst);

    // MIMG
    case Opcode::IMAGE_LOAD:
        return IMAGE_LOAD(false, inst);
    case Opcode::IMAGE_LOAD_MIP:
        return IMAGE_LOAD(true, inst);
    case Opcode::IMAGE_STORE:
        return IMAGE_STORE(inst);
    case Opcode::IMAGE_GET_RESINFO:
        return IMAGE_GET_RESINFO(inst);
    case Opcode::IMAGE_ATOMIC_ADD:
        return IMAGE_ATOMIC(AtomicOp::Add, inst);
    case Opcode::IMAGE_ATOMIC_SMIN:
        return IMAGE_ATOMIC(AtomicOp::Smin, inst);
    case Opcode::IMAGE_ATOMIC_UMIN:
        return IMAGE_ATOMIC(AtomicOp::Umin, inst);
    case Opcode::IMAGE_ATOMIC_SMAX:
        return IMAGE_ATOMIC(AtomicOp::Smax, inst);
    case Opcode::IMAGE_ATOMIC_UMAX:
        return IMAGE_ATOMIC(AtomicOp::Umax, inst);
    case Opcode::IMAGE_ATOMIC_AND:
        return IMAGE_ATOMIC(AtomicOp::And, inst);
    case Opcode::IMAGE_ATOMIC_OR:
        return IMAGE_ATOMIC(AtomicOp::Or, inst);
    case Opcode::IMAGE_ATOMIC_XOR:
        return IMAGE_ATOMIC(AtomicOp::Xor, inst);
    case Opcode::IMAGE_ATOMIC_INC:
        return IMAGE_ATOMIC(AtomicOp::Inc, inst);
    case Opcode::IMAGE_ATOMIC_DEC:
        return IMAGE_ATOMIC(AtomicOp::Dec, inst);
    case Opcode::IMAGE_SAMPLE:
    case Opcode::IMAGE_SAMPLE_D:
    case Opcode::IMAGE_SAMPLE_L:
    case Opcode::IMAGE_SAMPLE_B:
    case Opcode::IMAGE_SAMPLE_LZ:
    case Opcode::IMAGE_SAMPLE_C:
    case Opcode::IMAGE_SAMPLE_C_LZ:
    case Opcode::IMAGE_SAMPLE_O:
    case Opcode::IMAGE_SAMPLE_L_O:
    case Opcode::IMAGE_SAMPLE_LZ_O:
    case Opcode::IMAGE_SAMPLE_C_O:
    case Opcode::IMAGE_SAMPLE_C_LZ_O:
        return IMAGE_SAMPLE(inst);
    case Opcode::IMAGE_GATHER4_LZ:
    case Opcode::IMAGE_GATHER4_C:
    case Opcode::IMAGE_GATHER4_C_LZ:
    case Opcode::IMAGE_GATHER4_LZ_O:
        return IMAGE_GATHER(inst);
    case Opcode::IMAGE_GET_LOD:
        return IMAGE_GET_LOD(inst);
    case Opcode::IMAGE_SAMPLE_CD:
        return IMAGE_SAMPLE(inst);

    default:
        LogMissingOpcode(inst);
    }
}

// MTBUF / MUBUF

// Buffer load operations
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    if (is_typed) {
        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
    }

    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    if (num_dwords == 1) {
        ir.SetVectorReg(dst_reg, IR::U32{value});
        return;
    }
    for (u32 i = 0; i < num_dwords; i++) {
        ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
    }
}
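The lambda above sizes the buffer address by the idxen/offen bits: both set consumes two VGPRs (index, then offset), one set consumes a single VGPR, neither leaves the address empty. A small standalone sketch of that selection, using a plain integer count instead of IR values (illustrative only):

#include <cstdint>

// Illustrative only: number of address dwords an MTBUF/MUBUF access takes from
// consecutive VGPRs, mirroring the idxen/offen selection in BUFFER_LOAD above.
inline uint32_t BufferAddressDwordCount(bool idxen, bool offen) {
    if (idxen && offen) {
        return 2; // index in vaddr, offset in vaddr + 1
    }
    if (idxen || offen) {
        return 1; // a single dword: the index or the offset
    }
    return 0; // address comes only from the resource plus the immediate offset
}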

void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);

    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    const IR::Value value = ir.LoadBufferFormat(handle, address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    for (u32 i = 0; i < num_dwords; i++) {
        ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
    }
}
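In both load paths the buffer resource is rebuilt from four consecutive scalar registers, which is why the resource field (inst.src[2].code) is scaled by four before use. A standalone sketch of that register math, with illustrative names only:

#include <cstdint>

// Illustrative only: a 128-bit buffer resource (V#) occupies four consecutive
// SGPRs, so the encoded resource field selects a group of four registers.
struct SharpRegs {
    uint32_t first; // first SGPR of the descriptor
    uint32_t count; // four 32-bit words for a buffer resource
};

inline SharpRegs BufferSharpRegisters(uint32_t srsrc_field) {
    return SharpRegs{srsrc_field * 4, 4};
}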

// Buffer store operations
void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    if (is_typed) {
        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
    }

    IR::Value value{};
    const IR::VectorReg src_reg{inst.src[1].code};
    switch (num_dwords) {
    case 1:
        value = ir.GetVectorReg(src_reg);
        break;
    case 2:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
        break;
    case 3:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2));
        break;
    case 4:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
        break;
    }
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    ir.StoreBuffer(num_dwords, handle, address, value, info);
}

void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);

    const IR::VectorReg src_reg{inst.src[1].code};

    std::array<IR::Value, 4> comps{};
    for (u32 i = 0; i < num_dwords; i++) {
        comps[i] = ir.GetVectorReg<IR::F32>(src_reg + i);
    }
    for (u32 i = num_dwords; i < 4; i++) {
        comps[i] = ir.Imm32(0.f);
    }

    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    ir.StoreBufferFormat(handle, address, value, info);
}

// Buffer atomic operations
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::VectorReg vdata{inst.src[1].code};
    const IR::ScalarReg srsrc{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen && mubuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mubuf.idxen || mubuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::U32 soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);
    info.inst_offset.Assign(mubuf.offset);
    info.offset_enable.Assign(mubuf.offen);

    IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
                              ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));

    const IR::Value original_val = [&] {
        switch (op) {
        case AtomicOp::Swap:
            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
        case AtomicOp::Add:
            return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
        case AtomicOp::Smin:
            return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
        case AtomicOp::Umin:
            return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
        case AtomicOp::Smax:
            return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
        case AtomicOp::Umax:
            return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
        case AtomicOp::And:
            return ir.BufferAtomicAnd(handle, address, vdata_val, info);
        case AtomicOp::Or:
            return ir.BufferAtomicOr(handle, address, vdata_val, info);
        case AtomicOp::Xor:
            return ir.BufferAtomicXor(handle, address, vdata_val, info);
        case AtomicOp::Inc:
            return ir.BufferAtomicInc(handle, address, vdata_val, info);
        case AtomicOp::Dec:
            return ir.BufferAtomicDec(handle, address, vdata_val, info);
        default:
            UNREACHABLE();
        }
    }();

    if (mubuf.glc) {
        ir.SetVectorReg(vdata, IR::U32{original_val});
    }
}
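When the glc bit is set, the buffer atomic writes the value the location held before the operation back into the data VGPR, which is what the final SetVectorReg above does. The return-original behaviour can be illustrated with a plain std::atomic; this is an analogy only, not the emulator's memory model:

#include <atomic>
#include <cstdint>

// Illustrative only: like BUFFER_ATOMIC with glc=1, fetch_add returns the
// value observed before the addition was applied.
inline uint32_t AtomicAddReturnsOriginal(std::atomic<uint32_t>& word, uint32_t addend) {
    const uint32_t original = word.fetch_add(addend);
    return original; // with glc=0 this returned value would simply be dropped
}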

// MIMG

void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg addr_reg{inst.src[0].code};
    IR::VectorReg dest_reg{inst.dst[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));

    IR::TextureInstInfo info{};
    info.explicit_lod.Assign(has_mip);
    const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);

    for (u32 i = 0; i < 4; i++) {
        if (((mimg.dmask >> i) & 1) == 0) {
            continue;
        }
        IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
        ir.SetVectorReg(dest_reg++, value);
    }
}
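IMAGE_LOAD keeps only the components whose dmask bit is set and packs the survivors into consecutive destination VGPRs. A tiny standalone sketch of that packing rule, with plain floats and illustrative names:

#include <array>
#include <cstdint>
#include <vector>

// Illustrative only: dmask selects which of the four fetched components are
// kept; the survivors are packed into consecutive destination registers.
inline std::vector<float> PackByDmask(const std::array<float, 4>& texel, uint32_t dmask) {
    std::vector<float> packed;
    for (uint32_t i = 0; i < 4; ++i) {
        if (((dmask >> i) & 1u) != 0u) {
            packed.push_back(texel[i]);
        }
    }
    return packed;
}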

void Translator::IMAGE_STORE(const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg addr_reg{inst.src[0].code};
    IR::VectorReg data_reg{inst.dst[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));

    boost::container::static_vector<IR::F32, 4> comps;
    for (u32 i = 0; i < 4; i++) {
        if (((mimg.dmask >> i) & 1) == 0) {
            comps.push_back(ir.Imm32(0.f));
            continue;
        }
        comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
    }
    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
    ir.ImageWrite(handle, body, value, {});
}

void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
    IR::VectorReg dst_reg{inst.dst[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
@ -165,6 +440,50 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
    }
}

void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg val_reg{inst.dst[0].code};
    IR::VectorReg addr_reg{inst.src[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value value = ir.GetVectorReg(val_reg);
    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
    const IR::Value prev = [&] {
        switch (op) {
        case AtomicOp::Swap:
            return ir.ImageAtomicExchange(handle, body, value, {});
        case AtomicOp::Add:
            return ir.ImageAtomicIAdd(handle, body, value, {});
        case AtomicOp::Smin:
            return ir.ImageAtomicIMin(handle, body, value, true, {});
        case AtomicOp::Umin:
            return ir.ImageAtomicUMin(handle, body, value, {});
        case AtomicOp::Smax:
            return ir.ImageAtomicIMax(handle, body, value, true, {});
        case AtomicOp::Umax:
            return ir.ImageAtomicUMax(handle, body, value, {});
        case AtomicOp::And:
            return ir.ImageAtomicAnd(handle, body, value, {});
        case AtomicOp::Or:
            return ir.ImageAtomicOr(handle, body, value, {});
        case AtomicOp::Xor:
            return ir.ImageAtomicXor(handle, body, value, {});
        case AtomicOp::Inc:
            return ir.ImageAtomicInc(handle, body, value, {});
        case AtomicOp::Dec:
            return ir.ImageAtomicDec(handle, body, value, {});
        default:
            UNREACHABLE();
        }
    }();
    if (mimg.glc) {
        ir.SetVectorReg(val_reg, IR::U32{prev});
    }
}

void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg addr_reg{inst.src[0].code};
@ -323,271 +642,6 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
    }
}

void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg addr_reg{inst.src[0].code};
    IR::VectorReg dest_reg{inst.dst[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));

    IR::TextureInstInfo info{};
    info.explicit_lod.Assign(has_mip);
    const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);

    for (u32 i = 0; i < 4; i++) {
        if (((mimg.dmask >> i) & 1) == 0) {
            continue;
        }
        IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
        ir.SetVectorReg(dest_reg++, value);
    }
}

void Translator::IMAGE_STORE(const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg addr_reg{inst.src[0].code};
    IR::VectorReg data_reg{inst.dst[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));

    boost::container::static_vector<IR::F32, 4> comps;
    for (u32 i = 0; i < 4; i++) {
        if (((mimg.dmask >> i) & 1) == 0) {
            comps.push_back(ir.Imm32(0.f));
            continue;
        }
        comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
    }
    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
    ir.ImageWrite(handle, body, value, {});
}

void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    if (is_typed) {
        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
    }

    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    if (num_dwords == 1) {
        ir.SetVectorReg(dst_reg, IR::U32{value});
        return;
    }
    for (u32 i = 0; i < num_dwords; i++) {
        ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
    }
}

void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);

    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    const IR::Value value = ir.LoadBufferFormat(handle, address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    for (u32 i = 0; i < num_dwords; i++) {
        ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
    }
}

void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    if (is_typed) {
        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32_32 ||
                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
    }

    IR::Value value{};
    const IR::VectorReg src_reg{inst.src[1].code};
    switch (num_dwords) {
    case 1:
        value = ir.GetVectorReg(src_reg);
        break;
    case 2:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
        break;
    case 3:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2));
        break;
    case 4:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
        break;
    }
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    ir.StoreBuffer(num_dwords, handle, address, value, info);
}

void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);

    const IR::VectorReg src_reg{inst.src[1].code};

    std::array<IR::Value, 4> comps{};
    for (u32 i = 0; i < num_dwords; i++) {
        comps[i] = ir.GetVectorReg<IR::F32>(src_reg + i);
    }
    for (u32 i = num_dwords; i < 4; i++) {
        comps[i] = ir.Imm32(0.f);
    }

    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
    ir.StoreBufferFormat(handle, address, value, info);
}

void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::VectorReg vdata{inst.src[1].code};
    const IR::ScalarReg srsrc{inst.src[2].code * 4};
    const IR::Value address = [&] -> IR::Value {
        if (mubuf.idxen && mubuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mubuf.idxen || mubuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::U32 soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mubuf.idxen);
    info.inst_offset.Assign(mubuf.offset);
    info.offset_enable.Assign(mubuf.offen);

    IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
                              ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));

    const IR::Value original_val = [&] {
        switch (op) {
        case AtomicOp::Swap:
            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
        case AtomicOp::Add:
            return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
        case AtomicOp::Smin:
            return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
        case AtomicOp::Umin:
            return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
        case AtomicOp::Smax:
            return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
        case AtomicOp::Umax:
            return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
        case AtomicOp::And:
            return ir.BufferAtomicAnd(handle, address, vdata_val, info);
        case AtomicOp::Or:
            return ir.BufferAtomicOr(handle, address, vdata_val, info);
        case AtomicOp::Xor:
            return ir.BufferAtomicXor(handle, address, vdata_val, info);
        case AtomicOp::Inc:
            return ir.BufferAtomicInc(handle, address, vdata_val, info);
        case AtomicOp::Dec:
            return ir.BufferAtomicDec(handle, address, vdata_val, info);
        default:
            UNREACHABLE();
        }
    }();

    if (mubuf.glc) {
        ir.SetVectorReg(vdata, IR::U32{original_val});
    }
}

void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg dst_reg{inst.dst[0].code};
@ -603,48 +657,4 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
    ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}

void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const auto& mimg = inst.control.mimg;
    IR::VectorReg val_reg{inst.dst[0].code};
    IR::VectorReg addr_reg{inst.src[0].code};
    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};

    const IR::Value value = ir.GetVectorReg(val_reg);
    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value body =
        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
    const IR::Value prev = [&] {
        switch (op) {
        case AtomicOp::Swap:
            return ir.ImageAtomicExchange(handle, body, value, {});
        case AtomicOp::Add:
            return ir.ImageAtomicIAdd(handle, body, value, {});
        case AtomicOp::Smin:
            return ir.ImageAtomicIMin(handle, body, value, true, {});
        case AtomicOp::Umin:
            return ir.ImageAtomicUMin(handle, body, value, {});
        case AtomicOp::Smax:
            return ir.ImageAtomicIMax(handle, body, value, true, {});
        case AtomicOp::Umax:
            return ir.ImageAtomicUMax(handle, body, value, {});
        case AtomicOp::And:
            return ir.ImageAtomicAnd(handle, body, value, {});
        case AtomicOp::Or:
            return ir.ImageAtomicOr(handle, body, value, {});
        case AtomicOp::Xor:
            return ir.ImageAtomicXor(handle, body, value, {});
        case AtomicOp::Inc:
            return ir.ImageAtomicInc(handle, body, value, {});
        case AtomicOp::Dec:
            return ir.ImageAtomicDec(handle, body, value, {});
        default:
            UNREACHABLE();
        }
    }();
    if (mimg.glc) {
        ir.SetVectorReg(val_reg, IR::U32{prev});
    }
}

} // namespace Shader::Gcn