diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 0c3d21a96..98d39f6f6 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -7,53 +7,121 @@ namespace Shader::Gcn { void Translator::EmitDataShare(const GcnInst& inst) { switch (inst.opcode) { - case Opcode::DS_SWIZZLE_B32: - return DS_SWIZZLE_B32(inst); - case Opcode::DS_READ_B32: - return DS_READ(32, false, false, false, inst); - case Opcode::DS_READ2ST64_B32: - return DS_READ(32, false, true, true, inst); - case Opcode::DS_READ_B64: - return DS_READ(64, false, false, false, inst); - case Opcode::DS_READ2_B32: - return DS_READ(32, false, true, false, inst); - case Opcode::DS_READ2_B64: - return DS_READ(64, false, true, false, inst); - case Opcode::DS_WRITE_B32: - return DS_WRITE(32, false, false, false, inst); - case Opcode::DS_WRITE2ST64_B32: - return DS_WRITE(32, false, true, true, inst); - case Opcode::DS_WRITE_B64: - return DS_WRITE(64, false, false, false, inst); - case Opcode::DS_WRITE2_B32: - return DS_WRITE(32, false, true, false, inst); - case Opcode::DS_WRITE2_B64: - return DS_WRITE(64, false, true, false, inst); + // DS case Opcode::DS_ADD_U32: return DS_ADD_U32(inst, false); - case Opcode::DS_MIN_U32: - return DS_MIN_U32(inst, false, false); case Opcode::DS_MIN_I32: return DS_MIN_U32(inst, true, false); - case Opcode::DS_MAX_U32: - return DS_MAX_U32(inst, false, false); case Opcode::DS_MAX_I32: return DS_MAX_U32(inst, true, false); + case Opcode::DS_MIN_U32: + return DS_MIN_U32(inst, false, false); + case Opcode::DS_MAX_U32: + return DS_MAX_U32(inst, false, false); + case Opcode::DS_WRITE_B32: + return DS_WRITE(32, false, false, false, inst); + case Opcode::DS_WRITE2_B32: + return DS_WRITE(32, false, true, false, inst); + case Opcode::DS_WRITE2ST64_B32: + return DS_WRITE(32, false, true, true, inst); case Opcode::DS_ADD_RTN_U32: return DS_ADD_U32(inst, true); case Opcode::DS_MIN_RTN_U32: return DS_MIN_U32(inst, false, true); case Opcode::DS_MAX_RTN_U32: return DS_MAX_U32(inst, false, true); - case Opcode::DS_APPEND: - return DS_APPEND(inst); + case Opcode::DS_SWIZZLE_B32: + return DS_SWIZZLE_B32(inst); + case Opcode::DS_READ_B32: + return DS_READ(32, false, false, false, inst); + case Opcode::DS_READ2_B32: + return DS_READ(32, false, true, false, inst); + case Opcode::DS_READ2ST64_B32: + return DS_READ(32, false, true, true, inst); case Opcode::DS_CONSUME: return DS_CONSUME(inst); + case Opcode::DS_APPEND: + return DS_APPEND(inst); + case Opcode::DS_WRITE_B64: + return DS_WRITE(64, false, false, false, inst); + case Opcode::DS_WRITE2_B64: + return DS_WRITE(64, false, true, false, inst); + case Opcode::DS_READ_B64: + return DS_READ(64, false, false, false, inst); + case Opcode::DS_READ2_B64: + return DS_READ(64, false, true, false, inst); default: LogMissingOpcode(inst); } } +void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = 
ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, + const GcnInst& inst) { + const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; + const IR::VectorReg data0{inst.src[1].code}; + const IR::VectorReg data1{inst.src[2].code}; + if (is_pair) { + const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + } else { + ir.WriteShared( + 64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), + addr0); + } + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + } else { + ir.WriteShared( + 64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)), + addr1); + } + } else if (bit_size == 64) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::Value data = + ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); + ir.WriteShared(bit_size, data, addr0); + } else { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); + } +} + void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { const u8 offset0 = inst.control.ds.offset0; const u8 offset1 = inst.control.ds.offset1; @@ -102,101 +170,11 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride } } -void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, - const GcnInst& inst) { - const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; - const IR::VectorReg data0{inst.src[1].code}; - const IR::VectorReg data1{inst.src[2].code}; - if (is_pair) { - const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 
64 : 1); - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); - if (bit_size == 32) { - ir.WriteShared(32, ir.GetVectorReg(data0), addr0); - } else { - ir.WriteShared( - 64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), - addr0); - } - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); - if (bit_size == 32) { - ir.WriteShared(32, ir.GetVectorReg(data1), addr1); - } else { - ir.WriteShared( - 64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)), - addr1); - } - } else if (bit_size == 64) { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - const IR::Value data = - ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); - ir.WriteShared(bit_size, data, addr0); - } else { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); - } -} - -void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::S_BARRIER() { - ir.Barrier(); -} - -void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { - const IR::ScalarReg dst{inst.dst[0].code}; - const IR::U32 value{GetSrc(inst.src[0])}; - - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { - SetDst(inst.dst[0], ir.ReadFirstLane(value)); - } -} - -void Translator::V_READLANE_B32(const GcnInst& inst) { - const IR::ScalarReg dst{inst.dst[0].code}; - const IR::U32 value{GetSrc(inst.src[0])}; - const IR::U32 lane{GetSrc(inst.src[1])}; - ir.SetScalarReg(dst, ir.ReadLane(value, lane)); -} - -void Translator::V_WRITELANE_B32(const GcnInst& inst) { - const IR::VectorReg dst{inst.dst[0].code}; - const IR::U32 value{GetSrc(inst.src[0])}; - const IR::U32 lane{GetSrc(inst.src[1])}; - const IR::U32 old_value{GetSrc(inst.dst[0])}; - ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); +void Translator::DS_CONSUME(const GcnInst& inst) { + const u32 inst_offset = inst.control.ds.offset0; + const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); + const IR::U32 prev = ir.DataConsume(gds_offset); + SetDst(inst.dst[0], prev); } void Translator::DS_APPEND(const GcnInst& inst) { @@ -206,11 +184,4 @@ void Translator::DS_APPEND(const GcnInst& inst) { SetDst(inst.dst[0], 
prev); } -void Translator::DS_CONSUME(const GcnInst& inst) { - const u32 inst_offset = inst.control.ds.offset0; - const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); - const IR::U32 prev = ir.DataConsume(gds_offset); - SetDst(inst.dst[0], prev); -} - } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index e246b5c51..89292b6d0 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -17,79 +17,81 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { } default: switch (inst.opcode) { - case Opcode::S_MOV_B32: - return S_MOV(inst); - case Opcode::S_MUL_I32: - return S_MUL_I32(inst); - case Opcode::S_AND_SAVEEXEC_B64: - return S_AND_SAVEEXEC_B64(inst); - case Opcode::S_MOV_B64: - return S_MOV_B64(inst); - case Opcode::S_OR_B64: - return S_OR_B64(NegateMode::None, false, inst); - case Opcode::S_NOR_B64: - return S_OR_B64(NegateMode::Result, false, inst); - case Opcode::S_XOR_B64: - return S_OR_B64(NegateMode::None, true, inst); - case Opcode::S_XNOR_B64: - return S_OR_B64(NegateMode::Result, true, inst); - case Opcode::S_ORN2_B64: - return S_OR_B64(NegateMode::Src1, false, inst); - case Opcode::S_AND_B64: - return S_AND_B64(NegateMode::None, inst); - case Opcode::S_NAND_B64: - return S_AND_B64(NegateMode::Result, inst); - case Opcode::S_ANDN2_B64: - return S_AND_B64(NegateMode::Src1, inst); - case Opcode::S_NOT_B64: - return S_NOT_B64(inst); + // SOP2 + case Opcode::S_ADD_U32: + return S_ADD_U32(inst); + case Opcode::S_SUB_U32: + return S_SUB_U32(inst); case Opcode::S_ADD_I32: return S_ADD_I32(inst); - case Opcode::S_AND_B32: - return S_AND_B32(NegateMode::None, inst); - case Opcode::S_NAND_B32: - return S_AND_B32(NegateMode::Result, inst); - case Opcode::S_ANDN2_B32: - return S_AND_B32(NegateMode::Src1, inst); - case Opcode::S_ASHR_I32: - return S_ASHR_I32(inst); - case Opcode::S_OR_B32: - return S_OR_B32(inst); - case Opcode::S_XOR_B32: - return S_XOR_B32(inst); - case Opcode::S_LSHL_B32: - return S_LSHL_B32(inst); - case Opcode::S_LSHR_B32: - return S_LSHR_B32(inst); + case Opcode::S_SUB_I32: + return S_SUB_U32(inst); + case Opcode::S_ADDC_U32: + return S_ADDC_U32(inst); + case Opcode::S_MIN_I32: + return S_MIN_U32(true, inst); + case Opcode::S_MIN_U32: + return S_MIN_U32(false, inst); + case Opcode::S_MAX_I32: + return S_MAX_U32(true, inst); + case Opcode::S_MAX_U32: + return S_MAX_U32(false, inst); case Opcode::S_CSELECT_B32: return S_CSELECT_B32(inst); case Opcode::S_CSELECT_B64: return S_CSELECT_B64(inst); - case Opcode::S_BFE_U32: - return S_BFE_U32(inst); + case Opcode::S_AND_B32: + return S_AND_B32(NegateMode::None, inst); + case Opcode::S_OR_B32: + return S_OR_B32(inst); + case Opcode::S_OR_B64: + return S_OR_B64(NegateMode::None, false, inst); + case Opcode::S_XOR_B32: + return S_XOR_B32(inst); + case Opcode::S_XOR_B64: + return S_OR_B64(NegateMode::None, true, inst); + case Opcode::S_ANDN2_B32: + return S_AND_B32(NegateMode::Src1, inst); + case Opcode::S_ANDN2_B64: + return S_AND_B64(NegateMode::Src1, inst); + case Opcode::S_ORN2_B64: + return S_OR_B64(NegateMode::Src1, false, inst); + case Opcode::S_NAND_B32: + return S_AND_B32(NegateMode::Result, inst); + case Opcode::S_NAND_B64: + return S_AND_B64(NegateMode::Result, inst); + case Opcode::S_NOR_B64: + return S_OR_B64(NegateMode::Result, false, inst); + case Opcode::S_XNOR_B64: + return S_OR_B64(NegateMode::Result, true, inst); + 
case Opcode::S_LSHL_B32: + return S_LSHL_B32(inst); + case Opcode::S_LSHR_B32: + return S_LSHR_B32(inst); + case Opcode::S_ASHR_I32: + return S_ASHR_I32(inst); case Opcode::S_BFM_B32: return S_BFM_B32(inst); - case Opcode::S_BREV_B32: - return S_BREV_B32(inst); - case Opcode::S_ADD_U32: - return S_ADD_U32(inst); - case Opcode::S_ADDC_U32: - return S_ADDC_U32(inst); - case Opcode::S_SUB_U32: - case Opcode::S_SUB_I32: - return S_SUB_U32(inst); - case Opcode::S_MIN_U32: - return S_MIN_U32(false, inst); - case Opcode::S_MIN_I32: - return S_MIN_U32(true, inst); - case Opcode::S_MAX_U32: - return S_MAX_U32(false, inst); - case Opcode::S_MAX_I32: - return S_MAX_U32(true, inst); + case Opcode::S_MUL_I32: + return S_MUL_I32(inst); + case Opcode::S_BFE_U32: + return S_BFE_U32(inst); case Opcode::S_ABSDIFF_I32: return S_ABSDIFF_I32(inst); + + // SOP1 + case Opcode::S_MOV_B32: + return S_MOV(inst); + case Opcode::S_MOV_B64: + return S_MOV_B64(inst); + case Opcode::S_NOT_B64: + return S_NOT_B64(inst); case Opcode::S_WQM_B64: break; + case Opcode::S_BREV_B32: + return S_BREV_B32(inst); + case Opcode::S_AND_SAVEEXEC_B64: + return S_AND_SAVEEXEC_B64(inst); default: LogMissingOpcode(inst); } @@ -99,6 +101,7 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { void Translator::EmitSOPC(const GcnInst& inst) { switch (inst.opcode) { + // SOPC case Opcode::S_CMP_EQ_I32: return S_CMP(ConditionOp::EQ, true, inst); case Opcode::S_CMP_LG_I32: @@ -131,6 +134,7 @@ void Translator::EmitSOPC(const GcnInst& inst) { void Translator::EmitSOPK(const GcnInst& inst) { switch (inst.opcode) { + // SOPK case Opcode::S_MOVK_I32: return S_MOVK(inst); @@ -169,169 +173,78 @@ void Translator::EmitSOPK(const GcnInst& inst) { } } -void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm; - if (simm16 & (1 << 15)) { - // TODO: need to verify the case of imm sign extension - UNREACHABLE(); - } - SetDst(inst.dst[0], ir.Imm32(simm16)); +// SOP2 + +void Translator::S_ADD_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IAdd(src0, src1)); + // TODO: Carry out + ir.SetScc(ir.Imm1(false)); } -void Translator::S_ADDK_I32(const GcnInst& inst) { - const s32 simm16 = inst.control.sopk.simm; - SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +void Translator::S_SUB_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ISub(src0, src1)); + // TODO: Carry out + ir.SetScc(ir.Imm1(false)); } -void Translator::S_MULK_I32(const GcnInst& inst) { - const s32 simm16 = inst.control.sopk.simm; - SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +void Translator::S_ADD_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IAdd(src0, src1)); + // TODO: Overflow flag } -void Translator::S_MOV(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); +void Translator::S_ADDC_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))}; + SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); } -void Translator::S_MUL_I32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); +void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 
src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result = ir.IMin(src0, src1, is_signed); + SetDst(inst.dst[0], result); + ir.SetScc(ir.IEqual(result, src0)); } -void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) { - const IR::U32 lhs = GetSrc(inst.src[0]); - const IR::U32 rhs = GetSrc(inst.src[1]); - const IR::U1 result = [&] { - switch (cond) { - case ConditionOp::EQ: - return ir.IEqual(lhs, rhs); - case ConditionOp::LG: - return ir.INotEqual(lhs, rhs); - case ConditionOp::GT: - return ir.IGreaterThan(lhs, rhs, is_signed); - case ConditionOp::GE: - return ir.IGreaterThanEqual(lhs, rhs, is_signed); - case ConditionOp::LT: - return ir.ILessThan(lhs, rhs, is_signed); - case ConditionOp::LE: - return ir.ILessThanEqual(lhs, rhs, is_signed); - default: - UNREACHABLE(); - } - }(); - ir.SetScc(result); +void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result = ir.IMax(src0, src1, is_signed); + SetDst(inst.dst[0], result); + ir.SetScc(ir.IEqual(result, src0)); } -void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) { - const s32 simm16 = inst.control.sopk.simm; - const IR::U32 lhs = GetSrc(inst.dst[0]); - const IR::U32 rhs = ir.Imm32(simm16); - const IR::U1 result = [&] { - switch (cond) { - case ConditionOp::EQ: - return ir.IEqual(lhs, rhs); - case ConditionOp::LG: - return ir.INotEqual(lhs, rhs); - case ConditionOp::GT: - return ir.IGreaterThan(lhs, rhs, is_signed); - case ConditionOp::GE: - return ir.IGreaterThanEqual(lhs, rhs, is_signed); - case ConditionOp::LT: - return ir.ILessThan(lhs, rhs, is_signed); - case ConditionOp::LE: - return ir.ILessThanEqual(lhs, rhs, is_signed); - default: - UNREACHABLE(); - } - }(); - ir.SetScc(result); +void Translator::S_CSELECT_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)}); } -void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { - // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) - // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination - // SGPR we have a special IR opcode for SPGRs that act as thread masks. - const IR::U1 exec{ir.GetExec()}; - const IR::U1 src = [&] { - switch (inst.src[0].field) { - case OperandField::VccLo: - return ir.GetVcc(); - case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); - default: - UNREACHABLE(); - } - }(); - - switch (inst.dst[0].field) { - case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec); - break; - case OperandField::VccLo: - ir.SetVcc(exec); - break; - default: - UNREACHABLE(); - } - - // Update EXEC. 
- const IR::U1 result = ir.LogicalAnd(exec, src); - ir.SetExec(result); - ir.SetScc(result); -} - -void Translator::S_MOV_B64(const GcnInst& inst) { - const IR::U1 src = [&] { - switch (inst.src[0].field) { +void Translator::S_CSELECT_B64(const GcnInst& inst) { + const auto get_src = [&](const InstOperand& operand) { + switch (operand.field) { case OperandField::VccLo: return ir.GetVcc(); case OperandField::ExecLo: return ir.GetExec(); case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); case OperandField::ConstZero: return ir.Imm1(false); default: UNREACHABLE(); } - }(); - switch (inst.dst[0].field) { - case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src); - break; - case OperandField::ExecLo: - ir.SetExec(src); - break; - case OperandField::VccLo: - ir.SetVcc(src); - break; - default: - UNREACHABLE(); - } -} - -void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) { - const auto get_src = [&](const InstOperand& operand) { - switch (operand.field) { - case OperandField::ExecLo: - return ir.GetExec(); - case OperandField::VccLo: - return ir.GetVcc(); - case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - default: - UNREACHABLE(); - } }; - const IR::U1 src0{get_src(inst.src[0])}; - IR::U1 src1{get_src(inst.src[1])}; - if (negate == NegateMode::Src1) { - src1 = ir.LogicalNot(src1); - } - IR::U1 result = is_xor ? ir.LogicalXor(src0, src1) : ir.LogicalOr(src0, src1); - if (negate == NegateMode::Result) { - result = ir.LogicalNot(result); - } - ir.SetScc(result); + const IR::U1 src1{get_src(inst.src[1])}; + const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)}; switch (inst.dst[0].field) { case OperandField::VccLo: ir.SetVcc(result); @@ -344,6 +257,20 @@ void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) { } } +void Translator::S_AND_B32(NegateMode negate, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + IR::U32 src1{GetSrc(inst.src[1])}; + if (negate == NegateMode::Src1) { + src1 = ir.BitwiseNot(src1); + } + IR::U32 result{ir.BitwiseAnd(src0, src1)}; + if (negate == NegateMode::Result) { + result = ir.BitwiseNot(result); + } + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { @@ -382,35 +309,6 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { } } -void Translator::S_ADD_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(src0, src1)); - // TODO: Overflow flag -} - -void Translator::S_AND_B32(NegateMode negate, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - IR::U32 src1{GetSrc(inst.src[1])}; - if (negate == NegateMode::Src1) { - src1 = ir.BitwiseNot(src1); - } - IR::U32 result{ir.BitwiseAnd(src0, src1)}; - if (negate == NegateMode::Result) { - result = ir.BitwiseNot(result); - } - SetDst(inst.dst[0], result); - ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); -} - -void Translator::S_ASHR_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result{ir.ShiftRightArithmetic(src0, src1)}; - SetDst(inst.dst[0], result); - ir.SetScc(ir.INotEqual(result, 
ir.Imm32(0))); -} - void Translator::S_OR_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -419,46 +317,30 @@ void Translator::S_OR_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } -void Translator::S_XOR_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result{ir.BitwiseXor(src0, src1)}; - SetDst(inst.dst[0], result); - ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); -} - -void Translator::S_LSHR_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result{ir.ShiftRightLogical(src0, src1)}; - SetDst(inst.dst[0], result); - ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); -} - -void Translator::S_CSELECT_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)}); -} - -void Translator::S_CSELECT_B64(const GcnInst& inst) { +void Translator::S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { - case OperandField::VccLo: - return ir.GetVcc(); case OperandField::ExecLo: return ir.GetExec(); + case OperandField::VccLo: + return ir.GetVcc(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - case OperandField::ConstZero: - return ir.Imm1(false); default: UNREACHABLE(); } }; + const IR::U1 src0{get_src(inst.src[0])}; - const IR::U1 src1{get_src(inst.src[1])}; - const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)}; + IR::U1 src1{get_src(inst.src[1])}; + if (negate == NegateMode::Src1) { + src1 = ir.LogicalNot(src1); + } + IR::U1 result = is_xor ? 
ir.LogicalXor(src0, src1) : ir.LogicalOr(src0, src1); + if (negate == NegateMode::Result) { + result = ir.LogicalNot(result); + } + ir.SetScc(result); switch (inst.dst[0].field) { case OperandField::VccLo: ir.SetVcc(result); @@ -471,12 +353,10 @@ void Translator::S_CSELECT_B64(const GcnInst& inst) { } } -void Translator::S_BFE_U32(const GcnInst& inst) { +void Translator::S_XOR_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; - const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; - const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + const IR::U32 result{ir.BitwiseXor(src0, src1)}; SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } @@ -489,6 +369,22 @@ void Translator::S_LSHL_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_LSHR_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result{ir.ShiftRightLogical(src0, src1)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + +void Translator::S_ASHR_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result{ir.ShiftRightArithmetic(src0, src1)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_BFM_B32(const GcnInst& inst) { const IR::U32 src0{ir.BitwiseAnd(GetSrc(inst.src[0]), ir.Imm32(0x1F))}; const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; @@ -496,6 +392,110 @@ void Translator::S_BFM_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1)); } +void Translator::S_MUL_I32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); +} + +void Translator::S_BFE_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; + const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; + const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + +void Translator::S_ABSDIFF_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result{ir.IAbs(ir.ISub(src0, src1))}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + +// SOPK + +void Translator::S_MOVK(const GcnInst& inst) { + const auto simm16 = inst.control.sopk.simm; + if (simm16 & (1 << 15)) { + // TODO: need to verify the case of imm sign extension + UNREACHABLE(); + } + SetDst(inst.dst[0], ir.Imm32(simm16)); +} + +void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + const IR::U32 lhs = GetSrc(inst.dst[0]); + const IR::U32 rhs = ir.Imm32(simm16); + const IR::U1 result = [&] { + switch (cond) { + case ConditionOp::EQ: + return ir.IEqual(lhs, rhs); + case ConditionOp::LG: + return ir.INotEqual(lhs, rhs); + case ConditionOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case ConditionOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case ConditionOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case ConditionOp::LE: + return ir.ILessThanEqual(lhs, 
rhs, is_signed); + default: + UNREACHABLE(); + } + }(); + ir.SetScc(result); +} + +void Translator::S_ADDK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + +void Translator::S_MULK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + +// SOP1 + +void Translator::S_MOV(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::S_MOV_B64(const GcnInst& inst) { + const IR::U1 src = [&] { + switch (inst.src[0].field) { + case OperandField::VccLo: + return ir.GetVcc(); + case OperandField::ExecLo: + return ir.GetExec(); + case OperandField::ScalarGPR: + return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::ConstZero: + return ir.Imm1(false); + default: + UNREACHABLE(); + } + }(); + switch (inst.dst[0].field) { + case OperandField::ScalarGPR: + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src); + break; + case OperandField::ExecLo: + ir.SetExec(src); + break; + case OperandField::VccLo: + ir.SetVcc(src); + break; + default: + UNREACHABLE(); + } +} + void Translator::S_NOT_B64(const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { @@ -528,22 +528,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_ADD_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(src0, src1)); - // TODO: Carry out - ir.SetScc(ir.Imm1(false)); -} - -void Translator::S_SUB_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src0, src1)); - // TODO: Carry out - ir.SetScc(ir.Imm1(false)); -} - void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { // This only really exists to let resource tracking pass know // there is an inline cbuf. @@ -552,35 +536,69 @@ void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { ir.SetScalarReg(dst + 1, ir.Imm32(0)); } -void Translator::S_ADDC_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))}; - SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); +void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { + // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) + // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination + // SGPR we have a special IR opcode for SPGRs that act as thread masks. + const IR::U1 exec{ir.GetExec()}; + const IR::U1 src = [&] { + switch (inst.src[0].field) { + case OperandField::VccLo: + return ir.GetVcc(); + case OperandField::ScalarGPR: + return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + default: + UNREACHABLE(); + } + }(); + + switch (inst.dst[0].field) { + case OperandField::ScalarGPR: + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec); + break; + case OperandField::VccLo: + ir.SetVcc(exec); + break; + default: + UNREACHABLE(); + } + + // Update EXEC. 
+ const IR::U1 result = ir.LogicalAnd(exec, src); + ir.SetExec(result); + ir.SetScc(result); } -void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.IMax(src0, src1, is_signed); - SetDst(inst.dst[0], result); - ir.SetScc(ir.IEqual(result, src0)); +// SOPC + +void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) { + const IR::U32 lhs = GetSrc(inst.src[0]); + const IR::U32 rhs = GetSrc(inst.src[1]); + const IR::U1 result = [&] { + switch (cond) { + case ConditionOp::EQ: + return ir.IEqual(lhs, rhs); + case ConditionOp::LG: + return ir.INotEqual(lhs, rhs); + case ConditionOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case ConditionOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case ConditionOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case ConditionOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + default: + UNREACHABLE(); + } + }(); + ir.SetScc(result); } -void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.IMin(src0, src1, is_signed); - SetDst(inst.dst[0], result); - ir.SetScc(ir.IEqual(result, src0)); -} +// SOPP -void Translator::S_ABSDIFF_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result{ir.IAbs(ir.ISub(src0, src1))}; - SetDst(inst.dst[0], result); - ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +void Translator::S_BARRIER() { + ir.Barrier(); } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 29f2acc27..a6f8cafd7 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -9,6 +9,7 @@ static constexpr u32 SQ_SRC_LITERAL = 0xFF; void Translator::EmitScalarMemory(const GcnInst& inst) { switch (inst.opcode) { + // SMRD case Opcode::S_LOAD_DWORDX4: return S_LOAD_DWORD(4, inst); case Opcode::S_LOAD_DWORDX8: @@ -30,6 +31,8 @@ void Translator::EmitScalarMemory(const GcnInst& inst) { } } +// SMRD + void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; const u32 dword_offset = [&] -> u32 { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index d2053b765..9e865e008 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -61,169 +61,191 @@ public: // Instruction categories void EmitPrologue(); void EmitFetch(const GcnInst& inst); - void EmitDataShare(const GcnInst& inst); - void EmitVectorInterpolation(const GcnInst& inst); - void EmitScalarMemory(const GcnInst& inst); - void EmitVectorMemory(const GcnInst& inst); void EmitExport(const GcnInst& inst); void EmitFlowControl(u32 pc, const GcnInst& inst); + void EmitScalarAlu(const GcnInst& inst); + void EmitScalarMemory(const GcnInst& inst); void EmitVectorAlu(const GcnInst& inst); + void EmitVectorInterpolation(const GcnInst& inst); + void EmitDataShare(const GcnInst& inst); + void EmitVectorMemory(const GcnInst& inst); // Instruction encodings void EmitSOPC(const GcnInst& inst); void EmitSOPK(const GcnInst& inst); // Scalar ALU - void S_MOVK(const GcnInst& 
inst); - void S_MOV(const GcnInst& inst); - void S_MUL_I32(const GcnInst& inst); - void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); - void S_AND_SAVEEXEC_B64(const GcnInst& inst); - void S_MOV_B64(const GcnInst& inst); - void S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst); - void S_AND_B64(NegateMode negate, const GcnInst& inst); - void S_ADD_I32(const GcnInst& inst); - void S_AND_B32(NegateMode negate, const GcnInst& inst); - void S_ASHR_I32(const GcnInst& inst); - void S_OR_B32(const GcnInst& inst); - void S_XOR_B32(const GcnInst& inst); - void S_LSHR_B32(const GcnInst& inst); - void S_CSELECT_B32(const GcnInst& inst); - void S_CSELECT_B64(const GcnInst& inst); - void S_BFE_U32(const GcnInst& inst); - void S_LSHL_B32(const GcnInst& inst); - void S_BFM_B32(const GcnInst& inst); - void S_NOT_B64(const GcnInst& inst); - void S_BREV_B32(const GcnInst& inst); + // SOP2 void S_ADD_U32(const GcnInst& inst); void S_SUB_U32(const GcnInst& inst); - void S_GETPC_B64(u32 pc, const GcnInst& inst); + void S_ADD_I32(const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst); - void S_MULK_I32(const GcnInst& inst); - void S_ADDK_I32(const GcnInst& inst); - void S_MAX_U32(bool is_signed, const GcnInst& inst); void S_MIN_U32(bool is_signed, const GcnInst& inst); + void S_MAX_U32(bool is_signed, const GcnInst& inst); + void S_CSELECT_B32(const GcnInst& inst); + void S_CSELECT_B64(const GcnInst& inst); + void S_AND_B32(NegateMode negate, const GcnInst& inst); + void S_AND_B64(NegateMode negate, const GcnInst& inst); + void S_OR_B32(const GcnInst& inst); + void S_OR_B64(NegateMode negate, bool is_xor, const GcnInst& inst); + void S_XOR_B32(const GcnInst& inst); + void S_LSHL_B32(const GcnInst& inst); + void S_LSHR_B32(const GcnInst& inst); + void S_ASHR_I32(const GcnInst& inst); + void S_BFM_B32(const GcnInst& inst); + void S_MUL_I32(const GcnInst& inst); + void S_BFE_U32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); + + // SOPK + void S_MOVK(const GcnInst& inst); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); + void S_ADDK_I32(const GcnInst& inst); + void S_MULK_I32(const GcnInst& inst); + + // SOP1 + void S_MOV(const GcnInst& inst); + void S_MOV_B64(const GcnInst& inst); + void S_NOT_B64(const GcnInst& inst); + void S_BREV_B32(const GcnInst& inst); + void S_GETPC_B64(u32 pc, const GcnInst& inst); + void S_AND_SAVEEXEC_B64(const GcnInst& inst); + + // SOPC + void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); + + // SOPP + void S_BARRIER(); // Scalar Memory + // SMRD void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); void S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst); // Vector ALU - void V_MOV(const GcnInst& inst); - void V_SAD(const GcnInst& inst); - void V_MAC_F32(const GcnInst& inst); - void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); - void V_CVT_F32_F16(const GcnInst& inst); - void V_CVT_F16_F32(const GcnInst& inst); - void V_MUL_F32(const GcnInst& inst); + // VOP2 void V_CNDMASK_B32(const GcnInst& inst); - void V_OR_B32(bool is_xor, const GcnInst& inst); - void V_AND_B32(const GcnInst& inst); - void V_LSHLREV_B32(const GcnInst& inst); + void V_READLANE_B32(const GcnInst& inst); + void V_WRITELANE_B32(const GcnInst& inst); + void V_ADD_F32(const GcnInst& inst); + void V_SUB_F32(const GcnInst& inst); + void V_SUBREV_F32(const GcnInst& inst); + void V_MUL_F32(const GcnInst& inst); + void V_MUL_I32_I24(const GcnInst& inst); + void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); + void 
V_MAX_F32(const GcnInst& inst, bool is_legacy = false); + void V_MIN_I32(const GcnInst& inst); + void V_MIN_U32(const GcnInst& inst); + void V_MAX_U32(bool is_signed, const GcnInst& inst); + void V_LSHR_B32(const GcnInst& inst); + void V_LSHRREV_B32(const GcnInst& inst); + void V_ASHR_I32(const GcnInst& inst); + void V_ASHRREV_I32(const GcnInst& inst); void V_LSHL_B32(const GcnInst& inst); - void V_LSHL_B64(const GcnInst& inst); + void V_LSHLREV_B32(const GcnInst& inst); + void V_AND_B32(const GcnInst& inst); + void V_OR_B32(bool is_xor, const GcnInst& inst); + void V_BFM_B32(const GcnInst& inst); + void V_MAC_F32(const GcnInst& inst); + void V_MADMK_F32(const GcnInst& inst); + void V_BCNT_U32_B32(const GcnInst& inst); + void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); void V_ADD_I32(const GcnInst& inst); + void V_SUB_I32(const GcnInst& inst); + void V_SUBREV_I32(const GcnInst& inst); void V_ADDC_U32(const GcnInst& inst); + void V_LDEXP_F32(const GcnInst& inst); + void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); + + // VOP1 + void V_MOV(const GcnInst& inst); + void V_READFIRSTLANE_B32(const GcnInst& inst); void V_CVT_F32_I32(const GcnInst& inst); void V_CVT_F32_U32(const GcnInst& inst); - void V_MAD_F32(const GcnInst& inst); - void V_FRACT_F32(const GcnInst& inst); - void V_ADD_F32(const GcnInst& inst); - void V_CVT_OFF_F32_I4(const GcnInst& inst); - void V_MED3_F32(const GcnInst& inst); - void V_MED3_I32(const GcnInst& inst); - void V_FLOOR_F32(const GcnInst& inst); - void V_SUB_F32(const GcnInst& inst); - void V_RCP_F32(const GcnInst& inst); - void V_FMA_F32(const GcnInst& inst); - void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); - void V_MAX_F32(const GcnInst& inst, bool is_legacy = false); - void V_MAX_F64(const GcnInst& inst); - void V_MAX_U32(bool is_signed, const GcnInst& inst); - void V_RSQ_F32(const GcnInst& inst); - void V_SIN_F32(const GcnInst& inst); - void V_LOG_F32(const GcnInst& inst); - void V_EXP_F32(const GcnInst& inst); - void V_SQRT_F32(const GcnInst& inst); - void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); - void V_MIN3_F32(const GcnInst& inst); - void V_MIN3_I32(const GcnInst& inst); - void V_MADMK_F32(const GcnInst& inst); - void V_CUBEMA_F32(const GcnInst& inst); - void V_CUBESC_F32(const GcnInst& inst); - void V_CUBETC_F32(const GcnInst& inst); - void V_CUBEID_F32(const GcnInst& inst); void V_CVT_U32_F32(const GcnInst& inst); - void V_SUBREV_F32(const GcnInst& inst); - void V_SUBREV_I32(const GcnInst& inst); - void V_MAD_U64_U32(const GcnInst& inst); - void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); - void V_LSHRREV_B32(const GcnInst& inst); - void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); - void V_SAD_U32(const GcnInst& inst); - void V_BFE_U32(bool is_signed, const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); - void V_MUL_I32_I24(const GcnInst& inst); - void V_SUB_I32(const GcnInst& inst); - void V_LSHR_B32(const GcnInst& inst); - void V_ASHRREV_I32(const GcnInst& inst); - void V_ASHR_I32(const GcnInst& inst); - void V_MAD_U32_U24(const GcnInst& inst); - void V_RNDNE_F32(const GcnInst& inst); - void V_BCNT_U32_B32(const GcnInst& inst); - void V_COS_F32(const GcnInst& inst); - void V_MAX3_F32(const GcnInst& inst); - void V_MAX3_U32(bool is_signed, const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst); - void V_MIN_I32(const GcnInst& inst); - void V_MUL_LO_U32(const GcnInst& inst); + void V_CVT_F16_F32(const GcnInst& inst); + void 
V_CVT_F32_F16(const GcnInst& inst); + void V_CVT_FLR_I32_F32(const GcnInst& inst); + void V_CVT_OFF_F32_I4(const GcnInst& inst); + void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); + void V_FRACT_F32(const GcnInst& inst); void V_TRUNC_F32(const GcnInst& inst); void V_CEIL_F32(const GcnInst& inst); + void V_RNDNE_F32(const GcnInst& inst); + void V_FLOOR_F32(const GcnInst& inst); + void V_EXP_F32(const GcnInst& inst); + void V_LOG_F32(const GcnInst& inst); + void V_RCP_F32(const GcnInst& inst); + void V_RSQ_F32(const GcnInst& inst); + void V_SQRT_F32(const GcnInst& inst); + void V_SIN_F32(const GcnInst& inst); + void V_COS_F32(const GcnInst& inst); void V_NOT_B32(const GcnInst& inst); - void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); void V_BFREV_B32(const GcnInst& inst); - void V_LDEXP_F32(const GcnInst& inst); - void V_CVT_FLR_I32_F32(const GcnInst& inst); - void V_CMP_CLASS_F32(const GcnInst& inst); - void V_FFBL_B32(const GcnInst& inst); - void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); - void V_BFM_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); - void V_MOVRELS_B32(const GcnInst& inst); + void V_FFBL_B32(const GcnInst& inst); void V_MOVRELD_B32(const GcnInst& inst); + void V_MOVRELS_B32(const GcnInst& inst); void V_MOVRELSD_B32(const GcnInst& inst); - // Vector Memory + // VOPC + void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); + void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); + void V_CMP_NE_U64(const GcnInst& inst); + void V_CMP_CLASS_F32(const GcnInst& inst); + + // VOP3a + void V_MAD_F32(const GcnInst& inst); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); + void V_MAD_U32_U24(const GcnInst& inst); + void V_CUBEID_F32(const GcnInst& inst); + void V_CUBESC_F32(const GcnInst& inst); + void V_CUBETC_F32(const GcnInst& inst); + void V_CUBEMA_F32(const GcnInst& inst); + void V_BFE_U32(bool is_signed, const GcnInst& inst); + void V_BFI_B32(const GcnInst& inst); + void V_FMA_F32(const GcnInst& inst); + void V_MIN3_F32(const GcnInst& inst); + void V_MIN3_I32(const GcnInst& inst); + void V_MAX3_F32(const GcnInst& inst); + void V_MAX3_U32(bool is_signed, const GcnInst& inst); + void V_MED3_F32(const GcnInst& inst); + void V_MED3_I32(const GcnInst& inst); + void V_SAD(const GcnInst& inst); + void V_SAD_U32(const GcnInst& inst); + void V_LSHL_B64(const GcnInst& inst); + void V_MAX_F64(const GcnInst& inst); + void V_MUL_LO_U32(const GcnInst& inst); + void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); + void V_MAD_U64_U32(const GcnInst& inst); + + // Vector interpolation + // VINTRP + void V_INTERP_P2_F32(const GcnInst& inst); + void V_INTERP_MOV_F32(const GcnInst& inst); + + // Data share + // DS + void DS_ADD_U32(const GcnInst& inst, bool rtn); + void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn); + void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); + void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); + void DS_SWIZZLE_B32(const GcnInst& inst); + void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); + void DS_APPEND(const GcnInst& inst); + void DS_CONSUME(const GcnInst& inst); + + // Buffer Memory + // MUBUF / MTBUF void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst); void 
BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst); void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); - // Vector interpolation - void V_INTERP_P2_F32(const GcnInst& inst); - void V_INTERP_MOV_F32(const GcnInst& inst); - - // Data share - void DS_SWIZZLE_B32(const GcnInst& inst); - void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); - void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); - void DS_ADD_U32(const GcnInst& inst, bool rtn); - void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn); - void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); - void V_READFIRSTLANE_B32(const GcnInst& inst); - void V_READLANE_B32(const GcnInst& inst); - void V_WRITELANE_B32(const GcnInst& inst); - void DS_APPEND(const GcnInst& inst); - void DS_CONSUME(const GcnInst& inst); - void S_BARRIER(); - + // Image Memory // MIMG void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst); @@ -241,6 +263,7 @@ private: void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); + // Vector ALU Helpers IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 82a1e3e89..3eca385d2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -8,55 +8,110 @@ namespace Shader::Gcn { void Translator::EmitVectorAlu(const GcnInst& inst) { switch (inst.opcode) { - case Opcode::V_LSHLREV_B32: - return V_LSHLREV_B32(inst); - case Opcode::V_LSHL_B32: - return V_LSHL_B32(inst); - case Opcode::V_LSHL_B64: - return V_LSHL_B64(inst); - case Opcode::V_BFREV_B32: - return V_BFREV_B32(inst); - case Opcode::V_BFE_U32: - return V_BFE_U32(false, inst); - case Opcode::V_BFE_I32: - return V_BFE_U32(true, inst); - case Opcode::V_BFI_B32: - return V_BFI_B32(inst); + // VOP2 + case Opcode::V_CNDMASK_B32: + return V_CNDMASK_B32(inst); + case Opcode::V_READLANE_B32: + return V_READLANE_B32(inst); + case Opcode::V_WRITELANE_B32: + return V_WRITELANE_B32(inst); + case Opcode::V_ADD_F32: + return V_ADD_F32(inst); + case Opcode::V_SUB_F32: + return V_SUB_F32(inst); + case Opcode::V_SUBREV_F32: + return V_SUBREV_F32(inst); + case Opcode::V_MAC_LEGACY_F32: + return V_MAC_F32(inst); + case Opcode::V_MUL_LEGACY_F32: + case Opcode::V_MUL_F32: + return V_MUL_F32(inst); + case Opcode::V_MUL_I32_I24: + case Opcode::V_MUL_U32_U24: + return V_MUL_I32_I24(inst); + case Opcode::V_MIN_LEGACY_F32: + return V_MIN_F32(inst, true); + case Opcode::V_MAX_LEGACY_F32: + return V_MAX_F32(inst, true); + case Opcode::V_MIN_F32: + return V_MIN_F32(inst); + case Opcode::V_MAX_F32: + return V_MAX_F32(inst); + case Opcode::V_MIN_I32: + return V_MIN_I32(inst); + case Opcode::V_MAX_I32: + return V_MAX_U32(true, inst); + case Opcode::V_MIN_U32: + return V_MIN_U32(inst); + case Opcode::V_MAX_U32: + return V_MAX_U32(false, inst); case Opcode::V_LSHR_B32: return V_LSHR_B32(inst); - case Opcode::V_ASHRREV_I32: - return V_ASHRREV_I32(inst); - case Opcode::V_ASHR_I32: - return V_ASHR_I32(inst); case Opcode::V_LSHRREV_B32: return V_LSHRREV_B32(inst); - case Opcode::V_NOT_B32: - return V_NOT_B32(inst); + case Opcode::V_ASHR_I32: + return 
V_ASHR_I32(inst); + case Opcode::V_ASHRREV_I32: + return V_ASHRREV_I32(inst); + case Opcode::V_LSHL_B32: + return V_LSHL_B32(inst); + case Opcode::V_LSHLREV_B32: + return V_LSHLREV_B32(inst); case Opcode::V_AND_B32: return V_AND_B32(inst); case Opcode::V_OR_B32: return V_OR_B32(false, inst); case Opcode::V_XOR_B32: return V_OR_B32(true, inst); - case Opcode::V_FFBL_B32: - return V_FFBL_B32(inst); - - case Opcode::V_MOV_B32: - return V_MOV(inst); + case Opcode::V_BFM_B32: + return V_BFM_B32(inst); + case Opcode::V_MAC_F32: + return V_MAC_F32(inst); + case Opcode::V_MADMK_F32: + return V_MADMK_F32(inst); + case Opcode::V_MADAK_F32: + return V_FMA_F32(inst); + case Opcode::V_BCNT_U32_B32: + return V_BCNT_U32_B32(inst); + case Opcode::V_MBCNT_LO_U32_B32: + return V_MBCNT_U32_B32(true, inst); + case Opcode::V_MBCNT_HI_U32_B32: + return V_MBCNT_U32_B32(false, inst); case Opcode::V_ADD_I32: return V_ADD_I32(inst); + case Opcode::V_SUB_I32: + return V_SUB_I32(inst); + case Opcode::V_SUBREV_I32: + return V_SUBREV_I32(inst); case Opcode::V_ADDC_U32: return V_ADDC_U32(inst); + case Opcode::V_LDEXP_F32: + return V_LDEXP_F32(inst); + case Opcode::V_CVT_PKRTZ_F16_F32: + return V_CVT_PKRTZ_F16_F32(inst); + + // VOP1 + case Opcode::V_NOP: + return; + case Opcode::V_MOV_B32: + return V_MOV(inst); + case Opcode::V_READFIRSTLANE_B32: + return V_READFIRSTLANE_B32(inst); case Opcode::V_CVT_F32_I32: return V_CVT_F32_I32(inst); case Opcode::V_CVT_F32_U32: return V_CVT_F32_U32(inst); - case Opcode::V_CVT_PKRTZ_F16_F32: - return V_CVT_PKRTZ_F16_F32(inst); - case Opcode::V_CVT_F32_F16: - return V_CVT_F32_F16(inst); + case Opcode::V_CVT_U32_F32: + return V_CVT_U32_F32(inst); + case Opcode::V_CVT_I32_F32: + return V_CVT_I32_F32(inst); case Opcode::V_CVT_F16_F32: return V_CVT_F16_F32(inst); + case Opcode::V_CVT_F32_F16: + return V_CVT_F32_F16(inst); + case Opcode::V_CVT_FLR_I32_F32: + return V_CVT_FLR_I32_F32(inst); + case Opcode::V_CVT_OFF_F32_I4: + return V_CVT_OFF_F32_I4(inst); case Opcode::V_CVT_F32_UBYTE0: return V_CVT_F32_UBYTE(0, inst); case Opcode::V_CVT_F32_UBYTE1: @@ -65,34 +120,54 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CVT_F32_UBYTE(2, inst); case Opcode::V_CVT_F32_UBYTE3: return V_CVT_F32_UBYTE(3, inst); - case Opcode::V_CVT_OFF_F32_I4: - return V_CVT_OFF_F32_I4(inst); - case Opcode::V_MAD_U64_U32: - return V_MAD_U64_U32(inst); - case Opcode::V_CMP_GE_I32: - return V_CMP_U32(ConditionOp::GE, true, false, inst); - case Opcode::V_CMP_EQ_I32: - return V_CMP_U32(ConditionOp::EQ, true, false, inst); - case Opcode::V_CMP_LE_I32: - return V_CMP_U32(ConditionOp::LE, true, false, inst); - case Opcode::V_CMP_NE_I32: - return V_CMP_U32(ConditionOp::LG, true, false, inst); - case Opcode::V_CMP_NE_U32: - return V_CMP_U32(ConditionOp::LG, false, false, inst); - case Opcode::V_CMP_EQ_U32: - return V_CMP_U32(ConditionOp::EQ, false, false, inst); - case Opcode::V_CMP_F_U32: - return V_CMP_U32(ConditionOp::F, false, false, inst); - case Opcode::V_CMP_LT_U32: - return V_CMP_U32(ConditionOp::LT, false, false, inst); - case Opcode::V_CMP_GT_U32: - return V_CMP_U32(ConditionOp::GT, false, false, inst); - case Opcode::V_CMP_GE_U32: - return V_CMP_U32(ConditionOp::GE, false, false, inst); - case Opcode::V_CMP_TRU_U32: - return V_CMP_U32(ConditionOp::TRU, false, false, inst); - case Opcode::V_CMP_NEQ_F32: - return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_FRACT_F32: + return V_FRACT_F32(inst); + case Opcode::V_TRUNC_F32: + return V_TRUNC_F32(inst); + case Opcode::V_CEIL_F32: + return 
V_CEIL_F32(inst); + case Opcode::V_RNDNE_F32: + return V_RNDNE_F32(inst); + case Opcode::V_FLOOR_F32: + return V_FLOOR_F32(inst); + case Opcode::V_EXP_F32: + return V_EXP_F32(inst); + case Opcode::V_LOG_F32: + return V_LOG_F32(inst); + case Opcode::V_RCP_F32: + return V_RCP_F32(inst); + case Opcode::V_RCP_IFLAG_F32: + return V_RCP_F32(inst); + case Opcode::V_RSQ_CLAMP_F32: + return V_RSQ_F32(inst); + case Opcode::V_RSQ_LEGACY_F32: + return V_RSQ_F32(inst); + case Opcode::V_RSQ_F32: + return V_RSQ_F32(inst); + case Opcode::V_SQRT_F32: + return V_SQRT_F32(inst); + case Opcode::V_SIN_F32: + return V_SIN_F32(inst); + case Opcode::V_COS_F32: + return V_COS_F32(inst); + case Opcode::V_NOT_B32: + return V_NOT_B32(inst); + case Opcode::V_BFREV_B32: + return V_BFREV_B32(inst); + case Opcode::V_FFBH_U32: + return V_FFBH_U32(inst); + case Opcode::V_FFBL_B32: + return V_FFBL_B32(inst); + case Opcode::V_MOVRELD_B32: + return V_MOVRELD_B32(inst); + case Opcode::V_MOVRELS_B32: + return V_MOVRELS_B32(inst); + case Opcode::V_MOVRELSD_B32: + return V_MOVRELSD_B32(inst); + + // VOPC + + // V_CMP_{OP16}_F32 case Opcode::V_CMP_F_F32: return V_CMP_F32(ConditionOp::F, false, inst); case Opcode::V_CMP_LT_F32: @@ -107,149 +182,20 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_F32(ConditionOp::LG, false, inst); case Opcode::V_CMP_GE_F32: return V_CMP_F32(ConditionOp::GE, false, inst); - case Opcode::V_CMP_NLE_F32: - return V_CMP_F32(ConditionOp::GT, false, inst); - case Opcode::V_CMP_NLT_F32: - return V_CMP_F32(ConditionOp::GE, false, inst); - case Opcode::V_CMP_NGT_F32: - return V_CMP_F32(ConditionOp::LE, false, inst); - case Opcode::V_CMP_NGE_F32: - return V_CMP_F32(ConditionOp::LT, false, inst); case Opcode::V_CMP_U_F32: return V_CMP_F32(ConditionOp::U, false, inst); - case Opcode::V_CNDMASK_B32: - return V_CNDMASK_B32(inst); - case Opcode::V_MAX_I32: - return V_MAX_U32(true, inst); - case Opcode::V_MAX_U32: - return V_MAX_U32(false, inst); - case Opcode::V_MIN_I32: - return V_MIN_I32(inst); - case Opcode::V_CUBEMA_F32: - return V_CUBEMA_F32(inst); - case Opcode::V_CUBESC_F32: - return V_CUBESC_F32(inst); - case Opcode::V_CUBETC_F32: - return V_CUBETC_F32(inst); - case Opcode::V_CUBEID_F32: - return V_CUBEID_F32(inst); - case Opcode::V_CVT_U32_F32: - return V_CVT_U32_F32(inst); - case Opcode::V_CVT_I32_F32: - return V_CVT_I32_F32(inst); - case Opcode::V_CVT_FLR_I32_F32: - return V_CVT_FLR_I32_F32(inst); - case Opcode::V_SUBREV_I32: - return V_SUBREV_I32(inst); - case Opcode::V_MUL_HI_U32: - return V_MUL_HI_U32(false, inst); - case Opcode::V_MUL_LO_I32: - return V_MUL_LO_U32(inst); - case Opcode::V_SAD_U32: - return V_SAD_U32(inst); - case Opcode::V_SUB_I32: - return V_SUB_I32(inst); - case Opcode::V_MAD_I32_I24: - return V_MAD_I32_I24(inst); - case Opcode::V_MUL_I32_I24: - case Opcode::V_MUL_U32_U24: - return V_MUL_I32_I24(inst); - case Opcode::V_MAD_U32_U24: - return V_MAD_U32_U24(inst); - case Opcode::V_BCNT_U32_B32: - return V_BCNT_U32_B32(inst); - case Opcode::V_MUL_LO_U32: - return V_MUL_LO_U32(inst); - case Opcode::V_MIN_U32: - return V_MIN_U32(inst); - case Opcode::V_CMP_NE_U64: - return V_CMP_NE_U64(inst); - case Opcode::V_READFIRSTLANE_B32: - return V_READFIRSTLANE_B32(inst); - case Opcode::V_READLANE_B32: - return V_READLANE_B32(inst); - case Opcode::V_WRITELANE_B32: - return V_WRITELANE_B32(inst); - - case Opcode::V_MAD_F32: - return V_MAD_F32(inst); - case Opcode::V_MAC_F32: - return V_MAC_F32(inst); - case Opcode::V_MUL_F32: - return V_MUL_F32(inst); - case Opcode::V_RCP_F32: 
- return V_RCP_F32(inst); - case Opcode::V_LDEXP_F32: - return V_LDEXP_F32(inst); - case Opcode::V_FRACT_F32: - return V_FRACT_F32(inst); - case Opcode::V_ADD_F32: - return V_ADD_F32(inst); - case Opcode::V_MED3_F32: - return V_MED3_F32(inst); - case Opcode::V_MED3_I32: - return V_MED3_I32(inst); - case Opcode::V_FLOOR_F32: - return V_FLOOR_F32(inst); - case Opcode::V_SUB_F32: - return V_SUB_F32(inst); - case Opcode::V_FMA_F32: - case Opcode::V_MADAK_F32: - return V_FMA_F32(inst); - case Opcode::V_MAX_F32: - return V_MAX_F32(inst); - case Opcode::V_MAX_F64: - return V_MAX_F64(inst); - case Opcode::V_RSQ_F32: - return V_RSQ_F32(inst); - case Opcode::V_SIN_F32: - return V_SIN_F32(inst); - case Opcode::V_COS_F32: - return V_COS_F32(inst); - case Opcode::V_LOG_F32: - return V_LOG_F32(inst); - case Opcode::V_EXP_F32: - return V_EXP_F32(inst); - case Opcode::V_SQRT_F32: - return V_SQRT_F32(inst); - case Opcode::V_MIN_F32: - return V_MIN_F32(inst, false); - case Opcode::V_MIN3_F32: - return V_MIN3_F32(inst); - case Opcode::V_MIN3_I32: - return V_MIN3_I32(inst); - case Opcode::V_MIN_LEGACY_F32: - return V_MIN_F32(inst, true); - case Opcode::V_MADMK_F32: - return V_MADMK_F32(inst); - case Opcode::V_SUBREV_F32: - return V_SUBREV_F32(inst); - case Opcode::V_RNDNE_F32: - return V_RNDNE_F32(inst); - case Opcode::V_MAX3_F32: - return V_MAX3_F32(inst); - case Opcode::V_MAX3_U32: - return V_MAX3_U32(false, inst); - case Opcode::V_MAX3_I32: - return V_MAX_U32(true, inst); - case Opcode::V_TRUNC_F32: - return V_TRUNC_F32(inst); - case Opcode::V_CEIL_F32: - return V_CEIL_F32(inst); - case Opcode::V_MUL_LEGACY_F32: - return V_MUL_F32(inst); - case Opcode::V_MAC_LEGACY_F32: - return V_MAC_F32(inst); - case Opcode::V_MAD_LEGACY_F32: - return V_MAD_F32(inst); - case Opcode::V_MAX_LEGACY_F32: - return V_MAX_F32(inst, true); - case Opcode::V_RSQ_LEGACY_F32: - case Opcode::V_RSQ_CLAMP_F32: - return V_RSQ_F32(inst); - case Opcode::V_RCP_IFLAG_F32: - return V_RCP_F32(inst); + case Opcode::V_CMP_NGE_F32: + return V_CMP_F32(ConditionOp::LT, false, inst); + case Opcode::V_CMP_NGT_F32: + return V_CMP_F32(ConditionOp::LE, false, inst); + case Opcode::V_CMP_NLE_F32: + return V_CMP_F32(ConditionOp::GT, false, inst); + case Opcode::V_CMP_NEQ_F32: + return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_CMP_NLT_F32: + return V_CMP_F32(ConditionOp::GE, false, inst); + // V_CMPX_{OP16}_F32 case Opcode::V_CMPX_F_F32: return V_CMP_F32(ConditionOp::F, true, inst); case Opcode::V_CMPX_LT_F32: @@ -278,19 +224,48 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_F32(ConditionOp::GE, true, inst); case Opcode::V_CMPX_TRU_F32: return V_CMP_F32(ConditionOp::TRU, true, inst); - case Opcode::V_CMP_CLASS_F32: - return V_CMP_CLASS_F32(inst); - case Opcode::V_CMP_LE_U32: - return V_CMP_U32(ConditionOp::LE, false, false, inst); - case Opcode::V_CMP_GT_I32: - return V_CMP_U32(ConditionOp::GT, true, false, inst); + // V_CMP_{OP8}_I32 case Opcode::V_CMP_LT_I32: return V_CMP_U32(ConditionOp::LT, true, false, inst); - case Opcode::V_CMPX_GT_I32: - return V_CMP_U32(ConditionOp::GT, true, true, inst); + case Opcode::V_CMP_EQ_I32: + return V_CMP_U32(ConditionOp::EQ, true, false, inst); + case Opcode::V_CMP_LE_I32: + return V_CMP_U32(ConditionOp::LE, true, false, inst); + case Opcode::V_CMP_GT_I32: + return V_CMP_U32(ConditionOp::GT, true, false, inst); + case Opcode::V_CMP_NE_I32: + return V_CMP_U32(ConditionOp::LG, true, false, inst); + case Opcode::V_CMP_GE_I32: + return V_CMP_U32(ConditionOp::GE, true, false, inst); + 
+ // V_CMPX_{OP8}_I32 case Opcode::V_CMPX_LT_I32: return V_CMP_U32(ConditionOp::LT, true, true, inst); + case Opcode::V_CMPX_GT_I32: + return V_CMP_U32(ConditionOp::GT, true, true, inst); + case Opcode::V_CMPX_LG_I32: + return V_CMP_U32(ConditionOp::LG, true, true, inst); + + // V_CMP_{OP8}_U32 + case Opcode::V_CMP_F_U32: + return V_CMP_U32(ConditionOp::F, false, false, inst); + case Opcode::V_CMP_LT_U32: + return V_CMP_U32(ConditionOp::LT, false, false, inst); + case Opcode::V_CMP_EQ_U32: + return V_CMP_U32(ConditionOp::EQ, false, false, inst); + case Opcode::V_CMP_LE_U32: + return V_CMP_U32(ConditionOp::LE, false, false, inst); + case Opcode::V_CMP_GT_U32: + return V_CMP_U32(ConditionOp::GT, false, false, inst); + case Opcode::V_CMP_NE_U32: + return V_CMP_U32(ConditionOp::LG, false, false, inst); + case Opcode::V_CMP_GE_U32: + return V_CMP_U32(ConditionOp::GE, false, false, inst); + case Opcode::V_CMP_TRU_U32: + return V_CMP_U32(ConditionOp::TRU, false, false, inst); + + // V_CMPX_{OP8}_U32 case Opcode::V_CMPX_F_U32: return V_CMP_U32(ConditionOp::F, false, true, inst); case Opcode::V_CMPX_LT_U32: @@ -307,67 +282,74 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U32(ConditionOp::GE, false, true, inst); case Opcode::V_CMPX_TRU_U32: return V_CMP_U32(ConditionOp::TRU, false, true, inst); - case Opcode::V_CMPX_LG_I32: - return V_CMP_U32(ConditionOp::LG, true, true, inst); - case Opcode::V_MBCNT_LO_U32_B32: - return V_MBCNT_U32_B32(true, inst); - case Opcode::V_MBCNT_HI_U32_B32: - return V_MBCNT_U32_B32(false, inst); - case Opcode::V_MOVRELS_B32: - return V_MOVRELS_B32(inst); - case Opcode::V_MOVRELD_B32: - return V_MOVRELD_B32(inst); - case Opcode::V_MOVRELSD_B32: - return V_MOVRELSD_B32(inst); - case Opcode::V_NOP: - return; + // V_CMP_{OP8}_U64 + case Opcode::V_CMP_NE_U64: + return V_CMP_NE_U64(inst); + + case Opcode::V_CMP_CLASS_F32: + return V_CMP_CLASS_F32(inst); + + // VOP3a + case Opcode::V_MAD_LEGACY_F32: + return V_MAD_F32(inst); + case Opcode::V_MAD_F32: + return V_MAD_F32(inst); + case Opcode::V_MAD_I32_I24: + return V_MAD_I32_I24(inst); + case Opcode::V_MAD_U32_U24: + return V_MAD_U32_U24(inst); + case Opcode::V_CUBEID_F32: + return V_CUBEID_F32(inst); + case Opcode::V_CUBESC_F32: + return V_CUBESC_F32(inst); + case Opcode::V_CUBETC_F32: + return V_CUBETC_F32(inst); + case Opcode::V_CUBEMA_F32: + return V_CUBEMA_F32(inst); + case Opcode::V_BFE_U32: + return V_BFE_U32(false, inst); + case Opcode::V_BFE_I32: + return V_BFE_U32(true, inst); + case Opcode::V_BFI_B32: + return V_BFI_B32(inst); + case Opcode::V_FMA_F32: + return V_FMA_F32(inst); + case Opcode::V_MIN3_F32: + return V_MIN3_F32(inst); + case Opcode::V_MIN3_I32: + return V_MIN3_I32(inst); + case Opcode::V_MAX3_F32: + return V_MAX3_F32(inst); + case Opcode::V_MAX3_I32: + return V_MAX3_U32(true, inst); + case Opcode::V_MAX3_U32: + return V_MAX3_U32(false, inst); + case Opcode::V_MED3_F32: + return V_MED3_F32(inst); + case Opcode::V_MED3_I32: + return V_MED3_I32(inst); + case Opcode::V_SAD_U32: + return V_SAD_U32(inst); + case Opcode::V_LSHL_B64: + return V_LSHL_B64(inst); + case Opcode::V_MAX_F64: + return V_MAX_F64(inst); + case Opcode::V_MUL_LO_U32: + return V_MUL_LO_U32(inst); + case Opcode::V_MUL_HI_U32: + return V_MUL_HI_U32(false, inst); + case Opcode::V_MUL_LO_I32: + return V_MUL_LO_U32(inst); + case Opcode::V_MAD_U64_U32: + return V_MAD_U64_U32(inst); - case Opcode::V_BFM_B32: - return V_BFM_B32(inst); - case Opcode::V_FFBH_U32: - return V_FFBH_U32(inst); default: LogMissingOpcode(inst); } }
-void Translator::V_MOV(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); -} - -void Translator::V_SAD(const GcnInst& inst) { - const IR::U32 abs_diff = ir.IAbs(ir.ISub(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); - SetDst(inst.dst[0], ir.IAdd(abs_diff, GetSrc(inst.src[2]))); -} - -void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), - GetSrc(inst.dst[0]))); -} - -void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Value vec_f32 = - ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); - ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); -} - -void Translator::V_CVT_F32_F16(const GcnInst& inst) { - const IR::U32 src0 = GetSrc(inst.src[0]); - const IR::U16 src0l = ir.UConvert(16, src0); - SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast(src0l))); -} - -void Translator::V_CVT_F16_F32(const GcnInst& inst) { - const IR::F32 src0 = GetSrc(inst.src[0]); - const IR::F16 src0fp16 = ir.FPConvert(16, src0); - SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast(src0fp16))); -} - -void Translator::V_MUL_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); -} +// VOP2 void Translator::V_CNDMASK_B32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; @@ -380,19 +362,107 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, IR::U32F32{result}); } -void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, - is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); +void Translator::V_READLANE_B32(const GcnInst& inst) { + const IR::ScalarReg dst{inst.dst[0].code}; + const IR::U32 value{GetSrc(inst.src[0])}; + const IR::U32 lane{GetSrc(inst.src[1])}; + ir.SetScalarReg(dst, ir.ReadLane(value, lane)); } -void Translator::V_AND_B32(const GcnInst& inst) { +void Translator::V_WRITELANE_B32(const GcnInst& inst) { + const IR::VectorReg dst{inst.dst[0].code}; + const IR::U32 value{GetSrc(inst.src[0])}; + const IR::U32 lane{GetSrc(inst.src[1])}; + const IR::U32 old_value{GetSrc(inst.dst[0])}; + ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); +} + +void Translator::V_ADD_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPAdd(src0, src1)); +} + +void Translator::V_SUB_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPSub(src0, src1)); +} + +void Translator::V_SUBREV_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPSub(src1, src0)); +} + +void Translator::V_MUL_F32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); +} + +void Translator::V_MUL_I32_I24(const GcnInst& inst) { + const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)}; + const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)}; + SetDst(inst.dst[0], ir.IMul(src0, src1)); +} + +void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 
src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); +} + +void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); +} + +void Translator::V_MIN_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1)); + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.SMin(src0, src1)); +} + +void Translator::V_MIN_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IMin(src0, src1, false)); +} + +void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed)); +} + +void Translator::V_LSHR_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); +} + +void Translator::V_LSHRREV_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); +} + +void Translator::V_ASHR_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); +} + +void Translator::V_ASHRREV_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); +} + +void Translator::V_LSHL_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); } void Translator::V_LSHLREV_B32(const GcnInst& inst) { @@ -402,20 +472,72 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } -void Translator::V_LSHL_B32(const GcnInst& inst) { +void Translator::V_AND_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1)); } -void Translator::V_LSHL_B64(const GcnInst& inst) { - const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U64 src1{GetSrc64(inst.src[1])}; +void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0, - "V_LSHL_B64 with non-zero src0 or src1 is not supported"); - ir.SetVectorReg(dst_reg, ir.Imm32(0)); - ir.SetVectorReg(dst_reg + 1, ir.Imm32(0)); + ir.SetVectorReg(dst_reg, + is_xor ? 
ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); +} + +void Translator::V_BFM_B32(const GcnInst& inst) { + // bitmask width + const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(4))}; + // bitmask offset + const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(4))}; + const IR::U32 ones = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), src0), ir.Imm32(1)); + SetDst(inst.dst[0], ir.ShiftLeftLogical(ones, src1)); +} + +void Translator::V_MAC_F32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), + GetSrc(inst.dst[0]))); +} + +void Translator::V_MADMK_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 k{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.FPFma(src0, k, src1)); +} + +void Translator::V_BCNT_U32_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1)); +} + +void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { + if (!is_low) { + // v_mbcnt_hi_u32_b32 v2, -1, 0 + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 && + inst.src[1].field == OperandField::ConstZero) { + return; + } + // v_mbcnt_hi_u32_b32 vX, exec_hi, 0 + if (inst.src[0].field == OperandField::ExecHi && + inst.src[1].field == OperandField::ConstZero) { + return; + } + } else { + // v_mbcnt_lo_u32_b32 v2, -1, vX + // used combined with above to fetch lane id in non-compute stages + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) { + SetDst(inst.dst[0], ir.LaneId()); + } + // v_mbcnt_lo_u32_b32 v20, exec_lo, vX + // used combined in above for append buffer indexing. 
+ if (inst.src[0].field == OperandField::ExecLo) { + SetDst(inst.dst[0], ir.Imm32(0)); + } + } } void Translator::V_ADD_I32(const GcnInst& inst) { @@ -423,11 +545,24 @@ void Translator::V_ADD_I32(const GcnInst& inst) { const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1)); - // TODO: Carry + // TODO: Carry-out +} + +void Translator::V_SUB_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ISub(src0, src1)); + // TODO: Carry-out +} + +void Translator::V_SUBREV_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ISub(src1, src0)); + // TODO: Carry-out } void Translator::V_ADDC_U32(const GcnInst& inst) { - const auto src0 = GetSrc(inst.src[0]); const auto src1 = GetSrc(inst.src[1]); @@ -456,6 +591,36 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { ir.SetVcc(did_overflow); } +void Translator::V_LDEXP_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); +} + +void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value vec_f32 = + ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); + ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); +} + +// VOP1 + +void Translator::V_MOV(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { + const IR::ScalarReg dst{inst.dst[0].code}; + const IR::U32 value{GetSrc(inst.src[0])}; + + if (info.stage != Stage::Compute) { + SetDst(inst.dst[0], value); + } else { + SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } +} + void Translator::V_CVT_F32_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; @@ -468,23 +633,31 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0)); } -void Translator::V_MAD_F32(const GcnInst& inst) { +void Translator::V_CVT_U32_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); + SetDst(inst.dst[0], ir.ConvertFToU(32, src0)); } -void Translator::V_FRACT_F32(const GcnInst& inst) { +void Translator::V_CVT_I32_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.Fract(src0)); + SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); } -void Translator::V_ADD_F32(const GcnInst& inst) { +void Translator::V_CVT_F16_F32(const GcnInst& inst) { + const IR::F32 src0 = GetSrc(inst.src[0]); + const IR::F16 src0fp16 = ir.FPConvert(16, src0); + SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast(src0fp16))); +} + +void Translator::V_CVT_F32_F16(const GcnInst& inst) { + const IR::U32 src0 = GetSrc(inst.src[0]); + const IR::U16 src0l = ir.UConvert(16, src0); + SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast(src0l))); +} + +void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPAdd(src0, src1)); + SetDst(inst.dst[0], ir.ConvertFToI(32, true, 
ir.FPFloor(src0))); } void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { @@ -497,20 +670,31 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32() & 0xF])); } -void Translator::V_MED3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 src2{GetSrc(inst.src[2])}; - const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); - SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); +void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8)); + SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte)); } -void Translator::V_MED3_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 src2{GetSrc(inst.src[2])}; - const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2); - SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); +void Translator::V_FRACT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.Fract(src0)); +} + +void Translator::V_TRUNC_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPTrunc(src0)); +} + +void Translator::V_CEIL_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPCeil(src0)); +} + +void Translator::V_RNDNE_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPRoundEven(src0)); } void Translator::V_FLOOR_F32(const GcnInst& inst) { @@ -519,10 +703,14 @@ void Translator::V_FLOOR_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); } -void Translator::V_SUB_F32(const GcnInst& inst) { +void Translator::V_EXP_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPSub(src0, src1)); + SetDst(inst.dst[0], ir.FPExp2(src0)); +} + +void Translator::V_LOG_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPLog2(src0)); } void Translator::V_RCP_F32(const GcnInst& inst) { @@ -530,13 +718,79 @@ void Translator::V_RCP_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPRecip(src0)); } -void Translator::V_FMA_F32(const GcnInst& inst) { +void Translator::V_RSQ_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); + SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } +void Translator::V_SQRT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPSqrt(src0)); +} + +void Translator::V_SIN_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPSin(src0)); +} + +void Translator::V_COS_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPCos(src0)); +} + +void Translator::V_NOT_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.BitwiseNot(src0)); +} + +void Translator::V_BFREV_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.BitReverse(src0)); +} + +void Translator::V_FFBH_U32(const GcnInst& inst) { + const IR::U32 
src0{GetSrc(inst.src[0])}; + // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB) + // position + const IR::U32 msb_pos = ir.FindUMsb(src0); + const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos); + // Select 0xFFFFFFFF if src0 was 0 + const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0)); + SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); +} + +void Translator::V_FFBL_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FindILsb(src0)); +} + +void Translator::V_MOVRELD_B32(const GcnInst& inst) { + const IR::U32 src_val{GetSrc(inst.src[0])}; + u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + VMovRelDHelper(dst_vgprno, src_val, m0); +} + +void Translator::V_MOVRELS_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0); + const IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + SetDst(inst.dst[0], src_val); +} + +void Translator::V_MOVRELSD_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0); + u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + VMovRelDHelper(dst_vgprno, src_val, m0); +} + +// VOPC + void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; const IR::F32 src1{GetSrc(inst.src[1])}; @@ -578,128 +832,6 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { } } -void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); -} - -void Translator::V_MAX_F64(const GcnInst& inst) { - const IR::F64 src0{GetSrc64(inst.src[0])}; - const IR::F64 src1{GetSrc64(inst.src[1])}; - SetDst64(inst.dst[0], ir.FPMax(src0, src1)); -} - -void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed)); -} - -void Translator::V_RSQ_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); -} - -void Translator::V_SIN_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPSin(src0)); -} - -void Translator::V_LOG_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPLog2(src0)); -} - -void Translator::V_EXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPExp2(src0)); -} - -void Translator::V_SQRT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPSqrt(src0)); -} - -void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); -} - -void Translator::V_MIN3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); -} - -void Translator::V_MIN3_I32(const GcnInst& inst) { - const IR::U32 
src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2))); -} - -void Translator::V_MADMK_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 k{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.FPFma(src0, k, src1)); -} - -void Translator::V_CUBEMA_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.Imm32(1.f)); -} - -void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); -} - -void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[1])); -} - -void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[2])); -} - -void Translator::V_CVT_U32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.ConvertFToU(32, src0)); -} - -void Translator::V_SUBREV_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPSub(src1, src0)); -} - -void Translator::V_SUBREV_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src1, src0)); - // TODO: Carry-out -} - -void Translator::V_MAD_U64_U32(const GcnInst& inst) { - const auto src0 = GetSrc(inst.src[0]); - const auto src1 = GetSrc(inst.src[1]); - const auto src2 = GetSrc64(inst.src[2]); - - // const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1)); - const IR::U64 mul_result = - ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U))); - const IR::U64 sum_result = ir.IAdd(mul_result, src2); - - SetDst64(inst.dst[0], sum_result); - - const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false); - const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false); - const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); - ir.SetVcc(did_overflow); -} - void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -738,149 +870,6 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const } } -void Translator::V_LSHRREV_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); -} - -void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)}; - SetDst(inst.dst[0], hi); -} - -void Translator::V_SAD_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 src2{GetSrc(inst.src[2])}; - IR::U32 result; - if (src0.IsImmediate() && src0.U32() == 0U) { - result = src1; - } else if (src1.IsImmediate() && src1.U32() == 0U) { - result = src0; - } else { - const IR::U32 max{ir.IMax(src0, src1, false)}; - const IR::U32 min{ir.IMin(src0, src1, false)}; - result = ir.ISub(max, min); - } - SetDst(inst.dst[0], ir.IAdd(result, src2)); -} - -void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), 
ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; - SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); -} - -void Translator::V_MAD_I32_I24(const GcnInst& inst, bool is_signed) { - const IR::U32 src0{ - ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)}; - const IR::U32 src1{ - ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), is_signed)}; - const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.IAdd(ir.IMul(src0, src1), src2)); -} - -void Translator::V_MUL_I32_I24(const GcnInst& inst) { - const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)}; - const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)}; - SetDst(inst.dst[0], ir.IMul(src0, src1)); -} - -void Translator::V_SUB_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src0, src1)); -} - -void Translator::V_LSHR_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ShiftRightLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); -} - -void Translator::V_ASHRREV_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); -} - -void Translator::V_ASHR_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); -} - -void Translator::V_MAD_U32_U24(const GcnInst& inst) { - V_MAD_I32_I24(inst, false); -} - -void Translator::V_RNDNE_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPRoundEven(src0)); -} - -void Translator::V_BCNT_U32_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1)); -} - -void Translator::V_COS_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPCos(src0)); -} - -void Translator::V_MAX3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; - const IR::F32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); -} - -void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed)); -} - -void Translator::V_CVT_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); -} - -void Translator::V_MIN_I32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.SMin(src0, src1)); -} - -void Translator::V_MUL_LO_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IMul(src0, src1)); -} - -void Translator::V_TRUNC_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPTrunc(src0)); -} - -void Translator::V_CEIL_F32(const 
GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FPCeil(src0)); -} - -void Translator::V_MIN_U32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IMin(src0, src1, false)); -} - void Translator::V_CMP_NE_U64(const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { @@ -910,41 +899,6 @@ void Translator::V_CMP_NE_U64(const GcnInst& inst) { } } -void Translator::V_BFI_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], - ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2))); -} - -void Translator::V_NOT_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.BitwiseNot(src0)); -} - -void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8)); - SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte)); -} - -void Translator::V_BFREV_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.BitReverse(src0)); -} - -void Translator::V_LDEXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); -} - -void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0))); -} - void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -971,55 +925,177 @@ void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { } } -void Translator::V_FFBL_B32(const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.FindILsb(src0)); +// VOP3a + +void Translator::V_MAD_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } -void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { - if (!is_low) { - // v_mbcnt_hi_u32_b32 v2, -1, 0 - if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 && - inst.src[1].field == OperandField::ConstZero) { - return; - } - // v_mbcnt_hi_u32_b32 vX, exec_hi, 0 - if (inst.src[0].field == OperandField::ExecHi && - inst.src[1].field == OperandField::ConstZero) { - return; - } +void Translator::V_MAD_I32_I24(const GcnInst& inst, bool is_signed) { + const IR::U32 src0{ + ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)}; + const IR::U32 src1{ + ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), is_signed)}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.IAdd(ir.IMul(src0, src1), src2)); +} + +void Translator::V_MAD_U32_U24(const GcnInst& inst) { + V_MAD_I32_I24(inst, false); +} + +void Translator::V_CUBEID_F32(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[2])); +} + +void Translator::V_CUBESC_F32(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::V_CUBETC_F32(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[1])); +} + +void 
Translator::V_CUBEMA_F32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.Imm32(1.f)); +} + +void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; + const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); +} + +void Translator::V_BFI_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], + ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2))); +} + +void Translator::V_FMA_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); +} + +void Translator::V_MIN3_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); +} + +void Translator::V_MIN3_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2))); +} + +void Translator::V_MAX3_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); +} + +void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed)); +} + +void Translator::V_MED3_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); + SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); +} + +void Translator::V_MED3_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2); + SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); +} + +void Translator::V_SAD(const GcnInst& inst) { + const IR::U32 abs_diff = ir.IAbs(ir.ISub(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); + SetDst(inst.dst[0], ir.IAdd(abs_diff, GetSrc(inst.src[2]))); +} + +void Translator::V_SAD_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + IR::U32 result; + if (src0.IsImmediate() && src0.U32() == 0U) { + result = src1; + } else if (src1.IsImmediate() && src1.U32() == 0U) { + result = src0; } else { - // v_mbcnt_lo_u32_b32 v2, -1, vX - // used combined with above to fetch lane id in non-compute stages - if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) { - SetDst(inst.dst[0], ir.LaneId()); - } - // v_mbcnt_lo_u32_b32 v20, exec_lo, vX - // used combined in above for append buffer indexing. 
- if (inst.src[0].field == OperandField::ExecLo) { - SetDst(inst.dst[0], ir.Imm32(0)); - } + const IR::U32 max{ir.IMax(src0, src1, false)}; + const IR::U32 min{ir.IMin(src0, src1, false)}; + result = ir.ISub(max, min); } + SetDst(inst.dst[0], ir.IAdd(result, src2)); } -void Translator::V_BFM_B32(const GcnInst& inst) { - // bitmask width - const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(4))}; - // bitmask offset - const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(4))}; - const IR::U32 ones = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), src0), ir.Imm32(1)); - SetDst(inst.dst[0], ir.ShiftLeftLogical(ones, src1)); +void Translator::V_LSHL_B64(const GcnInst& inst) { + const IR::U64 src0{GetSrc64(inst.src[0])}; + const IR::U64 src1{GetSrc64(inst.src[1])}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0, + "V_LSHL_B64 with non-zero src0 or src1 is not supported"); + ir.SetVectorReg(dst_reg, ir.Imm32(0)); + ir.SetVectorReg(dst_reg + 1, ir.Imm32(0)); } -void Translator::V_FFBH_U32(const GcnInst& inst) { +void Translator::V_MAX_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + const IR::F64 src1{GetSrc64(inst.src[1])}; + SetDst64(inst.dst[0], ir.FPMax(src0, src1)); +} + +void Translator::V_MUL_LO_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB) - // position - const IR::U32 msb_pos = ir.FindUMsb(src0); - const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos); - // Select 0xFFFFFFFF if src0 was 0 - const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0)); - SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IMul(src0, src1)); +} + +void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)}; + SetDst(inst.dst[0], hi); +} + +void Translator::V_MAD_U64_U32(const GcnInst& inst) { + const auto src0 = GetSrc(inst.src[0]); + const auto src1 = GetSrc(inst.src[1]); + const auto src2 = GetSrc64(inst.src[2]); + + // const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1)); + const IR::U64 mul_result = + ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U))); + const IR::U64 sum_result = ir.IAdd(mul_result, src2); + + SetDst64(inst.dst[0], sum_result); + + const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false); + const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false); + const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); + ir.SetVcc(did_overflow); } // TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of @@ -1045,29 +1121,4 @@ void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR: } } -void Translator::V_MOVRELS_B32(const GcnInst& inst) { - u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); - const IR::U32 m0 = ir.GetM0(); - - const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); - SetDst(inst.dst[0], src_val); -} - -void Translator::V_MOVRELD_B32(const GcnInst& inst) { - const IR::U32 src_val{GetSrc(inst.src[0])}; - u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); - IR::U32 m0 = ir.GetM0(); - - 
VMovRelDHelper(dst_vgprno, src_val, m0); -} - -void Translator::V_MOVRELSD_B32(const GcnInst& inst) { - u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); - u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); - IR::U32 m0 = ir.GetM0(); - - const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); - VMovRelDHelper(dst_vgprno, src_val, m0); -} - } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index c12ae8f57..8617370ac 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -5,6 +5,22 @@ namespace Shader::Gcn { +void Translator::EmitVectorInterpolation(const GcnInst& inst) { + switch (inst.opcode) { + // VINTRP + case Opcode::V_INTERP_P1_F32: + return; + case Opcode::V_INTERP_P2_F32: + return V_INTERP_P2_F32(inst); + case Opcode::V_INTERP_MOV_F32: + return V_INTERP_MOV_F32(inst); + default: + LogMissingOpcode(inst); + } +} + +// VINTRP + void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); @@ -19,17 +35,4 @@ void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } -void Translator::EmitVectorInterpolation(const GcnInst& inst) { - switch (inst.opcode) { - case Opcode::V_INTERP_P1_F32: - return; - case Opcode::V_INTERP_P2_F32: - return V_INTERP_P2_F32(inst); - case Opcode::V_INTERP_MOV_F32: - return V_INTERP_MOV_F32(inst); - default: - LogMissingOpcode(inst); - } -} - } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index e0f35fb90..efe4a15bf 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -7,56 +7,7 @@ namespace Shader::Gcn { void Translator::EmitVectorMemory(const GcnInst& inst) { switch (inst.opcode) { - case Opcode::IMAGE_SAMPLE_LZ_O: - case Opcode::IMAGE_SAMPLE_O: - case Opcode::IMAGE_SAMPLE_C: - case Opcode::IMAGE_SAMPLE_C_LZ: - case Opcode::IMAGE_SAMPLE_LZ: - case Opcode::IMAGE_SAMPLE: - case Opcode::IMAGE_SAMPLE_L: - case Opcode::IMAGE_SAMPLE_L_O: - case Opcode::IMAGE_SAMPLE_C_O: - case Opcode::IMAGE_SAMPLE_B: - case Opcode::IMAGE_SAMPLE_C_LZ_O: - case Opcode::IMAGE_SAMPLE_D: - case Opcode::IMAGE_SAMPLE_CD: - return IMAGE_SAMPLE(inst); - case Opcode::IMAGE_GATHER4_LZ: - case Opcode::IMAGE_GATHER4_C: - case Opcode::IMAGE_GATHER4_C_LZ: - case Opcode::IMAGE_GATHER4_LZ_O: - return IMAGE_GATHER(inst); - case Opcode::IMAGE_ATOMIC_ADD: - return IMAGE_ATOMIC(AtomicOp::Add, inst); - case Opcode::IMAGE_ATOMIC_AND: - return IMAGE_ATOMIC(AtomicOp::And, inst); - case Opcode::IMAGE_ATOMIC_OR: - return IMAGE_ATOMIC(AtomicOp::Or, inst); - case Opcode::IMAGE_ATOMIC_XOR: - return IMAGE_ATOMIC(AtomicOp::Xor, inst); - case Opcode::IMAGE_ATOMIC_UMAX: - return IMAGE_ATOMIC(AtomicOp::Umax, inst); - case Opcode::IMAGE_ATOMIC_SMAX: - return IMAGE_ATOMIC(AtomicOp::Smax, inst); - case Opcode::IMAGE_ATOMIC_UMIN: - return IMAGE_ATOMIC(AtomicOp::Umin, inst); - case Opcode::IMAGE_ATOMIC_SMIN: - return IMAGE_ATOMIC(AtomicOp::Smin, inst); - case Opcode::IMAGE_ATOMIC_INC: - return IMAGE_ATOMIC(AtomicOp::Inc, inst); - case Opcode::IMAGE_ATOMIC_DEC: - return 
IMAGE_ATOMIC(AtomicOp::Dec, inst); - case Opcode::IMAGE_GET_LOD: - return IMAGE_GET_LOD(inst); - case Opcode::IMAGE_STORE: - return IMAGE_STORE(inst); - case Opcode::IMAGE_LOAD_MIP: - return IMAGE_LOAD(true, inst); - case Opcode::IMAGE_LOAD: - return IMAGE_LOAD(false, inst); - case Opcode::IMAGE_GET_RESINFO: - return IMAGE_GET_RESINFO(inst); - + // MTBUF / MUBUF // Buffer load operations case Opcode::TBUFFER_LOAD_FORMAT_X: return BUFFER_LOAD(1, true, inst); @@ -137,11 +88,335 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::BUFFER_ATOMIC_DEC: return BUFFER_ATOMIC(AtomicOp::Dec, inst); + // MIMG + case Opcode::IMAGE_LOAD: + return IMAGE_LOAD(false, inst); + case Opcode::IMAGE_LOAD_MIP: + return IMAGE_LOAD(true, inst); + case Opcode::IMAGE_STORE: + return IMAGE_STORE(inst); + case Opcode::IMAGE_GET_RESINFO: + return IMAGE_GET_RESINFO(inst); + case Opcode::IMAGE_ATOMIC_ADD: + return IMAGE_ATOMIC(AtomicOp::Add, inst); + case Opcode::IMAGE_ATOMIC_SMIN: + return IMAGE_ATOMIC(AtomicOp::Smin, inst); + case Opcode::IMAGE_ATOMIC_UMIN: + return IMAGE_ATOMIC(AtomicOp::Umin, inst); + case Opcode::IMAGE_ATOMIC_SMAX: + return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_UMAX: + return IMAGE_ATOMIC(AtomicOp::Umax, inst); + case Opcode::IMAGE_ATOMIC_AND: + return IMAGE_ATOMIC(AtomicOp::And, inst); + case Opcode::IMAGE_ATOMIC_OR: + return IMAGE_ATOMIC(AtomicOp::Or, inst); + case Opcode::IMAGE_ATOMIC_XOR: + return IMAGE_ATOMIC(AtomicOp::Xor, inst); + case Opcode::IMAGE_ATOMIC_INC: + return IMAGE_ATOMIC(AtomicOp::Inc, inst); + case Opcode::IMAGE_ATOMIC_DEC: + return IMAGE_ATOMIC(AtomicOp::Dec, inst); + case Opcode::IMAGE_SAMPLE: + case Opcode::IMAGE_SAMPLE_D: + case Opcode::IMAGE_SAMPLE_L: + case Opcode::IMAGE_SAMPLE_B: + case Opcode::IMAGE_SAMPLE_LZ: + case Opcode::IMAGE_SAMPLE_C: + case Opcode::IMAGE_SAMPLE_C_LZ: + case Opcode::IMAGE_SAMPLE_O: + case Opcode::IMAGE_SAMPLE_L_O: + case Opcode::IMAGE_SAMPLE_LZ_O: + case Opcode::IMAGE_SAMPLE_C_O: + case Opcode::IMAGE_SAMPLE_C_LZ_O: + return IMAGE_SAMPLE(inst); + case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_C_LZ: + case Opcode::IMAGE_GATHER4_LZ_O: + return IMAGE_GATHER(inst); + case Opcode::IMAGE_GET_LOD: + return IMAGE_GET_LOD(inst); + case Opcode::IMAGE_SAMPLE_CD: + return IMAGE_SAMPLE(inst); + default: LogMissingOpcode(inst); } } +// MTBUF / MUBUF + +// Buffer load operations +void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) { + const auto& mtbuf = inst.control.mtbuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mtbuf.idxen && mtbuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mtbuf.idxen || mtbuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mtbuf.idxen); + info.offset_enable.Assign(mtbuf.offen); + info.inst_offset.Assign(mtbuf.offset); + if (is_typed) { + const auto dmft = static_cast(mtbuf.dfmt); + const auto nfmt = static_cast(mtbuf.nfmt); + ASSERT(nfmt == AmdGpu::NumberFormat::Float && + (dmft == AmdGpu::DataFormat::Format32_32_32_32 || + dmft == AmdGpu::DataFormat::Format32_32_32 || + dmft == AmdGpu::DataFormat::Format32_32 || dmft == 
AmdGpu::DataFormat::Format32)); + } + + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), + ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); + const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); + const IR::VectorReg dst_reg{inst.src[1].code}; + if (num_dwords == 1) { + ir.SetVectorReg(dst_reg, IR::U32{value}); + return; + } + for (u32 i = 0; i < num_dwords; i++) { + ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)}); + } +} + +void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) { + const auto& mubuf = inst.control.mubuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported"); + const IR::Value address = [&] -> IR::Value { + if (mubuf.idxen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mubuf.idxen); + + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), + ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); + const IR::Value value = ir.LoadBufferFormat(handle, address, info); + const IR::VectorReg dst_reg{inst.src[1].code}; + for (u32 i = 0; i < num_dwords; i++) { + ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); + } +} + +// Buffer store operations +void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) { + const auto& mtbuf = inst.control.mtbuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mtbuf.idxen && mtbuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mtbuf.idxen || mtbuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mtbuf.idxen); + info.offset_enable.Assign(mtbuf.offen); + info.inst_offset.Assign(mtbuf.offset); + if (is_typed) { + const auto dmft = static_cast(mtbuf.dfmt); + const auto nfmt = static_cast(mtbuf.nfmt); + ASSERT(nfmt == AmdGpu::NumberFormat::Float && + (dmft == AmdGpu::DataFormat::Format32_32_32_32 || + dmft == AmdGpu::DataFormat::Format32_32_32 || + dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32)); + } + + IR::Value value{}; + const IR::VectorReg src_reg{inst.src[1].code}; + switch (num_dwords) { + case 1: + value = ir.GetVectorReg(src_reg); + break; + case 2: + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1)); + break; + case 3: + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2)); + break; + case 4: + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); + break; + } + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), + ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); + ir.StoreBuffer(num_dwords, handle, address, value, info); +} + 
+void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { + const auto& mubuf = inst.control.mubuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported"); + const IR::Value address = [&] -> IR::Value { + if (mubuf.idxen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mubuf.idxen); + + const IR::VectorReg src_reg{inst.src[1].code}; + + std::array comps{}; + for (u32 i = 0; i < num_dwords; i++) { + comps[i] = ir.GetVectorReg(src_reg + i); + } + for (u32 i = num_dwords; i < 4; i++) { + comps[i] = ir.Imm32(0.f); + } + + const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), + ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); + ir.StoreBufferFormat(handle, address, value, info); +} + +// Buffer atomic operations +void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { + const auto& mubuf = inst.control.mubuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::VectorReg vdata{inst.src[1].code}; + const IR::ScalarReg srsrc{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mubuf.idxen && mubuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mubuf.idxen || mubuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::U32 soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mubuf.idxen); + info.inst_offset.Assign(mubuf.offset); + info.offset_enable.Assign(mubuf.offen); + + IR::Value vdata_val = ir.GetVectorReg(vdata); + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), + ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); + + const IR::Value original_val = [&] { + switch (op) { + case AtomicOp::Swap: + return ir.BufferAtomicSwap(handle, address, vdata_val, info); + case AtomicOp::Add: + return ir.BufferAtomicIAdd(handle, address, vdata_val, info); + case AtomicOp::Smin: + return ir.BufferAtomicIMin(handle, address, vdata_val, true, info); + case AtomicOp::Umin: + return ir.BufferAtomicIMin(handle, address, vdata_val, false, info); + case AtomicOp::Smax: + return ir.BufferAtomicIMax(handle, address, vdata_val, true, info); + case AtomicOp::Umax: + return ir.BufferAtomicIMax(handle, address, vdata_val, false, info); + case AtomicOp::And: + return ir.BufferAtomicAnd(handle, address, vdata_val, info); + case AtomicOp::Or: + return ir.BufferAtomicOr(handle, address, vdata_val, info); + case AtomicOp::Xor: + return ir.BufferAtomicXor(handle, address, vdata_val, info); + case AtomicOp::Inc: + return ir.BufferAtomicInc(handle, address, vdata_val, info); + case AtomicOp::Dec: + return ir.BufferAtomicDec(handle, address, vdata_val, info); + default: + UNREACHABLE(); + } + }(); + + if (mubuf.glc) { + ir.SetVectorReg(vdata, IR::U32{original_val}); + } +} + +// MIMG + +void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { + const auto& mimg = inst.control.mimg; + IR::VectorReg 
addr_reg{inst.src[0].code};
+    IR::VectorReg dest_reg{inst.dst[0].code};
+    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
+
+    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
+    const IR::Value body =
+        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
+                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
+
+    IR::TextureInstInfo info{};
+    info.explicit_lod.Assign(has_mip);
+    const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
+
+    for (u32 i = 0; i < 4; i++) {
+        if (((mimg.dmask >> i) & 1) == 0) {
+            continue;
+        }
+        IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
+        ir.SetVectorReg(dest_reg++, value);
+    }
+}
+
+void Translator::IMAGE_STORE(const GcnInst& inst) {
+    const auto& mimg = inst.control.mimg;
+    IR::VectorReg addr_reg{inst.src[0].code};
+    IR::VectorReg data_reg{inst.dst[0].code};
+    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
+
+    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
+    const IR::Value body =
+        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
+                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
+
+    boost::container::static_vector<IR::F32, 4> comps;
+    for (u32 i = 0; i < 4; i++) {
+        if (((mimg.dmask >> i) & 1) == 0) {
+            comps.push_back(ir.Imm32(0.f));
+            continue;
+        }
+        comps.push_back(ir.GetVectorReg(data_reg++));
+    }
+    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
+    ir.ImageWrite(handle, body, value, {});
+}
+
 void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
     IR::VectorReg dst_reg{inst.dst[0].code};
     const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
@@ -165,6 +440,50 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
     }
 }
 
+void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
+    const auto& mimg = inst.control.mimg;
+    IR::VectorReg val_reg{inst.dst[0].code};
+    IR::VectorReg addr_reg{inst.src[0].code};
+    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
+
+    const IR::Value value = ir.GetVectorReg(val_reg);
+    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
+    const IR::Value body =
+        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
+                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
+    const IR::Value prev = [&] {
+        switch (op) {
+        case AtomicOp::Swap:
+            return ir.ImageAtomicExchange(handle, body, value, {});
+        case AtomicOp::Add:
+            return ir.ImageAtomicIAdd(handle, body, value, {});
+        case AtomicOp::Smin:
+            return ir.ImageAtomicIMin(handle, body, value, true, {});
+        case AtomicOp::Umin:
+            return ir.ImageAtomicUMin(handle, body, value, {});
+        case AtomicOp::Smax:
+            return ir.ImageAtomicIMax(handle, body, value, true, {});
+        case AtomicOp::Umax:
+            return ir.ImageAtomicUMax(handle, body, value, {});
+        case AtomicOp::And:
+            return ir.ImageAtomicAnd(handle, body, value, {});
+        case AtomicOp::Or:
+            return ir.ImageAtomicOr(handle, body, value, {});
+        case AtomicOp::Xor:
+            return ir.ImageAtomicXor(handle, body, value, {});
+        case AtomicOp::Inc:
+            return ir.ImageAtomicInc(handle, body, value, {});
+        case AtomicOp::Dec:
+            return ir.ImageAtomicDec(handle, body, value, {});
+        default:
+            UNREACHABLE();
+        }
+    }();
+    if (mimg.glc) {
+        ir.SetVectorReg(val_reg, IR::U32{prev});
+    }
+}
+
 void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     const auto& mimg = inst.control.mimg;
     IR::VectorReg addr_reg{inst.src[0].code};
@@ -323,271 +642,6 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
     }
 }
 
-void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
-    const auto& mimg = inst.control.mimg;
-    IR::VectorReg addr_reg{inst.src[0].code};
-    IR::VectorReg dest_reg{inst.dst[0].code};
-    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
-
-    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
-    const IR::Value body =
-        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
-                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
-
-    IR::TextureInstInfo info{};
-    info.explicit_lod.Assign(has_mip);
-    const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
-
-    for (u32 i = 0; i < 4; i++) {
-        if (((mimg.dmask >> i) & 1) == 0) {
-            continue;
-        }
-        IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
-        ir.SetVectorReg(dest_reg++, value);
-    }
-}
-
-void Translator::IMAGE_STORE(const GcnInst& inst) {
-    const auto& mimg = inst.control.mimg;
-    IR::VectorReg addr_reg{inst.src[0].code};
-    IR::VectorReg data_reg{inst.dst[0].code};
-    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
-
-    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
-    const IR::Value body =
-        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
-                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
-
-    boost::container::static_vector<IR::F32, 4> comps;
-    for (u32 i = 0; i < 4; i++) {
-        if (((mimg.dmask >> i) & 1) == 0) {
-            comps.push_back(ir.Imm32(0.f));
-            continue;
-        }
-        comps.push_back(ir.GetVectorReg(data_reg++));
-    }
-    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
-    ir.ImageWrite(handle, body, value, {});
-}
-
-void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
-    const auto& mtbuf = inst.control.mtbuf;
-    const IR::VectorReg vaddr{inst.src[0].code};
-    const IR::ScalarReg sharp{inst.src[2].code * 4};
-    const IR::Value address = [&] -> IR::Value {
-        if (mtbuf.idxen && mtbuf.offen) {
-            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
-        }
-        if (mtbuf.idxen || mtbuf.offen) {
-            return ir.GetVectorReg(vaddr);
-        }
-        return {};
-    }();
-    const IR::Value soffset{GetSrc(inst.src[3])};
-    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
-
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mtbuf.idxen);
-    info.offset_enable.Assign(mtbuf.offen);
-    info.inst_offset.Assign(mtbuf.offset);
-    if (is_typed) {
-        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
-        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
-        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
-               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
-                dmft == AmdGpu::DataFormat::Format32_32_32 ||
-                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
-    }
-
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
-                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
-    const IR::VectorReg dst_reg{inst.src[1].code};
-    if (num_dwords == 1) {
-        ir.SetVectorReg(dst_reg, IR::U32{value});
-        return;
-    }
-    for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
-    }
-}
-
-void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
-    const auto& mubuf = inst.control.mubuf;
-    const IR::VectorReg vaddr{inst.src[0].code};
-    const IR::ScalarReg sharp{inst.src[2].code * 4};
-    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
-    const IR::Value address = [&] -> IR::Value {
-        if (mubuf.idxen) {
-            return ir.GetVectorReg(vaddr);
-        }
-        return {};
-    }();
-    const IR::Value soffset{GetSrc(inst.src[3])};
-    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
-
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
-
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
-                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    const IR::Value value = ir.LoadBufferFormat(handle, address, info);
-    const IR::VectorReg dst_reg{inst.src[1].code};
-    for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
-    }
-}
-
-void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
-    const auto& mtbuf = inst.control.mtbuf;
-    const IR::VectorReg vaddr{inst.src[0].code};
-    const IR::ScalarReg sharp{inst.src[2].code * 4};
-    const IR::Value address = [&] -> IR::Value {
-        if (mtbuf.idxen && mtbuf.offen) {
-            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
-        }
-        if (mtbuf.idxen || mtbuf.offen) {
-            return ir.GetVectorReg(vaddr);
-        }
-        return {};
-    }();
-    const IR::Value soffset{GetSrc(inst.src[3])};
-    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
-
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mtbuf.idxen);
-    info.offset_enable.Assign(mtbuf.offen);
-    info.inst_offset.Assign(mtbuf.offset);
-    if (is_typed) {
-        const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
-        const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
-        ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
-               (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
-                dmft == AmdGpu::DataFormat::Format32_32_32 ||
-                dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
-    }
-
-    IR::Value value{};
-    const IR::VectorReg src_reg{inst.src[1].code};
-    switch (num_dwords) {
-    case 1:
-        value = ir.GetVectorReg(src_reg);
-        break;
-    case 2:
-        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
-        break;
-    case 3:
-        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
-                                      ir.GetVectorReg(src_reg + 2));
-        break;
-    case 4:
-        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
-                                      ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
-        break;
-    }
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
-                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    ir.StoreBuffer(num_dwords, handle, address, value, info);
-}
-
-void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
-    const auto& mubuf = inst.control.mubuf;
-    const IR::VectorReg vaddr{inst.src[0].code};
-    const IR::ScalarReg sharp{inst.src[2].code * 4};
-    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
-    const IR::Value address = [&] -> IR::Value {
-        if (mubuf.idxen) {
-            return ir.GetVectorReg(vaddr);
-        }
-        return {};
-    }();
-    const IR::Value soffset{GetSrc(inst.src[3])};
-    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
-
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
-
-    const IR::VectorReg src_reg{inst.src[1].code};
-
-    std::array<IR::F32, 4> comps{};
-    for (u32 i = 0; i < num_dwords; i++) {
-        comps[i] = ir.GetVectorReg(src_reg + i);
-    }
-    for (u32 i = num_dwords; i < 4; i++) {
-        comps[i] = ir.Imm32(0.f);
-    }
-
-    const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
-                              ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    ir.StoreBufferFormat(handle, address, value, info);
-}
-
-void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
-    const auto& mubuf = inst.control.mubuf;
-    const IR::VectorReg vaddr{inst.src[0].code};
-    const IR::VectorReg vdata{inst.src[1].code};
-    const IR::ScalarReg srsrc{inst.src[2].code * 4};
-    const IR::Value address = [&] -> IR::Value {
-        if (mubuf.idxen && mubuf.offen) {
-            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
-        }
-        if (mubuf.idxen || mubuf.offen) {
-            return ir.GetVectorReg(vaddr);
-        }
-        return {};
-    }();
-    const IR::U32 soffset{GetSrc(inst.src[3])};
-    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
-
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
-    info.inst_offset.Assign(mubuf.offset);
-    info.offset_enable.Assign(mubuf.offen);
-
-    IR::Value vdata_val = ir.GetVectorReg(vdata);
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
-                              ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
-
-    const IR::Value original_val = [&] {
-        switch (op) {
-        case AtomicOp::Swap:
-            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
-        case AtomicOp::Add:
-            return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
-        case AtomicOp::Smin:
-            return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
-        case AtomicOp::Umin:
-            return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
-        case AtomicOp::Smax:
-            return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
-        case AtomicOp::Umax:
-            return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
-        case AtomicOp::And:
-            return ir.BufferAtomicAnd(handle, address, vdata_val, info);
-        case AtomicOp::Or:
-            return ir.BufferAtomicOr(handle, address, vdata_val, info);
-        case AtomicOp::Xor:
-            return ir.BufferAtomicXor(handle, address, vdata_val, info);
-        case AtomicOp::Inc:
-            return ir.BufferAtomicInc(handle, address, vdata_val, info);
-        case AtomicOp::Dec:
-            return ir.BufferAtomicDec(handle, address, vdata_val, info);
-        default:
-            UNREACHABLE();
-        }
-    }();
-
-    if (mubuf.glc) {
-        ir.SetVectorReg(vdata, IR::U32{original_val});
-    }
-}
-
 void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
     const auto& mimg = inst.control.mimg;
     IR::VectorReg dst_reg{inst.dst[0].code};
@@ -603,48 +657,4 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
     ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
 }
 
-void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
-    const auto& mimg = inst.control.mimg;
-    IR::VectorReg val_reg{inst.dst[0].code};
-    IR::VectorReg addr_reg{inst.src[0].code};
-    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
-
-    const IR::Value value = ir.GetVectorReg(val_reg);
-    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
-    const IR::Value body =
-        ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
-                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
-    const IR::Value prev = [&] {
-        switch (op) {
-        case AtomicOp::Swap:
-            return ir.ImageAtomicExchange(handle, body, value, {});
-        case AtomicOp::Add:
-            return ir.ImageAtomicIAdd(handle, body, value, {});
-        case AtomicOp::Smin:
-            return ir.ImageAtomicIMin(handle, body, value, true, {});
-        case AtomicOp::Umin:
-            return ir.ImageAtomicUMin(handle, body, value, {});
-        case AtomicOp::Smax:
-            return ir.ImageAtomicIMax(handle, body, value, true, {});
-        case AtomicOp::Umax:
-            return ir.ImageAtomicUMax(handle, body, value, {});
-        case AtomicOp::And:
-            return ir.ImageAtomicAnd(handle, body, value, {});
-        case AtomicOp::Or:
-            return ir.ImageAtomicOr(handle, body, value, {});
-        case AtomicOp::Xor:
-            return ir.ImageAtomicXor(handle, body, value, {});
-        case AtomicOp::Inc:
-            return ir.ImageAtomicInc(handle, body, value, {});
-        case AtomicOp::Dec:
-            return ir.ImageAtomicDec(handle, body, value, {});
-        default:
-            UNREACHABLE();
-        }
-    }();
-    if (mimg.glc) {
-        ir.SetVectorReg(val_reg, IR::U32{prev});
-    }
-}
-
 } // namespace Shader::Gcn