add V_CVT_PKNORM_U16_F32

OK, time to give it a good old college try next.
This commit is contained in:
martin 2024-09-04 15:41:51 -05:00
parent 837bf41dd9
commit ccf4fc3b8d
2 changed files with 25 additions and 1 deletions

View file

@ -114,6 +114,7 @@ public:
void V_SAD(const GcnInst& inst);
void V_MAC_F32(const GcnInst& inst);
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
// Translates the V_CVT_PKNORM_U16_F32 opcode: converts src[0] and src[1] from F32 to
// normalized U16 and writes the packed pair to the destination vector register.
void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
void V_CVT_F32_F16(const GcnInst& inst);
void V_CVT_F16_F32(const GcnInst& inst);
void V_MUL_F32(const GcnInst& inst);
@ -232,7 +233,7 @@ private:
[[nodiscard]] T GetSrc64(const InstOperand& operand);
void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
// Scales `src` by the maximum u16 value, converts the product to an unsigned integer and
// narrows it to a 16-bit IR value.
IR::U16 Convert_F32_to_U16_Normalized(const IR::F32& src);
void LogMissingOpcode(const GcnInst& inst);
private:

View file

@ -52,6 +52,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CVT_F32_U32(inst);
case Opcode::V_CVT_PKRTZ_F16_F32:
return V_CVT_PKRTZ_F16_F32(inst);
case Opcode::V_CVT_PKNORM_U16_F32:
return V_CVT_PKNORM_U16_F32(inst);
case Opcode::V_CVT_F32_F16:
return V_CVT_F32_F16(inst);
case Opcode::V_CVT_F16_F32:
@ -342,6 +344,27 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
}
/// Converts an F32 IR value to its 16-bit unsigned-normalized (UNORM16) encoding.
///
/// The input is saturated to [0.0, 1.0] before being scaled by the maximum u16 value.
/// Without the saturation, negative inputs would reach the float-to-unsigned conversion
/// (whose result is not defined for values outside the unsigned range) and inputs above
/// 1.0 would exceed 65535 and wrap when narrowed to 16 bits.
///
/// @param src F32 value to encode.
/// @return The encoded value as a 16-bit unsigned IR value.
IR::U16 Translator::Convert_F32_to_U16_Normalized(const IR::F32& src) {
    const IR::F32 saturated = ir.FPSaturate(src);
    const IR::F32 scale = ir.Imm32(static_cast<f32>(std::numeric_limits<u16>::max()));
    const IR::F32 scaled = ir.FPMul(saturated, scale);
    // NOTE(review): the float-to-unsigned conversion presumably truncates toward zero;
    // confirm whether the hardware instruction rounds to nearest instead.
    const IR::U32 as_unsigned = ir.ConvertFToU(32, scaled);
    return ir.UConvert(16, as_unsigned);
}
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::F32 src0 = GetSrc<IR::F32>(inst.src[0]);
const IR::F32 src1 = GetSrc<IR::F32>(inst.src[1]);
const IR::Value vec_u16 =
ir.CompositeConstruct(
Convert_F32_to_U16_Normalized(src0),
Convert_F32_to_U16_Normalized(src1)
);
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_u16));
}
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]);
const IR::U16 src0l = ir.UConvert(16, src0);