shader_recompiler: Implement S_BCNT1_I32_B64 and S_FF1_I32_B64 (#1889)

* shader_recompiler: Implement S_BCNT1_I32_B64

* shader_recompiler: Implement S_FF1_I32_B64

* shader_recompiler: Implement IEqual for 64-bit.

* shader_recompiler: Fix immediate type in S_FF1_I32_B32
This commit is contained in:
squidbus 2024-12-27 06:46:07 -08:00 committed by GitHub
parent 1c5947d93b
commit b1f74660df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 68 additions and 12 deletions

View file

@ -304,10 +304,12 @@ Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id
Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
Id EmitBitReverse32(EmitContext& ctx, Id value);
Id EmitBitCount32(EmitContext& ctx, Id value);
Id EmitBitCount64(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitFindILsb64(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
@ -318,7 +320,8 @@ Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);

View file

@ -201,6 +201,10 @@ Id EmitBitCount32(EmitContext& ctx, Id value) {
return ctx.OpBitCount(ctx.U32[1], value);
}
Id EmitBitCount64(EmitContext& ctx, Id value) {
return ctx.OpBitCount(ctx.U64, value);
}
Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
return ctx.OpNot(ctx.U32[1], value);
}
@ -217,6 +221,10 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value);
}
Id EmitFindILsb64(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U64, value);
}
Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
return ctx.OpSMin(ctx.U32[1], a, b);
}
@ -277,7 +285,11 @@ Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThan(ctx.U1[1], lhs, rhs);
}
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
}
Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
}

View file

@ -100,8 +100,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_BREV_B32(inst);
case Opcode::S_BCNT1_I32_B32:
return S_BCNT1_I32_B32(inst);
case Opcode::S_BCNT1_I32_B64:
return S_BCNT1_I32_B64(inst);
case Opcode::S_FF1_I32_B32:
return S_FF1_I32_B32(inst);
case Opcode::S_FF1_I32_B64:
return S_FF1_I32_B64(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_SAVEEXEC_B64(NegateMode::None, false, inst);
case Opcode::S_ORN2_SAVEEXEC_B64:
@ -585,12 +589,25 @@ void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
const IR::U32 result = ir.BitCount(GetSrc64(inst.src[0]));
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_FF1_I32_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))};
SetDst(inst.dst[0], result);
}
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U32 result{
ir.Select(ir.IEqual(src0, ir.Imm64(u64(0))), ir.Imm32(-1), ir.FindILsb(src0))};
SetDst(inst.dst[0], result);
}
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View file

@ -111,7 +111,9 @@ public:
void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst);
void S_BCNT1_I32_B32(const GcnInst& inst);
void S_BCNT1_I32_B64(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
void S_ABS_I32(const GcnInst& inst);

View file

@ -1273,8 +1273,15 @@ U32 IREmitter::BitReverse(const U32& value) {
return Inst<U32>(Opcode::BitReverse32, value);
}
U32 IREmitter::BitCount(const U32& value) {
return Inst<U32>(Opcode::BitCount32, value);
U32 IREmitter::BitCount(const U32U64& value) {
switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::BitCount32, value);
case Type::U64:
return Inst<U32>(Opcode::BitCount64, value);
default:
ThrowInvalidType(value.Type());
}
}
U32 IREmitter::BitwiseNot(const U32& value) {
@ -1289,8 +1296,15 @@ U32 IREmitter::FindUMsb(const U32& value) {
return Inst<U32>(Opcode::FindUMsb32, value);
}
U32 IREmitter::FindILsb(const U32& value) {
return Inst<U32>(Opcode::FindILsb32, value);
U32 IREmitter::FindILsb(const U32U64& value) {
switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::FindILsb32, value);
case Type::U64:
return Inst<U32>(Opcode::FindILsb64, value);
default:
ThrowInvalidType(value.Type());
}
}
U32 IREmitter::SMin(const U32& a, const U32& b) {
@ -1345,7 +1359,9 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(Opcode::IEqual, lhs, rhs);
return Inst<U1>(Opcode::IEqual32, lhs, rhs);
case Type::U64:
return Inst<U1>(Opcode::IEqual64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}

View file

@ -229,12 +229,12 @@ public:
[[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
bool is_signed = false);
[[nodiscard]] U32 BitReverse(const U32& value);
[[nodiscard]] U32 BitCount(const U32& value);
[[nodiscard]] U32 BitCount(const U32U64& value);
[[nodiscard]] U32 BitwiseNot(const U32& value);
[[nodiscard]] U32 FindSMsb(const U32& value);
[[nodiscard]] U32 FindUMsb(const U32& value);
[[nodiscard]] U32 FindILsb(const U32& value);
[[nodiscard]] U32 FindILsb(const U32U64& value);
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
[[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);

View file

@ -284,11 +284,13 @@ OPCODE(BitFieldSExtract, U32, U32,
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
OPCODE(BitReverse32, U32, U32, )
OPCODE(BitCount32, U32, U32, )
OPCODE(BitCount64, U32, U64, )
OPCODE(BitwiseNot32, U32, U32, )
OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, )
OPCODE(FindILsb32, U32, U32, )
OPCODE(FindILsb64, U32, U64, )
OPCODE(SMin32, U32, U32, U32, )
OPCODE(UMin32, U32, U32, U32, )
OPCODE(SMax32, U32, U32, U32, )
@ -299,7 +301,8 @@ OPCODE(SLessThan32, U1, U32,
OPCODE(SLessThan64, U1, U64, U64, )
OPCODE(ULessThan32, U1, U32, U32, )
OPCODE(ULessThan64, U1, U64, U64, )
OPCODE(IEqual, U1, U32, U32, )
OPCODE(IEqual32, U1, U32, U32, )
OPCODE(IEqual64, U1, U64, U64, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, )

View file

@ -391,9 +391,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::UGreaterThanEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
return;
case IR::Opcode::IEqual:
case IR::Opcode::IEqual32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
return;
case IR::Opcode::IEqual64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a == b; });
return;
case IR::Opcode::INotEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
return;

View file

@ -249,7 +249,7 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
// Select should be based on zero check
const auto* prod0 = inst->Arg(0).InstRecursive();
if (prod0->GetOpcode() != IR::Opcode::IEqual ||
if (prod0->GetOpcode() != IR::Opcode::IEqual32 ||
!(prod0->Arg(1).IsImmediate() && prod0->Arg(1).U32() == 0u)) {
return not_found;
}