shader_recompiler: Fix BitCount64 and FindILsb64 (#1978)

This commit is contained in:
squidbus 2024-12-30 20:10:29 -08:00 committed by GitHub
parent 62780e4e43
commit 284f473a52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 5 deletions

View file

@ -202,7 +202,14 @@ Id EmitBitCount32(EmitContext& ctx, Id value) {
}
// Emits a population count for a 64-bit value; the result is a 32-bit count.
Id EmitBitCount64(EmitContext& ctx, Id value) {
    // Vulkan restricts some bitwise operations to 32-bit only, so decompose into
    // two 32-bit values and add the result. This replaces the former direct
    // 64-bit form, ctx.OpBitCount(ctx.U64, value), which must not be emitted.
    const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
    const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
    const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
    const Id lo_count{ctx.OpBitCount(ctx.U32[1], lo)};
    const Id hi_count{ctx.OpBitCount(ctx.U32[1], hi)};
    // Each half contributes at most 32, so the sum always fits in 32 bits.
    return ctx.OpIAdd(ctx.U32[1], lo_count, hi_count);
}
Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
@ -222,7 +229,15 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) {
}
// Emits a "find least-significant set bit" for a 64-bit value. The result is a
// 32-bit bit index in [0, 63], or -1 (all ones) when no bit is set.
Id EmitFindILsb64(EmitContext& ctx, Id value) {
    // Vulkan restricts some bitwise operations to 32-bit only, so decompose into
    // two 32-bit values and select the correct result. This replaces the former
    // direct 64-bit form, ctx.OpFindILsb(ctx.U64, value), which must not be emitted.
    const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
    const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
    const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
    const Id lo_lsb{ctx.OpFindILsb(ctx.U32[1], lo)};
    const Id hi_lsb_raw{ctx.OpFindILsb(ctx.U32[1], hi)};
    // Bits found in the high half sit 32 positions further up. OR-ing in 32 turns
    // an index in [0, 31] into [32, 63] and leaves the "not found" value
    // (0xFFFFFFFF) untouched, so an all-zero input still reports -1.
    const Id hi_lsb{ctx.OpBitwiseOr(ctx.U32[1], hi_lsb_raw, ctx.ConstU32(32U))};
    // Comparison instructions must produce a boolean, which is also the
    // condition type OpSelect requires.
    const Id found_lo{ctx.OpINotEqual(ctx.U1[1], lo_lsb, ctx.ConstU32(u32(-1)))};
    return ctx.OpSelect(ctx.U32[1], found_lo, lo_lsb, hi_lsb);
}
Id EmitSMin32(EmitContext& ctx, Id a, Id b) {

View file

@ -597,14 +597,13 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
// Translates S_FF1_I32_B32: writes the index of the lowest set bit of the
// 32-bit source operand to the destination.
void Translator::S_FF1_I32_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    // A single definition of `result`: the earlier explicit
    // Select(src0 == 0, -1, FindILsb(src0)) wrapper is dropped, since FindILsb
    // is expected to already yield -1 for a zero input (see GLSL.std.450 FindILsb).
    const IR::U32 result{ir.FindILsb(src0)};
    SetDst(inst.dst[0], result);
}
// Translates S_FF1_I32_B64: writes the index of the lowest set bit of the
// 64-bit source operand to the (32-bit) destination.
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
    const IR::U64 src0{GetSrc64(inst.src[0])};
    // A single definition of `result`: the earlier explicit
    // Select(src0 == 0, -1, FindILsb(src0)) wrapper is dropped, since FindILsb
    // is expected to already yield -1 for a zero input (see GLSL.std.450 FindILsb).
    const IR::U32 result{ir.FindILsb(src0)};
    SetDst(inst.dst[0], result);
}