From 638bb1efa7e44ca449918b981abda4e58344e573 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 4 Oct 2024 16:21:41 +0200 Subject: [PATCH] Add correct 32 bit bfe --- ptx/lib/zluda_ptx_impl.bc | Bin 3364 -> 3492 bytes ptx/lib/zluda_ptx_impl.cpp | 30 ++++++++++++++++++++++++------ ptx/src/test/spirv_run/bfe.ptx | 24 +++++------------------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 50f9d3df0e501193ac7517fea2125d76ec02b039..e9d602c16efb7dedf65431d6e95fb14411ae0b04 100644 GIT binary patch delta 222 zcmZ1?wM2S?3gf|zs%u%8*tj=eVfn&RFT}vWAk5;bBLJkCfLP?IeSn=Z14B4Z73;#o zyiAI0n|WLWc+5FkCC(UeI2Uj=Ze}^saj>yPK|k(*ib6tH5jGW|m_G%!wTbb(jNLj!Rg2C-g|jcyKg1 zGMTm;dMEHqVd-!XaB^gv{E>4tXFx;)$6VGKAEGud<$A$5c{7jNG<6vWpf`rNm1r})rhC~Gh22}B5 n3M`WgxmI%)_&0F;Wts6IVzVmu3&zRDyy}zrcvUAy@_GXR7i1gW diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index 553070e..e9cf904 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -19,16 +19,34 @@ extern "C" return (uint32_t)__ockl_get_local_size(member); } - int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device)); - int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos, uint32_t len) + uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device)); + uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32) { - return __ockl_bfe_i32(base, pos, len); + uint32_t pos = pos_32 & 0xFFU; + uint32_t len = len_32 & 0xFFU; + if (pos >= 32) + return 0; + // V_BFE_U32 only uses bits [4:0] for len (max value is 31) + if (len >= 32) + return base >> pos; + len = std::min(len, 31U); + return __ockl_bfe_u32(base, pos, len); } - uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device)); - uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos, uint32_t len) + int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device)); + int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32) { - return __ockl_bfe_u32(base, pos, len); + uint32_t pos = pos_32 & 0xFFU; + uint32_t len = len_32 & 0xFFU; + if (len == 0) + return 0; + if (pos >= 32) + return (base >> 31); + // V_BFE_I32 only uses bits [4:0] for len (max value is 31) + if (len >= 32) + return base >> pos; + len = std::min(len, 31U); + return __ockl_bfe_i32(base, pos, len); } // LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64, diff --git a/ptx/src/test/spirv_run/bfe.ptx b/ptx/src/test/spirv_run/bfe.ptx index a01a14a..60ee8a6 100644 --- a/ptx/src/test/spirv_run/bfe.ptx +++ b/ptx/src/test/spirv_run/bfe.ptx @@ -10,28 +10,14 @@ .reg .u64 in_addr; .reg .u64 out_addr; .reg .u32 temp<3>; - .reg .b32 result<2>; - .reg .b64 temp64_0; - .reg .b32 temp64_1; - .reg .b32 temp64_2; - .reg .b64 result64_<2>; ld.param.u64 in_addr, [input]; ld.param.u64 out_addr, [output]; - ld.b64 temp64_0, [in_addr]; - ld.b32 temp64_1, [in_addr+8]; - ld.b32 temp64_2, [in_addr+16]; - ld.u32 temp0, [in_addr+24]; - ld.u32 temp1, [in_addr+28]; - ld.u32 temp2, [in_addr+32]; - //bfe.u64 result64_0, temp64_0, temp64_1, temp64_2; - bfe.s64 result64_1, temp64_0, temp64_1, temp64_2; - bfe.u32 result0, temp0, temp1, temp2; - bfe.s32 result1, temp0, temp1, temp2; - st.b64 [out_addr], result64_0; - st.b64 [out_addr], result64_1; - st.b32 [out_addr], result0; - st.b32 [out_addr], result1; + ld.u32 temp0, [in_addr]; + ld.u32 temp1, [in_addr+4]; + ld.u32 temp2, [in_addr+8]; + bfe.u32 temp0, temp0, temp1, temp2; + st.u32 [out_addr], temp0; ret; }