Add correct 32 bit bfe

This commit is contained in:
Andrzej Janik 2024-10-04 16:21:41 +02:00
parent 9eb7314803
commit 638bb1efa7
3 changed files with 29 additions and 25 deletions

Binary file not shown.

View file

@ -19,16 +19,34 @@ extern "C"
return (uint32_t)__ockl_get_local_size(member);
}
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));

// Unsigned 32-bit bit-field extract.
//
// Thin wrapper around __ockl_bfe_u32 that resolves out-of-range `pos_32` /
// `len_32` values itself before delegating:
//   - only the low 8 bits of `pos_32` and `len_32` are considered;
//   - a start position of 32 or more yields 0;
//   - a length of 32 or more yields `base` shifted right by the position.
// Every other combination is forwarded to __ockl_bfe_u32 unchanged.
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
{
    const uint32_t pos = pos_32 & 0xFFU;
    const uint32_t len = len_32 & 0xFFU;

    // The field starts past the most significant bit: nothing to extract.
    if (pos >= 32)
        return 0;

    // V_BFE_U32 only uses bits [4:0] for len (max value is 31), so a field
    // of 32 bits or more is produced with a plain logical shift instead.
    if (len >= 32)
        return base >> pos;

    // len is within [0, 31] at this point; no further clamping is required.
    return __ockl_bfe_u32(base, pos, len);
}
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));

// Signed 32-bit bit-field extract.
//
// Thin wrapper around __ockl_bfe_i32 that resolves out-of-range `pos_32` /
// `len_32` values itself before delegating:
//   - only the low 8 bits of `pos_32` and `len_32` are considered;
//   - a zero length yields 0;
//   - a start position of 32 or more yields `base >> 31`;
//   - a length of 32 or more yields `base >> pos`.
// Every other combination is forwarded to __ockl_bfe_i32 unchanged.
//
// NOTE(review): both shifts operate on a signed int32_t and presumably rely
// on the compiler performing an arithmetic (sign-propagating) right shift --
// confirm for the target toolchain.
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
{
    const uint32_t pos = pos_32 & 0xFFU;
    const uint32_t len = len_32 & 0xFFU;

    // An empty field is always zero, regardless of the start position.
    if (len == 0)
        return 0;

    // The field starts past the most significant bit: only the top bit of
    // `base` contributes to the result.
    if (pos >= 32)
        return (base >> 31);

    // V_BFE_I32 only uses bits [4:0] for len (max value is 31), so a field
    // of 32 bits or more is produced with a plain shift instead.
    if (len >= 32)
        return base >> pos;

    // len is within [1, 31] at this point; no further clamping is required.
    return __ockl_bfe_i32(base, pos, len);
}
// LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,

View file

@ -10,28 +10,14 @@
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .u32 temp<3>;
.reg .b32 result<2>;
.reg .b64 temp64_0;
.reg .b32 temp64_1;
.reg .b32 temp64_2;
.reg .b64 result64_<2>;
ld.param.u64 in_addr, [input];
ld.param.u64 out_addr, [output];
ld.b64 temp64_0, [in_addr];
ld.b32 temp64_1, [in_addr+8];
ld.b32 temp64_2, [in_addr+16];
ld.u32 temp0, [in_addr+24];
ld.u32 temp1, [in_addr+28];
ld.u32 temp2, [in_addr+32];
//bfe.u64 result64_0, temp64_0, temp64_1, temp64_2;
bfe.s64 result64_1, temp64_0, temp64_1, temp64_2;
bfe.u32 result0, temp0, temp1, temp2;
bfe.s32 result1, temp0, temp1, temp2;
st.b64 [out_addr], result64_0;
st.b64 [out_addr], result64_1;
st.b32 [out_addr], result0;
st.b32 [out_addr], result1;
ld.u32 temp0, [in_addr];
ld.u32 temp1, [in_addr+4];
ld.u32 temp2, [in_addr+8];
bfe.u32 temp0, temp0, temp1, temp2;
st.u32 [out_addr], temp0;
ret;
}