mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Add correct 32 bit bfe
This commit is contained in:
parent
9eb7314803
commit
638bb1efa7
3 changed files with 29 additions and 25 deletions
Binary file not shown.
|
@ -19,16 +19,34 @@ extern "C"
|
|||
return (uint32_t)__ockl_get_local_size(member);
|
||||
}
|
||||
|
||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos, uint32_t len)
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
return __ockl_bfe_i32(base, pos, len);
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
uint32_t len = len_32 & 0xFFU;
|
||||
if (pos >= 32)
|
||||
return 0;
|
||||
// V_BFE_U32 only uses bits [4:0] for len (max value is 31)
|
||||
if (len >= 32)
|
||||
return base >> pos;
|
||||
len = std::min(len, 31U);
|
||||
return __ockl_bfe_u32(base, pos, len);
|
||||
}
|
||||
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos, uint32_t len)
|
||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
return __ockl_bfe_u32(base, pos, len);
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
uint32_t len = len_32 & 0xFFU;
|
||||
if (len == 0)
|
||||
return 0;
|
||||
if (pos >= 32)
|
||||
return (base >> 31);
|
||||
// V_BFE_I32 only uses bits [4:0] for len (max value is 31)
|
||||
if (len >= 32)
|
||||
return base >> pos;
|
||||
len = std::min(len, 31U);
|
||||
return __ockl_bfe_i32(base, pos, len);
|
||||
}
|
||||
|
||||
// LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,
|
||||
|
|
|
@ -10,28 +10,14 @@
|
|||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .u32 temp<3>;
|
||||
.reg .b32 result<2>;
|
||||
.reg .b64 temp64_0;
|
||||
.reg .b32 temp64_1;
|
||||
.reg .b32 temp64_2;
|
||||
.reg .b64 result64_<2>;
|
||||
|
||||
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
ld.b64 temp64_0, [in_addr];
|
||||
ld.b32 temp64_1, [in_addr+8];
|
||||
ld.b32 temp64_2, [in_addr+16];
|
||||
ld.u32 temp0, [in_addr+24];
|
||||
ld.u32 temp1, [in_addr+28];
|
||||
ld.u32 temp2, [in_addr+32];
|
||||
//bfe.u64 result64_0, temp64_0, temp64_1, temp64_2;
|
||||
bfe.s64 result64_1, temp64_0, temp64_1, temp64_2;
|
||||
bfe.u32 result0, temp0, temp1, temp2;
|
||||
bfe.s32 result1, temp0, temp1, temp2;
|
||||
st.b64 [out_addr], result64_0;
|
||||
st.b64 [out_addr], result64_1;
|
||||
st.b32 [out_addr], result0;
|
||||
st.b32 [out_addr], result1;
|
||||
ld.u32 temp0, [in_addr];
|
||||
ld.u32 temp1, [in_addr+4];
|
||||
ld.u32 temp2, [in_addr+8];
|
||||
bfe.u32 temp0, temp0, temp1, temp2;
|
||||
st.u32 [out_addr], temp0;
|
||||
ret;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue