mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 14:50:53 +00:00
Add correct 32 bit bfe
This commit is contained in:
parent
9eb7314803
commit
638bb1efa7
3 changed files with 29 additions and 25 deletions
Binary file not shown.
|
@@ -19,16 +19,34 @@ extern "C"
|
||||||
return (uint32_t)__ockl_get_local_size(member);
|
return (uint32_t)__ockl_get_local_size(member);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
||||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos, uint32_t len)
|
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||||
{
|
{
|
||||||
return __ockl_bfe_i32(base, pos, len);
|
uint32_t pos = pos_32 & 0xFFU;
|
||||||
|
uint32_t len = len_32 & 0xFFU;
|
||||||
|
if (pos >= 32)
|
||||||
|
return 0;
|
||||||
|
// V_BFE_U32 only uses bits [4:0] for len (max value is 31)
|
||||||
|
if (len >= 32)
|
||||||
|
return base >> pos;
|
||||||
|
len = std::min(len, 31U);
|
||||||
|
return __ockl_bfe_u32(base, pos, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
||||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos, uint32_t len)
|
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
||||||
{
|
{
|
||||||
return __ockl_bfe_u32(base, pos, len);
|
uint32_t pos = pos_32 & 0xFFU;
|
||||||
|
uint32_t len = len_32 & 0xFFU;
|
||||||
|
if (len == 0)
|
||||||
|
return 0;
|
||||||
|
if (pos >= 32)
|
||||||
|
return (base >> 31);
|
||||||
|
// V_BFE_I32 only uses bits [4:0] for len (max value is 31)
|
||||||
|
if (len >= 32)
|
||||||
|
return base >> pos;
|
||||||
|
len = std::min(len, 31U);
|
||||||
|
return __ockl_bfe_i32(base, pos, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
// LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,
|
// LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,
|
||||||
|
|
|
@@ -10,28 +10,14 @@
|
||||||
.reg .u64 in_addr;
|
.reg .u64 in_addr;
|
||||||
.reg .u64 out_addr;
|
.reg .u64 out_addr;
|
||||||
.reg .u32 temp<3>;
|
.reg .u32 temp<3>;
|
||||||
.reg .b32 result<2>;
|
|
||||||
.reg .b64 temp64_0;
|
|
||||||
.reg .b32 temp64_1;
|
|
||||||
.reg .b32 temp64_2;
|
|
||||||
.reg .b64 result64_<2>;
|
|
||||||
|
|
||||||
ld.param.u64 in_addr, [input];
|
ld.param.u64 in_addr, [input];
|
||||||
ld.param.u64 out_addr, [output];
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
ld.b64 temp64_0, [in_addr];
|
ld.u32 temp0, [in_addr];
|
||||||
ld.b32 temp64_1, [in_addr+8];
|
ld.u32 temp1, [in_addr+4];
|
||||||
ld.b32 temp64_2, [in_addr+16];
|
ld.u32 temp2, [in_addr+8];
|
||||||
ld.u32 temp0, [in_addr+24];
|
bfe.u32 temp0, temp0, temp1, temp2;
|
||||||
ld.u32 temp1, [in_addr+28];
|
st.u32 [out_addr], temp0;
|
||||||
ld.u32 temp2, [in_addr+32];
|
|
||||||
//bfe.u64 result64_0, temp64_0, temp64_1, temp64_2;
|
|
||||||
bfe.s64 result64_1, temp64_0, temp64_1, temp64_2;
|
|
||||||
bfe.u32 result0, temp0, temp1, temp2;
|
|
||||||
bfe.s32 result1, temp0, temp1, temp2;
|
|
||||||
st.b64 [out_addr], result64_0;
|
|
||||||
st.b64 [out_addr], result64_1;
|
|
||||||
st.b32 [out_addr], result0;
|
|
||||||
st.b32 [out_addr], result1;
|
|
||||||
ret;
|
ret;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue