mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Add support for bfi
This commit is contained in:
parent
84364fa412
commit
bf2aef9be0
3 changed files with 33 additions and 0 deletions
Binary file not shown.
|
@ -101,4 +101,33 @@ extern "C"
|
|||
len = sub_sat(64, pos);
|
||||
return (base << (64U - pos - len)) >> (64U - len);
|
||||
}
|
||||
|
||||
// Forward declaration of a device-side helper that is not defined in this view;
// bfi_b32 below calls it as __ockl_bfm_u32(len, pos) to obtain its insertion mask.
// NOTE(review): presumably the "bit field mask" routine from AMD's OCKL device
// library, i.e. `count` one-bits shifted left by `offset` -- confirm against its docs.
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device));
|
||||
// 32-bit bit-field insert.
//
// Returns `base` with the bits selected by the field mask replaced by the
// corresponding bits of `insert << pos`. Only the low 8 bits of `pos_32` and
// `len_32` are taken into account.
//
//   insert - value whose low bits are placed into the field
//   base   - value that supplies every bit outside the field
//   pos_32 - start bit of the field (low byte used)
//   len_32 - width of the field in bits (low byte used)
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
{
    const uint32_t start = pos_32 & 0xFFU;
    const uint32_t width = len_32 & 0xFFU;

    // A field that starts at or beyond bit 32 touches nothing in a 32-bit value.
    if (start >= 32)
    {
        return base;
    }

    // Widths of 32 or more select every bit from `start` upwards; shorter
    // widths obtain their mask from the __ockl_bfm_u32 helper.
    const uint32_t field = (width >= 32)
                               ? (UINT32_MAX << start)
                               : __ockl_bfm_u32(width, start);

    const uint32_t kept = base & ~field;
    const uint32_t placed = (insert << start) & field;
    return kept | placed;
}
|
||||
|
||||
// 64-bit bit-field insert.
//
// Returns `base` with up to `len` bits, starting at bit `pos`, replaced by the
// low bits of `insert`. Bits of the field that would land above bit 63 are
// discarded by the shifts.
//
//   insert - value whose low bits are placed into the field
//   base   - value that supplies every bit outside the field
//   pos    - start bit of the field
//   len    - width of the field in bits
uint64_t FUNC(bfi_b64)(uint64_t insert, uint64_t base, uint32_t pos, uint32_t len)
{
    // NVIDIA docs are incorrect. In the 64 bit variant both `pos` and `len`
    // parameters use the whole 32 bit number and not just the bottom 8 bits
    // (the original note was written about `bfe`; the same convention is
    // applied to `bfi` here, so the operands are deliberately not masked).
    if (pos >= 64)
        return base;

    uint64_t mask;
    if (len >= 64)
    {
        // The field reaches (or passes) the top bit: select everything from `pos` up.
        mask = UINT64_MAX << pos;
    }
    else
    {
        // Use an explicitly 64-bit one. The width of `1UL` depends on the
        // platform ABI (32 bits where `unsigned long` is 32-bit), and `len`
        // can be as large as 63 here.
        const uint64_t one = 1;
        mask = ((one << len) - one) << pos;
    }
    return (~mask & base) | (mask & (insert << pos));
}
|
||||
}
|
||||
|
|
|
@ -100,6 +100,10 @@ fn run_instruction<'input>(
|
|||
let name = ["bfe_", scalar_to_ptx_name(data)].concat();
|
||||
to_call(resolver, fn_declarations, name.into(), i)?
|
||||
}
|
||||
i @ ptx_parser::Instruction::Bfi { data, .. } => {
|
||||
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
|
||||
to_call(resolver, fn_declarations, name.into(), i)?
|
||||
}
|
||||
i => i,
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue