mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 22:59:58 +00:00
Regenerate SPIR-V for ptx_impl and fix weird handling of ptr-ptr add or sub
This commit is contained in:
parent
e328ecc550
commit
ad2059872a
5 changed files with 164 additions and 22 deletions
Binary file not shown.
|
@ -181,6 +181,7 @@ test_ptx!(
|
||||||
[0u32, 0u32, 0u32, 2u32]
|
[0u32, 0u32, 0u32, 2u32]
|
||||||
);
|
);
|
||||||
test_ptx!(non_scalar_ptr_offset, [1u32, 2u32, 3u32, 4u32], [7u32]);
|
test_ptx!(non_scalar_ptr_offset, [1u32, 2u32, 3u32, 4u32], [7u32]);
|
||||||
|
test_ptx!(stateful_neg_offset, [1237518u64], [1237518u64]);
|
||||||
|
|
||||||
struct DisplayError<T: Debug> {
|
struct DisplayError<T: Debug> {
|
||||||
err: T,
|
err: T,
|
||||||
|
|
29
ptx/src/test/spirv_run/stateful_neg_offset.ptx
Normal file
29
ptx/src/test/spirv_run/stateful_neg_offset.ptx
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry stateful_neg_offset(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u64 in_addr2;
|
||||||
|
.reg .u64 out_addr2;
|
||||||
|
.reg .u64 offset;
|
||||||
|
.reg .u64 temp;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
cvta.to.global.u64 in_addr2, in_addr;
|
||||||
|
cvta.to.global.u64 out_addr2, out_addr;
|
||||||
|
|
||||||
|
add.u64 offset, in_addr2, out_addr2;
|
||||||
|
sub.u64 offset, in_addr2, out_addr2;
|
||||||
|
|
||||||
|
ld.global.u64 temp, [in_addr2];
|
||||||
|
st.global.u64 [out_addr2], temp;
|
||||||
|
ret;
|
||||||
|
}
|
80
ptx/src/test/spirv_run/stateful_neg_offset.spvtxt
Normal file
80
ptx/src/test/spirv_run/stateful_neg_offset.spvtxt
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
%57 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "stateful_neg_offset"
|
||||||
|
%void = OpTypeVoid
|
||||||
|
%uchar = OpTypeInt 8 0
|
||||||
|
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
|
||||||
|
%61 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
|
||||||
|
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
|
||||||
|
%ulong = OpTypeInt 64 0
|
||||||
|
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||||
|
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
|
||||||
|
%1 = OpFunction %void None %61
|
||||||
|
%29 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||||
|
%30 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||||
|
%55 = OpLabel
|
||||||
|
%15 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%16 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%10 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%11 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%12 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%13 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%14 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
|
||||||
|
%9 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
OpStore %15 %29
|
||||||
|
OpStore %16 %30
|
||||||
|
%47 = OpBitcast %_ptr_Function_ulong %15
|
||||||
|
%17 = OpLoad %ulong %47 Aligned 8
|
||||||
|
%31 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %17
|
||||||
|
OpStore %10 %31
|
||||||
|
%48 = OpBitcast %_ptr_Function_ulong %16
|
||||||
|
%18 = OpLoad %ulong %48 Aligned 8
|
||||||
|
%32 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %18
|
||||||
|
OpStore %11 %32
|
||||||
|
%33 = OpLoad %_ptr_CrossWorkgroup_uchar %10
|
||||||
|
%20 = OpConvertPtrToU %ulong %33
|
||||||
|
%50 = OpCopyObject %ulong %20
|
||||||
|
%49 = OpCopyObject %ulong %50
|
||||||
|
%19 = OpCopyObject %ulong %49
|
||||||
|
%34 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %19
|
||||||
|
OpStore %12 %34
|
||||||
|
%35 = OpLoad %_ptr_CrossWorkgroup_uchar %11
|
||||||
|
%22 = OpConvertPtrToU %ulong %35
|
||||||
|
%52 = OpCopyObject %ulong %22
|
||||||
|
%51 = OpCopyObject %ulong %52
|
||||||
|
%21 = OpCopyObject %ulong %51
|
||||||
|
%36 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %21
|
||||||
|
OpStore %13 %36
|
||||||
|
%37 = OpLoad %_ptr_CrossWorkgroup_uchar %12
|
||||||
|
%24 = OpConvertPtrToU %ulong %37
|
||||||
|
%38 = OpLoad %_ptr_CrossWorkgroup_uchar %13
|
||||||
|
%25 = OpConvertPtrToU %ulong %38
|
||||||
|
%23 = OpIAdd %ulong %24 %25
|
||||||
|
%39 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %23
|
||||||
|
OpStore %14 %39
|
||||||
|
%40 = OpLoad %_ptr_CrossWorkgroup_uchar %12
|
||||||
|
%27 = OpConvertPtrToU %ulong %40
|
||||||
|
%41 = OpLoad %_ptr_CrossWorkgroup_uchar %13
|
||||||
|
%28 = OpConvertPtrToU %ulong %41
|
||||||
|
%26 = OpISub %ulong %27 %28
|
||||||
|
%42 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %26
|
||||||
|
OpStore %14 %42
|
||||||
|
%44 = OpLoad %_ptr_CrossWorkgroup_uchar %12
|
||||||
|
%53 = OpBitcast %_ptr_CrossWorkgroup_ulong %44
|
||||||
|
%43 = OpLoad %ulong %53 Aligned 8
|
||||||
|
OpStore %9 %43
|
||||||
|
%45 = OpLoad %_ptr_CrossWorkgroup_uchar %13
|
||||||
|
%46 = OpLoad %ulong %9
|
||||||
|
%54 = OpBitcast %_ptr_CrossWorkgroup_ulong %45
|
||||||
|
OpStore %54 %46 Aligned 8
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
|
@ -4234,7 +4234,7 @@ fn convert_to_stateful_memory_access<'a, 'input>(
|
||||||
arg,
|
arg,
|
||||||
)) => {
|
)) => {
|
||||||
if let (TypedOperand::Reg(dst), Some(src)) =
|
if let (TypedOperand::Reg(dst), Some(src)) =
|
||||||
(arg.dst, arg.src.upcast().underlying())
|
(arg.dst, arg.src.upcast().underlying_register())
|
||||||
{
|
{
|
||||||
if is_64_bit_integer(id_defs, *src) && is_64_bit_integer(id_defs, dst) {
|
if is_64_bit_integer(id_defs, *src) && is_64_bit_integer(id_defs, dst) {
|
||||||
stateful_markers.push((dst, *src));
|
stateful_markers.push((dst, *src));
|
||||||
|
@ -4266,7 +4266,7 @@ fn convert_to_stateful_memory_access<'a, 'input>(
|
||||||
arg,
|
arg,
|
||||||
)) => {
|
)) => {
|
||||||
if let (TypedOperand::Reg(dst), Some(src)) =
|
if let (TypedOperand::Reg(dst), Some(src)) =
|
||||||
(&arg.dst, arg.src.upcast().underlying())
|
(&arg.dst, arg.src.upcast().underlying_register())
|
||||||
{
|
{
|
||||||
if func_args_64bit.contains(src) {
|
if func_args_64bit.contains(src) {
|
||||||
multi_hash_map_append(&mut stateful_init_reg, *dst, *src);
|
multi_hash_map_append(&mut stateful_init_reg, *dst, *src);
|
||||||
|
@ -4320,14 +4320,16 @@ fn convert_to_stateful_memory_access<'a, 'input>(
|
||||||
}),
|
}),
|
||||||
arg,
|
arg,
|
||||||
)) => {
|
)) => {
|
||||||
|
// TODO: don't mark result of double pointer sub or double
|
||||||
|
// pointer add as ptr result
|
||||||
if let (TypedOperand::Reg(dst), Some(src1)) =
|
if let (TypedOperand::Reg(dst), Some(src1)) =
|
||||||
(arg.dst, arg.src1.upcast().underlying())
|
(arg.dst, arg.src1.upcast().underlying_register())
|
||||||
{
|
{
|
||||||
if regs_ptr_current.contains(src1) && !regs_ptr_seen.contains(src1) {
|
if regs_ptr_current.contains(src1) && !regs_ptr_seen.contains(src1) {
|
||||||
regs_ptr_new.insert(dst);
|
regs_ptr_new.insert(dst);
|
||||||
}
|
}
|
||||||
} else if let (TypedOperand::Reg(dst), Some(src2)) =
|
} else if let (TypedOperand::Reg(dst), Some(src2)) =
|
||||||
(arg.dst, arg.src2.upcast().underlying())
|
(arg.dst, arg.src2.upcast().underlying_register())
|
||||||
{
|
{
|
||||||
if regs_ptr_current.contains(src2) && !regs_ptr_seen.contains(src2) {
|
if regs_ptr_current.contains(src2) && !regs_ptr_seen.contains(src2) {
|
||||||
regs_ptr_new.insert(dst);
|
regs_ptr_new.insert(dst);
|
||||||
|
@ -4392,7 +4394,7 @@ fn convert_to_stateful_memory_access<'a, 'input>(
|
||||||
}),
|
}),
|
||||||
arg,
|
arg,
|
||||||
)) if is_add_ptr_direct(&remapped_ids, &arg) => {
|
)) if is_add_ptr_direct(&remapped_ids, &arg) => {
|
||||||
let (ptr, offset) = match arg.src1.upcast().underlying() {
|
let (ptr, offset) = match arg.src1.upcast().underlying_register() {
|
||||||
Some(src1) if remapped_ids.contains_key(src1) => {
|
Some(src1) if remapped_ids.contains_key(src1) => {
|
||||||
(remapped_ids.get(src1).unwrap(), arg.src2)
|
(remapped_ids.get(src1).unwrap(), arg.src2)
|
||||||
}
|
}
|
||||||
|
@ -4420,14 +4422,9 @@ fn convert_to_stateful_memory_access<'a, 'input>(
|
||||||
saturate: false,
|
saturate: false,
|
||||||
}),
|
}),
|
||||||
arg,
|
arg,
|
||||||
)) if is_add_ptr_direct(&remapped_ids, &arg) => {
|
)) if is_sub_ptr_direct(&remapped_ids, &arg) => {
|
||||||
let (ptr, offset) = match arg.src1.upcast().underlying() {
|
let (ptr, offset) = match arg.src1.upcast().underlying_register() {
|
||||||
Some(src1) if remapped_ids.contains_key(src1) => {
|
Some(src1) => (remapped_ids.get(src1).unwrap(), arg.src2),
|
||||||
(remapped_ids.get(src1).unwrap(), arg.src2)
|
|
||||||
}
|
|
||||||
Some(src2) if remapped_ids.contains_key(src2) => {
|
|
||||||
(remapped_ids.get(src2).unwrap(), arg.src1)
|
|
||||||
}
|
|
||||||
_ => return Err(error_unreachable()),
|
_ => return Err(error_unreachable()),
|
||||||
};
|
};
|
||||||
let offset_neg = id_defs.register_intermediate(Some((
|
let offset_neg = id_defs.register_intermediate(Some((
|
||||||
|
@ -4577,10 +4574,45 @@ fn is_add_ptr_direct(remapped_ids: &HashMap<u32, u32>, arg: &ast::Arg3<TypedArgP
|
||||||
if !remapped_ids.contains_key(&dst) {
|
if !remapped_ids.contains_key(&dst) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
match arg.src1.upcast().underlying() {
|
if let Some(src1_reg) = arg.src1.upcast().underlying_register() {
|
||||||
Some(src1) if remapped_ids.contains_key(src1) => true,
|
if remapped_ids.contains_key(src1_reg) {
|
||||||
Some(src2) if remapped_ids.contains_key(src2) => true,
|
// don't trigger optimization when adding two pointers
|
||||||
_ => false,
|
if let Some(src2_reg) = arg.src2.upcast().underlying_register() {
|
||||||
|
return !remapped_ids.contains_key(src2_reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(src2_reg) = arg.src2.upcast().underlying_register() {
|
||||||
|
remapped_ids.contains_key(src2_reg)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_sub_ptr_direct(remapped_ids: &HashMap<u32, u32>, arg: &ast::Arg3<TypedArgParams>) -> bool {
|
||||||
|
match arg.dst {
|
||||||
|
TypedOperand::Imm(..) | TypedOperand::RegOffset(..) | TypedOperand::VecMember(..) => {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
TypedOperand::Reg(dst) => {
|
||||||
|
if !remapped_ids.contains_key(&dst) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
match arg.src1.upcast().underlying_register() {
|
||||||
|
Some(src1_reg) => {
|
||||||
|
if remapped_ids.contains_key(src1_reg) {
|
||||||
|
// don't trigger optimization when subtracting two pointers
|
||||||
|
arg.src2
|
||||||
|
.upcast()
|
||||||
|
.underlying_register()
|
||||||
|
.map_or(true, |src2_reg| !remapped_ids.contains_key(src2_reg))
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7099,12 +7131,12 @@ impl ast::StateSpace {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> ast::Operand<T> {
|
impl<T> ast::Operand<T> {
|
||||||
fn underlying(&self) -> Option<&T> {
|
fn underlying_register(&self) -> Option<&T> {
|
||||||
match self {
|
match self {
|
||||||
ast::Operand::Reg(r) | ast::Operand::RegOffset(r, _) => Some(r),
|
ast::Operand::Reg(r)
|
||||||
ast::Operand::Imm(_) => None,
|
| ast::Operand::RegOffset(r, _)
|
||||||
ast::Operand::VecMember(reg, _) => Some(reg),
|
| ast::Operand::VecMember(r, _) => Some(r),
|
||||||
ast::Operand::VecPack(..) => None,
|
ast::Operand::Imm(_) | ast::Operand::VecPack(..) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue