Redo handling of sregs

This commit is contained in:
Andrzej Janik 2021-09-17 20:53:44 +00:00
commit d5a4b068dd
8 changed files with 512 additions and 447 deletions

Binary file not shown.

View file

@ -297,6 +297,22 @@ atomic_add(atom_acq_rel_sys_shared_add_f64, memory_order_acq_rel, memory_order_a
return (uint)__builtin_amdgcn_uicmp(1, 0, 33); return (uint)__builtin_amdgcn_uicmp(1, 0, 33);
} }
uint FUNC(sreg_tid)(uchar dim) {
return (uint)get_local_id(dim);
}
uint FUNC(sreg_ntid)(uchar dim) {
return (uint)get_local_size(dim);
}
uint FUNC(sreg_ctaid)(uchar dim) {
return (uint)get_group_id(dim);
}
uint FUNC(sreg_nctaid)(uchar dim) {
return (uint)get_num_groups(dim);
}
uint FUNC(sreg_clock)() { uint FUNC(sreg_clock)() {
return (uint)__builtin_amdgcn_s_memtime(); return (uint)__builtin_amdgcn_s_memtime();
} }

View file

@ -7,39 +7,64 @@
OpCapability Int64 OpCapability Int64
OpCapability Float16 OpCapability Float16
OpCapability Float64 OpCapability Float64
%18 = OpExtInstImport "OpenCL.std" %40 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "activemask" OpEntryPoint Kernel %1 "lanemask_lt"
OpExecutionMode %1 ContractionOff OpExecutionMode %1 ContractionOff
OpDecorate %15 LinkageAttributes "__zluda_ptx_impl__activemask" Import OpDecorate %11 LinkageAttributes "__zluda_ptx_impl__sreg_lanemask_lt" Import
%void = OpTypeVoid %void = OpTypeVoid
%uint = OpTypeInt 32 0 %uint = OpTypeInt 32 0
%21 = OpTypeFunction %uint %43 = OpTypeFunction %uint
%ulong = OpTypeInt 64 0 %ulong = OpTypeInt 64 0
%23 = OpTypeFunction %void %ulong %ulong %45 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong %_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint %_ptr_Generic_uint = OpTypePointer Generic %uint
%15 = OpFunction %uint None %21 %uint_1 = OpConstant %uint 1
%11 = OpFunction %uint None %43
OpFunctionEnd OpFunctionEnd
%1 = OpFunction %void None %23 %1 = OpFunction %void None %45
%6 = OpFunctionParameter %ulong %13 = OpFunctionParameter %ulong
%7 = OpFunctionParameter %ulong %14 = OpFunctionParameter %ulong
%14 = OpLabel %38 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function %2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function %3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function %4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_uint Function %5 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %6 %6 = OpVariable %_ptr_Function_uint Function
OpStore %3 %7 %7 = OpVariable %_ptr_Function_uint Function
%8 = OpLoad %ulong %3 Aligned 8 %8 = OpVariable %_ptr_Function_uint Function
OpStore %4 %8 OpStore %2 %13
%9 = OpFunctionCall %uint %15 OpStore %3 %14
OpStore %5 %9 %15 = OpLoad %ulong %2 Aligned 8
%10 = OpLoad %ulong %4 OpStore %4 %15
%11 = OpLoad %uint %5 %16 = OpLoad %ulong %3 Aligned 8
%12 = OpConvertUToPtr %_ptr_Generic_uint %10 OpStore %5 %16
%13 = OpCopyObject %uint %11 %18 = OpLoad %ulong %4
OpStore %12 %13 Aligned 4 %29 = OpConvertUToPtr %_ptr_Generic_uint %18
%28 = OpLoad %uint %29 Aligned 4
%17 = OpCopyObject %uint %28
OpStore %6 %17
%20 = OpLoad %uint %6
%31 = OpCopyObject %uint %20
%30 = OpIAdd %uint %31 %uint_1
%19 = OpCopyObject %uint %30
OpStore %7 %19
%10 = OpFunctionCall %uint %11
%32 = OpCopyObject %uint %10
%21 = OpCopyObject %uint %32
OpStore %8 %21
%23 = OpLoad %uint %7
%24 = OpLoad %uint %8
%34 = OpCopyObject %uint %23
%35 = OpCopyObject %uint %24
%33 = OpIAdd %uint %34 %35
%22 = OpCopyObject %uint %33
OpStore %7 %22
%25 = OpLoad %ulong %5
%26 = OpLoad %uint %7
%36 = OpConvertUToPtr %_ptr_Generic_uint %25
%37 = OpCopyObject %uint %26
OpStore %36 %37 Aligned 4
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View file

@ -7,55 +7,54 @@
OpCapability Int64 OpCapability Int64
OpCapability Float16 OpCapability Float16
OpCapability Float64 OpCapability Float64
%31 = OpExtInstImport "OpenCL.std" %30 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ntid" OpEntryPoint Kernel %1 "ntid"
OpExecutionMode %1 ContractionOff OpExecutionMode %1 ContractionOff
OpDecorate %24 LinkageAttributes "get_local_size" Import OpDecorate %11 LinkageAttributes "__zluda_ptx_impl__sreg_ntid" Import
%void = OpTypeVoid %void = OpTypeVoid
%ulong = OpTypeInt 64 0
%uint = OpTypeInt 32 0 %uint = OpTypeInt 32 0
%35 = OpTypeFunction %ulong %uint %uchar = OpTypeInt 8 0
%34 = OpTypeFunction %uint %uchar
%ulong = OpTypeInt 64 0
%36 = OpTypeFunction %void %ulong %ulong %36 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong %_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint %_ptr_Generic_uint = OpTypePointer Generic %uint
%uint_0 = OpConstant %uint 0 %uchar_0 = OpConstant %uchar 0
%24 = OpFunction %ulong None %35 %11 = OpFunction %uint None %34
%26 = OpFunctionParameter %uint %13 = OpFunctionParameter %uchar
OpFunctionEnd OpFunctionEnd
%1 = OpFunction %void None %36 %1 = OpFunction %void None %36
%9 = OpFunctionParameter %ulong %14 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong %15 = OpFunctionParameter %ulong
%29 = OpLabel %28 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function %2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function %3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function %4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function %5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function %6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function %7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9 OpStore %2 %14
OpStore %3 %10 OpStore %3 %15
%11 = OpLoad %ulong %2 Aligned 8 %16 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11 OpStore %4 %16
%12 = OpLoad %ulong %3 Aligned 8 %17 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12 OpStore %5 %17
%14 = OpLoad %ulong %4 %19 = OpLoad %ulong %4
%27 = OpConvertUToPtr %_ptr_Generic_uint %14 %26 = OpConvertUToPtr %_ptr_Generic_uint %19
%13 = OpLoad %uint %27 Aligned 4 %18 = OpLoad %uint %26 Aligned 4
OpStore %6 %13 OpStore %6 %18
%23 = OpFunctionCall %ulong %24 %uint_0 %10 = OpFunctionCall %uint %11 %uchar_0
%40 = OpBitcast %ulong %23 %20 = OpCopyObject %uint %10
%16 = OpUConvert %uint %40 OpStore %7 %20
%15 = OpCopyObject %uint %16 %22 = OpLoad %uint %6
OpStore %7 %15 %23 = OpLoad %uint %7
%18 = OpLoad %uint %6 %21 = OpIAdd %uint %22 %23
%19 = OpLoad %uint %7 OpStore %6 %21
%17 = OpIAdd %uint %18 %19 %24 = OpLoad %ulong %5
OpStore %6 %17 %25 = OpLoad %uint %6
%20 = OpLoad %ulong %5 %27 = OpConvertUToPtr %_ptr_Generic_uint %24
%21 = OpLoad %uint %6 OpStore %27 %25 Aligned 4
%28 = OpConvertUToPtr %_ptr_Generic_uint %20
OpStore %28 %21 Aligned 4
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View file

@ -7,89 +7,87 @@
OpCapability Int64 OpCapability Int64
OpCapability Float16 OpCapability Float16
OpCapability Float64 OpCapability Float64
%57 = OpExtInstImport "OpenCL.std" %56 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "stateful_ld_st_ntid" OpEntryPoint Kernel %1 "stateful_ld_st_ntid"
OpExecutionMode %1 ContractionOff OpExecutionMode %1 ContractionOff
OpDecorate %44 LinkageAttributes "_Z12get_local_idj" Import OpDecorate %12 LinkageAttributes "__zluda_ptx_impl__sreg_tid" Import
%void = OpTypeVoid %void = OpTypeVoid
%ulong = OpTypeInt 64 0
%uint = OpTypeInt 32 0 %uint = OpTypeInt 32 0
%61 = OpTypeFunction %ulong %uint
%uchar = OpTypeInt 8 0 %uchar = OpTypeInt 8 0
%60 = OpTypeFunction %uint %uchar
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%64 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar %62 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar %_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong %_ptr_Function_ulong = OpTypePointer Function %ulong
%uint_0 = OpConstant %uint 0 %uchar_0 = OpConstant %uchar 0
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%44 = OpFunction %ulong None %61 %12 = OpFunction %uint None %60
%46 = OpFunctionParameter %uint %14 = OpFunctionParameter %uchar
OpFunctionEnd OpFunctionEnd
%1 = OpFunction %void None %64 %1 = OpFunction %void None %62
%20 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %25 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%21 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %26 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%55 = OpLabel %54 = OpLabel
%12 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %17 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%13 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %18 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%10 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %15 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%11 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %16 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%6 = OpVariable %_ptr_Function_uint Function %6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_ulong Function %7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_ulong Function %8 = OpVariable %_ptr_Function_ulong Function
OpStore %12 %20 OpStore %17 %25
OpStore %13 %21 OpStore %18 %26
%48 = OpBitcast %_ptr_Function_ulong %12 %47 = OpBitcast %_ptr_Function_ulong %17
%47 = OpLoad %ulong %48 Aligned 8 %46 = OpLoad %ulong %47 Aligned 8
%14 = OpCopyObject %ulong %47 %19 = OpCopyObject %ulong %46
%22 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %14 %27 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %19
OpStore %10 %22 OpStore %15 %27
%50 = OpBitcast %_ptr_Function_ulong %13 %49 = OpBitcast %_ptr_Function_ulong %18
%49 = OpLoad %ulong %50 Aligned 8 %48 = OpLoad %ulong %49 Aligned 8
%15 = OpCopyObject %ulong %49 %20 = OpCopyObject %ulong %48
%23 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %15 %28 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %20
OpStore %11 %23 OpStore %16 %28
%24 = OpLoad %_ptr_CrossWorkgroup_uchar %10 %29 = OpLoad %_ptr_CrossWorkgroup_uchar %15
%17 = OpConvertPtrToU %ulong %24 %22 = OpConvertPtrToU %ulong %29
%16 = OpCopyObject %ulong %17 %21 = OpCopyObject %ulong %22
%25 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %16 %30 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %21
OpStore %10 %25 OpStore %15 %30
%26 = OpLoad %_ptr_CrossWorkgroup_uchar %11 %31 = OpLoad %_ptr_CrossWorkgroup_uchar %16
%19 = OpConvertPtrToU %ulong %26 %24 = OpConvertPtrToU %ulong %31
%18 = OpCopyObject %ulong %19 %23 = OpCopyObject %ulong %24
%27 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %18 %32 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %23
OpStore %11 %27 OpStore %16 %32
%43 = OpFunctionCall %ulong %44 %uint_0 %11 = OpFunctionCall %uint %12 %uchar_0
%68 = OpBitcast %ulong %43 %33 = OpCopyObject %uint %11
%29 = OpUConvert %uint %68 OpStore %6 %33
%28 = OpCopyObject %uint %29 %35 = OpLoad %uint %6
OpStore %6 %28 %67 = OpBitcast %uint %35
%31 = OpLoad %uint %6 %34 = OpUConvert %ulong %67
%69 = OpBitcast %uint %31 OpStore %7 %34
%30 = OpUConvert %ulong %69 %37 = OpLoad %_ptr_CrossWorkgroup_uchar %15
OpStore %7 %30 %38 = OpLoad %ulong %7
%33 = OpLoad %_ptr_CrossWorkgroup_uchar %10 %50 = OpCopyObject %ulong %38
%34 = OpLoad %ulong %7 %68 = OpBitcast %_ptr_CrossWorkgroup_uchar %37
%51 = OpCopyObject %ulong %34 %69 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %68 %50
%70 = OpBitcast %_ptr_CrossWorkgroup_uchar %33 %36 = OpBitcast %_ptr_CrossWorkgroup_uchar %69
OpStore %15 %36
%40 = OpLoad %_ptr_CrossWorkgroup_uchar %16
%41 = OpLoad %ulong %7
%51 = OpCopyObject %ulong %41
%70 = OpBitcast %_ptr_CrossWorkgroup_uchar %40
%71 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %70 %51 %71 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %70 %51
%32 = OpBitcast %_ptr_CrossWorkgroup_uchar %71 %39 = OpBitcast %_ptr_CrossWorkgroup_uchar %71
OpStore %10 %32 OpStore %16 %39
%36 = OpLoad %_ptr_CrossWorkgroup_uchar %11 %43 = OpLoad %_ptr_CrossWorkgroup_uchar %15
%37 = OpLoad %ulong %7 %52 = OpBitcast %_ptr_CrossWorkgroup_ulong %43
%52 = OpCopyObject %ulong %37 %42 = OpLoad %ulong %52 Aligned 8
%72 = OpBitcast %_ptr_CrossWorkgroup_uchar %36 OpStore %8 %42
%73 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %72 %52 %44 = OpLoad %_ptr_CrossWorkgroup_uchar %16
%35 = OpBitcast %_ptr_CrossWorkgroup_uchar %73 %45 = OpLoad %ulong %8
OpStore %11 %35 %53 = OpBitcast %_ptr_CrossWorkgroup_ulong %44
%39 = OpLoad %_ptr_CrossWorkgroup_uchar %10 OpStore %53 %45 Aligned 8
%53 = OpBitcast %_ptr_CrossWorkgroup_ulong %39
%38 = OpLoad %ulong %53 Aligned 8
OpStore %8 %38
%40 = OpLoad %_ptr_CrossWorkgroup_uchar %11
%41 = OpLoad %ulong %8
%54 = OpBitcast %_ptr_CrossWorkgroup_ulong %40
OpStore %54 %41 Aligned 8
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View file

@ -7,93 +7,91 @@
OpCapability Int64 OpCapability Int64
OpCapability Float16 OpCapability Float16
OpCapability Float64 OpCapability Float64
%65 = OpExtInstImport "OpenCL.std" %64 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "stateful_ld_st_ntid_chain" OpEntryPoint Kernel %1 "stateful_ld_st_ntid_chain"
OpExecutionMode %1 ContractionOff OpExecutionMode %1 ContractionOff
OpDecorate %52 LinkageAttributes "_Z12get_local_idj" Import OpDecorate %16 LinkageAttributes "__zluda_ptx_impl__sreg_tid" Import
%void = OpTypeVoid %void = OpTypeVoid
%ulong = OpTypeInt 64 0
%uint = OpTypeInt 32 0 %uint = OpTypeInt 32 0
%69 = OpTypeFunction %ulong %uint
%uchar = OpTypeInt 8 0 %uchar = OpTypeInt 8 0
%68 = OpTypeFunction %uint %uchar
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%72 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar %70 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar %_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong %_ptr_Function_ulong = OpTypePointer Function %ulong
%uint_0 = OpConstant %uint 0 %uchar_0 = OpConstant %uchar 0
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%52 = OpFunction %ulong None %69 %16 = OpFunction %uint None %68
%54 = OpFunctionParameter %uint %18 = OpFunctionParameter %uchar
OpFunctionEnd OpFunctionEnd
%1 = OpFunction %void None %72 %1 = OpFunction %void None %70
%28 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %33 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%29 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %34 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%63 = OpLabel %62 = OpLabel
%25 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%26 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%19 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%20 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %20 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%21 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %21 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%14 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %22 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%15 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %23 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%16 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %24 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%17 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%18 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%19 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%10 = OpVariable %_ptr_Function_uint Function %10 = OpVariable %_ptr_Function_uint Function
%11 = OpVariable %_ptr_Function_ulong Function %11 = OpVariable %_ptr_Function_ulong Function
%12 = OpVariable %_ptr_Function_ulong Function %12 = OpVariable %_ptr_Function_ulong Function
OpStore %20 %28 OpStore %25 %33
OpStore %21 %29 OpStore %26 %34
%56 = OpBitcast %_ptr_Function_ulong %20 %55 = OpBitcast %_ptr_Function_ulong %25
%55 = OpLoad %ulong %56 Aligned 8 %54 = OpLoad %ulong %55 Aligned 8
%22 = OpCopyObject %ulong %55 %27 = OpCopyObject %ulong %54
%30 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %22 %35 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %27
OpStore %14 %30 OpStore %19 %35
%58 = OpBitcast %_ptr_Function_ulong %21 %57 = OpBitcast %_ptr_Function_ulong %26
%57 = OpLoad %ulong %58 Aligned 8 %56 = OpLoad %ulong %57 Aligned 8
%23 = OpCopyObject %ulong %57 %28 = OpCopyObject %ulong %56
%31 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %23 %36 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %28
OpStore %17 %31 OpStore %22 %36
%32 = OpLoad %_ptr_CrossWorkgroup_uchar %14 %37 = OpLoad %_ptr_CrossWorkgroup_uchar %19
%25 = OpConvertPtrToU %ulong %32 %30 = OpConvertPtrToU %ulong %37
%24 = OpCopyObject %ulong %25 %29 = OpCopyObject %ulong %30
%33 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %24 %38 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %29
OpStore %15 %33 OpStore %20 %38
%34 = OpLoad %_ptr_CrossWorkgroup_uchar %17 %39 = OpLoad %_ptr_CrossWorkgroup_uchar %22
%27 = OpConvertPtrToU %ulong %34 %32 = OpConvertPtrToU %ulong %39
%26 = OpCopyObject %ulong %27 %31 = OpCopyObject %ulong %32
%35 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %26 %40 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %31
OpStore %18 %35 OpStore %23 %40
%51 = OpFunctionCall %ulong %52 %uint_0 %15 = OpFunctionCall %uint %16 %uchar_0
%76 = OpBitcast %ulong %51 %41 = OpCopyObject %uint %15
%37 = OpUConvert %uint %76 OpStore %10 %41
%36 = OpCopyObject %uint %37 %43 = OpLoad %uint %10
OpStore %10 %36 %75 = OpBitcast %uint %43
%39 = OpLoad %uint %10 %42 = OpUConvert %ulong %75
%77 = OpBitcast %uint %39 OpStore %11 %42
%38 = OpUConvert %ulong %77 %45 = OpLoad %_ptr_CrossWorkgroup_uchar %20
OpStore %11 %38 %46 = OpLoad %ulong %11
%41 = OpLoad %_ptr_CrossWorkgroup_uchar %15 %58 = OpCopyObject %ulong %46
%42 = OpLoad %ulong %11 %76 = OpBitcast %_ptr_CrossWorkgroup_uchar %45
%59 = OpCopyObject %ulong %42 %77 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %76 %58
%78 = OpBitcast %_ptr_CrossWorkgroup_uchar %41 %44 = OpBitcast %_ptr_CrossWorkgroup_uchar %77
OpStore %21 %44
%48 = OpLoad %_ptr_CrossWorkgroup_uchar %23
%49 = OpLoad %ulong %11
%59 = OpCopyObject %ulong %49
%78 = OpBitcast %_ptr_CrossWorkgroup_uchar %48
%79 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %78 %59 %79 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %78 %59
%40 = OpBitcast %_ptr_CrossWorkgroup_uchar %79 %47 = OpBitcast %_ptr_CrossWorkgroup_uchar %79
OpStore %16 %40 OpStore %24 %47
%44 = OpLoad %_ptr_CrossWorkgroup_uchar %18 %51 = OpLoad %_ptr_CrossWorkgroup_uchar %21
%45 = OpLoad %ulong %11 %60 = OpBitcast %_ptr_CrossWorkgroup_ulong %51
%60 = OpCopyObject %ulong %45 %50 = OpLoad %ulong %60 Aligned 8
%80 = OpBitcast %_ptr_CrossWorkgroup_uchar %44 OpStore %12 %50
%81 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %80 %60 %52 = OpLoad %_ptr_CrossWorkgroup_uchar %24
%43 = OpBitcast %_ptr_CrossWorkgroup_uchar %81 %53 = OpLoad %ulong %12
OpStore %19 %43 %61 = OpBitcast %_ptr_CrossWorkgroup_ulong %52
%47 = OpLoad %_ptr_CrossWorkgroup_uchar %16 OpStore %61 %53 Aligned 8
%61 = OpBitcast %_ptr_CrossWorkgroup_ulong %47
%46 = OpLoad %ulong %61 Aligned 8
OpStore %12 %46
%48 = OpLoad %_ptr_CrossWorkgroup_uchar %19
%49 = OpLoad %ulong %12
%62 = OpBitcast %_ptr_CrossWorkgroup_ulong %48
OpStore %62 %49 Aligned 8
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View file

@ -7,103 +7,101 @@
OpCapability Int64 OpCapability Int64
OpCapability Float16 OpCapability Float16
OpCapability Float64 OpCapability Float64
%71 = OpExtInstImport "OpenCL.std" %70 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "stateful_ld_st_ntid_sub" OpEntryPoint Kernel %1 "stateful_ld_st_ntid_sub"
OpExecutionMode %1 ContractionOff OpExecutionMode %1 ContractionOff
OpDecorate %54 LinkageAttributes "_Z12get_local_idj" Import OpDecorate %16 LinkageAttributes "__zluda_ptx_impl__sreg_tid" Import
%void = OpTypeVoid %void = OpTypeVoid
%ulong = OpTypeInt 64 0
%uint = OpTypeInt 32 0 %uint = OpTypeInt 32 0
%75 = OpTypeFunction %ulong %uint
%uchar = OpTypeInt 8 0 %uchar = OpTypeInt 8 0
%74 = OpTypeFunction %uint %uchar
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%78 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar %76 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar %_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong %_ptr_Function_ulong = OpTypePointer Function %ulong
%uint_0 = OpConstant %uint 0 %uchar_0 = OpConstant %uchar 0
%ulong_0 = OpConstant %ulong 0 %ulong_0 = OpConstant %ulong 0
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%ulong_0_0 = OpConstant %ulong 0 %ulong_0_0 = OpConstant %ulong 0
%54 = OpFunction %ulong None %75 %16 = OpFunction %uint None %74
%56 = OpFunctionParameter %uint %18 = OpFunctionParameter %uchar
OpFunctionEnd OpFunctionEnd
%1 = OpFunction %void None %78 %1 = OpFunction %void None %76
%30 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %35 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%31 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar %36 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%69 = OpLabel %68 = OpLabel
%25 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%26 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%19 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%20 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %20 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%21 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %21 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%14 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %22 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%15 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %23 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%16 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function %24 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%17 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%18 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%19 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%10 = OpVariable %_ptr_Function_uint Function %10 = OpVariable %_ptr_Function_uint Function
%11 = OpVariable %_ptr_Function_ulong Function %11 = OpVariable %_ptr_Function_ulong Function
%12 = OpVariable %_ptr_Function_ulong Function %12 = OpVariable %_ptr_Function_ulong Function
OpStore %20 %30 OpStore %25 %35
OpStore %21 %31 OpStore %26 %36
%62 = OpBitcast %_ptr_Function_ulong %20 %61 = OpBitcast %_ptr_Function_ulong %25
%61 = OpLoad %ulong %62 Aligned 8 %60 = OpLoad %ulong %61 Aligned 8
%22 = OpCopyObject %ulong %61 %27 = OpCopyObject %ulong %60
%32 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %22 %37 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %27
OpStore %14 %32 OpStore %19 %37
%64 = OpBitcast %_ptr_Function_ulong %21 %63 = OpBitcast %_ptr_Function_ulong %26
%63 = OpLoad %ulong %64 Aligned 8 %62 = OpLoad %ulong %63 Aligned 8
%23 = OpCopyObject %ulong %63 %28 = OpCopyObject %ulong %62
%33 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %23 %38 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %28
OpStore %17 %33 OpStore %22 %38
%34 = OpLoad %_ptr_CrossWorkgroup_uchar %14 %39 = OpLoad %_ptr_CrossWorkgroup_uchar %19
%25 = OpConvertPtrToU %ulong %34 %30 = OpConvertPtrToU %ulong %39
%24 = OpCopyObject %ulong %25 %29 = OpCopyObject %ulong %30
%35 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %24 %40 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %29
OpStore %15 %35 OpStore %20 %40
%36 = OpLoad %_ptr_CrossWorkgroup_uchar %17 %41 = OpLoad %_ptr_CrossWorkgroup_uchar %22
%27 = OpConvertPtrToU %ulong %36 %32 = OpConvertPtrToU %ulong %41
%26 = OpCopyObject %ulong %27 %31 = OpCopyObject %ulong %32
%37 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %26 %42 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %31
OpStore %18 %37 OpStore %23 %42
%53 = OpFunctionCall %ulong %54 %uint_0 %15 = OpFunctionCall %uint %16 %uchar_0
%82 = OpBitcast %ulong %53 %43 = OpCopyObject %uint %15
%39 = OpUConvert %uint %82 OpStore %10 %43
%38 = OpCopyObject %uint %39 %45 = OpLoad %uint %10
OpStore %10 %38 %81 = OpBitcast %uint %45
%41 = OpLoad %uint %10 %44 = OpUConvert %ulong %81
%83 = OpBitcast %uint %41 OpStore %11 %44
%40 = OpUConvert %ulong %83 %46 = OpLoad %ulong %11
OpStore %11 %40 %64 = OpCopyObject %ulong %46
%42 = OpLoad %ulong %11 %33 = OpSNegate %ulong %64
%65 = OpCopyObject %ulong %42 %48 = OpLoad %_ptr_CrossWorkgroup_uchar %20
%28 = OpSNegate %ulong %65 %82 = OpBitcast %_ptr_CrossWorkgroup_uchar %48
%44 = OpLoad %_ptr_CrossWorkgroup_uchar %15 %83 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %82 %33
%84 = OpBitcast %_ptr_CrossWorkgroup_uchar %44 %47 = OpBitcast %_ptr_CrossWorkgroup_uchar %83
%85 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %84 %28 OpStore %21 %47
%43 = OpBitcast %_ptr_CrossWorkgroup_uchar %85 %49 = OpLoad %ulong %11
OpStore %16 %43 %65 = OpCopyObject %ulong %49
%45 = OpLoad %ulong %11 %34 = OpSNegate %ulong %65
%66 = OpCopyObject %ulong %45 %51 = OpLoad %_ptr_CrossWorkgroup_uchar %23
%29 = OpSNegate %ulong %66 %84 = OpBitcast %_ptr_CrossWorkgroup_uchar %51
%47 = OpLoad %_ptr_CrossWorkgroup_uchar %18 %85 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %84 %34
%86 = OpBitcast %_ptr_CrossWorkgroup_uchar %47 %50 = OpBitcast %_ptr_CrossWorkgroup_uchar %85
%87 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %86 %29 OpStore %24 %50
%46 = OpBitcast %_ptr_CrossWorkgroup_uchar %87 %53 = OpLoad %_ptr_CrossWorkgroup_uchar %21
OpStore %19 %46 %66 = OpBitcast %_ptr_CrossWorkgroup_ulong %53
%49 = OpLoad %_ptr_CrossWorkgroup_uchar %16 %87 = OpBitcast %_ptr_CrossWorkgroup_uchar %66
%67 = OpBitcast %_ptr_CrossWorkgroup_ulong %49 %88 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %87 %ulong_0
%57 = OpBitcast %_ptr_CrossWorkgroup_ulong %88
%52 = OpLoad %ulong %57 Aligned 8
OpStore %12 %52
%54 = OpLoad %_ptr_CrossWorkgroup_uchar %24
%55 = OpLoad %ulong %12
%67 = OpBitcast %_ptr_CrossWorkgroup_ulong %54
%89 = OpBitcast %_ptr_CrossWorkgroup_uchar %67 %89 = OpBitcast %_ptr_CrossWorkgroup_uchar %67
%90 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %89 %ulong_0 %90 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %89 %ulong_0_0
%58 = OpBitcast %_ptr_CrossWorkgroup_ulong %90 %59 = OpBitcast %_ptr_CrossWorkgroup_ulong %90
%48 = OpLoad %ulong %58 Aligned 8 OpStore %59 %55 Aligned 8
OpStore %12 %48
%50 = OpLoad %_ptr_CrossWorkgroup_uchar %19
%51 = OpLoad %ulong %12
%68 = OpBitcast %_ptr_CrossWorkgroup_ulong %50
%91 = OpBitcast %_ptr_CrossWorkgroup_uchar %68
%92 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %91 %ulong_0_0
%60 = OpBitcast %_ptr_CrossWorkgroup_ulong %92
OpStore %60 %51 Aligned 8
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View file

@ -10,8 +10,6 @@ use rspirv::binary::{Assemble, Disassemble};
static ZLUDA_PTX_IMPL_INTEL: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.spv"); static ZLUDA_PTX_IMPL_INTEL: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.spv");
static ZLUDA_PTX_IMPL_AMD: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.bc"); static ZLUDA_PTX_IMPL_AMD: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.bc");
const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl__"; const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl__";
const ZLUDA_PTX_PREFIX_SREG_CLOCK: &'static str = "__zluda_ptx_impl__sreg_clock";
const ZLUDA_PTX_PREFIX_SREG_LANEMASK_LT: &'static str = "__zluda_ptx_impl__sreg_lanemask_lt";
quick_error! { quick_error! {
#[derive(Debug)] #[derive(Debug)]
@ -426,8 +424,8 @@ pub struct KernelInfo {
pub uses_shared_mem: bool, pub uses_shared_mem: bool,
} }
pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<Module, TranslateError> { pub fn to_spirv_module<'input>(ast: ast::Module<'input>) -> Result<Module, TranslateError> {
let mut id_defs = GlobalStringIdResolver::new(1); let mut id_defs = GlobalStringIdResolver::<'input>::new(1);
let mut ptx_impl_imports = HashMap::new(); let mut ptx_impl_imports = HashMap::new();
let directives = ast let directives = ast
.directives .directives
@ -1135,9 +1133,9 @@ fn emit_memory_model(builder: &mut dr::Builder) {
); );
} }
fn translate_directive<'input>( fn translate_directive<'input, 'a>(
id_defs: &mut GlobalStringIdResolver<'input>, id_defs: &'a mut GlobalStringIdResolver<'input>,
ptx_impl_imports: &mut HashMap<String, Directive<'input>>, ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
d: ast::Directive<'input, ast::ParsedArgParams<'input>>, d: ast::Directive<'input, ast::ParsedArgParams<'input>>,
) -> Result<Option<Directive<'input>>, TranslateError> { ) -> Result<Option<Directive<'input>>, TranslateError> {
Ok(match d { Ok(match d {
@ -1157,11 +1155,11 @@ fn translate_directive<'input>(
}) })
} }
fn translate_function<'a>( fn translate_function<'input, 'a>(
id_defs: &mut GlobalStringIdResolver<'a>, id_defs: &'a mut GlobalStringIdResolver<'input>,
ptx_impl_imports: &mut HashMap<String, Directive<'a>>, ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
f: ast::ParsedFunction<'a>, f: ast::ParsedFunction<'input>,
) -> Result<Option<Function<'a>>, TranslateError> { ) -> Result<Option<Function<'input>>, TranslateError> {
let import_as = match &f.func_directive { let import_as = match &f.func_directive {
ast::MethodDeclaration { ast::MethodDeclaration {
name: ast::MethodName::Func("__assertfail"), name: ast::MethodName::Func("__assertfail"),
@ -1206,7 +1204,7 @@ fn rename_fn_params<'a, 'b>(
} }
fn to_ssa<'input, 'b>( fn to_ssa<'input, 'b>(
ptx_impl_imports: &mut HashMap<String, Directive>, ptx_impl_imports: &'b mut HashMap<String, Directive<'input>>,
mut id_defs: FnStringIdResolver<'input, 'b>, mut id_defs: FnStringIdResolver<'input, 'b>,
fn_defs: GlobalFnDeclResolver<'input, 'b>, fn_defs: GlobalFnDeclResolver<'input, 'b>,
func_decl: Rc<RefCell<ast::MethodDeclaration<'input, spirv::Word>>>, func_decl: Rc<RefCell<ast::MethodDeclaration<'input, spirv::Word>>>,
@ -1231,6 +1229,8 @@ fn to_ssa<'input, 'b>(
let unadorned_statements = normalize_predicates(normalized_ids, &mut numeric_id_defs)?; let unadorned_statements = normalize_predicates(normalized_ids, &mut numeric_id_defs)?;
let typed_statements = let typed_statements =
convert_to_typed_statements(unadorned_statements, &fn_defs, &mut numeric_id_defs)?; convert_to_typed_statements(unadorned_statements, &fn_defs, &mut numeric_id_defs)?;
let typed_statements =
fix_special_registers2(ptx_impl_imports, typed_statements, &mut numeric_id_defs)?;
let (func_decl, typed_statements) = let (func_decl, typed_statements) =
convert_to_stateful_memory_access(func_decl, typed_statements, &mut numeric_id_defs)?; convert_to_stateful_memory_access(func_decl, typed_statements, &mut numeric_id_defs)?;
let ssa_statements = insert_mem_ssa_statements( let ssa_statements = insert_mem_ssa_statements(
@ -1238,8 +1238,6 @@ fn to_ssa<'input, 'b>(
&mut numeric_id_defs, &mut numeric_id_defs,
&mut (*func_decl).borrow_mut(), &mut (*func_decl).borrow_mut(),
)?; )?;
let ssa_statements =
fix_special_registers(ptx_impl_imports, ssa_statements, &mut numeric_id_defs)?;
let mut numeric_id_defs = numeric_id_defs.finish(); let mut numeric_id_defs = numeric_id_defs.finish();
let expanded_statements = expand_arguments(ssa_statements, &mut numeric_id_defs)?; let expanded_statements = expand_arguments(ssa_statements, &mut numeric_id_defs)?;
let expanded_statements = let expanded_statements =
@ -1257,90 +1255,147 @@ fn to_ssa<'input, 'b>(
}) })
} }
fn fix_special_registers( fn fix_special_registers2<'a, 'b, 'input>(
ptx_impl_imports: &mut HashMap<String, Directive>, ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
typed_statements: Vec<TypedStatement>, typed_statements: Vec<TypedStatement>,
numeric_id_defs: &mut NumericIdResolver, numeric_id_defs: &'a mut NumericIdResolver<'b>,
) -> Result<Vec<TypedStatement>, TranslateError> { ) -> Result<Vec<TypedStatement>, TranslateError> {
let mut result = Vec::with_capacity(typed_statements.len()); let result = Vec::with_capacity(typed_statements.len());
let mut sreg_sresolver = SpecialRegisterResolver {
ptx_impl_imports,
numeric_id_defs,
result,
};
for s in typed_statements { for s in typed_statements {
match s { match s {
Statement::LoadVar( Statement::Call(details) => {
details let new_statement = details.visit(&mut sreg_sresolver)?;
@ sreg_sresolver.result.push(new_statement);
LoadVarDetails {
member_index: Some((_, Some(_))),
..
},
) => {
let index = details.member_index.unwrap().0;
let sreg = numeric_id_defs
.special_registers
.get(details.arg.src)
.ok_or_else(|| error_unreachable())?;
let (ocl_name, ocl_type) = sreg.get_opencl_fn_type();
let index_constant = numeric_id_defs.register_intermediate(Some((
ast::Type::Scalar(ast::ScalarType::U32),
ast::StateSpace::Reg,
)));
result.push(Statement::Constant(ConstantDefinition {
dst: index_constant,
typ: ast::ScalarType::U32,
value: ast::ImmediateValue::U64(index as u64),
}));
let fn_result = numeric_id_defs.register_intermediate(Some((
ast::Type::Scalar(ocl_type),
ast::StateSpace::Reg,
)));
let return_arguments =
vec![(fn_result, ast::Type::Scalar(ocl_type), ast::StateSpace::Reg)];
let input_arguments = vec![(
TypedOperand::Reg(index_constant),
ast::Type::Scalar(ast::ScalarType::U32),
ast::StateSpace::Reg,
)];
let fn_call = register_external_fn_call(
numeric_id_defs,
ptx_impl_imports,
ocl_name.to_string(),
return_arguments.iter().map(|(_, typ, space)| (typ, *space)),
input_arguments.iter().map(|(_, typ, space)| (typ, *space)),
)?;
result.push(Statement::Call(ResolvedCall {
uniform: false,
return_arguments,
name: fn_call,
input_arguments,
}));
result.push(Statement::Conversion(ImplicitConversion {
src: fn_result,
dst: details.arg.dst,
from_type: ast::Type::Scalar(ocl_type),
from_space: ast::StateSpace::Reg,
to_type: ast::Type::Scalar(ast::ScalarType::U32),
to_space: ast::StateSpace::Reg,
kind: ConversionKind::Default,
}));
} }
s => result.push(s), Statement::Instruction(details) => {
let new_statement = details.visit(&mut sreg_sresolver)?;
sreg_sresolver.result.push(new_statement);
}
Statement::Conditional(details) => {
let new_statement = details.visit(&mut sreg_sresolver)?;
sreg_sresolver.result.push(new_statement);
}
Statement::Conversion(details) => {
let new_statement = details.visit(&mut sreg_sresolver)?;
sreg_sresolver.result.push(new_statement);
}
Statement::PtrAccess(details) => {
let new_statement = details.visit(&mut sreg_sresolver)?;
sreg_sresolver.result.push(new_statement);
}
Statement::RepackVector(details) => {
let new_statement = details.visit(&mut sreg_sresolver)?;
sreg_sresolver.result.push(new_statement);
}
s @ Statement::Variable(_)
| s @ Statement::Label(_)
| s @ Statement::FunctionPointer(_) => sreg_sresolver.result.push(s),
_ => return Err(error_unreachable()),
} }
} }
Ok(result) Ok(sreg_sresolver.result)
} }
fn get_sreg_id_scalar_type( struct SpecialRegisterResolver<'a, 'b, 'input> {
numeric_id_defs: &mut NumericIdResolver, ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
sreg: PtxSpecialRegister, numeric_id_defs: &'a mut NumericIdResolver<'b>,
) -> Option<(spirv::Word, ast::ScalarType, u8)> { result: Vec<TypedStatement>,
match sreg.normalized_sreg_and_type() { }
Some((normalized_sreg, typ, vec_width)) => Some((
numeric_id_defs impl<'a, 'b, 'input> SpecialRegisterResolver<'a, 'b, 'input> {
.special_registers fn replace_sreg(
.get_or_add(numeric_id_defs.current_id, normalized_sreg), &mut self,
typ, desc: ArgumentDescriptor<spirv::Word>,
vec_width, vector_index: Option<u8>,
)), ) -> Result<spirv::Word, TranslateError> {
None => None, if let Some(sreg) = self.numeric_id_defs.special_registers.get(desc.op) {
if desc.is_dst {
return Err(TranslateError::MismatchedType);
}
let input_arguments = match (vector_index, sreg.get_function_input_type()) {
(Some(idx), Some(inp_type)) => {
if inp_type != ast::ScalarType::U8 {
return Err(TranslateError::Unreachable);
}
let constant = self.numeric_id_defs.register_intermediate(Some((
ast::Type::Scalar(inp_type),
ast::StateSpace::Reg,
)));
self.result.push(Statement::Constant(ConstantDefinition {
dst: constant,
typ: inp_type,
value: ast::ImmediateValue::U64(idx as u64),
}));
vec![(
TypedOperand::Reg(constant),
ast::Type::Scalar(inp_type),
ast::StateSpace::Reg,
)]
}
(None, None) => Vec::new(),
_ => return Err(TranslateError::MismatchedType),
};
let ocl_fn_name = [ZLUDA_PTX_PREFIX, sreg.get_unprefixed_function_name()].concat();
let return_type = sreg.get_function_return_type();
let fn_result = self.numeric_id_defs.register_intermediate(Some((
ast::Type::Scalar(return_type),
ast::StateSpace::Reg,
)));
let return_arguments = vec![(
fn_result,
ast::Type::Scalar(return_type),
ast::StateSpace::Reg,
)];
let fn_call = register_external_fn_call(
self.numeric_id_defs,
self.ptx_impl_imports,
ocl_fn_name.to_string(),
return_arguments.iter().map(|(_, typ, space)| (typ, *space)),
input_arguments.iter().map(|(_, typ, space)| (typ, *space)),
)?;
self.result.push(Statement::Call(ResolvedCall {
uniform: false,
return_arguments,
name: fn_call,
input_arguments,
}));
Ok(fn_result)
} else {
Ok(desc.op)
}
}
}
impl<'a, 'b, 'input> ArgumentMapVisitor<TypedArgParams, TypedArgParams>
for SpecialRegisterResolver<'a, 'b, 'input>
{
fn id(
&mut self,
desc: ArgumentDescriptor<spirv::Word>,
_: Option<(&ast::Type, ast::StateSpace)>,
) -> Result<spirv::Word, TranslateError> {
self.replace_sreg(desc, None)
}
fn operand(
&mut self,
desc: ArgumentDescriptor<TypedOperand>,
typ: &ast::Type,
state_space: ast::StateSpace,
) -> Result<TypedOperand, TranslateError> {
Ok(match desc.op {
TypedOperand::Reg(reg) => TypedOperand::Reg(self.replace_sreg(desc.new_op(reg), None)?),
op @ TypedOperand::RegOffset(_, _) => op,
op @ TypedOperand::Imm(_) => op,
TypedOperand::VecMember(reg, idx) => {
TypedOperand::VecMember(self.replace_sreg(desc.new_op(reg), Some(idx))?, idx)
}
})
} }
} }
@ -1968,22 +2023,8 @@ fn insert_mem_ssa_statements<'a, 'b>(
} }
inst => insert_mem_ssa_statement_default(id_def, &mut result, inst)?, inst => insert_mem_ssa_statement_default(id_def, &mut result, inst)?,
}, },
Statement::Conditional(mut bra) => { Statement::Conditional(bra) => {
let generated_id = id_def.register_intermediate(Some(( insert_mem_ssa_statement_default(id_def, &mut result, bra)?
ast::Type::Scalar(ast::ScalarType::Pred),
ast::StateSpace::Reg,
)));
result.push(Statement::LoadVar(LoadVarDetails {
arg: Arg2 {
dst: generated_id,
src: bra.predicate,
},
state_space: ast::StateSpace::Reg,
typ: ast::Type::Scalar(ast::ScalarType::Pred),
member_index: None,
}));
bra.predicate = generated_id;
result.push(Statement::Conditional(bra));
} }
Statement::Conversion(conv) => { Statement::Conversion(conv) => {
insert_mem_ssa_statement_default(id_def, &mut result, conv)? insert_mem_ssa_statement_default(id_def, &mut result, conv)?
@ -1997,7 +2038,9 @@ fn insert_mem_ssa_statements<'a, 'b>(
Statement::FunctionPointer(func_ptr) => { Statement::FunctionPointer(func_ptr) => {
insert_mem_ssa_statement_default(id_def, &mut result, func_ptr)? insert_mem_ssa_statement_default(id_def, &mut result, func_ptr)?
} }
s @ Statement::Variable(_) | s @ Statement::Label(_) => result.push(s), s @ Statement::Variable(_) | s @ Statement::Label(_) | s @ Statement::Constant(..) => {
result.push(s)
}
_ => return Err(error_unreachable()), _ => return Err(error_unreachable()),
} }
} }
@ -4539,6 +4582,7 @@ fn convert_to_stateful_memory_access<'a, 'input>(
match statement { match statement {
l @ Statement::Label(_) => result.push(l), l @ Statement::Label(_) => result.push(l),
c @ Statement::Conditional(_) => result.push(c), c @ Statement::Conditional(_) => result.push(c),
c @ Statement::Constant(..) => result.push(c),
Statement::Variable(var) => { Statement::Variable(var) => {
if !remapped_ids.contains_key(&var.name) { if !remapped_ids.contains_key(&var.name) {
result.push(Statement::Variable(var)); result.push(Statement::Variable(var));
@ -4791,13 +4835,9 @@ fn is_64_bit_integer(id_defs: &NumericIdResolver, id: spirv::Word) -> bool {
#[derive(Ord, PartialOrd, Eq, PartialEq, Hash, Copy, Clone)] #[derive(Ord, PartialOrd, Eq, PartialEq, Hash, Copy, Clone)]
enum PtxSpecialRegister { enum PtxSpecialRegister {
Tid, Tid,
Tid64,
Ntid, Ntid,
Ntid64,
Ctaid, Ctaid,
Ctaid64,
Nctaid, Nctaid,
Nctaid64,
Clock, Clock,
LanemaskLt, LanemaskLt,
} }
@ -4817,71 +4857,43 @@ impl PtxSpecialRegister {
fn get_type(self) -> ast::Type { fn get_type(self) -> ast::Type {
match self { match self {
PtxSpecialRegister::Tid => ast::Type::Vector(ast::ScalarType::U32, 4), PtxSpecialRegister::Tid
PtxSpecialRegister::Tid64 => ast::Type::Vector(ast::ScalarType::U64, 3), | PtxSpecialRegister::Ntid
PtxSpecialRegister::Ntid => ast::Type::Vector(ast::ScalarType::U32, 4), | PtxSpecialRegister::Ctaid
PtxSpecialRegister::Ntid64 => ast::Type::Vector(ast::ScalarType::U64, 3), | PtxSpecialRegister::Nctaid => ast::Type::Vector(self.get_function_return_type(), 4),
PtxSpecialRegister::Ctaid => ast::Type::Vector(ast::ScalarType::U32, 4), _ => ast::Type::Scalar(self.get_function_return_type()),
PtxSpecialRegister::Ctaid64 => ast::Type::Vector(ast::ScalarType::U64, 3),
PtxSpecialRegister::Nctaid => ast::Type::Vector(ast::ScalarType::U32, 4),
PtxSpecialRegister::Nctaid64 => ast::Type::Vector(ast::ScalarType::U64, 3),
PtxSpecialRegister::Clock => ast::Type::Scalar(ast::ScalarType::U32),
PtxSpecialRegister::LanemaskLt => ast::Type::Scalar(ast::ScalarType::U32),
} }
} }
fn get_scalar_type(self) -> ast::ScalarType { fn get_function_return_type(self) -> ast::ScalarType {
match self {
PtxSpecialRegister::Tid => ast::ScalarType::U32,
PtxSpecialRegister::Ntid => ast::ScalarType::U32,
PtxSpecialRegister::Ctaid => ast::ScalarType::U32,
PtxSpecialRegister::Nctaid => ast::ScalarType::U32,
PtxSpecialRegister::Clock => ast::ScalarType::U32,
PtxSpecialRegister::LanemaskLt => ast::ScalarType::U32,
}
}
fn get_function_input_type(self) -> Option<ast::ScalarType> {
match self { match self {
PtxSpecialRegister::Tid PtxSpecialRegister::Tid
| PtxSpecialRegister::Ntid | PtxSpecialRegister::Ntid
| PtxSpecialRegister::Ctaid | PtxSpecialRegister::Ctaid
| PtxSpecialRegister::Nctaid | PtxSpecialRegister::Nctaid => Some(ast::ScalarType::U8),
| PtxSpecialRegister::Clock PtxSpecialRegister::Clock | PtxSpecialRegister::LanemaskLt => None,
| PtxSpecialRegister::LanemaskLt => ast::ScalarType::U32,
PtxSpecialRegister::Tid64
| PtxSpecialRegister::Ntid64
| PtxSpecialRegister::Ctaid64
| PtxSpecialRegister::Nctaid64 => ast::ScalarType::U64,
} }
} }
fn get_opencl_fn_type(self) -> (&'static str, ast::ScalarType) { fn get_unprefixed_function_name(self) -> &'static str {
match self { match self {
PtxSpecialRegister::Tid | PtxSpecialRegister::Tid64 => { PtxSpecialRegister::Tid => "sreg_tid",
("_Z12get_local_idj", ast::ScalarType::U64) PtxSpecialRegister::Ntid => "sreg_ntid",
} PtxSpecialRegister::Ctaid => "sreg_ctaid",
PtxSpecialRegister::Ntid | PtxSpecialRegister::Ntid64 => { PtxSpecialRegister::Nctaid => "sreg_nctaid",
("_Z14get_local_sizej", ast::ScalarType::U64) PtxSpecialRegister::Clock => "sreg_clock",
} PtxSpecialRegister::LanemaskLt => "sreg_lanemask_lt",
PtxSpecialRegister::Ctaid | PtxSpecialRegister::Ctaid64 => {
("_Z12get_group_idj", ast::ScalarType::U64)
}
PtxSpecialRegister::Nctaid | PtxSpecialRegister::Nctaid64 => {
("_Z14get_num_groupsj", ast::ScalarType::U64)
}
PtxSpecialRegister::Clock => (ZLUDA_PTX_PREFIX_SREG_CLOCK, ast::ScalarType::U32),
PtxSpecialRegister::LanemaskLt => {
(ZLUDA_PTX_PREFIX_SREG_LANEMASK_LT, ast::ScalarType::U32)
}
}
}
fn normalized_sreg_and_type(self) -> Option<(PtxSpecialRegister, ast::ScalarType, u8)> {
match self {
PtxSpecialRegister::Tid => Some((PtxSpecialRegister::Tid64, ast::ScalarType::U64, 3)),
PtxSpecialRegister::Ntid => Some((PtxSpecialRegister::Ntid64, ast::ScalarType::U64, 3)),
PtxSpecialRegister::Ctaid => {
Some((PtxSpecialRegister::Ctaid64, ast::ScalarType::U64, 3))
}
PtxSpecialRegister::Nctaid => {
Some((PtxSpecialRegister::Nctaid64, ast::ScalarType::U64, 3))
}
PtxSpecialRegister::Tid64
| PtxSpecialRegister::Ntid64
| PtxSpecialRegister::Ctaid64
| PtxSpecialRegister::Nctaid64
| PtxSpecialRegister::Clock => None,
PtxSpecialRegister::LanemaskLt => None,
} }
} }
} }
@ -4899,16 +4911,6 @@ impl SpecialRegistersMap {
} }
} }
fn builtins<'a>(&'a self) -> impl Iterator<Item = (PtxSpecialRegister, spirv::Word)> + 'a {
self.reg_to_id.iter().filter_map(|(sreg, id)| {
if sreg.normalized_sreg_and_type().is_none() {
Some((*sreg, *id))
} else {
None
}
})
}
fn interface(&self) -> Vec<spirv::Word> { fn interface(&self) -> Vec<spirv::Word> {
return Vec::new(); return Vec::new();
/* /*
@ -6416,6 +6418,35 @@ struct BrachCondition {
if_false: spirv::Word, if_false: spirv::Word,
} }
impl<From: ArgParamsEx<Id = spirv::Word>, To: ArgParamsEx<Id = spirv::Word>> Visitable<From, To>
for BrachCondition
{
fn visit(
self,
visitor: &mut impl ArgumentMapVisitor<From, To>,
) -> Result<Statement<ast::Instruction<To>, To>, TranslateError> {
let predicate = visitor.id(
ArgumentDescriptor {
op: self.predicate,
is_dst: false,
is_memory_access: false,
non_default_implicit_conversion: None,
},
Some((
&ast::Type::Scalar(ast::ScalarType::Pred),
ast::StateSpace::Reg,
)),
)?;
let if_true = self.if_true;
let if_false = self.if_false;
Ok(Statement::Conditional(BrachCondition {
predicate,
if_true,
if_false,
}))
}
}
#[derive(Clone)] #[derive(Clone)]
struct ImplicitConversion { struct ImplicitConversion {
src: spirv::Word, src: spirv::Word,