Refactor implicit conversions and start implementing vector extract/insert

2025-04-20 08:24:44 +00:00 · 2020-09-14 21:45:56 +02:00 · 2020-09-14 21:45:56 +02:00 · bb5025c9b1
commit bb5025c9b1
parent 48dac43540
11 changed files with 1118 additions and 540 deletions
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@ -349,6 +349,7 @@ pub trait ArgParams {
    type ID;
    type Operand;
    type CallOperand;
+    type VecOperand;
 }

 pub struct ParsedArgParams<'a> {
@ -359,6 +360,7 @@ impl<'a> ArgParams for ParsedArgParams<'a> {
    type ID = &'a str;
    type Operand = Operand<&'a str>;
    type CallOperand = CallOperand<&'a str>;
+    type VecOperand = (&'a str, u8);
 }

 pub struct Arg1<P: ArgParams> {
@ -376,9 +378,9 @@ pub struct Arg2St<P: ArgParams> {
 }

 pub enum Arg2Vec<P: ArgParams> {
-    Dst((P::ID, u8), P::ID),
-    Src(P::ID, (P::ID, u8)),
-    Both((P::ID, u8), (P::ID, u8)),
+    Dst(P::VecOperand, P::ID),
+    Src(P::ID, P::VecOperand),
+    Both(P::VecOperand, P::VecOperand),
 }

 pub struct Arg3<P: ArgParams> {
@ -424,8 +426,7 @@ pub struct LdData {
    pub qualifier: LdStQualifier,
    pub state_space: LdStateSpace,
    pub caching: LdCacheOperator,
-    pub vector: Option<u8>,
-    pub typ: ScalarType,
+    pub typ: Type,
 }

 #[derive(Copy, Clone, PartialEq, Eq)]
@ -710,8 +711,7 @@ pub struct StData {
    pub qualifier: LdStQualifier,
    pub state_space: StStateSpace,
    pub caching: StCacheOperator,
-    pub vector: Option<u8>,
-    pub typ: ScalarType,
+    pub typ: Type,
 }

 #[derive(PartialEq, Eq, Copy, Clone)]
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@ -269,10 +269,10 @@ ScalarType: ast::ScalarType = {
    ".f16" => ast::ScalarType::F16,
    ".f16x2" => ast::ScalarType::F16x2,
    ".pred" => ast::ScalarType::Pred,
-    MemoryType
+    LdStScalarType
 };

-MemoryType: ast::ScalarType = {
+LdStScalarType: ast::ScalarType = {
    ".b8" => ast::ScalarType::B8,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
@ -446,13 +446,12 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
 InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <v:VectorPrefix?> <t:MemoryType> <dst:ExtendedID> "," "[" <src:Operand> "]" => {
+    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:ExtendedID> "," "[" <src:Operand> "]" => {
        ast::Instruction::Ld(
            ast::LdData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
                state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
                caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
-                vector: v,
                typ: t
            },
            ast::Arg2 { dst:dst, src:src }
@ -460,6 +459,11 @@ InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    }
 };

+LdStType: ast::Type = {
+    <v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v),
+    <t:LdStScalarType> => ast::Type::Scalar(t),
+}
+
 LdStQualifier: ast::LdStQualifier = {
    ".weak" => ast::LdStQualifier::Weak,
    ".volatile" => ast::LdStQualifier::Volatile,
@ -895,13 +899,12 @@ ShlType: ast::ShlType = {
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
 // Warning: NVIDIA documentation is incorrect, you can specify scope only once
 InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <v:VectorPrefix?> <t:MemoryType> "[" <src1:Operand> "]" "," <src2:Operand> => {
+    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> "[" <src1:Operand> "]" "," <src2:Operand> => {
        ast::Instruction::St(
            ast::StData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
                state_space: ss.unwrap_or(ast::StStateSpace::Generic),
                caching: cop.unwrap_or(ast::StCacheOperator::Writeback),
-                vector: v,
                typ: t
            },
            ast::Arg2St { src1:src1, src2:src2 }
--- a/ptx/src/test/spirv_run/call.spvtxt
+++ b/ptx/src/test/spirv_run/call.spvtxt
@ -4,20 +4,20 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
-         %45 = OpExtInstImport "OpenCL.std"
+         %47 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %4 "call"
       %void = OpTypeVoid
      %ulong = OpTypeInt 64 0
-         %48 = OpTypeFunction %void %ulong %ulong
+         %50 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
-         %51 = OpTypeFunction %ulong %ulong
+         %53 = OpTypeFunction %ulong %ulong
    %ulong_1 = OpConstant %ulong 1
-          %4 = OpFunction %void None %48
+          %4 = OpFunction %void None %50
         %12 = OpFunctionParameter %ulong
         %13 = OpFunctionParameter %ulong
-         %30 = OpLabel
+         %32 = OpLabel
          %5 = OpVariable %_ptr_Function_ulong Function
          %6 = OpVariable %_ptr_Function_ulong Function
          %7 = OpVariable %_ptr_Function_ulong Function
@ -38,7 +38,9 @@
         %18 = OpLoad %ulong %28
               OpStore %9 %18
         %21 = OpLoad %ulong %9
-         %20 = OpCopyObject %ulong %21
+         %29 = OpCopyObject %ulong %21
+         %30 = OpCopyObject %ulong %29
+         %20 = OpCopyObject %ulong %30
               OpStore %10 %20
         %23 = OpLoad %ulong %10
         %22 = OpFunctionCall %ulong %1 %23
@ -48,26 +50,26 @@
               OpStore %9 %24
         %26 = OpLoad %ulong %8
         %27 = OpLoad %ulong %9
-         %29 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %26
-               OpStore %29 %27
+         %31 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %26
+               OpStore %31 %27
               OpReturn
               OpFunctionEnd
-          %1 = OpFunction %ulong None %51
-         %34 = OpFunctionParameter %ulong
-         %43 = OpLabel
-         %32 = OpVariable %_ptr_Function_ulong Function
-         %31 = OpVariable %_ptr_Function_ulong Function
+          %1 = OpFunction %ulong None %53
+         %36 = OpFunctionParameter %ulong
+         %45 = OpLabel
+         %34 = OpVariable %_ptr_Function_ulong Function
         %33 = OpVariable %_ptr_Function_ulong Function
-               OpStore %32 %34
-         %36 = OpLoad %ulong %32
-         %35 = OpCopyObject %ulong %36
-               OpStore %33 %35
-         %38 = OpLoad %ulong %33
-         %37 = OpIAdd %ulong %38 %ulong_1
-               OpStore %33 %37
-         %40 = OpLoad %ulong %33
-         %39 = OpCopyObject %ulong %40
-               OpStore %31 %39
-         %41 = OpLoad %ulong %31
-               OpReturnValue %41
+         %35 = OpVariable %_ptr_Function_ulong Function
+               OpStore %34 %36
+         %38 = OpLoad %ulong %34
+         %37 = OpCopyObject %ulong %38
+               OpStore %35 %37
+         %40 = OpLoad %ulong %35
+         %39 = OpIAdd %ulong %40 %ulong_1
+               OpStore %35 %39
+         %42 = OpLoad %ulong %35
+         %41 = OpCopyObject %ulong %42
+               OpStore %33 %41
+         %43 = OpLoad %ulong %33
+               OpReturnValue %43
               OpFunctionEnd
--- a/ptx/src/test/spirv_run/cvta.spvtxt
+++ b/ptx/src/test/spirv_run/cvta.spvtxt
@ -4,20 +4,20 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
-         %25 = OpExtInstImport "OpenCL.std"
+         %29 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %1 "cvta"
       %void = OpTypeVoid
      %ulong = OpTypeInt 64 0
-         %28 = OpTypeFunction %void %ulong %ulong
+         %32 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
      %float = OpTypeFloat 32
 %_ptr_Function_float = OpTypePointer Function %float
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
-          %1 = OpFunction %void None %28
+          %1 = OpFunction %void None %32
          %7 = OpFunctionParameter %ulong
          %8 = OpFunctionParameter %ulong
-         %23 = OpLabel
+         %27 = OpLabel
          %2 = OpVariable %_ptr_Function_ulong Function
          %3 = OpVariable %_ptr_Function_ulong Function
          %4 = OpVariable %_ptr_Function_ulong Function
@ -32,18 +32,22 @@
         %11 = OpCopyObject %ulong %12
               OpStore %5 %11
         %14 = OpLoad %ulong %4
-         %13 = OpCopyObject %ulong %14
+         %22 = OpCopyObject %ulong %14
+         %21 = OpCopyObject %ulong %22
+         %13 = OpCopyObject %ulong %21
               OpStore %4 %13
         %16 = OpLoad %ulong %5
-         %15 = OpCopyObject %ulong %16
+         %24 = OpCopyObject %ulong %16
+         %23 = OpCopyObject %ulong %24
+         %15 = OpCopyObject %ulong %23
               OpStore %5 %15
         %18 = OpLoad %ulong %4
-         %21 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %18
-         %17 = OpLoad %float %21
+         %25 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %18
+         %17 = OpLoad %float %25
               OpStore %6 %17
         %19 = OpLoad %ulong %5
         %20 = OpLoad %float %6
-         %22 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %19
-               OpStore %22 %20
+         %26 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %19
+               OpStore %26 %20
               OpReturn
               OpFunctionEnd
--- a/ptx/src/test/spirv_run/ld_st_implicit.ptx
+++ b/ptx/src/test/spirv_run/ld_st_implicit.ptx
@ -0,0 +1,20 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry ld_st_implicit(
+	.param .u64 input,
+	.param .u64 output
+)
+{
+	.reg .u64 	    in_addr;
+    .reg .u64 	    out_addr;
+    .reg .b64 	    temp;
+
+	ld.param.u64 	in_addr, [input];
+    ld.param.u64 	out_addr, [output];
+
+    ld.global.f32          temp, [in_addr];
+    st.global.f32          [out_addr], temp;
+	ret;
+}
--- a/ptx/src/test/spirv_run/ld_st_implicit.spvtxt
+++ b/ptx/src/test/spirv_run/ld_st_implicit.spvtxt
@ -0,0 +1,48 @@
+               OpCapability GenericPointer
+               OpCapability Linkage
+               OpCapability Addresses
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability Int8
+         %23 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %1 "ld_st_implicit"
+       %void = OpTypeVoid
+      %ulong = OpTypeInt 64 0
+         %26 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+      %float = OpTypeFloat 32
+%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
+       %uint = OpTypeInt 32 0
+          %1 = OpFunction %void None %26
+          %7 = OpFunctionParameter %ulong
+          %8 = OpFunctionParameter %ulong
+         %21 = OpLabel
+          %2 = OpVariable %_ptr_Function_ulong Function
+          %3 = OpVariable %_ptr_Function_ulong Function
+          %4 = OpVariable %_ptr_Function_ulong Function
+          %5 = OpVariable %_ptr_Function_ulong Function
+          %6 = OpVariable %_ptr_Function_ulong Function
+               OpStore %2 %7
+               OpStore %3 %8
+         %10 = OpLoad %ulong %2
+          %9 = OpCopyObject %ulong %10
+               OpStore %4 %9
+         %12 = OpLoad %ulong %3
+         %11 = OpCopyObject %ulong %12
+               OpStore %5 %11
+         %14 = OpLoad %ulong %4
+         %17 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %14
+         %18 = OpLoad %float %17
+         %30 = OpBitcast %ulong %18
+         %32 = OpUConvert %uint %30
+         %13 = OpBitcast %uint %32
+               OpStore %6 %13
+         %15 = OpLoad %ulong %5
+         %16 = OpLoad %ulong %6
+         %33 = OpBitcast %uint %16
+         %19 = OpUConvert %ulong %33
+         %20 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %15
+               OpStore %20 %19
+               OpReturn
+               OpFunctionEnd
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@ -8,10 +8,12 @@ use spirv_headers::Word;
 use spirv_tools_sys::{
    spv_binary, spv_endianness_t, spv_parsed_instruction_t, spv_result_t, spv_target_env,
 };
+use std::collections::hash_map::Entry;
 use std::error;
 use std::ffi::{c_void, CStr, CString};
 use std::fmt;
 use std::fmt::{Debug, Display, Formatter};
+use std::hash::Hash;
 use std::mem;
 use std::slice;
 use std::{borrow::Cow, collections::HashMap, env, fs, path::PathBuf, ptr, str};
@ -41,6 +43,7 @@ macro_rules! test_ptx {
 }

 test_ptx!(ld_st, [1u64], [1u64]);
+test_ptx!(ld_st_implicit, [0.5f32], [0.5f32]);
 test_ptx!(mov, [1u64], [1u64]);
 test_ptx!(mul_lo, [1u64], [2u64]);
 test_ptx!(mul_hi, [u64::max_value()], [1u64]);
@ -214,14 +217,45 @@ fn test_spvtxt_assert<'a>(
                }
            }
        }
-        panic!(spirv_text);
+        panic!(spirv_text.to_string());
    }
    unsafe { spirv_tools::spvContextDestroy(spv_context) };
    Ok(())
 }

+struct EqMap<T>
+where
+    T: Eq + Copy + Hash,
+{
+    m1: HashMap<T, T>,
+    m2: HashMap<T, T>,
+}
+
+impl<T: Copy + Eq + Hash> EqMap<T> {
+    fn new() -> Self {
+        EqMap {
+            m1: HashMap::new(),
+            m2: HashMap::new(),
+        }
+    }
+
+    fn is_equal(&mut self, t1: T, t2: T) -> bool {
+        match (self.m1.entry(t1), self.m2.entry(t2)) {
+            (Entry::Occupied(entry1), Entry::Occupied(entry2)) => {
+                *entry1.get() == t2 && *entry2.get() == t1
+            }
+            (Entry::Vacant(entry1), Entry::Vacant(entry2)) => {
+                entry1.insert(t2);
+                entry2.insert(t1);
+                true
+            }
+            _ => false,
+        }
+    }
+}
+
 fn is_spirv_fn_equal(fn1: &Function, fn2: &Function) -> bool {
-    let mut map = HashMap::new();
+    let mut map = EqMap::new();
    if !is_option_equal(&fn1.def, &fn2.def, &mut map, is_instr_equal) {
        return false;
    }
@ -247,7 +281,7 @@ fn is_spirv_fn_equal(fn1: &Function, fn2: &Function) -> bool {
    true
 }

-fn is_block_equal(b1: &Block, b2: &Block, map: &mut HashMap<Word, Word>) -> bool {
+fn is_block_equal(b1: &Block, b2: &Block, map: &mut EqMap<Word>) -> bool {
    if !is_option_equal(&b1.label, &b2.label, map, is_instr_equal) {
        return false;
    }
@ -262,11 +296,7 @@ fn is_block_equal(b1: &Block, b2: &Block, map: &mut HashMap<Word, Word>) -> bool
    true
 }

-fn is_instr_equal(
-    instr1: &Instruction,
-    instr2: &Instruction,
-    map: &mut HashMap<Word, Word>,
-) -> bool {
+fn is_instr_equal(instr1: &Instruction, instr2: &Instruction, map: &mut EqMap<Word>) -> bool {
    if instr1.class.opcode != instr2.class.opcode {
        return false;
    }
@ -306,24 +336,14 @@ fn is_instr_equal(
    true
 }

-fn is_word_equal(w1: &Word, w2: &Word, map: &mut HashMap<Word, Word>) -> bool {
-    match map.entry(*w1) {
-        std::collections::hash_map::Entry::Occupied(entry) => {
-            if entry.get() != w2 {
-                return false;
-            }
-        }
-        std::collections::hash_map::Entry::Vacant(entry) => {
-            entry.insert(*w2);
-        }
-    }
-    true
+fn is_word_equal(t1: &Word, t2: &Word, map: &mut EqMap<Word>) -> bool {
+    map.is_equal(*t1, *t2)
 }

-fn is_option_equal<T, F: FnOnce(&T, &T, &mut HashMap<Word, Word>) -> bool>(
+fn is_option_equal<T, F: FnOnce(&T, &T, &mut EqMap<Word>) -> bool>(
    o1: &Option<T>,
    o2: &Option<T>,
-    map: &mut HashMap<Word, Word>,
+    map: &mut EqMap<Word>,
    f: F,
 ) -> bool {
    match (o1, o2) {
--- a/ptx/src/test/spirv_run/not.spvtxt
+++ b/ptx/src/test/spirv_run/not.spvtxt
@ -4,18 +4,18 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
-         %24 = OpExtInstImport "OpenCL.std"
+         %26 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %1 "not"
       %void = OpTypeVoid
      %ulong = OpTypeInt 64 0
-         %27 = OpTypeFunction %void %ulong %ulong
+         %29 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
 %_ptr_Generic_ulong = OpTypePointer Generic %ulong
-          %1 = OpFunction %void None %27
+          %1 = OpFunction %void None %29
          %8 = OpFunctionParameter %ulong
          %9 = OpFunctionParameter %ulong
-         %22 = OpLabel
+         %24 = OpLabel
          %2 = OpVariable %_ptr_Function_ulong Function
          %3 = OpVariable %_ptr_Function_ulong Function
          %4 = OpVariable %_ptr_Function_ulong Function
@ -35,11 +35,13 @@
         %14 = OpLoad %ulong %20
               OpStore %6 %14
         %17 = OpLoad %ulong %6
-         %16 = OpNot %ulong %17
+         %22 = OpCopyObject %ulong %17
+         %21 = OpNot %ulong %22
+         %16 = OpCopyObject %ulong %21
               OpStore %7 %16
         %18 = OpLoad %ulong %5
         %19 = OpLoad %ulong %7
-         %21 = OpConvertUToPtr %_ptr_Generic_ulong %18
-               OpStore %21 %19
+         %23 = OpConvertUToPtr %_ptr_Generic_ulong %18
+               OpStore %23 %19
               OpReturn
               OpFunctionEnd
--- a/ptx/src/test/spirv_run/shl.spvtxt
+++ b/ptx/src/test/spirv_run/shl.spvtxt
@ -4,20 +4,20 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
-         %25 = OpExtInstImport "OpenCL.std"
+         %27 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %1 "shl"
       %void = OpTypeVoid
      %ulong = OpTypeInt 64 0
-         %28 = OpTypeFunction %void %ulong %ulong
+         %30 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
 %_ptr_Generic_ulong = OpTypePointer Generic %ulong
       %uint = OpTypeInt 32 0
     %uint_2 = OpConstant %uint 2
-          %1 = OpFunction %void None %28
+          %1 = OpFunction %void None %30
          %8 = OpFunctionParameter %ulong
          %9 = OpFunctionParameter %ulong
-         %23 = OpLabel
+         %25 = OpLabel
          %2 = OpVariable %_ptr_Function_ulong Function
          %3 = OpVariable %_ptr_Function_ulong Function
          %4 = OpVariable %_ptr_Function_ulong Function
@ -37,11 +37,13 @@
         %14 = OpLoad %ulong %21
               OpStore %6 %14
         %17 = OpLoad %ulong %6
-         %16 = OpShiftLeftLogical %ulong %17 %uint_2
+         %23 = OpCopyObject %ulong %17
+         %22 = OpShiftLeftLogical %ulong %23 %uint_2
+         %16 = OpCopyObject %ulong %22
               OpStore %7 %16
         %18 = OpLoad %ulong %5
         %19 = OpLoad %ulong %7
-         %22 = OpConvertUToPtr %_ptr_Generic_ulong %18
-               OpStore %22 %19
+         %24 = OpConvertUToPtr %_ptr_Generic_ulong %18
+               OpStore %24 %19
               OpReturn
               OpFunctionEnd
--- a/ptx/src/test/spirv_run/vector.spvtxt
+++ b/ptx/src/test/spirv_run/vector.spvtxt
@ -4,43 +4,92 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
-         %25 = OpExtInstImport "OpenCL.std"
+         %58 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "add"
+               OpEntryPoint Kernel %31 "vector"
       %void = OpTypeVoid
+       %uint = OpTypeInt 32 0
+     %v2uint = OpTypeVector %uint 2
+         %62 = OpTypeFunction %v2uint %v2uint
+%_ptr_Function_v2uint = OpTypePointer Function %v2uint
+%_ptr_Function_uint = OpTypePointer Function %uint
      %ulong = OpTypeInt 64 0
-         %28 = OpTypeFunction %void %ulong %ulong
+         %66 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
-%_ptr_Generic_ulong = OpTypePointer Generic %ulong
-    %ulong_1 = OpConstant %ulong 1
-          %1 = OpFunction %void None %28
-          %8 = OpFunctionParameter %ulong
-          %9 = OpFunctionParameter %ulong
-         %23 = OpLabel
-          %2 = OpVariable %_ptr_Function_ulong Function
-          %3 = OpVariable %_ptr_Function_ulong Function
-          %4 = OpVariable %_ptr_Function_ulong Function
-          %5 = OpVariable %_ptr_Function_ulong Function
-          %6 = OpVariable %_ptr_Function_ulong Function
-          %7 = OpVariable %_ptr_Function_ulong Function
-               OpStore %2 %8
-               OpStore %3 %9
-         %11 = OpLoad %ulong %2
-         %10 = OpCopyObject %ulong %11
-               OpStore %4 %10
-         %13 = OpLoad %ulong %3
-         %12 = OpCopyObject %ulong %13
-               OpStore %5 %12
-         %15 = OpLoad %ulong %4
-         %21 = OpConvertUToPtr %_ptr_Generic_ulong %15
-         %14 = OpLoad %ulong %21
-               OpStore %6 %14
-         %17 = OpLoad %ulong %6
-         %16 = OpIAdd %ulong %17 %ulong_1
-               OpStore %7 %16
-         %18 = OpLoad %ulong %5
-         %19 = OpLoad %ulong %7
-         %22 = OpConvertUToPtr %_ptr_Generic_ulong %18
-               OpStore %22 %19
+%_ptr_Generic_v2uint = OpTypePointer Generic %v2uint
+          %1 = OpFunction %v2uint None %62
+          %7 = OpFunctionParameter %v2uint
+         %30 = OpLabel
+          %3 = OpVariable %_ptr_Function_v2uint Function
+          %2 = OpVariable %_ptr_Function_v2uint Function
+          %4 = OpVariable %_ptr_Function_v2uint Function
+          %5 = OpVariable %_ptr_Function_uint Function
+          %6 = OpVariable %_ptr_Function_uint Function
+               OpStore %3 %7
+          %9 = OpLoad %v2uint %3
+         %24 = OpCompositeExtract %uint %9 0
+          %8 = OpCopyObject %uint %24
+               OpStore %5 %8
+         %11 = OpLoad %v2uint %3
+         %25 = OpCompositeExtract %uint %11 1
+         %10 = OpCopyObject %uint %25
+               OpStore %6 %10
+         %13 = OpLoad %uint %5
+         %14 = OpLoad %uint %6
+         %12 = OpIAdd %uint %13 %14
+               OpStore %6 %12
+         %16 = OpLoad %uint %6
+         %26 = OpCopyObject %uint %16
+         %15 = OpCompositeInsert %uint %26 %15 0
+               OpStore %4 %15
+         %18 = OpLoad %uint %6
+         %27 = OpCopyObject %uint %18
+         %17 = OpCompositeInsert %uint %27 %17 1
+               OpStore %4 %17
+         %20 = OpLoad %v2uint %4
+         %29 = OpCompositeExtract %uint %20 1
+         %28 = OpCopyObject %uint %29
+         %19 = OpCompositeInsert %uint %28 %19 0
+               OpStore %4 %19
+         %22 = OpLoad %v2uint %4
+         %21 = OpCopyObject %v2uint %22
+               OpStore %2 %21
+         %23 = OpLoad %v2uint %2
+               OpReturnValue %23
+               OpFunctionEnd
+         %31 = OpFunction %void None %66
+         %40 = OpFunctionParameter %ulong
+         %41 = OpFunctionParameter %ulong
+         %56 = OpLabel
+         %32 = OpVariable %_ptr_Function_ulong Function
+         %33 = OpVariable %_ptr_Function_ulong Function
+         %34 = OpVariable %_ptr_Function_ulong Function
+         %35 = OpVariable %_ptr_Function_ulong Function
+         %36 = OpVariable %_ptr_Function_v2uint Function
+         %37 = OpVariable %_ptr_Function_uint Function
+         %38 = OpVariable %_ptr_Function_uint Function
+         %39 = OpVariable %_ptr_Function_ulong Function
+               OpStore %32 %40
+               OpStore %33 %41
+         %43 = OpLoad %ulong %32
+         %42 = OpCopyObject %ulong %43
+               OpStore %34 %42
+         %45 = OpLoad %ulong %33
+         %44 = OpCopyObject %ulong %45
+               OpStore %35 %44
+         %47 = OpLoad %ulong %34
+         %54 = OpConvertUToPtr %_ptr_Generic_v2uint %47
+         %46 = OpLoad %v2uint %54
+               OpStore %36 %46
+         %49 = OpLoad %v2uint %36
+         %48 = OpFunctionCall %v2uint %1 %49
+               OpStore %36 %48
+         %51 = OpLoad %v2uint %36
+         %50 = OpCopyObject %ulong %51
+               OpStore %39 %50
+         %52 = OpLoad %ulong %35
+         %53 = OpLoad %v2uint %36
+         %55 = OpConvertUToPtr %_ptr_Generic_v2uint %52
+               OpStore %55 %53
               OpReturn
               OpFunctionEnd
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs