Refactor how vectors are handled (#20)

The current code has a problem with handling vector members, such as "b.x" in "mov.u32 a, b.x". This functionality has been tacked on and has annoying issues:
* vector member support is limited to the source operand of movs (so "add.u32 a.x, b.x, c.y" will not work)
* the width of "b" in "b.x" is not known, which led to some "interesting" workarounds
* passes can convert either all member accesses to other member accesses or all of them to temporaries; there is no way to convert only some member accesses to temporaries (which we need for an important fix)
This commit solves all of these issues.
2025-08-10 18:19:08 +00:00 · 2020-12-09 00:20:06 +01:00 · 2020-12-09 00:20:06 +01:00 · 770a379452
commit 770a379452
parent a6a9eb347b
5 changed files with 992 additions and 1511 deletions
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@ -557,7 +557,7 @@ pub enum Instruction<P: ArgParams> {
    Mul(MulDetails, Arg3<P>),
    Add(ArithDetails, Arg3<P>),
    Setp(SetpData, Arg4Setp<P>),
-    SetpBool(SetpBoolData, Arg5<P>),
+    SetpBool(SetpBoolData, Arg5Setp<P>),
    Not(BooleanType, Arg2<P>),
    Bra(BraData, Arg1<P>),
    Cvt(CvtDetails, Arg2<P>),
@ -614,16 +614,12 @@ pub struct CallInst<P: ArgParams> {
    pub uniform: bool,
    pub ret_params: Vec<P::Id>,
    pub func: P::Id,
-    pub param_list: Vec<P::CallOperand>,
+    pub param_list: Vec<P::Operand>,
 }

 pub trait ArgParams {
    type Id;
    type Operand;
-    type IdOrVector;
-    type OperandOrVector;
-    type CallOperand;
-    type SrcMemberOperand;
 }

 pub struct ParsedArgParams<'a> {
@ -633,10 +629,6 @@ pub struct ParsedArgParams<'a> {
 impl<'a> ArgParams for ParsedArgParams<'a> {
    type Id = &'a str;
    type Operand = Operand<&'a str>;
-    type CallOperand = CallOperand<&'a str>;
-    type IdOrVector = IdOrVector<&'a str>;
-    type OperandOrVector = OperandOrVector<&'a str>;
-    type SrcMemberOperand = (&'a str, u8);
 }

 pub struct Arg1<P: ArgParams> {
@ -648,45 +640,32 @@ pub struct Arg1Bar<P: ArgParams> {
 }

 pub struct Arg2<P: ArgParams> {
-    pub dst: P::Id,
+    pub dst: P::Operand,
    pub src: P::Operand,
 }
 pub struct Arg2Ld<P: ArgParams> {
-    pub dst: P::IdOrVector,
+    pub dst: P::Operand,
    pub src: P::Operand,
 }

 pub struct Arg2St<P: ArgParams> {
    pub src1: P::Operand,
-    pub src2: P::OperandOrVector,
+    pub src2: P::Operand,
 }

-pub enum Arg2Mov<P: ArgParams> {
-    Normal(Arg2MovNormal<P>),
-    Member(Arg2MovMember<P>),
-}
-
-pub struct Arg2MovNormal<P: ArgParams> {
-    pub dst: P::IdOrVector,
-    pub src: P::OperandOrVector,
-}
-
-// We duplicate dst here because during further compilation
-// composite dst and composite src will receive different ids
-pub enum Arg2MovMember<P: ArgParams> {
-    Dst((P::Id, u8), P::Id, P::Id),
-    Src(P::Id, P::SrcMemberOperand),
-    Both((P::Id, u8), P::Id, P::SrcMemberOperand),
+pub struct Arg2Mov<P: ArgParams> {
+    pub dst: P::Operand,
+    pub src: P::Operand,
 }

 pub struct Arg3<P: ArgParams> {
-    pub dst: P::Id,
+    pub dst: P::Operand,
    pub src1: P::Operand,
    pub src2: P::Operand,
 }

 pub struct Arg4<P: ArgParams> {
-    pub dst: P::Id,
+    pub dst: P::Operand,
    pub src1: P::Operand,
    pub src2: P::Operand,
    pub src3: P::Operand,
@ -699,7 +678,7 @@ pub struct Arg4Setp<P: ArgParams> {
    pub src2: P::Operand,
 }

-pub struct Arg5<P: ArgParams> {
+pub struct Arg5Setp<P: ArgParams> {
    pub dst1: P::Id,
    pub dst2: Option<P::Id>,
    pub src1: P::Operand,
@ -715,39 +694,13 @@ pub enum ImmediateValue {
    F64(f64),
 }

-#[derive(Copy, Clone)]
-pub enum Operand<ID> {
-    Reg(ID),
-    RegOffset(ID, i32),
+#[derive(Clone)]
+pub enum Operand<Id> {
+    Reg(Id),
+    RegOffset(Id, i32),
    Imm(ImmediateValue),
-}
-
-#[derive(Copy, Clone)]
-pub enum CallOperand<ID> {
-    Reg(ID),
-    Imm(ImmediateValue),
-}
-
-pub enum IdOrVector<ID> {
-    Reg(ID),
-    Vec(Vec<ID>),
-}
-
-pub enum OperandOrVector<ID> {
-    Reg(ID),
-    RegOffset(ID, i32),
-    Imm(ImmediateValue),
-    Vec(Vec<ID>),
-}
-
-impl<T> From<Operand<T>> for OperandOrVector<T> {
-    fn from(this: Operand<T>) -> Self {
-        match this {
-            Operand::Reg(r) => OperandOrVector::Reg(r),
-            Operand::RegOffset(r, imm) => OperandOrVector::RegOffset(r, imm),
-            Operand::Imm(imm) => OperandOrVector::Imm(imm),
-        }
-    }
+    VecMember(Id, u8),
+    VecPack(Vec<Id>),
 }

 pub enum VectorPrefix {
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@ -721,7 +721,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
 InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
+    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
        ast::Instruction::Ld(
            ast::LdDetails {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
@ -734,16 +734,6 @@ InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    }
 };

-IdOrVector: ast::IdOrVector<&'input str> = {
-    <dst:ExtendedID> => ast::IdOrVector::Reg(dst),
-    <dst:VectorExtract> => ast::IdOrVector::Vec(dst)
-}
-
-OperandOrVector: ast::OperandOrVector<&'input str> = {
-    <op:Operand> => ast::OperandOrVector::from(op),
-    <dst:VectorExtract> => ast::OperandOrVector::Vec(dst)
-}
-
 LdStType: ast::LdStType = {
    <v:VectorPrefix> <t:LdStScalarType> => ast::LdStType::Vector(t, v),
    <t:LdStScalarType> => ast::LdStType::Scalar(t),
@ -780,27 +770,17 @@ LdCacheOperator: ast::LdCacheOperator = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
 InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    <m:MovNormal> => ast::Instruction::Mov(m.0, m.1),
-    <m:MovVector> => ast::Instruction::Mov(m.0, m.1),
-};
-
-
-MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
-    "mov" <t:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {(
-        ast::MovDetails::new(ast::Type::Scalar(t)),
-        ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: ast::IdOrVector::Reg(dst), src: src.into() })
-    )},
-    "mov" <pref:VectorPrefix> <t:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {(
-        ast::MovDetails::new(ast::Type::Vector(t, pref)),
-        ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: dst, src: src })
-    )}
-}
-
-MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
-    "mov" <t:MovVectorType> <a:Arg2MovMember> => {(
-        ast::MovDetails::new(ast::Type::Scalar(t.into())),
-        ast::Arg2Mov::Member(a)
-    )},
+    "mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
+        let mov_type = match pref {
+            Some(vec_width) => ast::Type::Vector(t, vec_width),
+            None => ast::Type::Scalar(t)
+        };
+        let details = ast::MovDetails::new(mov_type);
+        ast::Instruction::Mov(
+            details,
+            ast::Arg2Mov { dst, src }
+        )
+    }
 }

 #[inline]
@ -819,21 +799,6 @@ MovScalarType: ast::ScalarType = {
    ".pred" => ast::ScalarType::Pred
 };

-#[inline]
-MovVectorType: ast::ScalarType = {
-    ".b16" => ast::ScalarType::B16,
-    ".b32" => ast::ScalarType::B32,
-    ".b64" => ast::ScalarType::B64,
-    ".u16" => ast::ScalarType::U16,
-    ".u32" => ast::ScalarType::U32,
-    ".u64" => ast::ScalarType::U64,
-    ".s16" => ast::ScalarType::S16,
-    ".s32" => ast::ScalarType::S32,
-    ".s64" => ast::ScalarType::S64,
-    ".f32" => ast::ScalarType::F32,
-    ".f64" => ast::ScalarType::F64,
-};
-
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
@ -921,7 +886,7 @@ InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = {
 // TODO: support f16 setp
 InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "setp" <d:SetpMode> <a:Arg4Setp> => ast::Instruction::Setp(d, a),
-    "setp" <d:SetpBoolMode> <a:Arg5> => ast::Instruction::SetpBool(d, a),
+    "setp" <d:SetpBoolMode> <a:Arg5Setp> => ast::Instruction::SetpBool(d, a),
 };

 SetpMode: ast::SetpData = {
@ -1198,7 +1163,7 @@ ShrType: ast::ShrType = {
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
 // Warning: NVIDIA documentation is incorrect, you can specify scope only once
 InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
+    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:SrcOperandVec> => {
        ast::Instruction::St(
            ast::StData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
@ -1775,9 +1740,9 @@ Operand: ast::Operand<&'input str> = {
    <x:ImmediateValue> => ast::Operand::Imm(x)
 };

-CallOperand: ast::CallOperand<&'input str> = {
-    <r:ExtendedID> => ast::CallOperand::Reg(r),
-    <x:ImmediateValue> => ast::CallOperand::Imm(x)
+CallOperand: ast::Operand<&'input str> = {
+    <r:ExtendedID> => ast::Operand::Reg(r),
+    <x:ImmediateValue> => ast::Operand::Imm(x)
 };

 // TODO: start parsing whole constants sub-language:
@ -1825,13 +1790,7 @@ Arg1Bar: ast::Arg1Bar<ast::ParsedArgParams<'input>> = {
 };

 Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
-    <dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
-};
-
-Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
-    <dst:MemberOperand> "," <src:ExtendedID> => ast::Arg2MovMember::Dst(dst, dst.0, src),
-    <dst:ExtendedID> "," <src:MemberOperand> => ast::Arg2MovMember::Src(dst, src),
-    <dst:MemberOperand> "," <src:MemberOperand> => ast::Arg2MovMember::Both(dst, dst.0, src),
+    <dst:DstOperand> "," <src:Operand> => ast::Arg2{<>}
 };

 MemberOperand: (&'input str, u8) = {
@ -1855,19 +1814,19 @@ VectorExtract: Vec<&'input str> = {
 };

 Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
-    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
+    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
 };

 Arg3Atom: ast::Arg3<ast::ParsedArgParams<'input>> = {
-    <dst:ExtendedID> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3{<>}
+    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3{<>}
 };

 Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
-    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> ","  <src3:Operand> => ast::Arg4{<>}
+    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> ","  <src3:Operand> => ast::Arg4{<>}
 };

 Arg4Atom: ast::Arg4<ast::ParsedArgParams<'input>> = {
-    <dst:ExtendedID> "," "[" <src1:Operand> "]" "," <src2:Operand> ","  <src3:Operand> => ast::Arg4{<>}
+    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> ","  <src3:Operand> => ast::Arg4{<>}
 };

 Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
@ -1875,22 +1834,50 @@ Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
 };

 // TODO: pass src3 negation somewhere
-Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
-    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => ast::Arg5{<>}
+Arg5Setp: ast::Arg5Setp<ast::ParsedArgParams<'input>> = {
+    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => ast::Arg5Setp{<>}
 };

-ArgCall: (Vec<&'input str>, &'input str, Vec<ast::CallOperand<&'input str>>) = {
+ArgCall: (Vec<&'input str>, &'input str, Vec<ast::Operand<&'input str>>) = {
    "(" <ret_params:Comma<ExtendedID>> ")" "," <func:ExtendedID> "," "(" <param_list:Comma<CallOperand>> ")" => {
        (ret_params, func, param_list)
    },
    <func:ExtendedID> "," "(" <param_list:Comma<CallOperand>> ")" => (Vec::new(), func, param_list),
-    <func:ExtendedID> => (Vec::new(), func, Vec::<ast::CallOperand<_>>::new()),
+    <func:ExtendedID> => (Vec::new(), func, Vec::<ast::Operand<_>>::new()),
 };

 OptionalDst: &'input str = {
    "|" <dst2:ExtendedID> => dst2
 }

+SrcOperand: ast::Operand<&'input str> = {
+    <r:ExtendedID> => ast::Operand::Reg(r),
+    <r:ExtendedID> "+" <offset:S32Num> => ast::Operand::RegOffset(r, offset),
+    <x:ImmediateValue> => ast::Operand::Imm(x),
+    <mem_op:MemberOperand> => {
+        let (reg, idx) = mem_op;
+        ast::Operand::VecMember(reg, idx)
+    }
+}
+
+SrcOperandVec: ast::Operand<&'input str> = {
+    <normal:SrcOperand> => normal,
+    <vec:VectorExtract> => ast::Operand::VecPack(vec),
+}
+
+DstOperand: ast::Operand<&'input str> = {
+    <r:ExtendedID> => ast::Operand::Reg(r),
+    <mem_op:MemberOperand> => {
+        let (reg, idx) = mem_op;
+        ast::Operand::VecMember(reg, idx)
+    }
+}
+
+DstOperandVec: ast::Operand<&'input str> = {
+    <normal:DstOperand> => normal,
+    <vec:VectorExtract> => ast::Operand::VecPack(vec),
+}
+
 VectorPrefix: u8 = {
    ".v2" => 2,
    ".v4" => 4
--- a/ptx/src/test/spirv_run/vector.spvtxt
+++ b/ptx/src/test/spirv_run/vector.spvtxt
@ -7,91 +7,93 @@
               OpCapability Int64
               OpCapability Float16
               OpCapability Float64
-         %57 = OpExtInstImport "OpenCL.std"
+         %51 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %31 "vector"
+               OpEntryPoint Kernel %25 "vector"
       %void = OpTypeVoid
       %uint = OpTypeInt 32 0
     %v2uint = OpTypeVector %uint 2
-         %61 = OpTypeFunction %v2uint %v2uint
+         %55 = OpTypeFunction %v2uint %v2uint
 %_ptr_Function_v2uint = OpTypePointer Function %v2uint
 %_ptr_Function_uint = OpTypePointer Function %uint
+     %uint_0 = OpConstant %uint 0
+     %uint_1 = OpConstant %uint 1
      %ulong = OpTypeInt 64 0
-         %65 = OpTypeFunction %void %ulong %ulong
+         %67 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
 %_ptr_Generic_v2uint = OpTypePointer Generic %v2uint
-          %1 = OpFunction %v2uint None %61
+          %1 = OpFunction %v2uint None %55
          %7 = OpFunctionParameter %v2uint
-         %30 = OpLabel
+         %24 = OpLabel
          %2 = OpVariable %_ptr_Function_v2uint Function
          %3 = OpVariable %_ptr_Function_v2uint Function
          %4 = OpVariable %_ptr_Function_v2uint Function
          %5 = OpVariable %_ptr_Function_uint Function
          %6 = OpVariable %_ptr_Function_uint Function
               OpStore %3 %7
-          %9 = OpLoad %v2uint %3
-         %27 = OpCompositeExtract %uint %9 0
-          %8 = OpCopyObject %uint %27
+         %59 = OpInBoundsAccessChain %_ptr_Function_uint %3 %uint_0
+          %9 = OpLoad %uint %59
+          %8 = OpCopyObject %uint %9
               OpStore %5 %8
-         %11 = OpLoad %v2uint %3
-         %28 = OpCompositeExtract %uint %11 1
-         %10 = OpCopyObject %uint %28
+         %61 = OpInBoundsAccessChain %_ptr_Function_uint %3 %uint_1
+         %11 = OpLoad %uint %61
+         %10 = OpCopyObject %uint %11
               OpStore %6 %10
         %13 = OpLoad %uint %5
         %14 = OpLoad %uint %6
         %12 = OpIAdd %uint %13 %14
               OpStore %6 %12
-         %16 = OpLoad %v2uint %4
-         %17 = OpLoad %uint %6
-         %15 = OpCompositeInsert %v2uint %17 %16 0
-               OpStore %4 %15
-         %19 = OpLoad %v2uint %4
-         %20 = OpLoad %uint %6
-         %18 = OpCompositeInsert %v2uint %20 %19 1
-               OpStore %4 %18
+         %16 = OpLoad %uint %6
+         %15 = OpCopyObject %uint %16
+         %62 = OpInBoundsAccessChain %_ptr_Function_uint %4 %uint_0
+               OpStore %62 %15
+         %18 = OpLoad %uint %6
+         %17 = OpCopyObject %uint %18
+         %63 = OpInBoundsAccessChain %_ptr_Function_uint %4 %uint_1
+               OpStore %63 %17
+         %64 = OpInBoundsAccessChain %_ptr_Function_uint %4 %uint_1
+         %20 = OpLoad %uint %64
+         %19 = OpCopyObject %uint %20
+         %65 = OpInBoundsAccessChain %_ptr_Function_uint %4 %uint_0
+               OpStore %65 %19
         %22 = OpLoad %v2uint %4
-         %23 = OpLoad %v2uint %4
-         %29 = OpCompositeExtract %uint %23 1
-         %21 = OpCompositeInsert %v2uint %29 %22 0
-               OpStore %4 %21
-         %25 = OpLoad %v2uint %4
-         %24 = OpCopyObject %v2uint %25
-               OpStore %2 %24
-         %26 = OpLoad %v2uint %2
-               OpReturnValue %26
+         %21 = OpCopyObject %v2uint %22
+               OpStore %2 %21
+         %23 = OpLoad %v2uint %2
+               OpReturnValue %23
               OpFunctionEnd
-         %31 = OpFunction %void None %65
-         %40 = OpFunctionParameter %ulong
-         %41 = OpFunctionParameter %ulong
-         %55 = OpLabel
-         %32 = OpVariable %_ptr_Function_ulong Function
+         %25 = OpFunction %void None %67
+         %34 = OpFunctionParameter %ulong
+         %35 = OpFunctionParameter %ulong
+         %49 = OpLabel
+         %26 = OpVariable %_ptr_Function_ulong Function
+         %27 = OpVariable %_ptr_Function_ulong Function
+         %28 = OpVariable %_ptr_Function_ulong Function
+         %29 = OpVariable %_ptr_Function_ulong Function
+         %30 = OpVariable %_ptr_Function_v2uint Function
+         %31 = OpVariable %_ptr_Function_uint Function
+         %32 = OpVariable %_ptr_Function_uint Function
         %33 = OpVariable %_ptr_Function_ulong Function
-         %34 = OpVariable %_ptr_Function_ulong Function
-         %35 = OpVariable %_ptr_Function_ulong Function
-         %36 = OpVariable %_ptr_Function_v2uint Function
-         %37 = OpVariable %_ptr_Function_uint Function
-         %38 = OpVariable %_ptr_Function_uint Function
-         %39 = OpVariable %_ptr_Function_ulong Function
-               OpStore %32 %40
-               OpStore %33 %41
-         %42 = OpLoad %ulong %32
-               OpStore %34 %42
-         %43 = OpLoad %ulong %33
-               OpStore %35 %43
-         %45 = OpLoad %ulong %34
-         %52 = OpConvertUToPtr %_ptr_Generic_v2uint %45
-         %44 = OpLoad %v2uint %52
-               OpStore %36 %44
-         %47 = OpLoad %v2uint %36
-         %46 = OpFunctionCall %v2uint %1 %47
-               OpStore %36 %46
-         %49 = OpLoad %v2uint %36
-         %53 = OpBitcast %ulong %49
-         %48 = OpCopyObject %ulong %53
-               OpStore %39 %48
-         %50 = OpLoad %ulong %35
-         %51 = OpLoad %v2uint %36
-         %54 = OpConvertUToPtr %_ptr_Generic_v2uint %50
-               OpStore %54 %51
+               OpStore %26 %34
+               OpStore %27 %35
+         %36 = OpLoad %ulong %26
+               OpStore %28 %36
+         %37 = OpLoad %ulong %27
+               OpStore %29 %37
+         %39 = OpLoad %ulong %28
+         %46 = OpConvertUToPtr %_ptr_Generic_v2uint %39
+         %38 = OpLoad %v2uint %46
+               OpStore %30 %38
+         %41 = OpLoad %v2uint %30
+         %40 = OpFunctionCall %v2uint %1 %41
+               OpStore %30 %40
+         %43 = OpLoad %v2uint %30
+         %47 = OpBitcast %ulong %43
+         %42 = OpCopyObject %ulong %47
+               OpStore %33 %42
+         %44 = OpLoad %ulong %29
+         %45 = OpLoad %v2uint %30
+         %48 = OpConvertUToPtr %_ptr_Generic_v2uint %44
+               OpStore %48 %45
               OpReturn
               OpFunctionEnd
--- a/ptx/src/test/spirv_run/vector_extract.spvtxt
+++ b/ptx/src/test/spirv_run/vector_extract.spvtxt
@ -7,12 +7,12 @@
               OpCapability Int64
               OpCapability Float16
               OpCapability Float64
-         %73 = OpExtInstImport "OpenCL.std"
+         %61 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %1 "vector_extract"
       %void = OpTypeVoid
      %ulong = OpTypeInt 64 0
-         %76 = OpTypeFunction %void %ulong %ulong
+         %64 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
     %ushort = OpTypeInt 16 0
 %_ptr_Function_ushort = OpTypePointer Function %ushort
@ -21,10 +21,10 @@
      %uchar = OpTypeInt 8 0
    %v4uchar = OpTypeVector %uchar 4
 %_ptr_CrossWorkgroup_v4uchar = OpTypePointer CrossWorkgroup %v4uchar
-          %1 = OpFunction %void None %76
-         %11 = OpFunctionParameter %ulong
-         %12 = OpFunctionParameter %ulong
-         %71 = OpLabel
+          %1 = OpFunction %void None %64
+         %17 = OpFunctionParameter %ulong
+         %18 = OpFunctionParameter %ulong
+         %59 = OpLabel
          %2 = OpVariable %_ptr_Function_ulong Function
          %3 = OpVariable %_ptr_Function_ulong Function
          %4 = OpVariable %_ptr_Function_ulong Function
@ -34,89 +34,92 @@
          %8 = OpVariable %_ptr_Function_ushort Function
          %9 = OpVariable %_ptr_Function_ushort Function
         %10 = OpVariable %_ptr_Function_v4ushort Function
-               OpStore %2 %11
-               OpStore %3 %12
-         %13 = OpLoad %ulong %2
-               OpStore %4 %13
-         %14 = OpLoad %ulong %3
-               OpStore %5 %14
-         %19 = OpLoad %ulong %4
-         %61 = OpConvertUToPtr %_ptr_CrossWorkgroup_v4uchar %19
-         %43 = OpLoad %v4uchar %61
-         %62 = OpCompositeExtract %uchar %43 0
-         %85 = OpBitcast %uchar %62
-         %15 = OpUConvert %ushort %85
-         %63 = OpCompositeExtract %uchar %43 1
-         %86 = OpBitcast %uchar %63
-         %16 = OpUConvert %ushort %86
-         %64 = OpCompositeExtract %uchar %43 2
-         %87 = OpBitcast %uchar %64
-         %17 = OpUConvert %ushort %87
-         %65 = OpCompositeExtract %uchar %43 3
-         %88 = OpBitcast %uchar %65
-         %18 = OpUConvert %ushort %88
-               OpStore %6 %15
-               OpStore %7 %16
-               OpStore %8 %17
-               OpStore %9 %18
-         %21 = OpLoad %ushort %7
-         %22 = OpLoad %ushort %8
-         %23 = OpLoad %ushort %9
-         %24 = OpLoad %ushort %6
-         %44 = OpUndef %v4ushort
-         %45 = OpCompositeInsert %v4ushort %21 %44 0
-         %46 = OpCompositeInsert %v4ushort %22 %45 1
-         %47 = OpCompositeInsert %v4ushort %23 %46 2
-         %48 = OpCompositeInsert %v4ushort %24 %47 3
-         %20 = OpCopyObject %v4ushort %48
-               OpStore %10 %20
-         %29 = OpLoad %v4ushort %10
-         %49 = OpCopyObject %v4ushort %29
-         %25 = OpCompositeExtract %ushort %49 0
-         %26 = OpCompositeExtract %ushort %49 1
-         %27 = OpCompositeExtract %ushort %49 2
-         %28 = OpCompositeExtract %ushort %49 3
-               OpStore %8 %25
-               OpStore %9 %26
-               OpStore %6 %27
-               OpStore %7 %28
-         %34 = OpLoad %ushort %8
-         %35 = OpLoad %ushort %9
-         %36 = OpLoad %ushort %6
-         %37 = OpLoad %ushort %7
-         %51 = OpUndef %v4ushort
-         %52 = OpCompositeInsert %v4ushort %34 %51 0
-         %53 = OpCompositeInsert %v4ushort %35 %52 1
-         %54 = OpCompositeInsert %v4ushort %36 %53 2
-         %55 = OpCompositeInsert %v4ushort %37 %54 3
-         %50 = OpCopyObject %v4ushort %55
-         %30 = OpCompositeExtract %ushort %50 0
-         %31 = OpCompositeExtract %ushort %50 1
-         %32 = OpCompositeExtract %ushort %50 2
-         %33 = OpCompositeExtract %ushort %50 3
-               OpStore %9 %30
-               OpStore %6 %31
-               OpStore %7 %32
-               OpStore %8 %33
-         %38 = OpLoad %ulong %5
-         %39 = OpLoad %ushort %6
-         %40 = OpLoad %ushort %7
-         %41 = OpLoad %ushort %8
-         %42 = OpLoad %ushort %9
-         %56 = OpUndef %v4uchar
-         %89 = OpBitcast %ushort %39
-         %66 = OpUConvert %uchar %89
-         %57 = OpCompositeInsert %v4uchar %66 %56 0
-         %90 = OpBitcast %ushort %40
-         %67 = OpUConvert %uchar %90
-         %58 = OpCompositeInsert %v4uchar %67 %57 1
-         %91 = OpBitcast %ushort %41
-         %68 = OpUConvert %uchar %91
-         %59 = OpCompositeInsert %v4uchar %68 %58 2
-         %92 = OpBitcast %ushort %42
-         %69 = OpUConvert %uchar %92
-         %60 = OpCompositeInsert %v4uchar %69 %59 3
-         %70 = OpConvertUToPtr %_ptr_CrossWorkgroup_v4uchar %38
-               OpStore %70 %60
+               OpStore %2 %17
+               OpStore %3 %18
+         %19 = OpLoad %ulong %2
+               OpStore %4 %19
+         %20 = OpLoad %ulong %3
+               OpStore %5 %20
+         %21 = OpLoad %ulong %4
+         %49 = OpConvertUToPtr %_ptr_CrossWorkgroup_v4uchar %21
+         %11 = OpLoad %v4uchar %49
+         %50 = OpCompositeExtract %uchar %11 0
+         %51 = OpCompositeExtract %uchar %11 1
+         %52 = OpCompositeExtract %uchar %11 2
+         %53 = OpCompositeExtract %uchar %11 3
+         %73 = OpBitcast %uchar %50
+         %22 = OpUConvert %ushort %73
+         %74 = OpBitcast %uchar %51
+         %23 = OpUConvert %ushort %74
+         %75 = OpBitcast %uchar %52
+         %24 = OpUConvert %ushort %75
+         %76 = OpBitcast %uchar %53
+         %25 = OpUConvert %ushort %76
+               OpStore %6 %22
+               OpStore %7 %23
+               OpStore %8 %24
+               OpStore %9 %25
+         %26 = OpLoad %ushort %7
+         %27 = OpLoad %ushort %8
+         %28 = OpLoad %ushort %9
+         %29 = OpLoad %ushort %6
+         %77 = OpUndef %v4ushort
+         %78 = OpCompositeInsert %v4ushort %26 %77 0
+         %79 = OpCompositeInsert %v4ushort %27 %78 1
+         %80 = OpCompositeInsert %v4ushort %28 %79 2
+         %81 = OpCompositeInsert %v4ushort %29 %80 3
+         %12 = OpCopyObject %v4ushort %81
+         %30 = OpCopyObject %v4ushort %12
+               OpStore %10 %30
+         %31 = OpLoad %v4ushort %10
+         %13 = OpCopyObject %v4ushort %31
+         %32 = OpCompositeExtract %ushort %13 0
+         %33 = OpCompositeExtract %ushort %13 1
+         %34 = OpCompositeExtract %ushort %13 2
+         %35 = OpCompositeExtract %ushort %13 3
+               OpStore %8 %32
+               OpStore %9 %33
+               OpStore %6 %34
+               OpStore %7 %35
+         %36 = OpLoad %ushort %8
+         %37 = OpLoad %ushort %9
+         %38 = OpLoad %ushort %6
+         %39 = OpLoad %ushort %7
+         %82 = OpUndef %v4ushort
+         %83 = OpCompositeInsert %v4ushort %36 %82 0
+         %84 = OpCompositeInsert %v4ushort %37 %83 1
+         %85 = OpCompositeInsert %v4ushort %38 %84 2
+         %86 = OpCompositeInsert %v4ushort %39 %85 3
+         %15 = OpCopyObject %v4ushort %86
+         %14 = OpCopyObject %v4ushort %15
+         %40 = OpCompositeExtract %ushort %14 0
+         %41 = OpCompositeExtract %ushort %14 1
+         %42 = OpCompositeExtract %ushort %14 2
+         %43 = OpCompositeExtract %ushort %14 3
+               OpStore %9 %40
+               OpStore %6 %41
+               OpStore %7 %42
+               OpStore %8 %43
+         %44 = OpLoad %ushort %6
+         %45 = OpLoad %ushort %7
+         %46 = OpLoad %ushort %8
+         %47 = OpLoad %ushort %9
+         %87 = OpBitcast %ushort %44
+         %54 = OpUConvert %uchar %87
+         %88 = OpBitcast %ushort %45
+         %55 = OpUConvert %uchar %88
+         %89 = OpBitcast %ushort %46
+         %56 = OpUConvert %uchar %89
+         %90 = OpBitcast %ushort %47
+         %57 = OpUConvert %uchar %90
+         %91 = OpUndef %v4uchar
+         %92 = OpCompositeInsert %v4uchar %54 %91 0
+         %93 = OpCompositeInsert %v4uchar %55 %92 1
+         %94 = OpCompositeInsert %v4uchar %56 %93 2
+         %95 = OpCompositeInsert %v4uchar %57 %94 3
+         %16 = OpCopyObject %v4uchar %95
+         %48 = OpLoad %ulong %5
+         %58 = OpConvertUToPtr %_ptr_CrossWorkgroup_v4uchar %48
+               OpStore %58 %16
               OpReturn
               OpFunctionEnd
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs