Implement vector-destructuring mov/ld/st

2025-04-20 00:14:45 +00:00 · 2020-09-27 23:51:34 +02:00 · 2020-09-27 23:51:34 +02:00 · 1e0b35be4b
commit 1e0b35be4b
parent 7c26568cbf
4 changed files with 1064 additions and 495 deletions
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@ -35,6 +35,19 @@ macro_rules! sub_scalar_type {
                }
            }
        }
+
+        impl std::convert::TryFrom<ScalarType> for $name {
+            type Error = ();
+
+            fn try_from(t: ScalarType) -> Result<Self, Self::Error> {
+                match t {
+                    $(
+                        ScalarType::$variant => Ok($name::$variant),
+                    )+
+                        _ => Err(()),
+                }
+            }
+        }
    };
 }

@ -159,20 +172,20 @@ pub struct Module<'a> {
    pub functions: Vec<ParsedFunction<'a>>,
 }

-pub enum MethodDecl<'a, P: ArgParams> {
-    Func(Vec<FnArgument<P>>, P::ID, Vec<FnArgument<P>>),
-    Kernel(&'a str, Vec<KernelArgument<P>>),
+pub enum MethodDecl<'a, ID> {
+    Func(Vec<FnArgument<ID>>, ID, Vec<FnArgument<ID>>),
+    Kernel(&'a str, Vec<KernelArgument<ID>>),
 }

-pub type FnArgument<P> = Variable<FnArgumentType, P>;
-pub type KernelArgument<P> = Variable<VariableParamType, P>;
+pub type FnArgument<ID> = Variable<FnArgumentType, ID>;
+pub type KernelArgument<ID> = Variable<VariableParamType, ID>;

-pub struct Function<'a, P: ArgParams, S> {
-    pub func_directive: MethodDecl<'a, P>,
+pub struct Function<'a, ID, S> {
+    pub func_directive: MethodDecl<'a, ID>,
    pub body: Option<Vec<S>>,
 }

-pub type ParsedFunction<'a> = Function<'a, ParsedArgParams<'a>, Statement<ParsedArgParams<'a>>>;
+pub type ParsedFunction<'a> = Function<'a, &'a str, Statement<ParsedArgParams<'a>>>;

 #[derive(PartialEq, Eq, Clone, Copy)]
 pub enum FnArgumentType {
@ -264,21 +277,21 @@ impl Default for ScalarType {
 }

 pub enum Statement<P: ArgParams> {
-    Label(P::ID),
-    Variable(MultiVariable<P>),
-    Instruction(Option<PredAt<P::ID>>, Instruction<P>),
+    Label(P::Id),
+    Variable(MultiVariable<P::Id>),
+    Instruction(Option<PredAt<P::Id>>, Instruction<P>),
    Block(Vec<Statement<P>>),
 }

-pub struct MultiVariable<P: ArgParams> {
-    pub var: Variable<VariableType, P>,
+pub struct MultiVariable<ID> {
+    pub var: Variable<VariableType, ID>,
    pub count: Option<u32>,
 }

-pub struct Variable<T, P: ArgParams> {
+pub struct Variable<T, ID> {
    pub align: Option<u32>,
    pub v_type: T,
-    pub name: P::ID,
+    pub name: ID,
 }

 #[derive(Eq, PartialEq, Copy, Clone)]
@ -315,9 +328,8 @@ pub struct PredAt<ID> {
 }

 pub enum Instruction<P: ArgParams> {
-    Ld(LdData, Arg2<P>),
-    Mov(MovDetails, Arg2<P>),
-    MovVector(MovVectorDetails, Arg2Vec<P>),
+    Ld(LdDetails, Arg2Ld<P>),
+    Mov(MovDetails, Arg2Mov<P>),
    Mul(MulDetails, Arg3<P>),
    Add(AddDetails, Arg3<P>),
    Setp(SetpData, Arg4Setp<P>),
@ -337,11 +349,6 @@ pub enum Instruction<P: ArgParams> {
 #[derive(Copy, Clone)]
 pub struct MadFloatDesc {}

-#[derive(Copy, Clone)]
-pub struct MovVectorDetails {
-    pub typ: MovVectorType,
-    pub length: u8,
-}
 #[derive(Copy, Clone)]
 pub struct AbsDetails {
    pub flush_to_zero: bool,
@ -350,16 +357,18 @@ pub struct AbsDetails {

 pub struct CallInst<P: ArgParams> {
    pub uniform: bool,
-    pub ret_params: Vec<P::ID>,
-    pub func: P::ID,
+    pub ret_params: Vec<P::Id>,
+    pub func: P::Id,
    pub param_list: Vec<P::CallOperand>,
 }

 pub trait ArgParams {
-    type ID;
+    type Id;
    type Operand;
+    type IdOrVector;
+    type OperandOrVector;
    type CallOperand;
-    type VecOperand;
+    type SrcMemberOperand;
 }

 pub struct ParsedArgParams<'a> {
@ -367,57 +376,73 @@ pub struct ParsedArgParams<'a> {
 }

 impl<'a> ArgParams for ParsedArgParams<'a> {
-    type ID = &'a str;
+    type Id = &'a str;
    type Operand = Operand<&'a str>;
    type CallOperand = CallOperand<&'a str>;
-    type VecOperand = (&'a str, u8);
+    type IdOrVector = IdOrVector<&'a str>;
+    type OperandOrVector = OperandOrVector<&'a str>;
+    type SrcMemberOperand = (&'a str, u8);
 }

 pub struct Arg1<P: ArgParams> {
-    pub src: P::ID, // it is a jump destination, but in terms of operands it is a source operand
+    pub src: P::Id, // it is a jump destination, but in terms of operands it is a source operand
 }

 pub struct Arg2<P: ArgParams> {
-    pub dst: P::ID,
+    pub dst: P::Id,
+    pub src: P::Operand,
+}
+pub struct Arg2Ld<P: ArgParams> {
+    pub dst: P::IdOrVector,
    pub src: P::Operand,
 }

 pub struct Arg2St<P: ArgParams> {
    pub src1: P::Operand,
-    pub src2: P::Operand,
+    pub src2: P::OperandOrVector,
+}
+
+pub enum Arg2Mov<P: ArgParams> {
+    Normal(Arg2MovNormal<P>),
+    Member(Arg2MovMember<P>),
+}
+
+pub struct Arg2MovNormal<P: ArgParams> {
+    pub dst: P::IdOrVector,
+    pub src: P::OperandOrVector,
 }

 // We duplicate dst here because during further compilation
 // composite dst and composite src will receive different ids
-pub enum Arg2Vec<P: ArgParams> {
-    Dst((P::ID, u8), P::ID, P::ID),
-    Src(P::ID, P::VecOperand),
-    Both((P::ID, u8), P::ID, P::VecOperand),
+pub enum Arg2MovMember<P: ArgParams> {
+    Dst((P::Id, u8), P::Id, P::Id),
+    Src(P::Id, P::SrcMemberOperand),
+    Both((P::Id, u8), P::Id, P::SrcMemberOperand),
 }

 pub struct Arg3<P: ArgParams> {
-    pub dst: P::ID,
+    pub dst: P::Id,
    pub src1: P::Operand,
    pub src2: P::Operand,
 }

 pub struct Arg4<P: ArgParams> {
-    pub dst: P::ID,
+    pub dst: P::Id,
    pub src1: P::Operand,
    pub src2: P::Operand,
    pub src3: P::Operand,
 }

 pub struct Arg4Setp<P: ArgParams> {
-    pub dst1: P::ID,
-    pub dst2: Option<P::ID>,
+    pub dst1: P::Id,
+    pub dst2: Option<P::Id>,
    pub src1: P::Operand,
    pub src2: P::Operand,
 }

 pub struct Arg5<P: ArgParams> {
-    pub dst1: P::ID,
-    pub dst2: Option<P::ID>,
+    pub dst1: P::Id,
+    pub dst2: Option<P::Id>,
    pub src1: P::Operand,
    pub src2: P::Operand,
    pub src3: P::Operand,
@ -436,12 +461,34 @@ pub enum CallOperand<ID> {
    Imm(u32),
 }

+pub enum IdOrVector<ID> {
+    Reg(ID),
+    Vec(Vec<ID>)
+}
+
+pub enum OperandOrVector<ID> {
+    Reg(ID),
+    RegOffset(ID, i32),
+    Imm(u32),
+    Vec(Vec<ID>)
+}
+
+impl<T> From<Operand<T>> for OperandOrVector<T> {
+    fn from(this: Operand<T>) -> Self {
+        match this {
+            Operand::Reg(r) => OperandOrVector::Reg(r),
+            Operand::RegOffset(r, imm) => OperandOrVector::RegOffset(r, imm),
+            Operand::Imm(imm) => OperandOrVector::Imm(imm),
+        }
+    }
+}
+
 pub enum VectorPrefix {
    V2,
    V4,
 }

-pub struct LdData {
+pub struct LdDetails {
    pub qualifier: LdStQualifier,
    pub state_space: LdStateSpace,
    pub caching: LdCacheOperator,
@ -482,45 +529,23 @@ pub enum LdCacheOperator {
    Uncached,
 }

-sub_scalar_type!(MovScalarType {
-    B16,
-    B32,
-    B64,
-    U16,
-    U32,
-    U64,
-    S16,
-    S32,
-    S64,
-    F32,
-    F64,
-    Pred,
-});
-
-// pred vectors are illegal
-sub_scalar_type!(MovVectorType {
-    B16,
-    B32,
-    B64,
-    U16,
-    U32,
-    U64,
-    S16,
-    S32,
-    S64,
-    F32,
-    F64,
-});
-
+#[derive(Copy, Clone)]
 pub struct MovDetails {
-    pub typ: MovType,
+    pub typ: Type,
    pub src_is_address: bool,
+    // two fields below are in use by member moves
+    pub dst_width: u8,
+    pub src_width: u8,
 }

-sub_type! {
-    MovType {
-        Scalar(MovScalarType),
-        Vector(MovVectorType, u8),
+impl MovDetails {
+    pub fn new(typ: Type) -> Self {
+        MovDetails {
+            typ,
+            src_is_address: false,
+            dst_width: 0,
+            src_width: 0
+        }
    }
 }

--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@ -194,7 +194,7 @@ TargetSpecifier = {
    "map_f64_to_f32"
 };

-Directive: Option<ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>>> = {
+Directive: Option<ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>> = {
    AddressSize => None,
    <f:Function> => Some(f),
    File => None,
@ -205,7 +205,7 @@ AddressSize = {
    ".address_size" Num
 };

-Function: ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>>  = {
+Function: ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>  = {
    LinkingDirective*
    <func_directive:MethodDecl>
    <body:FunctionBody> =>  ast::Function{<>}
@ -217,29 +217,29 @@ LinkingDirective = {
    ".weak"
 };

-MethodDecl: ast::MethodDecl<'input, ast::ParsedArgParams<'input>> = {
+MethodDecl: ast::MethodDecl<'input, &'input str> = {
    ".entry" <name:ExtendedID> <params:KernelArguments> => ast::MethodDecl::Kernel(name, params),
    ".func" <ret_vals:FnArguments?> <name:ExtendedID> <params:FnArguments> => {
        ast::MethodDecl::Func(ret_vals.unwrap_or_else(|| Vec::new()), name, params)
    }
 };

-KernelArguments: Vec<ast::KernelArgument<ast::ParsedArgParams<'input>>> = {
+KernelArguments: Vec<ast::KernelArgument<&'input str>> = {
    "(" <args:Comma<KernelInput>> ")" => args
 };

-FnArguments: Vec<ast::FnArgument<ast::ParsedArgParams<'input>>> = {
+FnArguments: Vec<ast::FnArgument<&'input str>> = {
    "(" <args:Comma<FnInput>> ")" => args
 };

-KernelInput: ast::Variable<ast::VariableParamType, ast::ParsedArgParams<'input>> = {
+KernelInput: ast::Variable<ast::VariableParamType, &'input str> = {
    <v:ParamVariable> => {
        let (align, v_type, name) = v;
        ast::Variable{ align, v_type, name }
    }
 }

-FnInput: ast::Variable<ast::FnArgumentType, ast::ParsedArgParams<'input>> = {
+FnInput: ast::Variable<ast::FnArgumentType, &'input str> = {
    <v:RegVariable> => {
        let (align, v_type, name) = v;
        let v_type = ast::FnArgumentType::Reg(v_type);
@ -320,7 +320,7 @@ Align: u32 = {
 };

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
-MultiVariable: ast::MultiVariable<ast::ParsedArgParams<'input>> = {
+MultiVariable: ast::MultiVariable<&'input str> = {
    <var:Variable> <count:VariableParam?> => ast::MultiVariable{<>}
 }

@ -331,7 +331,7 @@ VariableParam: u32 = {
    }
 }

-Variable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
+Variable: ast::Variable<ast::VariableType, &'input str> = {
    <v:RegVariable> => {
        let (align, v_type, name) = v;
        let v_type = ast::VariableType::Reg(v_type);
@ -356,7 +356,7 @@ RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
    }
 }

-LocalVariable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
+LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
    ".local" <align:Align?> <t:SizedScalarType> <name:ExtendedID> => {
        let v_type = ast::VariableType::Local(ast::VariableLocalType::Scalar(t));
        ast::Variable {align, v_type, name}
@ -449,19 +449,29 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
 InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:ExtendedID> "," <src:MemoryOperand> => {
+    "ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
        ast::Instruction::Ld(
-            ast::LdData {
+            ast::LdDetails {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
                state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
                caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
                typ: t
            },
-            ast::Arg2 { dst:dst, src:src }
+            ast::Arg2Ld { dst:dst, src:src }
        )
    }
 };

+IdOrVector: ast::IdOrVector<&'input str> = {
+    <dst:ExtendedID> => ast::IdOrVector::Reg(dst),
+    <dst:VectorExtract> => ast::IdOrVector::Vec(dst)
+}
+
+OperandOrVector: ast::OperandOrVector<&'input str> = {
+    <op:Operand> => ast::OperandOrVector::from(op),
+    <dst:VectorExtract> => ast::OperandOrVector::Vec(dst)
+}
+
 LdStType: ast::Type = {
    <v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v),
    <t:LdStScalarType> => ast::Type::Scalar(t),
@ -498,49 +508,58 @@ LdCacheOperator: ast::LdCacheOperator = {

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
 InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "mov" <t:MovType> <a:Arg2> => {
-        ast::Instruction::Mov(ast::MovDetails{ src_is_address: false, typ: t }, a)
-    },
-    "mov" <t:MovVectorType> <a:Arg2Vec> => {
-        ast::Instruction::MovVector(ast::MovVectorDetails{typ: t, length: 0}, a)
-    }
+    <m:MovNormal> => ast::Instruction::Mov(m.0, m.1),
+    <m:MovVector> => ast::Instruction::Mov(m.0, m.1),
 };

-#[inline]
-MovType: ast::MovType = {
-    <t:MovScalarType> => ast::MovType::Scalar(t),
-    <pref:VectorPrefix> <t:MovVectorType> => ast::MovType::Vector(t, pref)
+
+MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
+    "mov" <t:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {(
+        ast::MovDetails::new(ast::Type::Scalar(t)),
+        ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: ast::IdOrVector::Reg(dst), src: src.into() })
+    )},
+    "mov" <pref:VectorPrefix> <t:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {(
+        ast::MovDetails::new(ast::Type::Vector(t, pref)),
+        ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: dst, src: src })
+    )}
+}
+
+MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
+    "mov" <t:MovVectorType> <a:Arg2MovMember> => {(
+        ast::MovDetails::new(ast::Type::Scalar(t.into())),
+        ast::Arg2Mov::Member(a)
+    )},
 }

 #[inline]
-MovScalarType: ast::MovScalarType = {
-    ".b16" => ast::MovScalarType::B16,
-    ".b32" => ast::MovScalarType::B32,
-    ".b64" => ast::MovScalarType::B64,
-    ".u16" => ast::MovScalarType::U16,
-    ".u32" => ast::MovScalarType::U32,
-    ".u64" => ast::MovScalarType::U64,
-    ".s16" => ast::MovScalarType::S16,
-    ".s32" => ast::MovScalarType::S32,
-    ".s64" => ast::MovScalarType::S64,
-    ".f32" => ast::MovScalarType::F32,
-    ".f64" => ast::MovScalarType::F64,
-    ".pred" => ast::MovScalarType::Pred
+MovScalarType: ast::ScalarType = {
+    ".b16" => ast::ScalarType::B16,
+    ".b32" => ast::ScalarType::B32,
+    ".b64" => ast::ScalarType::B64,
+    ".u16" => ast::ScalarType::U16,
+    ".u32" => ast::ScalarType::U32,
+    ".u64" => ast::ScalarType::U64,
+    ".s16" => ast::ScalarType::S16,
+    ".s32" => ast::ScalarType::S32,
+    ".s64" => ast::ScalarType::S64,
+    ".f32" => ast::ScalarType::F32,
+    ".f64" => ast::ScalarType::F64,
+    ".pred" => ast::ScalarType::Pred
 };

 #[inline]
-MovVectorType: ast::MovVectorType = {
-    ".b16" => ast::MovVectorType::B16,
-    ".b32" => ast::MovVectorType::B32,
-    ".b64" => ast::MovVectorType::B64,
-    ".u16" => ast::MovVectorType::U16,
-    ".u32" => ast::MovVectorType::U32,
-    ".u64" => ast::MovVectorType::U64,
-    ".s16" => ast::MovVectorType::S16,
-    ".s32" => ast::MovVectorType::S32,
-    ".s64" => ast::MovVectorType::S64,
-    ".f32" => ast::MovVectorType::F32,
-    ".f64" => ast::MovVectorType::F64,
+MovVectorType: ast::ScalarType = {
+    ".b16" => ast::ScalarType::B16,
+    ".b32" => ast::ScalarType::B32,
+    ".b64" => ast::ScalarType::B64,
+    ".u16" => ast::ScalarType::U16,
+    ".u32" => ast::ScalarType::U32,
+    ".u64" => ast::ScalarType::U64,
+    ".s16" => ast::ScalarType::S16,
+    ".s32" => ast::ScalarType::S32,
+    ".s64" => ast::ScalarType::S64,
+    ".f32" => ast::ScalarType::F32,
+    ".f64" => ast::ScalarType::F64,
 };

 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
@ -902,7 +921,7 @@ ShlType: ast::ShlType = {
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
 // Warning: NVIDIA documentation is incorrect, you can specify scope only once
 InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:Operand> => {
+    "st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
        ast::Instruction::St(
            ast::StData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
@ -1044,13 +1063,13 @@ Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
 };

-Arg2Vec: ast::Arg2Vec<ast::ParsedArgParams<'input>> = {
-    <dst:VectorOperand> "," <src:ExtendedID> => ast::Arg2Vec::Dst(dst, dst.0, src),
-    <dst:ExtendedID> "," <src:VectorOperand> => ast::Arg2Vec::Src(dst, src),
-    <dst:VectorOperand> "," <src:VectorOperand> => ast::Arg2Vec::Both(dst, dst.0, src),
+Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
+    <dst:MemberOperand> "," <src:ExtendedID> => ast::Arg2MovMember::Dst(dst, dst.0, src),
+    <dst:ExtendedID> "," <src:MemberOperand> => ast::Arg2MovMember::Src(dst, src),
+    <dst:MemberOperand> "," <src:MemberOperand> => ast::Arg2MovMember::Both(dst, dst.0, src),
 };

-VectorOperand: (&'input str, u8) = {
+MemberOperand: (&'input str, u8) = {
    <pref:ExtendedID> "." <suf:ExtendedID> =>? {
        let suf_idx = vector_index(suf)?;
        Ok((pref, suf_idx))
@ -1061,6 +1080,15 @@ VectorOperand: (&'input str, u8) = {
    }
 };

+VectorExtract: Vec<&'input str> = {
+    "{" <r1:ExtendedID> "," <r2:ExtendedID> "}" => {
+        vec![r1, r2]
+    },
+    "{" <r1:ExtendedID> "," <r2:ExtendedID> "," <r3:ExtendedID> "," <r4:ExtendedID> "}" => {
+        vec![r1, r2, r3, r4]
+    },
+};
+
 Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
 };
--- a/ptx/src/test/spirv_run/ntid.spvtxt
+++ b/ptx/src/test/spirv_run/ntid.spvtxt
@ -4,15 +4,16 @@
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
+               OpCapability Float64
         %29 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "add" %GlobalSize
-               OpDecorate %GlobalSize BuiltIn GlobalSize
+               OpEntryPoint Kernel %1 "ntid" %gl_WorkGroupSize
+               OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
       %void = OpTypeVoid
       %uint = OpTypeInt 32 0
-     %v3uint = OpTypeVector %uint 3
-%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
- %GlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+     %v4uint = OpTypeVector %uint 4
+%_ptr_UniformConstant_v4uint = OpTypePointer UniformConstant %v4uint
+%gl_WorkGroupSize = OpVariable %_ptr_UniformConstant_v4uint UniformConstant
      %ulong = OpTypeInt 64 0
         %35 = OpTypeFunction %void %ulong %ulong
 %_ptr_Function_ulong = OpTypePointer Function %ulong
@ -40,7 +41,7 @@
         %25 = OpConvertUToPtr %_ptr_Generic_uint %16
         %15 = OpLoad %uint %25
               OpStore %6 %15
-         %18 = OpLoad %v3uint %GlobalSize
+         %18 = OpLoad %v4uint %gl_WorkGroupSize
         %24 = OpCompositeExtract %uint %18 0
         %17 = OpCopyObject %uint %24
               OpStore %7 %17
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs