Implement vector-destructuring mov/ld/st

This commit is contained in:
Andrzej Janik 2020-09-27 23:51:34 +02:00
parent 7c26568cbf
commit 1e0b35be4b
4 changed files with 1064 additions and 495 deletions

View file

@ -35,6 +35,19 @@ macro_rules! sub_scalar_type {
}
}
}
// Fallible narrowing from the full `ScalarType` into this sub-type: it succeeds
// only for the variants listed for `$name` in the macro invocation.
impl std::convert::TryFrom<ScalarType> for $name {
// The unit error carries no detail; it only signals "not part of this subset".
type Error = ();
fn try_from(t: ScalarType) -> Result<Self, Self::Error> {
match t {
// One arm per listed variant, mapped to the same-named variant of `$name`.
$(
ScalarType::$variant => Ok($name::$variant),
)+
// Every other `ScalarType` variant is rejected.
_ => Err(()),
}
}
}
};
}
@ -159,20 +172,20 @@ pub struct Module<'a> {
pub functions: Vec<ParsedFunction<'a>>,
}
pub enum MethodDecl<'a, P: ArgParams> {
Func(Vec<FnArgument<P>>, P::ID, Vec<FnArgument<P>>),
Kernel(&'a str, Vec<KernelArgument<P>>),
pub enum MethodDecl<'a, ID> {
Func(Vec<FnArgument<ID>>, ID, Vec<FnArgument<ID>>),
Kernel(&'a str, Vec<KernelArgument<ID>>),
}
pub type FnArgument<P> = Variable<FnArgumentType, P>;
pub type KernelArgument<P> = Variable<VariableParamType, P>;
pub type FnArgument<ID> = Variable<FnArgumentType, ID>;
pub type KernelArgument<ID> = Variable<VariableParamType, ID>;
pub struct Function<'a, P: ArgParams, S> {
pub func_directive: MethodDecl<'a, P>,
pub struct Function<'a, ID, S> {
pub func_directive: MethodDecl<'a, ID>,
pub body: Option<Vec<S>>,
}
pub type ParsedFunction<'a> = Function<'a, ParsedArgParams<'a>, Statement<ParsedArgParams<'a>>>;
pub type ParsedFunction<'a> = Function<'a, &'a str, Statement<ParsedArgParams<'a>>>;
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum FnArgumentType {
@ -264,21 +277,21 @@ impl Default for ScalarType {
}
pub enum Statement<P: ArgParams> {
Label(P::ID),
Variable(MultiVariable<P>),
Instruction(Option<PredAt<P::ID>>, Instruction<P>),
Label(P::Id),
Variable(MultiVariable<P::Id>),
Instruction(Option<PredAt<P::Id>>, Instruction<P>),
Block(Vec<Statement<P>>),
}
pub struct MultiVariable<P: ArgParams> {
pub var: Variable<VariableType, P>,
pub struct MultiVariable<ID> {
pub var: Variable<VariableType, ID>,
pub count: Option<u32>,
}
pub struct Variable<T, P: ArgParams> {
pub struct Variable<T, ID> {
pub align: Option<u32>,
pub v_type: T,
pub name: P::ID,
pub name: ID,
}
#[derive(Eq, PartialEq, Copy, Clone)]
@ -315,9 +328,8 @@ pub struct PredAt<ID> {
}
pub enum Instruction<P: ArgParams> {
Ld(LdData, Arg2<P>),
Mov(MovDetails, Arg2<P>),
MovVector(MovVectorDetails, Arg2Vec<P>),
Ld(LdDetails, Arg2Ld<P>),
Mov(MovDetails, Arg2Mov<P>),
Mul(MulDetails, Arg3<P>),
Add(AddDetails, Arg3<P>),
Setp(SetpData, Arg4Setp<P>),
@ -337,11 +349,6 @@ pub enum Instruction<P: ArgParams> {
#[derive(Copy, Clone)]
pub struct MadFloatDesc {}
#[derive(Copy, Clone)]
pub struct MovVectorDetails {
pub typ: MovVectorType,
pub length: u8,
}
#[derive(Copy, Clone)]
pub struct AbsDetails {
pub flush_to_zero: bool,
@ -350,16 +357,18 @@ pub struct AbsDetails {
pub struct CallInst<P: ArgParams> {
pub uniform: bool,
pub ret_params: Vec<P::ID>,
pub func: P::ID,
pub ret_params: Vec<P::Id>,
pub func: P::Id,
pub param_list: Vec<P::CallOperand>,
}
pub trait ArgParams {
type ID;
type Id;
type Operand;
type IdOrVector;
type OperandOrVector;
type CallOperand;
type VecOperand;
type SrcMemberOperand;
}
pub struct ParsedArgParams<'a> {
@ -367,57 +376,73 @@ pub struct ParsedArgParams<'a> {
}
impl<'a> ArgParams for ParsedArgParams<'a> {
type ID = &'a str;
type Id = &'a str;
type Operand = Operand<&'a str>;
type CallOperand = CallOperand<&'a str>;
type VecOperand = (&'a str, u8);
type IdOrVector = IdOrVector<&'a str>;
type OperandOrVector = OperandOrVector<&'a str>;
type SrcMemberOperand = (&'a str, u8);
}
pub struct Arg1<P: ArgParams> {
pub src: P::ID, // it is a jump destination, but in terms of operands it is a source operand
pub src: P::Id, // it is a jump destination, but in terms of operands it is a source operand
}
pub struct Arg2<P: ArgParams> {
pub dst: P::ID,
pub dst: P::Id,
pub src: P::Operand,
}
/// Two-operand argument list used by `Instruction::Ld`.
///
/// Unlike `Arg2`, the destination is `P::IdOrVector` rather than a plain `P::Id`.
pub struct Arg2Ld<P: ArgParams> {
pub dst: P::IdOrVector,
pub src: P::Operand,
}
pub struct Arg2St<P: ArgParams> {
pub src1: P::Operand,
pub src2: P::Operand,
pub src2: P::OperandOrVector,
}
/// Argument list of `Instruction::Mov`, split by the two shapes a move can take.
pub enum Arg2Mov<P: ArgParams> {
/// Move whose operands are expressed through `P::IdOrVector` / `P::OperandOrVector`.
Normal(Arg2MovNormal<P>),
/// Move whose destination and/or source is an `(id, u8)` member reference
/// (see `Arg2MovMember`).
Member(Arg2MovMember<P>),
}
/// Operands of a non-member `mov`: the payload of `Arg2Mov::Normal`.
pub struct Arg2MovNormal<P: ArgParams> {
// Destination: a single id or a vector of ids, as defined by `P::IdOrVector`.
pub dst: P::IdOrVector,
// Source: an ordinary operand or a vector of ids, as defined by `P::OperandOrVector`.
pub src: P::OperandOrVector,
}
// We duplicate dst here because during further compilation
// composite dst and composite src will receive different ids
pub enum Arg2Vec<P: ArgParams> {
Dst((P::ID, u8), P::ID, P::ID),
Src(P::ID, P::VecOperand),
Both((P::ID, u8), P::ID, P::VecOperand),
pub enum Arg2MovMember<P: ArgParams> {
Dst((P::Id, u8), P::Id, P::Id),
Src(P::Id, P::SrcMemberOperand),
Both((P::Id, u8), P::Id, P::SrcMemberOperand),
}
pub struct Arg3<P: ArgParams> {
pub dst: P::ID,
pub dst: P::Id,
pub src1: P::Operand,
pub src2: P::Operand,
}
pub struct Arg4<P: ArgParams> {
pub dst: P::ID,
pub dst: P::Id,
pub src1: P::Operand,
pub src2: P::Operand,
pub src3: P::Operand,
}
pub struct Arg4Setp<P: ArgParams> {
pub dst1: P::ID,
pub dst2: Option<P::ID>,
pub dst1: P::Id,
pub dst2: Option<P::Id>,
pub src1: P::Operand,
pub src2: P::Operand,
}
pub struct Arg5<P: ArgParams> {
pub dst1: P::ID,
pub dst2: Option<P::ID>,
pub dst1: P::Id,
pub dst2: Option<P::Id>,
pub src1: P::Operand,
pub src2: P::Operand,
pub src3: P::Operand,
@ -436,12 +461,34 @@ pub enum CallOperand<ID> {
Imm(u32),
}
/// Either a single identifier or a list of identifiers.
///
/// `ParsedArgParams` uses this (with `ID = &str`) as its `IdOrVector` type,
/// i.e. for the destinations of `Arg2Ld` and `Arg2MovNormal`.
pub enum IdOrVector<ID> {
/// One identifier.
Reg(ID),
/// Several identifiers grouped together.
Vec(Vec<ID>)
}
/// An `Operand` extended with one extra case: a list of identifiers.
///
/// The first three variants mirror the `Operand` variants of the same name
/// (see the `From<Operand<T>>` impl); `Vec` has no `Operand` counterpart.
pub enum OperandOrVector<ID> {
/// A single identifier.
Reg(ID),
/// An identifier together with a signed 32-bit offset.
RegOffset(ID, i32),
/// An unsigned 32-bit immediate.
Imm(u32),
/// Several identifiers grouped together.
Vec(Vec<ID>)
}
/// Widens an `Operand` into an `OperandOrVector`.
///
/// Each operand kind is carried over, payload untouched, to the variant of the
/// same name; the `Vec` variant is never produced by this conversion.
impl<T> From<Operand<T>> for OperandOrVector<T> {
    fn from(operand: Operand<T>) -> Self {
        match operand {
            Operand::Imm(value) => Self::Imm(value),
            Operand::RegOffset(id, offset) => Self::RegOffset(id, offset),
            Operand::Reg(id) => Self::Reg(id),
        }
    }
}
/// Vector-width marker carried by vector types (the grammar builds
/// `Type::Vector(scalar, prefix)` from it).
// NOTE(review): presumably V2/V4 stand for 2- and 4-element vectors — confirm
// against the grammar rule that produces this enum.
pub enum VectorPrefix {
V2,
V4,
}
pub struct LdData {
pub struct LdDetails {
pub qualifier: LdStQualifier,
pub state_space: LdStateSpace,
pub caching: LdCacheOperator,
@ -482,45 +529,23 @@ pub enum LdCacheOperator {
Uncached,
}
sub_scalar_type!(MovScalarType {
B16,
B32,
B64,
U16,
U32,
U64,
S16,
S32,
S64,
F32,
F64,
Pred,
});
// pred vectors are illegal
sub_scalar_type!(MovVectorType {
B16,
B32,
B64,
U16,
U32,
U64,
S16,
S32,
S64,
F32,
F64,
});
#[derive(Copy, Clone)]
pub struct MovDetails {
pub typ: MovType,
pub typ: Type,
pub src_is_address: bool,
// two fields below are in use by member moves
pub dst_width: u8,
pub src_width: u8,
}
sub_type! {
MovType {
Scalar(MovScalarType),
Vector(MovVectorType, u8),
impl MovDetails {
pub fn new(typ: Type) -> Self {
MovDetails {
typ,
src_is_address: false,
dst_width: 0,
src_width: 0
}
}
}

View file

@ -194,7 +194,7 @@ TargetSpecifier = {
"map_f64_to_f32"
};
Directive: Option<ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>>> = {
Directive: Option<ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>> = {
AddressSize => None,
<f:Function> => Some(f),
File => None,
@ -205,7 +205,7 @@ AddressSize = {
".address_size" Num
};
Function: ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>> = {
Function: ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>> = {
LinkingDirective*
<func_directive:MethodDecl>
<body:FunctionBody> => ast::Function{<>}
@ -217,29 +217,29 @@ LinkingDirective = {
".weak"
};
MethodDecl: ast::MethodDecl<'input, ast::ParsedArgParams<'input>> = {
MethodDecl: ast::MethodDecl<'input, &'input str> = {
".entry" <name:ExtendedID> <params:KernelArguments> => ast::MethodDecl::Kernel(name, params),
".func" <ret_vals:FnArguments?> <name:ExtendedID> <params:FnArguments> => {
ast::MethodDecl::Func(ret_vals.unwrap_or_else(|| Vec::new()), name, params)
}
};
KernelArguments: Vec<ast::KernelArgument<ast::ParsedArgParams<'input>>> = {
KernelArguments: Vec<ast::KernelArgument<&'input str>> = {
"(" <args:Comma<KernelInput>> ")" => args
};
FnArguments: Vec<ast::FnArgument<ast::ParsedArgParams<'input>>> = {
FnArguments: Vec<ast::FnArgument<&'input str>> = {
"(" <args:Comma<FnInput>> ")" => args
};
KernelInput: ast::Variable<ast::VariableParamType, ast::ParsedArgParams<'input>> = {
KernelInput: ast::Variable<ast::VariableParamType, &'input str> = {
<v:ParamVariable> => {
let (align, v_type, name) = v;
ast::Variable{ align, v_type, name }
}
}
FnInput: ast::Variable<ast::FnArgumentType, ast::ParsedArgParams<'input>> = {
FnInput: ast::Variable<ast::FnArgumentType, &'input str> = {
<v:RegVariable> => {
let (align, v_type, name) = v;
let v_type = ast::FnArgumentType::Reg(v_type);
@ -320,7 +320,7 @@ Align: u32 = {
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
MultiVariable: ast::MultiVariable<ast::ParsedArgParams<'input>> = {
MultiVariable: ast::MultiVariable<&'input str> = {
<var:Variable> <count:VariableParam?> => ast::MultiVariable{<>}
}
@ -331,7 +331,7 @@ VariableParam: u32 = {
}
}
Variable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
Variable: ast::Variable<ast::VariableType, &'input str> = {
<v:RegVariable> => {
let (align, v_type, name) = v;
let v_type = ast::VariableType::Reg(v_type);
@ -356,7 +356,7 @@ RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
}
}
LocalVariable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
".local" <align:Align?> <t:SizedScalarType> <name:ExtendedID> => {
let v_type = ast::VariableType::Local(ast::VariableLocalType::Scalar(t));
ast::Variable {align, v_type, name}
@ -449,19 +449,29 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:ExtendedID> "," <src:MemoryOperand> => {
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
ast::Instruction::Ld(
ast::LdData {
ast::LdDetails {
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
typ: t
},
ast::Arg2 { dst:dst, src:src }
ast::Arg2Ld { dst:dst, src:src }
)
}
};
// Destination operand that is either a single identifier or a brace-enclosed
// identifier list (the latter is parsed by `VectorExtract`).
IdOrVector: ast::IdOrVector<&'input str> = {
<dst:ExtendedID> => ast::IdOrVector::Reg(dst),
<dst:VectorExtract> => ast::IdOrVector::Vec(dst)
}
// Source operand that is either an ordinary `Operand` (converted through
// `OperandOrVector::from`) or a brace-enclosed identifier list.
OperandOrVector: ast::OperandOrVector<&'input str> = {
<op:Operand> => ast::OperandOrVector::from(op),
// The binding is named `dst`, but the list parsed here is used as a source operand.
<dst:VectorExtract> => ast::OperandOrVector::Vec(dst)
}
LdStType: ast::Type = {
<v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v),
<t:LdStScalarType> => ast::Type::Scalar(t),
@ -498,49 +508,58 @@ LdCacheOperator: ast::LdCacheOperator = {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
"mov" <t:MovType> <a:Arg2> => {
ast::Instruction::Mov(ast::MovDetails{ src_is_address: false, typ: t }, a)
},
"mov" <t:MovVectorType> <a:Arg2Vec> => {
ast::Instruction::MovVector(ast::MovVectorDetails{typ: t, length: 0}, a)
}
<m:MovNormal> => ast::Instruction::Mov(m.0, m.1),
<m:MovVector> => ast::Instruction::Mov(m.0, m.1),
};
#[inline]
MovType: ast::MovType = {
<t:MovScalarType> => ast::MovType::Scalar(t),
<pref:VectorPrefix> <t:MovVectorType> => ast::MovType::Vector(t, pref)
// `mov` forms that do not address an individual vector member. Both rules
// yield `Arg2Mov::Normal` paired with `MovDetails::new(..)` for the parsed type.
MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
// Scalar move: identifier destination, ordinary operand source (widened via `into`).
"mov" <t:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {(
ast::MovDetails::new(ast::Type::Scalar(t)),
ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: ast::IdOrVector::Reg(dst), src: src.into() })
)},
// Vector-prefixed move: either side may be an identifier list.
"mov" <pref:VectorPrefix> <t:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {(
ast::MovDetails::new(ast::Type::Vector(t, pref)),
ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: dst, src: src })
)}
}
// `mov` forms in which the destination and/or the source is a vector member
// (see `Arg2MovMember`). The instruction type is recorded as a scalar type;
// `MovVectorType` yields `ast::ScalarType`, so it is handed to
// `ast::Type::Scalar` as-is, with no conversion.
MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
    "mov" <t:MovVectorType> <a:Arg2MovMember> => {(
        ast::MovDetails::new(ast::Type::Scalar(t)),
        ast::Arg2Mov::Member(a)
    )},
}
#[inline]
MovScalarType: ast::MovScalarType = {
".b16" => ast::MovScalarType::B16,
".b32" => ast::MovScalarType::B32,
".b64" => ast::MovScalarType::B64,
".u16" => ast::MovScalarType::U16,
".u32" => ast::MovScalarType::U32,
".u64" => ast::MovScalarType::U64,
".s16" => ast::MovScalarType::S16,
".s32" => ast::MovScalarType::S32,
".s64" => ast::MovScalarType::S64,
".f32" => ast::MovScalarType::F32,
".f64" => ast::MovScalarType::F64,
".pred" => ast::MovScalarType::Pred
MovScalarType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
".pred" => ast::ScalarType::Pred
};
#[inline]
MovVectorType: ast::MovVectorType = {
".b16" => ast::MovVectorType::B16,
".b32" => ast::MovVectorType::B32,
".b64" => ast::MovVectorType::B64,
".u16" => ast::MovVectorType::U16,
".u32" => ast::MovVectorType::U32,
".u64" => ast::MovVectorType::U64,
".s16" => ast::MovVectorType::S16,
".s32" => ast::MovVectorType::S32,
".s64" => ast::MovVectorType::S64,
".f32" => ast::MovVectorType::F32,
".f64" => ast::MovVectorType::F64,
MovVectorType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
@ -902,7 +921,7 @@ ShlType: ast::ShlType = {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
// Warning: the NVIDIA documentation is incorrect here; the scope can be specified only once
InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
"st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:Operand> => {
"st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
ast::Instruction::St(
ast::StData {
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
@ -1044,13 +1063,13 @@ Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
<dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
};
Arg2Vec: ast::Arg2Vec<ast::ParsedArgParams<'input>> = {
<dst:VectorOperand> "," <src:ExtendedID> => ast::Arg2Vec::Dst(dst, dst.0, src),
<dst:ExtendedID> "," <src:VectorOperand> => ast::Arg2Vec::Src(dst, src),
<dst:VectorOperand> "," <src:VectorOperand> => ast::Arg2Vec::Both(dst, dst.0, src),
Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
<dst:MemberOperand> "," <src:ExtendedID> => ast::Arg2MovMember::Dst(dst, dst.0, src),
<dst:ExtendedID> "," <src:MemberOperand> => ast::Arg2MovMember::Src(dst, src),
<dst:MemberOperand> "," <src:MemberOperand> => ast::Arg2MovMember::Both(dst, dst.0, src),
};
VectorOperand: (&'input str, u8) = {
MemberOperand: (&'input str, u8) = {
<pref:ExtendedID> "." <suf:ExtendedID> =>? {
let suf_idx = vector_index(suf)?;
Ok((pref, suf_idx))
@ -1061,6 +1080,15 @@ VectorOperand: (&'input str, u8) = {
}
};
// Brace-enclosed, comma-separated identifier list. Only lists of exactly two
// or exactly four identifiers are accepted; the identifiers are returned in
// source order.
VectorExtract: Vec<&'input str> = {
"{" <r1:ExtendedID> "," <r2:ExtendedID> "}" => {
vec![r1, r2]
},
"{" <r1:ExtendedID> "," <r2:ExtendedID> "," <r3:ExtendedID> "," <r4:ExtendedID> "}" => {
vec![r1, r2, r3, r4]
},
};
// Three-operand argument list: a destination identifier followed by two source
// operands. The bindings `dst`, `src1` and `src2` populate the `ast::Arg3`
// fields of the same names through the `<>` shorthand.
Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
};

View file

@ -4,15 +4,16 @@
OpCapability Kernel
OpCapability Int64
OpCapability Int8
OpCapability Float64
%29 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "add" %GlobalSize
OpDecorate %GlobalSize BuiltIn GlobalSize
OpEntryPoint Kernel %1 "ntid" %gl_WorkGroupSize
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
%GlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
%v4uint = OpTypeVector %uint 4
%_ptr_UniformConstant_v4uint = OpTypePointer UniformConstant %v4uint
%gl_WorkGroupSize = OpVariable %_ptr_UniformConstant_v4uint UniformConstant
%ulong = OpTypeInt 64 0
%35 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
@ -40,7 +41,7 @@
%25 = OpConvertUToPtr %_ptr_Generic_uint %16
%15 = OpLoad %uint %25
OpStore %6 %15
%18 = OpLoad %v3uint %GlobalSize
%18 = OpLoad %v4uint %gl_WorkGroupSize
%24 = OpCompositeExtract %uint %18 0
%17 = OpCopyObject %uint %24
OpStore %7 %17

File diff suppressed because it is too large Load diff