mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Implement vector-destructuring mov/ld/st
This commit is contained in:
parent
7c26568cbf
commit
1e0b35be4b
4 changed files with 1064 additions and 495 deletions
181
ptx/src/ast.rs
181
ptx/src/ast.rs
|
@ -35,6 +35,19 @@ macro_rules! sub_scalar_type {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallible narrowing from the full `ScalarType` into the generated subset type `$name`.
// Each `$variant` listed in the macro invocation maps to the same-named variant of
// `$name`; every other `ScalarType` value is rejected.
impl std::convert::TryFrom<ScalarType> for $name {
|
||||
// The error carries no information: it only signals "not part of this subset".
type Error = ();
|
||||
|
||||
fn try_from(t: ScalarType) -> Result<Self, Self::Error> {
|
||||
match t {
|
||||
// One arm is generated per variant named in the macro invocation.
$(
|
||||
ScalarType::$variant => Ok($name::$variant),
|
||||
)+
|
||||
// Any `ScalarType` variant that was not listed falls through to here.
_ => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -159,20 +172,20 @@ pub struct Module<'a> {
|
|||
pub functions: Vec<ParsedFunction<'a>>,
|
||||
}
|
||||
|
||||
pub enum MethodDecl<'a, P: ArgParams> {
|
||||
Func(Vec<FnArgument<P>>, P::ID, Vec<FnArgument<P>>),
|
||||
Kernel(&'a str, Vec<KernelArgument<P>>),
|
||||
pub enum MethodDecl<'a, ID> {
|
||||
Func(Vec<FnArgument<ID>>, ID, Vec<FnArgument<ID>>),
|
||||
Kernel(&'a str, Vec<KernelArgument<ID>>),
|
||||
}
|
||||
|
||||
pub type FnArgument<P> = Variable<FnArgumentType, P>;
|
||||
pub type KernelArgument<P> = Variable<VariableParamType, P>;
|
||||
pub type FnArgument<ID> = Variable<FnArgumentType, ID>;
|
||||
pub type KernelArgument<ID> = Variable<VariableParamType, ID>;
|
||||
|
||||
pub struct Function<'a, P: ArgParams, S> {
|
||||
pub func_directive: MethodDecl<'a, P>,
|
||||
pub struct Function<'a, ID, S> {
|
||||
pub func_directive: MethodDecl<'a, ID>,
|
||||
pub body: Option<Vec<S>>,
|
||||
}
|
||||
|
||||
pub type ParsedFunction<'a> = Function<'a, ParsedArgParams<'a>, Statement<ParsedArgParams<'a>>>;
|
||||
pub type ParsedFunction<'a> = Function<'a, &'a str, Statement<ParsedArgParams<'a>>>;
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Copy)]
|
||||
pub enum FnArgumentType {
|
||||
|
@ -264,21 +277,21 @@ impl Default for ScalarType {
|
|||
}
|
||||
|
||||
pub enum Statement<P: ArgParams> {
|
||||
Label(P::ID),
|
||||
Variable(MultiVariable<P>),
|
||||
Instruction(Option<PredAt<P::ID>>, Instruction<P>),
|
||||
Label(P::Id),
|
||||
Variable(MultiVariable<P::Id>),
|
||||
Instruction(Option<PredAt<P::Id>>, Instruction<P>),
|
||||
Block(Vec<Statement<P>>),
|
||||
}
|
||||
|
||||
pub struct MultiVariable<P: ArgParams> {
|
||||
pub var: Variable<VariableType, P>,
|
||||
pub struct MultiVariable<ID> {
|
||||
pub var: Variable<VariableType, ID>,
|
||||
pub count: Option<u32>,
|
||||
}
|
||||
|
||||
pub struct Variable<T, P: ArgParams> {
|
||||
pub struct Variable<T, ID> {
|
||||
pub align: Option<u32>,
|
||||
pub v_type: T,
|
||||
pub name: P::ID,
|
||||
pub name: ID,
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq, Copy, Clone)]
|
||||
|
@ -315,9 +328,8 @@ pub struct PredAt<ID> {
|
|||
}
|
||||
|
||||
pub enum Instruction<P: ArgParams> {
|
||||
Ld(LdData, Arg2<P>),
|
||||
Mov(MovDetails, Arg2<P>),
|
||||
MovVector(MovVectorDetails, Arg2Vec<P>),
|
||||
Ld(LdDetails, Arg2Ld<P>),
|
||||
Mov(MovDetails, Arg2Mov<P>),
|
||||
Mul(MulDetails, Arg3<P>),
|
||||
Add(AddDetails, Arg3<P>),
|
||||
Setp(SetpData, Arg4Setp<P>),
|
||||
|
@ -337,11 +349,6 @@ pub enum Instruction<P: ArgParams> {
|
|||
#[derive(Copy, Clone)]
|
||||
pub struct MadFloatDesc {}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct MovVectorDetails {
|
||||
pub typ: MovVectorType,
|
||||
pub length: u8,
|
||||
}
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct AbsDetails {
|
||||
pub flush_to_zero: bool,
|
||||
|
@ -350,16 +357,18 @@ pub struct AbsDetails {
|
|||
|
||||
pub struct CallInst<P: ArgParams> {
|
||||
pub uniform: bool,
|
||||
pub ret_params: Vec<P::ID>,
|
||||
pub func: P::ID,
|
||||
pub ret_params: Vec<P::Id>,
|
||||
pub func: P::Id,
|
||||
pub param_list: Vec<P::CallOperand>,
|
||||
}
|
||||
|
||||
pub trait ArgParams {
|
||||
type ID;
|
||||
type Id;
|
||||
type Operand;
|
||||
type IdOrVector;
|
||||
type OperandOrVector;
|
||||
type CallOperand;
|
||||
type VecOperand;
|
||||
type SrcMemberOperand;
|
||||
}
|
||||
|
||||
pub struct ParsedArgParams<'a> {
|
||||
|
@ -367,57 +376,73 @@ pub struct ParsedArgParams<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ArgParams for ParsedArgParams<'a> {
|
||||
type ID = &'a str;
|
||||
type Id = &'a str;
|
||||
type Operand = Operand<&'a str>;
|
||||
type CallOperand = CallOperand<&'a str>;
|
||||
type VecOperand = (&'a str, u8);
|
||||
type IdOrVector = IdOrVector<&'a str>;
|
||||
type OperandOrVector = OperandOrVector<&'a str>;
|
||||
type SrcMemberOperand = (&'a str, u8);
|
||||
}
|
||||
|
||||
pub struct Arg1<P: ArgParams> {
|
||||
pub src: P::ID, // it is a jump destination, but in terms of operands it is a source operand
|
||||
pub src: P::Id, // it is a jump destination, but in terms of operands it is a source operand
|
||||
}
|
||||
|
||||
pub struct Arg2<P: ArgParams> {
|
||||
pub dst: P::ID,
|
||||
pub dst: P::Id,
|
||||
pub src: P::Operand,
|
||||
}
|
||||
// Argument pair carried by `Instruction::Ld`.
// Unlike `Arg2`, the destination is `P::IdOrVector`, so it can be a single id or a
// list of ids; the source stays a plain `P::Operand`.
pub struct Arg2Ld<P: ArgParams> {
|
||||
pub dst: P::IdOrVector,
|
||||
pub src: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg2St<P: ArgParams> {
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
pub src2: P::OperandOrVector,
|
||||
}
|
||||
|
||||
// Arguments carried by `Instruction::Mov`.
pub enum Arg2Mov<P: ArgParams> {
|
||||
// Move between a destination (`IdOrVector`) and a source (`OperandOrVector`).
Normal(Arg2MovNormal<P>),
|
||||
// Move in which the destination, the source, or both name a single member
// together with a `u8` index (see `Arg2MovMember`).
Member(Arg2MovMember<P>),
|
||||
}
|
||||
|
||||
// Payload of `Arg2Mov::Normal`: both sides may be a single value or a list of ids,
// as expressed by the `IdOrVector` / `OperandOrVector` associated types.
pub struct Arg2MovNormal<P: ArgParams> {
|
||||
pub dst: P::IdOrVector,
|
||||
pub src: P::OperandOrVector,
|
||||
}
|
||||
|
||||
// We duplicate dst here because during further compilation
|
||||
// composite dst and composite src will receive different ids
|
||||
pub enum Arg2Vec<P: ArgParams> {
|
||||
Dst((P::ID, u8), P::ID, P::ID),
|
||||
Src(P::ID, P::VecOperand),
|
||||
Both((P::ID, u8), P::ID, P::VecOperand),
|
||||
pub enum Arg2MovMember<P: ArgParams> {
|
||||
Dst((P::Id, u8), P::Id, P::Id),
|
||||
Src(P::Id, P::SrcMemberOperand),
|
||||
Both((P::Id, u8), P::Id, P::SrcMemberOperand),
|
||||
}
|
||||
|
||||
pub struct Arg3<P: ArgParams> {
|
||||
pub dst: P::ID,
|
||||
pub dst: P::Id,
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg4<P: ArgParams> {
|
||||
pub dst: P::ID,
|
||||
pub dst: P::Id,
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
pub src3: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg4Setp<P: ArgParams> {
|
||||
pub dst1: P::ID,
|
||||
pub dst2: Option<P::ID>,
|
||||
pub dst1: P::Id,
|
||||
pub dst2: Option<P::Id>,
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg5<P: ArgParams> {
|
||||
pub dst1: P::ID,
|
||||
pub dst2: Option<P::ID>,
|
||||
pub dst1: P::Id,
|
||||
pub dst2: Option<P::Id>,
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
pub src3: P::Operand,
|
||||
|
@ -436,12 +461,34 @@ pub enum CallOperand<ID> {
|
|||
Imm(u32),
|
||||
}
|
||||
|
||||
// Either one identifier or a list of identifiers.
// The grammar's `IdOrVector` rule produces `Reg` from a plain id and `Vec` from a
// brace-enclosed list (`VectorExtract`).
pub enum IdOrVector<ID> {
|
||||
// A single identifier.
Reg(ID),
|
||||
// A list of identifiers.
Vec(Vec<ID>)
|
||||
}
|
||||
|
||||
// An operand that may also be a list of identifiers.
// `Reg`, `RegOffset` and `Imm` are the variants the `From<Operand<T>>` impl converts
// into; `Vec` is the additional list form.
pub enum OperandOrVector<ID> {
|
||||
Reg(ID),
|
||||
// Identifier paired with an `i32` offset.
RegOffset(ID, i32),
|
||||
// Immediate `u32` value.
Imm(u32),
|
||||
// A list of identifiers.
Vec(Vec<ID>)
|
||||
}
|
||||
|
||||
// Widening conversion: each `Operand` variant handled below maps to the same-named
// `OperandOrVector` variant with its payload moved unchanged.
// `OperandOrVector::Vec` is never produced by this conversion.
impl<T> From<Operand<T>> for OperandOrVector<T> {
|
||||
fn from(this: Operand<T>) -> Self {
|
||||
match this {
|
||||
Operand::Reg(r) => OperandOrVector::Reg(r),
|
||||
Operand::RegOffset(r, imm) => OperandOrVector::RegOffset(r, imm),
|
||||
Operand::Imm(imm) => OperandOrVector::Imm(imm),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum VectorPrefix {
|
||||
V2,
|
||||
V4,
|
||||
}
|
||||
|
||||
pub struct LdData {
|
||||
pub struct LdDetails {
|
||||
pub qualifier: LdStQualifier,
|
||||
pub state_space: LdStateSpace,
|
||||
pub caching: LdCacheOperator,
|
||||
|
@ -482,45 +529,23 @@ pub enum LdCacheOperator {
|
|||
Uncached,
|
||||
}
|
||||
|
||||
sub_scalar_type!(MovScalarType {
|
||||
B16,
|
||||
B32,
|
||||
B64,
|
||||
U16,
|
||||
U32,
|
||||
U64,
|
||||
S16,
|
||||
S32,
|
||||
S64,
|
||||
F32,
|
||||
F64,
|
||||
Pred,
|
||||
});
|
||||
|
||||
// pred vectors are illegal
|
||||
sub_scalar_type!(MovVectorType {
|
||||
B16,
|
||||
B32,
|
||||
B64,
|
||||
U16,
|
||||
U32,
|
||||
U64,
|
||||
S16,
|
||||
S32,
|
||||
S64,
|
||||
F32,
|
||||
F64,
|
||||
});
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct MovDetails {
|
||||
pub typ: MovType,
|
||||
pub typ: Type,
|
||||
pub src_is_address: bool,
|
||||
// two fields below are in use by member moves
|
||||
pub dst_width: u8,
|
||||
pub src_width: u8,
|
||||
}
|
||||
|
||||
sub_type! {
|
||||
MovType {
|
||||
Scalar(MovScalarType),
|
||||
Vector(MovVectorType, u8),
|
||||
impl MovDetails {
|
||||
// Builds a `MovDetails` for the given type with every other field at its
// baseline value: `src_is_address` is false and both widths are zero.
// NOTE(review): the struct comment says the width fields are used by member
// moves; presumably a later stage fills them in — not visible here.
pub fn new(typ: Type) -> Self {
|
||||
MovDetails {
|
||||
typ,
|
||||
src_is_address: false,
|
||||
dst_width: 0,
|
||||
src_width: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -194,7 +194,7 @@ TargetSpecifier = {
|
|||
"map_f64_to_f32"
|
||||
};
|
||||
|
||||
Directive: Option<ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>>> = {
|
||||
Directive: Option<ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>> = {
|
||||
AddressSize => None,
|
||||
<f:Function> => Some(f),
|
||||
File => None,
|
||||
|
@ -205,7 +205,7 @@ AddressSize = {
|
|||
".address_size" Num
|
||||
};
|
||||
|
||||
Function: ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement<ast::ParsedArgParams<'input>>> = {
|
||||
Function: ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>> = {
|
||||
LinkingDirective*
|
||||
<func_directive:MethodDecl>
|
||||
<body:FunctionBody> => ast::Function{<>}
|
||||
|
@ -217,29 +217,29 @@ LinkingDirective = {
|
|||
".weak"
|
||||
};
|
||||
|
||||
MethodDecl: ast::MethodDecl<'input, ast::ParsedArgParams<'input>> = {
|
||||
MethodDecl: ast::MethodDecl<'input, &'input str> = {
|
||||
".entry" <name:ExtendedID> <params:KernelArguments> => ast::MethodDecl::Kernel(name, params),
|
||||
".func" <ret_vals:FnArguments?> <name:ExtendedID> <params:FnArguments> => {
|
||||
ast::MethodDecl::Func(ret_vals.unwrap_or_else(|| Vec::new()), name, params)
|
||||
}
|
||||
};
|
||||
|
||||
KernelArguments: Vec<ast::KernelArgument<ast::ParsedArgParams<'input>>> = {
|
||||
KernelArguments: Vec<ast::KernelArgument<&'input str>> = {
|
||||
"(" <args:Comma<KernelInput>> ")" => args
|
||||
};
|
||||
|
||||
FnArguments: Vec<ast::FnArgument<ast::ParsedArgParams<'input>>> = {
|
||||
FnArguments: Vec<ast::FnArgument<&'input str>> = {
|
||||
"(" <args:Comma<FnInput>> ")" => args
|
||||
};
|
||||
|
||||
KernelInput: ast::Variable<ast::VariableParamType, ast::ParsedArgParams<'input>> = {
|
||||
KernelInput: ast::Variable<ast::VariableParamType, &'input str> = {
|
||||
<v:ParamVariable> => {
|
||||
let (align, v_type, name) = v;
|
||||
ast::Variable{ align, v_type, name }
|
||||
}
|
||||
}
|
||||
|
||||
FnInput: ast::Variable<ast::FnArgumentType, ast::ParsedArgParams<'input>> = {
|
||||
FnInput: ast::Variable<ast::FnArgumentType, &'input str> = {
|
||||
<v:RegVariable> => {
|
||||
let (align, v_type, name) = v;
|
||||
let v_type = ast::FnArgumentType::Reg(v_type);
|
||||
|
@ -320,7 +320,7 @@ Align: u32 = {
|
|||
};
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
|
||||
MultiVariable: ast::MultiVariable<ast::ParsedArgParams<'input>> = {
|
||||
MultiVariable: ast::MultiVariable<&'input str> = {
|
||||
<var:Variable> <count:VariableParam?> => ast::MultiVariable{<>}
|
||||
}
|
||||
|
||||
|
@ -331,7 +331,7 @@ VariableParam: u32 = {
|
|||
}
|
||||
}
|
||||
|
||||
Variable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
|
||||
Variable: ast::Variable<ast::VariableType, &'input str> = {
|
||||
<v:RegVariable> => {
|
||||
let (align, v_type, name) = v;
|
||||
let v_type = ast::VariableType::Reg(v_type);
|
||||
|
@ -356,7 +356,7 @@ RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
|
|||
}
|
||||
}
|
||||
|
||||
LocalVariable: ast::Variable<ast::VariableType, ast::ParsedArgParams<'input>> = {
|
||||
LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
|
||||
".local" <align:Align?> <t:SizedScalarType> <name:ExtendedID> => {
|
||||
let v_type = ast::VariableType::Local(ast::VariableLocalType::Scalar(t));
|
||||
ast::Variable {align, v_type, name}
|
||||
|
@ -449,19 +449,29 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
|||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:ExtendedID> "," <src:MemoryOperand> => {
|
||||
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
|
||||
ast::Instruction::Ld(
|
||||
ast::LdData {
|
||||
ast::LdDetails {
|
||||
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||
state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
|
||||
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||
typ: t
|
||||
},
|
||||
ast::Arg2 { dst:dst, src:src }
|
||||
ast::Arg2Ld { dst:dst, src:src }
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
// Destination that is either a plain identifier or a brace-enclosed list of
// identifiers (see `VectorExtract`).
IdOrVector: ast::IdOrVector<&'input str> = {
|
||||
<dst:ExtendedID> => ast::IdOrVector::Reg(dst),
|
||||
<dst:VectorExtract> => ast::IdOrVector::Vec(dst)
|
||||
}
|
||||
|
||||
// Source that is either an ordinary `Operand` (converted through
// `OperandOrVector::from`) or a brace-enclosed list of identifiers.
OperandOrVector: ast::OperandOrVector<&'input str> = {
|
||||
<op:Operand> => ast::OperandOrVector::from(op),
|
||||
<dst:VectorExtract> => ast::OperandOrVector::Vec(dst)
|
||||
}
|
||||
|
||||
LdStType: ast::Type = {
|
||||
<v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v),
|
||||
<t:LdStScalarType> => ast::Type::Scalar(t),
|
||||
|
@ -498,49 +508,58 @@ LdCacheOperator: ast::LdCacheOperator = {
|
|||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
||||
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"mov" <t:MovType> <a:Arg2> => {
|
||||
ast::Instruction::Mov(ast::MovDetails{ src_is_address: false, typ: t }, a)
|
||||
},
|
||||
"mov" <t:MovVectorType> <a:Arg2Vec> => {
|
||||
ast::Instruction::MovVector(ast::MovVectorDetails{typ: t, length: 0}, a)
|
||||
}
|
||||
<m:MovNormal> => ast::Instruction::Mov(m.0, m.1),
|
||||
<m:MovVector> => ast::Instruction::Mov(m.0, m.1),
|
||||
};
|
||||
|
||||
#[inline]
|
||||
MovType: ast::MovType = {
|
||||
<t:MovScalarType> => ast::MovType::Scalar(t),
|
||||
<pref:VectorPrefix> <t:MovVectorType> => ast::MovType::Vector(t, pref)
|
||||
|
||||
// `mov` forms that do not address an individual vector member.
// Both alternatives yield `Arg2Mov::Normal` together with a `MovDetails` built by
// `MovDetails::new`.
MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
|
||||
// Scalar form: the destination is a single id, wrapped in `IdOrVector::Reg`;
// the source `Operand` is converted with `.into()`.
"mov" <t:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {(
|
||||
ast::MovDetails::new(ast::Type::Scalar(t)),
|
||||
ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: ast::IdOrVector::Reg(dst), src: src.into() })
|
||||
)},
|
||||
// Vector form: requires a `VectorPrefix`; either side may be a brace-enclosed
// list of ids.
"mov" <pref:VectorPrefix> <t:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {(
|
||||
ast::MovDetails::new(ast::Type::Vector(t, pref)),
|
||||
ast::Arg2Mov::Normal(ast::Arg2MovNormal{ dst: dst, src: src })
|
||||
)}
|
||||
}
|
||||
|
||||
// `mov` forms whose operands come from `Arg2MovMember`.
// The instruction type is recorded as `Type::Scalar`, and the arguments are wrapped
// in `Arg2Mov::Member`.
MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
|
||||
"mov" <t:MovVectorType> <a:Arg2MovMember> => {(
|
||||
ast::MovDetails::new(ast::Type::Scalar(t.into())),
|
||||
ast::Arg2Mov::Member(a)
|
||||
)},
|
||||
}
|
||||
|
||||
#[inline]
|
||||
MovScalarType: ast::MovScalarType = {
|
||||
".b16" => ast::MovScalarType::B16,
|
||||
".b32" => ast::MovScalarType::B32,
|
||||
".b64" => ast::MovScalarType::B64,
|
||||
".u16" => ast::MovScalarType::U16,
|
||||
".u32" => ast::MovScalarType::U32,
|
||||
".u64" => ast::MovScalarType::U64,
|
||||
".s16" => ast::MovScalarType::S16,
|
||||
".s32" => ast::MovScalarType::S32,
|
||||
".s64" => ast::MovScalarType::S64,
|
||||
".f32" => ast::MovScalarType::F32,
|
||||
".f64" => ast::MovScalarType::F64,
|
||||
".pred" => ast::MovScalarType::Pred
|
||||
MovScalarType: ast::ScalarType = {
|
||||
".b16" => ast::ScalarType::B16,
|
||||
".b32" => ast::ScalarType::B32,
|
||||
".b64" => ast::ScalarType::B64,
|
||||
".u16" => ast::ScalarType::U16,
|
||||
".u32" => ast::ScalarType::U32,
|
||||
".u64" => ast::ScalarType::U64,
|
||||
".s16" => ast::ScalarType::S16,
|
||||
".s32" => ast::ScalarType::S32,
|
||||
".s64" => ast::ScalarType::S64,
|
||||
".f32" => ast::ScalarType::F32,
|
||||
".f64" => ast::ScalarType::F64,
|
||||
".pred" => ast::ScalarType::Pred
|
||||
};
|
||||
|
||||
#[inline]
|
||||
MovVectorType: ast::MovVectorType = {
|
||||
".b16" => ast::MovVectorType::B16,
|
||||
".b32" => ast::MovVectorType::B32,
|
||||
".b64" => ast::MovVectorType::B64,
|
||||
".u16" => ast::MovVectorType::U16,
|
||||
".u32" => ast::MovVectorType::U32,
|
||||
".u64" => ast::MovVectorType::U64,
|
||||
".s16" => ast::MovVectorType::S16,
|
||||
".s32" => ast::MovVectorType::S32,
|
||||
".s64" => ast::MovVectorType::S64,
|
||||
".f32" => ast::MovVectorType::F32,
|
||||
".f64" => ast::MovVectorType::F64,
|
||||
MovVectorType: ast::ScalarType = {
|
||||
".b16" => ast::ScalarType::B16,
|
||||
".b32" => ast::ScalarType::B32,
|
||||
".b64" => ast::ScalarType::B64,
|
||||
".u16" => ast::ScalarType::U16,
|
||||
".u32" => ast::ScalarType::U32,
|
||||
".u64" => ast::ScalarType::U64,
|
||||
".s16" => ast::ScalarType::S16,
|
||||
".s32" => ast::ScalarType::S32,
|
||||
".s64" => ast::ScalarType::S64,
|
||||
".f32" => ast::ScalarType::F32,
|
||||
".f64" => ast::ScalarType::F64,
|
||||
};
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
|
||||
|
@ -902,7 +921,7 @@ ShlType: ast::ShlType = {
|
|||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
|
||||
// Warning: NVIDIA documentation is incorrect, you can specify scope only once
|
||||
InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:Operand> => {
|
||||
"st" <q:LdStQualifier?> <ss:StStateSpace?> <cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
|
||||
ast::Instruction::St(
|
||||
ast::StData {
|
||||
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||
|
@ -1044,13 +1063,13 @@ Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
|
|||
<dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
|
||||
};
|
||||
|
||||
Arg2Vec: ast::Arg2Vec<ast::ParsedArgParams<'input>> = {
|
||||
<dst:VectorOperand> "," <src:ExtendedID> => ast::Arg2Vec::Dst(dst, dst.0, src),
|
||||
<dst:ExtendedID> "," <src:VectorOperand> => ast::Arg2Vec::Src(dst, src),
|
||||
<dst:VectorOperand> "," <src:VectorOperand> => ast::Arg2Vec::Both(dst, dst.0, src),
|
||||
Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
|
||||
<dst:MemberOperand> "," <src:ExtendedID> => ast::Arg2MovMember::Dst(dst, dst.0, src),
|
||||
<dst:ExtendedID> "," <src:MemberOperand> => ast::Arg2MovMember::Src(dst, src),
|
||||
<dst:MemberOperand> "," <src:MemberOperand> => ast::Arg2MovMember::Both(dst, dst.0, src),
|
||||
};
|
||||
|
||||
VectorOperand: (&'input str, u8) = {
|
||||
MemberOperand: (&'input str, u8) = {
|
||||
<pref:ExtendedID> "." <suf:ExtendedID> =>? {
|
||||
let suf_idx = vector_index(suf)?;
|
||||
Ok((pref, suf_idx))
|
||||
|
@ -1061,6 +1080,15 @@ VectorOperand: (&'input str, u8) = {
|
|||
}
|
||||
};
|
||||
|
||||
// Brace-enclosed, comma-separated list of identifiers.
// Only lists of exactly two or exactly four identifiers are accepted; the ids are
// returned in source order.
VectorExtract: Vec<&'input str> = {
|
||||
// Two-element form: `{a, b}`.
"{" <r1:ExtendedID> "," <r2:ExtendedID> "}" => {
|
||||
vec![r1, r2]
|
||||
},
|
||||
// Four-element form: `{a, b, c, d}`.
"{" <r1:ExtendedID> "," <r2:ExtendedID> "," <r3:ExtendedID> "," <r4:ExtendedID> "}" => {
|
||||
vec![r1, r2, r3, r4]
|
||||
},
|
||||
};
|
||||
|
||||
Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
|
||||
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
|
||||
};
|
||||
|
|
|
@ -4,15 +4,16 @@
|
|||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Int8
|
||||
OpCapability Float64
|
||||
%29 = OpExtInstImport "OpenCL.std"
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "add" %GlobalSize
|
||||
OpDecorate %GlobalSize BuiltIn GlobalSize
|
||||
OpEntryPoint Kernel %1 "ntid" %gl_WorkGroupSize
|
||||
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
|
||||
%void = OpTypeVoid
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
|
||||
%GlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
|
||||
%v4uint = OpTypeVector %uint 4
|
||||
%_ptr_UniformConstant_v4uint = OpTypePointer UniformConstant %v4uint
|
||||
%gl_WorkGroupSize = OpVariable %_ptr_UniformConstant_v4uint UniformConstant
|
||||
%ulong = OpTypeInt 64 0
|
||||
%35 = OpTypeFunction %void %ulong %ulong
|
||||
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||
|
@ -40,7 +41,7 @@
|
|||
%25 = OpConvertUToPtr %_ptr_Generic_uint %16
|
||||
%15 = OpLoad %uint %25
|
||||
OpStore %6 %15
|
||||
%18 = OpLoad %v3uint %GlobalSize
|
||||
%18 = OpLoad %v4uint %gl_WorkGroupSize
|
||||
%24 = OpCompositeExtract %uint %18 0
|
||||
%17 = OpCopyObject %uint %24
|
||||
OpStore %7 %17
|
||||
|
|
1229
ptx/src/translate.rs
1229
ptx/src/translate.rs
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue