diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 70550b2..6304aac 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -105,7 +105,7 @@ impl Default for ScalarType { pub enum Statement<'a> { Label(&'a str), Variable(Variable<'a>), - Instruction(Instruction), + Instruction(Option>, Instruction<'a>), } pub struct Variable<'a> { @@ -124,16 +124,95 @@ pub enum StateSpace { Shared, } -pub enum Instruction { - Ld, - Mov, - Mul, - Add, - Setp, - Not, - Bra, - Cvt, - Shl, - At, - Ret, +pub struct PredAt<'a> { + pub not: bool, + pub label: &'a str, } + +pub enum Instruction<'a> { + Ld(LdData, Arg2<'a>), + Mov(MovData, Arg2Mov<'a>), + Mul(MulData, Arg3<'a>), + Add(AddData, Arg3<'a>), + Setp(SetpData, Arg4<'a>), + SetpBool(SetpBoolData, Arg5<'a>), + Not(NotData, Arg2<'a>), + Bra(BraData, Arg1<'a>), + Cvt(CvtData, Arg2<'a>), + Shl(ShlData, Arg3<'a>), + St(StData, Arg2<'a>), + At(AtData, Arg1<'a>), + Ret(RetData), +} + +pub struct Arg1<'a> { + pub dst: &'a str, +} + +pub struct Arg2<'a> { + pub dst: &'a str, + pub src: Operand<'a>, +} + +pub struct Arg2Mov<'a> { + pub dst: &'a str, + pub src: MovOperand<'a>, +} + +pub struct Arg3<'a> { + pub dst: &'a str, + pub src1: Operand<'a>, + pub src2: Operand<'a>, +} + +pub struct Arg4<'a> { + pub dst1: &'a str, + pub dst2: Option<&'a str>, + pub src1: Operand<'a>, + pub src2: Operand<'a>, +} + +pub struct Arg5<'a> { + pub dst1: &'a str, + pub dst2: Option<&'a str>, + pub src1: Operand<'a>, + pub src2: Operand<'a>, + pub src3: Operand<'a>, +} + +pub enum Operand<'a> { + Reg(&'a str), + RegOffset(&'a str, i32), + Imm(i128), +} + +pub enum MovOperand<'a> { + Op(Operand<'a>), + Vec(&'a str, &'a str), +} + +pub struct LdData {} + +pub struct MovData {} + +pub struct MulData {} + +pub struct AddData {} + +pub struct SetpData {} + +pub struct SetpBoolData {} + +pub struct NotData {} + +pub struct BraData {} + +pub struct CvtData {} + +pub struct ShlData {} + +pub struct StData {} + +pub struct AtData {} + +pub struct RetData {} diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index 3ff5d9c..b051b79 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -139,7 +139,7 @@ Statement: Option> = { => Some(ast::Statement::Label(l)), DebugDirective => None, ";" => Some(ast::Statement::Variable(v)), - ";" => Some(ast::Statement::Instruction(i)) + ";" => Some(ast::Statement::Instruction(p, i)) }; DebugDirective: () = { @@ -175,7 +175,7 @@ VariableName: (&'input str, Option) = { } }; -Instruction = { +Instruction: ast::Instruction<'input> = { InstLd, InstMov, InstMul, @@ -190,8 +190,10 @@ Instruction = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld -InstLd: ast::Instruction = { - "ld" LdQualifier? LdStateSpace? LdCacheOperator? Vector? BaseType ID "," "[" ID "]" => ast::Instruction::Ld +InstLd: ast::Instruction<'input> = { + "ld" LdQualifier? LdStateSpace? LdCacheOperator? Vector? BaseType "," "[" "]" => { + ast::Instruction::Ld(ast::LdData{}, ast::Arg2{dst:dst, src:src}) + } }; LdQualifier: () = { @@ -222,8 +224,10 @@ LdCacheOperator = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov -InstMov: ast::Instruction = { - "mov" MovType ID "," Operand => ast::Instruction::Mov +InstMov: ast::Instruction<'input> = { + "mov" MovType => { + ast::Instruction::Mov(ast::MovData{}, a) + } }; MovType = { @@ -237,12 +241,16 @@ MovType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul -InstMul: ast::Instruction = { - "mul" MulIntControl? IntType ID "," Operand "," Operand => ast::Instruction::Mul, - "mul" RoundingMode? ".ftz"? ".sat"? ".f32" ID "," Operand "," Operand => ast::Instruction::Mul, - "mul" RoundingMode? ".f64" ID "," Operand "," Operand => ast::Instruction::Mul, - "mul" ".rn"? ".ftz"? ".sat"? ".f16" ID "," Operand "," Operand => ast::Instruction::Mul, - "mul" ".rn"? ".ftz"? ".sat"? ".f16x2" ID "," Operand "," Operand => ast::Instruction::Mul, +InstMul: ast::Instruction<'input> = { + "mul" => ast::Instruction::Mul(d, a) +}; + +InstMulMode: ast::MulData = { + MulIntControl? IntType => ast::MulData{}, + RoundingMode? ".ftz"? ".sat"? ".f32" => ast::MulData{}, + RoundingMode? ".f64" => ast::MulData{}, + ".rn"? ".ftz"? ".sat"? ".f16" => ast::MulData{}, + ".rn"? ".ftz"? ".sat"? ".f16x2" => ast::MulData{} }; MulIntControl = { @@ -262,19 +270,33 @@ IntType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add -InstAdd: ast::Instruction = { - "add" IntType ID "," Operand "," Operand => ast::Instruction::Add, - "add" ".sat" ".s32" ID "," Operand "," Operand => ast::Instruction::Add, - "add" RoundingMode? ".ftz"? ".sat"? ".f32" ID "," Operand "," Operand => ast::Instruction::Add, - "add" RoundingMode? ".f64" ID "," Operand "," Operand => ast::Instruction::Add, - "add" ".rn"? ".ftz"? ".sat"? ".f16" ID "," Operand "," Operand => ast::Instruction::Add, - "add" ".rn"? ".ftz"? ".sat"? ".f16x2" ID "," Operand "," Operand => ast::Instruction::Add, +InstAdd: ast::Instruction<'input> = { + "add" => ast::Instruction::Add(d, a) +}; + +InstAddMode: ast::AddData = { + IntType => ast::AddData{}, + ".sat" ".s32" => ast::AddData{}, + RoundingMode? ".ftz"? ".sat"? ".f32" => ast::AddData{}, + RoundingMode? ".f64" => ast::AddData{}, + ".rn"? ".ftz"? ".sat"? ".f16" => ast::AddData{}, + ".rn"? ".ftz"? ".sat"? ".f16x2" => ast::AddData{} }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp -InstSetp: ast::Instruction = { - "setp" SetpCmpOp ".ftz"? SetpType ID ("|" ID)? "," Operand "," Operand => ast::Instruction::Setp, - "setp" SetpCmpOp SetpBoolOp ".ftz"? SetpType ID ("|" ID)? "," Operand "," Operand "," "!"? ID => ast::Instruction::Setp +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp +// TODO: support f16 setp +InstSetp: ast::Instruction<'input> = { + "setp" => ast::Instruction::Setp(d, a), + "setp" => ast::Instruction::SetpBool(d, a), +}; + +SetpMode: ast::SetpData = { + SetpCmpOp ".ftz"? SetpType => ast::SetpData{} +}; + +SetpBoolMode: ast::SetpBoolData = { + SetpCmpOp SetpBoolOp ".ftz"? SetpType => ast::SetpBoolData{} }; SetpCmpOp = { @@ -294,8 +316,8 @@ SetpType = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not -InstNot: ast::Instruction = { - "not" NotType ID "," Operand => ast::Instruction::Not +InstNot: ast::Instruction<'input> = { + "not" NotType => ast::Instruction::Not(ast::NotData{}, a) }; NotType = { @@ -303,18 +325,21 @@ NotType = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at -InstAt: ast::Instruction = { - "@" "!"? ID => ast::Instruction::At +PredAt: ast::PredAt<'input> = { + "@" => ast::PredAt { not: false, label:label }, + "@" "!" => ast::PredAt { not: true, label:label } }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra -InstBra: ast::Instruction = { - InstAt? "bra" ".uni"? ID => ast::Instruction::Bra +InstBra: ast::Instruction<'input> = { + "bra" ".uni"? => ast::Instruction::Bra(ast::BraData{}, a) }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt -InstCvt: ast::Instruction = { - "cvt" CvtRnd? ".ftz"? ".sat"? CvtType CvtType ID "," Operand => ast::Instruction::Cvt +InstCvt: ast::Instruction<'input> = { + "cvt" CvtRnd? ".ftz"? ".sat"? CvtType CvtType => { + ast::Instruction::Cvt(ast::CvtData{}, a) + } }; CvtRnd = { @@ -337,8 +362,8 @@ CvtType = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl -InstShl: ast::Instruction = { - "shl" ShlType ID "," Operand "," Operand => ast::Instruction::Shl +InstShl: ast::Instruction<'input> = { + "shl" ShlType => ast::Instruction::Shl(ast::ShlData{}, a) }; ShlType = { @@ -346,8 +371,10 @@ ShlType = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st -InstSt: ast::Instruction = { - "st" LdQualifier? StStateSpace? StCacheOperator? Vector? BaseType "[" ID "]" "," Operand => ast::Instruction::Shl +InstSt: ast::Instruction<'input> = { + "st" LdQualifier? StStateSpace? StCacheOperator? Vector? BaseType "[" "]" "," => { + ast::Instruction::St(ast::StData{}, ast::Arg2{dst:dst, src:src}) + } }; StStateSpace = { @@ -365,31 +392,66 @@ StCacheOperator = { }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret -InstRet: ast::Instruction = { - "ret" ".uni"? => ast::Instruction::Ret +InstRet: ast::Instruction<'input> = { + "ret" ".uni"? => ast::Instruction::Ret(ast::RetData{}) }; -Operand: () = { - ID, - Num, - OffsetOperand, - ArrayOperand, - VectorOperand, +Operand: ast::Operand<'input> = { + => ast::Operand::Reg(r), + "+" => { + let offset = o.parse::(); + let offset = offset.unwrap_with(errors); + ast::Operand::RegOffset(r, offset) + }, + => { + let offset = o.parse::(); + let offset = offset.unwrap_with(errors); + ast::Operand::Imm(offset) + } }; -OffsetOperand = { - ID "+" Num, +MovOperand: ast::MovOperand<'input> = { + => ast::MovOperand::Op(o), + => { + let (pref, suf) = o; + ast::MovOperand::Vec(pref, suf) + } }; -ArrayOperand = { - ID "[" Num "]", +VectorOperand: (&'input str, &'input str) = { + "." => (pref, suf), + => (pref, &suf[1..]), }; -VectorOperand: () = { - ID "." ID, - ID DotID, +Arg1: ast::Arg1<'input> = { + => ast::Arg1{<>} }; +Arg2: ast::Arg2<'input> = { + "," => ast::Arg2{<>} +}; + +Arg2Mov: ast::Arg2Mov<'input> = { + "," => ast::Arg2Mov{<>} +}; + +Arg3: ast::Arg3<'input> = { + "," "," => ast::Arg3{<>} +}; + +Arg4: ast::Arg4<'input> = { + "," "," => ast::Arg4{<>} +}; + +// TODO: pass src3 negation somewhere +Arg5: ast::Arg5<'input> = { + "," "," "," "!"? => ast::Arg5{<>} +}; + +OptionalDst: &'input str = { + "|" => dst2 +} + Vector = { ".v2", ".v4" diff --git a/ptx/src/test/mod.rs b/ptx/src/test/mod.rs index 1de55bb..e12097a 100644 --- a/ptx/src/test/mod.rs +++ b/ptx/src/test/mod.rs @@ -12,7 +12,13 @@ fn empty() { } #[test] -fn vector_add() { +fn vectorAdd_kernel64_ptx() { let vector_add = include_str!("vectorAdd_kernel64.ptx"); parse_and_assert(vector_add); } + +#[test] +fn operands_ptx() { + let vector_add = include_str!("operands.ptx"); + parse_and_assert(vector_add); +} diff --git a/ptx/src/test/operands.ptx b/ptx/src/test/operands.ptx new file mode 100644 index 0000000..67c59f5 --- /dev/null +++ b/ptx/src/test/operands.ptx @@ -0,0 +1,33 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry foobar( + .param .u32 foobar_param_0 +) +{ + .reg .u32 %reg<10>; + .reg .u64 %reg_64; + .reg .pred p; + .reg .pred q; + + // reg + ld.param.u32 %reg0, [foobar_param_0]; + // reg with offset + ld.param.u32 %reg1, [foobar_param_0+1]; + ld.param.u32 %reg2, [foobar_param_0+-1]; + // immediate - only in local + ld.local.u32 %reg3, [1]; + + // ids + add.u32 %reg0, %reg1, %reg2; + // immediate + add.u32 %reg0, 1, %reg2; + // reg with offset + add.u32 %reg0, %reg1+1, %reg2+-1; + // suprisingly, setp accepts all forms + setp.eq.and.u32 p, %reg1+1, %reg2+-1, 2; + + // vector index - only supported by mov (maybe: ld, st, tex) + mov.u32 %reg0, %ntid.x; +} diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index f3abaf0..3f7ce9d 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -128,10 +128,30 @@ fn emit_function<'a>( builder.begin_block(Some(id))?; } ast::Statement::Variable(var) => panic!(), - ast::Statement::Instruction(i) => panic!(), + ast::Statement::Instruction(_,_) => panic!(), } } builder.ret()?; builder.end_function()?; Ok(()) } + +enum Statement { + Label, + Instruction(Instruction), + Phi(Vec) +} + +enum Instruction { + Ld, + Mov, + Mul, + Add, + Setp, + Not, + Bra, + Cvt, + Shl, + At, + Ret, +} \ No newline at end of file