diff --git a/ptx/src/lib.rs b/ptx/src/lib.rs index 7681fcf..dc24b59 100644 --- a/ptx/src/lib.rs +++ b/ptx/src/lib.rs @@ -3,14 +3,7 @@ extern crate lalrpop_util; lalrpop_mod!(pub ptx); +mod test; + pub mod ast; pub use ast::Module as Module; - -#[test] -fn version() { - assert!(ptx::ModuleParser::new().parse(" - .version 6.5 - .target - .address_size 64 - ").unwrap() == ()); -} \ No newline at end of file diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index edc613b..01697cd 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -4,9 +4,19 @@ use super::ast; grammar; match { - r"\s*" => { }, + r"\s+" => { }, r"//[^\n\r]*[\n\r]*" => { }, r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, + "ld", + "texmode_unified", + "texmode_independent", + "debug", + "map_f64_to_f32", + r"sm_[0-9]+" => ShaderModel, +} else { + r"(?:[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+)<[0-9]+>" => ParametrizedID, +} +else { _ } @@ -17,11 +27,20 @@ pub Module: () = { }; Version = { - ".version" U8 "." U8 + ".version" VersionNumber }; +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target Target = { - ".target" + ".target" Comma +}; + +TargetSpecifier = { + ShaderModel, + "texmode_unified", + "texmode_independent", + "debug", + "map_f64_to_f32" }; Directive : () = { @@ -35,7 +54,7 @@ AddressSize = { }; Function: (bool, &'input str) = { - LinkingDirective* FunctionInput FunctionBody => (is, id) + LinkingDirective* "(" Comma ")" FunctionBody => (is, id) }; LinkingDirective = { @@ -50,7 +69,7 @@ IsKernel: bool = { }; FunctionInput = { - "(" (".param" Type ID)* ")" + ".param" Type ID }; FunctionBody = { @@ -63,46 +82,91 @@ StateSpaceSpecifier = { ".const", ".global", ".local", - ".param", ".shared" }; -Type = FundamentalType; +Type = { + BaseType, + ".pred", + ".f16", + ".f16x2", +}; -FundamentalType = { - ".s8", ".s16", ".s32", ".s64", - ".u8", ".u16", ".u32", ".u64", - ".f16", ".f16x2", ".f32", ".f64", +BaseType = { ".b8", ".b16", ".b32", ".b64", - ".pred" + ".u8", ".u16", ".u32", ".u64", + ".s8", ".s16", ".s32", ".s64", + ".f32", ".f64" }; Statement: () = { Label, - StateSpace, - Instruction + DebugDirective, + Variable ";", + Instruction ";" +}; + +DebugDirective = { + DebugLocation +}; + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc +DebugLocation = { + ".loc" Num Num Num }; Label = { ID ":" }; -StateSpace = { - StateSpaceSpecifier Type +Variable = { + StateSpaceSpecifier Type VariableName +}; + +VariableName = { + ID, + ParametrizedID }; Instruction = { - OpCode Operands ";" + InstLd }; -OpCode = { - "ld.param.u64" +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld +InstLd = { + "ld" LdQualifier? LdStateSpace? LdCacheOperator? Vector? BaseType ID "," "[" ID "]" }; -Operands = Comma; +LdQualifier = { + ".weak", + ".volatile", + ".relaxed.scope", + ".acquire.scope", +}; -Comma: Vec = { // (1) - ",")*> => match e { // (2) +LdStateSpace = { + ".const", + ".global", + ".local", + ".param", + ".shared", +}; + +LdCacheOperator = { + ".ca", + ".cg", + ".cs", + ".lu", + ".cv", +}; + +Vector = { + ".v2", + ".v4" +}; + +Comma: Vec = { + ",")*> => match e { None => v, Some(e) => { let mut v = v; @@ -112,10 +176,7 @@ Comma: Vec = { // (1) } }; -Operand = { - ID -}; - -U8: u8 = => u8::from_str(s).unwrap(); - -ID: &'input str = => s; \ No newline at end of file +VersionNumber = r"[0-9]+\.[0-9]+"; +Num: u64 = => u64::from_str(s).unwrap(); +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers +ID: &'input str = => s; \ No newline at end of file diff --git a/ptx/src/test/mod.rs b/ptx/src/test/mod.rs new file mode 100644 index 0000000..0198900 --- /dev/null +++ b/ptx/src/test/mod.rs @@ -0,0 +1,15 @@ +use super::ptx; + + +#[test] +fn empty() { + assert!(ptx::ModuleParser::new().parse( + ".version 6.5 .target sm_30, debug") + .unwrap() == ()); +} + +#[test] +fn vector_add() { + let vector_add = include_str!("vectorAdd_kernel64.ptx"); + assert!(ptx::ModuleParser::new().parse(vector_add).unwrap() == ()); +} \ No newline at end of file diff --git a/ptx/src/test/vectorAdd_kernel64.ptx b/ptx/src/test/vectorAdd_kernel64.ptx new file mode 100644 index 0000000..100cd93 --- /dev/null +++ b/ptx/src/test/vectorAdd_kernel64.ptx @@ -0,0 +1,592 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-27506705 +// Cuda compilation tools, release 10.2, V10.2.89 +// Based on LLVM 3.4svn +// + +.version 6.5 +.target sm_30, debug +.address_size 64 + + // .globl VecAdd_kernel + +.visible .entry VecAdd_kernel( + .param .u64 VecAdd_kernel_param_0, + .param .u64 VecAdd_kernel_param_1, + .param .u64 VecAdd_kernel_param_2, + .param .u32 VecAdd_kernel_param_3 +) +{ + .reg .pred %p<3>; + .reg .f32 %f<4>; + .reg .b32 %r<7>; + .reg .b64 %rd<13>; + + + .loc 1 21 1 +func_begin0: + .loc 1 0 0 + + .loc 1 21 1 + + ld.param.u64 %rd1, [VecAdd_kernel_param_0]; + ld.param.u64 %rd2, [VecAdd_kernel_param_1]; + ld.param.u64 %rd3, [VecAdd_kernel_param_2]; + ld.param.u32 %r2, [VecAdd_kernel_param_3]; +func_exec_begin0: + .loc 1 23 11 +tmp0: + mov.u32 %r3, %ntid.x; + mov.u32 %r4, %ctaid.x; + mul.lo.s32 %r5, %r3, %r4; + mov.u32 %r6, %tid.x; + add.s32 %r1, %r5, %r6; +tmp1: + .loc 1 25 5 + setp.lt.s32 %p1, %r1, %r2; + not.pred %p2, %p1; + @%p2 bra BB0_2; + bra.uni BB0_1; + +BB0_1: + .loc 1 26 9 +tmp2: + cvt.s64.s32 %rd4, %r1; + shl.b64 %rd5, %rd4, 2; + add.s64 %rd6, %rd1, %rd5; + ld.f32 %f1, [%rd6]; + cvt.s64.s32 %rd7, %r1; + shl.b64 %rd8, %rd7, 2; + add.s64 %rd9, %rd2, %rd8; + ld.f32 %f2, [%rd9]; + add.f32 %f3, %f1, %f2; + cvt.s64.s32 %rd10, %r1; + shl.b64 %rd11, %rd10, 2; + add.s64 %rd12, %rd3, %rd11; + st.f32 [%rd12], %f3; +tmp3: + +BB0_2: + .loc 1 27 1 + ret; +tmp4: +func_end0: +} + + .file 1 "/home/vosen/cuda-samples/0_Simple/vectorAddMMAP/vectorAdd_kernel.cu", 1581801938, 860 + +.section .debug_info { + .b32 314 + .b8 2 + .b8 0 + .b32 .debug_abbrev + .b8 8 + .b8 1 + + .b8 108 + .b8 103 + .b8 101 + .b8 110 + .b8 102 + .b8 101 + .b8 58 + .b8 32 + .b8 69 + .b8 68 + .b8 71 + .b8 32 + .b8 53 + .b8 46 + .b8 48 + + .b8 0 + .b8 4 + .b8 118 + .b8 101 + .b8 99 + .b8 116 + .b8 111 + .b8 114 + .b8 65 + .b8 100 + .b8 100 + .b8 95 + .b8 107 + .b8 101 + .b8 114 + .b8 110 + .b8 101 + .b8 108 + .b8 46 + .b8 99 + .b8 117 + + .b8 0 + .b64 0 + .b32 .debug_line + .b8 47 + .b8 104 + .b8 111 + .b8 109 + .b8 101 + .b8 47 + .b8 118 + .b8 111 + .b8 115 + .b8 101 + .b8 110 + .b8 47 + .b8 99 + .b8 117 + .b8 100 + .b8 97 + .b8 45 + .b8 115 + .b8 97 + .b8 109 + .b8 112 + .b8 108 + .b8 101 + .b8 115 + .b8 47 + .b8 48 + .b8 95 + .b8 83 + .b8 105 + .b8 109 + .b8 112 + .b8 108 + .b8 101 + .b8 47 + .b8 118 + .b8 101 + .b8 99 + .b8 116 + .b8 111 + .b8 114 + .b8 65 + .b8 100 + .b8 100 + .b8 77 + .b8 77 + .b8 65 + .b8 80 + + .b8 0 + .b8 2 + + .b8 86 + .b8 101 + .b8 99 + .b8 65 + .b8 100 + .b8 100 + .b8 95 + .b8 107 + .b8 101 + .b8 114 + .b8 110 + .b8 101 + .b8 108 + + .b8 0 + .b8 86 + .b8 101 + .b8 99 + .b8 65 + .b8 100 + .b8 100 + .b8 95 + .b8 107 + .b8 101 + .b8 114 + .b8 110 + .b8 101 + .b8 108 + + .b8 0 + .b8 1 + .b8 21 + .b32 278 + .b8 1 + .b64 func_begin0 + .b64 func_end0 + .b8 1 + .b8 156 + .b8 3 + + .b8 65 + + .b8 0 + .b8 1 + .b8 21 + .b32 284 + .b8 9 + .b8 3 + .b64 VecAdd_kernel_param_0 + .b8 7 + .b8 3 + + .b8 66 + + .b8 0 + .b8 1 + .b8 21 + .b32 284 + .b8 9 + .b8 3 + .b64 VecAdd_kernel_param_1 + .b8 7 + .b8 3 + + .b8 67 + + .b8 0 + .b8 1 + .b8 21 + .b32 304 + .b8 9 + .b8 3 + .b64 VecAdd_kernel_param_2 + .b8 7 + .b8 3 + + .b8 78 + + .b8 0 + .b8 1 + .b8 21 + .b32 310 + .b8 9 + .b8 3 + .b64 VecAdd_kernel_param_3 + .b8 7 + .b8 4 + + .b64 tmp0 + .b64 tmp4 + .b8 5 + + .b8 105 + + .b8 0 + .b8 1 + .b8 23 + .b32 310 + .b8 5 + .b8 144 + .b8 177 + .b8 228 + .b8 149 + .b8 1 + .b8 2 + .b8 0 + .b8 0 + .b8 6 + + .b8 118 + .b8 111 + .b8 105 + .b8 100 + + .b8 0 + .b8 7 + + .b32 290 + .b8 12 + .b8 8 + + .b32 295 + .b8 9 + + .b8 102 + .b8 108 + .b8 111 + .b8 97 + .b8 116 + + .b8 0 + .b8 4 + .b8 4 + .b8 7 + + .b32 295 + .b8 12 + .b8 9 + + .b8 105 + .b8 110 + .b8 116 + + .b8 0 + .b8 5 + .b8 4 + .b8 0 +} +.section .debug_abbrev { + .b8 1 + + .b8 17 + + .b8 1 + + .b8 37 + + .b8 8 + + .b8 19 + + .b8 11 + + .b8 3 + + .b8 8 + + .b8 17 + + .b8 1 + + .b8 16 + + .b8 6 + + .b8 27 + + .b8 8 + + .b8 0 + + .b8 0 + + .b8 2 + + .b8 46 + + .b8 1 + + .b8 135 + .b8 64 + + .b8 8 + + .b8 3 + + .b8 8 + + .b8 58 + + .b8 11 + + .b8 59 + + .b8 11 + + .b8 73 + + .b8 19 + + .b8 63 + + .b8 12 + + .b8 17 + + .b8 1 + + .b8 18 + + .b8 1 + + .b8 64 + + .b8 10 + + .b8 0 + + .b8 0 + + .b8 3 + + .b8 5 + + .b8 0 + + .b8 3 + + .b8 8 + + .b8 58 + + .b8 11 + + .b8 59 + + .b8 11 + + .b8 73 + + .b8 19 + + .b8 2 + + .b8 10 + + .b8 51 + + .b8 11 + + .b8 0 + + .b8 0 + + .b8 4 + + .b8 11 + + .b8 1 + + .b8 17 + + .b8 1 + + .b8 18 + + .b8 1 + + .b8 0 + + .b8 0 + + .b8 5 + + .b8 52 + + .b8 0 + + .b8 3 + + .b8 8 + + .b8 58 + + .b8 11 + + .b8 59 + + .b8 11 + + .b8 73 + + .b8 19 + + .b8 2 + + .b8 10 + + .b8 51 + + .b8 11 + + .b8 0 + + .b8 0 + + .b8 6 + + .b8 59 + + .b8 0 + + .b8 3 + + .b8 8 + + .b8 0 + + .b8 0 + + .b8 7 + + .b8 15 + + .b8 0 + + .b8 73 + + .b8 19 + + .b8 51 + + .b8 11 + + .b8 0 + + .b8 0 + + .b8 8 + + .b8 38 + + .b8 0 + + .b8 73 + + .b8 19 + + .b8 0 + + .b8 0 + + .b8 9 + + .b8 36 + + .b8 0 + + .b8 3 + + .b8 8 + + .b8 62 + + .b8 11 + + .b8 11 + + .b8 11 + + .b8 0 + + .b8 0 + + .b8 0 + +} +.section .debug_ranges { +} +.section .debug_pubnames { + .b32 32 + .b8 2 + .b8 0 + .b32 .debug_info + .b32 314 + .b32 109 + .b8 86 + .b8 101 + .b8 99 + .b8 65 + .b8 100 + .b8 100 + .b8 95 + .b8 107 + .b8 101 + .b8 114 + .b8 110 + .b8 101 + .b8 108 + .b8 0 + + .b32 0 +}