From 87cc72494ec492d60011933b76d74d8a82d9393b Mon Sep 17 00:00:00 2001
From: Andrzej Janik
Date: Wed, 2 Sep 2020 02:03:13 +0200
Subject: [PATCH] Parse Linux vectorAdd debug PTX kernel

---
 notcuda/src/impl/export_table.rs | 29 +++++++++-
 notcuda/src/impl/mod.rs          |  8 ++-
 notcuda/src/impl/module.rs       | 44 +++++++++++++++
 notcuda/src/lib.rs               |  1 +
 ptx/Cargo.toml                   |  4 +-
 ptx/src/ast.rs                   | 31 ++++++++++-
 ptx/src/lib.rs                   |  7 ++-
 ptx/src/ptx.lalrpop              | 94 ++++++++++++++++++++++++++++----
 ptx/src/test/mod.rs              |  8 +++
 ptx/src/translate.rs             | 21 ++++++-
 10 files changed, 223 insertions(+), 24 deletions(-)
 create mode 100644 notcuda/src/impl/module.rs

diff --git a/notcuda/src/impl/export_table.rs b/notcuda/src/impl/export_table.rs
index 3df07ed..afd9077 100644
--- a/notcuda/src/impl/export_table.rs
+++ b/notcuda/src/impl/export_table.rs
@@ -4,10 +4,13 @@ use crate::{
     cuda_impl,
 };
 
-use super::{context, device, Decuda, Encuda};
+use super::{context, device, module, Decuda, Encuda};
 use std::mem;
 use std::os::raw::{c_uint, c_ulong, c_ushort};
-use std::{ffi::c_void, ptr, slice};
+use std::{
+    ffi::{c_void, CStr, CString},
+    ptr, slice,
+};
 
 pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresult {
     if table == ptr::null_mut() || id == ptr::null_mut() {
@@ -204,6 +207,12 @@ unsafe extern "C" fn get_module_from_cubin(
     {
         return CUresult::CUDA_ERROR_INVALID_VALUE;
     }
+    let result = result.decuda();
+    let mut dev_count = 0;
+    let cu_result = device::get_count(&mut dev_count);
+    if cu_result != CUresult::CUDA_SUCCESS {
+        return cu_result;
+    }
     let fatbin_header = (*fatbinc_wrapper).data;
     if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
         return CUresult::CUDA_ERROR_INVALID_VALUE;
     }
@@ -219,7 +228,21 @@ unsafe extern "C" fn get_module_from_cubin(
         );
         let kernel_text =
             lz4::block::decompress(slice, Some((*file).uncompressed_payload as i32)).unwrap();
-        return CUresult::CUDA_SUCCESS;
+        let kernel_text_string = match CStr::from_bytes_with_nul(&kernel_text) {
+            Ok(c_str) => match c_str.to_str() {
+                Ok(s) => s,
+                Err(_) => continue,
+            },
+            Err(_) => continue,
+        };
+        let module = module::Module::compile(kernel_text_string, dev_count as usize);
+        match module {
+            Ok(module) => {
+                *result = Box::into_raw(Box::new(module));
+                return CUresult::CUDA_SUCCESS;
+            }
+            Err(_) => continue,
+        }
     }
     CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
 }
diff --git a/notcuda/src/impl/mod.rs b/notcuda/src/impl/mod.rs
index ded2ff4..7813532 100644
--- a/notcuda/src/impl/mod.rs
+++ b/notcuda/src/impl/mod.rs
@@ -1,4 +1,4 @@
-use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUresult};
+use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUresult, CUmodule};
 use std::{ffi::c_void, mem::ManuallyDrop, os::raw::c_int, sync::Mutex};
 
 #[cfg(test)]
@@ -8,6 +8,7 @@ pub mod context;
 pub mod device;
 pub mod export_table;
 pub mod memory;
+pub mod module;
 
 #[cfg(debug_assertions)]
 pub fn unimplemented() -> CUresult {
@@ -232,3 +233,8 @@ impl Decuda<*mut c_void> for CUdeviceptr {
         self.0 as *mut _
     }
 }
+
+impl<'a> CudaRepr for CUmodule {
+    type Impl = *mut module::Module;
+}
+
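For orientation, the get_module_from_cubin hunk above reduces to the following standalone sketch: each fatbin entry carries an lz4-block-compressed, NUL-terminated PTX string that is decompressed and checked for UTF-8 before being handed to module::Module::compile. The helper name and the Option-based error handling below are illustrative only and not part of the patch; only lz4::block::decompress and the CStr conversion are taken from the hunk itself.

    use std::ffi::CStr;

    // Illustrative only: mirrors the decompression/validation steps in the hunk above.
    fn fatbin_entry_to_ptx(compressed: &[u8], uncompressed_len: i32) -> Option<String> {
        // The lz4 block format needs the expected decompressed size passed in explicitly.
        let bytes = lz4::block::decompress(compressed, Some(uncompressed_len)).ok()?;
        // The embedded PTX is stored as a NUL-terminated C string: check the terminator
        // first, then require valid UTF-8 before treating it as &str.
        let text = CStr::from_bytes_with_nul(&bytes).ok()?.to_str().ok()?;
        Some(text.to_owned())
    }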
diff --git a/notcuda/src/impl/module.rs b/notcuda/src/impl/module.rs
new file mode 100644
index 0000000..feae40b
--- /dev/null
+++ b/notcuda/src/impl/module.rs
@@ -0,0 +1,44 @@
+use ptx;
+
+pub struct Module {
+    spirv_code: Vec<u32>,
+    compiled_code: Vec<Option<Vec<u8>>>, // size as big as the number of devices
+}
+
+pub enum ModuleCompileError<'a> {
+    Parse(
+        Vec<ptx::ast::PtxError>,
+        Option<ptx::ParseError<usize, ptx::Token<'a>, &'a str>>,
+    ),
+    Compile(ptx::SpirvError),
+}
+
+impl<'a> ModuleCompileError<'a> {
+    pub fn get_build_log(&self) {
+
+    }
+
+}
+
+impl<'a> From<ptx::SpirvError> for ModuleCompileError<'a> {
+    fn from(err: ptx::SpirvError) -> Self {
+        ModuleCompileError::Compile(err)
+    }
+}
+
+impl Module {
+    pub fn compile(ptx_text: &str, devices: usize) -> Result<Self, ModuleCompileError> {
+        let mut errors = Vec::new();
+        let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text);
+        let ast = match ast {
+            Err(e) => return Err(ModuleCompileError::Parse(errors, Some(e))),
+            Ok(_) if errors.len() > 0 => return Err(ModuleCompileError::Parse(errors, None)),
+            Ok(ast) => ast,
+        };
+        let spirv = ptx::to_spirv(ast)?;
+        Ok(Self {
+            spirv_code: spirv,
+            compiled_code: vec![None; devices],
+        })
+    }
+}
diff --git a/notcuda/src/lib.rs b/notcuda/src/lib.rs
index 48f5f85..0f7d014 100644
--- a/notcuda/src/lib.rs
+++ b/notcuda/src/lib.rs
@@ -8,6 +8,7 @@ extern crate lz4;
 #[cfg(test)]
 #[macro_use]
 extern crate paste;
+extern crate ptx;
 
 #[allow(warnings)]
 mod cuda;
diff --git a/ptx/Cargo.toml b/ptx/Cargo.toml
index 9842e27..42d60cb 100644
--- a/ptx/Cargo.toml
+++ b/ptx/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2018"
 [lib]
 
 [dependencies]
-lalrpop-util = "0.18.1"
+lalrpop-util = "0.19"
 regex = "1"
 rspirv = "0.6"
 spirv_headers = "1.4"
@@ -16,7 +16,7 @@ bit-vec = "0.6"
 half ="1.6"
 
 [build-dependencies.lalrpop]
-version = "0.18.1"
+version = "0.19"
 features = ["lexer"]
 
 [dev-dependencies]
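A hedged usage sketch of the compile path introduced in the new module.rs above: lalrpop's error recovery pushes recoverable problems into the errors vector while still trying to return an AST, and only a clean parse goes on to SPIR-V emission. The function name is hypothetical and the Result<Vec<u32>, SpirvError> return type of to_spirv is an assumption inferred from how Module::compile stores its output, not something the patch states explicitly.

    // Hypothetical driver, not part of the patch.
    fn compile_to_spirv(src: &str) -> Result<Vec<u32>, String> {
        let mut errors = Vec::new();
        match ptx::ModuleParser::new().parse(&mut errors, src) {
            // Clean parse: try to lower the AST to a SPIR-V word stream.
            Ok(ast) if errors.is_empty() => ptx::to_spirv(ast).map_err(|e| format!("{:?}", e)),
            // Parser recovered but recorded problems along the way.
            Ok(_) => Err(format!("{} recovered parse errors", errors.len())),
            // Unrecoverable parse failure.
            Err(e) => Err(format!("{:?}", e)),
        }
    }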

diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs
index ed58d42..5de1db6 100644
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@@ -11,6 +11,7 @@ quick_error! {
         }
         SyntaxError {}
         NonF32Ftz {}
+        WrongArrayType {}
     }
 }
 
@@ -50,11 +51,16 @@ pub struct Module<'a> {
     pub functions: Vec<Function<'a>>,
 }
 
+pub enum FunctionReturn<'a> {
+    Func(Vec<Argument<'a>>),
+    Kernel,
+}
+
 pub struct Function<'a> {
-    pub kernel: bool,
+    pub func_directive: FunctionReturn<'a>,
     pub name: &'a str,
     pub args: Vec<Argument<'a>>,
-    pub body: Vec<Statement<ParsedArgParams<'a>>>,
+    pub body: Option<Vec<Statement<ParsedArgParams<'a>>>>,
 }
 
 #[derive(Default)]
@@ -68,6 +74,7 @@ pub struct Argument<'a> {
 pub enum Type {
     Scalar(ScalarType),
     ExtendedScalar(ExtendedScalarType),
+    Array(ScalarType, u32),
 }
 
 impl From<ScalarType> for Type {
@@ -173,10 +180,12 @@ pub enum Statement<P: ArgParams> {
     Label(P::ID),
     Variable(Variable<P>),
     Instruction(Option<PredAt<P::ID>>, Instruction<P>),
+    Block(Vec<Statement<P>>),
 }
 
 pub struct Variable<P: ArgParams> {
     pub space: StateSpace,
+    pub align: Option<u32>,
     pub v_type: Type,
     pub name: P::ID,
     pub count: Option<u32>,
@@ -190,6 +199,7 @@ pub enum StateSpace {
     Global,
     Local,
     Shared,
+    Param,
 }
 
 pub struct PredAt<ID> {
@@ -211,6 +221,23 @@ pub enum Instruction<P: ArgParams> {
     Shl(ShlType, Arg3<P>),
     St(StData, Arg2St<P>),
     Ret(RetData),
+    Call(CallData, ArgCall<P>),
+    Abs(AbsDetails, Arg2<P>),
+}
+
+pub struct CallData {
+    pub uniform: bool,
+}
+
+pub struct AbsDetails {
+    pub flush_to_zero: bool,
+    pub typ: ScalarType
+}
+
+pub struct ArgCall<P: ArgParams> {
+    pub ret_params: Vec<P::ID>,
+    pub func: P::ID,
+    pub param_list: Vec<P::ID>,
 }
 
 pub trait ArgParams {
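The new Statement::Block variant above nests arbitrarily, and the translation pass later in this patch still stubs it out with todo!(). One plausible shape for the eventual lowering is a plain recursive flatten; the helper below is hypothetical (not part of the patch) and assumes block scoping can be ignored at this stage.

    // Hypothetical helper: hoist nested { ... } blocks into a flat statement list
    // before SSA conversion, ignoring scoping for the moment.
    fn flatten_blocks<P: ast::ArgParams>(
        stmts: Vec<ast::Statement<P>>,
        out: &mut Vec<ast::Statement<P>>,
    ) {
        for s in stmts {
            match s {
                ast::Statement::Block(inner) => flatten_blocks(inner, out),
                other => out.push(other),
            }
        }
    }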
diff --git a/ptx/src/lib.rs b/ptx/src/lib.rs
index 6912d92..03d6d58 100644
--- a/ptx/src/lib.rs
+++ b/ptx/src/lib.rs
@@ -27,8 +27,11 @@ pub mod ast;
 mod test;
 mod translate;
 
-pub use ast::Module;
-pub use translate::to_spirv;
+pub use lalrpop_util::ParseError as ParseError;
+pub use lalrpop_util::lexer::Token as Token;
+pub use crate::ptx::ModuleParser as ModuleParser;
+pub use translate::to_spirv as to_spirv;
+pub use rspirv::dr::Error as SpirvError;
 
 pub(crate) fn without_none<T>(x: Vec<Option<T>>) -> Vec<T> {
     x.into_iter().filter_map(|x| x).collect()
 }
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index 66e831e..7438e97 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -24,6 +24,7 @@ match {
     "|",
     ".acquire",
     ".address_size",
+    ".align",
     ".and",
     ".b16",
     ".b32",
@@ -108,8 +109,10 @@ match {
     ".xor",
 } else {
     // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
+    "abs",
     "add",
     "bra",
+    "call",
     "cvt",
     "cvta",
     "debug",
@@ -135,8 +138,10 @@ match {
 }
 
 ExtendedID : &'input str = {
+    "abs",
     "add",
     "bra",
+    "call",
     "cvt",
     "cvta",
     "debug",
@@ -197,9 +202,9 @@ AddressSize = {
 
 Function: ast::Function<'input> = {
     LinkingDirective*
-    <kernel:IsKernel>
+    <func_directive:FunctionReturn>
     <name:ExtendedID>
-    "(" <args:Comma<FunctionInput>> ")"
+    <args:Arguments>
     <body:FunctionBody>
     => ast::Function{<>}
 };
@@ -209,11 +214,15 @@ LinkingDirective = {
     ".weak"
 };
 
-IsKernel: bool = {
-    ".entry" => true,
-    ".func" => false
+FunctionReturn: ast::FunctionReturn<'input> = {
+    ".entry" => ast::FunctionReturn::Kernel,
+    ".func" <args:Arguments?> => ast::FunctionReturn::Func(args.unwrap_or_else(|| Vec::new()))
 };
 
+Arguments: Vec<ast::Argument<'input>> = {
+    "(" <args:Comma<FunctionInput>> ")" => args
+}
+
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
 FunctionInput: ast::Argument<'input> = {
     ".param" <_type:ScalarType> <name:ExtendedID> => {
@@ -226,8 +235,9 @@ FunctionInput: ast::Argument<'input> = {
     }
 };
 
-pub(crate) FunctionBody: Vec<ast::Statement<ast::ParsedArgParams<'input>>> = {
-    "{" <s:Statement*> "}" => { without_none(s) }
+pub(crate) FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = {
+    "{" <s:Statement*> "}" => { Some(without_none(s)) },
+    ";" => { None }
 };
 
 StateSpaceSpecifier: ast::StateSpace = {
@@ -236,7 +246,8 @@ StateSpaceSpecifier: ast::StateSpace = {
     ".const" => ast::StateSpace::Const,
     ".global" => ast::StateSpace::Global,
     ".local" => ast::StateSpace::Local,
-    ".shared" => ast::StateSpace::Shared
+    ".shared" => ast::StateSpace::Shared,
+    ".param" => ast::StateSpace::Param, // used to prepare function call
 };
 
@@ -276,7 +287,8 @@ Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = {
     <l:Label> => Some(ast::Statement::Label(l)),
     DebugDirective => None,
     <v:Variable> ";" => Some(ast::Statement::Variable(v)),
-    <p:PredAt?> <i:Instruction> ";" => Some(ast::Statement::Instruction(p, i))
+    <p:PredAt?> <i:Instruction> ";" => Some(ast::Statement::Instruction(p, i)),
+    "{" <s:Statement*> "}" => Some(ast::Statement::Block(without_none(s)))
 };
 
 DebugDirective: () = {
@@ -292,10 +304,32 @@ Label: &'input str = {
     <id:ExtendedID> ":" => id
 };
 
+Align: u32 = {
+    ".align" <a:Num> => {
+        let align = a.parse::<u32>();
+        align.unwrap_with(errors)
+    }
+};
+
 Variable: ast::Variable<ast::ParsedArgParams<'input>> = {
-    <s:StateSpaceSpecifier> <t:Type> <v:VariableName> => {
+    <s:StateSpaceSpecifier> <a:Align?> <t:Type> <v:VariableName> <arr:ArraySpecifier?> => {
         let (name, count) = v;
-        ast::Variable { space: s, v_type: t, name: name, count: count }
+        let t = match (t, arr) {
+            (ast::Type::Scalar(st), Some(arr_size)) => ast::Type::Array(st, arr_size),
+            (t, Some(_)) => {
+                errors.push(ast::PtxError::WrongArrayType);
+                t
+            },
+            (t, None) => t,
+        };
+        ast::Variable { space: s, align: a, v_type: t, name: name, count: count }
+    }
+};
+
+ArraySpecifier: u32 = {
+    "[" <n:Num> "]" => {
+        let size = n.parse::<u32>();
+        size.unwrap_with(errors)
+    }
 };
 
@@ -326,6 +360,8 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
     InstSt,
     InstRet,
     InstCvta,
+    InstCall,
+    InstAbs,
 };
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
@@ -819,6 +855,36 @@ CvtaSize: ast::CvtaSize = {
     ".u64" => ast::CvtaSize::U64,
 }
 
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
+InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = {
+    "call" <u:".uni"?> <a:ArgCall> => ast::Instruction::Call(ast::CallData { uniform: u.is_some() }, a)
+};
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
+InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
+    "abs" <t:SignedIntType> <a:Arg2> => {
+        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: t }, a)
+    },
+    "abs" <f:".ftz"?> ".f32" <a:Arg2> => {
+        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F32 }, a)
+    },
+    "abs" ".f64" <a:Arg2> => {
+        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: ast::ScalarType::F64 }, a)
+    },
+    "abs" <f:".ftz"?> ".f16" <a:Arg2> => {
+        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F16 }, a)
+    },
+    "abs" <f:".ftz"?> ".f16x2" <a:Arg2> => {
+        todo!()
+    },
+};
+
+SignedIntType: ast::ScalarType = {
+    ".s16" => ast::ScalarType::S16,
+    ".s32" => ast::ScalarType::S32,
+    ".s64" => ast::ScalarType::S64,
+};
+
 Operand: ast::Operand<&'input str> = {
     <r:ExtendedID> => ast::Operand::Reg(r),
     <r:ExtendedID> "+" <o:Num> => {
@@ -873,6 +939,12 @@ Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
     <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," <src3:"!"?> <src4:Operand> => ast::Arg5{<>}
 };
 
+ArgCall: ast::ArgCall<ast::ParsedArgParams<'input>> = {
+    "(" <ret_params:Comma<ExtendedID>> ")" "," <func:ExtendedID> "," "(" <param_list:Comma<ExtendedID>> ")" => ast::ArgCall{<>},
+    <func:ExtendedID> "," "(" <param_list:Comma<ExtendedID>> ")" => ast::ArgCall{ret_params: Vec::new(), func, param_list},
+    <func:ExtendedID> => ast::ArgCall{ret_params: Vec::new(), func, param_list: Vec::new()},
+};
+
 OptionalDst: &'input str = {
     "|" <dst2:ExtendedID> => dst2
 }
diff --git a/ptx/src/test/mod.rs b/ptx/src/test/mod.rs
index f66992b..3252b50 100644
--- a/ptx/src/test/mod.rs
+++ b/ptx/src/test/mod.rs
@@ -25,3 +25,11 @@ fn operands_ptx() {
     let vector_add = include_str!("operands.ptx");
     parse_and_assert(vector_add);
 }
+
+#[test]
+#[allow(non_snake_case)]
+fn _Z9vectorAddPKfS0_Pfi_ptx() {
+    let vector_add = include_str!("_Z9vectorAddPKfS0_Pfi.ptx");
+    parse_and_assert(vector_add);
+}
+
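The new fixture presumably exercises the call/abs/.align constructs added in this patch. Further fixtures can follow the same include_str! plus parse_and_assert pattern used above; the test name and .ptx file name in this sketch are hypothetical.

    // Hypothetical additional test in the same style as the ones above.
    #[test]
    fn my_new_kernel_ptx() {
        let src = include_str!("my_new_kernel.ptx"); // made-up fixture name
        parse_and_assert(src);
    }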
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index c0cdf01..b4d01eb 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -16,6 +16,7 @@ impl SpirvType {
         let key = match t {
             ast::Type::Scalar(typ) => SpirvScalarKey::from(typ),
             ast::Type::ExtendedScalar(typ) => SpirvScalarKey::from(typ),
+            ast::Type::Array(_, _) => todo!(),
         };
         SpirvType::Pointer(key, sc)
     }
@@ -26,6 +27,7 @@ impl From<ast::Type> for SpirvType {
         match t {
             ast::Type::Scalar(t) => SpirvType::Base(t.into()),
             ast::Type::ExtendedScalar(t) => SpirvType::Base(t.into()),
+            ast::Type::Array(_, _) => todo!(),
         }
     }
 }
@@ -195,10 +197,13 @@ fn emit_function<'a>(
     let func_type = get_function_type(builder, map, &f.args);
     let func_id =
         builder.begin_function(map.void(), None, spirv::FunctionControl::NONE, func_type)?;
-    if f.kernel {
-        builder.entry_point(spirv::ExecutionModel::Kernel, func_id, f.name, &[]);
+    match f.func_directive {
+        ast::FunctionReturn::Kernel => {
+            builder.entry_point(spirv::ExecutionModel::Kernel, func_id, f.name, &[])
+        }
+        _ => todo!(),
     }
-    let (mut func_body, unique_ids) = to_ssa(&f.args, f.body);
+    let (mut func_body, unique_ids) = to_ssa(&f.args, f.body.unwrap_or_else(|| todo!()));
     let id_offset = builder.reserve_ids(unique_ids);
     emit_function_args(builder, id_offset, map, &f.args);
     func_body = apply_id_offset(func_body, id_offset);
@@ -266,6 +271,7 @@ fn normalize_predicates(
     let mut result = Vec::with_capacity(func.len());
     for s in func {
         match s {
+            ast::Statement::Block(_) => todo!(),
             ast::Statement::Label(id) => result.push(Statement::Label(id)),
             ast::Statement::Instruction(pred, inst) => {
                 if let Some(pred) = pred {
@@ -652,6 +658,8 @@ fn emit_function_body_ops(
                 builder.branch_conditional(bra.predicate, bra.if_true, bra.if_false, [])?;
             }
             Statement::Instruction(inst) => match inst {
+                ast::Instruction::Abs(_, _) => todo!(),
+                ast::Instruction::Call(_,_) => todo!(),
                 // SPIR-V does not support marking jumps as guaranteed-converged
                 ast::Instruction::Bra(_, arg) => {
                     builder.branch(arg.src)?;
                 }
@@ -1076,6 +1084,7 @@ fn expand_map_variables<'a>(
     s: ast::Statement<ast::ParsedArgParams<'a>>,
 ) {
     match s {
+        ast::Statement::Block(_) => todo!(),
         ast::Statement::Label(name) => result.push(ast::Statement::Label(id_defs.get_id(name))),
         ast::Statement::Instruction(p, i) => result.push(ast::Statement::Instruction(
             p.map(|p| p.map_variable(&mut |id| id_defs.get_id(id))),
@@ -1086,6 +1095,7 @@ fn expand_map_variables<'a>(
             for new_id in id_defs.add_defs(var.name, count, var.v_type) {
                 result.push(ast::Statement::Variable(ast::Variable {
                     space: var.space,
+                    align: var.align,
                     v_type: var.v_type,
                     name: new_id,
                     count: None,
@@ -1096,6 +1106,7 @@ fn expand_map_variables<'a>(
             let new_id = id_defs.add_def(var.name, Some(var.v_type));
             result.push(ast::Statement::Variable(ast::Variable {
                 space: var.space,
+                align: var.align,
                 v_type: var.v_type,
                 name: new_id,
                 count: None,
@@ -1307,6 +1318,8 @@ impl<T: ArgParams> ast::Instruction<T> {
         visitor: &mut V,
     ) -> ast::Instruction<U> {
         match self {
+            ast::Instruction::Abs(_, _) => todo!(),
+            ast::Instruction::Call(_, _) => todo!(),
             ast::Instruction::Ld(d, a) => {
                 let inst_type = d.typ;
                 ast::Instruction::Ld(d, a.map_ld(visitor, Some(ast::Type::Scalar(inst_type))))
             }
@@ -1432,6 +1445,8 @@ impl<T: ArgParams> ast::Instruction<T> {
     fn jump_target(&self) -> Option<T::ID> {
         match self {
+            ast::Instruction::Abs(_, _) => todo!(),
+            ast::Instruction::Call(_, _) => todo!(),
             ast::Instruction::Bra(_, a) => Some(a.src),
             ast::Instruction::Ld(_, _)
             | ast::Instruction::Mov(_, _)
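Taken together, the grammar and AST changes mean a PTX line such as "call.uni (retval), func, (param);" is expected to land in the new types roughly as below. This is an illustration only, not taken from the patch, and it assumes the parser-side ArgParams implementation (ParsedArgParams) uses borrowed &str identifiers, which the Operand<&'input str> productions above suggest.

    // Illustrative only: how a parsed call.uni instruction could look in the new AST.
    fn example_call_ast() -> ast::Instruction<ast::ParsedArgParams<'static>> {
        ast::Instruction::Call(
            ast::CallData { uniform: true },
            ast::ArgCall {
                ret_params: vec!["retval"],
                func: "func",
                param_list: vec!["param"],
            },
        )
    }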