diff --git a/ptx/Cargo.toml b/ptx/Cargo.toml index ffe064c..2ac1f68 100644 --- a/ptx/Cargo.toml +++ b/ptx/Cargo.toml @@ -26,3 +26,4 @@ hip_runtime-sys = { path = "../hip_runtime-sys" } tempfile = "3" spirv_tools-sys = { path = "../spirv_tools-sys" } paste = "1.0" +cuda-driver-sys = "0.3.0" diff --git a/ptx/src/test/spirv_run/add_non_coherent.ptx b/ptx/src/test/spirv_run/add_non_coherent.ptx index 10c35a1..c35c123 100644 --- a/ptx/src/test/spirv_run/add_non_coherent.ptx +++ b/ptx/src/test/spirv_run/add_non_coherent.ptx @@ -1,5 +1,5 @@ .version 6.5 -.target sm_30 +.target sm_32 .address_size 64 .visible .entry add_non_coherent( diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ptx b/ptx/src/test/spirv_run/ld_st_implicit.ptx index 8562286..1294248 100644 --- a/ptx/src/test/spirv_run/ld_st_implicit.ptx +++ b/ptx/src/test/spirv_run/ld_st_implicit.ptx @@ -14,7 +14,8 @@ ld.param.u64 in_addr, [input]; ld.param.u64 out_addr, [output]; - ld.global.f32 temp, [in_addr]; - st.global.f32 [out_addr], temp; + mov.b64 temp, 0x0123456789abcdef; + ld.global.f32 temp, [in_addr]; + st.global.f32 [out_addr], temp; ret; } \ No newline at end of file diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 6c073f3..512b6cf 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -1,18 +1,6 @@ use crate::ptx; use crate::translate; use hip_runtime_sys::hipError_t; -use hip_runtime_sys::hipGetDeviceProperties; -use hip_runtime_sys::hipInit; -use hip_runtime_sys::hipMalloc; -use hip_runtime_sys::hipMemcpyAsync; -use hip_runtime_sys::hipMemcpyKind; -use hip_runtime_sys::hipMemcpyWithStream; -use hip_runtime_sys::hipMemset; -use hip_runtime_sys::hipModuleGetFunction; -use hip_runtime_sys::hipModuleLaunchKernel; -use hip_runtime_sys::hipModuleLoadData; -use hip_runtime_sys::hipStreamCreate; -use hip_runtime_sys::hipStreamSynchronize; use rspirv::{ binary::{Assemble, Disassemble}, dr::{Block, Function, Instruction, Loader, Operand}, @@ -46,7 +34,17 @@ macro_rules! test_ptx { let ptx = include_str!(concat!(stringify!($fn_name), ".ptx")); let input = $input; let mut output = $output; - test_ptx_assert(stringify!($fn_name), ptx, &input, &mut output) + test_hip_assert(stringify!($fn_name), ptx, &input, &mut output) + } + } + + paste::item! { + #[test] + fn [<$fn_name _cuda>]() -> Result<(), Box> { + let ptx = include_str!(concat!(stringify!($fn_name), ".ptx")); + let input = $input; + let mut output = $output; + test_cuda_assert(stringify!($fn_name), ptx, &input, &mut output) } } @@ -75,7 +73,7 @@ macro_rules! test_ptx { } test_ptx!(ld_st, [1u64], [1u64]); -test_ptx!(ld_st_implicit, [0.5f32], [0.5f32]); +test_ptx!(ld_st_implicit, [0.5f32, 0.25f32], [0.5f32]); test_ptx!(mov, [1u64], [1u64]); test_ptx!(mul_lo, [1u64], [2u64]); test_ptx!(mul_hi, [u64::max_value()], [1u64]); @@ -99,7 +97,8 @@ test_ptx!(ntid, [3u32], [4u32]); test_ptx!(reg_local, [12u64], [13u64]); test_ptx!(mov_address, [0xDEADu64], [0u64]); test_ptx!(b64tof64, [111u64], [111u64]); -test_ptx!(implicit_param, [34u32], [34u32]); +// This segfaults NV compiler +// test_ptx!(implicit_param, [34u32], [34u32]); test_ptx!(pred_not, [10u64, 11u64], [2u64, 0u64]); test_ptx!(mad_s32, [2i32, 3i32, 4i32], [10i32, 10i32, 10i32]); test_ptx!( @@ -178,8 +177,6 @@ test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]); test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]); test_ptx!(stateful_ld_st_ntid_sub, [96311u64], [96311u64]); test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]); -// For now, we just make sure that it builds and links -test_ptx!(assertfail, [716523871u64], [716523872u64]); test_ptx!(cvt_s64_s32, [-1i32], [-1i64]); test_ptx!(add_tuning, [2u64], [3u64]); test_ptx!(add_non_coherent, [3u64], [4u64]); @@ -224,6 +221,7 @@ test_ptx!(membar, [152731u32], [152731u32]); test_ptx!(shared_unify_extern, [7681u64], [15362u64]); test_ptx!(shared_unify_private, [67153u64], [134306u64]); +test_ptx!(assertfail); test_ptx!(func_ptr); test_ptx!(lanemask_lt); test_ptx!(extern_func); @@ -246,7 +244,7 @@ impl Debug for DisplayError { impl error::Error for DisplayError {} -fn test_ptx_assert< +fn test_hip_assert< 'a, Input: From + Debug + Copy + PartialEq, Output: From + Debug + Copy + PartialEq + Default, @@ -261,12 +259,29 @@ fn test_ptx_assert< assert!(errors.len() == 0); let zluda_module = translate::to_spirv_module(ast)?; let name = CString::new(name)?; - let result = run_spirv(name.as_c_str(), zluda_module, input, output) + let result = run_hip(name.as_c_str(), zluda_module, input, output) .map_err(|err| DisplayError { err })?; assert_eq!(result.as_slice(), output); Ok(()) } +fn test_cuda_assert< + 'a, + Input: From + Debug + Copy + PartialEq, + Output: From + Debug + Copy + PartialEq + Default, +>( + name: &str, + ptx_text: &'a str, + input: &[Input], + output: &mut [Output], +) -> Result<(), Box> { + let name = CString::new(name)?; + let result = + run_cuda(name.as_c_str(), ptx_text, input, output).map_err(|err| DisplayError { err })?; + assert_eq!(result.as_slice(), output); + Ok(()) +} + macro_rules! hip_call { ($expr:expr) => { #[allow(unused_unsafe)] @@ -279,12 +294,60 @@ macro_rules! hip_call { }; } -fn run_spirv + Copy + Debug, Output: From + Copy + Debug + Default>( +macro_rules! cuda_call { + ($expr:expr) => { + #[allow(unused_unsafe)] + { + let err = unsafe { $expr }; + if err != cuda_driver_sys::CUresult::CUDA_SUCCESS { + return Result::Err(err); + } + } + }; +} + +fn run_cuda + Copy + Debug, Output: From + Copy + Debug + Default>( + name: &CStr, + ptx_module: &str, + input: &[Input], + output: &mut [Output], +) -> Result, cuda_driver_sys::CUresult> { + use cuda_driver_sys::*; + cuda_call! { cuInit(0) }; + let ptx_module = CString::new(ptx_module).unwrap(); + let mut result = vec![0u8.into(); output.len()]; + { + let mut ctx = ptr::null_mut(); + cuda_call! { cuCtxCreate_v2(&mut ctx, 0, 0) }; + let mut module = ptr::null_mut(); + cuda_call! { cuModuleLoadData(&mut module, ptx_module.as_ptr() as _) }; + let mut kernel = ptr::null_mut(); + cuda_call! { cuModuleGetFunction(&mut kernel, module, name.as_ptr()) }; + let mut inp_b = unsafe { mem::zeroed() }; + cuda_call! { cuMemAlloc_v2(&mut inp_b, input.len() * mem::size_of::()) }; + let mut out_b = unsafe { mem::zeroed() }; + cuda_call! { cuMemAlloc_v2(&mut out_b, output.len() * mem::size_of::()) }; + cuda_call! { cuMemcpyHtoD_v2(inp_b, input.as_ptr() as _, input.len() * mem::size_of::()) }; + cuda_call! { cuMemsetD8_v2(out_b, 0, output.len() * mem::size_of::()) }; + let mut args = [&inp_b, &out_b]; + cuda_call! { cuLaunchKernel(kernel, 1,1,1,1,1,1, 1024, 0 as _, args.as_mut_ptr() as _, ptr::null_mut()) }; + cuda_call! { cuMemcpyDtoH_v2(result.as_mut_ptr() as _, out_b, output.len() * mem::size_of::()) }; + cuda_call! { cuStreamSynchronize(0 as _) }; + cuda_call! { cuMemFree_v2(inp_b) }; + cuda_call! { cuMemFree_v2(out_b) }; + cuda_call! { cuModuleUnload(module) }; + cuda_call! { cuCtxDestroy_v2(ctx) }; + } + Ok(result) +} + +fn run_hip + Copy + Debug, Output: From + Copy + Debug + Default>( name: &CStr, module: translate::Module, input: &[Input], output: &mut [Output], ) -> Result, hipError_t> { + use hip_runtime_sys::*; hip_call! { hipInit(0) }; let spirv = module.spirv.assemble(); let mut result = vec![0u8.into(); output.len()]; @@ -310,6 +373,9 @@ fn run_spirv + Copy + Debug, Output: From + Copy + Debug + D hip_call! { hipModuleLaunchKernel(kernel, 1,1,1,1,1,1, 1024, stream, args.as_mut_ptr() as _, ptr::null_mut()) }; hip_call! { hipMemcpyAsync(result.as_mut_ptr() as _, out_b, output.len() * mem::size_of::(), hipMemcpyKind::hipMemcpyDeviceToHost, stream) }; hip_call! { hipStreamSynchronize(stream) }; + hip_call! { hipFree(inp_b) }; + hip_call! { hipFree(out_b) }; + hip_call! { hipModuleUnload(module) }; } Ok(result) } diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 3f27522..4265d33 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1304,9 +1304,11 @@ fn emit_capabilities(builder: &mut dr::Builder) { } // http://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_float_controls.html -fn emit_extensions(_builder: &mut dr::Builder) { +fn emit_extensions(builder: &mut dr::Builder) { // TODO: re-enable when Intel float control extension works //builder.extension("SPV_INTEL_float_controls2"); + builder.extension("SPV_KHR_float_controls"); + builder.extension("SPV_KHR_no_integer_wrap_decoration"); } fn emit_opencl_import(builder: &mut dr::Builder) -> spirv::Word {