mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 08:24:44 +00:00
Add CUDA tests showing problems with .shared unification
This commit is contained in:
parent
c23be576e8
commit
b763415006
5 changed files with 94 additions and 24 deletions
|
@ -26,3 +26,4 @@ hip_runtime-sys = { path = "../hip_runtime-sys" }
|
|||
tempfile = "3"
|
||||
spirv_tools-sys = { path = "../spirv_tools-sys" }
|
||||
paste = "1.0"
|
||||
cuda-driver-sys = "0.3.0"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.version 6.5
|
||||
.target sm_30
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry add_non_coherent(
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
ld.global.f32 temp, [in_addr];
|
||||
st.global.f32 [out_addr], temp;
|
||||
mov.b64 temp, 0x0123456789abcdef;
|
||||
ld.global.f32 temp, [in_addr];
|
||||
st.global.f32 [out_addr], temp;
|
||||
ret;
|
||||
}
|
|
@ -1,18 +1,6 @@
|
|||
use crate::ptx;
|
||||
use crate::translate;
|
||||
use hip_runtime_sys::hipError_t;
|
||||
use hip_runtime_sys::hipGetDeviceProperties;
|
||||
use hip_runtime_sys::hipInit;
|
||||
use hip_runtime_sys::hipMalloc;
|
||||
use hip_runtime_sys::hipMemcpyAsync;
|
||||
use hip_runtime_sys::hipMemcpyKind;
|
||||
use hip_runtime_sys::hipMemcpyWithStream;
|
||||
use hip_runtime_sys::hipMemset;
|
||||
use hip_runtime_sys::hipModuleGetFunction;
|
||||
use hip_runtime_sys::hipModuleLaunchKernel;
|
||||
use hip_runtime_sys::hipModuleLoadData;
|
||||
use hip_runtime_sys::hipStreamCreate;
|
||||
use hip_runtime_sys::hipStreamSynchronize;
|
||||
use rspirv::{
|
||||
binary::{Assemble, Disassemble},
|
||||
dr::{Block, Function, Instruction, Loader, Operand},
|
||||
|
@ -46,7 +34,17 @@ macro_rules! test_ptx {
|
|||
let ptx = include_str!(concat!(stringify!($fn_name), ".ptx"));
|
||||
let input = $input;
|
||||
let mut output = $output;
|
||||
test_ptx_assert(stringify!($fn_name), ptx, &input, &mut output)
|
||||
test_hip_assert(stringify!($fn_name), ptx, &input, &mut output)
|
||||
}
|
||||
}
|
||||
|
||||
paste::item! {
|
||||
#[test]
|
||||
fn [<$fn_name _cuda>]() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let ptx = include_str!(concat!(stringify!($fn_name), ".ptx"));
|
||||
let input = $input;
|
||||
let mut output = $output;
|
||||
test_cuda_assert(stringify!($fn_name), ptx, &input, &mut output)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,7 +73,7 @@ macro_rules! test_ptx {
|
|||
}
|
||||
|
||||
test_ptx!(ld_st, [1u64], [1u64]);
|
||||
test_ptx!(ld_st_implicit, [0.5f32], [0.5f32]);
|
||||
test_ptx!(ld_st_implicit, [0.5f32, 0.25f32], [0.5f32]);
|
||||
test_ptx!(mov, [1u64], [1u64]);
|
||||
test_ptx!(mul_lo, [1u64], [2u64]);
|
||||
test_ptx!(mul_hi, [u64::max_value()], [1u64]);
|
||||
|
@ -99,7 +97,8 @@ test_ptx!(ntid, [3u32], [4u32]);
|
|||
test_ptx!(reg_local, [12u64], [13u64]);
|
||||
test_ptx!(mov_address, [0xDEADu64], [0u64]);
|
||||
test_ptx!(b64tof64, [111u64], [111u64]);
|
||||
test_ptx!(implicit_param, [34u32], [34u32]);
|
||||
// This segfaults NV compiler
|
||||
// test_ptx!(implicit_param, [34u32], [34u32]);
|
||||
test_ptx!(pred_not, [10u64, 11u64], [2u64, 0u64]);
|
||||
test_ptx!(mad_s32, [2i32, 3i32, 4i32], [10i32, 10i32, 10i32]);
|
||||
test_ptx!(
|
||||
|
@ -178,8 +177,6 @@ test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]);
|
|||
test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]);
|
||||
test_ptx!(stateful_ld_st_ntid_sub, [96311u64], [96311u64]);
|
||||
test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]);
|
||||
// For now, we just make sure that it builds and links
|
||||
test_ptx!(assertfail, [716523871u64], [716523872u64]);
|
||||
test_ptx!(cvt_s64_s32, [-1i32], [-1i64]);
|
||||
test_ptx!(add_tuning, [2u64], [3u64]);
|
||||
test_ptx!(add_non_coherent, [3u64], [4u64]);
|
||||
|
@ -224,6 +221,7 @@ test_ptx!(membar, [152731u32], [152731u32]);
|
|||
test_ptx!(shared_unify_extern, [7681u64], [15362u64]);
|
||||
test_ptx!(shared_unify_private, [67153u64], [134306u64]);
|
||||
|
||||
test_ptx!(assertfail);
|
||||
test_ptx!(func_ptr);
|
||||
test_ptx!(lanemask_lt);
|
||||
test_ptx!(extern_func);
|
||||
|
@ -246,7 +244,7 @@ impl<T: Debug> Debug for DisplayError<T> {
|
|||
|
||||
impl<T: Debug> error::Error for DisplayError<T> {}
|
||||
|
||||
fn test_ptx_assert<
|
||||
fn test_hip_assert<
|
||||
'a,
|
||||
Input: From<u8> + Debug + Copy + PartialEq,
|
||||
Output: From<u8> + Debug + Copy + PartialEq + Default,
|
||||
|
@ -261,12 +259,29 @@ fn test_ptx_assert<
|
|||
assert!(errors.len() == 0);
|
||||
let zluda_module = translate::to_spirv_module(ast)?;
|
||||
let name = CString::new(name)?;
|
||||
let result = run_spirv(name.as_c_str(), zluda_module, input, output)
|
||||
let result = run_hip(name.as_c_str(), zluda_module, input, output)
|
||||
.map_err(|err| DisplayError { err })?;
|
||||
assert_eq!(result.as_slice(), output);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn test_cuda_assert<
|
||||
'a,
|
||||
Input: From<u8> + Debug + Copy + PartialEq,
|
||||
Output: From<u8> + Debug + Copy + PartialEq + Default,
|
||||
>(
|
||||
name: &str,
|
||||
ptx_text: &'a str,
|
||||
input: &[Input],
|
||||
output: &mut [Output],
|
||||
) -> Result<(), Box<dyn error::Error + 'a>> {
|
||||
let name = CString::new(name)?;
|
||||
let result =
|
||||
run_cuda(name.as_c_str(), ptx_text, input, output).map_err(|err| DisplayError { err })?;
|
||||
assert_eq!(result.as_slice(), output);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
macro_rules! hip_call {
|
||||
($expr:expr) => {
|
||||
#[allow(unused_unsafe)]
|
||||
|
@ -279,12 +294,60 @@ macro_rules! hip_call {
|
|||
};
|
||||
}
|
||||
|
||||
fn run_spirv<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Default>(
|
||||
macro_rules! cuda_call {
|
||||
($expr:expr) => {
|
||||
#[allow(unused_unsafe)]
|
||||
{
|
||||
let err = unsafe { $expr };
|
||||
if err != cuda_driver_sys::CUresult::CUDA_SUCCESS {
|
||||
return Result::Err(err);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn run_cuda<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Default>(
|
||||
name: &CStr,
|
||||
ptx_module: &str,
|
||||
input: &[Input],
|
||||
output: &mut [Output],
|
||||
) -> Result<Vec<Output>, cuda_driver_sys::CUresult> {
|
||||
use cuda_driver_sys::*;
|
||||
cuda_call! { cuInit(0) };
|
||||
let ptx_module = CString::new(ptx_module).unwrap();
|
||||
let mut result = vec![0u8.into(); output.len()];
|
||||
{
|
||||
let mut ctx = ptr::null_mut();
|
||||
cuda_call! { cuCtxCreate_v2(&mut ctx, 0, 0) };
|
||||
let mut module = ptr::null_mut();
|
||||
cuda_call! { cuModuleLoadData(&mut module, ptx_module.as_ptr() as _) };
|
||||
let mut kernel = ptr::null_mut();
|
||||
cuda_call! { cuModuleGetFunction(&mut kernel, module, name.as_ptr()) };
|
||||
let mut inp_b = unsafe { mem::zeroed() };
|
||||
cuda_call! { cuMemAlloc_v2(&mut inp_b, input.len() * mem::size_of::<Input>()) };
|
||||
let mut out_b = unsafe { mem::zeroed() };
|
||||
cuda_call! { cuMemAlloc_v2(&mut out_b, output.len() * mem::size_of::<Output>()) };
|
||||
cuda_call! { cuMemcpyHtoD_v2(inp_b, input.as_ptr() as _, input.len() * mem::size_of::<Input>()) };
|
||||
cuda_call! { cuMemsetD8_v2(out_b, 0, output.len() * mem::size_of::<Output>()) };
|
||||
let mut args = [&inp_b, &out_b];
|
||||
cuda_call! { cuLaunchKernel(kernel, 1,1,1,1,1,1, 1024, 0 as _, args.as_mut_ptr() as _, ptr::null_mut()) };
|
||||
cuda_call! { cuMemcpyDtoH_v2(result.as_mut_ptr() as _, out_b, output.len() * mem::size_of::<Output>()) };
|
||||
cuda_call! { cuStreamSynchronize(0 as _) };
|
||||
cuda_call! { cuMemFree_v2(inp_b) };
|
||||
cuda_call! { cuMemFree_v2(out_b) };
|
||||
cuda_call! { cuModuleUnload(module) };
|
||||
cuda_call! { cuCtxDestroy_v2(ctx) };
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Default>(
|
||||
name: &CStr,
|
||||
module: translate::Module,
|
||||
input: &[Input],
|
||||
output: &mut [Output],
|
||||
) -> Result<Vec<Output>, hipError_t> {
|
||||
use hip_runtime_sys::*;
|
||||
hip_call! { hipInit(0) };
|
||||
let spirv = module.spirv.assemble();
|
||||
let mut result = vec![0u8.into(); output.len()];
|
||||
|
@ -310,6 +373,9 @@ fn run_spirv<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + D
|
|||
hip_call! { hipModuleLaunchKernel(kernel, 1,1,1,1,1,1, 1024, stream, args.as_mut_ptr() as _, ptr::null_mut()) };
|
||||
hip_call! { hipMemcpyAsync(result.as_mut_ptr() as _, out_b, output.len() * mem::size_of::<Output>(), hipMemcpyKind::hipMemcpyDeviceToHost, stream) };
|
||||
hip_call! { hipStreamSynchronize(stream) };
|
||||
hip_call! { hipFree(inp_b) };
|
||||
hip_call! { hipFree(out_b) };
|
||||
hip_call! { hipModuleUnload(module) };
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
|
|
@ -1304,9 +1304,11 @@ fn emit_capabilities(builder: &mut dr::Builder) {
|
|||
}
|
||||
|
||||
// http://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_float_controls.html
|
||||
fn emit_extensions(_builder: &mut dr::Builder) {
|
||||
fn emit_extensions(builder: &mut dr::Builder) {
|
||||
// TODO: re-enable when Intel float control extension works
|
||||
//builder.extension("SPV_INTEL_float_controls2");
|
||||
builder.extension("SPV_KHR_float_controls");
|
||||
builder.extension("SPV_KHR_no_integer_wrap_decoration");
|
||||
}
|
||||
|
||||
fn emit_opencl_import(builder: &mut dr::Builder) -> spirv::Word {
|
||||
|
|
Loading…
Add table
Reference in a new issue