Convert unit tests to HIP

This commit is contained in:
Andrzej Janik 2021-09-06 22:58:12 +02:00
parent a71cd44104
commit 82510ce8fd
3 changed files with 193 additions and 95 deletions

View file

@ -22,7 +22,7 @@ version = "0.19"
features = ["lexer"]
[dev-dependencies]
level_zero-sys = { path = "../level_zero-sys" }
level_zero = { path = "../level_zero" }
hip_runtime-sys = { path = "../hip_runtime-sys" }
tempfile = "3"
spirv_tools-sys = { path = "../spirv_tools-sys" }
paste = "1.0"

View file

@ -8,9 +8,7 @@ extern crate quick_error;
extern crate bit_vec;
extern crate half;
#[cfg(test)]
extern crate level_zero as ze;
#[cfg(test)]
extern crate level_zero_sys as l0;
extern crate hip_runtime_sys as hip;
extern crate rspirv;
extern crate spirv_headers as spirv;

View file

@ -1,5 +1,18 @@
use crate::ptx;
use crate::translate;
use hip_runtime_sys::hipError_t;
use hip_runtime_sys::hipGetDeviceProperties;
use hip_runtime_sys::hipInit;
use hip_runtime_sys::hipMalloc;
use hip_runtime_sys::hipMemcpyAsync;
use hip_runtime_sys::hipMemcpyKind;
use hip_runtime_sys::hipMemcpyWithStream;
use hip_runtime_sys::hipMemset;
use hip_runtime_sys::hipModuleGetFunction;
use hip_runtime_sys::hipModuleLaunchKernel;
use hip_runtime_sys::hipModuleLoadData;
use hip_runtime_sys::hipStreamCreate;
use hip_runtime_sys::hipStreamSynchronize;
use rspirv::{
binary::{Assemble, Disassemble},
dr::{Block, Function, Instruction, Loader, Operand},
@ -8,15 +21,21 @@ use spirv_headers::Word;
use spirv_tools_sys::{
spv_binary, spv_endianness_t, spv_parsed_instruction_t, spv_result_t, spv_target_env,
};
use std::collections::hash_map::Entry;
use std::error;
use std::ffi::{c_void, CStr, CString};
use std::fmt;
use std::fmt::{Debug, Display, Formatter};
use std::fs::File;
use std::hash::Hash;
use std::io;
use std::io::Read;
use std::io::Write;
use std::mem;
use std::process::Command;
use std::slice;
use std::{borrow::Cow, collections::HashMap, env, fs, path::PathBuf, ptr, str};
use std::{cmp, collections::hash_map::Entry};
use tempfile::NamedTempFile;
macro_rules! test_ptx {
($fn_name:ident, $input:expr, $output:expr) => {
@ -223,102 +242,61 @@ fn test_ptx_assert<
Ok(())
}
macro_rules! hip_call {
($expr:expr) => {
#[allow(unused_unsafe)]
{
let err = unsafe { $expr };
if err != hip_runtime_sys::hipError_t::hipSuccess {
return Result::Err(err);
}
}
};
}
fn run_spirv<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Default>(
name: &CStr,
module: translate::Module,
input: &[Input],
output: &mut [Output],
) -> ze::Result<Vec<Output>> {
ze::init()?;
) -> Result<Vec<Output>, hipError_t> {
hip_call! { hipInit(0) };
let spirv = module.spirv.assemble();
let byte_il = unsafe {
slice::from_raw_parts::<u8>(
spirv.as_ptr() as *const _,
spirv.len() * mem::size_of::<u32>(),
)
};
let use_shared_mem = module
.kernel_info
.get(name.to_str().unwrap())
.map(|info| info.uses_shared_mem)
.unwrap_or(false);
let result = vec![0u8.into(); output.len()];
let mut result = vec![0u8.into(); output.len()];
{
let mut drivers = ze::Driver::get()?;
let drv = drivers.drain(0..1).next().unwrap();
let mut devices = drv.devices()?;
let dev = devices.drain(0..1).next().unwrap();
let ctx = ze::Context::new(drv, None)?;
let queue = ze::CommandQueue::new(&ctx, dev)?;
let (module, maybe_log) = match module.should_link_ptx_impl {
Some((ptx_impl, _)) => ze::Module::build_link_spirv(
&ctx,
dev,
&[ptx_impl, byte_il],
Some(module.build_options.as_c_str()),
),
None => {
let (module, log) = ze::Module::build_spirv_logged(
&ctx,
dev,
byte_il,
Some(module.build_options.as_c_str()),
);
(module, Some(log))
}
let dev = 0;
let mut stream = ptr::null_mut();
hip_call! { hipStreamCreate(&mut stream) };
let mut dev_props = unsafe { mem::zeroed() };
hip_call! { hipGetDeviceProperties(&mut dev_props, dev) };
let gcn_arch_slice = unsafe {
slice::from_raw_parts(
dev_props.gcnArchName.as_ptr() as _,
dev_props.gcnArchName.len(),
)
};
let module = match module {
Ok(m) => m,
Err(err) => {
let raw_err_string = maybe_log
.map(|log| log.to_cstring())
.transpose()?
.unwrap_or(CString::default());
let err_string = raw_err_string.to_string_lossy();
panic!("{:?}\n{}", err, err_string);
}
};
let kernel = ze::Kernel::new_resident(&module, name)?;
kernel.set_indirect_access(
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
)?;
let inp_b = ze::DeviceBuffer::<Input>::new(&ctx, dev, cmp::max(input.len(), 1))?;
let out_b = ze::DeviceBuffer::<Output>::new(&ctx, dev, cmp::max(output.len(), 1))?;
let event_pool =
ze::EventPool::new(&ctx, ze::sys::ze_event_pool_flags_t(0), 3, Some(&[dev]))?;
let ev0 = ze::Event::new(
&event_pool,
0,
ze::sys::ze_event_scope_flags_t(0),
ze::sys::ze_event_scope_flags_t(0),
)?;
let ev1 = ze::Event::new(
&event_pool,
1,
ze::sys::ze_event_scope_flags_t(0),
ze::sys::ze_event_scope_flags_t(0),
)?;
let ev2 = ze::Event::new(
&event_pool,
2,
ze::sys::ze_event_scope_flags_t(0),
ze::sys::ze_event_scope_flags_t(0),
)?;
{
let init_evs = [&ev0, &ev1];
kernel.set_group_size(1, 1, 1)?;
kernel.set_arg_buffer(0, &inp_b)?;
kernel.set_arg_buffer(1, &out_b)?;
if use_shared_mem {
unsafe { kernel.set_arg_raw(2, 128, ptr::null())? };
}
ze::CommandListBuilder::new(&ctx, dev)?
.append_memory_copy(&inp_b, input, Some(&init_evs[0]), &[])?
.append_memory_fill(&out_b, &Output::default(), Some(&init_evs[1]), &[])?
.append_launch_kernel(&kernel, &[1, 1, 1], Some(&ev2), &init_evs)?
.append_memory_copy(&*result, &out_b, None, &[&ev2])?
.execute(&queue)?;
}
let dev_name =
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
name
} else {
return Err(hipError_t::hipErrorUnknown);
};
let elf_module = compile_amd(dev_name, &*spirv, module.should_link_ptx_impl)
.map_err(|_| hipError_t::hipErrorUnknown)?;
let mut module = ptr::null_mut();
hip_call! { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) };
let mut kernel = ptr::null_mut();
hip_call! { hipModuleGetFunction(&mut kernel, module, name.as_ptr()) };
let mut inp_b = ptr::null_mut();
hip_call! { hipMalloc(&mut inp_b, input.len()) };
let mut out_b = ptr::null_mut();
hip_call! { hipMalloc(&mut out_b, output.len()) };
hip_call! { hipMemcpyWithStream(inp_b, input.as_ptr() as _, input.len() * mem::size_of::<Input>(), hipMemcpyKind::hipMemcpyHostToDevice, stream) };
hip_call! { hipMemset(out_b, 0, output.len() * mem::size_of::<Output>()) };
let mut args = [&inp_b, &out_b];
hip_call! { hipModuleLaunchKernel(kernel, 1,1,1,1,1,1, 0, stream, args.as_mut_ptr() as _, ptr::null_mut()) };
hip_call! { hipMemcpyAsync(result.as_mut_ptr() as _, out_b, output.len() * mem::size_of::<Output>(), hipMemcpyKind::hipMemcpyDeviceToHost, stream) };
hip_call! { hipStreamSynchronize(stream) };
}
Ok(result)
}
@ -402,7 +380,7 @@ fn test_spvtxt_assert<'a>(
}
}
}
panic!(spirv_text.to_string());
panic!("{}", spirv_text.to_string());
}
unsafe { spirv_tools::spvContextDestroy(spv_context) };
Ok(())
@ -582,3 +560,125 @@ unsafe extern "C" fn parse_instruction_cb(
}
spv_result_t::SPV_SUCCESS
}
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
const AMDGPU: &'static str = "/opt/amdgpu-pro/";
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
const AMDGPU_BITCODE: [&'static str; 8] = [
"opencl.bc",
"ocml.bc",
"ockl.bc",
"oclc_correctly_rounded_sqrt_off.bc",
"oclc_daz_opt_on.bc",
"oclc_finite_only_off.bc",
"oclc_unsafe_math_off.bc",
"oclc_wavefrontsize64_off.bc",
];
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
fn compile_amd(
device_name: &str,
spirv_il: &[u32],
ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> {
let dir = tempfile::tempdir()?;
let mut spirv = NamedTempFile::new_in(&dir)?;
let llvm = NamedTempFile::new_in(&dir)?;
let spirv_il_u8 = unsafe {
slice::from_raw_parts(
spirv_il.as_ptr() as *const u8,
spirv_il.len() * mem::size_of::<u32>(),
)
};
spirv.write_all(spirv_il_u8)?;
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
Ok(path) => Cow::Owned(path),
Err(_) => Cow::Borrowed(LLVM_SPIRV),
};
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
.arg("-r")
.arg("-o")
.arg(llvm.path())
.arg(spirv.path())
.status()?;
assert!(to_llvm_cmd.success());
let linked_binary = NamedTempFile::new_in(&dir)?;
let mut llvm_link = PathBuf::from(AMDGPU);
llvm_link.push("bin");
llvm_link.push("llvm-link");
let mut linker_cmd = Command::new(&llvm_link);
linker_cmd
.arg("--only-needed")
.arg("-o")
.arg(linked_binary.path())
.arg(llvm.path())
.args(get_bitcode_paths(device_name));
if cfg!(debug_assertions) {
linker_cmd.arg("-v");
}
let status = linker_cmd.status()?;
assert!(status.success());
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
let compiled_binary = NamedTempFile::new_in(&dir)?;
let mut cland_exe = PathBuf::from(AMDGPU);
cland_exe.push("bin");
cland_exe.push("clang");
let mut compiler_cmd = Command::new(&cland_exe);
compiler_cmd
.arg(format!("-mcpu={}", device_name))
.arg("-nogpulib")
.arg("-mno-wavefrontsize64")
.arg("-O3")
.arg("-Xlinker")
.arg("--no-undefined")
.arg("-target")
.arg(AMDGPU_TARGET)
.arg("-o")
.arg(compiled_binary.path())
.arg("-x")
.arg("ir")
.arg(linked_binary.path());
if let Some((_, bitcode)) = ptx_lib {
ptx_lib_bitcode.write_all(bitcode)?;
compiler_cmd.arg(ptx_lib_bitcode.path());
};
if cfg!(debug_assertions) {
compiler_cmd.arg("-v");
}
let status = compiler_cmd.status()?;
assert!(status.success());
let mut result = Vec::new();
let compiled_bin_path = compiled_binary.path();
let mut compiled_binary = File::open(compiled_bin_path)?;
compiled_binary.read_to_end(&mut result)?;
let mut persistent = PathBuf::from("/tmp/zluda");
std::fs::create_dir_all(&persistent)?;
persistent.push(compiled_bin_path.file_name().unwrap());
std::fs::copy(compiled_bin_path, persistent)?;
Ok(result)
}
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
let mut path = PathBuf::from(AMDGPU);
path.push("amdgcn");
path.push("bitcode");
path.push(x);
path
});
let suffix = if let Some(suffix_idx) = device_name.find(':') {
suffix_idx
} else {
device_name.len()
};
let mut additional_path = PathBuf::from(AMDGPU);
additional_path.push("amdgcn");
additional_path.push("bitcode");
additional_path.push(format!(
"{}{}{}",
AMDGPU_BITCODE_DEVICE_PREFIX,
&device_name[3..suffix],
".bc"
));
generic_paths.chain(std::iter::once(additional_path))
}