HIP conversion part #3

This commit is contained in:
Andrzej Janik 2021-09-05 17:29:33 +02:00
parent a63f004540
commit a71cd44104
5 changed files with 284 additions and 23 deletions

View file

@ -2454,7 +2454,7 @@ pub extern "system" fn cuModuleLoad(
module: *mut CUmodule,
fname: *const ::std::os::raw::c_char,
) -> CUresult {
unsafe { hipModuleLoad(module as _, fname as _).into() }
r#impl::module::load(module, fname).encuda()
}
#[cfg_attr(not(test), no_mangle)]
@ -2462,7 +2462,7 @@ pub extern "system" fn cuModuleLoadData(
module: *mut CUmodule,
image: *const ::std::os::raw::c_void,
) -> CUresult {
unsafe { hipModuleLoadData(module as _, image as _).into() }
r#impl::module::load_data(module, image).encuda()
}
// TODO: parse jit options
@ -2474,16 +2474,7 @@ pub extern "system" fn cuModuleLoadDataEx(
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult {
unsafe {
hipModuleLoadDataEx(
module as _,
image as _,
numOptions,
options as _,
optionValues,
)
.into()
}
r#impl::module::load_data(module, image).encuda()
}
#[cfg_attr(not(test), no_mangle)]
@ -3710,7 +3701,22 @@ pub extern "system" fn cuLaunchKernel(
kernelParams: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
) -> CUresult {
todo!()
unsafe {
hipModuleLaunchKernel(
f as _,
gridDimX,
gridDimY,
gridDimZ,
blockDimX,
blockDimY,
blockDimZ,
sharedMemBytes,
hStream as _,
kernelParams,
extra,
)
}
.into()
}
// TODO: implement default stream semantics
@ -3728,7 +3734,19 @@ pub extern "system" fn cuLaunchKernel_ptsz(
kernelParams: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
) -> CUresult {
todo!()
cuLaunchKernel(
f,
gridDimX,
gridDimY,
gridDimZ,
blockDimX,
blockDimY,
blockDimZ,
sharedMemBytes,
hStream,
kernelParams,
extra,
)
}
#[cfg_attr(not(test), no_mangle)]

View file

@ -12,7 +12,7 @@ use crate::{
cuda_impl,
};
use super::{device, Decuda, Encuda};
use super::{device, module, Decuda, Encuda};
use std::collections::HashMap;
use std::os::raw::{c_uint, c_ulong, c_ushort};
use std::{
@ -253,20 +253,17 @@ unsafe extern "system" fn get_module_from_cubin(
},
Err(_) => continue,
};
todo!()
/*
let module = module::SpirvModule::new(kernel_text_string);
match module {
Ok(module) => {
match module::load_data_impl(result, module) {
Ok(()) => {}
Err(err) => return err,
Err(err) => return err.into(),
}
return CUresult::CUDA_SUCCESS;
}
Err(_) => continue,
}
*/
}
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}

View file

@ -1,7 +1,7 @@
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes};
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes, hipLaunchKernel, hipModuleLaunchKernel};
use super::{CUresult, HasLivenessCookie, LiveCheck};
use crate::cuda::{CUfunction, CUfunction_attribute};
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
use ::std::os::raw::{c_uint, c_void};
use std::{mem, ptr};
@ -19,8 +19,12 @@ pub(crate) fn get_attribute(
return err;
}
let value = match cu_attrib {
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => hip_attrib.maxThreadsPerBlock,
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => hip_attrib.sharedSizeBytes as i32,
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
hip_attrib.maxThreadsPerBlock
}
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
hip_attrib.sharedSizeBytes as i32
}
_ => return hipError_t::hipErrorInvalidValue,
};
unsafe { *pi = value };

View file

@ -1,3 +1,5 @@
use hip_runtime_sys::hipError_t;
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
use std::{
ffi::c_void,
@ -17,6 +19,7 @@ pub mod function;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
pub(crate) mod os;
pub(crate) mod module;
#[cfg(debug_assertions)]
pub fn unimplemented() -> CUresult {
@ -180,6 +183,13 @@ impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1,
}
}
/// Allows a raw HIP status code to be turned into a CUDA result through `encuda()`.
impl Encuda for hipError_t {
    type To = CUresult;

    /// Converts the HIP status into its CUDA counterpart using the existing `Into` conversion.
    fn encuda(self) -> CUresult {
        self.into()
    }
}
/// Re-borrows `t` with a caller-chosen lifetime `'b` that is unrelated to `'a`.
///
/// # Safety
///
/// The caller must guarantee that the referent stays alive (and is not mutably aliased) for
/// the whole of `'b`; the compiler can no longer check this.
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
    // Going through a raw pointer detaches the borrow from `'a` without changing the address
    // or (for unsized `T`) the pointer metadata.
    &*(t as *const T)
}

232
zluda/src/impl/module.rs Normal file
View file

@ -0,0 +1,232 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::io::{self, Read, Write};
use std::ops::Add;
use std::os::raw::c_char;
use std::path::PathBuf;
use std::process::Command;
use std::{fs, mem, ptr, slice};
use hip_runtime_sys::{
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipError_t,
hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
};
use tempfile::NamedTempFile;
use crate::cuda::CUmodule;
/// Result of translating a PTX module to SPIR-V, plus the metadata the translation produced.
///
/// Built by `SpirvModule::new` / `SpirvModule::new_raw` and consumed by `load_data_impl`.
pub struct SpirvModule {
/// Assembled SPIR-V words (the output of `assemble()` on the translated module).
pub binaries: Vec<u32>,
/// Metadata copied from the translated module, keyed by a `String`
/// (presumably the kernel name — confirm against the `ptx` crate).
pub kernel_info: HashMap<String, ptx::KernelInfo>,
/// Optional pair of embedded byte blobs copied from the translated module. `compile_amd`
/// writes the second element to a file and passes it to clang as an extra input; the
/// first element is not used by the code in this file.
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
/// Build options copied from the translated module (not read by the code in this file).
pub build_options: CString,
}
impl SpirvModule {
    /// Builds a module from a NUL-terminated PTX source string received over FFI.
    ///
    /// `text` must be either null or a pointer to a NUL-terminated buffer that stays valid
    /// for the duration of the call.
    ///
    /// # Errors
    ///
    /// * `hipErrorInvalidValue` if `text` is null.
    /// * `hipErrorInvalidImage` if the text is not valid UTF-8 or cannot be translated.
    pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
        // `CStr::from_ptr` on a null pointer is undefined behaviour, so reject it up front.
        if text.is_null() {
            return Err(hipError_t::hipErrorInvalidValue);
        }
        // SAFETY: `text` is non-null; the caller guarantees it is NUL-terminated and valid.
        let ptx_text = unsafe { CStr::from_ptr(text) }
            .to_str()
            .map_err(|_| hipError_t::hipErrorInvalidImage)?;
        Self::new(ptx_text)
    }

    /// Parses `ptx_text`, translates it to SPIR-V and assembles the result.
    ///
    /// # Errors
    ///
    /// Returns `hipErrorInvalidImage` if the parser fails, if it reports any recoverable
    /// errors, or if the translation to SPIR-V fails.
    pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
        let mut errors = Vec::new();
        let ast = ptx::ModuleParser::new()
            .parse(&mut errors, ptx_text)
            .map_err(|_| hipError_t::hipErrorInvalidImage)?;
        // The parser can succeed while still having collected errors; treat those as fatal.
        if !errors.is_empty() {
            return Err(hipError_t::hipErrorInvalidImage);
        }
        let spirv_module =
            ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
        Ok(SpirvModule {
            // `assemble()` must run before the fields below are moved out of `spirv_module`.
            binaries: spirv_module.assemble(),
            kernel_info: spirv_module.kernel_info,
            should_link_ptx_impl: spirv_module.should_link_ptx_impl,
            build_options: spirv_module.build_options,
        })
    }
}
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
let length = (0..)
.position(|i| unsafe { *fname.add(i) == 0 })
.ok_or(hipError_t::hipErrorInvalidValue)?;
let file_name = CStr::from_bytes_with_nul(unsafe { slice::from_raw_parts(fname as _, length) })
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
let valid_file_name = file_name
.to_str()
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
let mut file = File::open(valid_file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
let mut file_buffer = Vec::new();
file.read_to_end(&mut file_buffer)
.map_err(|_| hipError_t::hipErrorUnknown)?;
drop(file);
load_data(module, file_buffer.as_ptr() as _)
}
/// Loads a module from an in-memory image.
///
/// The image is interpreted as NUL-terminated PTX text, translated into a [`SpirvModule`]
/// and then passed on to [`load_data_impl`]; the first error encountered is returned.
pub(crate) fn load_data(
    module: *mut CUmodule,
    image: *const std::ffi::c_void,
) -> Result<(), hipError_t> {
    SpirvModule::new_raw(image.cast())
        .and_then(|translated| load_data_impl(module, translated))
}
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
let mut dev = 0;
let err = unsafe { hipCtxGetDevice(&mut dev) };
if err != hipError_t::hipSuccess {
return Err(err);
}
let mut props = unsafe { mem::zeroed() };
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
if err != hipError_t::hipSuccess {
return Err(err);
}
let gcn_arch_slice =
unsafe { slice::from_raw_parts(props.gcnArchName.as_ptr() as _, props.gcnArchName.len()) };
let name = if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
name
} else {
return Err(hipError_t::hipErrorUnknown);
};
let arch_binary = compile_amd(
name,
&spirv_data.binaries[..],
spirv_data.should_link_ptx_impl,
)
.map_err(|_| hipError_t::hipErrorUnknown)?;
let err = unsafe { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
if err != hipError_t::hipSuccess {
return Err(err);
}
Ok(())
}
/// Fallback path of the `llvm-spirv` translator, used by `compile_amd` when the
/// `LLVM_SPIRV` environment variable is not set.
const LLVM_SPIRV: &str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
/// Root of the AMD GPU toolchain; `bin/llvm-link`, `bin/clang` and `amdgcn/bitcode/*` are
/// resolved relative to it.
const AMDGPU: &str = "/opt/amdgpu-pro/";
/// Target triple passed to clang via `-target`.
const AMDGPU_TARGET: &str = "amdgcn-amd-amdhsa";
/// Device-independent bitcode libraries that are always passed to `llvm-link`.
const AMDGPU_BITCODE: [&str; 8] = [
    "opencl.bc",
    "ocml.bc",
    "ockl.bc",
    "oclc_correctly_rounded_sqrt_off.bc",
    "oclc_daz_opt_on.bc",
    "oclc_finite_only_off.bc",
    "oclc_unsafe_math_off.bc",
    "oclc_wavefrontsize64_off.bc",
];
fn compile_amd(
device_name: &str,
spirv_il: &[u32],
ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> {
use std::env;
let dir = tempfile::tempdir()?;
let mut spirv = NamedTempFile::new_in(&dir)?;
let llvm = NamedTempFile::new_in(&dir)?;
let spirv_il_u8 = unsafe {
slice::from_raw_parts(
spirv_il.as_ptr() as *const u8,
spirv_il.len() * mem::size_of::<u32>(),
)
};
spirv.write_all(spirv_il_u8)?;
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
Ok(path) => Cow::Owned(path),
Err(_) => Cow::Borrowed(LLVM_SPIRV),
};
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
.arg("-r")
.arg("-o")
.arg(llvm.path())
.arg(spirv.path())
.status()?;
assert!(to_llvm_cmd.success());
let linked_binary = NamedTempFile::new_in(&dir)?;
let mut llvm_link = PathBuf::from(AMDGPU);
llvm_link.push("bin");
llvm_link.push("llvm-link");
let mut linker_cmd = Command::new(&llvm_link);
linker_cmd
.arg("--only-needed")
.arg("-o")
.arg(linked_binary.path())
.arg(llvm.path())
.args(get_bitcode_paths(device_name));
if cfg!(debug_assertions) {
linker_cmd.arg("-v");
}
let status = linker_cmd.status()?;
assert!(status.success());
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
let compiled_binary = NamedTempFile::new_in(&dir)?;
let mut cland_exe = PathBuf::from(AMDGPU);
cland_exe.push("bin");
cland_exe.push("clang");
let mut compiler_cmd = Command::new(&cland_exe);
compiler_cmd
.arg(format!("-mcpu={}", device_name))
.arg("-nogpulib")
.arg("-mno-wavefrontsize64")
.arg("-O3")
.arg("-Xlinker")
.arg("--no-undefined")
.arg("-target")
.arg(AMDGPU_TARGET)
.arg("-o")
.arg(compiled_binary.path())
.arg("-x")
.arg("ir")
.arg(linked_binary.path());
if let Some((_, bitcode)) = ptx_lib {
ptx_lib_bitcode.write_all(bitcode)?;
compiler_cmd.arg(ptx_lib_bitcode.path());
};
if cfg!(debug_assertions) {
compiler_cmd.arg("-v");
}
let status = compiler_cmd.status()?;
assert!(status.success());
let mut result = Vec::new();
let compiled_bin_path = compiled_binary.path();
let mut compiled_binary = File::open(compiled_bin_path)?;
compiled_binary.read_to_end(&mut result)?;
let mut persistent = PathBuf::from("/tmp/zluda");
std::fs::create_dir_all(&persistent)?;
persistent.push(compiled_bin_path.file_name().unwrap());
std::fs::copy(compiled_bin_path, persistent)?;
Ok(result)
}
/// Lists the bitcode libraries to link for `device_name`.
///
/// Yields every entry of `AMDGPU_BITCODE` under `<AMDGPU>/amdgcn/bitcode/`, followed by the
/// device-specific `oclc_isa_version_<N>.bc`. `<N>` is `device_name` with its first three
/// bytes removed (the `gfx` in names such as `gfx1010`) and with everything from the first
/// `':'` onwards dropped.
///
/// If `device_name` is too short or otherwise cannot be sliced that way, `<N>` is empty
/// instead of the function panicking; the resulting path will simply not exist and the
/// linker invocation reports the failure.
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
    let generic_paths = AMDGPU_BITCODE.iter().map(|file_name| {
        let mut path = PathBuf::from(AMDGPU);
        path.push("amdgcn");
        path.push("bitcode");
        path.push(file_name);
        path
    });
    // Feature suffixes such as ":xnack-" are not part of the ISA version.
    let base_end = device_name
        .find(':')
        .unwrap_or_else(|| device_name.len());
    // `get` returns `None` for an out-of-range, inverted or non-char-boundary range, where
    // direct indexing would panic.
    let isa_version = device_name.get(3..base_end).unwrap_or("");
    let mut additional_path = PathBuf::from(AMDGPU);
    additional_path.push("amdgcn");
    additional_path.push("bitcode");
    additional_path.push(format!(
        "{}{}{}",
        AMDGPU_BITCODE_DEVICE_PREFIX, isa_version, ".bc"
    ));
    generic_paths.chain(std::iter::once(additional_path))
}