mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
HIP conversion part #3
This commit is contained in:
parent
a63f004540
commit
a71cd44104
5 changed files with 284 additions and 23 deletions
|
@ -2454,7 +2454,7 @@ pub extern "system" fn cuModuleLoad(
|
|||
module: *mut CUmodule,
|
||||
fname: *const ::std::os::raw::c_char,
|
||||
) -> CUresult {
|
||||
unsafe { hipModuleLoad(module as _, fname as _).into() }
|
||||
r#impl::module::load(module, fname).encuda()
|
||||
}
|
||||
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
|
@ -2462,7 +2462,7 @@ pub extern "system" fn cuModuleLoadData(
|
|||
module: *mut CUmodule,
|
||||
image: *const ::std::os::raw::c_void,
|
||||
) -> CUresult {
|
||||
unsafe { hipModuleLoadData(module as _, image as _).into() }
|
||||
r#impl::module::load_data(module, image).encuda()
|
||||
}
|
||||
|
||||
// TODO: parse jit options
|
||||
|
@ -2474,16 +2474,7 @@ pub extern "system" fn cuModuleLoadDataEx(
|
|||
options: *mut CUjit_option,
|
||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult {
|
||||
unsafe {
|
||||
hipModuleLoadDataEx(
|
||||
module as _,
|
||||
image as _,
|
||||
numOptions,
|
||||
options as _,
|
||||
optionValues,
|
||||
)
|
||||
.into()
|
||||
}
|
||||
r#impl::module::load_data(module, image).encuda()
|
||||
}
|
||||
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
|
@ -3710,7 +3701,22 @@ pub extern "system" fn cuLaunchKernel(
|
|||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||
extra: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult {
|
||||
todo!()
|
||||
unsafe {
|
||||
hipModuleLaunchKernel(
|
||||
f as _,
|
||||
gridDimX,
|
||||
gridDimY,
|
||||
gridDimZ,
|
||||
blockDimX,
|
||||
blockDimY,
|
||||
blockDimZ,
|
||||
sharedMemBytes,
|
||||
hStream as _,
|
||||
kernelParams,
|
||||
extra,
|
||||
)
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
// TODO: implement default stream semantics
|
||||
|
@ -3728,7 +3734,19 @@ pub extern "system" fn cuLaunchKernel_ptsz(
|
|||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||
extra: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult {
|
||||
todo!()
|
||||
cuLaunchKernel(
|
||||
f,
|
||||
gridDimX,
|
||||
gridDimY,
|
||||
gridDimZ,
|
||||
blockDimX,
|
||||
blockDimY,
|
||||
blockDimZ,
|
||||
sharedMemBytes,
|
||||
hStream,
|
||||
kernelParams,
|
||||
extra,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
|
|
|
@ -12,7 +12,7 @@ use crate::{
|
|||
cuda_impl,
|
||||
};
|
||||
|
||||
use super::{device, Decuda, Encuda};
|
||||
use super::{device, module, Decuda, Encuda};
|
||||
use std::collections::HashMap;
|
||||
use std::os::raw::{c_uint, c_ulong, c_ushort};
|
||||
use std::{
|
||||
|
@ -253,20 +253,17 @@ unsafe extern "system" fn get_module_from_cubin(
|
|||
},
|
||||
Err(_) => continue,
|
||||
};
|
||||
todo!()
|
||||
/*
|
||||
let module = module::SpirvModule::new(kernel_text_string);
|
||||
match module {
|
||||
Ok(module) => {
|
||||
match module::load_data_impl(result, module) {
|
||||
Ok(()) => {}
|
||||
Err(err) => return err,
|
||||
Err(err) => return err.into(),
|
||||
}
|
||||
return CUresult::CUDA_SUCCESS;
|
||||
}
|
||||
Err(_) => continue,
|
||||
}
|
||||
*/
|
||||
}
|
||||
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes};
|
||||
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes, hipLaunchKernel, hipModuleLaunchKernel};
|
||||
|
||||
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
||||
use crate::cuda::{CUfunction, CUfunction_attribute};
|
||||
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
|
||||
use ::std::os::raw::{c_uint, c_void};
|
||||
use std::{mem, ptr};
|
||||
|
||||
|
@ -19,8 +19,12 @@ pub(crate) fn get_attribute(
|
|||
return err;
|
||||
}
|
||||
let value = match cu_attrib {
|
||||
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => hip_attrib.maxThreadsPerBlock,
|
||||
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => hip_attrib.sharedSizeBytes as i32,
|
||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
||||
hip_attrib.maxThreadsPerBlock
|
||||
}
|
||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
|
||||
hip_attrib.sharedSizeBytes as i32
|
||||
}
|
||||
_ => return hipError_t::hipErrorInvalidValue,
|
||||
};
|
||||
unsafe { *pi = value };
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use hip_runtime_sys::hipError_t;
|
||||
|
||||
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
|
||||
use std::{
|
||||
ffi::c_void,
|
||||
|
@ -17,6 +19,7 @@ pub mod function;
|
|||
#[cfg_attr(windows, path = "os_win.rs")]
|
||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||
pub(crate) mod os;
|
||||
pub(crate) mod module;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub fn unimplemented() -> CUresult {
|
||||
|
@ -180,6 +183,13 @@ impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1,
|
|||
}
|
||||
}
|
||||
|
||||
impl Encuda for hipError_t {
|
||||
type To = CUresult;
|
||||
fn encuda(self: Self) -> Self::To {
|
||||
self.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Reinterprets a shared reference with lifetime `'a` as one with an
/// unrelated lifetime `'b`.
///
/// # Safety
///
/// The caller must guarantee that the referent stays alive and is not
/// mutably aliased for all of `'b`; the compiler can no longer check this.
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
    let raw: *const T = t;
    &*raw
}
|
||||
|
|
232
zluda/src/impl/module.rs
Normal file
232
zluda/src/impl/module.rs
Normal file
|
@ -0,0 +1,232 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::ops::Add;
|
||||
use std::os::raw::c_char;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::{fs, mem, ptr, slice};
|
||||
|
||||
use hip_runtime_sys::{
|
||||
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipError_t,
|
||||
hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
||||
};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::cuda::CUmodule;
|
||||
|
||||
pub struct SpirvModule {
|
||||
pub binaries: Vec<u32>,
|
||||
pub kernel_info: HashMap<String, ptx::KernelInfo>,
|
||||
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
|
||||
pub build_options: CString,
|
||||
}
|
||||
|
||||
impl SpirvModule {
|
||||
pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
|
||||
let u8_text = unsafe { CStr::from_ptr(text) };
|
||||
let ptx_text = u8_text
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Self::new(ptx_text)
|
||||
}
|
||||
|
||||
pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
|
||||
let mut errors = Vec::new();
|
||||
let ast = ptx::ModuleParser::new()
|
||||
.parse(&mut errors, ptx_text)
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
if errors.len() > 0 {
|
||||
return Err(hipError_t::hipErrorInvalidImage);
|
||||
}
|
||||
let spirv_module =
|
||||
ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Ok(SpirvModule {
|
||||
binaries: spirv_module.assemble(),
|
||||
kernel_info: spirv_module.kernel_info,
|
||||
should_link_ptx_impl: spirv_module.should_link_ptx_impl,
|
||||
build_options: spirv_module.build_options,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
|
||||
let length = (0..)
|
||||
.position(|i| unsafe { *fname.add(i) == 0 })
|
||||
.ok_or(hipError_t::hipErrorInvalidValue)?;
|
||||
let file_name = CStr::from_bytes_with_nul(unsafe { slice::from_raw_parts(fname as _, length) })
|
||||
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||
let valid_file_name = file_name
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||
let mut file = File::open(valid_file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
|
||||
let mut file_buffer = Vec::new();
|
||||
file.read_to_end(&mut file_buffer)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
drop(file);
|
||||
load_data(module, file_buffer.as_ptr() as _)
|
||||
}
|
||||
|
||||
pub(crate) fn load_data(
|
||||
module: *mut CUmodule,
|
||||
image: *const std::ffi::c_void,
|
||||
) -> Result<(), hipError_t> {
|
||||
let spirv_data = SpirvModule::new_raw(image as *const _)?;
|
||||
load_data_impl(module, spirv_data)
|
||||
}
|
||||
|
||||
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
|
||||
let mut dev = 0;
|
||||
let err = unsafe { hipCtxGetDevice(&mut dev) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return Err(err);
|
||||
}
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return Err(err);
|
||||
}
|
||||
let gcn_arch_slice =
|
||||
unsafe { slice::from_raw_parts(props.gcnArchName.as_ptr() as _, props.gcnArchName.len()) };
|
||||
let name = if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||
name
|
||||
} else {
|
||||
return Err(hipError_t::hipErrorUnknown);
|
||||
};
|
||||
let arch_binary = compile_amd(
|
||||
name,
|
||||
&spirv_data.binaries[..],
|
||||
spirv_data.should_link_ptx_impl,
|
||||
)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
let err = unsafe { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return Err(err);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fallback location of the `llvm-spirv` tool, used by `compile_amd` when the
/// `LLVM_SPIRV` environment variable is not set.
const LLVM_SPIRV: &str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
/// Root directory under which `bin/` tools and `amdgcn/bitcode/` are looked up.
const AMDGPU: &str = "/opt/amdgpu-pro/";
/// Value passed to clang's `-target` option.
const AMDGPU_TARGET: &str = "amdgcn-amd-amdhsa";
/// Device-independent bitcode files that are always handed to `llvm-link`.
const AMDGPU_BITCODE: [&str; 8] = [
    "opencl.bc",
    "ocml.bc",
    "ockl.bc",
    "oclc_correctly_rounded_sqrt_off.bc",
    "oclc_daz_opt_on.bc",
    "oclc_finite_only_off.bc",
    "oclc_unsafe_math_off.bc",
    "oclc_wavefrontsize64_off.bc",
];
/// File-name prefix of the per-device bitcode file; the device-specific part
/// and `.bc` are appended by `get_bitcode_paths`.
const AMDGPU_BITCODE_DEVICE_PREFIX: &str = "oclc_isa_version_";
|
||||
|
||||
fn compile_amd(
|
||||
device_name: &str,
|
||||
spirv_il: &[u32],
|
||||
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||
) -> io::Result<Vec<u8>> {
|
||||
use std::env;
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mut spirv = NamedTempFile::new_in(&dir)?;
|
||||
let llvm = NamedTempFile::new_in(&dir)?;
|
||||
let spirv_il_u8 = unsafe {
|
||||
slice::from_raw_parts(
|
||||
spirv_il.as_ptr() as *const u8,
|
||||
spirv_il.len() * mem::size_of::<u32>(),
|
||||
)
|
||||
};
|
||||
spirv.write_all(spirv_il_u8)?;
|
||||
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
|
||||
Ok(path) => Cow::Owned(path),
|
||||
Err(_) => Cow::Borrowed(LLVM_SPIRV),
|
||||
};
|
||||
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
|
||||
.arg("-r")
|
||||
.arg("-o")
|
||||
.arg(llvm.path())
|
||||
.arg(spirv.path())
|
||||
.status()?;
|
||||
assert!(to_llvm_cmd.success());
|
||||
let linked_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut llvm_link = PathBuf::from(AMDGPU);
|
||||
llvm_link.push("bin");
|
||||
llvm_link.push("llvm-link");
|
||||
let mut linker_cmd = Command::new(&llvm_link);
|
||||
linker_cmd
|
||||
.arg("--only-needed")
|
||||
.arg("-o")
|
||||
.arg(linked_binary.path())
|
||||
.arg(llvm.path())
|
||||
.args(get_bitcode_paths(device_name));
|
||||
if cfg!(debug_assertions) {
|
||||
linker_cmd.arg("-v");
|
||||
}
|
||||
let status = linker_cmd.status()?;
|
||||
assert!(status.success());
|
||||
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
||||
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut cland_exe = PathBuf::from(AMDGPU);
|
||||
cland_exe.push("bin");
|
||||
cland_exe.push("clang");
|
||||
let mut compiler_cmd = Command::new(&cland_exe);
|
||||
compiler_cmd
|
||||
.arg(format!("-mcpu={}", device_name))
|
||||
.arg("-nogpulib")
|
||||
.arg("-mno-wavefrontsize64")
|
||||
.arg("-O3")
|
||||
.arg("-Xlinker")
|
||||
.arg("--no-undefined")
|
||||
.arg("-target")
|
||||
.arg(AMDGPU_TARGET)
|
||||
.arg("-o")
|
||||
.arg(compiled_binary.path())
|
||||
.arg("-x")
|
||||
.arg("ir")
|
||||
.arg(linked_binary.path());
|
||||
if let Some((_, bitcode)) = ptx_lib {
|
||||
ptx_lib_bitcode.write_all(bitcode)?;
|
||||
compiler_cmd.arg(ptx_lib_bitcode.path());
|
||||
};
|
||||
if cfg!(debug_assertions) {
|
||||
compiler_cmd.arg("-v");
|
||||
}
|
||||
let status = compiler_cmd.status()?;
|
||||
assert!(status.success());
|
||||
let mut result = Vec::new();
|
||||
let compiled_bin_path = compiled_binary.path();
|
||||
let mut compiled_binary = File::open(compiled_bin_path)?;
|
||||
compiled_binary.read_to_end(&mut result)?;
|
||||
let mut persistent = PathBuf::from("/tmp/zluda");
|
||||
std::fs::create_dir_all(&persistent)?;
|
||||
persistent.push(compiled_bin_path.file_name().unwrap());
|
||||
std::fs::copy(compiled_bin_path, persistent)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
||||
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
|
||||
let mut path = PathBuf::from(AMDGPU);
|
||||
path.push("amdgcn");
|
||||
path.push("bitcode");
|
||||
path.push(x);
|
||||
path
|
||||
});
|
||||
let suffix = if let Some(suffix_idx) = device_name.find(':') {
|
||||
suffix_idx
|
||||
} else {
|
||||
device_name.len()
|
||||
};
|
||||
let mut additional_path = PathBuf::from(AMDGPU);
|
||||
additional_path.push("amdgcn");
|
||||
additional_path.push("bitcode");
|
||||
additional_path.push(format!(
|
||||
"{}{}{}",
|
||||
AMDGPU_BITCODE_DEVICE_PREFIX,
|
||||
&device_name[3..suffix],
|
||||
".bc"
|
||||
));
|
||||
generic_paths.chain(std::iter::once(additional_path))
|
||||
}
|
Loading…
Add table
Reference in a new issue