mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 06:09:15 +00:00
HIP conversion part #3
This commit is contained in:
parent
a63f004540
commit
a71cd44104
5 changed files with 284 additions and 23 deletions
|
@ -2454,7 +2454,7 @@ pub extern "system" fn cuModuleLoad(
|
||||||
module: *mut CUmodule,
|
module: *mut CUmodule,
|
||||||
fname: *const ::std::os::raw::c_char,
|
fname: *const ::std::os::raw::c_char,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
unsafe { hipModuleLoad(module as _, fname as _).into() }
|
r#impl::module::load(module, fname).encuda()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -2462,7 +2462,7 @@ pub extern "system" fn cuModuleLoadData(
|
||||||
module: *mut CUmodule,
|
module: *mut CUmodule,
|
||||||
image: *const ::std::os::raw::c_void,
|
image: *const ::std::os::raw::c_void,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
unsafe { hipModuleLoadData(module as _, image as _).into() }
|
r#impl::module::load_data(module, image).encuda()
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: parse jit options
|
// TODO: parse jit options
|
||||||
|
@ -2474,16 +2474,7 @@ pub extern "system" fn cuModuleLoadDataEx(
|
||||||
options: *mut CUjit_option,
|
options: *mut CUjit_option,
|
||||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
unsafe {
|
r#impl::module::load_data(module, image).encuda()
|
||||||
hipModuleLoadDataEx(
|
|
||||||
module as _,
|
|
||||||
image as _,
|
|
||||||
numOptions,
|
|
||||||
options as _,
|
|
||||||
optionValues,
|
|
||||||
)
|
|
||||||
.into()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -3710,7 +3701,22 @@ pub extern "system" fn cuLaunchKernel(
|
||||||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||||
extra: *mut *mut ::std::os::raw::c_void,
|
extra: *mut *mut ::std::os::raw::c_void,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
todo!()
|
unsafe {
|
||||||
|
hipModuleLaunchKernel(
|
||||||
|
f as _,
|
||||||
|
gridDimX,
|
||||||
|
gridDimY,
|
||||||
|
gridDimZ,
|
||||||
|
blockDimX,
|
||||||
|
blockDimY,
|
||||||
|
blockDimZ,
|
||||||
|
sharedMemBytes,
|
||||||
|
hStream as _,
|
||||||
|
kernelParams,
|
||||||
|
extra,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: implement default stream semantics
|
// TODO: implement default stream semantics
|
||||||
|
@ -3728,7 +3734,19 @@ pub extern "system" fn cuLaunchKernel_ptsz(
|
||||||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||||
extra: *mut *mut ::std::os::raw::c_void,
|
extra: *mut *mut ::std::os::raw::c_void,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
todo!()
|
cuLaunchKernel(
|
||||||
|
f,
|
||||||
|
gridDimX,
|
||||||
|
gridDimY,
|
||||||
|
gridDimZ,
|
||||||
|
blockDimX,
|
||||||
|
blockDimY,
|
||||||
|
blockDimZ,
|
||||||
|
sharedMemBytes,
|
||||||
|
hStream,
|
||||||
|
kernelParams,
|
||||||
|
extra,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
|
|
@ -12,7 +12,7 @@ use crate::{
|
||||||
cuda_impl,
|
cuda_impl,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{device, Decuda, Encuda};
|
use super::{device, module, Decuda, Encuda};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::os::raw::{c_uint, c_ulong, c_ushort};
|
use std::os::raw::{c_uint, c_ulong, c_ushort};
|
||||||
use std::{
|
use std::{
|
||||||
|
@ -253,20 +253,17 @@ unsafe extern "system" fn get_module_from_cubin(
|
||||||
},
|
},
|
||||||
Err(_) => continue,
|
Err(_) => continue,
|
||||||
};
|
};
|
||||||
todo!()
|
|
||||||
/*
|
|
||||||
let module = module::SpirvModule::new(kernel_text_string);
|
let module = module::SpirvModule::new(kernel_text_string);
|
||||||
match module {
|
match module {
|
||||||
Ok(module) => {
|
Ok(module) => {
|
||||||
match module::load_data_impl(result, module) {
|
match module::load_data_impl(result, module) {
|
||||||
Ok(()) => {}
|
Ok(()) => {}
|
||||||
Err(err) => return err,
|
Err(err) => return err.into(),
|
||||||
}
|
}
|
||||||
return CUresult::CUDA_SUCCESS;
|
return CUresult::CUDA_SUCCESS;
|
||||||
}
|
}
|
||||||
Err(_) => continue,
|
Err(_) => continue,
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes};
|
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes, hipLaunchKernel, hipModuleLaunchKernel};
|
||||||
|
|
||||||
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
||||||
use crate::cuda::{CUfunction, CUfunction_attribute};
|
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
|
||||||
use ::std::os::raw::{c_uint, c_void};
|
use ::std::os::raw::{c_uint, c_void};
|
||||||
use std::{mem, ptr};
|
use std::{mem, ptr};
|
||||||
|
|
||||||
|
@ -19,8 +19,12 @@ pub(crate) fn get_attribute(
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
let value = match cu_attrib {
|
let value = match cu_attrib {
|
||||||
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => hip_attrib.maxThreadsPerBlock,
|
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
||||||
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => hip_attrib.sharedSizeBytes as i32,
|
hip_attrib.maxThreadsPerBlock
|
||||||
|
}
|
||||||
|
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
|
||||||
|
hip_attrib.sharedSizeBytes as i32
|
||||||
|
}
|
||||||
_ => return hipError_t::hipErrorInvalidValue,
|
_ => return hipError_t::hipErrorInvalidValue,
|
||||||
};
|
};
|
||||||
unsafe { *pi = value };
|
unsafe { *pi = value };
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
use hip_runtime_sys::hipError_t;
|
||||||
|
|
||||||
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
|
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
|
||||||
use std::{
|
use std::{
|
||||||
ffi::c_void,
|
ffi::c_void,
|
||||||
|
@ -17,6 +19,7 @@ pub mod function;
|
||||||
#[cfg_attr(windows, path = "os_win.rs")]
|
#[cfg_attr(windows, path = "os_win.rs")]
|
||||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||||
pub(crate) mod os;
|
pub(crate) mod os;
|
||||||
|
pub(crate) mod module;
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
pub fn unimplemented() -> CUresult {
|
pub fn unimplemented() -> CUresult {
|
||||||
|
@ -180,6 +183,13 @@ impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Encuda for hipError_t {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
self.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
|
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
|
||||||
mem::transmute(t)
|
mem::transmute(t)
|
||||||
}
|
}
|
||||||
|
|
232
zluda/src/impl/module.rs
Normal file
232
zluda/src/impl/module.rs
Normal file
|
@ -0,0 +1,232 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::ffi::{CStr, CString};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{self, Read, Write};
|
||||||
|
use std::ops::Add;
|
||||||
|
use std::os::raw::c_char;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::process::Command;
|
||||||
|
use std::{fs, mem, ptr, slice};
|
||||||
|
|
||||||
|
use hip_runtime_sys::{
|
||||||
|
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipError_t,
|
||||||
|
hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
||||||
|
};
|
||||||
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
|
use crate::cuda::CUmodule;
|
||||||
|
|
||||||
|
pub struct SpirvModule {
|
||||||
|
pub binaries: Vec<u32>,
|
||||||
|
pub kernel_info: HashMap<String, ptx::KernelInfo>,
|
||||||
|
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
|
||||||
|
pub build_options: CString,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SpirvModule {
|
||||||
|
pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
|
||||||
|
let u8_text = unsafe { CStr::from_ptr(text) };
|
||||||
|
let ptx_text = u8_text
|
||||||
|
.to_str()
|
||||||
|
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||||
|
Self::new(ptx_text)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
let ast = ptx::ModuleParser::new()
|
||||||
|
.parse(&mut errors, ptx_text)
|
||||||
|
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||||
|
if errors.len() > 0 {
|
||||||
|
return Err(hipError_t::hipErrorInvalidImage);
|
||||||
|
}
|
||||||
|
let spirv_module =
|
||||||
|
ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||||
|
Ok(SpirvModule {
|
||||||
|
binaries: spirv_module.assemble(),
|
||||||
|
kernel_info: spirv_module.kernel_info,
|
||||||
|
should_link_ptx_impl: spirv_module.should_link_ptx_impl,
|
||||||
|
build_options: spirv_module.build_options,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
|
||||||
|
let length = (0..)
|
||||||
|
.position(|i| unsafe { *fname.add(i) == 0 })
|
||||||
|
.ok_or(hipError_t::hipErrorInvalidValue)?;
|
||||||
|
let file_name = CStr::from_bytes_with_nul(unsafe { slice::from_raw_parts(fname as _, length) })
|
||||||
|
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||||
|
let valid_file_name = file_name
|
||||||
|
.to_str()
|
||||||
|
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||||
|
let mut file = File::open(valid_file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
|
||||||
|
let mut file_buffer = Vec::new();
|
||||||
|
file.read_to_end(&mut file_buffer)
|
||||||
|
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||||
|
drop(file);
|
||||||
|
load_data(module, file_buffer.as_ptr() as _)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn load_data(
|
||||||
|
module: *mut CUmodule,
|
||||||
|
image: *const std::ffi::c_void,
|
||||||
|
) -> Result<(), hipError_t> {
|
||||||
|
let spirv_data = SpirvModule::new_raw(image as *const _)?;
|
||||||
|
load_data_impl(module, spirv_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
|
||||||
|
let mut dev = 0;
|
||||||
|
let err = unsafe { hipCtxGetDevice(&mut dev) };
|
||||||
|
if err != hipError_t::hipSuccess {
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
let mut props = unsafe { mem::zeroed() };
|
||||||
|
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
|
||||||
|
if err != hipError_t::hipSuccess {
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
let gcn_arch_slice =
|
||||||
|
unsafe { slice::from_raw_parts(props.gcnArchName.as_ptr() as _, props.gcnArchName.len()) };
|
||||||
|
let name = if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||||
|
name
|
||||||
|
} else {
|
||||||
|
return Err(hipError_t::hipErrorUnknown);
|
||||||
|
};
|
||||||
|
let arch_binary = compile_amd(
|
||||||
|
name,
|
||||||
|
&spirv_data.binaries[..],
|
||||||
|
spirv_data.should_link_ptx_impl,
|
||||||
|
)
|
||||||
|
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||||
|
let err = unsafe { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
|
||||||
|
if err != hipError_t::hipSuccess {
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
|
||||||
|
const AMDGPU: &'static str = "/opt/amdgpu-pro/";
|
||||||
|
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
|
||||||
|
const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||||
|
"opencl.bc",
|
||||||
|
"ocml.bc",
|
||||||
|
"ockl.bc",
|
||||||
|
"oclc_correctly_rounded_sqrt_off.bc",
|
||||||
|
"oclc_daz_opt_on.bc",
|
||||||
|
"oclc_finite_only_off.bc",
|
||||||
|
"oclc_unsafe_math_off.bc",
|
||||||
|
"oclc_wavefrontsize64_off.bc",
|
||||||
|
];
|
||||||
|
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
||||||
|
|
||||||
|
fn compile_amd(
|
||||||
|
device_name: &str,
|
||||||
|
spirv_il: &[u32],
|
||||||
|
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||||
|
) -> io::Result<Vec<u8>> {
|
||||||
|
use std::env;
|
||||||
|
let dir = tempfile::tempdir()?;
|
||||||
|
let mut spirv = NamedTempFile::new_in(&dir)?;
|
||||||
|
let llvm = NamedTempFile::new_in(&dir)?;
|
||||||
|
let spirv_il_u8 = unsafe {
|
||||||
|
slice::from_raw_parts(
|
||||||
|
spirv_il.as_ptr() as *const u8,
|
||||||
|
spirv_il.len() * mem::size_of::<u32>(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
spirv.write_all(spirv_il_u8)?;
|
||||||
|
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
|
||||||
|
Ok(path) => Cow::Owned(path),
|
||||||
|
Err(_) => Cow::Borrowed(LLVM_SPIRV),
|
||||||
|
};
|
||||||
|
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
|
||||||
|
.arg("-r")
|
||||||
|
.arg("-o")
|
||||||
|
.arg(llvm.path())
|
||||||
|
.arg(spirv.path())
|
||||||
|
.status()?;
|
||||||
|
assert!(to_llvm_cmd.success());
|
||||||
|
let linked_binary = NamedTempFile::new_in(&dir)?;
|
||||||
|
let mut llvm_link = PathBuf::from(AMDGPU);
|
||||||
|
llvm_link.push("bin");
|
||||||
|
llvm_link.push("llvm-link");
|
||||||
|
let mut linker_cmd = Command::new(&llvm_link);
|
||||||
|
linker_cmd
|
||||||
|
.arg("--only-needed")
|
||||||
|
.arg("-o")
|
||||||
|
.arg(linked_binary.path())
|
||||||
|
.arg(llvm.path())
|
||||||
|
.args(get_bitcode_paths(device_name));
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
linker_cmd.arg("-v");
|
||||||
|
}
|
||||||
|
let status = linker_cmd.status()?;
|
||||||
|
assert!(status.success());
|
||||||
|
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
||||||
|
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
||||||
|
let mut cland_exe = PathBuf::from(AMDGPU);
|
||||||
|
cland_exe.push("bin");
|
||||||
|
cland_exe.push("clang");
|
||||||
|
let mut compiler_cmd = Command::new(&cland_exe);
|
||||||
|
compiler_cmd
|
||||||
|
.arg(format!("-mcpu={}", device_name))
|
||||||
|
.arg("-nogpulib")
|
||||||
|
.arg("-mno-wavefrontsize64")
|
||||||
|
.arg("-O3")
|
||||||
|
.arg("-Xlinker")
|
||||||
|
.arg("--no-undefined")
|
||||||
|
.arg("-target")
|
||||||
|
.arg(AMDGPU_TARGET)
|
||||||
|
.arg("-o")
|
||||||
|
.arg(compiled_binary.path())
|
||||||
|
.arg("-x")
|
||||||
|
.arg("ir")
|
||||||
|
.arg(linked_binary.path());
|
||||||
|
if let Some((_, bitcode)) = ptx_lib {
|
||||||
|
ptx_lib_bitcode.write_all(bitcode)?;
|
||||||
|
compiler_cmd.arg(ptx_lib_bitcode.path());
|
||||||
|
};
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
compiler_cmd.arg("-v");
|
||||||
|
}
|
||||||
|
let status = compiler_cmd.status()?;
|
||||||
|
assert!(status.success());
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let compiled_bin_path = compiled_binary.path();
|
||||||
|
let mut compiled_binary = File::open(compiled_bin_path)?;
|
||||||
|
compiled_binary.read_to_end(&mut result)?;
|
||||||
|
let mut persistent = PathBuf::from("/tmp/zluda");
|
||||||
|
std::fs::create_dir_all(&persistent)?;
|
||||||
|
persistent.push(compiled_bin_path.file_name().unwrap());
|
||||||
|
std::fs::copy(compiled_bin_path, persistent)?;
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
||||||
|
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
|
||||||
|
let mut path = PathBuf::from(AMDGPU);
|
||||||
|
path.push("amdgcn");
|
||||||
|
path.push("bitcode");
|
||||||
|
path.push(x);
|
||||||
|
path
|
||||||
|
});
|
||||||
|
let suffix = if let Some(suffix_idx) = device_name.find(':') {
|
||||||
|
suffix_idx
|
||||||
|
} else {
|
||||||
|
device_name.len()
|
||||||
|
};
|
||||||
|
let mut additional_path = PathBuf::from(AMDGPU);
|
||||||
|
additional_path.push("amdgcn");
|
||||||
|
additional_path.push("bitcode");
|
||||||
|
additional_path.push(format!(
|
||||||
|
"{}{}{}",
|
||||||
|
AMDGPU_BITCODE_DEVICE_PREFIX,
|
||||||
|
&device_name[3..suffix],
|
||||||
|
".bc"
|
||||||
|
));
|
||||||
|
generic_paths.chain(std::iter::once(additional_path))
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue