mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Add basic cuModule*, add handful of missing stuff
This commit is contained in:
parent
3ec7bffdc5
commit
9f677e23c0
6 changed files with 152 additions and 248 deletions
|
@ -9,6 +9,8 @@ name = "nvcuda"
|
|||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
comgr = { path = "../comgr" }
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
ptx = { path = "../ptx" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
|
|
|
@ -7,3 +7,7 @@ pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipErro
|
|||
pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> hipError_t {
|
||||
unsafe { hipDeviceSetLimit(limit, value) }
|
||||
}
|
||||
|
||||
pub(crate) fn synchronize() -> hipError_t {
|
||||
unsafe { hipDeviceSynchronize() }
|
||||
}
|
||||
|
|
|
@ -300,6 +300,10 @@ pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t)
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_count(count: &mut ::core::ffi::c_int) -> hipError_t {
|
||||
unsafe { hipGetDeviceCount(count) }
|
||||
}
|
||||
|
||||
/// Converts `x` to `i32`, saturating at `i32::MAX` when `x` does not fit.
fn clamp_usize(x: usize) -> i32 {
    // Largest input that can be represented as an `i32` without truncation.
    const CEILING: usize = i32::MAX as usize;
    if x > CEILING {
        i32::MAX
    } else {
        x as i32
    }
}
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::mem::{self, ManuallyDrop};
|
||||
|
||||
pub(super) mod context;
|
||||
pub(super) mod device;
|
||||
pub(super) mod module;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> CUresult {
|
||||
|
@ -66,9 +68,38 @@ macro_rules! from_cuda_transmute {
|
|||
};
|
||||
}
|
||||
|
||||
// Generates three `FromCuda` conversions for every listed `ZludaObject` type:
//   * handle            -> handle       (copied through unchanged),
//   * `*mut` handle     -> `&mut` handle (a null pointer is rejected with `INVALID_VALUE`),
//   * handle            -> `&T`         (goes through the `LiveCheck` cookie comparison).
macro_rules! from_cuda_object {
    ($($type_:ty),*) => {
        $(
            impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for <$type_ as ZludaObject>::CudaHandle {
                fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<<$type_ as ZludaObject>::CudaHandle, CUerror> {
                    Ok(*handle)
                }
            }

            impl<'a> FromCuda<'a, *mut <$type_ as ZludaObject>::CudaHandle> for &'a mut <$type_ as ZludaObject>::CudaHandle {
                fn from_cuda(handle: &'a *mut <$type_ as ZludaObject>::CudaHandle) -> Result<&'a mut <$type_ as ZludaObject>::CudaHandle, CUerror> {
                    // `as_mut` yields `None` for a null pointer, which becomes the error below.
                    let target = unsafe { handle.as_mut() };
                    target.ok_or(CUerror::INVALID_VALUE)
                }
            }

            impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for &'a $type_ {
                fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<&'a $type_, CUerror> {
                    // NOTE(review): `as_ref` reinterprets the handle without validating it, so a
                    // null handle does not appear to be rejected before the cookie is read —
                    // confirm callers never pass one.
                    as_ref::<$type_>(handle).as_result()
                }
            }
        )*
    };
}
|
||||
|
||||
from_cuda_nop!(
|
||||
*mut i8,
|
||||
*mut usize,
|
||||
*const std::ffi::c_void,
|
||||
*const ::core::ffi::c_char,
|
||||
i32,
|
||||
u32,
|
||||
usize,
|
||||
|
@ -77,8 +108,10 @@ from_cuda_nop!(
|
|||
);
|
||||
from_cuda_transmute!(
|
||||
CUdevice => hipDevice_t,
|
||||
CUuuid => hipUUID
|
||||
CUuuid => hipUUID,
|
||||
CUfunction => hipFunction_t
|
||||
);
|
||||
from_cuda_object!(module::Module);
|
||||
|
||||
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
||||
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
|
||||
|
@ -91,6 +124,72 @@ impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) trait ZludaObject: Sized + Send + Sync {
|
||||
const COOKIE: usize;
|
||||
const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
|
||||
|
||||
type CudaHandle: Sized;
|
||||
|
||||
fn drop_checked(&mut self) -> CUresult;
|
||||
|
||||
fn wrap(self) -> Self::CudaHandle {
|
||||
unsafe { mem::transmute_copy(&LiveCheck::wrap(self)) }
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub(crate) struct LiveCheck<T: ZludaObject> {
|
||||
cookie: usize,
|
||||
data: ManuallyDrop<T>,
|
||||
}
|
||||
|
||||
impl<T: ZludaObject> LiveCheck<T> {
|
||||
fn wrap(data: T) -> *mut Self {
|
||||
Box::into_raw(Box::new(LiveCheck {
|
||||
cookie: T::COOKIE,
|
||||
data: ManuallyDrop::new(data),
|
||||
}))
|
||||
}
|
||||
|
||||
fn as_result(&self) -> Result<&T, CUerror> {
|
||||
if self.cookie == T::COOKIE {
|
||||
Ok(&self.data)
|
||||
} else {
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
|
||||
// This looks like nonsense, but it's not. There are two cases:
|
||||
// Err(CUerror) -> meaning that the object is invalid, this pointer does not point into valid memory
|
||||
// Ok(maybe_error) -> meaning that the object is valid, we dropped everything, but there *might*
|
||||
// an error in the underlying runtime that we want to propagate
|
||||
#[must_use]
|
||||
fn drop_checked(&mut self) -> Result<Result<(), CUerror>, CUerror> {
|
||||
if self.cookie == T::COOKIE {
|
||||
self.cookie = 0;
|
||||
let result = self.data.drop_checked();
|
||||
unsafe { ManuallyDrop::drop(&mut self.data) };
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reinterprets a borrowed handle as a borrowed `ManuallyDrop<Box<LiveCheck<T>>>`.
///
/// Nothing is validated here: the bytes of `*handle` are simply viewed as the box.
/// NOTE(review): a null or foreign handle would therefore be viewed as a `Box` too —
/// confirm that callers only pass handles produced by `ZludaObject::wrap`.
pub fn as_ref<'a, T: ZludaObject>(
    handle: &'a T::CudaHandle,
) -> &'a ManuallyDrop<Box<LiveCheck<T>>> {
    let viewed = handle as *const T::CudaHandle as *const ManuallyDrop<Box<LiveCheck<T>>>;
    // The pointer comes from a live reference, so it is non-null and valid for `'a`.
    unsafe { &*viewed }
}
|
||||
|
||||
pub fn drop_checked<T: ZludaObject>(handle: T::CudaHandle) -> Result<(), CUerror> {
|
||||
let mut wrapped_object: ManuallyDrop<Box<LiveCheck<T>>> =
|
||||
unsafe { mem::transmute_copy(&handle) };
|
||||
let underlying_error = LiveCheck::drop_checked(&mut wrapped_object)?;
|
||||
unsafe { ManuallyDrop::drop(&mut wrapped_object) };
|
||||
underlying_error
|
||||
}
|
||||
|
||||
pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t {
|
||||
unsafe { hipInit(flags) }
|
||||
}
|
||||
|
|
|
@ -1,261 +1,53 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::ops::Add;
|
||||
use std::os::raw::c_char;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::{env, fs, iter, mem, ptr, slice};
|
||||
use super::ZludaObject;
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{ffi::CStr, mem};
|
||||
|
||||
use hip_runtime_sys::{
|
||||
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t,
|
||||
hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
||||
};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::cuda::CUmodule;
|
||||
use crate::hip_call;
|
||||
|
||||
pub struct SpirvModule {
|
||||
pub binaries: Vec<u32>,
|
||||
pub kernel_info: HashMap<String, ptx::KernelInfo>,
|
||||
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
|
||||
pub build_options: CString,
|
||||
pub(crate) struct Module {
|
||||
base: hipModule_t,
|
||||
}
|
||||
|
||||
impl SpirvModule {
|
||||
pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
|
||||
let u8_text = unsafe { CStr::from_ptr(text) };
|
||||
let ptx_text = u8_text
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Self::new(ptx_text)
|
||||
}
|
||||
impl ZludaObject for Module {
|
||||
const COOKIE: usize = 0xe9138bd040487d4a;
|
||||
|
||||
pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
|
||||
let mut errors = Vec::new();
|
||||
let ast = ptx::ModuleParser::new()
|
||||
.parse(&mut errors, ptx_text)
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
if errors.len() > 0 {
|
||||
return Err(hipError_t::hipErrorInvalidImage);
|
||||
}
|
||||
let spirv_module =
|
||||
ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Ok(SpirvModule {
|
||||
binaries: spirv_module.assemble(),
|
||||
kernel_info: spirv_module.kernel_info,
|
||||
should_link_ptx_impl: spirv_module.should_link_ptx_impl,
|
||||
build_options: spirv_module.build_options,
|
||||
})
|
||||
type CudaHandle = CUmodule;
|
||||
|
||||
fn drop_checked(&mut self) -> CUresult {
|
||||
unsafe { hipModuleUnload(self.base) }?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
|
||||
let file_name = unsafe { CStr::from_ptr(fname) }
|
||||
pub(crate) fn load_data(module: &mut CUmodule, image: *const std::ffi::c_void) -> CUresult {
|
||||
let text = unsafe { CStr::from_ptr(image.cast()) }
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||
let mut file = File::open(file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
|
||||
let mut file_buffer = Vec::new();
|
||||
file.read_to_end(&mut file_buffer)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
let result = load_data(module, file_buffer.as_ptr() as _);
|
||||
drop(file_buffer);
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn load_data(
|
||||
module: *mut CUmodule,
|
||||
image: *const std::ffi::c_void,
|
||||
) -> Result<(), hipError_t> {
|
||||
if image == ptr::null() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
}
|
||||
if unsafe { *(image as *const u32) } == 0x464c457f {
|
||||
return match unsafe { hipModuleLoadData(module as _, image) } {
|
||||
hipError_t::hipSuccess => Ok(()),
|
||||
e => Err(e),
|
||||
};
|
||||
}
|
||||
let spirv_data = SpirvModule::new_raw(image as *const _)?;
|
||||
load_data_impl(module, spirv_data)
|
||||
}
|
||||
|
||||
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
|
||||
.map_err(|_| CUerror::INVALID_VALUE)?;
|
||||
let ast = ptx_parser::parse_module_checked(text).map_err(|_| CUerror::NO_BINARY_FOR_GPU)?;
|
||||
let llvm_module = ptx::to_llvm_module(ast).map_err(|_| CUerror::UNKNOWN)?;
|
||||
let mut dev = 0;
|
||||
hip_call! { hipCtxGetDevice(&mut dev) };
|
||||
unsafe { hipCtxGetDevice(&mut dev) }?;
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
hip_call! { hipGetDeviceProperties(&mut props, dev) };
|
||||
let arch_binary = compile_amd(
|
||||
&props,
|
||||
iter::once(&spirv_data.binaries[..]),
|
||||
spirv_data.should_link_ptx_impl,
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
|
||||
let elf_module = comgr::compile_bitcode(
|
||||
unsafe { CStr::from_ptr(props.gcnArchName.as_ptr()) },
|
||||
&*llvm_module.llvm_ir,
|
||||
llvm_module.linked_bitcode(),
|
||||
)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
hip_call! { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
|
||||
.map_err(|_| CUerror::UNKNOWN)?;
|
||||
let mut hip_module = unsafe { mem::zeroed() };
|
||||
unsafe { hipModuleLoadData(&mut hip_module, elf_module.as_ptr().cast()) }?;
|
||||
*module = Module { base: hip_module }.wrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
|
||||
const AMDGPU: &'static str = "/opt/rocm/";
|
||||
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
|
||||
const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||
"opencl.bc",
|
||||
"ocml.bc",
|
||||
"ockl.bc",
|
||||
"oclc_correctly_rounded_sqrt_off.bc",
|
||||
"oclc_daz_opt_on.bc",
|
||||
"oclc_finite_only_off.bc",
|
||||
"oclc_unsafe_math_off.bc",
|
||||
"oclc_wavefrontsize64_off.bc",
|
||||
];
|
||||
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
||||
|
||||
pub(crate) fn compile_amd<'a>(
|
||||
device_pros: &hipDeviceProp_t,
|
||||
spirv_il: impl Iterator<Item = &'a [u32]>,
|
||||
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||
) -> io::Result<Vec<u8>> {
|
||||
let null_terminator = device_pros
|
||||
.gcnArchName
|
||||
.iter()
|
||||
.position(|&x| x == 0)
|
||||
.unwrap();
|
||||
let gcn_arch_slice = unsafe {
|
||||
slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
|
||||
};
|
||||
let device_name =
|
||||
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||
name
|
||||
} else {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, ""));
|
||||
};
|
||||
let dir = tempfile::tempdir()?;
|
||||
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
|
||||
Ok(path) => Cow::Owned(path),
|
||||
Err(_) => Cow::Borrowed(LLVM_SPIRV),
|
||||
};
|
||||
let llvm_files = spirv_il
|
||||
.map(|spirv| {
|
||||
let mut spirv_file = NamedTempFile::new_in(&dir)?;
|
||||
let spirv_u8 = unsafe {
|
||||
slice::from_raw_parts(
|
||||
spirv.as_ptr() as *const u8,
|
||||
spirv.len() * mem::size_of::<u32>(),
|
||||
)
|
||||
};
|
||||
spirv_file.write_all(spirv_u8)?;
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(spirv_file.path())?;
|
||||
}
|
||||
let llvm = NamedTempFile::new_in(&dir)?;
|
||||
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
|
||||
//.arg("--spirv-debug")
|
||||
.arg("-r")
|
||||
.arg("-o")
|
||||
.arg(llvm.path())
|
||||
.arg(spirv_file.path())
|
||||
.status()?;
|
||||
assert!(to_llvm_cmd.success());
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(llvm.path())?;
|
||||
}
|
||||
Ok::<_, io::Error>(llvm)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let linked_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut llvm_link = PathBuf::from(AMDGPU);
|
||||
llvm_link.push("llvm");
|
||||
llvm_link.push("bin");
|
||||
llvm_link.push("llvm-link");
|
||||
let mut linker_cmd = Command::new(&llvm_link);
|
||||
linker_cmd
|
||||
.arg("-o")
|
||||
.arg(linked_binary.path())
|
||||
.args(llvm_files.iter().map(|f| f.path()))
|
||||
.args(get_bitcode_paths(device_name));
|
||||
if cfg!(debug_assertions) {
|
||||
linker_cmd.arg("-v");
|
||||
}
|
||||
let status = linker_cmd.status()?;
|
||||
assert!(status.success());
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(linked_binary.path())?;
|
||||
}
|
||||
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
||||
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut clang_exe = PathBuf::from(AMDGPU);
|
||||
clang_exe.push("llvm");
|
||||
clang_exe.push("bin");
|
||||
clang_exe.push("clang");
|
||||
let mut compiler_cmd = Command::new(&clang_exe);
|
||||
compiler_cmd
|
||||
.arg(format!("-mcpu={}", device_name))
|
||||
.arg("-ffp-contract=off")
|
||||
.arg("-nogpulib")
|
||||
.arg("-mno-wavefrontsize64")
|
||||
.arg("-O3")
|
||||
.arg("-Xclang")
|
||||
.arg("-O3")
|
||||
.arg("-Xlinker")
|
||||
.arg("--no-undefined")
|
||||
.arg("-target")
|
||||
.arg(AMDGPU_TARGET)
|
||||
.arg("-o")
|
||||
.arg(compiled_binary.path())
|
||||
.arg("-x")
|
||||
.arg("ir")
|
||||
.arg(linked_binary.path());
|
||||
if let Some((_, bitcode)) = ptx_lib {
|
||||
ptx_lib_bitcode.write_all(bitcode)?;
|
||||
compiler_cmd.arg(ptx_lib_bitcode.path());
|
||||
};
|
||||
if cfg!(debug_assertions) {
|
||||
compiler_cmd.arg("-v");
|
||||
}
|
||||
let status = compiler_cmd.status()?;
|
||||
assert!(status.success());
|
||||
let mut result = Vec::new();
|
||||
let compiled_bin_path = compiled_binary.path();
|
||||
let mut compiled_binary = File::open(compiled_bin_path)?;
|
||||
compiled_binary.read_to_end(&mut result)?;
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(compiled_bin_path)?;
|
||||
}
|
||||
Ok(result)
|
||||
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
||||
super::drop_checked::<Module>(hmod)
|
||||
}
|
||||
|
||||
/// Copies the file at `path` into the `/tmp/zluda` directory, creating that directory
/// first if necessary. The copy keeps the final component of `path` as its name.
///
/// # Errors
/// * `io::ErrorKind::InvalidInput` when `path` has no final file-name component
///   (for example `..` or `/`); nothing is created or copied in that case.
/// * Any error reported while creating the directory or copying the file.
fn persist_file(path: &Path) -> io::Result<()> {
    // Resolve the name before touching the filesystem, so a bad path has no side effects.
    let file_name = path.file_name().ok_or_else(|| {
        io::Error::new(io::ErrorKind::InvalidInput, "path has no file name")
    })?;
    let mut persistent = PathBuf::from("/tmp/zluda");
    std::fs::create_dir_all(&persistent)?;
    persistent.push(file_name);
    std::fs::copy(path, persistent)?;
    Ok(())
}
|
||||
|
||||
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
||||
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
|
||||
let mut path = PathBuf::from(AMDGPU);
|
||||
path.push("amdgcn");
|
||||
path.push("bitcode");
|
||||
path.push(x);
|
||||
path
|
||||
});
|
||||
let suffix = if let Some(suffix_idx) = device_name.find(':') {
|
||||
suffix_idx
|
||||
} else {
|
||||
device_name.len()
|
||||
};
|
||||
let mut additional_path = PathBuf::from(AMDGPU);
|
||||
additional_path.push("amdgcn");
|
||||
additional_path.push("bitcode");
|
||||
additional_path.push(format!(
|
||||
"{}{}{}",
|
||||
AMDGPU_BITCODE_DEVICE_PREFIX,
|
||||
&device_name[3..suffix],
|
||||
".bc"
|
||||
));
|
||||
generic_paths.chain(std::iter::once(additional_path))
|
||||
pub(crate) fn get_function(
|
||||
hfunc: &mut hipFunction_t,
|
||||
hmod: &Module,
|
||||
name: *const ::core::ffi::c_char,
|
||||
) -> hipError_t {
|
||||
unsafe { hipModuleGetFunction(hfunc, hmod.base, name) }
|
||||
}
|
||||
|
|
|
@ -27,16 +27,16 @@ macro_rules! implemented {
|
|||
};
|
||||
}
|
||||
|
||||
|
||||
use cuda_base::cuda_function_declarations;
|
||||
cuda_function_declarations!(
|
||||
cuda_base::cuda_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cuCtxGetLimit,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
cuDeviceGetCount,
|
||||
cuDeviceGetLuid,
|
||||
cuDeviceGetName,
|
||||
cuDeviceGetProperties,
|
||||
|
@ -44,5 +44,8 @@ cuda_function_declarations!(
|
|||
cuDeviceGetUuid_v2,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuInit,
|
||||
cuModuleGetFunction,
|
||||
cuModuleLoadData,
|
||||
cuModuleUnload,
|
||||
]
|
||||
);
|
Loading…
Add table
Reference in a new issue