mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 06:40:21 +00:00
Continue HIP conversion
This commit is contained in:
parent
e248a2c9a9
commit
dbb6f09ffa
5 changed files with 74 additions and 50 deletions
|
@ -270,17 +270,7 @@ fn run_spirv<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + D
|
||||||
hip_call! { hipStreamCreate(&mut stream) };
|
hip_call! { hipStreamCreate(&mut stream) };
|
||||||
let mut dev_props = unsafe { mem::zeroed() };
|
let mut dev_props = unsafe { mem::zeroed() };
|
||||||
hip_call! { hipGetDeviceProperties(&mut dev_props, dev) };
|
hip_call! { hipGetDeviceProperties(&mut dev_props, dev) };
|
||||||
let nul_terminator = dev_props.gcnArchName.iter().position(|&x| x == 0).unwrap();
|
let elf_module = compile_amd(&dev_props, &*spirv, module.should_link_ptx_impl)
|
||||||
let gcn_arch_slice = unsafe {
|
|
||||||
slice::from_raw_parts(dev_props.gcnArchName.as_ptr() as _, nul_terminator + 1)
|
|
||||||
};
|
|
||||||
let dev_name =
|
|
||||||
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
|
||||||
name
|
|
||||||
} else {
|
|
||||||
return Err(hipError_t::hipErrorUnknown);
|
|
||||||
};
|
|
||||||
let elf_module = compile_amd(dev_name, &*spirv, module.should_link_ptx_impl)
|
|
||||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||||
let mut module = ptr::null_mut();
|
let mut module = ptr::null_mut();
|
||||||
hip_call! { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) };
|
hip_call! { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) };
|
||||||
|
@ -576,10 +566,24 @@ const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||||
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
||||||
|
|
||||||
fn compile_amd(
|
fn compile_amd(
|
||||||
device_name: &str,
|
device_pros: &hip::hipDeviceProp_t,
|
||||||
spirv_il: &[u32],
|
spirv_il: &[u32],
|
||||||
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||||
) -> io::Result<Vec<u8>> {
|
) -> io::Result<Vec<u8>> {
|
||||||
|
let null_terminator = device_pros
|
||||||
|
.gcnArchName
|
||||||
|
.iter()
|
||||||
|
.position(|&x| x == 0)
|
||||||
|
.unwrap();
|
||||||
|
let gcn_arch_slice = unsafe {
|
||||||
|
slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
|
||||||
|
};
|
||||||
|
let device_name =
|
||||||
|
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||||
|
name
|
||||||
|
} else {
|
||||||
|
return Err(io::Error::new(io::ErrorKind::Other, ""));
|
||||||
|
};
|
||||||
let dir = tempfile::tempdir()?;
|
let dir = tempfile::tempdir()?;
|
||||||
let mut spirv = NamedTempFile::new_in(&dir)?;
|
let mut spirv = NamedTempFile::new_in(&dir)?;
|
||||||
let llvm = NamedTempFile::new_in(&dir)?;
|
let llvm = NamedTempFile::new_in(&dir)?;
|
||||||
|
|
|
@ -2207,7 +2207,10 @@ pub extern "system" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult {
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult {
|
pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult {
|
||||||
unsafe { hipDriverGetVersion(driverVersion).into() }
|
// GeekBench checks this value
|
||||||
|
// TODO: encode something more sensible
|
||||||
|
unsafe { *driverVersion = r#impl::driver_get_version() };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -2382,7 +2385,8 @@ pub extern "system" fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUre
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuCtxSynchronize() -> CUresult {
|
pub extern "system" fn cuCtxSynchronize() -> CUresult {
|
||||||
unsafe { hipCtxSynchronize().into() }
|
// hipCtxSynchronize is not implemented
|
||||||
|
unsafe { hipDeviceSynchronize().into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
|
|
@ -62,6 +62,10 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
}
|
}
|
||||||
//let mut props = unsafe { mem::zeroed() };
|
//let mut props = unsafe { mem::zeroed() };
|
||||||
let hip_attrib = match attrib {
|
let hip_attrib = match attrib {
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
|
||||||
|
unsafe { *pi = 1 };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use hip_runtime_sys::{hipError_t, hipFuncGetAttributes, hipLaunchKernel, hipModuleLaunchKernel};
|
use hip_runtime_sys::{hipError_t, hipFuncAttribute, hipFuncGetAttribute, hipFuncGetAttributes, hipFunction_attribute, hipLaunchKernel, hipModuleLaunchKernel};
|
||||||
|
|
||||||
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
||||||
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
|
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
|
||||||
|
@ -13,20 +13,14 @@ pub(crate) fn get_attribute(
|
||||||
if pi == ptr::null_mut() || func == ptr::null_mut() {
|
if pi == ptr::null_mut() || func == ptr::null_mut() {
|
||||||
return hipError_t::hipErrorInvalidValue;
|
return hipError_t::hipErrorInvalidValue;
|
||||||
}
|
}
|
||||||
let mut hip_attrib = unsafe { mem::zeroed() };
|
let attrib = match cu_attrib {
|
||||||
let err = unsafe { hipFuncGetAttributes(&mut hip_attrib, func as _) };
|
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
let value = match cu_attrib {
|
|
||||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
||||||
hip_attrib.maxThreadsPerBlock
|
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
|
||||||
}
|
}
|
||||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
|
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
|
||||||
hip_attrib.sharedSizeBytes as i32
|
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
|
||||||
}
|
}
|
||||||
_ => return hipError_t::hipErrorInvalidValue,
|
_ => return hipError_t::hipErrorInvalidValue,
|
||||||
};
|
};
|
||||||
unsafe { *pi = value };
|
unsafe { hipFuncGetAttribute(pi, attrib, func as _) }
|
||||||
hipError_t::hipSuccess
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,13 +5,13 @@ use std::fs::File;
|
||||||
use std::io::{self, Read, Write};
|
use std::io::{self, Read, Write};
|
||||||
use std::ops::Add;
|
use std::ops::Add;
|
||||||
use std::os::raw::c_char;
|
use std::os::raw::c_char;
|
||||||
use std::path::PathBuf;
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use std::{fs, mem, ptr, slice};
|
use std::{env, fs, mem, ptr, slice};
|
||||||
|
|
||||||
use hip_runtime_sys::{
|
use hip_runtime_sys::{
|
||||||
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipError_t,
|
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t,
|
||||||
hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
||||||
};
|
};
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
|
@ -85,18 +85,8 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<()
|
||||||
}
|
}
|
||||||
let mut props = unsafe { mem::zeroed() };
|
let mut props = unsafe { mem::zeroed() };
|
||||||
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
|
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return Err(err);
|
|
||||||
}
|
|
||||||
let gcn_arch_slice =
|
|
||||||
unsafe { slice::from_raw_parts(props.gcnArchName.as_ptr() as _, props.gcnArchName.len()) };
|
|
||||||
let name = if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
|
||||||
name
|
|
||||||
} else {
|
|
||||||
return Err(hipError_t::hipErrorUnknown);
|
|
||||||
};
|
|
||||||
let arch_binary = compile_amd(
|
let arch_binary = compile_amd(
|
||||||
name,
|
&props,
|
||||||
&spirv_data.binaries[..],
|
&spirv_data.binaries[..],
|
||||||
spirv_data.should_link_ptx_impl,
|
spirv_data.should_link_ptx_impl,
|
||||||
)
|
)
|
||||||
|
@ -109,7 +99,7 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<()
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
|
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
|
||||||
const AMDGPU: &'static str = "/opt/amdgpu-pro/";
|
const AMDGPU: &'static str = "/opt/rocm/";
|
||||||
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
|
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
|
||||||
const AMDGPU_BITCODE: [&'static str; 8] = [
|
const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||||
"opencl.bc",
|
"opencl.bc",
|
||||||
|
@ -124,11 +114,24 @@ const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||||
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
||||||
|
|
||||||
fn compile_amd(
|
fn compile_amd(
|
||||||
device_name: &str,
|
device_pros: &hipDeviceProp_t,
|
||||||
spirv_il: &[u32],
|
spirv_il: &[u32],
|
||||||
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||||
) -> io::Result<Vec<u8>> {
|
) -> io::Result<Vec<u8>> {
|
||||||
use std::env;
|
let null_terminator = device_pros
|
||||||
|
.gcnArchName
|
||||||
|
.iter()
|
||||||
|
.position(|&x| x == 0)
|
||||||
|
.unwrap();
|
||||||
|
let gcn_arch_slice = unsafe {
|
||||||
|
slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
|
||||||
|
};
|
||||||
|
let device_name =
|
||||||
|
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||||
|
name
|
||||||
|
} else {
|
||||||
|
return Err(io::Error::new(io::ErrorKind::Other, ""));
|
||||||
|
};
|
||||||
let dir = tempfile::tempdir()?;
|
let dir = tempfile::tempdir()?;
|
||||||
let mut spirv = NamedTempFile::new_in(&dir)?;
|
let mut spirv = NamedTempFile::new_in(&dir)?;
|
||||||
let llvm = NamedTempFile::new_in(&dir)?;
|
let llvm = NamedTempFile::new_in(&dir)?;
|
||||||
|
@ -150,8 +153,12 @@ fn compile_amd(
|
||||||
.arg(spirv.path())
|
.arg(spirv.path())
|
||||||
.status()?;
|
.status()?;
|
||||||
assert!(to_llvm_cmd.success());
|
assert!(to_llvm_cmd.success());
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
persist_file(llvm.path())?;
|
||||||
|
}
|
||||||
let linked_binary = NamedTempFile::new_in(&dir)?;
|
let linked_binary = NamedTempFile::new_in(&dir)?;
|
||||||
let mut llvm_link = PathBuf::from(AMDGPU);
|
let mut llvm_link = PathBuf::from(AMDGPU);
|
||||||
|
llvm_link.push("llvm");
|
||||||
llvm_link.push("bin");
|
llvm_link.push("bin");
|
||||||
llvm_link.push("llvm-link");
|
llvm_link.push("llvm-link");
|
||||||
let mut linker_cmd = Command::new(&llvm_link);
|
let mut linker_cmd = Command::new(&llvm_link);
|
||||||
|
@ -166,12 +173,16 @@ fn compile_amd(
|
||||||
}
|
}
|
||||||
let status = linker_cmd.status()?;
|
let status = linker_cmd.status()?;
|
||||||
assert!(status.success());
|
assert!(status.success());
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
persist_file(linked_binary.path())?;
|
||||||
|
}
|
||||||
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
||||||
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
||||||
let mut cland_exe = PathBuf::from(AMDGPU);
|
let mut clang_exe = PathBuf::from(AMDGPU);
|
||||||
cland_exe.push("bin");
|
clang_exe.push("llvm");
|
||||||
cland_exe.push("clang");
|
clang_exe.push("bin");
|
||||||
let mut compiler_cmd = Command::new(&cland_exe);
|
clang_exe.push("clang");
|
||||||
|
let mut compiler_cmd = Command::new(&clang_exe);
|
||||||
compiler_cmd
|
compiler_cmd
|
||||||
.arg(format!("-mcpu={}", device_name))
|
.arg(format!("-mcpu={}", device_name))
|
||||||
.arg("-nogpulib")
|
.arg("-nogpulib")
|
||||||
|
@ -199,11 +210,18 @@ fn compile_amd(
|
||||||
let compiled_bin_path = compiled_binary.path();
|
let compiled_bin_path = compiled_binary.path();
|
||||||
let mut compiled_binary = File::open(compiled_bin_path)?;
|
let mut compiled_binary = File::open(compiled_bin_path)?;
|
||||||
compiled_binary.read_to_end(&mut result)?;
|
compiled_binary.read_to_end(&mut result)?;
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
persist_file(compiled_bin_path)?;
|
||||||
|
}
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn persist_file(path: &Path) -> io::Result<()> {
|
||||||
let mut persistent = PathBuf::from("/tmp/zluda");
|
let mut persistent = PathBuf::from("/tmp/zluda");
|
||||||
std::fs::create_dir_all(&persistent)?;
|
std::fs::create_dir_all(&persistent)?;
|
||||||
persistent.push(compiled_bin_path.file_name().unwrap());
|
persistent.push(path.file_name().unwrap());
|
||||||
std::fs::copy(compiled_bin_path, persistent)?;
|
std::fs::copy(path, persistent)?;
|
||||||
Ok(result)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue