Add more host-side functionality (#480)
Some checks are pending
ZLUDA / Build (Linux) (push) Waiting to run
ZLUDA / Build (Windows) (push) Waiting to run
ZLUDA / Build AMD GPU unit tests (push) Waiting to run
ZLUDA / Run AMD GPU unit tests (push) Blocked by required conditions

This commit is contained in:
Andrzej Janik 2025-08-26 01:55:17 +02:00 committed by GitHub
commit 62d340e4bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 718 additions and 100 deletions

View file

@ -199,8 +199,8 @@ impl VisitMut for FixFnSignatures {
}
const MODULES: &[&str] = &[
"context", "device", "driver", "function", "library", "link", "memory", "module", "pointer",
"stream",
"context", "device", "driver", "event", "function", "graph", "kernel",
"library", "link", "memory", "module", "pointer", "stream"
];
fn normalize_fn_impl(

View file

@ -10451,26 +10451,6 @@ pub fn write_cuMulticastGetGranularity(
crate::CudaDisplay::write(&option, "cuMulticastGetGranularity", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_cuPointerGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
data: *mut ::core::ffi::c_void,
attribute: cuda_types::cuda::CUpointer_attribute,
ptr: cuda_types::cuda::CUdeviceptr,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
crate::CudaDisplay::write(&data, "cuPointerGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attribute), ": ").as_bytes())?;
crate::CudaDisplay::write(&attribute, "cuPointerGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
crate::CudaDisplay::write(&ptr, "cuPointerGetAttribute", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_cuMemPrefetchAsync_ptsz(
writer: &mut (impl std::io::Write + ?Sized),
devPtr: cuda_types::cuda::CUdeviceptr,
@ -10666,36 +10646,6 @@ pub fn write_cuPointerSetAttribute(
crate::CudaDisplay::write(&ptr, "cuPointerSetAttribute", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_cuPointerGetAttributes(
writer: &mut (impl std::io::Write + ?Sized),
numAttributes: ::core::ffi::c_uint,
attributes: *mut cuda_types::cuda::CUpointer_attribute,
data: *mut *mut ::core::ffi::c_void,
ptr: cuda_types::cuda::CUdeviceptr,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(numAttributes), ": ").as_bytes())?;
crate::CudaDisplay::write(
&numAttributes,
"cuPointerGetAttributes",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributes, "cuPointerGetAttributes", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
crate::CudaDisplay::write(&data, "cuPointerGetAttributes", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
crate::CudaDisplay::write(&ptr, "cuPointerGetAttributes", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_cuStreamCreate(
writer: &mut (impl std::io::Write + ?Sized),
phStream: *mut cuda_types::cuda::CUstream,

View file

@ -1,7 +1,7 @@
use cuda_types::cuda::*;
use std::{
any::TypeId,
ffi::{c_void, CStr},
ffi::{c_ulonglong, c_void, CStr},
fmt::LowerHex,
mem, ptr, slice,
};
@ -887,6 +887,143 @@ pub fn write_cuGraphKernelNodeSetAttribute(
write_cuGraphKernelNodeGetAttribute(writer, hNode, attr, value_out as *mut _)
}
#[allow(non_snake_case)]
pub fn write_cuPointerGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
data: *mut ::core::ffi::c_void,
attribute: cuda_types::cuda::CUpointer_attribute,
ptr: cuda_types::cuda::CUdeviceptr,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
write_attribute(writer, attribute, data)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attribute), ": ").as_bytes())?;
crate::CudaDisplay::write(&attribute, "cuPointerGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
crate::CudaDisplay::write(&ptr, "cuPointerGetAttribute", arg_idx, writer)?;
writer.write_all(b")")
}
#[allow(non_snake_case)]
pub fn write_cuPointerGetAttributes(
writer: &mut (impl std::io::Write + ?Sized),
numAttributes: ::core::ffi::c_uint,
attributes: *mut cuda_types::cuda::CUpointer_attribute,
data: *mut *mut ::core::ffi::c_void,
ptr: cuda_types::cuda::CUdeviceptr,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(numAttributes), ": ").as_bytes())?;
crate::CudaDisplay::write(
&numAttributes,
"cuPointerGetAttributes",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
let attributes = unsafe { std::slice::from_raw_parts(attributes, numAttributes as usize) };
crate::CudaDisplay::write(attributes, "cuPointerGetAttributes", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
let data = unsafe { std::slice::from_raw_parts(data, numAttributes as usize) };
writer.write_all(b"[")?;
for (i, data_ptr) in data.iter().copied().enumerate() {
if i != 0 {
writer.write_all(b", ")?;
}
write_attribute(writer, attributes[i], data_ptr)?;
}
writer.write_all(b"]")?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
crate::CudaDisplay::write(&ptr, "cuPointerGetAttributes", arg_idx, writer)?;
writer.write_all(b")")
}
fn write_attribute(
writer: &mut (impl std::io::Write + ?Sized),
attribute: cuda_types::cuda::CUpointer_attribute,
data: *mut ::core::ffi::c_void,
) -> std::io::Result<()> {
match attribute {
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUcontext) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUmemorytype) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUdeviceptr) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
CudaDisplay::write(unsafe { &*(data as *const *mut ::core::ffi::c_void) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_P2P_TOKENS => {
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUDA_POINTER_ATTRIBUTE_P2P_TOKENS) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_BUFFER_ID => {
CudaDisplay::write(unsafe { &*(data as *const c_ulonglong) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL => {
CudaDisplay::write(unsafe { &*(data as *const i32) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR => {
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_RANGE_SIZE => {
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPED => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES => {
CudaDisplay::write(unsafe { &*(data as *const CUmemAllocationHandleType) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_ACCESS_FLAGS => {
CudaDisplay::write(unsafe { &*(data as *const CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE => {
CudaDisplay::write(unsafe { &*(data as *const CUmemoryPool) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPING_SIZE => {
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
}
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR => {
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
}
// We don't know the type of the result
// cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID => {
// CudaDisplay::write(unsafe { &*(data as *const ???) }, "", 0, writer)
// }
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE => {
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
}
_ => writer.write_all(b"UNKNOWN ATTRIBUTE"),
}
}
#[allow(non_snake_case)]
pub fn write_cuStreamGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),

View file

@ -212,3 +212,28 @@ pub(crate) unsafe fn destroy_v2(ctx: CUcontext) -> CUresult {
pub(crate) unsafe fn pop_current_v2(ctx: &mut CUcontext) -> CUresult {
pop_current(ctx)
}
pub(crate) unsafe fn get_stream_priority_range(
least_priority: *mut ::core::ffi::c_int,
greatest_priority: *mut ::core::ffi::c_int,
) -> hipError_t {
hipDeviceGetStreamPriorityRange(least_priority, greatest_priority)
}
pub(crate) unsafe fn set_flags(flags: ::core::ffi::c_uint) -> CUresult {
let cu_ctx = get_current_context()?;
let ctx: &Context = FromCuda::<_, CUerror>::from_cuda(&cu_ctx)?;
ctx.with_state_mut(|state| {
state.flags = flags;
Ok(())
})
}
pub(crate) unsafe fn get_api_version(
_ctx: CUcontext,
version: &mut ::core::ffi::c_uint,
) -> CUresult {
// That's what original CUDA driver does
*version = 3020;
Ok(())
}

View file

@ -499,3 +499,21 @@ pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
ctx.with_state_mut(|state| state.reset())?;
Ok(())
}
pub(crate) unsafe fn primary_context_get_state(
dev: hipDevice_t,
flags_out: &mut ::core::ffi::c_uint,
active_out: &mut ::core::ffi::c_int,
) -> CUresult {
let (ctx, _) = get_primary_context(dev)?;
let mut flags = 0u32;
let mut active = 0i32;
ctx.with_state_mut(|state| {
flags = state.flags;
active = (state.ref_count > 0) as i32;
Ok(())
})?;
*flags_out = flags;
*active_out = active;
Ok(())
}

View file

@ -1,11 +1,12 @@
use crate::r#impl::{context, device};
use crate::r#impl::{context, device, function};
use comgr::Comgr;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{
collections::BTreeMap,
ffi::{c_void, CStr, CString},
mem, ptr, slice,
sync::OnceLock,
sync::{Mutex, OnceLock},
usize,
};
use zluda_common::{FromCuda, LiveCheck};
@ -19,6 +20,44 @@ pub(crate) struct GlobalState {
pub comgr: Comgr,
pub comgr_clang_version: String,
pub cache_path: Option<String>,
pub allocations: Mutex<Allocations>,
}
pub(crate) struct Allocations {
pub pointers: BTreeMap<usize, AllocationInfo>,
}
impl Allocations {
pub fn new() -> Self {
Allocations {
pointers: BTreeMap::new(),
}
}
pub fn insert(&mut self, ptr: usize, size: usize, context: CUcontext) {
self.pointers.insert(ptr, AllocationInfo { size, context });
}
pub fn get_offset_and_info(&self, ptr: usize) -> Option<(usize, AllocationInfo)> {
// Find last pair where `start <= ptr`
let (start, alloc) = self.pointers.range(..=ptr).rev().next()?;
// Check if allocation contains the pointer
if start + alloc.size > ptr {
Some((ptr - start, *alloc))
} else {
None
}
}
pub fn remove(&mut self, ptr: usize) {
self.pointers.remove(&ptr);
}
}
#[derive(Clone, Copy, PartialEq, Debug)]
pub(crate) struct AllocationInfo {
pub size: usize,
pub context: CUcontext,
}
pub(crate) struct Device {
@ -56,9 +95,11 @@ pub(crate) fn global_state() -> Result<&'static GlobalState, CUerror> {
let comgr = Comgr::new().map_err(|_| CUerror::UNKNOWN)?;
let comgr_clang_version =
comgr::get_clang_version(&comgr).map_err(|_| CUerror::UNKNOWN)?;
let allocations = Mutex::new(Allocations::new());
Ok(GlobalState {
comgr,
comgr_clang_version,
allocations,
devices: (0..device_count)
.map(|i| {
let mut props = unsafe { mem::zeroed() };
@ -430,3 +471,97 @@ pub(crate) fn profiler_start() -> CUresult {
pub(crate) fn profiler_stop() -> CUresult {
Ok(())
}
pub(crate) unsafe fn thread_exchange_stream_capture_mode(
mode: *mut hipStreamCaptureMode,
) -> hipError_t {
hipThreadExchangeStreamCaptureMode(mode)
}
pub(crate) unsafe fn occupancy_max_active_blocks_per_multiprocessor_with_flags(
num_blocks: &mut ::core::ffi::c_int,
func: hipFunction_t,
block_size: ::core::ffi::c_int,
dynamic_smem_size: usize,
flags: ::core::ffi::c_uint,
) -> hipError_t {
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
num_blocks,
func.0.cast(),
block_size,
dynamic_smem_size,
flags,
)?;
*num_blocks = (*num_blocks).max(1);
Ok(())
}
pub(crate) unsafe fn launch_kernel_ex(
config: &cuda_types::cuda::CUlaunchConfig,
f: hipFunction_t,
kernel_params: *mut *mut ::core::ffi::c_void,
extra: *mut *mut ::core::ffi::c_void,
) -> CUresult {
let attrs = std::slice::from_raw_parts(config.attrs, config.numAttrs as usize);
if attrs.iter().any(|&attr| {
!(attr.id == CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION
&& attr.value.programmaticStreamSerializationAllowed == 0)
}) {
return CUresult::ERROR_NOT_SUPPORTED;
}
function::launch_kernel(
f,
config.gridDimX,
config.gridDimY,
config.gridDimZ,
config.blockDimX,
config.blockDimY,
config.blockDimZ,
config.sharedMemBytes,
FromCuda::<_, CUerror>::from_cuda(&config.hStream)?,
kernel_params,
extra,
)?;
Ok(())
}
#[cfg(test)]
mod tests {
use crate::r#impl::driver::AllocationInfo;
use cuda_types::cuda::CUcontext;
#[test]
fn get_allocation() {
let ctx1 = CUcontext(0x1234 as _);
let ctx2 = CUcontext(0x5678 as _);
let mut alloc_info = super::Allocations::new();
alloc_info.insert(0x1000, 4, ctx1);
alloc_info.insert(0x2000, 8, ctx2);
for i in 0..4 {
assert_eq!(
alloc_info.get_offset_and_info(0x1000 + i),
Some((
i,
AllocationInfo {
size: 4,
context: ctx1
}
))
);
}
assert_eq!(alloc_info.get_offset_and_info(0x1000 + 4), None);
for i in 0..8 {
assert_eq!(
alloc_info.get_offset_and_info(0x2000 + i),
Some((
i,
AllocationInfo {
size: 8,
context: ctx2
}
))
);
}
assert_eq!(alloc_info.get_offset_and_info(0x2000 + 8), None);
}
}

22
zluda/src/impl/event.rs Normal file
View file

@ -0,0 +1,22 @@
use hip_runtime_sys::*;
pub(crate) unsafe fn create(event: *mut hipEvent_t, flags: ::core::ffi::c_uint) -> hipError_t {
// Flag values are compatible between CUDA and HIP for 0,1,2,4
hipEventCreateWithFlags(event, flags)
}
pub(crate) unsafe fn query(event: hipEvent_t) -> hipError_t {
hipEventQuery(event)
}
pub(crate) unsafe fn destroy_v2(event: hipEvent_t) -> hipError_t {
hipEventDestroy(event)
}
pub(crate) unsafe fn record(event: hipEvent_t, stream: hipStream_t) -> hipError_t {
hipEventRecord(event, stream)
}
pub(crate) unsafe fn synchronize(event: hipEvent_t) -> hipError_t {
hipEventSynchronize(event)
}

View file

@ -7,6 +7,14 @@ pub(crate) fn get_attribute(
) -> hipError_t {
// TODO: implement HIP_FUNC_ATTRIBUTE_PTX_VERSION
// TODO: implement HIP_FUNC_ATTRIBUTE_BINARY_VERSION
match cu_attrib {
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION
| hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => {
*pi = 120;
return Ok(());
}
_ => {}
}
unsafe { hipFuncGetAttribute(pi, cu_attrib, func) }?;
if cu_attrib == hipFunction_attribute::HIP_FUNC_ATTRIBUTE_NUM_REGS {
*pi = (*pi).max(1);
@ -44,3 +52,18 @@ pub(crate) fn launch_kernel(
)
}
}
pub(crate) unsafe fn set_attribute(
func: hipFunction_t,
attribute: hipFunction_attribute,
value: i32,
) -> hipError_t {
match attribute {
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION
| hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => {
return hipError_t::ErrorNotSupported;
}
_ => {}
}
hipFuncSetAttribute(func.0.cast(), hipFuncAttribute(attribute.0), value)
}

29
zluda/src/impl/graph.rs Normal file
View file

@ -0,0 +1,29 @@
use hip_runtime_sys::*;
pub(crate) unsafe fn destroy(graph: hipGraph_t) -> hipError_t {
hipGraphDestroy(graph)
}
pub(crate) unsafe fn exec_destroy(graph_exec: hipGraphExec_t) -> hipError_t {
hipGraphExecDestroy(graph_exec)
}
pub(crate) unsafe fn get_nodes(
graph: hipGraph_t,
nodes: *mut hipGraphNode_t,
num_nodes: *mut usize,
) -> hipError_t {
hipGraphGetNodes(graph, nodes, num_nodes)
}
pub(crate) unsafe fn instantiate_with_flags(
graph_exec: *mut hipGraphExec_t,
graph: hipGraph_t,
flags: u64,
) -> hipError_t {
hipGraphInstantiateWithFlags(graph_exec, graph, flags)
}
pub(crate) unsafe fn launch(graph_exec: hipGraphExec_t, stream: hipStream_t) -> hipError_t {
hipGraphLaunch(graph_exec, stream)
}

18
zluda/src/impl/kernel.rs Normal file
View file

@ -0,0 +1,18 @@
use cuda_types::cuda::CUresult;
use hip_runtime_sys::*;
use crate::r#impl::function;
pub(crate) unsafe fn get_function(func: &mut hipFunction_t, kernel: hipFunction_t) -> CUresult {
*func = kernel;
Ok(())
}
pub(crate) unsafe fn set_attribute(
attrib: hipFunction_attribute,
val: ::core::ffi::c_int,
kernel: hipFunction_t,
_dev: hipDevice_t,
) -> hipError_t {
function::set_attribute(kernel, attrib, val)
}

View file

@ -44,3 +44,20 @@ pub(crate) unsafe fn get_module(out: &mut CUmodule, library: &Library) -> CUresu
*out = module::Module { base: library.base }.wrap();
Ok(())
}
pub(crate) unsafe fn get_kernel(
kernel: *mut hipFunction_t,
library: &Library,
name: *const ::core::ffi::c_char,
) -> hipError_t {
hipModuleGetFunction(kernel, library.base, name)
}
pub(crate) unsafe fn get_global(
dptr: *mut hipDeviceptr_t,
bytes: *mut usize,
library: &Library,
name: *const ::core::ffi::c_char,
) -> hipError_t {
hipModuleGetGlobal(dptr, bytes, library.base, name)
}

View file

@ -1,13 +1,23 @@
use std::ptr;
use cuda_types::cuda::{CUerror, CUresult, CUresultConsts};
use hip_runtime_sys::*;
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
use crate::r#impl::{context, driver};
pub(crate) fn alloc_v2(dptr: &mut hipDeviceptr_t, bytesize: usize) -> CUresult {
let context = context::get_current_context()?;
unsafe { hipMalloc(ptr::from_mut(dptr).cast(), bytesize) }?;
add_allocation(dptr.0, bytesize, context)?;
// TODO: parametrize for non-Geekbench
unsafe { hipMemsetD8(*dptr, 0, bytesize) }
unsafe { hipMemsetD8(*dptr, 0, bytesize) }?;
Ok(())
}
pub(crate) fn free_v2(dptr: hipDeviceptr_t) -> hipError_t {
unsafe { hipFree(dptr.0) }
pub(crate) unsafe fn free_v2(dptr: hipDeviceptr_t) -> CUresult {
let hip_result = hipFree(dptr.0);
remove_allocation(dptr.0)?;
Ok(hip_result?)
}
pub(crate) fn copy_dto_h_v2(
@ -45,14 +55,101 @@ pub(crate) fn get_info_v2(free: *mut usize, total: *mut usize) -> hipError_t {
unsafe { hipMemGetInfo(free, total) }
}
pub(crate) unsafe fn free_host(ptr: *mut ::core::ffi::c_void) -> hipError_t {
hipFreeHost(ptr)
pub(crate) unsafe fn free_host(ptr: *mut ::core::ffi::c_void) -> CUresult {
let hip_result = hipFreeHost(ptr);
remove_allocation(ptr)?;
Ok(hip_result?)
}
pub(crate) unsafe fn host_alloc(
pp: *mut *mut ::core::ffi::c_void,
pp: &mut *mut ::core::ffi::c_void,
bytesize: usize,
flags: ::std::os::raw::c_uint,
) -> hipError_t {
hipHostMalloc(pp, bytesize, flags)
) -> CUresult {
let context = context::get_current_context()?;
hipHostMalloc(pp, bytesize, flags)?;
add_allocation(*pp, bytesize, context)?;
Ok(())
}
fn add_allocation(
dptr: *mut ::core::ffi::c_void,
bytesize: usize,
context: cuda_types::cuda::CUcontext,
) -> Result<(), CUerror> {
let global_state = driver::global_state()?;
let mut allocations = global_state
.allocations
.lock()
.map_err(|_| CUerror::UNKNOWN)?;
allocations.insert(dptr as usize, bytesize, context);
Ok(())
}
fn remove_allocation(ptr: *mut std::ffi::c_void) -> Result<(), CUerror> {
let global_state = driver::global_state()?;
let mut allocations = global_state
.allocations
.lock()
.map_err(|_| CUerror::UNKNOWN)?;
allocations.remove(ptr as usize);
Ok(())
}
pub(crate) unsafe fn retain_allocation_handle(
_handle: *mut cuda_types::cuda::CUmemGenericAllocationHandle,
_addr: *mut ::core::ffi::c_void,
) -> CUresult {
CUresult::ERROR_NOT_SUPPORTED
}
pub(crate) unsafe fn copy_hto_d_async_v2(
dst_device: hipDeviceptr_t,
src_host: *const ::core::ffi::c_void,
byte_count: usize,
stream: hipStream_t,
) -> hipError_t {
hipMemcpyHtoDAsync(dst_device, src_host.cast_mut(), byte_count, stream)
}
pub(crate) unsafe fn copy_dto_h_async_v2(
dst_host: *mut ::core::ffi::c_void,
src_device: hipDeviceptr_t,
byte_count: usize,
stream: hipStream_t,
) -> hipError_t {
hipMemcpyDtoHAsync(dst_host, src_device, byte_count, stream)
}
pub(crate) unsafe fn copy_dto_d_async_v2(
dst_device: hipDeviceptr_t,
src_device: hipDeviceptr_t,
byte_count: usize,
stream: hipStream_t,
) -> hipError_t {
hipMemcpyDtoDAsync(dst_device, src_device, byte_count, stream)
}
pub(crate) unsafe fn copy_async(
dst: hipDeviceptr_t,
src: hipDeviceptr_t,
byte_count: usize,
stream: hipStream_t,
) -> hipError_t {
hipMemcpyAsync(
dst.0,
src.0,
byte_count,
hipMemcpyKind::hipMemcpyDefault,
stream,
)
}
pub(crate) unsafe fn set_d8_async(
dst_device: hipDeviceptr_t,
uc: ::core::ffi::c_uchar,
n: usize,
stream: hipStream_t,
) -> hipError_t {
hipMemsetD8Async(dst_device, uc, n, stream)
}

View file

@ -4,7 +4,10 @@ use zluda_common::from_cuda_object;
pub(super) mod context;
pub(super) mod device;
pub(super) mod driver;
pub(super) mod event;
pub(super) mod function;
pub(super) mod graph;
pub(super) mod kernel;
pub(super) mod library;
pub(super) mod memory;
pub(super) mod module;

View file

@ -2,39 +2,72 @@ use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{ffi::c_void, ptr};
use crate::r#impl::driver;
// TODO: handlehipMemoryTypeUnregistered
fn to_cu_memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
match cu {
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
// TODO: check if this is correct
hipMemoryType::hipMemoryTypeManaged => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
_ => Err(hipErrorCode_t::InvalidValue),
}
}
pub(crate) unsafe fn get_attribute(
data: *mut c_void,
attribute: hipPointer_attribute,
ptr: hipDeviceptr_t,
) -> hipError_t {
) -> CUresult {
if data == ptr::null_mut() {
return hipError_t::ErrorInvalidValue;
return CUresult::ERROR_INVALID_VALUE;
}
match attribute {
// TODO: implement by getting device ordinal & allocation start,
// then go through every context for that device
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => hipError_t::ErrorNotSupported,
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
let mut hip_result = hipMemoryType(0);
hipPointerGetAttribute(
(&mut hip_result as *mut hipMemoryType).cast::<c_void>(),
attribute,
ptr,
)?;
let cuda_result = memory_type(hip_result)?;
unsafe { *(data.cast()) = cuda_result };
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => {
let globals = driver::global_state()?;
let allocations = globals.allocations.lock().map_err(|_| CUerror::UNKNOWN)?;
let (_, alloc) = allocations
.get_offset_and_info(ptr.0 as usize)
.ok_or(CUerror::INVALID_VALUE)?;
unsafe { *(data.cast()) = alloc.context };
Ok(())
}
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
let mut memory_type = hipMemoryType(0);
hipPointerGetAttribute(ptr::from_mut(&mut memory_type).cast(), attribute, ptr)?;
unsafe { *(data.cast()) = to_cu_memory_type(memory_type)? };
Ok(())
}
_ => {
hipPointerGetAttribute(data, attribute, ptr)?;
Ok(())
}
_ => unsafe { hipPointerGetAttribute(data, attribute, ptr) },
}
}
fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
match cu {
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
_ => Err(hipErrorCode_t::InvalidValue),
pub(crate) unsafe fn get_attributes(
num_attributes: ::core::ffi::c_uint,
attributes: &mut hipPointer_attribute,
data: &mut *mut ::core::ffi::c_void,
ptr: hipDeviceptr_t,
) -> CUresult {
hipDrvPointerGetAttributes(num_attributes, attributes, data, ptr)?;
let attributes = std::slice::from_raw_parts_mut(attributes, num_attributes as usize);
let data = std::slice::from_raw_parts_mut(data, num_attributes as usize);
for (attr, data_ptr) in attributes.iter().copied().zip(data.iter().copied()) {
match attr {
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => {
get_attribute(data_ptr, attr, ptr).ok();
}
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
*(data_ptr.cast::<CUmemorytype>()) =
to_cu_memory_type(*data_ptr.cast::<hipMemoryType>())?;
}
_ => {}
}
}
Ok(())
}

View file

@ -3,3 +3,58 @@ use hip_runtime_sys::*;
pub(crate) fn synchronize(stream: hipStream_t) -> hipError_t {
unsafe { hipStreamSynchronize(stream) }
}
pub(crate) fn create_with_priority(
stream: *mut hipStream_t,
flags: ::core::ffi::c_uint,
priority: ::core::ffi::c_int,
) -> hipError_t {
unsafe { hipStreamCreateWithPriority(stream, flags, priority) }
}
pub(crate) fn destroy_v2(stream: hipStream_t) -> hipError_t {
unsafe { hipStreamDestroy(stream) }
}
pub(crate) fn begin_capture_v2(stream: hipStream_t, mode: hipStreamCaptureMode) -> hipError_t {
unsafe { hipStreamBeginCapture(stream, mode) }
}
pub(crate) fn end_capture(stream: hipStream_t, graph: *mut hipGraph_t) -> hipError_t {
unsafe { hipStreamEndCapture(stream, graph) }
}
pub(crate) fn is_capturing(
stream: hipStream_t,
capture_status: *mut hipStreamCaptureStatus,
) -> hipError_t {
unsafe { hipStreamIsCapturing(stream, capture_status) }
}
pub(crate) fn get_capture_info_v2(
stream: hipStream_t,
capture_status: *mut hipStreamCaptureStatus,
id: *mut ::core::ffi::c_ulonglong,
graph_out: *mut hipGraph_t,
dependencies_out: *mut *const hipGraphNode_t,
num_dependencies_out: *mut usize,
) -> hipError_t {
unsafe {
hipStreamGetCaptureInfo_v2(
stream,
capture_status,
id,
graph_out,
dependencies_out,
num_dependencies_out,
)
}
}
pub(crate) fn wait_event(
stream: hipStream_t,
event: hipEvent_t,
flags: ::core::ffi::c_uint,
) -> hipError_t {
unsafe { hipStreamWaitEvent(stream, event, flags) }
}

View file

@ -69,16 +69,19 @@ cuda_macros::cuda_function_declarations!(
<= [
cuCtxCreate_v2,
cuCtxDestroy_v2,
cuCtxGetLimit,
cuCtxSetCurrent,
cuCtxGetApiVersion,
cuCtxGetCurrent,
cuCtxGetDevice,
cuCtxSetLimit,
cuCtxSynchronize,
cuCtxPushCurrent,
cuCtxPushCurrent_v2,
cuCtxGetLimit,
cuCtxGetStreamPriorityRange,
cuCtxPopCurrent,
cuCtxPopCurrent_v2,
cuCtxPushCurrent,
cuCtxPushCurrent_v2,
cuCtxSetCurrent,
cuCtxSetFlags,
cuCtxSetLimit,
cuCtxSynchronize,
cuDeviceComputeCapability,
cuDeviceGet,
cuDeviceGetAttribute,
@ -88,38 +91,72 @@ cuda_macros::cuda_function_declarations!(
cuDeviceGetProperties,
cuDeviceGetUuid,
cuDeviceGetUuid_v2,
cuDevicePrimaryCtxGetState,
cuDevicePrimaryCtxRelease,
cuDevicePrimaryCtxRetain,
cuDevicePrimaryCtxReset,
cuDevicePrimaryCtxRetain,
cuDeviceTotalMem_v2,
cuDriverGetVersion,
cuEventCreate,
cuEventDestroy_v2,
cuEventQuery,
cuEventRecord,
cuEventSynchronize,
cuFuncGetAttribute,
cuFuncSetAttribute,
cuGetExportTable,
cuGetProcAddress,
cuGetProcAddress_v2,
cuGraphDestroy,
cuGraphExecDestroy,
cuGraphGetNodes,
cuGraphInstantiateWithFlags,
cuGraphLaunch,
cuInit,
cuLibraryLoadData,
cuKernelGetFunction,
cuKernelSetAttribute,
cuLaunchKernel,
cuLaunchKernelEx,
cuLibraryGetGlobal,
cuLibraryGetKernel,
cuLibraryGetModule,
cuLibraryLoadData,
cuLibraryUnload,
cuMemAlloc_v2,
cuMemFree_v2,
cuMemHostAlloc,
cuMemFreeHost,
cuMemFree_v2,
cuMemGetAddressRange_v2,
cuMemGetInfo_v2,
cuMemHostAlloc,
cuMemRetainAllocationHandle,
cuMemcpyAsync,
cuMemcpyDtoDAsync_v2,
cuMemcpyDtoHAsync_v2,
cuMemcpyDtoH_v2,
cuMemcpyHtoDAsync_v2,
cuMemcpyHtoD_v2,
cuMemsetD32_v2,
cuMemsetD8Async,
cuMemsetD8_v2,
cuModuleGetFunction,
cuModuleGetGlobal_v2,
cuModuleGetLoadingMode,
cuModuleLoadData,
cuModuleUnload,
cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags,
cuPointerGetAttribute,
cuStreamSynchronize,
cuPointerGetAttributes,
cuProfilerStart,
cuProfilerStop,
cuStreamBeginCapture_v2,
cuStreamCreateWithPriority,
cuStreamDestroy_v2,
cuStreamEndCapture,
cuStreamGetCaptureInfo_v2,
cuStreamIsCapturing,
cuStreamSynchronize,
cuStreamWaitEvent,
cuThreadExchangeStreamCaptureMode,
],
implemented_in_function <= [cuLaunchKernel,]
);

View file

@ -1390,6 +1390,8 @@ fn generate_display_cuda(
"cuStreamGetAttribute_ptsz",
"cuGraphKernelNodeGetAttribute",
"cuGraphKernelNodeSetAttribute",
"cuPointerGetAttribute",
"cuPointerGetAttributes",
];
let count_selectors = [
("cuCtxCreate_v3", 1, 2),

View file

@ -89,6 +89,12 @@ macro_rules! from_cuda_transmute {
Ok(x.cast::<$to>())
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, *mut *const $from, E> for *mut *const $to {
fn from_cuda(x: &'a *mut *const $from) -> Result<Self, E> {
Ok(x.cast::<*const $to>())
}
}
)*
};
}
@ -110,6 +116,7 @@ macro_rules! from_cuda_object {
from_cuda_nop!(
*mut i8,
*mut i32,
*mut u64,
*mut usize,
*const f32,
*mut f32,
@ -134,7 +141,8 @@ from_cuda_nop!(
CUmodule,
CUcontext,
cublasHandle_t,
cublasStatus_t
cublasStatus_t,
CUlaunchConfig
);
from_cuda_transmute!(
CUuuid => hipUUID,
@ -142,7 +150,16 @@ from_cuda_transmute!(
CUfunction_attribute => hipFunction_attribute,
CUstream => hipStream_t,
CUpointer_attribute => hipPointer_attribute,
CUdeviceptr_v2 => hipDeviceptr_t
CUdeviceptr_v2 => hipDeviceptr_t,
CUevent => hipEvent_t,
// This is safe because HIP's enum is the subset of CUDA's enum and
// this type is used purely as a function result
CUstreamCaptureStatus => hipStreamCaptureStatus,
CUgraph => hipGraph_t,
CUstreamCaptureMode => hipStreamCaptureMode,
CUgraphNode => hipGraphNode_t,
CUgraphExec => hipGraphExec_t,
CUkernel => hipFunction_t
);
impl<'a, E: CudaErrorType> FromCuda<'a, CUlimit, E> for hipLimit_t {