mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-03 08:10:21 +00:00
Add more host-side functionality (#480)
This commit is contained in:
parent
de319f7c00
commit
62d340e4bd
18 changed files with 718 additions and 100 deletions
|
@ -199,8 +199,8 @@ impl VisitMut for FixFnSignatures {
|
|||
}
|
||||
|
||||
const MODULES: &[&str] = &[
|
||||
"context", "device", "driver", "function", "library", "link", "memory", "module", "pointer",
|
||||
"stream",
|
||||
"context", "device", "driver", "event", "function", "graph", "kernel",
|
||||
"library", "link", "memory", "module", "pointer", "stream"
|
||||
];
|
||||
|
||||
fn normalize_fn_impl(
|
||||
|
|
|
@ -10451,26 +10451,6 @@ pub fn write_cuMulticastGetGranularity(
|
|||
crate::CudaDisplay::write(&option, "cuMulticastGetGranularity", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_cuPointerGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
data: *mut ::core::ffi::c_void,
|
||||
attribute: cuda_types::cuda::CUpointer_attribute,
|
||||
ptr: cuda_types::cuda::CUdeviceptr,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&data, "cuPointerGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attribute), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attribute, "cuPointerGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&ptr, "cuPointerGetAttribute", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_cuMemPrefetchAsync_ptsz(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
devPtr: cuda_types::cuda::CUdeviceptr,
|
||||
|
@ -10666,36 +10646,6 @@ pub fn write_cuPointerSetAttribute(
|
|||
crate::CudaDisplay::write(&ptr, "cuPointerSetAttribute", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_cuPointerGetAttributes(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
numAttributes: ::core::ffi::c_uint,
|
||||
attributes: *mut cuda_types::cuda::CUpointer_attribute,
|
||||
data: *mut *mut ::core::ffi::c_void,
|
||||
ptr: cuda_types::cuda::CUdeviceptr,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(numAttributes), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&numAttributes,
|
||||
"cuPointerGetAttributes",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributes, "cuPointerGetAttributes", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&data, "cuPointerGetAttributes", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&ptr, "cuPointerGetAttributes", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_cuStreamCreate(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
phStream: *mut cuda_types::cuda::CUstream,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use cuda_types::cuda::*;
|
||||
use std::{
|
||||
any::TypeId,
|
||||
ffi::{c_void, CStr},
|
||||
ffi::{c_ulonglong, c_void, CStr},
|
||||
fmt::LowerHex,
|
||||
mem, ptr, slice,
|
||||
};
|
||||
|
@ -887,6 +887,143 @@ pub fn write_cuGraphKernelNodeSetAttribute(
|
|||
write_cuGraphKernelNodeGetAttribute(writer, hNode, attr, value_out as *mut _)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuPointerGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
data: *mut ::core::ffi::c_void,
|
||||
attribute: cuda_types::cuda::CUpointer_attribute,
|
||||
ptr: cuda_types::cuda::CUdeviceptr,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
|
||||
write_attribute(writer, attribute, data)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attribute), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attribute, "cuPointerGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&ptr, "cuPointerGetAttribute", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuPointerGetAttributes(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
numAttributes: ::core::ffi::c_uint,
|
||||
attributes: *mut cuda_types::cuda::CUpointer_attribute,
|
||||
data: *mut *mut ::core::ffi::c_void,
|
||||
ptr: cuda_types::cuda::CUdeviceptr,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(numAttributes), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&numAttributes,
|
||||
"cuPointerGetAttributes",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
|
||||
let attributes = unsafe { std::slice::from_raw_parts(attributes, numAttributes as usize) };
|
||||
crate::CudaDisplay::write(attributes, "cuPointerGetAttributes", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
|
||||
let data = unsafe { std::slice::from_raw_parts(data, numAttributes as usize) };
|
||||
writer.write_all(b"[")?;
|
||||
for (i, data_ptr) in data.iter().copied().enumerate() {
|
||||
if i != 0 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
write_attribute(writer, attributes[i], data_ptr)?;
|
||||
}
|
||||
writer.write_all(b"]")?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(ptr), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&ptr, "cuPointerGetAttributes", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
|
||||
fn write_attribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
attribute: cuda_types::cuda::CUpointer_attribute,
|
||||
data: *mut ::core::ffi::c_void,
|
||||
) -> std::io::Result<()> {
|
||||
match attribute {
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUcontext) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUmemorytype) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUdeviceptr) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const *mut ::core::ffi::c_void) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_P2P_TOKENS => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const cuda_types::cuda::CUDA_POINTER_ATTRIBUTE_P2P_TOKENS) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_BUFFER_ID => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const c_ulonglong) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const i32) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_RANGE_SIZE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPED => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const CUmemAllocationHandleType) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_ACCESS_FLAGS => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const CUmemoryPool) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPING_SIZE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
|
||||
}
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const usize) }, "", 0, writer)
|
||||
}
|
||||
// We don't know the type of the result
|
||||
// cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID => {
|
||||
// CudaDisplay::write(unsafe { &*(data as *const ???) }, "", 0, writer)
|
||||
// }
|
||||
cuda_types::cuda::CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE => {
|
||||
CudaDisplay::write(unsafe { &*(data as *const bool) }, "", 0, writer)
|
||||
}
|
||||
_ => writer.write_all(b"UNKNOWN ATTRIBUTE"),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
|
|
|
@ -212,3 +212,28 @@ pub(crate) unsafe fn destroy_v2(ctx: CUcontext) -> CUresult {
|
|||
pub(crate) unsafe fn pop_current_v2(ctx: &mut CUcontext) -> CUresult {
|
||||
pop_current(ctx)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_stream_priority_range(
|
||||
least_priority: *mut ::core::ffi::c_int,
|
||||
greatest_priority: *mut ::core::ffi::c_int,
|
||||
) -> hipError_t {
|
||||
hipDeviceGetStreamPriorityRange(least_priority, greatest_priority)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_flags(flags: ::core::ffi::c_uint) -> CUresult {
|
||||
let cu_ctx = get_current_context()?;
|
||||
let ctx: &Context = FromCuda::<_, CUerror>::from_cuda(&cu_ctx)?;
|
||||
ctx.with_state_mut(|state| {
|
||||
state.flags = flags;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_api_version(
|
||||
_ctx: CUcontext,
|
||||
version: &mut ::core::ffi::c_uint,
|
||||
) -> CUresult {
|
||||
// That's what original CUDA driver does
|
||||
*version = 3020;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -499,3 +499,21 @@ pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
|
|||
ctx.with_state_mut(|state| state.reset())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn primary_context_get_state(
|
||||
dev: hipDevice_t,
|
||||
flags_out: &mut ::core::ffi::c_uint,
|
||||
active_out: &mut ::core::ffi::c_int,
|
||||
) -> CUresult {
|
||||
let (ctx, _) = get_primary_context(dev)?;
|
||||
let mut flags = 0u32;
|
||||
let mut active = 0i32;
|
||||
ctx.with_state_mut(|state| {
|
||||
flags = state.flags;
|
||||
active = (state.ref_count > 0) as i32;
|
||||
Ok(())
|
||||
})?;
|
||||
*flags_out = flags;
|
||||
*active_out = active;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
use crate::r#impl::{context, device};
|
||||
use crate::r#impl::{context, device, function};
|
||||
use comgr::Comgr;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
ffi::{c_void, CStr, CString},
|
||||
mem, ptr, slice,
|
||||
sync::OnceLock,
|
||||
sync::{Mutex, OnceLock},
|
||||
usize,
|
||||
};
|
||||
use zluda_common::{FromCuda, LiveCheck};
|
||||
|
@ -19,6 +20,44 @@ pub(crate) struct GlobalState {
|
|||
pub comgr: Comgr,
|
||||
pub comgr_clang_version: String,
|
||||
pub cache_path: Option<String>,
|
||||
pub allocations: Mutex<Allocations>,
|
||||
}
|
||||
|
||||
pub(crate) struct Allocations {
|
||||
pub pointers: BTreeMap<usize, AllocationInfo>,
|
||||
}
|
||||
|
||||
impl Allocations {
|
||||
pub fn new() -> Self {
|
||||
Allocations {
|
||||
pointers: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, ptr: usize, size: usize, context: CUcontext) {
|
||||
self.pointers.insert(ptr, AllocationInfo { size, context });
|
||||
}
|
||||
|
||||
pub fn get_offset_and_info(&self, ptr: usize) -> Option<(usize, AllocationInfo)> {
|
||||
// Find last pair where `start <= ptr`
|
||||
let (start, alloc) = self.pointers.range(..=ptr).rev().next()?;
|
||||
// Check if allocation contains the pointer
|
||||
if start + alloc.size > ptr {
|
||||
Some((ptr - start, *alloc))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, ptr: usize) {
|
||||
self.pointers.remove(&ptr);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||
pub(crate) struct AllocationInfo {
|
||||
pub size: usize,
|
||||
pub context: CUcontext,
|
||||
}
|
||||
|
||||
pub(crate) struct Device {
|
||||
|
@ -56,9 +95,11 @@ pub(crate) fn global_state() -> Result<&'static GlobalState, CUerror> {
|
|||
let comgr = Comgr::new().map_err(|_| CUerror::UNKNOWN)?;
|
||||
let comgr_clang_version =
|
||||
comgr::get_clang_version(&comgr).map_err(|_| CUerror::UNKNOWN)?;
|
||||
let allocations = Mutex::new(Allocations::new());
|
||||
Ok(GlobalState {
|
||||
comgr,
|
||||
comgr_clang_version,
|
||||
allocations,
|
||||
devices: (0..device_count)
|
||||
.map(|i| {
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
|
@ -430,3 +471,97 @@ pub(crate) fn profiler_start() -> CUresult {
|
|||
pub(crate) fn profiler_stop() -> CUresult {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn thread_exchange_stream_capture_mode(
|
||||
mode: *mut hipStreamCaptureMode,
|
||||
) -> hipError_t {
|
||||
hipThreadExchangeStreamCaptureMode(mode)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn occupancy_max_active_blocks_per_multiprocessor_with_flags(
|
||||
num_blocks: &mut ::core::ffi::c_int,
|
||||
func: hipFunction_t,
|
||||
block_size: ::core::ffi::c_int,
|
||||
dynamic_smem_size: usize,
|
||||
flags: ::core::ffi::c_uint,
|
||||
) -> hipError_t {
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
num_blocks,
|
||||
func.0.cast(),
|
||||
block_size,
|
||||
dynamic_smem_size,
|
||||
flags,
|
||||
)?;
|
||||
*num_blocks = (*num_blocks).max(1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn launch_kernel_ex(
|
||||
config: &cuda_types::cuda::CUlaunchConfig,
|
||||
f: hipFunction_t,
|
||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||
extra: *mut *mut ::core::ffi::c_void,
|
||||
) -> CUresult {
|
||||
let attrs = std::slice::from_raw_parts(config.attrs, config.numAttrs as usize);
|
||||
if attrs.iter().any(|&attr| {
|
||||
!(attr.id == CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION
|
||||
&& attr.value.programmaticStreamSerializationAllowed == 0)
|
||||
}) {
|
||||
return CUresult::ERROR_NOT_SUPPORTED;
|
||||
}
|
||||
function::launch_kernel(
|
||||
f,
|
||||
config.gridDimX,
|
||||
config.gridDimY,
|
||||
config.gridDimZ,
|
||||
config.blockDimX,
|
||||
config.blockDimY,
|
||||
config.blockDimZ,
|
||||
config.sharedMemBytes,
|
||||
FromCuda::<_, CUerror>::from_cuda(&config.hStream)?,
|
||||
kernel_params,
|
||||
extra,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::r#impl::driver::AllocationInfo;
|
||||
use cuda_types::cuda::CUcontext;
|
||||
|
||||
#[test]
|
||||
fn get_allocation() {
|
||||
let ctx1 = CUcontext(0x1234 as _);
|
||||
let ctx2 = CUcontext(0x5678 as _);
|
||||
let mut alloc_info = super::Allocations::new();
|
||||
alloc_info.insert(0x1000, 4, ctx1);
|
||||
alloc_info.insert(0x2000, 8, ctx2);
|
||||
for i in 0..4 {
|
||||
assert_eq!(
|
||||
alloc_info.get_offset_and_info(0x1000 + i),
|
||||
Some((
|
||||
i,
|
||||
AllocationInfo {
|
||||
size: 4,
|
||||
context: ctx1
|
||||
}
|
||||
))
|
||||
);
|
||||
}
|
||||
assert_eq!(alloc_info.get_offset_and_info(0x1000 + 4), None);
|
||||
for i in 0..8 {
|
||||
assert_eq!(
|
||||
alloc_info.get_offset_and_info(0x2000 + i),
|
||||
Some((
|
||||
i,
|
||||
AllocationInfo {
|
||||
size: 8,
|
||||
context: ctx2
|
||||
}
|
||||
))
|
||||
);
|
||||
}
|
||||
assert_eq!(alloc_info.get_offset_and_info(0x2000 + 8), None);
|
||||
}
|
||||
}
|
||||
|
|
22
zluda/src/impl/event.rs
Normal file
22
zluda/src/impl/event.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
use hip_runtime_sys::*;
|
||||
|
||||
pub(crate) unsafe fn create(event: *mut hipEvent_t, flags: ::core::ffi::c_uint) -> hipError_t {
|
||||
// Flag values are compatible between CUDA and HIP for 0,1,2,4
|
||||
hipEventCreateWithFlags(event, flags)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn query(event: hipEvent_t) -> hipError_t {
|
||||
hipEventQuery(event)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn destroy_v2(event: hipEvent_t) -> hipError_t {
|
||||
hipEventDestroy(event)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn record(event: hipEvent_t, stream: hipStream_t) -> hipError_t {
|
||||
hipEventRecord(event, stream)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn synchronize(event: hipEvent_t) -> hipError_t {
|
||||
hipEventSynchronize(event)
|
||||
}
|
|
@ -7,6 +7,14 @@ pub(crate) fn get_attribute(
|
|||
) -> hipError_t {
|
||||
// TODO: implement HIP_FUNC_ATTRIBUTE_PTX_VERSION
|
||||
// TODO: implement HIP_FUNC_ATTRIBUTE_BINARY_VERSION
|
||||
match cu_attrib {
|
||||
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION
|
||||
| hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => {
|
||||
*pi = 120;
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
unsafe { hipFuncGetAttribute(pi, cu_attrib, func) }?;
|
||||
if cu_attrib == hipFunction_attribute::HIP_FUNC_ATTRIBUTE_NUM_REGS {
|
||||
*pi = (*pi).max(1);
|
||||
|
@ -44,3 +52,18 @@ pub(crate) fn launch_kernel(
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_attribute(
|
||||
func: hipFunction_t,
|
||||
attribute: hipFunction_attribute,
|
||||
value: i32,
|
||||
) -> hipError_t {
|
||||
match attribute {
|
||||
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION
|
||||
| hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => {
|
||||
return hipError_t::ErrorNotSupported;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
hipFuncSetAttribute(func.0.cast(), hipFuncAttribute(attribute.0), value)
|
||||
}
|
||||
|
|
29
zluda/src/impl/graph.rs
Normal file
29
zluda/src/impl/graph.rs
Normal file
|
@ -0,0 +1,29 @@
|
|||
use hip_runtime_sys::*;
|
||||
|
||||
pub(crate) unsafe fn destroy(graph: hipGraph_t) -> hipError_t {
|
||||
hipGraphDestroy(graph)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn exec_destroy(graph_exec: hipGraphExec_t) -> hipError_t {
|
||||
hipGraphExecDestroy(graph_exec)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_nodes(
|
||||
graph: hipGraph_t,
|
||||
nodes: *mut hipGraphNode_t,
|
||||
num_nodes: *mut usize,
|
||||
) -> hipError_t {
|
||||
hipGraphGetNodes(graph, nodes, num_nodes)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn instantiate_with_flags(
|
||||
graph_exec: *mut hipGraphExec_t,
|
||||
graph: hipGraph_t,
|
||||
flags: u64,
|
||||
) -> hipError_t {
|
||||
hipGraphInstantiateWithFlags(graph_exec, graph, flags)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn launch(graph_exec: hipGraphExec_t, stream: hipStream_t) -> hipError_t {
|
||||
hipGraphLaunch(graph_exec, stream)
|
||||
}
|
18
zluda/src/impl/kernel.rs
Normal file
18
zluda/src/impl/kernel.rs
Normal file
|
@ -0,0 +1,18 @@
|
|||
use cuda_types::cuda::CUresult;
|
||||
use hip_runtime_sys::*;
|
||||
|
||||
use crate::r#impl::function;
|
||||
|
||||
pub(crate) unsafe fn get_function(func: &mut hipFunction_t, kernel: hipFunction_t) -> CUresult {
|
||||
*func = kernel;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_attribute(
|
||||
attrib: hipFunction_attribute,
|
||||
val: ::core::ffi::c_int,
|
||||
kernel: hipFunction_t,
|
||||
_dev: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
function::set_attribute(kernel, attrib, val)
|
||||
}
|
|
@ -44,3 +44,20 @@ pub(crate) unsafe fn get_module(out: &mut CUmodule, library: &Library) -> CUresu
|
|||
*out = module::Module { base: library.base }.wrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_kernel(
|
||||
kernel: *mut hipFunction_t,
|
||||
library: &Library,
|
||||
name: *const ::core::ffi::c_char,
|
||||
) -> hipError_t {
|
||||
hipModuleGetFunction(kernel, library.base, name)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_global(
|
||||
dptr: *mut hipDeviceptr_t,
|
||||
bytes: *mut usize,
|
||||
library: &Library,
|
||||
name: *const ::core::ffi::c_char,
|
||||
) -> hipError_t {
|
||||
hipModuleGetGlobal(dptr, bytes, library.base, name)
|
||||
}
|
||||
|
|
|
@ -1,13 +1,23 @@
|
|||
use std::ptr;
|
||||
|
||||
use cuda_types::cuda::{CUerror, CUresult, CUresultConsts};
|
||||
use hip_runtime_sys::*;
|
||||
|
||||
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
||||
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
||||
use crate::r#impl::{context, driver};
|
||||
|
||||
pub(crate) fn alloc_v2(dptr: &mut hipDeviceptr_t, bytesize: usize) -> CUresult {
|
||||
let context = context::get_current_context()?;
|
||||
unsafe { hipMalloc(ptr::from_mut(dptr).cast(), bytesize) }?;
|
||||
add_allocation(dptr.0, bytesize, context)?;
|
||||
// TODO: parametrize for non-Geekbench
|
||||
unsafe { hipMemsetD8(*dptr, 0, bytesize) }
|
||||
unsafe { hipMemsetD8(*dptr, 0, bytesize) }?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn free_v2(dptr: hipDeviceptr_t) -> hipError_t {
|
||||
unsafe { hipFree(dptr.0) }
|
||||
pub(crate) unsafe fn free_v2(dptr: hipDeviceptr_t) -> CUresult {
|
||||
let hip_result = hipFree(dptr.0);
|
||||
remove_allocation(dptr.0)?;
|
||||
Ok(hip_result?)
|
||||
}
|
||||
|
||||
pub(crate) fn copy_dto_h_v2(
|
||||
|
@ -45,14 +55,101 @@ pub(crate) fn get_info_v2(free: *mut usize, total: *mut usize) -> hipError_t {
|
|||
unsafe { hipMemGetInfo(free, total) }
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn free_host(ptr: *mut ::core::ffi::c_void) -> hipError_t {
|
||||
hipFreeHost(ptr)
|
||||
pub(crate) unsafe fn free_host(ptr: *mut ::core::ffi::c_void) -> CUresult {
|
||||
let hip_result = hipFreeHost(ptr);
|
||||
remove_allocation(ptr)?;
|
||||
Ok(hip_result?)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn host_alloc(
|
||||
pp: *mut *mut ::core::ffi::c_void,
|
||||
pp: &mut *mut ::core::ffi::c_void,
|
||||
bytesize: usize,
|
||||
flags: ::std::os::raw::c_uint,
|
||||
) -> hipError_t {
|
||||
hipHostMalloc(pp, bytesize, flags)
|
||||
) -> CUresult {
|
||||
let context = context::get_current_context()?;
|
||||
hipHostMalloc(pp, bytesize, flags)?;
|
||||
add_allocation(*pp, bytesize, context)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_allocation(
|
||||
dptr: *mut ::core::ffi::c_void,
|
||||
bytesize: usize,
|
||||
context: cuda_types::cuda::CUcontext,
|
||||
) -> Result<(), CUerror> {
|
||||
let global_state = driver::global_state()?;
|
||||
let mut allocations = global_state
|
||||
.allocations
|
||||
.lock()
|
||||
.map_err(|_| CUerror::UNKNOWN)?;
|
||||
allocations.insert(dptr as usize, bytesize, context);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_allocation(ptr: *mut std::ffi::c_void) -> Result<(), CUerror> {
|
||||
let global_state = driver::global_state()?;
|
||||
let mut allocations = global_state
|
||||
.allocations
|
||||
.lock()
|
||||
.map_err(|_| CUerror::UNKNOWN)?;
|
||||
allocations.remove(ptr as usize);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn retain_allocation_handle(
|
||||
_handle: *mut cuda_types::cuda::CUmemGenericAllocationHandle,
|
||||
_addr: *mut ::core::ffi::c_void,
|
||||
) -> CUresult {
|
||||
CUresult::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn copy_hto_d_async_v2(
|
||||
dst_device: hipDeviceptr_t,
|
||||
src_host: *const ::core::ffi::c_void,
|
||||
byte_count: usize,
|
||||
stream: hipStream_t,
|
||||
) -> hipError_t {
|
||||
hipMemcpyHtoDAsync(dst_device, src_host.cast_mut(), byte_count, stream)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn copy_dto_h_async_v2(
|
||||
dst_host: *mut ::core::ffi::c_void,
|
||||
src_device: hipDeviceptr_t,
|
||||
byte_count: usize,
|
||||
stream: hipStream_t,
|
||||
) -> hipError_t {
|
||||
hipMemcpyDtoHAsync(dst_host, src_device, byte_count, stream)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn copy_dto_d_async_v2(
|
||||
dst_device: hipDeviceptr_t,
|
||||
src_device: hipDeviceptr_t,
|
||||
byte_count: usize,
|
||||
stream: hipStream_t,
|
||||
) -> hipError_t {
|
||||
hipMemcpyDtoDAsync(dst_device, src_device, byte_count, stream)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn copy_async(
|
||||
dst: hipDeviceptr_t,
|
||||
src: hipDeviceptr_t,
|
||||
byte_count: usize,
|
||||
stream: hipStream_t,
|
||||
) -> hipError_t {
|
||||
hipMemcpyAsync(
|
||||
dst.0,
|
||||
src.0,
|
||||
byte_count,
|
||||
hipMemcpyKind::hipMemcpyDefault,
|
||||
stream,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_d8_async(
|
||||
dst_device: hipDeviceptr_t,
|
||||
uc: ::core::ffi::c_uchar,
|
||||
n: usize,
|
||||
stream: hipStream_t,
|
||||
) -> hipError_t {
|
||||
hipMemsetD8Async(dst_device, uc, n, stream)
|
||||
}
|
||||
|
|
|
@ -4,7 +4,10 @@ use zluda_common::from_cuda_object;
|
|||
pub(super) mod context;
|
||||
pub(super) mod device;
|
||||
pub(super) mod driver;
|
||||
pub(super) mod event;
|
||||
pub(super) mod function;
|
||||
pub(super) mod graph;
|
||||
pub(super) mod kernel;
|
||||
pub(super) mod library;
|
||||
pub(super) mod memory;
|
||||
pub(super) mod module;
|
||||
|
|
|
@ -2,39 +2,72 @@ use cuda_types::cuda::*;
|
|||
use hip_runtime_sys::*;
|
||||
use std::{ffi::c_void, ptr};
|
||||
|
||||
use crate::r#impl::driver;
|
||||
|
||||
// TODO: handlehipMemoryTypeUnregistered
|
||||
fn to_cu_memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
|
||||
match cu {
|
||||
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
|
||||
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
|
||||
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
|
||||
// TODO: check if this is correct
|
||||
hipMemoryType::hipMemoryTypeManaged => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
|
||||
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
|
||||
_ => Err(hipErrorCode_t::InvalidValue),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_attribute(
|
||||
data: *mut c_void,
|
||||
attribute: hipPointer_attribute,
|
||||
ptr: hipDeviceptr_t,
|
||||
) -> hipError_t {
|
||||
) -> CUresult {
|
||||
if data == ptr::null_mut() {
|
||||
return hipError_t::ErrorInvalidValue;
|
||||
return CUresult::ERROR_INVALID_VALUE;
|
||||
}
|
||||
match attribute {
|
||||
// TODO: implement by getting device ordinal & allocation start,
|
||||
// then go through every context for that device
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => hipError_t::ErrorNotSupported,
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
let mut hip_result = hipMemoryType(0);
|
||||
hipPointerGetAttribute(
|
||||
(&mut hip_result as *mut hipMemoryType).cast::<c_void>(),
|
||||
attribute,
|
||||
ptr,
|
||||
)?;
|
||||
let cuda_result = memory_type(hip_result)?;
|
||||
unsafe { *(data.cast()) = cuda_result };
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => {
|
||||
let globals = driver::global_state()?;
|
||||
let allocations = globals.allocations.lock().map_err(|_| CUerror::UNKNOWN)?;
|
||||
let (_, alloc) = allocations
|
||||
.get_offset_and_info(ptr.0 as usize)
|
||||
.ok_or(CUerror::INVALID_VALUE)?;
|
||||
unsafe { *(data.cast()) = alloc.context };
|
||||
Ok(())
|
||||
}
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
let mut memory_type = hipMemoryType(0);
|
||||
hipPointerGetAttribute(ptr::from_mut(&mut memory_type).cast(), attribute, ptr)?;
|
||||
unsafe { *(data.cast()) = to_cu_memory_type(memory_type)? };
|
||||
Ok(())
|
||||
}
|
||||
_ => {
|
||||
hipPointerGetAttribute(data, attribute, ptr)?;
|
||||
Ok(())
|
||||
}
|
||||
_ => unsafe { hipPointerGetAttribute(data, attribute, ptr) },
|
||||
}
|
||||
}
|
||||
|
||||
fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
|
||||
match cu {
|
||||
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
|
||||
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
|
||||
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
|
||||
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
|
||||
_ => Err(hipErrorCode_t::InvalidValue),
|
||||
pub(crate) unsafe fn get_attributes(
|
||||
num_attributes: ::core::ffi::c_uint,
|
||||
attributes: &mut hipPointer_attribute,
|
||||
data: &mut *mut ::core::ffi::c_void,
|
||||
ptr: hipDeviceptr_t,
|
||||
) -> CUresult {
|
||||
hipDrvPointerGetAttributes(num_attributes, attributes, data, ptr)?;
|
||||
let attributes = std::slice::from_raw_parts_mut(attributes, num_attributes as usize);
|
||||
let data = std::slice::from_raw_parts_mut(data, num_attributes as usize);
|
||||
for (attr, data_ptr) in attributes.iter().copied().zip(data.iter().copied()) {
|
||||
match attr {
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => {
|
||||
get_attribute(data_ptr, attr, ptr).ok();
|
||||
}
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
*(data_ptr.cast::<CUmemorytype>()) =
|
||||
to_cu_memory_type(*data_ptr.cast::<hipMemoryType>())?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -3,3 +3,58 @@ use hip_runtime_sys::*;
|
|||
pub(crate) fn synchronize(stream: hipStream_t) -> hipError_t {
|
||||
unsafe { hipStreamSynchronize(stream) }
|
||||
}
|
||||
|
||||
pub(crate) fn create_with_priority(
|
||||
stream: *mut hipStream_t,
|
||||
flags: ::core::ffi::c_uint,
|
||||
priority: ::core::ffi::c_int,
|
||||
) -> hipError_t {
|
||||
unsafe { hipStreamCreateWithPriority(stream, flags, priority) }
|
||||
}
|
||||
|
||||
pub(crate) fn destroy_v2(stream: hipStream_t) -> hipError_t {
|
||||
unsafe { hipStreamDestroy(stream) }
|
||||
}
|
||||
|
||||
pub(crate) fn begin_capture_v2(stream: hipStream_t, mode: hipStreamCaptureMode) -> hipError_t {
|
||||
unsafe { hipStreamBeginCapture(stream, mode) }
|
||||
}
|
||||
|
||||
pub(crate) fn end_capture(stream: hipStream_t, graph: *mut hipGraph_t) -> hipError_t {
|
||||
unsafe { hipStreamEndCapture(stream, graph) }
|
||||
}
|
||||
|
||||
pub(crate) fn is_capturing(
|
||||
stream: hipStream_t,
|
||||
capture_status: *mut hipStreamCaptureStatus,
|
||||
) -> hipError_t {
|
||||
unsafe { hipStreamIsCapturing(stream, capture_status) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_capture_info_v2(
|
||||
stream: hipStream_t,
|
||||
capture_status: *mut hipStreamCaptureStatus,
|
||||
id: *mut ::core::ffi::c_ulonglong,
|
||||
graph_out: *mut hipGraph_t,
|
||||
dependencies_out: *mut *const hipGraphNode_t,
|
||||
num_dependencies_out: *mut usize,
|
||||
) -> hipError_t {
|
||||
unsafe {
|
||||
hipStreamGetCaptureInfo_v2(
|
||||
stream,
|
||||
capture_status,
|
||||
id,
|
||||
graph_out,
|
||||
dependencies_out,
|
||||
num_dependencies_out,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn wait_event(
|
||||
stream: hipStream_t,
|
||||
event: hipEvent_t,
|
||||
flags: ::core::ffi::c_uint,
|
||||
) -> hipError_t {
|
||||
unsafe { hipStreamWaitEvent(stream, event, flags) }
|
||||
}
|
||||
|
|
|
@ -69,16 +69,19 @@ cuda_macros::cuda_function_declarations!(
|
|||
<= [
|
||||
cuCtxCreate_v2,
|
||||
cuCtxDestroy_v2,
|
||||
cuCtxGetLimit,
|
||||
cuCtxSetCurrent,
|
||||
cuCtxGetApiVersion,
|
||||
cuCtxGetCurrent,
|
||||
cuCtxGetDevice,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuCtxPushCurrent,
|
||||
cuCtxPushCurrent_v2,
|
||||
cuCtxGetLimit,
|
||||
cuCtxGetStreamPriorityRange,
|
||||
cuCtxPopCurrent,
|
||||
cuCtxPopCurrent_v2,
|
||||
cuCtxPushCurrent,
|
||||
cuCtxPushCurrent_v2,
|
||||
cuCtxSetCurrent,
|
||||
cuCtxSetFlags,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
|
@ -88,38 +91,72 @@ cuda_macros::cuda_function_declarations!(
|
|||
cuDeviceGetProperties,
|
||||
cuDeviceGetUuid,
|
||||
cuDeviceGetUuid_v2,
|
||||
cuDevicePrimaryCtxGetState,
|
||||
cuDevicePrimaryCtxRelease,
|
||||
cuDevicePrimaryCtxRetain,
|
||||
cuDevicePrimaryCtxReset,
|
||||
cuDevicePrimaryCtxRetain,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuDriverGetVersion,
|
||||
cuEventCreate,
|
||||
cuEventDestroy_v2,
|
||||
cuEventQuery,
|
||||
cuEventRecord,
|
||||
cuEventSynchronize,
|
||||
cuFuncGetAttribute,
|
||||
cuFuncSetAttribute,
|
||||
cuGetExportTable,
|
||||
cuGetProcAddress,
|
||||
cuGetProcAddress_v2,
|
||||
cuGraphDestroy,
|
||||
cuGraphExecDestroy,
|
||||
cuGraphGetNodes,
|
||||
cuGraphInstantiateWithFlags,
|
||||
cuGraphLaunch,
|
||||
cuInit,
|
||||
cuLibraryLoadData,
|
||||
cuKernelGetFunction,
|
||||
cuKernelSetAttribute,
|
||||
cuLaunchKernel,
|
||||
cuLaunchKernelEx,
|
||||
cuLibraryGetGlobal,
|
||||
cuLibraryGetKernel,
|
||||
cuLibraryGetModule,
|
||||
cuLibraryLoadData,
|
||||
cuLibraryUnload,
|
||||
cuMemAlloc_v2,
|
||||
cuMemFree_v2,
|
||||
cuMemHostAlloc,
|
||||
cuMemFreeHost,
|
||||
cuMemFree_v2,
|
||||
cuMemGetAddressRange_v2,
|
||||
cuMemGetInfo_v2,
|
||||
cuMemHostAlloc,
|
||||
cuMemRetainAllocationHandle,
|
||||
cuMemcpyAsync,
|
||||
cuMemcpyDtoDAsync_v2,
|
||||
cuMemcpyDtoHAsync_v2,
|
||||
cuMemcpyDtoH_v2,
|
||||
cuMemcpyHtoDAsync_v2,
|
||||
cuMemcpyHtoD_v2,
|
||||
cuMemsetD32_v2,
|
||||
cuMemsetD8Async,
|
||||
cuMemsetD8_v2,
|
||||
cuModuleGetFunction,
|
||||
cuModuleGetGlobal_v2,
|
||||
cuModuleGetLoadingMode,
|
||||
cuModuleLoadData,
|
||||
cuModuleUnload,
|
||||
cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags,
|
||||
cuPointerGetAttribute,
|
||||
cuStreamSynchronize,
|
||||
cuPointerGetAttributes,
|
||||
cuProfilerStart,
|
||||
cuProfilerStop,
|
||||
cuStreamBeginCapture_v2,
|
||||
cuStreamCreateWithPriority,
|
||||
cuStreamDestroy_v2,
|
||||
cuStreamEndCapture,
|
||||
cuStreamGetCaptureInfo_v2,
|
||||
cuStreamIsCapturing,
|
||||
cuStreamSynchronize,
|
||||
cuStreamWaitEvent,
|
||||
cuThreadExchangeStreamCaptureMode,
|
||||
],
|
||||
implemented_in_function <= [cuLaunchKernel,]
|
||||
);
|
||||
|
|
|
@ -1390,6 +1390,8 @@ fn generate_display_cuda(
|
|||
"cuStreamGetAttribute_ptsz",
|
||||
"cuGraphKernelNodeGetAttribute",
|
||||
"cuGraphKernelNodeSetAttribute",
|
||||
"cuPointerGetAttribute",
|
||||
"cuPointerGetAttributes",
|
||||
];
|
||||
let count_selectors = [
|
||||
("cuCtxCreate_v3", 1, 2),
|
||||
|
|
|
@ -89,6 +89,12 @@ macro_rules! from_cuda_transmute {
|
|||
Ok(x.cast::<$to>())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, *mut *const $from, E> for *mut *const $to {
|
||||
fn from_cuda(x: &'a *mut *const $from) -> Result<Self, E> {
|
||||
Ok(x.cast::<*const $to>())
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
@ -110,6 +116,7 @@ macro_rules! from_cuda_object {
|
|||
from_cuda_nop!(
|
||||
*mut i8,
|
||||
*mut i32,
|
||||
*mut u64,
|
||||
*mut usize,
|
||||
*const f32,
|
||||
*mut f32,
|
||||
|
@ -134,7 +141,8 @@ from_cuda_nop!(
|
|||
CUmodule,
|
||||
CUcontext,
|
||||
cublasHandle_t,
|
||||
cublasStatus_t
|
||||
cublasStatus_t,
|
||||
CUlaunchConfig
|
||||
);
|
||||
from_cuda_transmute!(
|
||||
CUuuid => hipUUID,
|
||||
|
@ -142,7 +150,16 @@ from_cuda_transmute!(
|
|||
CUfunction_attribute => hipFunction_attribute,
|
||||
CUstream => hipStream_t,
|
||||
CUpointer_attribute => hipPointer_attribute,
|
||||
CUdeviceptr_v2 => hipDeviceptr_t
|
||||
CUdeviceptr_v2 => hipDeviceptr_t,
|
||||
CUevent => hipEvent_t,
|
||||
// This is safe because HIP's enum is the subset of CUDA's enum and
|
||||
// this type is used purely as a function result
|
||||
CUstreamCaptureStatus => hipStreamCaptureStatus,
|
||||
CUgraph => hipGraph_t,
|
||||
CUstreamCaptureMode => hipStreamCaptureMode,
|
||||
CUgraphNode => hipGraphNode_t,
|
||||
CUgraphExec => hipGraphExec_t,
|
||||
CUkernel => hipFunction_t
|
||||
);
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, CUlimit, E> for hipLimit_t {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue