mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-03 08:10:21 +00:00
Add more NVML and cuBLAS coverage (#481)
This commit is contained in:
parent
62d340e4bd
commit
ec1358af1c
17 changed files with 5846 additions and 22 deletions
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -455,6 +455,7 @@ dependencies = [
|
|||
"cuda_macros",
|
||||
"hip_runtime-sys",
|
||||
"rocblas-sys",
|
||||
"rocm_smi-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2737,6 +2738,10 @@ dependencies = [
|
|||
"hip_runtime-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rocm_smi-sys"
|
||||
version = "0.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
|
@ -3750,6 +3755,7 @@ version = "0.0.0"
|
|||
dependencies = [
|
||||
"cuda_macros",
|
||||
"cuda_types",
|
||||
"hip_runtime-sys",
|
||||
"rocblas-sys",
|
||||
"zluda_common",
|
||||
]
|
||||
|
@ -3817,6 +3823,8 @@ version = "0.0.0"
|
|||
dependencies = [
|
||||
"cuda_macros",
|
||||
"cuda_types",
|
||||
"rocm_smi-sys",
|
||||
"zluda_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -8498,4 +8498,125 @@ returned in \a sessionCount
|
|||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
state: *mut cuda_types::nvml::nvmlPowerSmoothingState_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlInit() -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetCount(
|
||||
deviceCount: *mut ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetHandleByIndex(
|
||||
index: ::core::ffi::c_uint,
|
||||
device: *mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetHandleByPciBusId(
|
||||
pciBusId: *const ::core::ffi::c_char,
|
||||
device: *mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetPciInfo(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetPciInfo_v2(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetNvLinkRemotePciInfo(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
link: ::core::ffi::c_uint,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGridLicensableFeatures(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGridLicensableFeatures_v2(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGridLicensableFeatures_v3(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceRemoveGpu(
|
||||
pciInfo: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlEventSetWait(
|
||||
set: cuda_types::nvml::nvmlEventSet_t,
|
||||
data: *mut cuda_types::nvml::nvmlEventData_t,
|
||||
timeoutms: ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetAttributes(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
attributes: *mut cuda_types::nvml::nvmlDeviceAttributes_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlComputeInstanceGetInfo(
|
||||
computeInstance: cuda_types::nvml::nvmlComputeInstance_t,
|
||||
info: *mut cuda_types::nvml::nvmlComputeInstanceInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetComputeRunningProcesses(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetComputeRunningProcesses_v2(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGraphicsRunningProcesses(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGraphicsRunningProcesses_v2(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetMPSComputeRunningProcesses(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetMPSComputeRunningProcesses_v2(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetGpuInstancePossiblePlacements(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
profileId: ::core::ffi::c_uint,
|
||||
placements: *mut cuda_types::nvml::nvmlGpuInstancePlacement_t,
|
||||
count: *mut ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlVgpuInstanceGetLicenseInfo(
|
||||
vgpuInstance: cuda_types::nvml::nvmlVgpuInstance_t,
|
||||
licenseInfo: *mut cuda_types::nvml::nvmlVgpuLicenseInfo_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
#[must_use]
|
||||
fn nvmlDeviceGetDriverModel(
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
current: *mut cuda_types::nvml::nvmlDriverModel_t,
|
||||
pending: *mut cuda_types::nvml::nvmlDriverModel_t,
|
||||
) -> cuda_types::nvml::nvmlReturn_t;
|
||||
}
|
||||
|
|
|
@ -9,3 +9,6 @@ cuda_macros = { path = "../cuda_macros" }
|
|||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
bitflags = "2.9.1"
|
||||
rocblas-sys = { path = "../ext/rocblas-sys" }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
rocm_smi-sys = { path = "../ext/rocm_smi-sys" }
|
||||
|
|
|
@ -4879,3 +4879,20 @@ pub type nvmlReturn_t = ::core::result::Result<(), nvmlError_t>;
|
|||
const _: fn() = || {
|
||||
let _ = std::mem::transmute::<nvmlReturn_t, u32>;
|
||||
};
|
||||
#[cfg(unix)]
|
||||
impl From<rocm_smi_sys::rsmi_error> for nvmlError_t {
|
||||
fn from(error: rocm_smi_sys::rsmi_error) -> Self {
|
||||
match error {
|
||||
rocm_smi_sys::rsmi_error::INVALID_ARGS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
|
||||
rocm_smi_sys::rsmi_error::NOT_SUPPORTED => nvmlError_t::from(nvmlError_t::NOT_SUPPORTED),
|
||||
rocm_smi_sys::rsmi_error::PERMISSION => nvmlError_t::from(nvmlError_t::NO_PERMISSION),
|
||||
rocm_smi_sys::rsmi_error::INPUT_OUT_OF_BOUNDS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
|
||||
rocm_smi_sys::rsmi_error::INIT_ERROR => nvmlError_t::from(nvmlError_t::UNINITIALIZED),
|
||||
rocm_smi_sys::rsmi_error::NOT_FOUND => nvmlError_t::from(nvmlError_t::GPU_NOT_FOUND),
|
||||
rocm_smi_sys::rsmi_error::INSUFFICIENT_SIZE => nvmlError_t::from(nvmlError_t::INSUFFICIENT_SIZE),
|
||||
rocm_smi_sys::rsmi_error::INTERRUPT => nvmlError_t::from(nvmlError_t::IRQ_ISSUE),
|
||||
rocm_smi_sys::rsmi_error::NO_DATA => nvmlError_t::from(nvmlError_t::NO_DATA),
|
||||
_ => nvmlError_t::from(nvmlError_t::UNKNOWN),
|
||||
}
|
||||
}
|
||||
}
|
7
ext/rocm_smi-sys/Cargo.toml
vendored
Normal file
7
ext/rocm_smi-sys/Cargo.toml
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
[package]
|
||||
name = "rocm_smi-sys"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
4735
ext/rocm_smi-sys/src/lib.rs
vendored
Normal file
4735
ext/rocm_smi-sys/src/lib.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -13103,6 +13103,566 @@ pub fn write_nvmlDevicePowerSmoothingSetState(
|
|||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlInit(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"()")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetCount(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
deviceCount: *mut ::core::ffi::c_uint,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(deviceCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&deviceCount, "nvmlDeviceGetCount", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetHandleByIndex(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
index: ::core::ffi::c_uint,
|
||||
device: *mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(index), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&index, "nvmlDeviceGetHandleByIndex", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&device, "nvmlDeviceGetHandleByIndex", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetHandleByPciBusId(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
pciBusId: *const ::core::ffi::c_char,
|
||||
device: *mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(pciBusId), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&pciBusId,
|
||||
"nvmlDeviceGetHandleByPciBusId",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetHandleByPciBusId",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetPciInfo(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&device, "nvmlDeviceGetPciInfo", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&pci, "nvmlDeviceGetPciInfo", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetPciInfo_v2(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&device, "nvmlDeviceGetPciInfo_v2", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&pci, "nvmlDeviceGetPciInfo_v2", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetNvLinkRemotePciInfo(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
link: ::core::ffi::c_uint,
|
||||
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetNvLinkRemotePciInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(link), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&link,
|
||||
"nvmlDeviceGetNvLinkRemotePciInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&pci,
|
||||
"nvmlDeviceGetNvLinkRemotePciInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGridLicensableFeatures(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGridLicensableFeatures",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&pGridLicensableFeatures,
|
||||
"nvmlDeviceGetGridLicensableFeatures",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGridLicensableFeatures_v2(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGridLicensableFeatures_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&pGridLicensableFeatures,
|
||||
"nvmlDeviceGetGridLicensableFeatures_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGridLicensableFeatures_v3(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGridLicensableFeatures_v3",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&pGridLicensableFeatures,
|
||||
"nvmlDeviceGetGridLicensableFeatures_v3",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceRemoveGpu(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
pciInfo: *mut cuda_types::nvml::nvmlPciInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(pciInfo), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&pciInfo, "nvmlDeviceRemoveGpu", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlEventSetWait(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
set: cuda_types::nvml::nvmlEventSet_t,
|
||||
data: *mut cuda_types::nvml::nvmlEventData_t,
|
||||
timeoutms: ::core::ffi::c_uint,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(set), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&set, "nvmlEventSetWait", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&data, "nvmlEventSetWait", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(timeoutms), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&timeoutms, "nvmlEventSetWait", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetAttributes(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
attributes: *mut cuda_types::nvml::nvmlDeviceAttributes_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&device, "nvmlDeviceGetAttributes", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributes, "nvmlDeviceGetAttributes", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlComputeInstanceGetInfo(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
computeInstance: cuda_types::nvml::nvmlComputeInstance_t,
|
||||
info: *mut cuda_types::nvml::nvmlComputeInstanceInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(computeInstance), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&computeInstance,
|
||||
"nvmlComputeInstanceGetInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(info), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&info, "nvmlComputeInstanceGetInfo", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetComputeRunningProcesses(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetComputeRunningProcesses_v2(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGraphicsRunningProcesses(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGraphicsRunningProcesses_v2(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetGraphicsRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetMPSComputeRunningProcesses(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetMPSComputeRunningProcesses_v2(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
infoCount: *mut ::core::ffi::c_uint,
|
||||
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infoCount,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&infos,
|
||||
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetGpuInstancePossiblePlacements(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
profileId: ::core::ffi::c_uint,
|
||||
placements: *mut cuda_types::nvml::nvmlGpuInstancePlacement_t,
|
||||
count: *mut ::core::ffi::c_uint,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&device,
|
||||
"nvmlDeviceGetGpuInstancePossiblePlacements",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(profileId), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&profileId,
|
||||
"nvmlDeviceGetGpuInstancePossiblePlacements",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(placements), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&placements,
|
||||
"nvmlDeviceGetGpuInstancePossiblePlacements",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(count), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&count,
|
||||
"nvmlDeviceGetGpuInstancePossiblePlacements",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlVgpuInstanceGetLicenseInfo(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
vgpuInstance: cuda_types::nvml::nvmlVgpuInstance_t,
|
||||
licenseInfo: *mut cuda_types::nvml::nvmlVgpuLicenseInfo_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(vgpuInstance), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&vgpuInstance,
|
||||
"nvmlVgpuInstanceGetLicenseInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(licenseInfo), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&licenseInfo,
|
||||
"nvmlVgpuInstanceGetLicenseInfo",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
pub fn write_nvmlDeviceGetDriverModel(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
device: cuda_types::nvml::nvmlDevice_t,
|
||||
current: *mut cuda_types::nvml::nvmlDriverModel_t,
|
||||
pending: *mut cuda_types::nvml::nvmlDriverModel_t,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&device, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(current), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(¤t, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(pending), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&pending, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
impl crate::CudaDisplay for cuda_types::nvml::nvmlReturn_t {
|
||||
fn write(
|
||||
&self,
|
||||
|
|
|
@ -29,6 +29,7 @@ fn main() {
|
|||
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
|
||||
);
|
||||
generate_rocblas(&crate_root, &["..", "ext", "rocblas-sys", "src", "lib.rs"]);
|
||||
generate_rocm_smi(&crate_root, &["..", "ext", "rocm_smi-sys", "src", "lib.rs"]);
|
||||
let cuda_functions = generate_cuda(&crate_root);
|
||||
generate_process_address_table(&crate_root, cuda_functions);
|
||||
generate_ml(&crate_root);
|
||||
|
@ -179,6 +180,7 @@ fn generate_cufft(crate_root: &PathBuf) {
|
|||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cufft.rs"],
|
||||
&module,
|
||||
None,
|
||||
);
|
||||
generate_display_perflib(
|
||||
Some(&result_options),
|
||||
|
@ -245,6 +247,7 @@ fn generate_cusparse(crate_root: &PathBuf) {
|
|||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cusparse.rs"],
|
||||
&module,
|
||||
None,
|
||||
);
|
||||
generate_display_perflib(
|
||||
Some(&result_options),
|
||||
|
@ -685,6 +688,7 @@ fn generate_cublas(crate_root: &PathBuf) {
|
|||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cublas.rs"],
|
||||
&module,
|
||||
None,
|
||||
);
|
||||
generate_display_perflib(
|
||||
Some(&result_options),
|
||||
|
@ -759,6 +763,7 @@ fn generate_cublaslt(crate_root: &PathBuf) {
|
|||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cublaslt.rs"],
|
||||
&module_blas,
|
||||
None,
|
||||
);
|
||||
generate_display_perflib(
|
||||
None,
|
||||
|
@ -837,7 +842,10 @@ fn generate_ml(crate_root: &PathBuf) {
|
|||
.allowlist_var("^NVML.*")
|
||||
.must_use_type("nvmlReturn_t")
|
||||
.constified_enum("nvmlReturn_enum")
|
||||
.clang_args(["-I/usr/local/cuda/include"])
|
||||
.clang_args([
|
||||
"-I/usr/local/cuda/include",
|
||||
"-DNVML_NO_UNVERSIONED_FUNC_DEFS",
|
||||
])
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
@ -856,12 +864,31 @@ fn generate_ml(crate_root: &PathBuf) {
|
|||
success: ("NVML_SUCCESS", "SUCCESS"),
|
||||
hip_type: None,
|
||||
};
|
||||
let suffix =
|
||||
"#[cfg(unix)]
|
||||
impl From<rocm_smi_sys::rsmi_error> for nvmlError_t {
|
||||
fn from(error: rocm_smi_sys::rsmi_error) -> Self {
|
||||
match error {
|
||||
rocm_smi_sys::rsmi_error::INVALID_ARGS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
|
||||
rocm_smi_sys::rsmi_error::NOT_SUPPORTED => nvmlError_t::from(nvmlError_t::NOT_SUPPORTED),
|
||||
rocm_smi_sys::rsmi_error::PERMISSION => nvmlError_t::from(nvmlError_t::NO_PERMISSION),
|
||||
rocm_smi_sys::rsmi_error::INPUT_OUT_OF_BOUNDS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
|
||||
rocm_smi_sys::rsmi_error::INIT_ERROR => nvmlError_t::from(nvmlError_t::UNINITIALIZED),
|
||||
rocm_smi_sys::rsmi_error::NOT_FOUND => nvmlError_t::from(nvmlError_t::GPU_NOT_FOUND),
|
||||
rocm_smi_sys::rsmi_error::INSUFFICIENT_SIZE => nvmlError_t::from(nvmlError_t::INSUFFICIENT_SIZE),
|
||||
rocm_smi_sys::rsmi_error::INTERRUPT => nvmlError_t::from(nvmlError_t::IRQ_ISSUE),
|
||||
rocm_smi_sys::rsmi_error::NO_DATA => nvmlError_t::from(nvmlError_t::NO_DATA),
|
||||
_ => nvmlError_t::from(nvmlError_t::UNKNOWN),
|
||||
}
|
||||
}
|
||||
}";
|
||||
generate_types_library(
|
||||
Some(&result_options),
|
||||
None,
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "nvml.rs"],
|
||||
&module,
|
||||
Some(suffix),
|
||||
);
|
||||
generate_display_perflib(
|
||||
Some(&result_options),
|
||||
|
@ -878,6 +905,7 @@ fn generate_types_library(
|
|||
crate_root: &PathBuf,
|
||||
path: &[&str],
|
||||
module: &syn::File,
|
||||
suffix: Option<&str>,
|
||||
) {
|
||||
let module = generate_types_library_impl(result_options, module);
|
||||
let mut output = crate_root.clone();
|
||||
|
@ -895,6 +923,9 @@ fn generate_types_library(
|
|||
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
|
||||
}
|
||||
}
|
||||
if let Some(suffix) = suffix {
|
||||
text.push_str(suffix);
|
||||
}
|
||||
write_rust_to_file(output, &text)
|
||||
}
|
||||
|
||||
|
@ -1038,6 +1069,35 @@ fn generate_rocblas(output: &PathBuf, path: &[&str]) {
|
|||
write_rust_to_file(output, text)
|
||||
}
|
||||
|
||||
fn generate_rocm_smi(output: &PathBuf, path: &[&str]) {
|
||||
let rocm_smi_header = new_builder()
|
||||
.header("/opt/rocm/include/rocm_smi/rocm_smi.h")
|
||||
.allowlist_type("^rsmi.*")
|
||||
.allowlist_function("^rsmi.*")
|
||||
.allowlist_var("^RSMI_.*")
|
||||
.must_use_type("rsmi_status_t")
|
||||
.constified_enum("rsmi_status_t")
|
||||
.clang_args(["-I/opt/rocm/include"])
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let mut module: syn::File = syn::parse_str(&rocm_smi_header).unwrap();
|
||||
let result_options = ConvertIntoRustResultOptions {
|
||||
type_: "rsmi_status_t",
|
||||
underlying_type: "rsmi_status_t",
|
||||
new_error_type: "rsmi_error",
|
||||
error_prefix: ("RSMI_STATUS_", "ERROR_"),
|
||||
success: ("RSMI_STATUS_SUCCESS", "SUCCESS"),
|
||||
hip_type: None,
|
||||
};
|
||||
let mut converter = ConvertIntoRustResult::new(result_options);
|
||||
module.items = converter.convert(module.items).collect();
|
||||
converter.flush(&mut module.items);
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
|
||||
for type_ in arg {
|
||||
let type_ = Ident::new(type_, Span::call_site());
|
||||
|
|
|
@ -12,6 +12,7 @@ cuda_macros = { path = "../cuda_macros" }
|
|||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_common = { path = "../zluda_common" }
|
||||
rocblas-sys = { path = "../ext/rocblas-sys" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
use std::mem;
|
||||
|
||||
use cuda_types::cublas::*;
|
||||
use zluda_common::{from_cuda_object, ZludaObject};
|
||||
|
||||
use hip_runtime_sys::*;
|
||||
use rocblas_sys::*;
|
||||
use std::mem;
|
||||
use zluda_common::{from_cuda_object, ZludaObject};
|
||||
|
||||
pub struct Handle {
|
||||
handle: rocblas_handle,
|
||||
|
@ -153,3 +152,71 @@ pub(crate) fn sgemm_v2(
|
|||
pub(crate) fn destroy_v2(handle: cublasHandle_t) -> cublasStatus_t {
|
||||
zluda_common::drop_checked::<Handle>(handle)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_stream_v2(handle: &Handle, stream: hipStream_t) -> rocblas_status {
|
||||
rocblas_set_stream(handle.handle, stream)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn set_workspace_v2(
|
||||
handle: &Handle,
|
||||
workspace: *mut ::core::ffi::c_void,
|
||||
size: usize,
|
||||
) -> rocblas_status {
|
||||
rocblas_set_workspace(handle.handle, workspace, size)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_math_mode(handle: &Handle, mode: &mut cublasMath_t) -> rocblas_status {
|
||||
let mut roc_mode = mem::zeroed();
|
||||
rocblas_get_math_mode(handle.handle, &mut roc_mode)?;
|
||||
*mode = zluda_common::FromCuda::from_cuda(&roc_mode)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn gemm_ex(
|
||||
handle: &Handle,
|
||||
transa: rocblas_operation,
|
||||
transb: rocblas_operation,
|
||||
m: ::core::ffi::c_int,
|
||||
n: ::core::ffi::c_int,
|
||||
k: ::core::ffi::c_int,
|
||||
alpha: *const ::core::ffi::c_void,
|
||||
a: *const ::core::ffi::c_void,
|
||||
a_type: rocblas_datatype,
|
||||
lda: ::core::ffi::c_int,
|
||||
b: *const ::core::ffi::c_void,
|
||||
b_type: rocblas_datatype,
|
||||
ldb: ::core::ffi::c_int,
|
||||
beta: *const ::core::ffi::c_void,
|
||||
c: *mut ::core::ffi::c_void,
|
||||
c_type: rocblas_datatype,
|
||||
ldc: ::core::ffi::c_int,
|
||||
compute_type: rocblas_datatype,
|
||||
algo: rocblas_gemm_algo,
|
||||
) -> rocblas_status {
|
||||
rocblas_gemm_ex(
|
||||
handle.handle,
|
||||
transa,
|
||||
transb,
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
alpha,
|
||||
a,
|
||||
a_type,
|
||||
lda,
|
||||
b,
|
||||
b_type,
|
||||
ldb,
|
||||
beta,
|
||||
c,
|
||||
c_type,
|
||||
ldc,
|
||||
c,
|
||||
c_type,
|
||||
ldc,
|
||||
compute_type,
|
||||
algo,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -47,10 +47,16 @@ cuda_macros::cublas_function_declarations!(
|
|||
implemented
|
||||
<= [
|
||||
cublasCreate_v2,
|
||||
cublasDestroy_v2,
|
||||
cublasGemmEx,
|
||||
cublasGetMathMode,
|
||||
cublasLtCreate,
|
||||
cublasLtDestroy,
|
||||
cublasSetMathMode,
|
||||
cublasSetStream_v2,
|
||||
cublasSetWorkspace_v2,
|
||||
cublasSgemmStridedBatched,
|
||||
cublasSgemm_v2,
|
||||
cublasDestroy_v2
|
||||
],
|
||||
implemented_and_always_succeeds
|
||||
<= [
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use cuda_types::{cublas::*, cuda::*};
|
||||
use cuda_types::{cublas::*, cuda::*, nvml::*};
|
||||
use hip_runtime_sys::*;
|
||||
use rocblas_sys::*;
|
||||
use std::{
|
||||
|
@ -22,6 +22,16 @@ impl CudaErrorType for cublasError_t {
|
|||
const NOT_SUPPORTED: Self = Self::NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
impl CudaErrorType for rocblas_error {
|
||||
const INVALID_VALUE: Self = Self::invalid_value;
|
||||
const NOT_SUPPORTED: Self = Self::not_implemented;
|
||||
}
|
||||
|
||||
impl CudaErrorType for nvmlError_t {
|
||||
const INVALID_VALUE: Self = Self::INVALID_ARGUMENT;
|
||||
const NOT_SUPPORTED: Self = Self::NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/// Used to try to convert CUDA API values into our internal representation.
|
||||
///
|
||||
/// Similar to [`TryFrom`], but we can implement this for primitive types. We also provide conversions from pointers to references.
|
||||
|
@ -142,7 +152,11 @@ from_cuda_nop!(
|
|||
CUcontext,
|
||||
cublasHandle_t,
|
||||
cublasStatus_t,
|
||||
CUlaunchConfig
|
||||
CUlaunchConfig,
|
||||
cublasMath_t,
|
||||
nvmlDevice_t,
|
||||
nvmlFieldValue_t,
|
||||
nvmlGpuFabricInfo_t
|
||||
);
|
||||
from_cuda_transmute!(
|
||||
CUuuid => hipUUID,
|
||||
|
@ -215,6 +229,76 @@ impl<'a, E: CudaErrorType> FromCuda<'a, cublasMath_t, E> for rocblas_math_mode {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, rocblas_math_mode, E> for cublasMath_t {
|
||||
fn from_cuda(mode: &'a rocblas_math_mode) -> Result<Self, E> {
|
||||
Ok(match *mode {
|
||||
rocblas_math_mode_::rocblas_default_math => cublasMath_t::CUBLAS_DEFAULT_MATH,
|
||||
rocblas_math_mode::rocblas_xf32_xdl_math_op => cublasMath_t::CUBLAS_TF32_TENSOR_OP_MATH,
|
||||
_ => return Err(E::NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cudaDataType, E> for rocblas_datatype {
|
||||
fn from_cuda(mode: &'a cuda_types::cublas::cudaDataType) -> Result<Self, E> {
|
||||
Ok(match *mode {
|
||||
cudaDataType_t::CUDA_R_16F => rocblas_datatype::rocblas_datatype_f16_r,
|
||||
cudaDataType_t::CUDA_R_32F => rocblas_datatype::rocblas_datatype_f32_r,
|
||||
cudaDataType_t::CUDA_R_64F => rocblas_datatype::rocblas_datatype_f64_r,
|
||||
cudaDataType_t::CUDA_C_16F => rocblas_datatype::rocblas_datatype_f16_c,
|
||||
cudaDataType_t::CUDA_C_32F => rocblas_datatype::rocblas_datatype_f32_c,
|
||||
cudaDataType_t::CUDA_C_64F => rocblas_datatype::rocblas_datatype_f64_c,
|
||||
cudaDataType_t::CUDA_R_8I => rocblas_datatype::rocblas_datatype_i8_r,
|
||||
cudaDataType_t::CUDA_R_8U => rocblas_datatype::rocblas_datatype_u8_r,
|
||||
cudaDataType_t::CUDA_R_32I => rocblas_datatype::rocblas_datatype_i32_r,
|
||||
cudaDataType_t::CUDA_R_32U => rocblas_datatype::rocblas_datatype_u32_r,
|
||||
cudaDataType_t::CUDA_C_8I => rocblas_datatype::rocblas_datatype_i8_c,
|
||||
cudaDataType_t::CUDA_C_8U => rocblas_datatype::rocblas_datatype_u8_c,
|
||||
cudaDataType_t::CUDA_C_32I => rocblas_datatype::rocblas_datatype_i32_c,
|
||||
cudaDataType_t::CUDA_C_32U => rocblas_datatype::rocblas_datatype_u32_c,
|
||||
cudaDataType_t::CUDA_R_16BF => rocblas_datatype::rocblas_datatype_bf16_r,
|
||||
cudaDataType_t::CUDA_C_16BF => rocblas_datatype::rocblas_datatype_bf16_c,
|
||||
cudaDataType_t::CUDA_R_8F_UE4M3 => rocblas_datatype::rocblas_datatype_f8_r,
|
||||
cudaDataType_t::CUDA_R_8F_E5M2 => rocblas_datatype::rocblas_datatype_bf8_r,
|
||||
_ => return Err(E::NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasComputeType_t, E>
|
||||
for rocblas_computetype
|
||||
{
|
||||
fn from_cuda(mode: &'a cuda_types::cublas::cublasComputeType_t) -> Result<Self, E> {
|
||||
Ok(match *mode {
|
||||
cublasComputeType_t::CUBLAS_COMPUTE_32F => {
|
||||
rocblas_computetype::rocblas_compute_type_f32
|
||||
}
|
||||
_ => return Err(E::NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasComputeType_t, E>
|
||||
for rocblas_datatype
|
||||
{
|
||||
fn from_cuda(mode: &'a cuda_types::cublas::cublasComputeType_t) -> Result<Self, E> {
|
||||
Ok(match *mode {
|
||||
cublasComputeType_t::CUBLAS_COMPUTE_16F => rocblas_datatype::rocblas_datatype_f16_r,
|
||||
cublasComputeType_t::CUBLAS_COMPUTE_32F => rocblas_datatype::rocblas_datatype_f32_r,
|
||||
cublasComputeType_t::CUBLAS_COMPUTE_64F => rocblas_datatype::rocblas_datatype_f64_r,
|
||||
_ => return Err(E::NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasGemmAlgo_t, E>
|
||||
for rocblas_gemm_algo
|
||||
{
|
||||
fn from_cuda(_: &'a cuda_types::cublas::cublasGemmAlgo_t) -> Result<Self, E> {
|
||||
Ok(rocblas_gemm_algo::rocblas_gemm_algo_standard)
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents an object that can be sent across the API boundary.
|
||||
///
|
||||
/// Some CUDA calls operate on an opaque handle. For example, `cuModuleLoadData` will load a
|
||||
|
|
|
@ -11,6 +11,10 @@ crate-type = ["cdylib"]
|
|||
[dependencies]
|
||||
cuda_macros = { path = "../cuda_macros" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_common = { path = "../zluda_common" }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
rocm_smi-sys = { path = "../ext/rocm_smi-sys" }
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use cuda_types::nvml::*;
|
||||
use std::{ffi::CStr, ptr};
|
||||
|
||||
const VERSION: &'static CStr = c"550.77";
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
unimplemented!()
|
||||
|
@ -11,20 +13,10 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
|
|||
nvmlReturn_t::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) fn error_string(_result: cuda_types::nvml::nvmlReturn_t) -> *const ::core::ffi::c_char {
|
||||
c"".as_ptr()
|
||||
}
|
||||
|
||||
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
||||
const VERSION: &'static CStr = c"550.77";
|
||||
|
||||
pub(crate) fn system_get_driver_version(
|
||||
result: *mut ::core::ffi::c_char,
|
||||
length: ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t {
|
||||
) -> nvmlReturn_t {
|
||||
if result == ptr::null_mut() {
|
||||
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
@ -37,3 +29,7 @@ pub(crate) fn system_get_driver_version(
|
|||
}
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
||||
pub(crate) fn error_string(_result: nvmlReturn_t) -> *const ::core::ffi::c_char {
|
||||
c"".as_ptr()
|
||||
}
|
77
zluda_ml/src/impl_unix.rs
Normal file
77
zluda_ml/src/impl_unix.rs
Normal file
|
@ -0,0 +1,77 @@
|
|||
use cuda_types::nvml::*;
|
||||
use rocm_smi_sys::*;
|
||||
use std::mem;
|
||||
use zluda_common::{from_cuda_object, ZludaObject};
|
||||
|
||||
pub(crate) use crate::impl_common::error_string;
|
||||
pub(crate) use crate::impl_common::system_get_driver_version;
|
||||
|
||||
pub(crate) struct Device {
|
||||
_index: u32,
|
||||
}
|
||||
|
||||
impl ZludaObject for Device {
|
||||
const COOKIE: usize = 0x79443851e7cee0d9;
|
||||
|
||||
type Error = nvmlError_t;
|
||||
type CudaHandle = nvmlDevice_t;
|
||||
|
||||
fn drop_checked(&mut self) -> nvmlReturn_t {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
from_cuda_object!(Device);
|
||||
|
||||
pub(crate) unsafe fn init() -> rsmi_status_t {
|
||||
rsmi_init(0)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn init_v2() -> rsmi_status_t {
|
||||
rsmi_init(0)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn init_with_flags(_flags: ::core::ffi::c_uint) -> rsmi_status_t {
|
||||
rsmi_init(0)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn shutdown() -> rsmi_status_t {
|
||||
rsmi_shut_down()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_count_v2(device_count: &mut ::core::ffi::c_uint) -> rsmi_status_t {
|
||||
rsmi_num_monitor_devices(device_count)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_field_values(
|
||||
_device: &Device,
|
||||
values_count: ::core::ffi::c_int,
|
||||
values: &mut cuda_types::nvml::nvmlFieldValue_t,
|
||||
) -> nvmlReturn_t {
|
||||
for field in std::slice::from_raw_parts_mut(values, values_count as usize) {
|
||||
get_field_value(field)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn get_field_value(field: &mut nvmlFieldValue_st) -> Result<(), nvmlError_t> {
|
||||
*field = mem::zeroed();
|
||||
field.nvmlReturn = nvmlReturn_t::ERROR_NOT_SUPPORTED;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_gpu_fabric_info(
|
||||
_device: &Device,
|
||||
gpu_fabric_info: &mut cuda_types::nvml::nvmlGpuFabricInfo_t,
|
||||
) -> nvmlReturn_t {
|
||||
*gpu_fabric_info = mem::zeroed();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn device_get_handle_by_index_v2(
|
||||
index: ::core::ffi::c_uint,
|
||||
device: &mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> nvmlReturn_t {
|
||||
*device = Device { _index: index }.wrap();
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
50
zluda_ml/src/impl_win.rs
Normal file
50
zluda_ml/src/impl_win.rs
Normal file
|
@ -0,0 +1,50 @@
|
|||
use cuda_types::nvml::*;
|
||||
|
||||
pub(crate) use crate::impl_common::error_string;
|
||||
pub(crate) use crate::impl_common::system_get_driver_version;
|
||||
|
||||
pub(crate) unsafe fn init() -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn init_v2() -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn init_with_flags(_flags: ::core::ffi::c_uint) -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn shutdown() -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_count_v2(_device_count: &mut ::core::ffi::c_uint) -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_field_values(
|
||||
_device: cuda_types::nvml::nvmlDevice_t,
|
||||
_values_count: ::core::ffi::c_int,
|
||||
_values: &mut cuda_types::nvml::nvmlFieldValue_t,
|
||||
) -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
unsafe fn get_field_value(_field: &mut nvmlFieldValue_st) -> Result<(), nvmlError_t> {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn device_get_gpu_fabric_info(
|
||||
_device: cuda_types::nvml::nvmlDevice_t,
|
||||
_gpu_fabric_info: &mut cuda_types::nvml::nvmlGpuFabricInfo_t,
|
||||
) -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
||||
|
||||
pub(crate) fn device_get_handle_by_index_v2(
|
||||
_index: ::core::ffi::c_uint,
|
||||
_device: &mut cuda_types::nvml::nvmlDevice_t,
|
||||
) -> nvmlReturn_t {
|
||||
crate::impl_common::unimplemented()
|
||||
}
|
|
@ -1,4 +1,7 @@
|
|||
#[cfg_attr(windows, path = "impl_win.rs")]
|
||||
#[cfg_attr(unix, path = "impl_unix.rs")]
|
||||
mod r#impl;
|
||||
mod impl_common;
|
||||
|
||||
macro_rules! unimplemented_fn {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
|
@ -6,13 +9,26 @@ macro_rules! unimplemented_fn {
|
|||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
r#impl::unimplemented()
|
||||
impl_common::unimplemented()
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! implemented_fn {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
cuda_macros::nvml_normalize_fn!( crate::r#impl::$fn_name ) ( $( zluda_common::FromCuda::<_, cuda_types::nvml::nvmlError_t>::from_cuda(&$arg_id )?),*)?;
|
||||
Ok(())
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! implemented_unnormalized {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
|
@ -26,5 +42,17 @@ macro_rules! implemented_fn {
|
|||
|
||||
cuda_macros::nvml_function_declarations!(
|
||||
unimplemented_fn,
|
||||
implemented_fn <= [nvmlErrorString, nvmlInit_v2, nvmlSystemGetDriverVersion]
|
||||
implemented_fn
|
||||
<= [
|
||||
nvmlDeviceGetCount_v2,
|
||||
nvmlDeviceGetFieldValues,
|
||||
nvmlDeviceGetGpuFabricInfo,
|
||||
nvmlDeviceGetHandleByIndex_v2,
|
||||
nvmlInit,
|
||||
nvmlInitWithFlags,
|
||||
nvmlInit_v2,
|
||||
nvmlShutdown,
|
||||
nvmlSystemGetDriverVersion,
|
||||
],
|
||||
implemented_unnormalized <= [nvmlErrorString,]
|
||||
);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue