Add more NVML and cuBLAS coverage (#481)

Commit ec1358af1c by Andrzej Janik, 2025-08-26 21:55:12 +02:00 (committed by GitHub)
17 changed files with 5846 additions and 22 deletions

Cargo.lock (generated, 8 changed lines)

@ -455,6 +455,7 @@ dependencies = [
"cuda_macros",
"hip_runtime-sys",
"rocblas-sys",
"rocm_smi-sys",
]
[[package]]
@ -2737,6 +2738,10 @@ dependencies = [
"hip_runtime-sys",
]
[[package]]
name = "rocm_smi-sys"
version = "0.0.0"
[[package]]
name = "rustc-hash"
version = "1.1.0"
@ -3750,6 +3755,7 @@ version = "0.0.0"
dependencies = [
"cuda_macros",
"cuda_types",
"hip_runtime-sys",
"rocblas-sys",
"zluda_common",
]
@ -3817,6 +3823,8 @@ version = "0.0.0"
dependencies = [
"cuda_macros",
"cuda_types",
"rocm_smi-sys",
"zluda_common",
]
[[package]]


@ -8498,4 +8498,125 @@ returned in \a sessionCount
device: cuda_types::nvml::nvmlDevice_t,
state: *mut cuda_types::nvml::nvmlPowerSmoothingState_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlInit() -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetCount(
deviceCount: *mut ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetHandleByIndex(
index: ::core::ffi::c_uint,
device: *mut cuda_types::nvml::nvmlDevice_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetHandleByPciBusId(
pciBusId: *const ::core::ffi::c_char,
device: *mut cuda_types::nvml::nvmlDevice_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetPciInfo(
device: cuda_types::nvml::nvmlDevice_t,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetPciInfo_v2(
device: cuda_types::nvml::nvmlDevice_t,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetNvLinkRemotePciInfo(
device: cuda_types::nvml::nvmlDevice_t,
link: ::core::ffi::c_uint,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGridLicensableFeatures(
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGridLicensableFeatures_v2(
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGridLicensableFeatures_v3(
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceRemoveGpu(
pciInfo: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlEventSetWait(
set: cuda_types::nvml::nvmlEventSet_t,
data: *mut cuda_types::nvml::nvmlEventData_t,
timeoutms: ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetAttributes(
device: cuda_types::nvml::nvmlDevice_t,
attributes: *mut cuda_types::nvml::nvmlDeviceAttributes_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlComputeInstanceGetInfo(
computeInstance: cuda_types::nvml::nvmlComputeInstance_t,
info: *mut cuda_types::nvml::nvmlComputeInstanceInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetComputeRunningProcesses(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetComputeRunningProcesses_v2(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGraphicsRunningProcesses(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGraphicsRunningProcesses_v2(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetMPSComputeRunningProcesses(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetMPSComputeRunningProcesses_v2(
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetGpuInstancePossiblePlacements(
device: cuda_types::nvml::nvmlDevice_t,
profileId: ::core::ffi::c_uint,
placements: *mut cuda_types::nvml::nvmlGpuInstancePlacement_t,
count: *mut ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlVgpuInstanceGetLicenseInfo(
vgpuInstance: cuda_types::nvml::nvmlVgpuInstance_t,
licenseInfo: *mut cuda_types::nvml::nvmlVgpuLicenseInfo_t,
) -> cuda_types::nvml::nvmlReturn_t;
#[must_use]
fn nvmlDeviceGetDriverModel(
device: cuda_types::nvml::nvmlDevice_t,
current: *mut cuda_types::nvml::nvmlDriverModel_t,
pending: *mut cuda_types::nvml::nvmlDriverModel_t,
) -> cuda_types::nvml::nvmlReturn_t;
}


@ -9,3 +9,6 @@ cuda_macros = { path = "../cuda_macros" }
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
bitflags = "2.9.1"
rocblas-sys = { path = "../ext/rocblas-sys" }
[target.'cfg(unix)'.dependencies]
rocm_smi-sys = { path = "../ext/rocm_smi-sys" }


@ -4879,3 +4879,20 @@ pub type nvmlReturn_t = ::core::result::Result<(), nvmlError_t>;
const _: fn() = || {
let _ = std::mem::transmute::<nvmlReturn_t, u32>;
};
#[cfg(unix)]
impl From<rocm_smi_sys::rsmi_error> for nvmlError_t {
fn from(error: rocm_smi_sys::rsmi_error) -> Self {
match error {
rocm_smi_sys::rsmi_error::INVALID_ARGS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
rocm_smi_sys::rsmi_error::NOT_SUPPORTED => nvmlError_t::from(nvmlError_t::NOT_SUPPORTED),
rocm_smi_sys::rsmi_error::PERMISSION => nvmlError_t::from(nvmlError_t::NO_PERMISSION),
rocm_smi_sys::rsmi_error::INPUT_OUT_OF_BOUNDS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
rocm_smi_sys::rsmi_error::INIT_ERROR => nvmlError_t::from(nvmlError_t::UNINITIALIZED),
rocm_smi_sys::rsmi_error::NOT_FOUND => nvmlError_t::from(nvmlError_t::GPU_NOT_FOUND),
rocm_smi_sys::rsmi_error::INSUFFICIENT_SIZE => nvmlError_t::from(nvmlError_t::INSUFFICIENT_SIZE),
rocm_smi_sys::rsmi_error::INTERRUPT => nvmlError_t::from(nvmlError_t::IRQ_ISSUE),
rocm_smi_sys::rsmi_error::NO_DATA => nvmlError_t::from(nvmlError_t::NO_DATA),
_ => nvmlError_t::from(nvmlError_t::UNKNOWN),
}
}
}
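This conversion is what lets ROCm SMI status codes flow into NVML results through the `?` operator. A minimal sketch of the pattern, assuming the Result-converted rocm_smi-sys bindings generated below (the helper function itself is illustrative, not part of this commit):
#[cfg(unix)]
unsafe fn monitor_device_count() -> Result<u32, nvmlError_t> {
    let mut count: u32 = 0;
    // After the bindgen Result conversion, rsmi_num_monitor_devices returns
    // Result<(), rsmi_error>; `?` maps rsmi_error into nvmlError_t via the
    // From impl above.
    rocm_smi_sys::rsmi_num_monitor_devices(&mut count)?;
    Ok(count)
}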

ext/rocm_smi-sys/Cargo.toml (new vendored file, 7 lines)

@ -0,0 +1,7 @@
[package]
name = "rocm_smi-sys"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]

ext/rocm_smi-sys/src/lib.rs (new vendored file, 4735 lines)
File diff suppressed because it is too large


@ -13103,6 +13103,566 @@ pub fn write_nvmlDevicePowerSmoothingSetState(
)?;
writer.write_all(b")")
}
pub fn write_nvmlInit(
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(b"()")
}
pub fn write_nvmlDeviceGetCount(
writer: &mut (impl std::io::Write + ?Sized),
deviceCount: *mut ::core::ffi::c_uint,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(deviceCount), ": ").as_bytes())?;
crate::CudaDisplay::write(&deviceCount, "nvmlDeviceGetCount", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetHandleByIndex(
writer: &mut (impl std::io::Write + ?Sized),
index: ::core::ffi::c_uint,
device: *mut cuda_types::nvml::nvmlDevice_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(index), ": ").as_bytes())?;
crate::CudaDisplay::write(&index, "nvmlDeviceGetHandleByIndex", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(&device, "nvmlDeviceGetHandleByIndex", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetHandleByPciBusId(
writer: &mut (impl std::io::Write + ?Sized),
pciBusId: *const ::core::ffi::c_char,
device: *mut cuda_types::nvml::nvmlDevice_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(pciBusId), ": ").as_bytes())?;
crate::CudaDisplay::write(
&pciBusId,
"nvmlDeviceGetHandleByPciBusId",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetHandleByPciBusId",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetPciInfo(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(&device, "nvmlDeviceGetPciInfo", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
crate::CudaDisplay::write(&pci, "nvmlDeviceGetPciInfo", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetPciInfo_v2(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(&device, "nvmlDeviceGetPciInfo_v2", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
crate::CudaDisplay::write(&pci, "nvmlDeviceGetPciInfo_v2", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetNvLinkRemotePciInfo(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
link: ::core::ffi::c_uint,
pci: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetNvLinkRemotePciInfo",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(link), ": ").as_bytes())?;
crate::CudaDisplay::write(
&link,
"nvmlDeviceGetNvLinkRemotePciInfo",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pci), ": ").as_bytes())?;
crate::CudaDisplay::write(
&pci,
"nvmlDeviceGetNvLinkRemotePciInfo",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGridLicensableFeatures(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGridLicensableFeatures",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
crate::CudaDisplay::write(
&pGridLicensableFeatures,
"nvmlDeviceGetGridLicensableFeatures",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGridLicensableFeatures_v2(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGridLicensableFeatures_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
crate::CudaDisplay::write(
&pGridLicensableFeatures,
"nvmlDeviceGetGridLicensableFeatures_v2",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGridLicensableFeatures_v3(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
pGridLicensableFeatures: *mut cuda_types::nvml::nvmlGridLicensableFeatures_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGridLicensableFeatures_v3",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pGridLicensableFeatures), ": ").as_bytes())?;
crate::CudaDisplay::write(
&pGridLicensableFeatures,
"nvmlDeviceGetGridLicensableFeatures_v3",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceRemoveGpu(
writer: &mut (impl std::io::Write + ?Sized),
pciInfo: *mut cuda_types::nvml::nvmlPciInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(pciInfo), ": ").as_bytes())?;
crate::CudaDisplay::write(&pciInfo, "nvmlDeviceRemoveGpu", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlEventSetWait(
writer: &mut (impl std::io::Write + ?Sized),
set: cuda_types::nvml::nvmlEventSet_t,
data: *mut cuda_types::nvml::nvmlEventData_t,
timeoutms: ::core::ffi::c_uint,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(set), ": ").as_bytes())?;
crate::CudaDisplay::write(&set, "nvmlEventSetWait", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(data), ": ").as_bytes())?;
crate::CudaDisplay::write(&data, "nvmlEventSetWait", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(timeoutms), ": ").as_bytes())?;
crate::CudaDisplay::write(&timeoutms, "nvmlEventSetWait", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetAttributes(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
attributes: *mut cuda_types::nvml::nvmlDeviceAttributes_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(&device, "nvmlDeviceGetAttributes", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributes), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributes, "nvmlDeviceGetAttributes", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlComputeInstanceGetInfo(
writer: &mut (impl std::io::Write + ?Sized),
computeInstance: cuda_types::nvml::nvmlComputeInstance_t,
info: *mut cuda_types::nvml::nvmlComputeInstanceInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(computeInstance), ": ").as_bytes())?;
crate::CudaDisplay::write(
&computeInstance,
"nvmlComputeInstanceGetInfo",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(info), ": ").as_bytes())?;
crate::CudaDisplay::write(&info, "nvmlComputeInstanceGetInfo", arg_idx, writer)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetComputeRunningProcesses(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetComputeRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetComputeRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetComputeRunningProcesses",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetComputeRunningProcesses_v2(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGraphicsRunningProcesses(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGraphicsRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetGraphicsRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetGraphicsRunningProcesses",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGraphicsRunningProcesses_v2(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGraphicsRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetGraphicsRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetGraphicsRunningProcesses_v2",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetMPSComputeRunningProcesses(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v1_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetMPSComputeRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetMPSComputeRunningProcesses",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetMPSComputeRunningProcesses",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetMPSComputeRunningProcesses_v2(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
infoCount: *mut ::core::ffi::c_uint,
infos: *mut cuda_types::nvml::nvmlProcessInfo_v2_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infoCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infoCount,
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(infos), ": ").as_bytes())?;
crate::CudaDisplay::write(
&infos,
"nvmlDeviceGetMPSComputeRunningProcesses_v2",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetGpuInstancePossiblePlacements(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
profileId: ::core::ffi::c_uint,
placements: *mut cuda_types::nvml::nvmlGpuInstancePlacement_t,
count: *mut ::core::ffi::c_uint,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(
&device,
"nvmlDeviceGetGpuInstancePossiblePlacements",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(profileId), ": ").as_bytes())?;
crate::CudaDisplay::write(
&profileId,
"nvmlDeviceGetGpuInstancePossiblePlacements",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(placements), ": ").as_bytes())?;
crate::CudaDisplay::write(
&placements,
"nvmlDeviceGetGpuInstancePossiblePlacements",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(count), ": ").as_bytes())?;
crate::CudaDisplay::write(
&count,
"nvmlDeviceGetGpuInstancePossiblePlacements",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlVgpuInstanceGetLicenseInfo(
writer: &mut (impl std::io::Write + ?Sized),
vgpuInstance: cuda_types::nvml::nvmlVgpuInstance_t,
licenseInfo: *mut cuda_types::nvml::nvmlVgpuLicenseInfo_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(vgpuInstance), ": ").as_bytes())?;
crate::CudaDisplay::write(
&vgpuInstance,
"nvmlVgpuInstanceGetLicenseInfo",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(licenseInfo), ": ").as_bytes())?;
crate::CudaDisplay::write(
&licenseInfo,
"nvmlVgpuInstanceGetLicenseInfo",
arg_idx,
writer,
)?;
writer.write_all(b")")
}
pub fn write_nvmlDeviceGetDriverModel(
writer: &mut (impl std::io::Write + ?Sized),
device: cuda_types::nvml::nvmlDevice_t,
current: *mut cuda_types::nvml::nvmlDriverModel_t,
pending: *mut cuda_types::nvml::nvmlDriverModel_t,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(device), ": ").as_bytes())?;
crate::CudaDisplay::write(&device, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(current), ": ").as_bytes())?;
crate::CudaDisplay::write(&current, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(pending), ": ").as_bytes())?;
crate::CudaDisplay::write(&pending, "nvmlDeviceGetDriverModel", arg_idx, writer)?;
writer.write_all(b")")
}
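These generated writers are what the trace layer uses to pretty-print call arguments. A rough illustration of driving one directly (the helper, buffer and argument values are made up for the example; the exact rendering depends on the CudaDisplay impls):
fn demo_trace_line() -> std::io::Result<String> {
    let mut buf: Vec<u8> = Vec::new();
    // Renders the argument list, e.g. "(index: 0, device: 0x0)".
    write_nvmlDeviceGetHandleByIndex(&mut buf, 0, std::ptr::null_mut())?;
    Ok(String::from_utf8_lossy(&buf).into_owned())
}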
impl crate::CudaDisplay for cuda_types::nvml::nvmlReturn_t {
fn write(
&self,


@ -29,6 +29,7 @@ fn main() {
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
);
generate_rocblas(&crate_root, &["..", "ext", "rocblas-sys", "src", "lib.rs"]);
generate_rocm_smi(&crate_root, &["..", "ext", "rocm_smi-sys", "src", "lib.rs"]);
let cuda_functions = generate_cuda(&crate_root);
generate_process_address_table(&crate_root, cuda_functions);
generate_ml(&crate_root);
@ -179,6 +180,7 @@ fn generate_cufft(crate_root: &PathBuf) {
&crate_root,
&["..", "cuda_types", "src", "cufft.rs"],
&module,
None,
);
generate_display_perflib(
Some(&result_options),
@ -245,6 +247,7 @@ fn generate_cusparse(crate_root: &PathBuf) {
&crate_root,
&["..", "cuda_types", "src", "cusparse.rs"],
&module,
None,
);
generate_display_perflib(
Some(&result_options),
@ -685,6 +688,7 @@ fn generate_cublas(crate_root: &PathBuf) {
&crate_root,
&["..", "cuda_types", "src", "cublas.rs"],
&module,
None,
);
generate_display_perflib(
Some(&result_options),
@ -759,6 +763,7 @@ fn generate_cublaslt(crate_root: &PathBuf) {
&crate_root,
&["..", "cuda_types", "src", "cublaslt.rs"],
&module_blas,
None,
);
generate_display_perflib(
None,
@ -837,7 +842,10 @@ fn generate_ml(crate_root: &PathBuf) {
.allowlist_var("^NVML.*")
.must_use_type("nvmlReturn_t")
.constified_enum("nvmlReturn_enum")
.clang_args(["-I/usr/local/cuda/include"])
.clang_args([
"-I/usr/local/cuda/include",
"-DNVML_NO_UNVERSIONED_FUNC_DEFS",
])
.generate()
.unwrap()
.to_string();
@ -856,12 +864,31 @@ fn generate_ml(crate_root: &PathBuf) {
success: ("NVML_SUCCESS", "SUCCESS"),
hip_type: None,
};
let suffix =
"#[cfg(unix)]
impl From<rocm_smi_sys::rsmi_error> for nvmlError_t {
fn from(error: rocm_smi_sys::rsmi_error) -> Self {
match error {
rocm_smi_sys::rsmi_error::INVALID_ARGS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
rocm_smi_sys::rsmi_error::NOT_SUPPORTED => nvmlError_t::from(nvmlError_t::NOT_SUPPORTED),
rocm_smi_sys::rsmi_error::PERMISSION => nvmlError_t::from(nvmlError_t::NO_PERMISSION),
rocm_smi_sys::rsmi_error::INPUT_OUT_OF_BOUNDS => nvmlError_t::from(nvmlError_t::INVALID_ARGUMENT),
rocm_smi_sys::rsmi_error::INIT_ERROR => nvmlError_t::from(nvmlError_t::UNINITIALIZED),
rocm_smi_sys::rsmi_error::NOT_FOUND => nvmlError_t::from(nvmlError_t::GPU_NOT_FOUND),
rocm_smi_sys::rsmi_error::INSUFFICIENT_SIZE => nvmlError_t::from(nvmlError_t::INSUFFICIENT_SIZE),
rocm_smi_sys::rsmi_error::INTERRUPT => nvmlError_t::from(nvmlError_t::IRQ_ISSUE),
rocm_smi_sys::rsmi_error::NO_DATA => nvmlError_t::from(nvmlError_t::NO_DATA),
_ => nvmlError_t::from(nvmlError_t::UNKNOWN),
}
}
}";
generate_types_library(
Some(&result_options),
None,
&crate_root,
&["..", "cuda_types", "src", "nvml.rs"],
&module,
Some(suffix),
);
generate_display_perflib(
Some(&result_options),
@ -878,6 +905,7 @@ fn generate_types_library(
crate_root: &PathBuf,
path: &[&str],
module: &syn::File,
suffix: Option<&str>,
) {
let module = generate_types_library_impl(result_options, module);
let mut output = crate_root.clone();
@ -895,6 +923,9 @@ fn generate_types_library(
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
}
}
if let Some(suffix) = suffix {
text.push_str(suffix);
}
write_rust_to_file(output, &text)
}
@ -1038,6 +1069,35 @@ fn generate_rocblas(output: &PathBuf, path: &[&str]) {
write_rust_to_file(output, text)
}
fn generate_rocm_smi(output: &PathBuf, path: &[&str]) {
let rocm_smi_header = new_builder()
.header("/opt/rocm/include/rocm_smi/rocm_smi.h")
.allowlist_type("^rsmi.*")
.allowlist_function("^rsmi.*")
.allowlist_var("^RSMI_.*")
.must_use_type("rsmi_status_t")
.constified_enum("rsmi_status_t")
.clang_args(["-I/opt/rocm/include"])
.generate()
.unwrap()
.to_string();
let mut module: syn::File = syn::parse_str(&rocm_smi_header).unwrap();
let result_options = ConvertIntoRustResultOptions {
type_: "rsmi_status_t",
underlying_type: "rsmi_status_t",
new_error_type: "rsmi_error",
error_prefix: ("RSMI_STATUS_", "ERROR_"),
success: ("RSMI_STATUS_SUCCESS", "SUCCESS"),
hip_type: None,
};
let mut converter = ConvertIntoRustResult::new(result_options);
module.items = converter.convert(module.items).collect();
converter.flush(&mut module.items);
let mut output = output.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
for type_ in arg {
let type_ = Ident::new(type_, Span::call_site());


@ -12,6 +12,7 @@ cuda_macros = { path = "../cuda_macros" }
cuda_types = { path = "../cuda_types" }
zluda_common = { path = "../zluda_common" }
rocblas-sys = { path = "../ext/rocblas-sys" }
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
[package.metadata.zluda]
linux_symlinks = [


@ -1,9 +1,8 @@
use std::mem;
use cuda_types::cublas::*;
use zluda_common::{from_cuda_object, ZludaObject};
use hip_runtime_sys::*;
use rocblas_sys::*;
use std::mem;
use zluda_common::{from_cuda_object, ZludaObject};
pub struct Handle {
handle: rocblas_handle,
@ -153,3 +152,71 @@ pub(crate) fn sgemm_v2(
pub(crate) fn destroy_v2(handle: cublasHandle_t) -> cublasStatus_t {
zluda_common::drop_checked::<Handle>(handle)
}
pub(crate) unsafe fn set_stream_v2(handle: &Handle, stream: hipStream_t) -> rocblas_status {
rocblas_set_stream(handle.handle, stream)
}
pub(crate) unsafe fn set_workspace_v2(
handle: &Handle,
workspace: *mut ::core::ffi::c_void,
size: usize,
) -> rocblas_status {
rocblas_set_workspace(handle.handle, workspace, size)
}
pub(crate) unsafe fn get_math_mode(handle: &Handle, mode: &mut cublasMath_t) -> rocblas_status {
let mut roc_mode = mem::zeroed();
rocblas_get_math_mode(handle.handle, &mut roc_mode)?;
*mode = zluda_common::FromCuda::from_cuda(&roc_mode)?;
Ok(())
}
pub(crate) unsafe fn gemm_ex(
handle: &Handle,
transa: rocblas_operation,
transb: rocblas_operation,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
alpha: *const ::core::ffi::c_void,
a: *const ::core::ffi::c_void,
a_type: rocblas_datatype,
lda: ::core::ffi::c_int,
b: *const ::core::ffi::c_void,
b_type: rocblas_datatype,
ldb: ::core::ffi::c_int,
beta: *const ::core::ffi::c_void,
c: *mut ::core::ffi::c_void,
c_type: rocblas_datatype,
ldc: ::core::ffi::c_int,
compute_type: rocblas_datatype,
algo: rocblas_gemm_algo,
) -> rocblas_status {
rocblas_gemm_ex(
handle.handle,
transa,
transb,
m,
n,
k,
alpha,
a,
a_type,
lda,
b,
b_type,
ldb,
beta,
c,
c_type,
ldc,
c,
c_type,
ldc,
compute_type,
algo,
0,
0,
)
}
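Note on the trailing arguments: rocblas_gemm_ex takes separate input (C) and output (D) matrices plus a solution index and flags, while cublasGemmEx updates C in place. The wrapper therefore forwards the same buffer, datatype and leading dimension (c, c_type, ldc) for both C and D, and passes 0 for the solution index and flags.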


@ -47,10 +47,16 @@ cuda_macros::cublas_function_declarations!(
implemented
<= [
cublasCreate_v2,
cublasDestroy_v2,
cublasGemmEx,
cublasGetMathMode,
cublasLtCreate,
cublasLtDestroy,
cublasSetMathMode,
cublasSetStream_v2,
cublasSetWorkspace_v2,
cublasSgemmStridedBatched,
cublasSgemm_v2,
cublasDestroy_v2
],
implemented_and_always_succeeds
<= [


@ -1,4 +1,4 @@
use cuda_types::{cublas::*, cuda::*};
use cuda_types::{cublas::*, cuda::*, nvml::*};
use hip_runtime_sys::*;
use rocblas_sys::*;
use std::{
@ -22,6 +22,16 @@ impl CudaErrorType for cublasError_t {
const NOT_SUPPORTED: Self = Self::NOT_SUPPORTED;
}
impl CudaErrorType for rocblas_error {
const INVALID_VALUE: Self = Self::invalid_value;
const NOT_SUPPORTED: Self = Self::not_implemented;
}
impl CudaErrorType for nvmlError_t {
const INVALID_VALUE: Self = Self::INVALID_ARGUMENT;
const NOT_SUPPORTED: Self = Self::NOT_SUPPORTED;
}
/// Used to try to convert CUDA API values into our internal representation.
///
/// Similar to [`TryFrom`], but we can implement this for primitive types. We also provide conversions from pointers to references.
@ -142,7 +152,11 @@ from_cuda_nop!(
CUcontext,
cublasHandle_t,
cublasStatus_t,
CUlaunchConfig
CUlaunchConfig,
cublasMath_t,
nvmlDevice_t,
nvmlFieldValue_t,
nvmlGpuFabricInfo_t
);
from_cuda_transmute!(
CUuuid => hipUUID,
@ -215,6 +229,76 @@ impl<'a, E: CudaErrorType> FromCuda<'a, cublasMath_t, E> for rocblas_math_mode {
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, rocblas_math_mode, E> for cublasMath_t {
fn from_cuda(mode: &'a rocblas_math_mode) -> Result<Self, E> {
Ok(match *mode {
rocblas_math_mode_::rocblas_default_math => cublasMath_t::CUBLAS_DEFAULT_MATH,
rocblas_math_mode::rocblas_xf32_xdl_math_op => cublasMath_t::CUBLAS_TF32_TENSOR_OP_MATH,
_ => return Err(E::NOT_SUPPORTED),
})
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cudaDataType, E> for rocblas_datatype {
fn from_cuda(mode: &'a cuda_types::cublas::cudaDataType) -> Result<Self, E> {
Ok(match *mode {
cudaDataType_t::CUDA_R_16F => rocblas_datatype::rocblas_datatype_f16_r,
cudaDataType_t::CUDA_R_32F => rocblas_datatype::rocblas_datatype_f32_r,
cudaDataType_t::CUDA_R_64F => rocblas_datatype::rocblas_datatype_f64_r,
cudaDataType_t::CUDA_C_16F => rocblas_datatype::rocblas_datatype_f16_c,
cudaDataType_t::CUDA_C_32F => rocblas_datatype::rocblas_datatype_f32_c,
cudaDataType_t::CUDA_C_64F => rocblas_datatype::rocblas_datatype_f64_c,
cudaDataType_t::CUDA_R_8I => rocblas_datatype::rocblas_datatype_i8_r,
cudaDataType_t::CUDA_R_8U => rocblas_datatype::rocblas_datatype_u8_r,
cudaDataType_t::CUDA_R_32I => rocblas_datatype::rocblas_datatype_i32_r,
cudaDataType_t::CUDA_R_32U => rocblas_datatype::rocblas_datatype_u32_r,
cudaDataType_t::CUDA_C_8I => rocblas_datatype::rocblas_datatype_i8_c,
cudaDataType_t::CUDA_C_8U => rocblas_datatype::rocblas_datatype_u8_c,
cudaDataType_t::CUDA_C_32I => rocblas_datatype::rocblas_datatype_i32_c,
cudaDataType_t::CUDA_C_32U => rocblas_datatype::rocblas_datatype_u32_c,
cudaDataType_t::CUDA_R_16BF => rocblas_datatype::rocblas_datatype_bf16_r,
cudaDataType_t::CUDA_C_16BF => rocblas_datatype::rocblas_datatype_bf16_c,
cudaDataType_t::CUDA_R_8F_UE4M3 => rocblas_datatype::rocblas_datatype_f8_r,
cudaDataType_t::CUDA_R_8F_E5M2 => rocblas_datatype::rocblas_datatype_bf8_r,
_ => return Err(E::NOT_SUPPORTED),
})
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasComputeType_t, E>
for rocblas_computetype
{
fn from_cuda(mode: &'a cuda_types::cublas::cublasComputeType_t) -> Result<Self, E> {
Ok(match *mode {
cublasComputeType_t::CUBLAS_COMPUTE_32F => {
rocblas_computetype::rocblas_compute_type_f32
}
_ => return Err(E::NOT_SUPPORTED),
})
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasComputeType_t, E>
for rocblas_datatype
{
fn from_cuda(mode: &'a cuda_types::cublas::cublasComputeType_t) -> Result<Self, E> {
Ok(match *mode {
cublasComputeType_t::CUBLAS_COMPUTE_16F => rocblas_datatype::rocblas_datatype_f16_r,
cublasComputeType_t::CUBLAS_COMPUTE_32F => rocblas_datatype::rocblas_datatype_f32_r,
cublasComputeType_t::CUBLAS_COMPUTE_64F => rocblas_datatype::rocblas_datatype_f64_r,
_ => return Err(E::NOT_SUPPORTED),
})
}
}
impl<'a, E: CudaErrorType> FromCuda<'a, cuda_types::cublas::cublasGemmAlgo_t, E>
for rocblas_gemm_algo
{
fn from_cuda(_: &'a cuda_types::cublas::cublasGemmAlgo_t) -> Result<Self, E> {
Ok(rocblas_gemm_algo::rocblas_gemm_algo_standard)
}
}
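A minimal sketch of how these conversions are invoked at the API boundary (the helper is illustrative; in practice the cuda_macros-generated entry points perform the conversion for each argument):
fn to_rocblas_datatype(
    t: cuda_types::cublas::cudaDataType,
) -> Result<rocblas_datatype, cublasError_t> {
    // Maps the CUDA datatype onto its rocBLAS counterpart, or reports
    // NOT_SUPPORTED when there is none.
    FromCuda::<_, cublasError_t>::from_cuda(&t)
}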
/// Represents an object that can be sent across the API boundary.
///
/// Some CUDA calls operate on an opaque handle. For example, `cuModuleLoadData` will load a


@ -11,6 +11,10 @@ crate-type = ["cdylib"]
[dependencies]
cuda_macros = { path = "../cuda_macros" }
cuda_types = { path = "../cuda_types" }
zluda_common = { path = "../zluda_common" }
[target.'cfg(unix)'.dependencies]
rocm_smi-sys = { path = "../ext/rocm_smi-sys" }
[package.metadata.zluda]
linux_symlinks = [


@ -1,6 +1,8 @@
use cuda_types::nvml::*;
use std::{ffi::CStr, ptr};
const VERSION: &'static CStr = c"550.77";
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> nvmlReturn_t {
unimplemented!()
@ -11,20 +13,10 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
nvmlReturn_t::ERROR_NOT_SUPPORTED
}
pub(crate) fn error_string(_result: cuda_types::nvml::nvmlReturn_t) -> *const ::core::ffi::c_char {
c"".as_ptr()
}
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
nvmlReturn_t::SUCCESS
}
const VERSION: &'static CStr = c"550.77";
pub(crate) fn system_get_driver_version(
result: *mut ::core::ffi::c_char,
length: ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t {
) -> nvmlReturn_t {
if result == ptr::null_mut() {
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
}
@ -37,3 +29,7 @@ pub(crate) fn system_get_driver_version(
}
nvmlReturn_t::SUCCESS
}
pub(crate) fn error_string(_result: nvmlReturn_t) -> *const ::core::ffi::c_char {
c"".as_ptr()
}

zluda_ml/src/impl_unix.rs (new file, 77 lines)

@ -0,0 +1,77 @@
use cuda_types::nvml::*;
use rocm_smi_sys::*;
use std::mem;
use zluda_common::{from_cuda_object, ZludaObject};
pub(crate) use crate::impl_common::error_string;
pub(crate) use crate::impl_common::system_get_driver_version;
pub(crate) struct Device {
_index: u32,
}
impl ZludaObject for Device {
const COOKIE: usize = 0x79443851e7cee0d9;
type Error = nvmlError_t;
type CudaHandle = nvmlDevice_t;
fn drop_checked(&mut self) -> nvmlReturn_t {
Ok(())
}
}
from_cuda_object!(Device);
pub(crate) unsafe fn init() -> rsmi_status_t {
rsmi_init(0)
}
pub(crate) unsafe fn init_v2() -> rsmi_status_t {
rsmi_init(0)
}
pub(crate) unsafe fn init_with_flags(_flags: ::core::ffi::c_uint) -> rsmi_status_t {
rsmi_init(0)
}
pub(crate) unsafe fn shutdown() -> rsmi_status_t {
rsmi_shut_down()
}
pub(crate) unsafe fn device_get_count_v2(device_count: &mut ::core::ffi::c_uint) -> rsmi_status_t {
rsmi_num_monitor_devices(device_count)
}
pub(crate) unsafe fn device_get_field_values(
_device: &Device,
values_count: ::core::ffi::c_int,
values: &mut cuda_types::nvml::nvmlFieldValue_t,
) -> nvmlReturn_t {
for field in std::slice::from_raw_parts_mut(values, values_count as usize) {
get_field_value(field)?;
}
Ok(())
}
unsafe fn get_field_value(field: &mut nvmlFieldValue_st) -> Result<(), nvmlError_t> {
*field = mem::zeroed();
field.nvmlReturn = nvmlReturn_t::ERROR_NOT_SUPPORTED;
Ok(())
}
pub(crate) unsafe fn device_get_gpu_fabric_info(
_device: &Device,
gpu_fabric_info: &mut cuda_types::nvml::nvmlGpuFabricInfo_t,
) -> nvmlReturn_t {
*gpu_fabric_info = mem::zeroed();
Ok(())
}
pub(crate) fn device_get_handle_by_index_v2(
index: ::core::ffi::c_uint,
device: &mut cuda_types::nvml::nvmlDevice_t,
) -> nvmlReturn_t {
*device = Device { _index: index }.wrap();
nvmlReturn_t::SUCCESS
}

zluda_ml/src/impl_win.rs (new file, 50 lines)

@ -0,0 +1,50 @@
use cuda_types::nvml::*;
pub(crate) use crate::impl_common::error_string;
pub(crate) use crate::impl_common::system_get_driver_version;
pub(crate) unsafe fn init() -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn init_v2() -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn init_with_flags(_flags: ::core::ffi::c_uint) -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn shutdown() -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn device_get_count_v2(_device_count: &mut ::core::ffi::c_uint) -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn device_get_field_values(
_device: cuda_types::nvml::nvmlDevice_t,
_values_count: ::core::ffi::c_int,
_values: &mut cuda_types::nvml::nvmlFieldValue_t,
) -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
unsafe fn get_field_value(_field: &mut nvmlFieldValue_st) -> Result<(), nvmlError_t> {
crate::impl_common::unimplemented()
}
pub(crate) unsafe fn device_get_gpu_fabric_info(
_device: cuda_types::nvml::nvmlDevice_t,
_gpu_fabric_info: &mut cuda_types::nvml::nvmlGpuFabricInfo_t,
) -> nvmlReturn_t {
crate::impl_common::unimplemented()
}
pub(crate) fn device_get_handle_by_index_v2(
_index: ::core::ffi::c_uint,
_device: &mut cuda_types::nvml::nvmlDevice_t,
) -> nvmlReturn_t {
crate::impl_common::unimplemented()
}


@ -1,4 +1,7 @@
#[cfg_attr(windows, path = "impl_win.rs")]
#[cfg_attr(unix, path = "impl_unix.rs")]
mod r#impl;
mod impl_common;
macro_rules! unimplemented_fn {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
@ -6,19 +9,32 @@ macro_rules! unimplemented_fn {
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
r#impl::unimplemented()
impl_common::unimplemented()
}
)*
};
}
macro_rules! implemented_fn {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
cuda_macros::nvml_normalize_fn!( crate::r#impl::$fn_name ) ( $( zluda_common::FromCuda::<_, cuda_types::nvml::nvmlError_t>::from_cuda(&$arg_id )?),*)?;
Ok(())
}
)*
};
}
macro_rules! implemented_unnormalized {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
cuda_macros::nvml_normalize_fn!( crate::r#impl::$fn_name ) ( $( $arg_id ),* )
cuda_macros::nvml_normalize_fn!( crate::r#impl::$fn_name ) ( $( $arg_id),*)
}
)*
};
@ -26,5 +42,17 @@ macro_rules! implemented_fn {
cuda_macros::nvml_function_declarations!(
unimplemented_fn,
implemented_fn <= [nvmlErrorString, nvmlInit_v2, nvmlSystemGetDriverVersion]
implemented_fn
<= [
nvmlDeviceGetCount_v2,
nvmlDeviceGetFieldValues,
nvmlDeviceGetGpuFabricInfo,
nvmlDeviceGetHandleByIndex_v2,
nvmlInit,
nvmlInitWithFlags,
nvmlInit_v2,
nvmlShutdown,
nvmlSystemGetDriverVersion,
],
implemented_unnormalized <= [nvmlErrorString,]
);
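For reference, implemented_fn wires each exported NVML symbol to the platform module through FromCuda and the Result-based nvmlReturn_t. A rough sketch of what the expansion looks like for one entry point (the ABI string and the normalized implementation name are assumptions based on the macro, not actual macro output):
#[no_mangle]
pub unsafe extern "C" fn nvmlDeviceGetCount_v2(
    deviceCount: *mut ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t {
    // FromCuda turns the raw pointer into a &mut reference (a null pointer
    // becomes INVALID_ARGUMENT); the platform impl returns a Result whose
    // error type converts into nvmlError_t, and Ok(()) maps back to success.
    crate::r#impl::device_get_count_v2(
        zluda_common::FromCuda::<_, cuda_types::nvml::nvmlError_t>::from_cuda(&deviceCount)?,
    )?;
    Ok(())
}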