Add nvml tracing (#476)
Some checks failed
ZLUDA / Build (Linux) (push) Has been cancelled
ZLUDA / Build (Windows) (push) Has been cancelled
ZLUDA / Build AMD GPU unit tests (push) Has been cancelled
ZLUDA / Run AMD GPU unit tests (push) Has been cancelled

This commit is contained in:
Andrzej Janik 2025-08-19 08:09:15 +02:00 committed by GitHub
commit e805cb72a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 13505 additions and 5 deletions

15
Cargo.lock generated
View file

@ -3944,6 +3944,21 @@ dependencies = [
"zluda_trace_common", "zluda_trace_common",
] ]
[[package]]
name = "zluda_trace_ml"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_macros",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_trace_common",
]
[[package]] [[package]]
name = "zluda_trace_sparse" name = "zluda_trace_sparse"
version = "0.0.0" version = "0.0.0"

View file

@ -30,6 +30,7 @@ members = [
"zluda_trace_common", "zluda_trace_common",
"zluda_trace_dnn", "zluda_trace_dnn",
"zluda_trace_fft", "zluda_trace_fft",
"zluda_trace_nvml",
"zluda_trace_sparse", "zluda_trace_sparse",
"zluda_fft", "zluda_fft",
"zluda_inject", "zluda_inject",

File diff suppressed because it is too large Load diff

View file

@ -788,6 +788,26 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
} }
} }
impl<const N: usize> CudaDisplay for [i8; N] {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
let slice = unsafe { std::slice::from_raw_parts(self.as_ptr().cast::<u8>(), N) };
match CStr::from_bytes_until_nul(slice) {
Ok(cstr) => writer.write_all(cstr.to_bytes()),
Err(_) => CudaDisplay::write(
slice,
fn_name,
index,
writer,
),
}
}
}
impl<T: CudaDisplay> CudaDisplay for [T] { impl<T: CudaDisplay> CudaDisplay for [T] {
fn write( fn write(
&self, &self,
@ -1301,6 +1321,196 @@ fn cudnn9_print_elements(
} }
} }
fn write_nvml_value(
writer: &mut (impl std::io::Write + ?Sized),
type_: Option<cuda_types::nvml::nvmlValueType_t>,
value: cuda_types::nvml::nvmlValue_t) -> std::io::Result<()> {
match type_ {
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_DOUBLE) => {
writer.write_fmt(format_args!("{}", unsafe { value.dVal } ))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_UNSIGNED_INT) => {
writer.write_fmt(format_args!("{}", unsafe { value.uiVal }))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_UNSIGNED_LONG) => {
writer.write_fmt(format_args!("{}", unsafe { value.ulVal }))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_UNSIGNED_LONG_LONG) => {
writer.write_fmt(format_args!("{}", unsafe { value.ullVal }))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_SIGNED_LONG_LONG) => {
writer.write_fmt(format_args!("{}", unsafe { value.sllVal }))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_SIGNED_INT) => {
writer.write_fmt(format_args!("{}", unsafe { value.siVal }))
}
Some(cuda_types::nvml::nvmlValueType_t::NVML_VALUE_TYPE_UNSIGNED_SHORT) => {
writer.write_fmt(format_args!("{}", unsafe { value.usVal }))
}
Some(_) | None => {
CudaDisplay::write(&unsafe { mem::transmute::<_, [u8;8]>(value) }, "", 0, writer)
},
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlSample_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(timeStamp), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.timeStamp, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(sampleValue), ": ").as_bytes())?;
write_nvml_value(writer, None, self.sampleValue)?;
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlVgpuInstanceUtilizationSample_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(vgpuInstance), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.vgpuInstance, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(timeStamp), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.timeStamp, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(smUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.smUtil)?;
writer.write_all(concat!(", ", stringify!(memUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.memUtil)?;
writer.write_all(concat!(", ", stringify!(encUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.encUtil)?;
writer.write_all(concat!(", ", stringify!(decUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.decUtil)?;
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlVgpuInstanceUtilizationInfo_v1_t {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(timeStamp), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.timeStamp, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(vgpuInstance), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.vgpuInstance, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(smUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.smUtil)?;
writer.write_all(concat!(", ", stringify!(memUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.memUtil)?;
writer.write_all(concat!(", ", stringify!(encUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.encUtil)?;
writer.write_all(concat!(", ", stringify!(decUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.decUtil)?;
writer.write_all(concat!(", ", stringify!(jpgUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.jpgUtil)?;
writer.write_all(concat!(", ", stringify!(ofaUtil), ": ").as_bytes())?;
write_nvml_value(writer, None, self.ofaUtil)?;
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlFieldValue_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(fieldId), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.fieldId, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(scopeId), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.scopeId, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(timestamp), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.timestamp, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(latencyUsec), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.latencyUsec, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(valueType), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.valueType, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(nvmlReturn), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.nvmlReturn, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(value), ": ").as_bytes())?;
write_nvml_value(writer, Some(self.valueType), self.value)?;
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlVgpuSchedulerSetState_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(schedulerPolicy), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.schedulerPolicy, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(enableARRMode), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.enableARRMode, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(schedulerParams), ": ").as_bytes())?;
if self.enableARRMode != 0 {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedDataWithARR }, "", 0, writer)?;
} else {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedData } , "", 0, writer)?;
}
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlVgpuSchedulerLog_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(engineId), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.engineId, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(schedulerPolicy), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.schedulerPolicy, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(arrMode), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.arrMode, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(schedulerParams), ": ").as_bytes())?; if self.arrMode != 0 {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedDataWithARR }, "", 0, writer)?;
} else {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedData } , "", 0, writer)?;
}
writer.write_all(concat!(", ", stringify!(entriesCount), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.entriesCount, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(logEntries), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.logEntries, "", 0, writer)?;
writer.write_all(b" }")
}
}
impl crate::CudaDisplay for cuda_types::nvml::nvmlVgpuSchedulerGetState_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(schedulerPolicy), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.schedulerPolicy, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(arrMode), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.arrMode, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(schedulerParams), ": ").as_bytes())?;
if self.arrMode != 0 {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedDataWithARR }, "", 0, writer)?;
} else {
crate::CudaDisplay::write(&unsafe { self.schedulerParams.vgpuSchedData } , "", 0, writer)?;
}
writer.write_all(b" }")
}
}
mod dark_api; mod dark_api;
mod format_generated; mod format_generated;
pub use format_generated::*; pub use format_generated::*;
@ -1314,5 +1524,7 @@ mod format_generated_dnn9;
pub use format_generated_dnn9::*; pub use format_generated_dnn9::*;
mod format_generated_fft; mod format_generated_fft;
pub use format_generated_fft::*; pub use format_generated_fft::*;
mod format_generated_nvml;
pub use format_generated_nvml::*;
mod format_generated_sparse; mod format_generated_sparse;
pub use format_generated_sparse::*; pub use format_generated_sparse::*;

View file

@ -53,9 +53,9 @@ fn generate_process_address_table(crate_root: &PathBuf, mut cuda_fns: Vec<Ident>
library.get::<unsafe extern "system" fn( library.get::<unsafe extern "system" fn(
symbol: *const ::core::ffi::c_char, symbol: *const ::core::ffi::c_char,
pfn: *mut *mut ::core::ffi::c_void, pfn: *mut *mut ::core::ffi::c_void,
cudaVersion: ::core::ffi::c_int, cuda_version: ::core::ffi::c_int,
flags: cuda_types::cuda::cuuint64_t, flags: cuda_types::cuda::cuuint64_t,
symbolStatus: *mut cuda_types::cuda::CUdriverProcAddressQueryResult, symbol_status: *mut cuda_types::cuda::CUdriverProcAddressQueryResult,
) -> cuda_types::cuda::CUresult>(b"cuGetProcAddress_v2\0") ) -> cuda_types::cuda::CUresult>(b"cuGetProcAddress_v2\0")
} }
.unwrap(); .unwrap();
@ -863,6 +863,13 @@ fn generate_ml(crate_root: &PathBuf) {
&["..", "cuda_types", "src", "nvml.rs"], &["..", "cuda_types", "src", "nvml.rs"],
&module, &module,
); );
generate_display_perflib(
Some(&result_options),
&crate_root,
&["..", "format", "src", "format_generated_nvml.rs"],
&["cuda_types", "nvml"],
&module,
);
} }
fn generate_types_library( fn generate_types_library(
@ -1439,6 +1446,13 @@ fn generate_display_perflib(
"cudnnBackendDescriptor_t", "cudnnBackendDescriptor_t",
"cublasLtLoggerCallback_t", "cublasLtLoggerCallback_t",
"cusparseLoggerCallback_t", "cusparseLoggerCallback_t",
"nvmlSample_st",
"nvmlVgpuInstanceUtilizationSample_st",
"nvmlVgpuInstanceUtilizationInfo_v1_t",
"nvmlFieldValue_st",
"nvmlVgpuSchedulerSetState_st",
"nvmlVgpuSchedulerLog_st",
"nvmlVgpuSchedulerGetState_st",
]; ];
let ignore_functions = []; let ignore_functions = [];
let count_selectors = [ let count_selectors = [

View file

@ -16,6 +16,4 @@ cuda_types = { path = "../cuda_types" }
linux_symlinks = [ linux_symlinks = [
"libnvidia-ml.so", "libnvidia-ml.so",
"libnvidia-ml.so.1", "libnvidia-ml.so.1",
"trace/libnvidia-ml.so",
"trace/libnvidia-ml.so.1",
] ]

View file

@ -3,6 +3,7 @@ use cuda_types::{
cuda::{CUerror, CUresult, CUresultConsts, CUuuid}, cuda::{CUerror, CUresult, CUresultConsts, CUuuid},
cufft::cufftResultConsts, cufft::cufftResultConsts,
cusparse::cusparseStatus_tConsts, cusparse::cusparseStatus_tConsts,
nvml::nvmlReturn_tConsts,
}; };
use dark_api::ByteVecFfi; use dark_api::ByteVecFfi;
use std::{borrow::Cow, ffi::c_void, num::NonZero, ptr, sync::LazyLock}; use std::{borrow::Cow, ffi::c_void, num::NonZero, ptr, sync::LazyLock};
@ -438,3 +439,27 @@ impl ReprUsize for cuda_types::cusparse::cusparseMatrixType_t {
ByteVecFfi::new(writer) ByteVecFfi::new(writer)
} }
} }
impl ReprUsize for cuda_types::nvml::nvmlReturn_t {
fn to_usize(self) -> usize {
match self {
cuda_types::nvml::nvmlReturn_t::SUCCESS => 0,
Err(err) => err.0.get() as usize,
}
}
fn from_usize(x: usize) -> Self {
match NonZero::new(x as u32) {
None => Ok(()),
Some(err) => Err(cuda_types::nvml::nvmlError_t(err)),
}
}
const INTERNAL_ERROR: usize = cuda_types::nvml::nvmlError_t::UNKNOWN.0.get() as usize;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}

View file

@ -0,0 +1,26 @@
[package]
name = "zluda_trace_ml"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_trace_ml"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_macros = { path = "../cuda_macros" }
cuda_types = { path = "../cuda_types" }
zluda_trace_common = { path = "../zluda_trace_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"trace/libnvidia-ml.so",
"trace/libnvidia-ml.so.1",
]

View file

@ -0,0 +1,48 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_NVML_LIB")
.ok()
.unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1".to_string());
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_trace_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_trace_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_macros::nvml_function_declarations!(unimplemented);