From f3e143d8ddda55c3f4db5328409fedf2dbba2ce1 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 19 Sep 2025 01:53:01 +0000 Subject: [PATCH] Save source ptx and save to the right path --- zluda_trace/src/lib.rs | 63 ++++++++++++++++++-------------- zluda_trace/src/replay.rs | 46 +++++++++++++++++------ zluda_trace/src/trace.rs | 26 ++++++++----- zluda_trace_common/src/replay.rs | 4 ++ 4 files changed, 91 insertions(+), 48 deletions(-) diff --git a/zluda_trace/src/lib.rs b/zluda_trace/src/lib.rs index e0f5daf..8aa78bc 100644 --- a/zluda_trace/src/lib.rs +++ b/zluda_trace/src/lib.rs @@ -1504,19 +1504,33 @@ pub(crate) fn cuLaunchKernel_Pre( _blockDimY: ::core::ffi::c_uint, _blockDimZ: ::core::ffi::c_uint, _sharedMemBytes: ::core::ffi::c_uint, - _hStream: cuda_types::cuda::CUstream, + stream: cuda_types::cuda::CUstream, kernel_params: *mut *mut ::core::ffi::c_void, _extra: *mut *mut ::core::ffi::c_void, libcuda: &mut CudaDynamicFns, state: &mut trace::StateTracker, fn_logger: &mut FnCallLog, -) -> Option<(String, Vec)> { +) -> Option { + launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger) +} + +fn launch_kernel_pre( + f: cuda_types::cuda::CUfunction, + stream: cuda_types::cuda::CUstream, + kernel_params: *mut *mut ::core::ffi::c_void, + libcuda: &mut CudaDynamicFns, + state: &mut trace::StateTracker, + fn_logger: &mut FnCallLog, +) -> Option { state.enqueue_counter += 1; if kernel_params.is_null() { fn_logger.log(ErrorEntry::NullPointer("kernel_params")); return None; } - replay::pre_kernel_launch(libcuda, state, fn_logger, f, kernel_params) + if state.dump_dir().is_none() { + return None; + } + replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params) } #[allow(non_snake_case)] @@ -1529,63 +1543,58 @@ pub(crate) fn cuLaunchKernel_Post( _blockDimY: ::core::ffi::c_uint, _blockDimZ: ::core::ffi::c_uint, _sharedMemBytes: ::core::ffi::c_uint, - _hStream: cuda_types::cuda::CUstream, + stream: cuda_types::cuda::CUstream, kernel_params: *mut *mut ::core::ffi::c_void, _extra: *mut *mut ::core::ffi::c_void, - pre_state: Option<(String, Vec)>, + pre_state: Option, libcuda: &mut CudaDynamicFns, state: &mut trace::StateTracker, fn_logger: &mut FnCallLog, _result: CUresult, ) { - let (kernel_name, pre_state) = unwrap_some_or!(pre_state, return); - replay::post_kernel_launch( - libcuda, - fn_logger, - kernel_params, - pre_state, - state.enqueue_counter, - kernel_name, - ); + let pre_state = unwrap_some_or!(pre_state, return); + replay::post_kernel_launch(libcuda, state, fn_logger, stream, kernel_params, pre_state); } #[allow(non_snake_case)] pub(crate) fn cuLaunchKernelEx_Pre( - _config: *const cuda_types::cuda::CUlaunchConfig, + config: *const cuda_types::cuda::CUlaunchConfig, f: cuda_types::cuda::CUfunction, kernel_params: *mut *mut ::core::ffi::c_void, _extra: *mut *mut ::core::ffi::c_void, libcuda: &mut CudaDynamicFns, state: &mut trace::StateTracker, fn_logger: &mut FnCallLog, -) -> Option<(String, Vec)> { - state.enqueue_counter += 1; - if kernel_params.is_null() { - fn_logger.log(ErrorEntry::NullPointer("kernel_params")); - return None; - } - replay::pre_kernel_launch(libcuda, state, fn_logger, f, kernel_params) +) -> Option { + launch_kernel_pre( + f, + unsafe { *config }.hStream, + kernel_params, + libcuda, + state, + fn_logger, + ) } #[allow(non_snake_case)] pub(crate) fn cuLaunchKernelEx_Post( - _config: *const cuda_types::cuda::CUlaunchConfig, + config: *const cuda_types::cuda::CUlaunchConfig, _f: cuda_types::cuda::CUfunction, kernel_params: *mut *mut ::core::ffi::c_void, _extra: *mut *mut ::core::ffi::c_void, - pre_state: Option<(String, Vec)>, + pre_state: Option, libcuda: &mut CudaDynamicFns, state: &mut trace::StateTracker, fn_logger: &mut FnCallLog, _result: CUresult, ) { - let (kernel_name, pre_state) = unwrap_some_or!(pre_state, return); + let pre_state = unwrap_some_or!(pre_state, return); replay::post_kernel_launch( libcuda, + state, fn_logger, + unsafe { *config }.hStream, kernel_params, pre_state, - state.enqueue_counter, - kernel_name, ); } diff --git a/zluda_trace/src/replay.rs b/zluda_trace/src/replay.rs index 7bfd7da..8056e07 100644 --- a/zluda_trace/src/replay.rs +++ b/zluda_trace/src/replay.rs @@ -6,20 +6,28 @@ use crate::{ use cuda_types::cuda::*; use zluda_trace_common::replay::KernelParameter; +pub struct LaunchPreState { + kernel_name: String, + source: String, + kernel_params: Vec, +} + pub(crate) fn pre_kernel_launch( libcuda: &mut CudaDynamicFns, state: &mut trace::StateTracker, fn_logger: &mut FnCallLog, f: CUfunction, + stream: CUstream, args: *mut *mut std::ffi::c_void, -) -> Option<(String, Vec)> { +) -> Option { + fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(stream))?; let SavedKernel { name, owner } = fn_logger.try_return(|| { state .kernels .get(&f) .ok_or(ErrorEntry::UnknownFunctionHandle(f)) })?; - let ParsedModule { kernels } = fn_logger.try_return(|| { + let ParsedModule { source, kernels } = fn_logger.try_return(|| { state .parsed_libraries .get(owner) @@ -74,19 +82,25 @@ pub(crate) fn pre_kernel_launch( device_ptrs: ptr_overrides, }); } - Some((name.to_string(), all_params)) + Some(LaunchPreState { + kernel_name: name.to_string(), + source: source.to_string(), + kernel_params: all_params, + }) } pub(crate) fn post_kernel_launch( libcuda: &mut CudaDynamicFns, + state: &trace::StateTracker, fn_logger: &mut FnCallLog, - args: *mut *mut std::ffi::c_void, - mut kernel_params: Vec, - enqueue_counter: usize, - kernel_name: String, + stream: CUstream, + kernel_params: *mut *mut std::ffi::c_void, + mut pre_state: LaunchPreState, ) -> Option<()> { - let raw_args = unsafe { std::slice::from_raw_parts(args, kernel_params.len()) }; - for (raw_arg, param) in raw_args.iter().zip(kernel_params.iter_mut()) { + fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(stream))?; + let raw_args = + unsafe { std::slice::from_raw_parts(kernel_params, pre_state.kernel_params.len()) }; + for (raw_arg, param) in raw_args.iter().zip(pre_state.kernel_params.iter_mut()) { for (offset_in_param, offset_in_buffer, _, data_after) in param.device_ptrs.iter_mut() { let dev_ptr_param = unsafe { raw_arg.cast::().add(*offset_in_param) }; let mut dev_ptr = unsafe { dev_ptr_param.cast::().read_unaligned() }; @@ -100,11 +114,19 @@ pub(crate) fn post_kernel_launch( })?; } } - let path = format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"); + let enqueue_counter = state.enqueue_counter; + let kernel_name = &pre_state.kernel_name; + let mut path = state.dump_dir()?.to_path_buf(); + path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst")); let file = fn_logger.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?; fn_logger.try_return(|| { - zluda_trace_common::replay::save(file, kernel_name, kernel_params) - .map_err(ErrorEntry::IoError) + zluda_trace_common::replay::save( + file, + pre_state.kernel_name, + pre_state.source, + pre_state.kernel_params, + ) + .map_err(ErrorEntry::IoError) }) } diff --git a/zluda_trace/src/trace.rs b/zluda_trace/src/trace.rs index 1740d31..219d032 100644 --- a/zluda_trace/src/trace.rs +++ b/zluda_trace/src/trace.rs @@ -30,6 +30,7 @@ pub(crate) struct StateTracker { } pub(crate) struct ParsedModule { + pub source: String, pub kernels: FxHashMap>, } @@ -57,6 +58,10 @@ impl StateTracker { } } + pub(crate) fn dump_dir(&self) -> Option<&PathBuf> { + self.writer.dump_dir.as_ref() + } + pub(crate) fn record_new_module_file( &mut self, module: CUmodule, @@ -147,12 +152,15 @@ impl StateTracker { } }); }; - self.parsed_libraries.insert( - SendablePtr(handle), - ParsedModule { - kernels: kernel_arguments.unwrap_or_default(), - }, - ); + if let Some((source, kernel_arguments)) = kernel_arguments { + self.parsed_libraries.insert( + SendablePtr(handle), + ParsedModule { + source, + kernels: kernel_arguments, + }, + ); + } } #[must_use] @@ -162,7 +170,7 @@ impl StateTracker { submodule: &[u8], fn_logger: &mut FnCallLog, type_: &'static str, - ) -> Option>> { + ) -> Option<(String, FxHashMap>)> { fn_logger.try_(|fn_logger| { self.writer .save_module(fn_logger, self.library_counter, index, submodule, type_) @@ -198,7 +206,7 @@ impl StateTracker { module_index: usize, submodule_index: Option<(usize, Option)>, module_text: &'input str, - ) -> FxHashMap> { + ) -> (String, FxHashMap>) { let (errors, params) = ptx_parser::parse_for_errors_and_params(module_text); if !errors.is_empty() { fn_logger.log(log::ErrorEntry::ModuleParsingError( @@ -210,7 +218,7 @@ impl StateTracker { &*errors, )); } - params + (module_text.to_string(), params) } pub(crate) fn record_module_in_library(&mut self, module: CUmodule, library: CUlibrary) { diff --git a/zluda_trace_common/src/replay.rs b/zluda_trace_common/src/replay.rs index 9b7ee68..fe98e7c 100644 --- a/zluda_trace_common/src/replay.rs +++ b/zluda_trace_common/src/replay.rs @@ -37,6 +37,7 @@ pub struct KernelParameter { pub fn save( writer: impl Write, kernel_name: String, + source: String, kernel_params: Vec, ) -> std::io::Result<()> { let archive = zstd::Encoder::new(writer, 0)?; @@ -61,6 +62,9 @@ pub fn save( } .serialize()?; builder.append_data(&mut header, Manifest::PATH, &*manifest)?; + let mut header = Header::new_gnu(); + header.set_size(source.len() as u64); + builder.append_data(&mut header, "source.ptx", source.as_bytes())?; for (i, param) in kernel_params.into_iter().enumerate() { let path = format!("param_{i}.bin"); let mut header = Header::new_gnu();