Allow skipping post-values

This commit is contained in:
Andrzej Janik 2025-09-22 21:18:01 +00:00
commit 07acc64d33
4 changed files with 98 additions and 21 deletions

View file

@ -1281,6 +1281,7 @@ struct Settings {
libcuda_path: String,
override_cc: Option<(u32, u32)>,
kernel_name_filter: Option<regex::Regex>,
kernel_no_output: Option<bool>,
}
impl Settings {
@ -1343,11 +1344,28 @@ impl Settings {
})
}),
};
// Optional switch read from the ZLUDA_SAVE_KERNELS_NO_OUTPUT environment variable.
// - Variable not set: None, nothing is logged.
// - Variable set but unreadable (any other `env::VarError`): the error is logged, result is None.
// - Variable readable: the text must parse as a `u8`; 0 becomes Some(false) and any
//   other number becomes Some(true).
let kernel_no_output = match env::var("ZLUDA_SAVE_KERNELS_NO_OUTPUT") {
Err(env::VarError::NotPresent) => None,
Err(e) => {
logger.log(log::ErrorEntry::ErrorBox(Box::new(e) as _));
None
}
Ok(env_string) => logger
.try_return(|| {
// A parse failure is reported as InvalidEnvVar carrying both the raw text and the
// parser's message. NOTE(review): presumably `try_return` logs that entry and
// yields None — confirm against its definition.
str::parse::<u8>(&env_string).map_err(|err| ErrorEntry::InvalidEnvVar {
var: "ZLUDA_SAVE_KERNELS_NO_OUTPUT",
pattern: "number",
value: format!("{} ({})", env_string, err),
})
})
// Any non-zero number enables the option.
.map(|x| x != 0),
};
Settings {
dump_dir,
libcuda_path,
override_cc,
kernel_name_filter,
kernel_no_output,
}
}
@ -1513,25 +1531,45 @@ pub(crate) fn cuLibraryLoadData_Post(
/// Pre-call hook for `cuLaunchKernel`.
///
/// Bundles the grid dimensions, block dimensions, shared-memory size and stream
/// into a `CUlaunchConfig` and forwards everything to `launch_kernel_pre`,
/// returning whatever that function returns. `_extra` is not used here.
#[allow(non_snake_case)]
pub(crate) fn cuLaunchKernel_Pre(
f: cuda_types::cuda::CUfunction,
_gridDimX: ::core::ffi::c_uint,
_gridDimY: ::core::ffi::c_uint,
_gridDimZ: ::core::ffi::c_uint,
_blockDimX: ::core::ffi::c_uint,
_blockDimY: ::core::ffi::c_uint,
_blockDimZ: ::core::ffi::c_uint,
_sharedMemBytes: ::core::ffi::c_uint,
stream: cuda_types::cuda::CUstream,
gridDimX: ::core::ffi::c_uint,
gridDimY: ::core::ffi::c_uint,
gridDimZ: ::core::ffi::c_uint,
blockDimX: ::core::ffi::c_uint,
blockDimY: ::core::ffi::c_uint,
blockDimZ: ::core::ffi::c_uint,
sharedMemBytes: ::core::ffi::c_uint,
hStream: cuda_types::cuda::CUstream,
kernel_params: *mut *mut ::core::ffi::c_void,
_extra: *mut *mut ::core::ffi::c_void,
libcuda: &mut CudaDynamicFns,
state: &mut trace::StateTracker,
fn_logger: &mut FnCallLog,
) -> Option<replay::LaunchPreState> {
launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger)
launch_kernel_pre(
f,
CUlaunchConfig {
gridDimX,
gridDimY,
gridDimZ,
blockDimX,
blockDimY,
blockDimZ,
sharedMemBytes,
hStream,
// This entry point takes no launch-attribute arguments, so the config
// carries an empty attribute list.
attrs: ptr::null_mut(),
numAttrs: 0,
},
// The stream is passed twice: inside the config above and as
// launch_kernel_pre's separate `stream` argument.
hStream,
kernel_params,
libcuda,
state,
fn_logger,
)
}
fn launch_kernel_pre(
f: cuda_types::cuda::CUfunction,
config: CUlaunchConfig,
stream: cuda_types::cuda::CUstream,
kernel_params: *mut *mut ::core::ffi::c_void,
libcuda: &mut CudaDynamicFns,
@ -1546,7 +1584,7 @@ fn launch_kernel_pre(
if state.dump_dir().is_none() {
return None;
}
replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params)
replay::pre_kernel_launch(libcuda, state, fn_logger, config, f, stream, kernel_params)
}
#[allow(non_snake_case)]
@ -1602,6 +1640,7 @@ pub(crate) fn cuLaunchKernelEx_Pre(
) -> Option<replay::LaunchPreState> {
launch_kernel_pre(
f,
unsafe { *config },
unsafe { *config }.hStream,
kernel_params,
libcuda,

View file

@ -16,6 +16,7 @@ pub(crate) fn pre_kernel_launch(
libcuda: &mut CudaDynamicFns,
state: &mut trace::StateTracker,
fn_logger: &mut FnCallLog,
config: CUlaunchConfig,
f: CUfunction,
stream: CUstream,
args: *mut *mut std::ffi::c_void,
@ -60,12 +61,15 @@ pub(crate) fn pre_kernel_launch(
(&mut start as *mut usize).cast::<std::ffi::c_void>(),
(&mut size as *mut usize).cast::<std::ffi::c_void>(),
];
if let Some(Ok(())) = libcuda.cuPointerGetAttributes(
2,
attrs.as_mut_ptr(),
data.as_mut_ptr(),
CUdeviceptr_v2(maybe_ptr as _),
) {
fn_logger.try_cuda(|| {
libcuda.cuPointerGetAttributes(
2,
attrs.as_mut_ptr(),
data.as_mut_ptr(),
CUdeviceptr_v2(maybe_ptr as _),
)
})?;
if size != 0 {
let mut pre_buffer = vec![0u8; size];
let post_buffer = vec![0u8; size];
fn_logger.try_cuda(|| {
@ -86,11 +90,36 @@ pub(crate) fn pre_kernel_launch(
device_ptrs: ptr_overrides,
});
}
Some(LaunchPreState {
kernel_name: name.to_string(),
source: source.to_string(),
kernel_params: all_params,
})
if state.kernel_no_output {
let enqueue_counter = state.enqueue_counter;
let kernel_name = name;
let mut path = state.dump_dir()?.to_path_buf();
path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"));
let file = fn_logger
.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
fn_logger.try_return(|| {
zluda_trace_common::replay::save(
file,
name.to_string(),
false,
zluda_trace_common::replay::LaunchConfig {
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
shared_mem_bytes: config.sharedMemBytes,
},
source.to_string(),
all_params,
)
.map_err(ErrorEntry::IoError)
});
None
} else {
Some(LaunchPreState {
kernel_name: name.to_string(),
source: source.to_string(),
kernel_params: all_params,
})
}
}
pub(crate) fn post_kernel_launch(
@ -128,6 +157,7 @@ pub(crate) fn post_kernel_launch(
zluda_trace_common::replay::save(
file,
pre_state.kernel_name,
true,
zluda_trace_common::replay::LaunchConfig {
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),

View file

@ -28,6 +28,7 @@ pub(crate) struct StateTracker {
pub(crate) enqueue_counter: usize,
pub(crate) override_cc: Option<(u32, u32)>,
pub(crate) kernel_name_filter: Option<regex::Regex>,
pub(crate) kernel_no_output: bool,
}
pub(crate) struct ParsedModule {
@ -57,6 +58,7 @@ impl StateTracker {
enqueue_counter: 0,
override_cc: settings.override_cc,
kernel_name_filter: settings.kernel_name_filter.clone(),
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
}
}

View file

@ -5,6 +5,7 @@ use tar::Header;
/// Description of one saved kernel launch, built by `save` and
/// (de)serializable through serde.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct Manifest {
/// Kernel name, as passed to `save`.
pub kernel_name: String,
/// Copy of `save`'s `has_outputs` argument. When it is false, `save` does not
/// append the `param_*_post.bin` entries to the archive.
pub outputs: bool,
/// Launch configuration, as passed to `save`.
pub config: LaunchConfig,
/// Per-parameter records that `save` derives from its `kernel_params` argument.
pub parameters: Vec<Parameter>,
}
@ -46,6 +47,7 @@ pub struct KernelParameter {
pub fn save(
writer: impl Write,
kernel_name: String,
has_outputs: bool,
config: LaunchConfig,
source: String,
kernel_params: Vec<KernelParameter>,
@ -54,6 +56,7 @@ pub fn save(
let mut builder = tar::Builder::new(archive);
let (mut header, manifest) = Manifest {
kernel_name,
outputs: has_outputs,
config,
parameters: kernel_params
.iter()
@ -86,6 +89,9 @@ pub fn save(
let mut header = Header::new_gnu();
header.set_size(data_before.len() as u64);
builder.append_data(&mut header, &*path, &*data_before)?;
if !has_outputs {
continue;
}
let path = format!("param_{i}_ptr_{offset_in_param}_post.bin");
let mut header = Header::new_gnu();
header.set_size(data_after.len() as u64);