mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 19:59:08 +00:00
Trace magic pointer
This commit is contained in:
parent
44823a2d75
commit
ed1ea1f6de
2 changed files with 50 additions and 0 deletions
|
@ -895,6 +895,7 @@ cuda_function_declarations!(
|
||||||
cuModuleLoadFatBinary,
|
cuModuleLoadFatBinary,
|
||||||
cuLibraryGetModule,
|
cuLibraryGetModule,
|
||||||
cuLibraryLoadData,
|
cuLibraryLoadData,
|
||||||
|
cuMemAlloc_v2,
|
||||||
],
|
],
|
||||||
extern_redirect_with_pre_post <= [cuLaunchKernel, cuLaunchKernelEx],
|
extern_redirect_with_pre_post <= [cuLaunchKernel, cuLaunchKernelEx],
|
||||||
override_fn_core <= [cuGetProcAddress, cuGetProcAddress_v2],
|
override_fn_core <= [cuGetProcAddress, cuGetProcAddress_v2],
|
||||||
|
@ -1606,6 +1607,7 @@ pub(crate) fn cuLaunchKernel_Post(
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
_result: CUresult,
|
_result: CUresult,
|
||||||
) {
|
) {
|
||||||
|
save_magic_ptr(libcuda, state, _f, hStream, state.magic_ptr);
|
||||||
let pre_state = unwrap_some_or!(pre_state, return);
|
let pre_state = unwrap_some_or!(pre_state, return);
|
||||||
replay::post_kernel_launch(
|
replay::post_kernel_launch(
|
||||||
libcuda,
|
libcuda,
|
||||||
|
@ -1661,6 +1663,13 @@ pub(crate) fn cuLaunchKernelEx_Post(
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
_result: CUresult,
|
_result: CUresult,
|
||||||
) {
|
) {
|
||||||
|
save_magic_ptr(
|
||||||
|
libcuda,
|
||||||
|
state,
|
||||||
|
_f,
|
||||||
|
unsafe { *config }.hStream,
|
||||||
|
state.magic_ptr,
|
||||||
|
);
|
||||||
let pre_state = unwrap_some_or!(pre_state, return);
|
let pre_state = unwrap_some_or!(pre_state, return);
|
||||||
replay::post_kernel_launch(
|
replay::post_kernel_launch(
|
||||||
libcuda,
|
libcuda,
|
||||||
|
@ -1671,3 +1680,42 @@ pub(crate) fn cuLaunchKernelEx_Post(
|
||||||
pre_state,
|
pre_state,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub(crate) fn cuMemAlloc_v2_Post(
|
||||||
|
dptr: *mut cuda_types::cuda::CUdeviceptr,
|
||||||
|
bytesize: usize,
|
||||||
|
state: &mut trace::StateTracker,
|
||||||
|
_fn_logger: &mut FnCallLog,
|
||||||
|
_result: CUresult,
|
||||||
|
) {
|
||||||
|
if bytesize == 2097152 {
|
||||||
|
state.magic_ptr = Some(unsafe { *dptr });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn save_magic_ptr(
|
||||||
|
libcuda: &mut CudaDynamicFns,
|
||||||
|
state: &mut trace::StateTracker,
|
||||||
|
f: cuda_types::cuda::CUfunction,
|
||||||
|
stream: CUstream,
|
||||||
|
magic_ptr: Option<CUdeviceptr_v2>,
|
||||||
|
) {
|
||||||
|
let magic_ptr = unwrap_some_or!(magic_ptr, return);
|
||||||
|
let mut kernel_name = unwrap_some_or!(state.kernels.get(&f), return).name.clone();
|
||||||
|
kernel_name.truncate(224);
|
||||||
|
libcuda.cuStreamSynchronize(stream).unwrap().unwrap();
|
||||||
|
let mut host = vec![0u8; 2097152];
|
||||||
|
let cpy_err = libcuda
|
||||||
|
.cuMemcpyDtoH_v2(host.as_mut_ptr().cast(), magic_ptr, 2097152)
|
||||||
|
.unwrap();
|
||||||
|
if !cpy_err.is_ok() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let mut dump_dir = state.dump_dir().unwrap().clone();
|
||||||
|
dump_dir.push(format!(
|
||||||
|
"magic_ptr_{}_{}.bin",
|
||||||
|
state.enqueue_counter, kernel_name
|
||||||
|
));
|
||||||
|
std::fs::write(dump_dir, host).unwrap();
|
||||||
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ pub(crate) struct StateTracker {
|
||||||
pub(crate) override_cc: Option<(u32, u32)>,
|
pub(crate) override_cc: Option<(u32, u32)>,
|
||||||
pub(crate) kernel_name_filter: Option<regex::Regex>,
|
pub(crate) kernel_name_filter: Option<regex::Regex>,
|
||||||
pub(crate) kernel_no_output: bool,
|
pub(crate) kernel_no_output: bool,
|
||||||
|
pub(crate) magic_ptr: Option<CUdeviceptr>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct ParsedModule {
|
pub(crate) struct ParsedModule {
|
||||||
|
@ -59,6 +60,7 @@ impl StateTracker {
|
||||||
override_cc: settings.override_cc,
|
override_cc: settings.override_cc,
|
||||||
kernel_name_filter: settings.kernel_name_filter.clone(),
|
kernel_name_filter: settings.kernel_name_filter.clone(),
|
||||||
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
|
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
|
||||||
|
magic_ptr: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue