mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 03:39:10 +00:00
Save source ptx and save to the right path
This commit is contained in:
parent
d880ee78b5
commit
f3e143d8dd
4 changed files with 91 additions and 48 deletions
|
@ -1504,19 +1504,33 @@ pub(crate) fn cuLaunchKernel_Pre(
|
||||||
_blockDimY: ::core::ffi::c_uint,
|
_blockDimY: ::core::ffi::c_uint,
|
||||||
_blockDimZ: ::core::ffi::c_uint,
|
_blockDimZ: ::core::ffi::c_uint,
|
||||||
_sharedMemBytes: ::core::ffi::c_uint,
|
_sharedMemBytes: ::core::ffi::c_uint,
|
||||||
_hStream: cuda_types::cuda::CUstream,
|
stream: cuda_types::cuda::CUstream,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
) -> Option<(String, Vec<zluda_trace_common::replay::KernelParameter>)> {
|
) -> Option<replay::LaunchPreState> {
|
||||||
|
launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn launch_kernel_pre(
|
||||||
|
f: cuda_types::cuda::CUfunction,
|
||||||
|
stream: cuda_types::cuda::CUstream,
|
||||||
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
|
libcuda: &mut CudaDynamicFns,
|
||||||
|
state: &mut trace::StateTracker,
|
||||||
|
fn_logger: &mut FnCallLog,
|
||||||
|
) -> Option<replay::LaunchPreState> {
|
||||||
state.enqueue_counter += 1;
|
state.enqueue_counter += 1;
|
||||||
if kernel_params.is_null() {
|
if kernel_params.is_null() {
|
||||||
fn_logger.log(ErrorEntry::NullPointer("kernel_params"));
|
fn_logger.log(ErrorEntry::NullPointer("kernel_params"));
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
replay::pre_kernel_launch(libcuda, state, fn_logger, f, kernel_params)
|
if state.dump_dir().is_none() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
|
@ -1529,63 +1543,58 @@ pub(crate) fn cuLaunchKernel_Post(
|
||||||
_blockDimY: ::core::ffi::c_uint,
|
_blockDimY: ::core::ffi::c_uint,
|
||||||
_blockDimZ: ::core::ffi::c_uint,
|
_blockDimZ: ::core::ffi::c_uint,
|
||||||
_sharedMemBytes: ::core::ffi::c_uint,
|
_sharedMemBytes: ::core::ffi::c_uint,
|
||||||
_hStream: cuda_types::cuda::CUstream,
|
stream: cuda_types::cuda::CUstream,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
pre_state: Option<(String, Vec<zluda_trace_common::replay::KernelParameter>)>,
|
pre_state: Option<replay::LaunchPreState>,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
_result: CUresult,
|
_result: CUresult,
|
||||||
) {
|
) {
|
||||||
let (kernel_name, pre_state) = unwrap_some_or!(pre_state, return);
|
let pre_state = unwrap_some_or!(pre_state, return);
|
||||||
replay::post_kernel_launch(
|
replay::post_kernel_launch(libcuda, state, fn_logger, stream, kernel_params, pre_state);
|
||||||
libcuda,
|
|
||||||
fn_logger,
|
|
||||||
kernel_params,
|
|
||||||
pre_state,
|
|
||||||
state.enqueue_counter,
|
|
||||||
kernel_name,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub(crate) fn cuLaunchKernelEx_Pre(
|
pub(crate) fn cuLaunchKernelEx_Pre(
|
||||||
_config: *const cuda_types::cuda::CUlaunchConfig,
|
config: *const cuda_types::cuda::CUlaunchConfig,
|
||||||
f: cuda_types::cuda::CUfunction,
|
f: cuda_types::cuda::CUfunction,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
) -> Option<(String, Vec<zluda_trace_common::replay::KernelParameter>)> {
|
) -> Option<replay::LaunchPreState> {
|
||||||
state.enqueue_counter += 1;
|
launch_kernel_pre(
|
||||||
if kernel_params.is_null() {
|
f,
|
||||||
fn_logger.log(ErrorEntry::NullPointer("kernel_params"));
|
unsafe { *config }.hStream,
|
||||||
return None;
|
kernel_params,
|
||||||
}
|
libcuda,
|
||||||
replay::pre_kernel_launch(libcuda, state, fn_logger, f, kernel_params)
|
state,
|
||||||
|
fn_logger,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub(crate) fn cuLaunchKernelEx_Post(
|
pub(crate) fn cuLaunchKernelEx_Post(
|
||||||
_config: *const cuda_types::cuda::CUlaunchConfig,
|
config: *const cuda_types::cuda::CUlaunchConfig,
|
||||||
_f: cuda_types::cuda::CUfunction,
|
_f: cuda_types::cuda::CUfunction,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
pre_state: Option<(String, Vec<zluda_trace_common::replay::KernelParameter>)>,
|
pre_state: Option<replay::LaunchPreState>,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
_result: CUresult,
|
_result: CUresult,
|
||||||
) {
|
) {
|
||||||
let (kernel_name, pre_state) = unwrap_some_or!(pre_state, return);
|
let pre_state = unwrap_some_or!(pre_state, return);
|
||||||
replay::post_kernel_launch(
|
replay::post_kernel_launch(
|
||||||
libcuda,
|
libcuda,
|
||||||
|
state,
|
||||||
fn_logger,
|
fn_logger,
|
||||||
|
unsafe { *config }.hStream,
|
||||||
kernel_params,
|
kernel_params,
|
||||||
pre_state,
|
pre_state,
|
||||||
state.enqueue_counter,
|
|
||||||
kernel_name,
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,20 +6,28 @@ use crate::{
|
||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use zluda_trace_common::replay::KernelParameter;
|
use zluda_trace_common::replay::KernelParameter;
|
||||||
|
|
||||||
|
pub struct LaunchPreState {
|
||||||
|
kernel_name: String,
|
||||||
|
source: String,
|
||||||
|
kernel_params: Vec<KernelParameter>,
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn pre_kernel_launch(
|
pub(crate) fn pre_kernel_launch(
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
f: CUfunction,
|
f: CUfunction,
|
||||||
|
stream: CUstream,
|
||||||
args: *mut *mut std::ffi::c_void,
|
args: *mut *mut std::ffi::c_void,
|
||||||
) -> Option<(String, Vec<KernelParameter>)> {
|
) -> Option<LaunchPreState> {
|
||||||
|
fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(stream))?;
|
||||||
let SavedKernel { name, owner } = fn_logger.try_return(|| {
|
let SavedKernel { name, owner } = fn_logger.try_return(|| {
|
||||||
state
|
state
|
||||||
.kernels
|
.kernels
|
||||||
.get(&f)
|
.get(&f)
|
||||||
.ok_or(ErrorEntry::UnknownFunctionHandle(f))
|
.ok_or(ErrorEntry::UnknownFunctionHandle(f))
|
||||||
})?;
|
})?;
|
||||||
let ParsedModule { kernels } = fn_logger.try_return(|| {
|
let ParsedModule { source, kernels } = fn_logger.try_return(|| {
|
||||||
state
|
state
|
||||||
.parsed_libraries
|
.parsed_libraries
|
||||||
.get(owner)
|
.get(owner)
|
||||||
|
@ -74,19 +82,25 @@ pub(crate) fn pre_kernel_launch(
|
||||||
device_ptrs: ptr_overrides,
|
device_ptrs: ptr_overrides,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Some((name.to_string(), all_params))
|
Some(LaunchPreState {
|
||||||
|
kernel_name: name.to_string(),
|
||||||
|
source: source.to_string(),
|
||||||
|
kernel_params: all_params,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn post_kernel_launch(
|
pub(crate) fn post_kernel_launch(
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
|
state: &trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
args: *mut *mut std::ffi::c_void,
|
stream: CUstream,
|
||||||
mut kernel_params: Vec<KernelParameter>,
|
kernel_params: *mut *mut std::ffi::c_void,
|
||||||
enqueue_counter: usize,
|
mut pre_state: LaunchPreState,
|
||||||
kernel_name: String,
|
|
||||||
) -> Option<()> {
|
) -> Option<()> {
|
||||||
let raw_args = unsafe { std::slice::from_raw_parts(args, kernel_params.len()) };
|
fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(stream))?;
|
||||||
for (raw_arg, param) in raw_args.iter().zip(kernel_params.iter_mut()) {
|
let raw_args =
|
||||||
|
unsafe { std::slice::from_raw_parts(kernel_params, pre_state.kernel_params.len()) };
|
||||||
|
for (raw_arg, param) in raw_args.iter().zip(pre_state.kernel_params.iter_mut()) {
|
||||||
for (offset_in_param, offset_in_buffer, _, data_after) in param.device_ptrs.iter_mut() {
|
for (offset_in_param, offset_in_buffer, _, data_after) in param.device_ptrs.iter_mut() {
|
||||||
let dev_ptr_param = unsafe { raw_arg.cast::<u8>().add(*offset_in_param) };
|
let dev_ptr_param = unsafe { raw_arg.cast::<u8>().add(*offset_in_param) };
|
||||||
let mut dev_ptr = unsafe { dev_ptr_param.cast::<usize>().read_unaligned() };
|
let mut dev_ptr = unsafe { dev_ptr_param.cast::<usize>().read_unaligned() };
|
||||||
|
@ -100,11 +114,19 @@ pub(crate) fn post_kernel_launch(
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let path = format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst");
|
let enqueue_counter = state.enqueue_counter;
|
||||||
|
let kernel_name = &pre_state.kernel_name;
|
||||||
|
let mut path = state.dump_dir()?.to_path_buf();
|
||||||
|
path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"));
|
||||||
let file =
|
let file =
|
||||||
fn_logger.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
|
fn_logger.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
|
||||||
fn_logger.try_return(|| {
|
fn_logger.try_return(|| {
|
||||||
zluda_trace_common::replay::save(file, kernel_name, kernel_params)
|
zluda_trace_common::replay::save(
|
||||||
.map_err(ErrorEntry::IoError)
|
file,
|
||||||
|
pre_state.kernel_name,
|
||||||
|
pre_state.source,
|
||||||
|
pre_state.kernel_params,
|
||||||
|
)
|
||||||
|
.map_err(ErrorEntry::IoError)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@ pub(crate) struct StateTracker {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct ParsedModule {
|
pub(crate) struct ParsedModule {
|
||||||
|
pub source: String,
|
||||||
pub kernels: FxHashMap<String, Vec<Layout>>,
|
pub kernels: FxHashMap<String, Vec<Layout>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,6 +58,10 @@ impl StateTracker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the trace writer's dump directory, or `None` when none is set.
pub(crate) fn dump_dir(&self) -> Option<&PathBuf> {
    let configured = &self.writer.dump_dir;
    configured.as_ref()
}
|
||||||
|
|
||||||
pub(crate) fn record_new_module_file(
|
pub(crate) fn record_new_module_file(
|
||||||
&mut self,
|
&mut self,
|
||||||
module: CUmodule,
|
module: CUmodule,
|
||||||
|
@ -147,12 +152,15 @@ impl StateTracker {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
self.parsed_libraries.insert(
|
if let Some((source, kernel_arguments)) = kernel_arguments {
|
||||||
SendablePtr(handle),
|
self.parsed_libraries.insert(
|
||||||
ParsedModule {
|
SendablePtr(handle),
|
||||||
kernels: kernel_arguments.unwrap_or_default(),
|
ParsedModule {
|
||||||
},
|
source,
|
||||||
);
|
kernels: kernel_arguments,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
|
@ -162,7 +170,7 @@ impl StateTracker {
|
||||||
submodule: &[u8],
|
submodule: &[u8],
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
type_: &'static str,
|
type_: &'static str,
|
||||||
) -> Option<FxHashMap<String, Vec<Layout>>> {
|
) -> Option<(String, FxHashMap<String, Vec<Layout>>)> {
|
||||||
fn_logger.try_(|fn_logger| {
|
fn_logger.try_(|fn_logger| {
|
||||||
self.writer
|
self.writer
|
||||||
.save_module(fn_logger, self.library_counter, index, submodule, type_)
|
.save_module(fn_logger, self.library_counter, index, submodule, type_)
|
||||||
|
@ -198,7 +206,7 @@ impl StateTracker {
|
||||||
module_index: usize,
|
module_index: usize,
|
||||||
submodule_index: Option<(usize, Option<usize>)>,
|
submodule_index: Option<(usize, Option<usize>)>,
|
||||||
module_text: &'input str,
|
module_text: &'input str,
|
||||||
) -> FxHashMap<String, Vec<Layout>> {
|
) -> (String, FxHashMap<String, Vec<Layout>>) {
|
||||||
let (errors, params) = ptx_parser::parse_for_errors_and_params(module_text);
|
let (errors, params) = ptx_parser::parse_for_errors_and_params(module_text);
|
||||||
if !errors.is_empty() {
|
if !errors.is_empty() {
|
||||||
fn_logger.log(log::ErrorEntry::ModuleParsingError(
|
fn_logger.log(log::ErrorEntry::ModuleParsingError(
|
||||||
|
@ -210,7 +218,7 @@ impl StateTracker {
|
||||||
&*errors,
|
&*errors,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
params
|
(module_text.to_string(), params)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn record_module_in_library(&mut self, module: CUmodule, library: CUlibrary) {
|
pub(crate) fn record_module_in_library(&mut self, module: CUmodule, library: CUlibrary) {
|
||||||
|
|
|
@ -37,6 +37,7 @@ pub struct KernelParameter {
|
||||||
pub fn save(
|
pub fn save(
|
||||||
writer: impl Write,
|
writer: impl Write,
|
||||||
kernel_name: String,
|
kernel_name: String,
|
||||||
|
source: String,
|
||||||
kernel_params: Vec<KernelParameter>,
|
kernel_params: Vec<KernelParameter>,
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
let archive = zstd::Encoder::new(writer, 0)?;
|
let archive = zstd::Encoder::new(writer, 0)?;
|
||||||
|
@ -61,6 +62,9 @@ pub fn save(
|
||||||
}
|
}
|
||||||
.serialize()?;
|
.serialize()?;
|
||||||
builder.append_data(&mut header, Manifest::PATH, &*manifest)?;
|
builder.append_data(&mut header, Manifest::PATH, &*manifest)?;
|
||||||
|
let mut header = Header::new_gnu();
|
||||||
|
header.set_size(source.len() as u64);
|
||||||
|
builder.append_data(&mut header, "source.ptx", source.as_bytes())?;
|
||||||
for (i, param) in kernel_params.into_iter().enumerate() {
|
for (i, param) in kernel_params.into_iter().enumerate() {
|
||||||
let path = format!("param_{i}.bin");
|
let path = format!("param_{i}.bin");
|
||||||
let mut header = Header::new_gnu();
|
let mut header = Header::new_gnu();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue