mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 19:59:08 +00:00
Merge commit '07acc64d33' into demo_mode2
This commit is contained in:
commit
0c4e103f8f
6 changed files with 123 additions and 47 deletions
|
@ -1656,7 +1656,6 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
.ok_or_else(|| error_mismatched_type())?,
|
.ok_or_else(|| error_mismatched_type())?,
|
||||||
);
|
);
|
||||||
let src2 = self.resolver.value(src2)?;
|
let src2 = self.resolver.value(src2)?;
|
||||||
self.resolver.with_result(arguments.dst, |dst| {
|
|
||||||
let vec = unsafe {
|
let vec = unsafe {
|
||||||
LLVMBuildInsertElement(
|
LLVMBuildInsertElement(
|
||||||
self.builder,
|
self.builder,
|
||||||
|
@ -1666,7 +1665,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
LLVM_UNNAMED.as_ptr(),
|
LLVM_UNNAMED.as_ptr(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
unsafe {
|
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||||
LLVMBuildInsertElement(
|
LLVMBuildInsertElement(
|
||||||
self.builder,
|
self.builder,
|
||||||
vec,
|
vec,
|
||||||
|
@ -1674,7 +1673,6 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
LLVMConstInt(LLVMInt32TypeInContext(self.context), 0, false as i32),
|
LLVMConstInt(LLVMInt32TypeInContext(self.context), 0, false as i32),
|
||||||
dst,
|
dst,
|
||||||
)
|
)
|
||||||
}
|
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||||
|
@ -2200,7 +2198,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
Some(&ast::ScalarType::F32.into()),
|
Some(&ast::ScalarType::F32.into()),
|
||||||
vec![(
|
vec![(
|
||||||
self.resolver.value(arguments.src)?,
|
self.resolver.value(arguments.src)?,
|
||||||
get_scalar_type(self.context, ast::ScalarType::F32.into()),
|
get_scalar_type(self.context, ast::ScalarType::F32),
|
||||||
)],
|
)],
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -2703,14 +2701,14 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
|
|
||||||
let load = unsafe { LLVMBuildLoad2(self.builder, from_type, from, LLVM_UNNAMED.as_ptr()) };
|
let load = unsafe { LLVMBuildLoad2(self.builder, from_type, from, LLVM_UNNAMED.as_ptr()) };
|
||||||
unsafe {
|
unsafe {
|
||||||
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
|
LLVMSetAlignment(load, cp_size.as_u64() as u32);
|
||||||
}
|
}
|
||||||
|
|
||||||
let extended = unsafe { LLVMBuildZExt(self.builder, load, to_type, LLVM_UNNAMED.as_ptr()) };
|
let extended = unsafe { LLVMBuildZExt(self.builder, load, to_type, LLVM_UNNAMED.as_ptr()) };
|
||||||
|
|
||||||
unsafe { LLVMBuildStore(self.builder, extended, to) };
|
let store = unsafe { LLVMBuildStore(self.builder, extended, to) };
|
||||||
unsafe {
|
unsafe {
|
||||||
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
|
LLVMSetAlignment(store, cp_size.as_u64() as u32);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -2990,7 +2988,7 @@ fn get_scope_membar(scope: ast::MemScope) -> Result<*const i8, TranslateError> {
|
||||||
Ok(match scope {
|
Ok(match scope {
|
||||||
ast::MemScope::Cta => c"workgroup",
|
ast::MemScope::Cta => c"workgroup",
|
||||||
ast::MemScope::Gpu => c"agent",
|
ast::MemScope::Gpu => c"agent",
|
||||||
ast::MemScope::Sys => c"",
|
ast::MemScope::Sys => c"system",
|
||||||
ast::MemScope::Cluster => todo!(),
|
ast::MemScope::Cluster => todo!(),
|
||||||
}
|
}
|
||||||
.as_ptr())
|
.as_ptr())
|
||||||
|
|
|
@ -227,8 +227,9 @@ fn int_immediate<'a, 'input>(input: &mut PtxParser<'a, 'input>) -> PResult<ast::
|
||||||
take_error((opt(Token::Minus), num).map(|(neg, x)| {
|
take_error((opt(Token::Minus), num).map(|(neg, x)| {
|
||||||
let (num, radix, is_unsigned) = x;
|
let (num, radix, is_unsigned) = x;
|
||||||
if neg.is_some() {
|
if neg.is_some() {
|
||||||
match i64::from_str_radix(num, radix) {
|
let full_number = format!("-{num}");
|
||||||
Ok(x) => Ok(ast::ImmediateValue::S64(-x)),
|
match i64::from_str_radix(&full_number, radix) {
|
||||||
|
Ok(x) => Ok(ast::ImmediateValue::S64(x)),
|
||||||
Err(err) => Err((ast::ImmediateValue::S64(0), PtxError::from(err))),
|
Err(err) => Err((ast::ImmediateValue::S64(0), PtxError::from(err))),
|
||||||
}
|
}
|
||||||
} else if is_unsigned {
|
} else if is_unsigned {
|
||||||
|
|
|
@ -1281,6 +1281,7 @@ struct Settings {
|
||||||
libcuda_path: String,
|
libcuda_path: String,
|
||||||
override_cc: Option<(u32, u32)>,
|
override_cc: Option<(u32, u32)>,
|
||||||
kernel_name_filter: Option<regex::Regex>,
|
kernel_name_filter: Option<regex::Regex>,
|
||||||
|
kernel_no_output: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Settings {
|
impl Settings {
|
||||||
|
@ -1343,11 +1344,28 @@ impl Settings {
|
||||||
})
|
})
|
||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
let kernel_no_output = match env::var("ZLUDA_SAVE_KERNELS_NO_OUTPUT") {
|
||||||
|
Err(env::VarError::NotPresent) => None,
|
||||||
|
Err(e) => {
|
||||||
|
logger.log(log::ErrorEntry::ErrorBox(Box::new(e) as _));
|
||||||
|
None
|
||||||
|
}
|
||||||
|
Ok(env_string) => logger
|
||||||
|
.try_return(|| {
|
||||||
|
str::parse::<u8>(&env_string).map_err(|err| ErrorEntry::InvalidEnvVar {
|
||||||
|
var: "ZLUDA_SAVE_KERNELS_NO_OUTPUT",
|
||||||
|
pattern: "number",
|
||||||
|
value: format!("{} ({})", env_string, err),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.map(|x| x != 0),
|
||||||
|
};
|
||||||
Settings {
|
Settings {
|
||||||
dump_dir,
|
dump_dir,
|
||||||
libcuda_path,
|
libcuda_path,
|
||||||
override_cc,
|
override_cc,
|
||||||
kernel_name_filter,
|
kernel_name_filter,
|
||||||
|
kernel_no_output,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1513,25 +1531,45 @@ pub(crate) fn cuLibraryLoadData_Post(
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub(crate) fn cuLaunchKernel_Pre(
|
pub(crate) fn cuLaunchKernel_Pre(
|
||||||
f: cuda_types::cuda::CUfunction,
|
f: cuda_types::cuda::CUfunction,
|
||||||
_gridDimX: ::core::ffi::c_uint,
|
gridDimX: ::core::ffi::c_uint,
|
||||||
_gridDimY: ::core::ffi::c_uint,
|
gridDimY: ::core::ffi::c_uint,
|
||||||
_gridDimZ: ::core::ffi::c_uint,
|
gridDimZ: ::core::ffi::c_uint,
|
||||||
_blockDimX: ::core::ffi::c_uint,
|
blockDimX: ::core::ffi::c_uint,
|
||||||
_blockDimY: ::core::ffi::c_uint,
|
blockDimY: ::core::ffi::c_uint,
|
||||||
_blockDimZ: ::core::ffi::c_uint,
|
blockDimZ: ::core::ffi::c_uint,
|
||||||
_sharedMemBytes: ::core::ffi::c_uint,
|
sharedMemBytes: ::core::ffi::c_uint,
|
||||||
stream: cuda_types::cuda::CUstream,
|
hStream: cuda_types::cuda::CUstream,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
) -> Option<replay::LaunchPreState> {
|
) -> Option<replay::LaunchPreState> {
|
||||||
launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger)
|
launch_kernel_pre(
|
||||||
|
f,
|
||||||
|
CUlaunchConfig {
|
||||||
|
gridDimX,
|
||||||
|
gridDimY,
|
||||||
|
gridDimZ,
|
||||||
|
blockDimX,
|
||||||
|
blockDimY,
|
||||||
|
blockDimZ,
|
||||||
|
sharedMemBytes,
|
||||||
|
hStream,
|
||||||
|
attrs: ptr::null_mut(),
|
||||||
|
numAttrs: 0,
|
||||||
|
},
|
||||||
|
hStream,
|
||||||
|
kernel_params,
|
||||||
|
libcuda,
|
||||||
|
state,
|
||||||
|
fn_logger,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn launch_kernel_pre(
|
fn launch_kernel_pre(
|
||||||
f: cuda_types::cuda::CUfunction,
|
f: cuda_types::cuda::CUfunction,
|
||||||
|
config: CUlaunchConfig,
|
||||||
stream: cuda_types::cuda::CUstream,
|
stream: cuda_types::cuda::CUstream,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
|
@ -1546,7 +1584,7 @@ fn launch_kernel_pre(
|
||||||
if state.dump_dir().is_none() {
|
if state.dump_dir().is_none() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params)
|
replay::pre_kernel_launch(libcuda, state, fn_logger, config, f, stream, kernel_params)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
|
@ -1602,6 +1640,7 @@ pub(crate) fn cuLaunchKernelEx_Pre(
|
||||||
) -> Option<replay::LaunchPreState> {
|
) -> Option<replay::LaunchPreState> {
|
||||||
launch_kernel_pre(
|
launch_kernel_pre(
|
||||||
f,
|
f,
|
||||||
|
unsafe { *config },
|
||||||
unsafe { *config }.hStream,
|
unsafe { *config }.hStream,
|
||||||
kernel_params,
|
kernel_params,
|
||||||
libcuda,
|
libcuda,
|
||||||
|
|
|
@ -16,6 +16,7 @@ pub(crate) fn pre_kernel_launch(
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &mut trace::StateTracker,
|
state: &mut trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
|
config: CUlaunchConfig,
|
||||||
f: CUfunction,
|
f: CUfunction,
|
||||||
stream: CUstream,
|
stream: CUstream,
|
||||||
args: *mut *mut std::ffi::c_void,
|
args: *mut *mut std::ffi::c_void,
|
||||||
|
@ -60,12 +61,15 @@ pub(crate) fn pre_kernel_launch(
|
||||||
(&mut start as *mut usize).cast::<std::ffi::c_void>(),
|
(&mut start as *mut usize).cast::<std::ffi::c_void>(),
|
||||||
(&mut size as *mut usize).cast::<std::ffi::c_void>(),
|
(&mut size as *mut usize).cast::<std::ffi::c_void>(),
|
||||||
];
|
];
|
||||||
if let Some(Ok(())) = libcuda.cuPointerGetAttributes(
|
fn_logger.try_cuda(|| {
|
||||||
|
libcuda.cuPointerGetAttributes(
|
||||||
2,
|
2,
|
||||||
attrs.as_mut_ptr(),
|
attrs.as_mut_ptr(),
|
||||||
data.as_mut_ptr(),
|
data.as_mut_ptr(),
|
||||||
CUdeviceptr_v2(maybe_ptr as _),
|
CUdeviceptr_v2(maybe_ptr as _),
|
||||||
) {
|
)
|
||||||
|
})?;
|
||||||
|
if size != 0 {
|
||||||
let mut pre_buffer = vec![0u8; size];
|
let mut pre_buffer = vec![0u8; size];
|
||||||
let post_buffer = vec![0u8; size];
|
let post_buffer = vec![0u8; size];
|
||||||
fn_logger.try_cuda(|| {
|
fn_logger.try_cuda(|| {
|
||||||
|
@ -86,12 +90,37 @@ pub(crate) fn pre_kernel_launch(
|
||||||
device_ptrs: ptr_overrides,
|
device_ptrs: ptr_overrides,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if state.kernel_no_output {
|
||||||
|
let enqueue_counter = state.enqueue_counter;
|
||||||
|
let kernel_name = name;
|
||||||
|
let mut path = state.dump_dir()?.to_path_buf();
|
||||||
|
path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"));
|
||||||
|
let file = fn_logger
|
||||||
|
.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
|
||||||
|
fn_logger.try_return(|| {
|
||||||
|
zluda_trace_common::replay::save(
|
||||||
|
file,
|
||||||
|
name.to_string(),
|
||||||
|
false,
|
||||||
|
zluda_trace_common::replay::LaunchConfig {
|
||||||
|
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
||||||
|
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
||||||
|
shared_mem_bytes: config.sharedMemBytes,
|
||||||
|
},
|
||||||
|
source.to_string(),
|
||||||
|
all_params,
|
||||||
|
)
|
||||||
|
.map_err(ErrorEntry::IoError)
|
||||||
|
});
|
||||||
|
None
|
||||||
|
} else {
|
||||||
Some(LaunchPreState {
|
Some(LaunchPreState {
|
||||||
kernel_name: name.to_string(),
|
kernel_name: name.to_string(),
|
||||||
source: source.to_string(),
|
source: source.to_string(),
|
||||||
kernel_params: all_params,
|
kernel_params: all_params,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn post_kernel_launch(
|
pub(crate) fn post_kernel_launch(
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
|
@ -128,6 +157,7 @@ pub(crate) fn post_kernel_launch(
|
||||||
zluda_trace_common::replay::save(
|
zluda_trace_common::replay::save(
|
||||||
file,
|
file,
|
||||||
pre_state.kernel_name,
|
pre_state.kernel_name,
|
||||||
|
true,
|
||||||
zluda_trace_common::replay::LaunchConfig {
|
zluda_trace_common::replay::LaunchConfig {
|
||||||
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
||||||
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
||||||
|
|
|
@ -28,6 +28,7 @@ pub(crate) struct StateTracker {
|
||||||
pub(crate) enqueue_counter: usize,
|
pub(crate) enqueue_counter: usize,
|
||||||
pub(crate) override_cc: Option<(u32, u32)>,
|
pub(crate) override_cc: Option<(u32, u32)>,
|
||||||
pub(crate) kernel_name_filter: Option<regex::Regex>,
|
pub(crate) kernel_name_filter: Option<regex::Regex>,
|
||||||
|
pub(crate) kernel_no_output: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct ParsedModule {
|
pub(crate) struct ParsedModule {
|
||||||
|
@ -57,6 +58,7 @@ impl StateTracker {
|
||||||
enqueue_counter: 0,
|
enqueue_counter: 0,
|
||||||
override_cc: settings.override_cc,
|
override_cc: settings.override_cc,
|
||||||
kernel_name_filter: settings.kernel_name_filter.clone(),
|
kernel_name_filter: settings.kernel_name_filter.clone(),
|
||||||
|
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ use tar::Header;
|
||||||
#[derive(serde::Serialize, serde::Deserialize)]
|
#[derive(serde::Serialize, serde::Deserialize)]
|
||||||
pub struct Manifest {
|
pub struct Manifest {
|
||||||
pub kernel_name: String,
|
pub kernel_name: String,
|
||||||
|
pub outputs: bool,
|
||||||
pub config: LaunchConfig,
|
pub config: LaunchConfig,
|
||||||
pub parameters: Vec<Parameter>,
|
pub parameters: Vec<Parameter>,
|
||||||
}
|
}
|
||||||
|
@ -46,6 +47,7 @@ pub struct KernelParameter {
|
||||||
pub fn save(
|
pub fn save(
|
||||||
writer: impl Write,
|
writer: impl Write,
|
||||||
kernel_name: String,
|
kernel_name: String,
|
||||||
|
has_outputs: bool,
|
||||||
config: LaunchConfig,
|
config: LaunchConfig,
|
||||||
source: String,
|
source: String,
|
||||||
kernel_params: Vec<KernelParameter>,
|
kernel_params: Vec<KernelParameter>,
|
||||||
|
@ -54,6 +56,7 @@ pub fn save(
|
||||||
let mut builder = tar::Builder::new(archive);
|
let mut builder = tar::Builder::new(archive);
|
||||||
let (mut header, manifest) = Manifest {
|
let (mut header, manifest) = Manifest {
|
||||||
kernel_name,
|
kernel_name,
|
||||||
|
outputs: has_outputs,
|
||||||
config,
|
config,
|
||||||
parameters: kernel_params
|
parameters: kernel_params
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -86,6 +89,9 @@ pub fn save(
|
||||||
let mut header = Header::new_gnu();
|
let mut header = Header::new_gnu();
|
||||||
header.set_size(data_before.len() as u64);
|
header.set_size(data_before.len() as u64);
|
||||||
builder.append_data(&mut header, &*path, &*data_before)?;
|
builder.append_data(&mut header, &*path, &*data_before)?;
|
||||||
|
if !has_outputs {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let path = format!("param_{i}_ptr_{offset_in_param}_post.bin");
|
let path = format!("param_{i}_ptr_{offset_in_param}_post.bin");
|
||||||
let mut header = Header::new_gnu();
|
let mut header = Header::new_gnu();
|
||||||
header.set_size(data_after.len() as u64);
|
header.set_size(data_after.len() as u64);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue