mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 11:49:04 +00:00
Merge commit '07acc64d33
' into demo_mode2
This commit is contained in:
commit
0c4e103f8f
6 changed files with 123 additions and 47 deletions
|
@ -1656,25 +1656,23 @@ impl<'a> MethodEmitContext<'a> {
|
|||
.ok_or_else(|| error_mismatched_type())?,
|
||||
);
|
||||
let src2 = self.resolver.value(src2)?;
|
||||
self.resolver.with_result(arguments.dst, |dst| {
|
||||
let vec = unsafe {
|
||||
LLVMBuildInsertElement(
|
||||
self.builder,
|
||||
LLVMGetPoison(dst_type),
|
||||
llvm_fn(self.builder, src, packed_type, LLVM_UNNAMED.as_ptr()),
|
||||
LLVMConstInt(LLVMInt32TypeInContext(self.context), 1, false as i32),
|
||||
LLVM_UNNAMED.as_ptr(),
|
||||
)
|
||||
};
|
||||
unsafe {
|
||||
LLVMBuildInsertElement(
|
||||
self.builder,
|
||||
vec,
|
||||
llvm_fn(self.builder, src2, packed_type, LLVM_UNNAMED.as_ptr()),
|
||||
LLVMConstInt(LLVMInt32TypeInContext(self.context), 0, false as i32),
|
||||
dst,
|
||||
)
|
||||
}
|
||||
let vec = unsafe {
|
||||
LLVMBuildInsertElement(
|
||||
self.builder,
|
||||
LLVMGetPoison(dst_type),
|
||||
llvm_fn(self.builder, src, packed_type, LLVM_UNNAMED.as_ptr()),
|
||||
LLVMConstInt(LLVMInt32TypeInContext(self.context), 1, false as i32),
|
||||
LLVM_UNNAMED.as_ptr(),
|
||||
)
|
||||
};
|
||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||
LLVMBuildInsertElement(
|
||||
self.builder,
|
||||
vec,
|
||||
llvm_fn(self.builder, src2, packed_type, LLVM_UNNAMED.as_ptr()),
|
||||
LLVMConstInt(LLVMInt32TypeInContext(self.context), 0, false as i32),
|
||||
dst,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||
|
@ -2200,7 +2198,7 @@ impl<'a> MethodEmitContext<'a> {
|
|||
Some(&ast::ScalarType::F32.into()),
|
||||
vec![(
|
||||
self.resolver.value(arguments.src)?,
|
||||
get_scalar_type(self.context, ast::ScalarType::F32.into()),
|
||||
get_scalar_type(self.context, ast::ScalarType::F32),
|
||||
)],
|
||||
)?;
|
||||
Ok(())
|
||||
|
@ -2703,14 +2701,14 @@ impl<'a> MethodEmitContext<'a> {
|
|||
|
||||
let load = unsafe { LLVMBuildLoad2(self.builder, from_type, from, LLVM_UNNAMED.as_ptr()) };
|
||||
unsafe {
|
||||
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
|
||||
LLVMSetAlignment(load, cp_size.as_u64() as u32);
|
||||
}
|
||||
|
||||
let extended = unsafe { LLVMBuildZExt(self.builder, load, to_type, LLVM_UNNAMED.as_ptr()) };
|
||||
|
||||
unsafe { LLVMBuildStore(self.builder, extended, to) };
|
||||
let store = unsafe { LLVMBuildStore(self.builder, extended, to) };
|
||||
unsafe {
|
||||
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
|
||||
LLVMSetAlignment(store, cp_size.as_u64() as u32);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -2990,7 +2988,7 @@ fn get_scope_membar(scope: ast::MemScope) -> Result<*const i8, TranslateError> {
|
|||
Ok(match scope {
|
||||
ast::MemScope::Cta => c"workgroup",
|
||||
ast::MemScope::Gpu => c"agent",
|
||||
ast::MemScope::Sys => c"",
|
||||
ast::MemScope::Sys => c"system",
|
||||
ast::MemScope::Cluster => todo!(),
|
||||
}
|
||||
.as_ptr())
|
||||
|
|
|
@ -227,8 +227,9 @@ fn int_immediate<'a, 'input>(input: &mut PtxParser<'a, 'input>) -> PResult<ast::
|
|||
take_error((opt(Token::Minus), num).map(|(neg, x)| {
|
||||
let (num, radix, is_unsigned) = x;
|
||||
if neg.is_some() {
|
||||
match i64::from_str_radix(num, radix) {
|
||||
Ok(x) => Ok(ast::ImmediateValue::S64(-x)),
|
||||
let full_number = format!("-{num}");
|
||||
match i64::from_str_radix(&full_number, radix) {
|
||||
Ok(x) => Ok(ast::ImmediateValue::S64(x)),
|
||||
Err(err) => Err((ast::ImmediateValue::S64(0), PtxError::from(err))),
|
||||
}
|
||||
} else if is_unsigned {
|
||||
|
|
|
@ -1281,6 +1281,7 @@ struct Settings {
|
|||
libcuda_path: String,
|
||||
override_cc: Option<(u32, u32)>,
|
||||
kernel_name_filter: Option<regex::Regex>,
|
||||
kernel_no_output: Option<bool>,
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
|
@ -1343,11 +1344,28 @@ impl Settings {
|
|||
})
|
||||
}),
|
||||
};
|
||||
let kernel_no_output = match env::var("ZLUDA_SAVE_KERNELS_NO_OUTPUT") {
|
||||
Err(env::VarError::NotPresent) => None,
|
||||
Err(e) => {
|
||||
logger.log(log::ErrorEntry::ErrorBox(Box::new(e) as _));
|
||||
None
|
||||
}
|
||||
Ok(env_string) => logger
|
||||
.try_return(|| {
|
||||
str::parse::<u8>(&env_string).map_err(|err| ErrorEntry::InvalidEnvVar {
|
||||
var: "ZLUDA_SAVE_KERNELS_NO_OUTPUT",
|
||||
pattern: "number",
|
||||
value: format!("{} ({})", env_string, err),
|
||||
})
|
||||
})
|
||||
.map(|x| x != 0),
|
||||
};
|
||||
Settings {
|
||||
dump_dir,
|
||||
libcuda_path,
|
||||
override_cc,
|
||||
kernel_name_filter,
|
||||
kernel_no_output,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1513,25 +1531,45 @@ pub(crate) fn cuLibraryLoadData_Post(
|
|||
#[allow(non_snake_case)]
|
||||
pub(crate) fn cuLaunchKernel_Pre(
|
||||
f: cuda_types::cuda::CUfunction,
|
||||
_gridDimX: ::core::ffi::c_uint,
|
||||
_gridDimY: ::core::ffi::c_uint,
|
||||
_gridDimZ: ::core::ffi::c_uint,
|
||||
_blockDimX: ::core::ffi::c_uint,
|
||||
_blockDimY: ::core::ffi::c_uint,
|
||||
_blockDimZ: ::core::ffi::c_uint,
|
||||
_sharedMemBytes: ::core::ffi::c_uint,
|
||||
stream: cuda_types::cuda::CUstream,
|
||||
gridDimX: ::core::ffi::c_uint,
|
||||
gridDimY: ::core::ffi::c_uint,
|
||||
gridDimZ: ::core::ffi::c_uint,
|
||||
blockDimX: ::core::ffi::c_uint,
|
||||
blockDimY: ::core::ffi::c_uint,
|
||||
blockDimZ: ::core::ffi::c_uint,
|
||||
sharedMemBytes: ::core::ffi::c_uint,
|
||||
hStream: cuda_types::cuda::CUstream,
|
||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||
_extra: *mut *mut ::core::ffi::c_void,
|
||||
libcuda: &mut CudaDynamicFns,
|
||||
state: &mut trace::StateTracker,
|
||||
fn_logger: &mut FnCallLog,
|
||||
) -> Option<replay::LaunchPreState> {
|
||||
launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger)
|
||||
launch_kernel_pre(
|
||||
f,
|
||||
CUlaunchConfig {
|
||||
gridDimX,
|
||||
gridDimY,
|
||||
gridDimZ,
|
||||
blockDimX,
|
||||
blockDimY,
|
||||
blockDimZ,
|
||||
sharedMemBytes,
|
||||
hStream,
|
||||
attrs: ptr::null_mut(),
|
||||
numAttrs: 0,
|
||||
},
|
||||
hStream,
|
||||
kernel_params,
|
||||
libcuda,
|
||||
state,
|
||||
fn_logger,
|
||||
)
|
||||
}
|
||||
|
||||
fn launch_kernel_pre(
|
||||
f: cuda_types::cuda::CUfunction,
|
||||
config: CUlaunchConfig,
|
||||
stream: cuda_types::cuda::CUstream,
|
||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||
libcuda: &mut CudaDynamicFns,
|
||||
|
@ -1546,7 +1584,7 @@ fn launch_kernel_pre(
|
|||
if state.dump_dir().is_none() {
|
||||
return None;
|
||||
}
|
||||
replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params)
|
||||
replay::pre_kernel_launch(libcuda, state, fn_logger, config, f, stream, kernel_params)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
|
@ -1602,6 +1640,7 @@ pub(crate) fn cuLaunchKernelEx_Pre(
|
|||
) -> Option<replay::LaunchPreState> {
|
||||
launch_kernel_pre(
|
||||
f,
|
||||
unsafe { *config },
|
||||
unsafe { *config }.hStream,
|
||||
kernel_params,
|
||||
libcuda,
|
||||
|
|
|
@ -16,6 +16,7 @@ pub(crate) fn pre_kernel_launch(
|
|||
libcuda: &mut CudaDynamicFns,
|
||||
state: &mut trace::StateTracker,
|
||||
fn_logger: &mut FnCallLog,
|
||||
config: CUlaunchConfig,
|
||||
f: CUfunction,
|
||||
stream: CUstream,
|
||||
args: *mut *mut std::ffi::c_void,
|
||||
|
@ -60,12 +61,15 @@ pub(crate) fn pre_kernel_launch(
|
|||
(&mut start as *mut usize).cast::<std::ffi::c_void>(),
|
||||
(&mut size as *mut usize).cast::<std::ffi::c_void>(),
|
||||
];
|
||||
if let Some(Ok(())) = libcuda.cuPointerGetAttributes(
|
||||
2,
|
||||
attrs.as_mut_ptr(),
|
||||
data.as_mut_ptr(),
|
||||
CUdeviceptr_v2(maybe_ptr as _),
|
||||
) {
|
||||
fn_logger.try_cuda(|| {
|
||||
libcuda.cuPointerGetAttributes(
|
||||
2,
|
||||
attrs.as_mut_ptr(),
|
||||
data.as_mut_ptr(),
|
||||
CUdeviceptr_v2(maybe_ptr as _),
|
||||
)
|
||||
})?;
|
||||
if size != 0 {
|
||||
let mut pre_buffer = vec![0u8; size];
|
||||
let post_buffer = vec![0u8; size];
|
||||
fn_logger.try_cuda(|| {
|
||||
|
@ -86,11 +90,36 @@ pub(crate) fn pre_kernel_launch(
|
|||
device_ptrs: ptr_overrides,
|
||||
});
|
||||
}
|
||||
Some(LaunchPreState {
|
||||
kernel_name: name.to_string(),
|
||||
source: source.to_string(),
|
||||
kernel_params: all_params,
|
||||
})
|
||||
if state.kernel_no_output {
|
||||
let enqueue_counter = state.enqueue_counter;
|
||||
let kernel_name = name;
|
||||
let mut path = state.dump_dir()?.to_path_buf();
|
||||
path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"));
|
||||
let file = fn_logger
|
||||
.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
|
||||
fn_logger.try_return(|| {
|
||||
zluda_trace_common::replay::save(
|
||||
file,
|
||||
name.to_string(),
|
||||
false,
|
||||
zluda_trace_common::replay::LaunchConfig {
|
||||
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
||||
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
||||
shared_mem_bytes: config.sharedMemBytes,
|
||||
},
|
||||
source.to_string(),
|
||||
all_params,
|
||||
)
|
||||
.map_err(ErrorEntry::IoError)
|
||||
});
|
||||
None
|
||||
} else {
|
||||
Some(LaunchPreState {
|
||||
kernel_name: name.to_string(),
|
||||
source: source.to_string(),
|
||||
kernel_params: all_params,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn post_kernel_launch(
|
||||
|
@ -128,6 +157,7 @@ pub(crate) fn post_kernel_launch(
|
|||
zluda_trace_common::replay::save(
|
||||
file,
|
||||
pre_state.kernel_name,
|
||||
true,
|
||||
zluda_trace_common::replay::LaunchConfig {
|
||||
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
||||
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
||||
|
|
|
@ -28,6 +28,7 @@ pub(crate) struct StateTracker {
|
|||
pub(crate) enqueue_counter: usize,
|
||||
pub(crate) override_cc: Option<(u32, u32)>,
|
||||
pub(crate) kernel_name_filter: Option<regex::Regex>,
|
||||
pub(crate) kernel_no_output: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ParsedModule {
|
||||
|
@ -57,6 +58,7 @@ impl StateTracker {
|
|||
enqueue_counter: 0,
|
||||
override_cc: settings.override_cc,
|
||||
kernel_name_filter: settings.kernel_name_filter.clone(),
|
||||
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ use tar::Header;
|
|||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
pub struct Manifest {
|
||||
pub kernel_name: String,
|
||||
pub outputs: bool,
|
||||
pub config: LaunchConfig,
|
||||
pub parameters: Vec<Parameter>,
|
||||
}
|
||||
|
@ -46,6 +47,7 @@ pub struct KernelParameter {
|
|||
pub fn save(
|
||||
writer: impl Write,
|
||||
kernel_name: String,
|
||||
has_outputs: bool,
|
||||
config: LaunchConfig,
|
||||
source: String,
|
||||
kernel_params: Vec<KernelParameter>,
|
||||
|
@ -54,6 +56,7 @@ pub fn save(
|
|||
let mut builder = tar::Builder::new(archive);
|
||||
let (mut header, manifest) = Manifest {
|
||||
kernel_name,
|
||||
outputs: has_outputs,
|
||||
config,
|
||||
parameters: kernel_params
|
||||
.iter()
|
||||
|
@ -86,6 +89,9 @@ pub fn save(
|
|||
let mut header = Header::new_gnu();
|
||||
header.set_size(data_before.len() as u64);
|
||||
builder.append_data(&mut header, &*path, &*data_before)?;
|
||||
if !has_outputs {
|
||||
continue;
|
||||
}
|
||||
let path = format!("param_{i}_ptr_{offset_in_param}_post.bin");
|
||||
let mut header = Header::new_gnu();
|
||||
header.set_size(data_after.len() as u64);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue