Merge commit '07acc64d33' into demo_mode2

This commit is contained in:
Andrzej Janik 2025-09-22 21:18:18 +00:00
commit 0c4e103f8f
6 changed files with 123 additions and 47 deletions

View file

@ -1656,7 +1656,6 @@ impl<'a> MethodEmitContext<'a> {
.ok_or_else(|| error_mismatched_type())?,
);
let src2 = self.resolver.value(src2)?;
self.resolver.with_result(arguments.dst, |dst| {
let vec = unsafe {
LLVMBuildInsertElement(
self.builder,
@ -1666,7 +1665,7 @@ impl<'a> MethodEmitContext<'a> {
LLVM_UNNAMED.as_ptr(),
)
};
unsafe {
self.resolver.with_result(arguments.dst, |dst| unsafe {
LLVMBuildInsertElement(
self.builder,
vec,
@ -1674,7 +1673,6 @@ impl<'a> MethodEmitContext<'a> {
LLVMConstInt(LLVMInt32TypeInContext(self.context), 0, false as i32),
dst,
)
}
})
} else {
self.resolver.with_result(arguments.dst, |dst| unsafe {
@ -2200,7 +2198,7 @@ impl<'a> MethodEmitContext<'a> {
Some(&ast::ScalarType::F32.into()),
vec![(
self.resolver.value(arguments.src)?,
get_scalar_type(self.context, ast::ScalarType::F32.into()),
get_scalar_type(self.context, ast::ScalarType::F32),
)],
)?;
Ok(())
@ -2703,14 +2701,14 @@ impl<'a> MethodEmitContext<'a> {
let load = unsafe { LLVMBuildLoad2(self.builder, from_type, from, LLVM_UNNAMED.as_ptr()) };
unsafe {
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
LLVMSetAlignment(load, cp_size.as_u64() as u32);
}
let extended = unsafe { LLVMBuildZExt(self.builder, load, to_type, LLVM_UNNAMED.as_ptr()) };
unsafe { LLVMBuildStore(self.builder, extended, to) };
let store = unsafe { LLVMBuildStore(self.builder, extended, to) };
unsafe {
LLVMSetAlignment(load, (cp_size.as_u64() as u32) * 8);
LLVMSetAlignment(store, cp_size.as_u64() as u32);
}
Ok(())
}
@ -2990,7 +2988,7 @@ fn get_scope_membar(scope: ast::MemScope) -> Result<*const i8, TranslateError> {
Ok(match scope {
ast::MemScope::Cta => c"workgroup",
ast::MemScope::Gpu => c"agent",
ast::MemScope::Sys => c"",
ast::MemScope::Sys => c"system",
ast::MemScope::Cluster => todo!(),
}
.as_ptr())

View file

@ -227,8 +227,9 @@ fn int_immediate<'a, 'input>(input: &mut PtxParser<'a, 'input>) -> PResult<ast::
take_error((opt(Token::Minus), num).map(|(neg, x)| {
let (num, radix, is_unsigned) = x;
if neg.is_some() {
match i64::from_str_radix(num, radix) {
Ok(x) => Ok(ast::ImmediateValue::S64(-x)),
let full_number = format!("-{num}");
match i64::from_str_radix(&full_number, radix) {
Ok(x) => Ok(ast::ImmediateValue::S64(x)),
Err(err) => Err((ast::ImmediateValue::S64(0), PtxError::from(err))),
}
} else if is_unsigned {

View file

@ -1281,6 +1281,7 @@ struct Settings {
libcuda_path: String,
override_cc: Option<(u32, u32)>,
kernel_name_filter: Option<regex::Regex>,
kernel_no_output: Option<bool>,
}
impl Settings {
@ -1343,11 +1344,28 @@ impl Settings {
})
}),
};
let kernel_no_output = match env::var("ZLUDA_SAVE_KERNELS_NO_OUTPUT") {
Err(env::VarError::NotPresent) => None,
Err(e) => {
logger.log(log::ErrorEntry::ErrorBox(Box::new(e) as _));
None
}
Ok(env_string) => logger
.try_return(|| {
str::parse::<u8>(&env_string).map_err(|err| ErrorEntry::InvalidEnvVar {
var: "ZLUDA_SAVE_KERNELS_NO_OUTPUT",
pattern: "number",
value: format!("{} ({})", env_string, err),
})
})
.map(|x| x != 0),
};
Settings {
dump_dir,
libcuda_path,
override_cc,
kernel_name_filter,
kernel_no_output,
}
}
@ -1513,25 +1531,45 @@ pub(crate) fn cuLibraryLoadData_Post(
#[allow(non_snake_case)]
pub(crate) fn cuLaunchKernel_Pre(
f: cuda_types::cuda::CUfunction,
_gridDimX: ::core::ffi::c_uint,
_gridDimY: ::core::ffi::c_uint,
_gridDimZ: ::core::ffi::c_uint,
_blockDimX: ::core::ffi::c_uint,
_blockDimY: ::core::ffi::c_uint,
_blockDimZ: ::core::ffi::c_uint,
_sharedMemBytes: ::core::ffi::c_uint,
stream: cuda_types::cuda::CUstream,
gridDimX: ::core::ffi::c_uint,
gridDimY: ::core::ffi::c_uint,
gridDimZ: ::core::ffi::c_uint,
blockDimX: ::core::ffi::c_uint,
blockDimY: ::core::ffi::c_uint,
blockDimZ: ::core::ffi::c_uint,
sharedMemBytes: ::core::ffi::c_uint,
hStream: cuda_types::cuda::CUstream,
kernel_params: *mut *mut ::core::ffi::c_void,
_extra: *mut *mut ::core::ffi::c_void,
libcuda: &mut CudaDynamicFns,
state: &mut trace::StateTracker,
fn_logger: &mut FnCallLog,
) -> Option<replay::LaunchPreState> {
launch_kernel_pre(f, stream, kernel_params, libcuda, state, fn_logger)
launch_kernel_pre(
f,
CUlaunchConfig {
gridDimX,
gridDimY,
gridDimZ,
blockDimX,
blockDimY,
blockDimZ,
sharedMemBytes,
hStream,
attrs: ptr::null_mut(),
numAttrs: 0,
},
hStream,
kernel_params,
libcuda,
state,
fn_logger,
)
}
fn launch_kernel_pre(
f: cuda_types::cuda::CUfunction,
config: CUlaunchConfig,
stream: cuda_types::cuda::CUstream,
kernel_params: *mut *mut ::core::ffi::c_void,
libcuda: &mut CudaDynamicFns,
@ -1546,7 +1584,7 @@ fn launch_kernel_pre(
if state.dump_dir().is_none() {
return None;
}
replay::pre_kernel_launch(libcuda, state, fn_logger, f, stream, kernel_params)
replay::pre_kernel_launch(libcuda, state, fn_logger, config, f, stream, kernel_params)
}
#[allow(non_snake_case)]
@ -1602,6 +1640,7 @@ pub(crate) fn cuLaunchKernelEx_Pre(
) -> Option<replay::LaunchPreState> {
launch_kernel_pre(
f,
unsafe { *config },
unsafe { *config }.hStream,
kernel_params,
libcuda,

View file

@ -16,6 +16,7 @@ pub(crate) fn pre_kernel_launch(
libcuda: &mut CudaDynamicFns,
state: &mut trace::StateTracker,
fn_logger: &mut FnCallLog,
config: CUlaunchConfig,
f: CUfunction,
stream: CUstream,
args: *mut *mut std::ffi::c_void,
@ -60,12 +61,15 @@ pub(crate) fn pre_kernel_launch(
(&mut start as *mut usize).cast::<std::ffi::c_void>(),
(&mut size as *mut usize).cast::<std::ffi::c_void>(),
];
if let Some(Ok(())) = libcuda.cuPointerGetAttributes(
fn_logger.try_cuda(|| {
libcuda.cuPointerGetAttributes(
2,
attrs.as_mut_ptr(),
data.as_mut_ptr(),
CUdeviceptr_v2(maybe_ptr as _),
) {
)
})?;
if size != 0 {
let mut pre_buffer = vec![0u8; size];
let post_buffer = vec![0u8; size];
fn_logger.try_cuda(|| {
@ -86,11 +90,36 @@ pub(crate) fn pre_kernel_launch(
device_ptrs: ptr_overrides,
});
}
if state.kernel_no_output {
let enqueue_counter = state.enqueue_counter;
let kernel_name = name;
let mut path = state.dump_dir()?.to_path_buf();
path.push(format!("kernel_{enqueue_counter}_{kernel_name}.tar.zst"));
let file = fn_logger
.try_return(|| std::fs::File::create_new(path).map_err(ErrorEntry::IoError))?;
fn_logger.try_return(|| {
zluda_trace_common::replay::save(
file,
name.to_string(),
false,
zluda_trace_common::replay::LaunchConfig {
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
shared_mem_bytes: config.sharedMemBytes,
},
source.to_string(),
all_params,
)
.map_err(ErrorEntry::IoError)
});
None
} else {
Some(LaunchPreState {
kernel_name: name.to_string(),
source: source.to_string(),
kernel_params: all_params,
})
}
}
pub(crate) fn post_kernel_launch(
@ -128,6 +157,7 @@ pub(crate) fn post_kernel_launch(
zluda_trace_common::replay::save(
file,
pre_state.kernel_name,
true,
zluda_trace_common::replay::LaunchConfig {
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),

View file

@ -28,6 +28,7 @@ pub(crate) struct StateTracker {
pub(crate) enqueue_counter: usize,
pub(crate) override_cc: Option<(u32, u32)>,
pub(crate) kernel_name_filter: Option<regex::Regex>,
pub(crate) kernel_no_output: bool,
}
pub(crate) struct ParsedModule {
@ -57,6 +58,7 @@ impl StateTracker {
enqueue_counter: 0,
override_cc: settings.override_cc,
kernel_name_filter: settings.kernel_name_filter.clone(),
kernel_no_output: settings.kernel_no_output.unwrap_or(false),
}
}

View file

@ -5,6 +5,7 @@ use tar::Header;
/// Manifest record that `save` builds for a saved kernel launch.
///
/// Derives serde `Serialize`/`Deserialize`.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct Manifest {
/// Kernel name, taken from `save`'s `kernel_name` argument.
pub kernel_name: String,
/// Copy of `save`'s `has_outputs` argument. When it is `false`, `save`
/// skips the `param_{i}_ptr_{offset}_post.bin` entries.
pub outputs: bool,
/// Launch configuration passed to `save`; callers in this change fill in
/// `grid_dim`, `block_dim` and `shared_mem_bytes`.
pub config: LaunchConfig,
/// Built in `save` from an iterator over `kernel_params`.
// NOTE(review): the rest of that iterator chain is not visible here —
// confirm how each `KernelParameter` maps to a `Parameter`.
pub parameters: Vec<Parameter>,
}
@ -46,6 +47,7 @@ pub struct KernelParameter {
pub fn save(
writer: impl Write,
kernel_name: String,
has_outputs: bool,
config: LaunchConfig,
source: String,
kernel_params: Vec<KernelParameter>,
@ -54,6 +56,7 @@ pub fn save(
let mut builder = tar::Builder::new(archive);
let (mut header, manifest) = Manifest {
kernel_name,
outputs: has_outputs,
config,
parameters: kernel_params
.iter()
@ -86,6 +89,9 @@ pub fn save(
let mut header = Header::new_gnu();
header.set_size(data_before.len() as u64);
builder.append_data(&mut header, &*path, &*data_before)?;
if !has_outputs {
continue;
}
let path = format!("param_{i}_ptr_{offset_in_param}_post.bin");
let mut header = Header::new_gnu();
header.set_size(data_after.len() as u64);