From bdcef897cca85c2213f1c8689f733d4d755cddbb Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 19 Dec 2021 01:18:03 +0100 Subject: [PATCH] Start converting zluda_dump logging to provide more detailed --- zluda_dump/README.md | 2 +- zluda_dump/src/cuda.rs | 3 +- zluda_dump/src/format.rs | 361 +++++++++++++++++++++++++++++++++++++++ zluda_dump/src/lib.rs | 45 +++-- zluda_dump/src/log.rs | 65 +++++-- 5 files changed, 450 insertions(+), 26 deletions(-) create mode 100644 zluda_dump/src/format.rs diff --git a/zluda_dump/README.md b/zluda_dump/README.md index 1e7c03b..ba82a1c 100644 --- a/zluda_dump/README.md +++ b/zluda_dump/README.md @@ -1,3 +1,3 @@ grep -E '^cu.*' log.txt | sed 's/(.*//g' | sort | uniq > uniq_host.txt -cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt +cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt \ No newline at end of file diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index d9d57c4..3836137 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -2513,11 +2513,12 @@ extern_redirect_with_post! { ) -> CUresult; super::cuModuleLoadDataEx_Post; } -extern_redirect! { +extern_redirect_with_post! { pub fn cuModuleLoadFatBinary( module: *mut CUmodule, fatCubin: *const ::std::os::raw::c_void, ) -> CUresult; + super::cuModuleLoadFatBinary_Post; } extern_redirect! 
{ pub fn cuModuleUnload(hmod: CUmodule) -> CUresult; diff --git a/zluda_dump/src/format.rs b/zluda_dump/src/format.rs new file mode 100644 index 0000000..df6d4f6 --- /dev/null +++ b/zluda_dump/src/format.rs @@ -0,0 +1,366 @@ +use std::{ + ffi::{c_void, CStr}, + fmt::Formatter, + io::Write, + ptr, +}; + +use crate::cuda::*; + +/* Renders a single CUDA call argument into the log after the call has returned; `result` is that call's return code. */ pub(crate) trait FormatCudaObject { + fn write_post_execution(self, result: CUresult, f: &mut impl Write); +} + +/* Shared pointer formatter: prints NULL for a null pointer, NONE when the call failed (the pointee is not read in that case), otherwise copies the pointee out and formats it. */ fn write_post_execution_ptr<T: FormatCudaObject + Copy>( + t: *const T, + result: CUresult, + f: &mut impl Write, +) { + if t == ptr::null() { + write!(f, "NULL").ok(); + } else if result != CUresult::CUDA_SUCCESS { + write!(f, "NONE").ok(); + } else { + unsafe { *t }.write_post_execution(result, f) + } +} + +impl<T: FormatCudaObject + Copy> FormatCudaObject for *mut T { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write_post_execution_ptr(self, result, f) + } +} + +impl<T: FormatCudaObject + Copy> FormatCudaObject for *const T { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write_post_execution_ptr(self, result, f) + } +} + +impl FormatCudaObject for CUmodule { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{:p}", self).ok(); + } +} + +impl FormatCudaObject for CUfunction { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{:p}", self).ok(); + } +} + +impl FormatCudaObject for *mut c_void { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{:p}", self).ok(); + } +} + +impl FormatCudaObject for *const c_void { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{:p}", self).ok(); + } +} + +/* C string argument: printed inside double quotes; a null pointer prints NULL and invalid UTF-8 is replaced lossily, so formatting never panics on the argument's content. */ impl FormatCudaObject for *const i8 { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + if self.is_null() { + write!(f, "NULL").ok(); + } else { + /* SAFETY: the pointer is non-null (checked above); NUL termination is the caller's contract and cannot be verified here. */ + write!(f, "\"{}\"", unsafe { CStr::from_ptr(self) }.to_string_lossy()).ok(); + } + } +} + +impl FormatCudaObject for u32 { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{}", self).ok(); + } 
+} + +impl FormatCudaObject for i32 { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{}", self).ok(); + } +} + +impl FormatCudaObject for CUdevice { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + write!(f, "{}", self.0).ok(); + } +} + +/* Prints the symbolic CU_JIT_* name, or the raw numeric value when stringify_cujit_option has no entry for the option. */ impl FormatCudaObject for CUjit_option { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + /* Write errors are discarded explicitly with .ok() in each arm, matching the other impls, instead of leaving an unused io::Result behind. */ match stringify_cujit_option(self) { + Some(text) => write!(f, "{}", text).ok(), + None => write!(f, "{}", self.0).ok(), + }; + } +} + +impl FormatCudaObject for CUuuid { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + let guid = self.bytes; + write!(f, "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]).ok(); + } +} + +impl FormatCudaObject for CUdevice_attribute { + fn write_post_execution(self, result: CUresult, f: &mut impl Write) { + match stringify_cudevice_attribute(self) { + Some(text) => write!(f, "{}", text), + None => write!(f, "{}", self.0), + } + .ok(); + } +} + +macro_rules! stringify_enum { + ($fn_name:ident, $type_:ident, [ $($variant:ident),+ ]) => { + pub(crate) fn $fn_name(x: $type_) -> Option<&'static str> { + match x { + $( + $type_::$variant => Some(stringify!($variant)), + )+ + _ => None + } + } + } +} + +stringify_enum! 
{ + stringify_cudevice_attribute, + CUdevice_attribute_enum, + [ + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + CU_DEVICE_ATTRIBUTE_MAX_PITCH, + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_CLOCK_RATE, + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, + CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, + CU_DEVICE_ATTRIBUTE_INTEGRATED, + CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, + CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES, + CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT, + CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, + CU_DEVICE_ATTRIBUTE_ECC_ENABLED, + CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, + CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, + CU_DEVICE_ATTRIBUTE_TCC_DRIVER, + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, + 
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, + CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, + CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, + CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, + 
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED, + CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, + CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, + CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, + CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, + CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, + CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, + CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, + CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, + CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, + CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS, + CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR, + CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, + CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH, + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, + CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES, + CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED, + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, + CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED, + CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED, + CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, + CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED, + CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, + CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED, + CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, + CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, + CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, + CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, + CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, + CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED + ] +} + +stringify_enum! 
{ + stringify_cujit_option, + CUjit_option, + [ + CU_JIT_MAX_REGISTERS, + CU_JIT_THREADS_PER_BLOCK, + CU_JIT_WALL_TIME, + CU_JIT_INFO_LOG_BUFFER, + CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + CU_JIT_ERROR_LOG_BUFFER, + CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + CU_JIT_OPTIMIZATION_LEVEL, + CU_JIT_TARGET_FROM_CUCONTEXT, + CU_JIT_TARGET, + CU_JIT_FALLBACK_STRATEGY, + CU_JIT_GENERATE_DEBUG_INFO, + CU_JIT_LOG_VERBOSE, + CU_JIT_GENERATE_LINE_INFO, + CU_JIT_CACHE_MODE, + CU_JIT_NEW_SM3X_OPT, + CU_JIT_FAST_COMPILE, + CU_JIT_GLOBAL_SYMBOL_NAMES, + CU_JIT_GLOBAL_SYMBOL_ADDRESSES, + CU_JIT_GLOBAL_SYMBOL_COUNT, + CU_JIT_NUM_OPTIONS + ] +} + +stringify_enum! { + stringify_curesult, + CUresult, + [ + CUDA_SUCCESS, + CUDA_ERROR_INVALID_VALUE, + CUDA_ERROR_OUT_OF_MEMORY, + CUDA_ERROR_NOT_INITIALIZED, + CUDA_ERROR_DEINITIALIZED, + CUDA_ERROR_PROFILER_DISABLED, + CUDA_ERROR_PROFILER_NOT_INITIALIZED, + CUDA_ERROR_PROFILER_ALREADY_STARTED, + CUDA_ERROR_PROFILER_ALREADY_STOPPED, + CUDA_ERROR_NO_DEVICE, + CUDA_ERROR_INVALID_DEVICE, + CUDA_ERROR_INVALID_IMAGE, + CUDA_ERROR_INVALID_CONTEXT, + CUDA_ERROR_CONTEXT_ALREADY_CURRENT, + CUDA_ERROR_MAP_FAILED, + CUDA_ERROR_UNMAP_FAILED, + CUDA_ERROR_ARRAY_IS_MAPPED, + CUDA_ERROR_ALREADY_MAPPED, + CUDA_ERROR_NO_BINARY_FOR_GPU, + CUDA_ERROR_ALREADY_ACQUIRED, + CUDA_ERROR_NOT_MAPPED, + CUDA_ERROR_NOT_MAPPED_AS_ARRAY, + CUDA_ERROR_NOT_MAPPED_AS_POINTER, + CUDA_ERROR_ECC_UNCORRECTABLE, + CUDA_ERROR_UNSUPPORTED_LIMIT, + CUDA_ERROR_CONTEXT_ALREADY_IN_USE, + CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, + CUDA_ERROR_INVALID_PTX, + CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, + CUDA_ERROR_NVLINK_UNCORRECTABLE, + CUDA_ERROR_JIT_COMPILER_NOT_FOUND, + CUDA_ERROR_INVALID_SOURCE, + CUDA_ERROR_FILE_NOT_FOUND, + CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + CUDA_ERROR_OPERATING_SYSTEM, + CUDA_ERROR_INVALID_HANDLE, + CUDA_ERROR_ILLEGAL_STATE, + CUDA_ERROR_NOT_FOUND, + CUDA_ERROR_NOT_READY, + CUDA_ERROR_ILLEGAL_ADDRESS, + 
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + CUDA_ERROR_LAUNCH_TIMEOUT, + CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, + CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, + CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, + CUDA_ERROR_CONTEXT_IS_DESTROYED, + CUDA_ERROR_ASSERT, + CUDA_ERROR_TOO_MANY_PEERS, + CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, + CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, + CUDA_ERROR_HARDWARE_STACK_ERROR, + CUDA_ERROR_ILLEGAL_INSTRUCTION, + CUDA_ERROR_MISALIGNED_ADDRESS, + CUDA_ERROR_INVALID_ADDRESS_SPACE, + CUDA_ERROR_INVALID_PC, + CUDA_ERROR_LAUNCH_FAILED, + CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, + CUDA_ERROR_NOT_PERMITTED, + CUDA_ERROR_NOT_SUPPORTED, + CUDA_ERROR_SYSTEM_NOT_READY, + CUDA_ERROR_SYSTEM_DRIVER_MISMATCH, + CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE, + CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED, + CUDA_ERROR_STREAM_CAPTURE_INVALIDATED, + CUDA_ERROR_STREAM_CAPTURE_MERGE, + CUDA_ERROR_STREAM_CAPTURE_UNMATCHED, + CUDA_ERROR_STREAM_CAPTURE_UNJOINED, + CUDA_ERROR_STREAM_CAPTURE_ISOLATION, + CUDA_ERROR_STREAM_CAPTURE_IMPLICIT, + CUDA_ERROR_CAPTURED_EVENT, + CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD, + CUDA_ERROR_TIMEOUT, + CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE, + CUDA_ERROR_UNKNOWN + ] +} diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 780b9e8..1eb70e2 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -42,6 +42,11 @@ macro_rules! extern_redirect { }; } +macro_rules! count_tts { + () => {0usize}; + ($_head:tt $($tail:tt)*) => {1usize + count_tts!($($tail)*)}; +} + macro_rules! extern_redirect_with_post { ( pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ; @@ -53,9 +58,18 @@ macro_rules! 
extern_redirect_with_post { let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; typed_fn($( $arg_id ),*) }; + let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| { + let arg_count = (count_tts!($($arg_id),*) + 1) / 2; + fn_logger.begin_writing_arguments(arg_count); + $( + fn_logger.write_single_argument(result, $arg_id); + )* + fn_logger.end_writing_arguments(); + }; crate::handle_cuda_function_call_with_probes( stringify!($fn_name), || (), original_fn, + get_formatted_args, move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result ) ) } @@ -81,6 +95,7 @@ macro_rules! extern_redirect_with { #[allow(warnings)] mod cuda; mod dark_api; +mod format; mod log; #[cfg_attr(windows, path = "os_win.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")] @@ -294,6 +309,7 @@ fn handle_cuda_function_call_with_probes( func: &'static str, pre_probe: impl FnOnce() -> T, original_cuda_fn: impl FnOnce(NonNull) -> CUresult, + print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult), post_probe: PostFn, ) -> CUresult where @@ -325,6 +341,7 @@ where let pre_result = pre_probe(); let cu_result = original_cuda_fn(fn_ptr); logger.result = Some(cu_result); + print_arguments_fn(&mut logger, cu_result); post_probe( &mut logger, &mut delayed_state.cuda_state, @@ -1220,6 +1237,7 @@ struct FatbincWrapper { } const FATBIN_MAGIC: c_uint = 0xBA55ED50; +const LEGACY_FATBIN_MAGIC: c_uint = 0x1EE55A01; const FATBIN_VERSION: c_ushort = 0x01; #[repr(C, align(8))] @@ -1484,16 +1502,6 @@ pub(crate) fn cuModuleGetFunction_Post( state: &mut trace::StateTracker, result: CUresult, ) { - if !state.module_exists(hmod) { - fn_logger.log(log::LogEntry::UnknownModule(hmod)) - } - match unsafe { CStr::from_ptr(name) }.to_str() { - Ok(str) => fn_logger.log(log::LogEntry::FunctionParameter { - name: "name", - value: str.to_string(), - }), - Err(e) => 
fn_logger.log(log::LogEntry::MalformedFunctionName(e)), - } } #[allow(non_snake_case)] @@ -1505,10 +1513,6 @@ pub(crate) fn cuDeviceGetAttribute_Post( state: &mut trace::StateTracker, result: CUresult, ) { - fn_logger.log(log::LogEntry::FunctionParameter { - name: "attrib", - value: attrib.0.to_string(), - }); } #[allow(non_snake_case)] @@ -1524,3 +1528,16 @@ pub(crate) fn cuDeviceComputeCapability_Post( unsafe { *major = major_ver_override as i32 }; } } + +#[allow(non_snake_case)] +pub(crate) fn cuModuleLoadFatBinary_Post( + module: *mut CUmodule, + fatCubin: *const ::std::os::raw::c_void, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if result == CUresult::CUDA_SUCCESS { + /* NOTE(review): this panics when the wrapped call returned CUDA_SUCCESS and does nothing otherwise; presumably a placeholder until fat binary handling is written. Confirm before merging. */ panic!() + } +} diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index 57c804c..ef36acd 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -1,5 +1,7 @@ use crate::cuda::CUmodule; use crate::cuda::CUuuid; +use crate::format; +use crate::format::FormatCudaObject; use super::CUresult; use super::Settings; @@ -202,28 +204,35 @@ impl Factory { pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger { FunctionLogger { result: None, - name: Cow::Borrowed(func), + name: CudaFunctionName::Normal(func), fallible_emitter: &mut self.fallible_emitter, infallible_emitter: &mut self.infallible_emitter, write_buffer: &mut self.write_buffer, log_queue: &mut self.log_queue, + finished_writing_args: false, + args_to_write: 0, } } - pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger { - let guid = guid.bytes; - let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx); + pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, index: usize) -> 
FunctionLogger { FunctionLogger { result: None, - name: Cow::Owned(fn_name), + name: CudaFunctionName::Dark { guid, index }, fallible_emitter: &mut self.fallible_emitter, infallible_emitter: &mut self.infallible_emitter, write_buffer: &mut self.write_buffer, log_queue: &mut self.log_queue, + finished_writing_args: false, + args_to_write: 0, } } } +enum CudaFunctionName { + Normal(&'static str), + Dark { guid: CUuuid, index: usize }, +} + // This encapsulates log output for a single function call. // It's a separate struct and not just a plain function for two reasons: // * While we want to always display return code before logging errors, @@ -231,11 +240,13 @@ impl Factory { // * We want to handle panics gracefully with Drop pub(crate) struct FunctionLogger<'a> { pub(crate) result: Option, - name: Cow<'static, str>, + name: CudaFunctionName, infallible_emitter: &'a mut Box, fallible_emitter: &'a mut Option>, write_buffer: &'a mut WriteBuffer, log_queue: &'a mut Vec, + args_to_write: usize, + finished_writing_args: bool, } impl<'a> FunctionLogger<'a> { @@ -250,11 +261,16 @@ impl<'a> FunctionLogger<'a> { } fn flush_log_queue_to_write_buffer(&mut self) { - self.write_buffer.start_line(); - self.write_buffer.write(&self.name); - self.write_buffer.write("(...) -> "); + // TODO: remove this once everything has been converted to detailed logging + if !self.finished_writing_args { + self.begin_writing_arguments(0); + self.write_buffer.write("...) 
-> "); + } if let Some(result) = self.result { - write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!()); + match format::stringify_curesult(result) { + Some(text) => self.write_buffer.write(text), + None => write!(self.write_buffer, "{}", result.0).unwrap(), + } } else { self.write_buffer.write("(UNKNOWN)"); }; @@ -274,6 +290,35 @@ impl<'a> FunctionLogger<'a> { self.write_buffer.end_line(); self.write_buffer.finish(); } + + /* Writes the function name (or `{GUID}::index` for a dark API entry) followed by the opening parenthesis, and records how many arguments will follow. */ pub(crate) fn begin_writing_arguments(&mut self, len: usize) { + self.args_to_write = len; + match self.name { + CudaFunctionName::Normal(fn_name) => self.write_buffer.write(fn_name), + CudaFunctionName::Dark { guid, index } => { + guid.write_post_execution(CUresult::CUDA_SUCCESS, &mut self.write_buffer); + write!(&mut self.write_buffer, "::{}", index).ok(); + } + } + self.write_buffer.write("(") + } + + /* Formats one argument and appends a ", " separator unless it was the last one announced to begin_writing_arguments. */ pub(crate) fn write_single_argument<'x>( + &mut self, + result: CUresult, + arg: impl FormatCudaObject, + ) { + /* Saturating: a call beyond the announced count must not underflow the counter (panic in debug builds, wrap-around in release). */ self.args_to_write = self.args_to_write.saturating_sub(1); + arg.write_post_execution(result, self.write_buffer); + if self.args_to_write != 0 { + self.write_buffer.write(", ") + } + } + + /* Closes the argument list and marks it as written, so flushing skips the "...)" fallback. */ pub(crate) fn end_writing_arguments(&mut self) { + self.write_buffer.write(") -> "); + self.finished_writing_args = true; + } } impl<'a> Drop for FunctionLogger<'a> {