Start converting zluda_dump logging to provide more detailed output

This commit is contained in:
Andrzej Janik 2021-12-19 01:18:03 +01:00
parent 971951bc9e
commit bdcef897cc
5 changed files with 450 additions and 26 deletions

View file

@ -1,3 +1,3 @@
grep -E '^cu.*' log.txt | sed 's/(.*//g' | sort | uniq > uniq_host.txt
cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt

View file

@ -2513,11 +2513,12 @@ extern_redirect_with_post! {
) -> CUresult;
super::cuModuleLoadDataEx_Post;
}
extern_redirect! {
extern_redirect_with_post! {
pub fn cuModuleLoadFatBinary(
module: *mut CUmodule,
fatCubin: *const ::std::os::raw::c_void,
) -> CUresult;
super::cuModuleLoadFatBinary_Post;
}
extern_redirect! {
pub fn cuModuleUnload(hmod: CUmodule) -> CUresult;

361
zluda_dump/src/format.rs Normal file
View file

@ -0,0 +1,361 @@
use std::{
ffi::{c_void, CStr},
fmt::Formatter,
io::Write,
ptr,
};
use crate::cuda::*;
/// Formatting hook for values that are logged as arguments of a CUDA call.
///
/// `write_post_execution` consumes the value and writes its textual form into
/// `f`. `result` is the `CUresult` of the call the value belongs to, so an
/// implementation can vary its output based on whether the call succeeded.
pub(crate) trait FormatCudaObject {
fn write_post_execution(self, result: CUresult, f: &mut impl Write);
}
/// Formats the value behind a raw pointer argument.
///
/// * a null pointer is written as `NULL`,
/// * a non-null pointer belonging to a call that did not return
///   `CUDA_SUCCESS` is written as `NONE` (the pointee is not read),
/// * otherwise the pointee is copied out and formatted through its own
///   `FormatCudaObject` implementation.
///
/// Write errors are deliberately ignored.
fn write_post_execution_ptr<T: FormatCudaObject + Copy>(
    t: *const T,
    result: CUresult,
    f: &mut impl Write,
) {
    if t.is_null() {
        write!(f, "NULL").ok();
        return;
    }
    if result != CUresult::CUDA_SUCCESS {
        write!(f, "NONE").ok();
        return;
    }
    // SAFETY: `t` is non-null at this point. That it also points to a valid,
    // initialized `T` is assumed from the successful call result — NOTE(review):
    // this is not verified here.
    let pointee = unsafe { *t };
    pointee.write_post_execution(result, f)
}
/// Formats a `*mut T` argument by forwarding to `write_post_execution_ptr`,
/// which handles the null / failed-call / valid-pointee cases.
impl<T: FormatCudaObject + Copy> FormatCudaObject for *mut T {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write_post_execution_ptr(self, result, f)
}
}
/// Formats a `*const T` argument by forwarding to `write_post_execution_ptr`,
/// which handles the null / failed-call / valid-pointee cases.
impl<T: FormatCudaObject + Copy> FormatCudaObject for *const T {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write_post_execution_ptr(self, result, f)
}
}
/// Writes a `CUmodule` handle using pointer (`{:p}`) formatting.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for CUmodule {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{:p}", self).ok();
}
}
/// Writes a `CUfunction` handle using pointer (`{:p}`) formatting.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for CUfunction {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{:p}", self).ok();
}
}
/// Writes an untyped mutable pointer as its address (`{:p}`); the pointee is
/// never read. `result` is not consulted; write errors are ignored.
impl FormatCudaObject for *mut c_void {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{:p}", self).ok();
}
}
/// Writes an untyped const pointer as its address (`{:p}`); the pointee is
/// never read. `result` is not consulted; write errors are ignored.
impl FormatCudaObject for *const c_void {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{:p}", self).ok();
}
}
/// Writes a C string argument surrounded by double quotes.
///
/// A null pointer is written as `NULL` instead of being dereferenced, and
/// bytes that are not valid UTF-8 are replaced with U+FFFD rather than
/// panicking, so a malformed argument cannot abort the traced application.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for *const i8 {
    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
        if self.is_null() {
            write!(f, "NULL").ok();
            return;
        }
        // SAFETY: the pointer is non-null. It is assumed to reference a
        // NUL-terminated string, as required of C string arguments —
        // NOTE(review): this cannot be verified here.
        // `cast()` converts to the platform's `c_char` expected by `CStr::from_ptr`.
        let text = unsafe { CStr::from_ptr(self.cast()) }.to_string_lossy();
        write!(f, "\"{}\"", text).ok();
    }
}
/// Writes a `u32` argument in decimal. `result` is not consulted; write
/// errors are ignored.
impl FormatCudaObject for u32 {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{}", self).ok();
}
}
/// Writes an `i32` argument in decimal. `result` is not consulted; write
/// errors are ignored.
impl FormatCudaObject for i32 {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{}", self).ok();
}
}
/// Writes a `CUdevice` as the value of its wrapped `.0` field.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for CUdevice {
fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
write!(f, "{}", self.0).ok();
}
}
/// Writes a `CUjit_option` by name when `stringify_cujit_option` knows it,
/// otherwise as the raw numeric value of its `.0` field.
/// `result` is not consulted.
impl FormatCudaObject for CUjit_option {
    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
        match stringify_cujit_option(self) {
            Some(text) => write!(f, "{}", text),
            None => write!(f, "{}", self.0),
        }
        // Logging is best-effort: discard the write result explicitly, the
        // same way the other `FormatCudaObject` implementations do.
        .ok();
    }
}
/// Writes a `CUuuid` in braced GUID notation: sixteen bytes as two-digit
/// upper-case hex, grouped 4-2-2-2-6 and separated by dashes.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for CUuuid {
    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
        let b = self.bytes;
        write!(
            f,
            "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}",
            b[0], b[1], b[2], b[3],
            b[4], b[5],
            b[6], b[7],
            b[8], b[9],
            b[10], b[11], b[12], b[13], b[14], b[15]
        )
        .ok();
    }
}
/// Writes a `CUdevice_attribute` by name when `stringify_cudevice_attribute`
/// knows it, otherwise as the raw numeric value of its `.0` field.
/// `result` is not consulted; write errors are ignored.
impl FormatCudaObject for CUdevice_attribute {
    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
        let outcome = if let Some(name) = stringify_cudevice_attribute(self) {
            write!(f, "{}", name)
        } else {
            write!(f, "{}", self.0)
        };
        outcome.ok();
    }
}
/// Generates `pub(crate) fn $fn_name(x: $type_) -> Option<&'static str>`.
///
/// The generated function matches `x` against each `$type_::$variant` in the
/// order given and returns the variant's identifier as text (via
/// `stringify!`); any value not in the list yields `None`.
macro_rules! stringify_enum {
($fn_name:ident, $type_:ident, [ $($variant:ident),+ ]) => {
pub(crate) fn $fn_name(x: $type_) -> Option<&'static str> {
match x {
$(
$type_::$variant => Some(stringify!($variant)),
)+
// Values outside the supplied list have no known name.
_ => None
}
}
}
}
// Generates `stringify_cudevice_attribute`, which maps a device attribute
// value to its identifier name (`None` for values not listed here).
//
// The deprecated aliases CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK,
// CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK and
// CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_{WIDTH,HEIGHT,NUMSLICES} are
// intentionally omitted: they carry the same numeric values as the
// MAX_SHARED_MEMORY_PER_BLOCK, MAX_REGISTERS_PER_BLOCK and
// MAXIMUM_TEXTURE2D_LAYERED_* entries, so a match arm for them can never be
// selected and the printed name is unaffected by their removal.
stringify_enum! {
    stringify_cudevice_attribute,
    CUdevice_attribute_enum,
    [
        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
        CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
        CU_DEVICE_ATTRIBUTE_WARP_SIZE,
        CU_DEVICE_ATTRIBUTE_MAX_PITCH,
        CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
        CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
        CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
        CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
        CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
        CU_DEVICE_ATTRIBUTE_INTEGRATED,
        CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
        CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT,
        CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
        CU_DEVICE_ATTRIBUTE_ECC_ENABLED,
        CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
        CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
        CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
        CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
        CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
        CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE,
        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
        CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
        CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE,
        CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
        CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT,
        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH,
        CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
        CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
        CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
        CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
        CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID,
        CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO,
        CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,
        CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
        CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM,
        CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS,
        CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS,
        CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR,
        CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH,
        CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH,
        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
        CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES,
        CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
        CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST,
        CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
        CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE,
        CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE,
        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK,
        CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED,
        CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED
    ]
}
// Generates `stringify_cujit_option`, which maps a `CUjit_option` value to
// the identifier name of the matching entry below and returns `None` for
// any value that is not listed.
stringify_enum! {
stringify_cujit_option,
CUjit_option,
[
CU_JIT_MAX_REGISTERS,
CU_JIT_THREADS_PER_BLOCK,
CU_JIT_WALL_TIME,
CU_JIT_INFO_LOG_BUFFER,
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
CU_JIT_ERROR_LOG_BUFFER,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
CU_JIT_OPTIMIZATION_LEVEL,
CU_JIT_TARGET_FROM_CUCONTEXT,
CU_JIT_TARGET,
CU_JIT_FALLBACK_STRATEGY,
CU_JIT_GENERATE_DEBUG_INFO,
CU_JIT_LOG_VERBOSE,
CU_JIT_GENERATE_LINE_INFO,
CU_JIT_CACHE_MODE,
CU_JIT_NEW_SM3X_OPT,
CU_JIT_FAST_COMPILE,
CU_JIT_GLOBAL_SYMBOL_NAMES,
CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
CU_JIT_GLOBAL_SYMBOL_COUNT,
CU_JIT_NUM_OPTIONS
]
}
// Generates `stringify_curesult`, which maps a `CUresult` value to the
// identifier name of the matching entry below and returns `None` for any
// value that is not listed.
stringify_enum! {
stringify_curesult,
CUresult,
[
CUDA_SUCCESS,
CUDA_ERROR_INVALID_VALUE,
CUDA_ERROR_OUT_OF_MEMORY,
CUDA_ERROR_NOT_INITIALIZED,
CUDA_ERROR_DEINITIALIZED,
CUDA_ERROR_PROFILER_DISABLED,
CUDA_ERROR_PROFILER_NOT_INITIALIZED,
CUDA_ERROR_PROFILER_ALREADY_STARTED,
CUDA_ERROR_PROFILER_ALREADY_STOPPED,
CUDA_ERROR_NO_DEVICE,
CUDA_ERROR_INVALID_DEVICE,
CUDA_ERROR_INVALID_IMAGE,
CUDA_ERROR_INVALID_CONTEXT,
CUDA_ERROR_CONTEXT_ALREADY_CURRENT,
CUDA_ERROR_MAP_FAILED,
CUDA_ERROR_UNMAP_FAILED,
CUDA_ERROR_ARRAY_IS_MAPPED,
CUDA_ERROR_ALREADY_MAPPED,
CUDA_ERROR_NO_BINARY_FOR_GPU,
CUDA_ERROR_ALREADY_ACQUIRED,
CUDA_ERROR_NOT_MAPPED,
CUDA_ERROR_NOT_MAPPED_AS_ARRAY,
CUDA_ERROR_NOT_MAPPED_AS_POINTER,
CUDA_ERROR_ECC_UNCORRECTABLE,
CUDA_ERROR_UNSUPPORTED_LIMIT,
CUDA_ERROR_CONTEXT_ALREADY_IN_USE,
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
CUDA_ERROR_INVALID_PTX,
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT,
CUDA_ERROR_NVLINK_UNCORRECTABLE,
CUDA_ERROR_JIT_COMPILER_NOT_FOUND,
CUDA_ERROR_INVALID_SOURCE,
CUDA_ERROR_FILE_NOT_FOUND,
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
CUDA_ERROR_OPERATING_SYSTEM,
CUDA_ERROR_INVALID_HANDLE,
CUDA_ERROR_ILLEGAL_STATE,
CUDA_ERROR_NOT_FOUND,
CUDA_ERROR_NOT_READY,
CUDA_ERROR_ILLEGAL_ADDRESS,
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
CUDA_ERROR_LAUNCH_TIMEOUT,
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED,
CUDA_ERROR_PEER_ACCESS_NOT_ENABLED,
CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE,
CUDA_ERROR_CONTEXT_IS_DESTROYED,
CUDA_ERROR_ASSERT,
CUDA_ERROR_TOO_MANY_PEERS,
CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED,
CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED,
CUDA_ERROR_HARDWARE_STACK_ERROR,
CUDA_ERROR_ILLEGAL_INSTRUCTION,
CUDA_ERROR_MISALIGNED_ADDRESS,
CUDA_ERROR_INVALID_ADDRESS_SPACE,
CUDA_ERROR_INVALID_PC,
CUDA_ERROR_LAUNCH_FAILED,
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
CUDA_ERROR_NOT_PERMITTED,
CUDA_ERROR_NOT_SUPPORTED,
CUDA_ERROR_SYSTEM_NOT_READY,
CUDA_ERROR_SYSTEM_DRIVER_MISMATCH,
CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE,
CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED,
CUDA_ERROR_STREAM_CAPTURE_INVALIDATED,
CUDA_ERROR_STREAM_CAPTURE_MERGE,
CUDA_ERROR_STREAM_CAPTURE_UNMATCHED,
CUDA_ERROR_STREAM_CAPTURE_UNJOINED,
CUDA_ERROR_STREAM_CAPTURE_ISOLATION,
CUDA_ERROR_STREAM_CAPTURE_IMPLICIT,
CUDA_ERROR_CAPTURED_EVENT,
CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD,
CUDA_ERROR_TIMEOUT,
CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE,
CUDA_ERROR_UNKNOWN
]
}

View file

@ -42,6 +42,11 @@ macro_rules! extern_redirect {
};
}
// Expands to a `usize` expression equal to the number of token trees passed
// in. Every token tree counts, including separators such as `,`, so a
// comma-separated list of N items yields 2N - 1 (or 0 when empty).
macro_rules! count_tts {
// Base case: nothing left to count.
() => {0usize};
// Recursive case: one for the head plus the count of the remaining tokens.
($_head:tt $($tail:tt)*) => {1usize + count_tts!($($tail)*)};
}
macro_rules! extern_redirect_with_post {
(
pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
@ -53,9 +58,18 @@ macro_rules! extern_redirect_with_post {
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
typed_fn($( $arg_id ),*)
};
let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| {
let arg_count = (count_tts!($($arg_id),*) + 1) / 2;
fn_logger.begin_writing_arguments(arg_count);
$(
fn_logger.write_single_argument(result, $arg_id);
)*
fn_logger.end_writing_arguments();
};
crate::handle_cuda_function_call_with_probes(
stringify!($fn_name),
|| (), original_fn,
get_formatted_args,
move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result )
)
}
@ -81,6 +95,7 @@ macro_rules! extern_redirect_with {
#[allow(warnings)]
mod cuda;
mod dark_api;
mod format;
mod log;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
@ -294,6 +309,7 @@ fn handle_cuda_function_call_with_probes<T, PostFn>(
func: &'static str,
pre_probe: impl FnOnce() -> T,
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult),
post_probe: PostFn,
) -> CUresult
where
@ -325,6 +341,7 @@ where
let pre_result = pre_probe();
let cu_result = original_cuda_fn(fn_ptr);
logger.result = Some(cu_result);
print_arguments_fn(&mut logger, cu_result);
post_probe(
&mut logger,
&mut delayed_state.cuda_state,
@ -1220,6 +1237,7 @@ struct FatbincWrapper {
}
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
const LEGACY_FATBIN_MAGIC: c_uint = 0x1EE55A01;
const FATBIN_VERSION: c_ushort = 0x01;
#[repr(C, align(8))]
@ -1484,16 +1502,6 @@ pub(crate) fn cuModuleGetFunction_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
if !state.module_exists(hmod) {
fn_logger.log(log::LogEntry::UnknownModule(hmod))
}
match unsafe { CStr::from_ptr(name) }.to_str() {
Ok(str) => fn_logger.log(log::LogEntry::FunctionParameter {
name: "name",
value: str.to_string(),
}),
Err(e) => fn_logger.log(log::LogEntry::MalformedFunctionName(e)),
}
}
#[allow(non_snake_case)]
@ -1505,10 +1513,6 @@ pub(crate) fn cuDeviceGetAttribute_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
fn_logger.log(log::LogEntry::FunctionParameter {
name: "attrib",
value: attrib.0.to_string(),
});
}
#[allow(non_snake_case)]
@ -1524,3 +1528,16 @@ pub(crate) fn cuDeviceComputeCapability_Post(
unsafe { *major = major_ver_override as i32 };
}
}
/// Post-call probe for `cuModuleLoadFatBinary`.
///
/// Receives the original call arguments (`module`, `fatCubin`), the logger
/// for the current call, the tracing state and the call's `result`.
///
/// # Panics
///
/// Handling of a *successful* load is not implemented yet, so this aborts
/// with an explanatory message when `result` is `CUDA_SUCCESS`. A failed
/// call is a no-op.
#[allow(non_snake_case)]
pub(crate) fn cuModuleLoadFatBinary_Post(
    module: *mut CUmodule,
    fatCubin: *const ::std::os::raw::c_void,
    fn_logger: &mut log::FunctionLogger,
    state: &mut trace::StateTracker,
    result: CUresult,
) {
    if result == CUresult::CUDA_SUCCESS {
        // Keep the deliberate hard stop, but say what is missing instead of
        // failing with an anonymous "explicit panic".
        unimplemented!(
            "cuModuleLoadFatBinary_Post: handling of a successfully loaded fat binary is not implemented"
        )
    }
}

View file

@ -1,5 +1,7 @@
use crate::cuda::CUmodule;
use crate::cuda::CUuuid;
use crate::format;
use crate::format::FormatCudaObject;
use super::CUresult;
use super::Settings;
@ -202,28 +204,35 @@ impl Factory {
pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger {
FunctionLogger {
result: None,
name: Cow::Borrowed(func),
name: CudaFunctionName::Normal(func),
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
finished_writing_args: false,
args_to_write: 0,
}
}
pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger {
let guid = guid.bytes;
let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx);
pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, index: usize) -> FunctionLogger {
FunctionLogger {
result: None,
name: Cow::Owned(fn_name),
name: CudaFunctionName::Dark { guid, index },
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
finished_writing_args: false,
args_to_write: 0,
}
}
}
// Identifies the function a `FunctionLogger` is reporting on.
enum CudaFunctionName {
// A function known by a static name.
Normal(&'static str),
// A function identified by a `CUuuid` together with a numeric index;
// written out as `<guid>::<index>`.
Dark { guid: CUuuid, index: usize },
}
// This encapsulates log output for a single function call.
// It's a separate struct and not just a plain function for two reasons:
// * While we want to always display return code before logging errors,
@ -231,11 +240,13 @@ impl Factory {
// * We want to handle panics gracefully with Drop
pub(crate) struct FunctionLogger<'a> {
pub(crate) result: Option<CUresult>,
name: Cow<'static, str>,
name: CudaFunctionName,
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
write_buffer: &'a mut WriteBuffer,
log_queue: &'a mut Vec<LogEntry>,
args_to_write: usize,
finished_writing_args: bool,
}
impl<'a> FunctionLogger<'a> {
@ -250,11 +261,16 @@ impl<'a> FunctionLogger<'a> {
}
fn flush_log_queue_to_write_buffer(&mut self) {
self.write_buffer.start_line();
self.write_buffer.write(&self.name);
self.write_buffer.write("(...) -> ");
// TODO: remove this once everything has been converted to detailed logging
if !self.finished_writing_args {
self.begin_writing_arguments(0);
self.write_buffer.write("...) -> ");
}
if let Some(result) = self.result {
write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!());
match format::stringify_curesult(result) {
Some(text) => self.write_buffer.write(text),
None => write!(self.write_buffer, "{}", result.0).unwrap(),
}
} else {
self.write_buffer.write("(UNKNOWN)");
};
@ -274,6 +290,35 @@ impl<'a> FunctionLogger<'a> {
self.write_buffer.end_line();
self.write_buffer.finish();
}
/// Starts the argument list of the log line for this call.
///
/// Records `len` as the number of arguments still to be written, writes the
/// function name (the plain name, or `<guid>::<index>` for a dark API entry)
/// and then the opening parenthesis.
pub(crate) fn begin_writing_arguments(&mut self, len: usize) {
    self.args_to_write = len;
    match self.name {
        CudaFunctionName::Dark { guid: table_guid, index: slot } => {
            // The GUID formatter ignores the result code; pass a neutral one.
            table_guid.write_post_execution(CUresult::CUDA_SUCCESS, &mut self.write_buffer);
            write!(&mut self.write_buffer, "::{}", slot).ok();
        }
        CudaFunctionName::Normal(plain_name) => self.write_buffer.write(plain_name),
    }
    self.write_buffer.write("(");
}
/// Writes one argument of the current call, followed by `", "` unless it is
/// the last argument announced through `begin_writing_arguments`.
///
/// `result` is forwarded to the argument's formatter. If more arguments are
/// written than were announced, the remaining counter stays at zero (no
/// separator is emitted) instead of underflowing.
pub(crate) fn write_single_argument(&mut self, result: CUresult, arg: impl FormatCudaObject) {
    // Saturate rather than `-= 1`: an underflow would panic in debug builds
    // and wrap to `usize::MAX` in release builds.
    self.args_to_write = self.args_to_write.saturating_sub(1);
    arg.write_post_execution(result, self.write_buffer);
    if self.args_to_write != 0 {
        self.write_buffer.write(", ");
    }
}
// Closes the argument list by writing `") -> "` and then records that the
// arguments have been written, so `flush_log_queue_to_write_buffer` does not
// emit its placeholder argument list.
pub(crate) fn end_writing_arguments(&mut self) {
self.write_buffer.write(") -> ");
self.finished_writing_args = true;
}
}
impl<'a> Drop for FunctionLogger<'a> {