mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-19 16:04:44 +00:00
Now dump function calls
This commit is contained in:
parent
dd7ced8b37
commit
e459086c5b
6 changed files with 186 additions and 38 deletions
|
@ -14,6 +14,7 @@ lz4-sys = "1.9"
|
|||
regex = "1.4"
|
||||
dynasm = "1.1"
|
||||
dynasmrt = "1.1"
|
||||
lazy_static = "1.4"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
|
||||
|
|
1
zluda_dump/README.md
Normal file
1
zluda_dump/README.md
Normal file
|
@ -0,0 +1 @@
|
|||
sed 's/(.*//g' log.txt | sort | uniq > uniq.txt
|
|
@ -7,9 +7,11 @@ use std::{
|
|||
io::{self, prelude::*},
|
||||
mem,
|
||||
os::raw::{c_int, c_uint, c_ulong, c_ushort},
|
||||
path::{Path, PathBuf},
|
||||
path::PathBuf,
|
||||
ptr::NonNull,
|
||||
rc::Rc,
|
||||
slice,
|
||||
sync::Mutex,
|
||||
};
|
||||
use std::{fs::File, ptr};
|
||||
|
||||
|
@ -20,6 +22,9 @@ use cuda::{
|
|||
use ptx::ast;
|
||||
use regex::Regex;
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
// Marker values that may appear in the `extra` argument array of a kernel
// launch; the numeric values (0, 1, 2) match the CU_LAUNCH_PARAM_* definitions
// of the CUDA driver API.
const CU_LAUNCH_PARAM_END: *mut c_void = std::ptr::null_mut();
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1usize as *mut c_void;
const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2usize as *mut c_void;
|
||||
|
@ -28,14 +33,11 @@ macro_rules! extern_redirect {
|
|||
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
|
||||
#[no_mangle]
|
||||
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
unsafe { $crate::init_libcuda_handle(stringify!($fn_name)) };
|
||||
let name = std::ffi::CString::new(stringify!($fn_name)).unwrap();
|
||||
let fn_ptr = unsafe { crate::os::get_proc_address($crate::LIBCUDA_HANDLE, &name) };
|
||||
if fn_ptr == std::ptr::null_mut() {
|
||||
return CUresult::CUDA_ERROR_UNKNOWN;
|
||||
}
|
||||
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
|
||||
typed_fn($( $arg_id ),*)
|
||||
let original_fn = |fn_ptr| {
|
||||
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
|
||||
typed_fn($( $arg_id ),*)
|
||||
};
|
||||
crate::handle_cuda_function_call(stringify!($fn_name), original_fn)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -47,17 +49,11 @@ macro_rules! extern_redirect_with {
|
|||
) => {
|
||||
#[no_mangle]
|
||||
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
unsafe { $crate::init_libcuda_handle(stringify!($fn_name)) };
|
||||
let continuation = |$( $arg_id : $arg_type),* | {
|
||||
let name = std::ffi::CString::new(stringify!($fn_name)).unwrap();
|
||||
let fn_ptr = unsafe { crate::os::get_proc_address($crate::LIBCUDA_HANDLE, &name) };
|
||||
if fn_ptr == std::ptr::null_mut() {
|
||||
return CUresult::CUDA_ERROR_UNKNOWN;
|
||||
}
|
||||
let original_fn = |fn_ptr| {
|
||||
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
|
||||
typed_fn($( $arg_id ),*)
|
||||
};
|
||||
unsafe { $receiver($( $arg_id ),* , continuation) }
|
||||
crate::handle_cuda_function_call(stringify!($fn_name), original_fn)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -81,10 +77,81 @@ pub static mut KERNEL_PATTERN: Option<Regex> = None;
|
|||
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
|
||||
pub static mut KERNEL_INDEX_MINIMUM: usize = 0;
|
||||
pub static mut KERNEL_INDEX_MAXIMUM: usize = usize::MAX;
|
||||
pub(crate) static mut LOG_FACTORY: Option<log::Factory> = None;
|
||||
static mut LOG_FACTORY: Option<log::Factory> = None;
|
||||
|
||||
pub(crate) struct Settings {
|
||||
lazy_static! {
|
||||
static ref GLOBAL_STATE: Mutex<GlobalState> = Mutex::new(GlobalState::new());
|
||||
}
|
||||
|
||||
struct GlobalState {
|
||||
log_factory: log::Factory,
|
||||
// We split off fields that require a mutable reference to log factory to be
|
||||
// created, additionally creation of some fields in this struct can fail
|
||||
// initalization (e.g. we passed path a non-existant path to libcuda)
|
||||
delayed_state: LateInit<GlobalDelayedState>,
|
||||
}
|
||||
|
||||
unsafe impl Send for GlobalState {}
|
||||
|
||||
impl GlobalState {
|
||||
fn new() -> Self {
|
||||
GlobalState {
|
||||
log_factory: log::Factory::new(),
|
||||
delayed_state: LateInit::Unitialized,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A value whose construction is deferred and may fail.
enum LateInit<T> {
    /// Construction succeeded and the value is available.
    Success(T),
    /// Construction has not been attempted yet.
    Unitialized,
    /// Construction was attempted and failed.
    Error,
}

impl<T> LateInit<T> {
    /// Returns a mutable reference to the contained value when the state is
    /// `Success`, and `None` for every other state.
    fn as_mut(&mut self) -> Option<&mut T> {
        match self {
            LateInit::Success(value) => Some(value),
            LateInit::Unitialized | LateInit::Error => None,
        }
    }
}
|
||||
|
||||
struct GlobalDelayedState {
|
||||
settings: Settings,
|
||||
libcuda_handle: NonNull<c_void>,
|
||||
cuda_state: CUDAStateTracker,
|
||||
}
|
||||
|
||||
impl GlobalDelayedState {
|
||||
fn new<'a>(
|
||||
func: &'static str,
|
||||
factory: &'a mut log::Factory,
|
||||
) -> (LateInit<Self>, log::FunctionLogger<'a>) {
|
||||
let (mut fn_logger, settings) = factory.get_first_logger_and_init_settings(func);
|
||||
let maybe_libcuda_handle = unsafe { os::load_cuda_library(&settings.libcuda_path) };
|
||||
let libcuda_handle = match NonNull::new(maybe_libcuda_handle) {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
fn_logger.log(log::LogEntry::ErrorBox(
|
||||
format!("Invalid CUDA library at path {}", &settings.libcuda_path).into(),
|
||||
));
|
||||
return (LateInit::Error, fn_logger);
|
||||
}
|
||||
};
|
||||
let delayed_state = GlobalDelayedState {
|
||||
settings,
|
||||
libcuda_handle,
|
||||
cuda_state: CUDAStateTracker::new(),
|
||||
};
|
||||
(LateInit::Success(delayed_state), fn_logger)
|
||||
}
|
||||
}
|
||||
|
||||
struct Settings {
|
||||
dump_dir: Option<PathBuf>,
|
||||
libcuda_path: String,
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
|
@ -97,7 +164,18 @@ impl Settings {
|
|||
None
|
||||
}
|
||||
};
|
||||
Settings { dump_dir }
|
||||
let libcuda_path = match env::var("ZLUDA_DUMP_LIBCUDA_FILE") {
|
||||
Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_owned(),
|
||||
Err(e) => {
|
||||
logger.log(log::LogEntry::ErrorBox(Box::new(e) as _));
|
||||
os::LIBCUDA_DEFAULT_PATH.to_owned()
|
||||
}
|
||||
Ok(env_string) => env_string,
|
||||
};
|
||||
Settings {
|
||||
dump_dir,
|
||||
libcuda_path,
|
||||
}
|
||||
}
|
||||
|
||||
fn read_and_init_dump_dir() -> Result<Option<PathBuf>, Box<dyn Error>> {
|
||||
|
@ -118,11 +196,20 @@ impl Settings {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum AllocLocation {
|
||||
Device,
|
||||
DeviceV2,
|
||||
Host,
|
||||
// This struct contains all the information about current state of CUDA runtime
|
||||
// that are relevant to us: modules, kernels, linking objects, etc.
|
||||
struct CUDAStateTracker {
|
||||
modules: HashMap<CUmodule, Option<ModuleDump>>,
|
||||
module_counter: usize,
|
||||
}
|
||||
|
||||
impl CUDAStateTracker {
|
||||
fn new() -> Self {
|
||||
CUDAStateTracker {
|
||||
modules: HashMap::new(),
|
||||
module_counter: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ModuleDump {
|
||||
|
@ -130,6 +217,44 @@ pub struct ModuleDump {
|
|||
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||
}
|
||||
|
||||
fn handle_cuda_function_call(
|
||||
func: &'static str,
|
||||
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
|
||||
) -> CUresult {
|
||||
let global_state_mutex = &*GLOBAL_STATE;
|
||||
// We unwrap because there's really no sensible thing we could do,
|
||||
// alternatively we could return a CUDA error, but I think it's fine to
|
||||
// crash. This is a diagnostic utility, if the lock was poisoned we can't
|
||||
// extract any useful trace or logging anyway
|
||||
let mut global_state = &mut *global_state_mutex.lock().unwrap();
|
||||
let (mut logger, delayed_state) = match global_state.delayed_state {
|
||||
LateInit::Success(ref mut delayed_state) => {
|
||||
(global_state.log_factory.get_logger(func), delayed_state)
|
||||
}
|
||||
// There's no libcuda to load, so we might as well panic
|
||||
LateInit::Error => panic!(),
|
||||
LateInit::Unitialized => {
|
||||
let (new_delayed_state, logger) =
|
||||
GlobalDelayedState::new(func, &mut global_state.log_factory);
|
||||
global_state.delayed_state = new_delayed_state;
|
||||
(logger, global_state.delayed_state.as_mut().unwrap())
|
||||
}
|
||||
};
|
||||
let name = std::ffi::CString::new(func).unwrap();
|
||||
let fn_ptr =
|
||||
unsafe { os::get_proc_address(delayed_state.libcuda_handle.as_ptr(), name.as_c_str()) };
|
||||
let cu_result = original_cuda_fn(NonNull::new(fn_ptr).unwrap());
|
||||
logger.result = Some(cu_result);
|
||||
cu_result
|
||||
}
|
||||
|
||||
// NOTE(review): the meaning of each variant is not visible in this part of
// the file. Judging by the names they presumably record which kind of
// allocation call produced a buffer (device, a "v2" device variant, or host)
// -- confirm against the code that constructs them.
#[derive(Clone, Copy)]
enum AllocLocation {
    Device,
    DeviceV2,
    Host,
}
|
||||
|
||||
pub struct KernelDump {
|
||||
module_content: Rc<String>,
|
||||
name: String,
|
||||
|
@ -145,7 +270,7 @@ pub unsafe fn init_libcuda_handle(func: &'static str) {
|
|||
MODULES = Some(HashMap::new());
|
||||
KERNELS = Some(HashMap::new());
|
||||
BUFFERS = Some(BTreeMap::new());
|
||||
let libcuda_handle = os::load_cuda_library();
|
||||
let libcuda_handle = ptr::null_mut();
|
||||
assert_ne!(libcuda_handle, ptr::null_mut());
|
||||
LIBCUDA_HANDLE = libcuda_handle;
|
||||
match env::var("ZLUDA_DUMP_KERNEL") {
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
use crate::cuda::CUuuid;
|
||||
|
||||
use super::CUresult;
|
||||
use super::Settings;
|
||||
use std::borrow::Cow;
|
||||
use std::error::Error;
|
||||
use std::fmt::Display;
|
||||
use std::fs::File;
|
||||
|
@ -193,7 +196,20 @@ impl Factory {
|
|||
pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger {
|
||||
FunctionLogger {
|
||||
result: None,
|
||||
name: func,
|
||||
name: Cow::Borrowed(func),
|
||||
fallible_emitter: &mut self.fallible_emitter,
|
||||
infallible_emitter: &mut self.infallible_emitter,
|
||||
write_buffer: &mut self.write_buffer,
|
||||
log_queue: &mut self.log_queue,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger {
|
||||
let guid = guid.bytes;
|
||||
let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx);
|
||||
FunctionLogger {
|
||||
result: None,
|
||||
name: Cow::Owned(fn_name),
|
||||
fallible_emitter: &mut self.fallible_emitter,
|
||||
infallible_emitter: &mut self.infallible_emitter,
|
||||
write_buffer: &mut self.write_buffer,
|
||||
|
@ -209,7 +225,7 @@ impl Factory {
|
|||
// * We want to handle panics gracefully with Drop
|
||||
pub(crate) struct FunctionLogger<'a> {
|
||||
pub(crate) result: Option<CUresult>,
|
||||
name: &'static str,
|
||||
name: Cow<'static, str>,
|
||||
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
|
||||
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
|
||||
write_buffer: &'a mut WriteBuffer,
|
||||
|
@ -223,7 +239,7 @@ impl<'a> FunctionLogger<'a> {
|
|||
|
||||
fn flush_log_queue_to_write_buffer(&mut self) {
|
||||
self.write_buffer.start_line();
|
||||
self.write_buffer.write(self.name);
|
||||
self.write_buffer.write(&self.name);
|
||||
self.write_buffer.write("(...) -> ");
|
||||
if let Some(result) = self.result {
|
||||
write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!());
|
||||
|
@ -360,7 +376,7 @@ mod os {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{cell::RefCell, io, rc::Rc, str};
|
||||
use std::{borrow::Cow, cell::RefCell, io, rc::Rc, str};
|
||||
|
||||
use super::{FunctionLogger, LogEntry, WriteTrailingZeroAware};
|
||||
use crate::{log::WriteBuffer, CUresult};
|
||||
|
@ -422,7 +438,7 @@ mod tests {
|
|||
let mut log_queue = Vec::new();
|
||||
let mut func_logger = FunctionLogger {
|
||||
result: Some(CUresult::CUDA_SUCCESS),
|
||||
name: "cuInit",
|
||||
name: Cow::Borrowed("cuInit"),
|
||||
infallible_emitter: &mut infallible_emitter,
|
||||
fallible_emitter: &mut fallible_emitter,
|
||||
write_buffer: &mut write_buffer,
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
use crate::cuda::CUuuid;
|
||||
use std::ffi::{c_void, CStr};
|
||||
use std::ffi::{c_void, CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
|
||||
// Path of the CUDA library used when `ZLUDA_DUMP_LIBCUDA_FILE` is not set.
// This is a plain string literal with no trailing NUL: `load_cuda_library`
// converts the path with `CString::new`, which appends the terminator itself
// and rejects input that already contains a NUL byte.
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
|
||||
|
||||
pub unsafe fn load_cuda_library() -> *mut c_void {
|
||||
pub unsafe fn load_cuda_library(libcuda_path: &str) -> *mut c_void {
|
||||
let libcuda_path = CString::new(libcuda_path).unwrap();
|
||||
libc::dlopen(
|
||||
NVCUDA_DEFAULT_PATH.as_ptr() as *const _,
|
||||
libcuda_path.as_ptr() as *const _,
|
||||
libc::RTLD_LOCAL | libc::RTLD_NOW,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -15,12 +15,12 @@ use winapi::{
|
|||
|
||||
use crate::cuda::CUuuid;
|
||||
|
||||
const NVCUDA_DEFAULT_PATH: &[u16] = wch_c!(r"C:\Windows\System32\nvcuda.dll");
|
||||
// Path of the CUDA library used when `ZLUDA_DUMP_LIBCUDA_FILE` is not set.
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = r"C:\Windows\System32\nvcuda.dll";
|
||||
const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0";
|
||||
|
||||
include!("../../zluda_redirect/src/payload_guid.rs");
|
||||
|
||||
pub unsafe fn load_cuda_library() -> *mut c_void {
|
||||
pub unsafe fn load_cuda_library(libcuda_path: &str) -> *mut c_void {
|
||||
let load_lib = if is_detoured() {
|
||||
match get_non_detoured_load_library() {
|
||||
Some(load_lib) => load_lib,
|
||||
|
@ -29,7 +29,11 @@ pub unsafe fn load_cuda_library() -> *mut c_void {
|
|||
} else {
|
||||
LoadLibraryW
|
||||
};
|
||||
load_lib(NVCUDA_DEFAULT_PATH.as_ptr()) as *mut _
|
||||
let libcuda_path_uf16 = libcuda_path
|
||||
.encode_utf16()
|
||||
.chain(std::iter::once(0))
|
||||
.collect::<Vec<_>>();
|
||||
load_lib(libcuda_path_uf16.as_ptr()) as *mut _
|
||||
}
|
||||
|
||||
unsafe fn is_detoured() -> bool {
|
||||
|
|
Loading…
Add table
Reference in a new issue