mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 22:30:41 +00:00
Start converting cuModuleLoad to the new tracing infrastructure
This commit is contained in:
parent
e459086c5b
commit
24e100cb9c
4 changed files with 303 additions and 21 deletions
|
@ -2491,15 +2491,16 @@ extern_redirect! {
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuCtxDetach(ctx: CUcontext) -> CUresult;
|
pub fn cuCtxDetach(ctx: CUcontext) -> CUresult;
|
||||||
}
|
}
|
||||||
extern_redirect! {
|
extern_redirect_with_post! {
|
||||||
pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::std::os::raw::c_char) -> CUresult;
|
pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::std::os::raw::c_char) -> CUresult;
|
||||||
|
super::cuModuleLoad_Post;
|
||||||
}
|
}
|
||||||
extern_redirect_with! {
|
extern_redirect_with_post! {
|
||||||
pub fn cuModuleLoadData(
|
pub fn cuModuleLoadData(
|
||||||
module: *mut CUmodule,
|
module: *mut CUmodule,
|
||||||
image: *const ::std::os::raw::c_void,
|
image: *const ::std::os::raw::c_void,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
super::cuModuleLoadData;
|
super::cuModuleLoadData_Post;
|
||||||
}
|
}
|
||||||
extern_redirect_with! {
|
extern_redirect_with! {
|
||||||
pub fn cuModuleLoadDataEx(
|
pub fn cuModuleLoadDataEx(
|
||||||
|
|
|
@ -42,6 +42,26 @@ macro_rules! extern_redirect {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! extern_redirect_with_post {
|
||||||
|
(
|
||||||
|
pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
|
||||||
|
$post_fn:path ;
|
||||||
|
) => {
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||||
|
let original_fn = |fn_ptr| {
|
||||||
|
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
|
||||||
|
typed_fn($( $arg_id ),*)
|
||||||
|
};
|
||||||
|
crate::handle_cuda_function_call_with_probes(
|
||||||
|
stringify!($fn_name),
|
||||||
|
|| (), original_fn,
|
||||||
|
move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result )
|
||||||
|
)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
macro_rules! extern_redirect_with {
|
macro_rules! extern_redirect_with {
|
||||||
(
|
(
|
||||||
pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
|
pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
|
||||||
|
@ -64,6 +84,7 @@ mod log;
|
||||||
#[cfg_attr(windows, path = "os_win.rs")]
|
#[cfg_attr(windows, path = "os_win.rs")]
|
||||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||||
mod os;
|
mod os;
|
||||||
|
mod trace;
|
||||||
|
|
||||||
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
|
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
|
||||||
pub static mut PENDING_LINKING: Option<HashMap<CUlinkState, Vec<ModuleDump>>> = None;
|
pub static mut PENDING_LINKING: Option<HashMap<CUlinkState, Vec<ModuleDump>>> = None;
|
||||||
|
@ -121,7 +142,7 @@ impl<T> LateInit<T> {
|
||||||
struct GlobalDelayedState {
|
struct GlobalDelayedState {
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
libcuda_handle: NonNull<c_void>,
|
libcuda_handle: NonNull<c_void>,
|
||||||
cuda_state: CUDAStateTracker,
|
cuda_state: trace::StateTracker,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GlobalDelayedState {
|
impl GlobalDelayedState {
|
||||||
|
@ -140,10 +161,11 @@ impl GlobalDelayedState {
|
||||||
return (LateInit::Error, fn_logger);
|
return (LateInit::Error, fn_logger);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
let cuda_state = trace::StateTracker::new(&settings);
|
||||||
let delayed_state = GlobalDelayedState {
|
let delayed_state = GlobalDelayedState {
|
||||||
settings,
|
settings,
|
||||||
libcuda_handle,
|
libcuda_handle,
|
||||||
cuda_state: CUDAStateTracker::new(),
|
cuda_state,
|
||||||
};
|
};
|
||||||
(LateInit::Success(delayed_state), fn_logger)
|
(LateInit::Success(delayed_state), fn_logger)
|
||||||
}
|
}
|
||||||
|
@ -196,22 +218,6 @@ impl Settings {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This struct contains all the information about current state of CUDA runtime
|
|
||||||
// that are relevant to us: modules, kernels, linking objects, etc.
|
|
||||||
struct CUDAStateTracker {
|
|
||||||
modules: HashMap<CUmodule, Option<ModuleDump>>,
|
|
||||||
module_counter: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CUDAStateTracker {
|
|
||||||
fn new() -> Self {
|
|
||||||
CUDAStateTracker {
|
|
||||||
modules: HashMap::new(),
|
|
||||||
module_counter: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ModuleDump {
|
pub struct ModuleDump {
|
||||||
content: Rc<String>,
|
content: Rc<String>,
|
||||||
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||||
|
@ -248,6 +254,50 @@ fn handle_cuda_function_call(
|
||||||
cu_result
|
cu_result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn handle_cuda_function_call_with_probes<T, PostFn>(
|
||||||
|
func: &'static str,
|
||||||
|
pre_probe: impl FnOnce() -> T,
|
||||||
|
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
|
||||||
|
post_probe: PostFn,
|
||||||
|
) -> CUresult
|
||||||
|
where
|
||||||
|
for<'a> PostFn: FnOnce(&'a mut log::FunctionLogger, &'a mut trace::StateTracker, T, CUresult),
|
||||||
|
{
|
||||||
|
let global_state_mutex = &*GLOBAL_STATE;
|
||||||
|
// We unwrap because there's really no sensible thing we could do,
|
||||||
|
// alternatively we could return a CUDA error, but I think it's fine to
|
||||||
|
// crash. This is a diagnostic utility, if the lock was poisoned we can't
|
||||||
|
// extract any useful trace or logging anyway
|
||||||
|
let mut global_state = &mut *global_state_mutex.lock().unwrap();
|
||||||
|
let (mut logger, delayed_state) = match global_state.delayed_state {
|
||||||
|
LateInit::Success(ref mut delayed_state) => {
|
||||||
|
(global_state.log_factory.get_logger(func), delayed_state)
|
||||||
|
}
|
||||||
|
// There's no libcuda to load, so we might as well panic
|
||||||
|
LateInit::Error => panic!(),
|
||||||
|
LateInit::Unitialized => {
|
||||||
|
let (new_delayed_state, logger) =
|
||||||
|
GlobalDelayedState::new(func, &mut global_state.log_factory);
|
||||||
|
global_state.delayed_state = new_delayed_state;
|
||||||
|
(logger, global_state.delayed_state.as_mut().unwrap())
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let name = std::ffi::CString::new(func).unwrap();
|
||||||
|
let fn_ptr =
|
||||||
|
unsafe { os::get_proc_address(delayed_state.libcuda_handle.as_ptr(), name.as_c_str()) };
|
||||||
|
let fn_ptr = NonNull::new(fn_ptr).unwrap();
|
||||||
|
let pre_result = pre_probe();
|
||||||
|
let cu_result = original_cuda_fn(fn_ptr);
|
||||||
|
logger.result = Some(cu_result);
|
||||||
|
post_probe(
|
||||||
|
&mut logger,
|
||||||
|
&mut delayed_state.cuda_state,
|
||||||
|
pre_result,
|
||||||
|
cu_result,
|
||||||
|
);
|
||||||
|
cu_result
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
enum AllocLocation {
|
enum AllocLocation {
|
||||||
Device,
|
Device,
|
||||||
|
@ -327,6 +377,34 @@ pub unsafe fn cuModuleLoadData(
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub(crate) fn cuModuleLoad_Post(
|
||||||
|
module: *mut CUmodule,
|
||||||
|
fname: *const ::std::os::raw::c_char,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
state: &mut trace::StateTracker,
|
||||||
|
result: CUresult,
|
||||||
|
) {
|
||||||
|
if result != CUresult::CUDA_SUCCESS {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
state.record_new_module_file(unsafe { *module }, fname, fn_logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub(crate) fn cuModuleLoadData_Post(
|
||||||
|
module: *mut CUmodule,
|
||||||
|
raw_image: *const ::std::os::raw::c_void,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
state: &mut trace::StateTracker,
|
||||||
|
result: CUresult,
|
||||||
|
) {
|
||||||
|
if result != CUresult::CUDA_SUCCESS {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
state.record_new_module(unsafe { *module }, raw_image, fn_logger)
|
||||||
|
}
|
||||||
|
|
||||||
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
|
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
|
||||||
if *(raw_image as *const u32) == 0x464c457f {
|
if *(raw_image as *const u32) == 0x464c457f {
|
||||||
os_log!("Unsupported ELF module image: {:?}", raw_image);
|
os_log!("Unsupported ELF module image: {:?}", raw_image);
|
||||||
|
|
|
@ -1,14 +1,17 @@
|
||||||
|
use crate::cuda::CUmodule;
|
||||||
use crate::cuda::CUuuid;
|
use crate::cuda::CUuuid;
|
||||||
|
|
||||||
use super::CUresult;
|
use super::CUresult;
|
||||||
use super::Settings;
|
use super::Settings;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
use std::ffi::c_void;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Stderr;
|
use std::io::Stderr;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
use std::str::Utf8Error;
|
||||||
|
|
||||||
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
|
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
|
||||||
|
|
||||||
|
@ -237,6 +240,12 @@ impl<'a> FunctionLogger<'a> {
|
||||||
self.log_queue.push(l);
|
self.log_queue.push(l);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn log_io_error(&mut self, error: io::Result<()>) {
|
||||||
|
if let Err(e) = error {
|
||||||
|
self.log_queue.push(LogEntry::IoError(e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn flush_log_queue_to_write_buffer(&mut self) {
|
fn flush_log_queue_to_write_buffer(&mut self) {
|
||||||
self.write_buffer.start_line();
|
self.write_buffer.start_line();
|
||||||
self.write_buffer.write(&self.name);
|
self.write_buffer.write(&self.name);
|
||||||
|
@ -284,6 +293,14 @@ impl<'a> Drop for FunctionLogger<'a> {
|
||||||
pub(crate) enum LogEntry {
|
pub(crate) enum LogEntry {
|
||||||
IoError(io::Error),
|
IoError(io::Error),
|
||||||
ErrorBox(Box<dyn Error>),
|
ErrorBox(Box<dyn Error>),
|
||||||
|
UnsupportedModule {
|
||||||
|
module: CUmodule,
|
||||||
|
raw_image: *const c_void,
|
||||||
|
kind: &'static str,
|
||||||
|
},
|
||||||
|
MalformedModulePath(Utf8Error),
|
||||||
|
MalformedModuleText(Utf8Error),
|
||||||
|
ModuleParsingError(usize),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for LogEntry {
|
impl Display for LogEntry {
|
||||||
|
@ -291,6 +308,26 @@ impl Display for LogEntry {
|
||||||
match self {
|
match self {
|
||||||
LogEntry::IoError(e) => e.fmt(f),
|
LogEntry::IoError(e) => e.fmt(f),
|
||||||
LogEntry::ErrorBox(e) => e.fmt(f),
|
LogEntry::ErrorBox(e) => e.fmt(f),
|
||||||
|
LogEntry::UnsupportedModule {
|
||||||
|
module,
|
||||||
|
raw_image,
|
||||||
|
kind,
|
||||||
|
} => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Unsupported {} module {:?} loaded from module image {:?}",
|
||||||
|
kind, module, raw_image
|
||||||
|
)
|
||||||
|
}
|
||||||
|
LogEntry::MalformedModulePath(e) => e.fmt(f),
|
||||||
|
LogEntry::MalformedModuleText(e) => e.fmt(f),
|
||||||
|
LogEntry::ModuleParsingError(index) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Error parsing module, log has been written to module_{:04}.log",
|
||||||
|
index
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
166
zluda_dump/src/trace.rs
Normal file
166
zluda_dump/src/trace.rs
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
use ptx::{ast::PtxError, Token};
|
||||||
|
|
||||||
|
use crate::{cuda::CUmodule, log, Settings};
|
||||||
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
ffi::{c_void, CStr},
|
||||||
|
fs::{self, File},
|
||||||
|
io::{self, Read, Write},
|
||||||
|
path::PathBuf,
|
||||||
|
rc::Rc,
|
||||||
|
};
|
||||||
|
|
||||||
|
// This struct is the heart of CUDA state tracking, it:
|
||||||
|
// * receives calls from the probes about changes to CUDA state
|
||||||
|
// * records updates to the state change
|
||||||
|
// * writes out relevant state change and details to disk and log
|
||||||
|
pub(crate) struct StateTracker {
|
||||||
|
writer: DumpWriter,
|
||||||
|
modules: HashMap<CUmodule, Option<ParsedModule>>,
|
||||||
|
module_counter: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StateTracker {
|
||||||
|
pub(crate) fn new(settings: &Settings) -> Self {
|
||||||
|
StateTracker {
|
||||||
|
writer: DumpWriter::new(settings.dump_dir.clone()),
|
||||||
|
modules: HashMap::new(),
|
||||||
|
module_counter: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn record_new_module_file(
|
||||||
|
&mut self,
|
||||||
|
module: CUmodule,
|
||||||
|
file_name: *const i8,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
) {
|
||||||
|
let file_name = match unsafe { CStr::from_ptr(file_name) }.to_str() {
|
||||||
|
Ok(f) => f,
|
||||||
|
Err(err) => {
|
||||||
|
fn_logger.log(log::LogEntry::MalformedModulePath(err));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let maybe_io_error = self.try_record_new_module_file(module, fn_logger, file_name);
|
||||||
|
fn_logger.log_io_error(maybe_io_error)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_record_new_module_file(
|
||||||
|
&mut self,
|
||||||
|
module: CUmodule,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
file_name: &str,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
let mut module_file = fs::File::open(file_name)?;
|
||||||
|
let mut read_buff = Vec::new();
|
||||||
|
module_file.read_to_end(&mut read_buff)?;
|
||||||
|
self.record_new_module(module, read_buff.as_ptr() as *const _, fn_logger);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn record_new_module(
|
||||||
|
&mut self,
|
||||||
|
module: CUmodule,
|
||||||
|
raw_image: *const c_void,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
) {
|
||||||
|
self.module_counter += 1;
|
||||||
|
if unsafe { *(raw_image as *const [u8; 4]) } == *goblin::elf64::header::ELFMAG {
|
||||||
|
self.modules.insert(module, None);
|
||||||
|
// TODO: Parse ELF and write it to disk
|
||||||
|
fn_logger.log(log::LogEntry::UnsupportedModule {
|
||||||
|
module,
|
||||||
|
raw_image,
|
||||||
|
kind: "ELF",
|
||||||
|
})
|
||||||
|
} else if unsafe { *(raw_image as *const [u8; 8]) } == *goblin::archive::MAGIC {
|
||||||
|
self.modules.insert(module, None);
|
||||||
|
// TODO: Figure out how to get size of archive module and write it to disk
|
||||||
|
fn_logger.log(log::LogEntry::UnsupportedModule {
|
||||||
|
module,
|
||||||
|
raw_image,
|
||||||
|
kind: "archive",
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
self.record_module_ptx(module, raw_image, fn_logger)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_module_ptx(
|
||||||
|
&mut self,
|
||||||
|
module: CUmodule,
|
||||||
|
raw_image: *const c_void,
|
||||||
|
fn_logger: &mut log::FunctionLogger,
|
||||||
|
) {
|
||||||
|
let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str();
|
||||||
|
let module_text = match module_text {
|
||||||
|
Ok(m) => m,
|
||||||
|
Err(utf8_err) => {
|
||||||
|
fn_logger.log(log::LogEntry::MalformedModuleText(utf8_err));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
fn_logger.log_io_error(self.writer.save_module(self.module_counter, module_text));
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
let ast = ptx::ModuleParser::new().parse(&mut errors, module_text);
|
||||||
|
let ast = match (&*errors, ast) {
|
||||||
|
(&[], Ok(ast)) => ast,
|
||||||
|
(err_vec, res) => {
|
||||||
|
fn_logger.log(log::LogEntry::ModuleParsingError(self.module_counter));
|
||||||
|
fn_logger.log_io_error(self.writer.save_module_error_log(
|
||||||
|
self.module_counter,
|
||||||
|
err_vec,
|
||||||
|
res.err(),
|
||||||
|
));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// TODO: store kernel names and details
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ParsedModule {
|
||||||
|
content: Rc<String>,
|
||||||
|
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// This structs writes out information about CUDA execution to the dump dir
|
||||||
|
struct DumpWriter {
|
||||||
|
dump_dir: Option<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DumpWriter {
|
||||||
|
fn new(dump_dir: Option<PathBuf>) -> Self {
|
||||||
|
Self { dump_dir }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn save_module(&self, index: usize, text: &str) -> io::Result<()> {
|
||||||
|
let mut dump_file = match &self.dump_dir {
|
||||||
|
None => return Ok(()),
|
||||||
|
Some(d) => d.clone(),
|
||||||
|
};
|
||||||
|
dump_file.push(format!("module_{:04}.ptx", index));
|
||||||
|
let mut file = File::create(dump_file)?;
|
||||||
|
file.write_all(text.as_bytes())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn save_module_error_log<'input>(
|
||||||
|
&self,
|
||||||
|
index: usize,
|
||||||
|
recoverable: &[ptx::ParseError<usize, Token<'input>, PtxError>],
|
||||||
|
unrecoverable: Option<ptx::ParseError<usize, Token<'input>, PtxError>>,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
let mut log_file = match &self.dump_dir {
|
||||||
|
None => return Ok(()),
|
||||||
|
Some(d) => d.clone(),
|
||||||
|
};
|
||||||
|
log_file.push(format!("module_{:04}.log", index));
|
||||||
|
let mut file = File::create(log_file)?;
|
||||||
|
for err in unrecoverable.iter().chain(recoverable.iter()) {
|
||||||
|
writeln!(file, "{}", err)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue