diff --git a/Cargo.lock b/Cargo.lock index a6ded6c..3f883dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -414,22 +414,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "ctor" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6" -dependencies = [ - "ctor-proc-macro", - "dtor 0.0.6", -] - -[[package]] -name = "ctor-proc-macro" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" - [[package]] name = "cuda_macros" version = "0.0.0" @@ -711,30 +695,15 @@ dependencies = [ "syn 2.0.89", ] -[[package]] -name = "dtor" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8" -dependencies = [ - "dtor-proc-macro 0.0.5", -] - [[package]] name = "dtor" version = "0.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbc66182e62c4e716e2d70f97beceea0de798923f8ca48fb82aa3134dc3cae12" dependencies = [ - "dtor-proc-macro 0.0.6", + "dtor-proc-macro", ] -[[package]] -name = "dtor-proc-macro" -version = "0.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055" - [[package]] name = "dtor-proc-macro" version = "0.0.6" @@ -3720,7 +3689,7 @@ dependencies = [ "cuda_macros", "cuda_types", "dark_api", - "dtor 0.0.7", + "dtor", "hip_runtime-sys", "lazy_static", "libc", @@ -3824,6 +3793,10 @@ dependencies = [ "zluda_trace", ] +[[package]] +name = "zluda_ld" +version = "0.0.0" + [[package]] name = "zluda_ml" version = "0.0.0" @@ -3834,14 +3807,6 @@ dependencies = [ "zluda_common", ] -[[package]] -name = "zluda_preload" -version = "0.0.0" -dependencies = [ - "ctor", - "unwrap_or", -] - [[package]] name = "zluda_redirect" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index d6fa904..ca051ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,8 +35,8 @@ members = [ "zluda_trace_sparse", "zluda_fft", "zluda_inject", + "zluda_ld", "zluda_ml", - "zluda_preload", "zluda_redirect", "zluda_sparse", "compiler", diff --git a/docs/src/quick_start.md b/docs/src/quick_start.md index 9f15ebe..e3ac164 100644 --- a/docs/src/quick_start.md +++ b/docs/src/quick_start.md @@ -32,7 +32,7 @@ Run your application like this: * Alternative method ``` - LD_PRELOAD="/zluda_preload" + LD_AUDIT="/zluda_ld:$LD_AUDIT" ``` where `` is the directory which contains ZLUDA-provided `libcuda.so`: `zluda` if you downloaded a prebuilt package or `target/release` if you built from sources. diff --git a/ptx/src/pass/insert_explicit_load_store.rs b/ptx/src/pass/insert_explicit_load_store.rs index 2805dfa..3350a82 100644 --- a/ptx/src/pass/insert_explicit_load_store.rs +++ b/ptx/src/pass/insert_explicit_load_store.rs @@ -309,9 +309,7 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> { match remap { RemapAction::PreLdPostSt { .. } => {} RemapAction::LDStSpaceChange { - name, - new_space, - old_space, + name, new_space, .. } => { let generic_var = self .resolver diff --git a/zluda_preload/Cargo.toml b/zluda_ld/Cargo.toml similarity index 54% rename from zluda_preload/Cargo.toml rename to zluda_ld/Cargo.toml index ea9fa71..a319896 100644 --- a/zluda_preload/Cargo.toml +++ b/zluda_ld/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "zluda_preload" +name = "zluda_ld" version = "0.0.0" authors = ["Andrzej Janik "] edition = "2021" @@ -7,14 +7,10 @@ edition = "2021" [lib] crate-type = ["cdylib"] -[dependencies] -ctor = "0.4.3" -unwrap_or = "1.0.1" - [package.metadata.zluda] linux_only = true linux_symlinks = [ - "zluda_preload", - "trace/zluda_preload", - "trace_nvidia/zluda_preload", + "zluda_ld", + "trace/zluda_ld", + "trace_nvidia/zluda_ld", ] diff --git a/zluda_ld/src/lib.rs b/zluda_ld/src/lib.rs new file mode 100644 index 0000000..110c0b3 --- /dev/null +++ b/zluda_ld/src/lib.rs @@ -0,0 +1,164 @@ +use std::{ + ffi::{c_char, c_int, c_long, c_uint, c_void, CStr}, + mem, + path::PathBuf, + sync::{LazyLock, Mutex}, +}; + +unsafe extern "C" { + fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int; +} + +#[repr(C)] +struct DLInfo { + dli_fname: *const c_char, + dli_fbase: *mut c_void, + dli_sname: *const c_char, + dli_saddr: *mut c_void, +} + +static FILES_FOR_REDIRECT: [&'static str; 14] = [ + "libcublas.so.12", + "libcublas.so", + "libcublasLt.so.12", + "libcublasLt.so", + "libcuda.so.1", + "libcuda.so", + "libcudnn.so.9", + "libcudnn.so", + "libcufft.so.11", + "libcufft.so", + "libcusparse.so.12", + "libcusparse.so", + "libnvidia-ml.so.1", + "libnvidia-ml.so", +]; + +// Global state, caching some computations that would be otherwise repeated +struct GlobalState { + // The full paths of the file names from `FILES_FOR_REDIRECT` that will be used for redirection + replacement_paths: Option<[Vec; FILES_FOR_REDIRECT.len()]>, + // List of cookies saved for each redirected file, to avoid self-redirecting + // when e.g. zluda_trace_blas (libcuda.so) tries to load the real libcublas.so + cookies: Mutex<[usize; FILES_FOR_REDIRECT.len() / 2]>, +} + +static GLOBAL_STATE: LazyLock = LazyLock::new(|| { + let mut self_dlinfo = unsafe { mem::zeroed::() }; + let replacement_paths = if unsafe { dladdr(la_version as _, &mut self_dlinfo) } != 0 { + unsafe { CStr::from_ptr(self_dlinfo.dli_fname) } + .to_str() + .ok() + .and_then(|path| { + let mut pathbuf = PathBuf::from(path); + if !pathbuf.pop() { + return None; + } + Some(FILES_FOR_REDIRECT.map(|file| { + let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes(); + buffer.push(0); + buffer + })) + }) + } else { + None + }; + GlobalState { + replacement_paths, + cookies: Mutex::new([0; FILES_FOR_REDIRECT.len() / 2]), + } +}); + +#[no_mangle] +unsafe extern "C" fn la_version(_: std::ffi::c_uint) -> std::ffi::c_uint { + const LAV_CURRENT: u32 = 2; + LAV_CURRENT +} + +#[no_mangle] +unsafe extern "C" fn la_objsearch( + name: *const c_char, + cookie: *mut usize, + _flags: std::ffi::c_uint, +) -> *const c_char { + match la_objsearch_impl(name, cookie) { + Some(new_name) => new_name.as_ptr().cast(), + None => name, + } +} + +unsafe fn la_objsearch_impl( + name: *const c_char, + requesting_cookie: *mut usize, +) -> Option<&'static [u8]> { + let GlobalState { + replacement_paths, + cookies, + } = &*GLOBAL_STATE; + let requesting_cookie = requesting_cookie as usize; + let input_path = CStr::from_ptr(name).to_str().ok()?; + let replacement_paths = replacement_paths.as_ref()?; + let index = FILES_FOR_REDIRECT + .into_iter() + .enumerate() + .find_map(|(index, file)| { + if input_path.ends_with(file) { + Some(index) + } else { + None + } + })?; + let known_cookie = { cookies.lock().ok()?[index / 2] }; + if known_cookie == requesting_cookie { + return None; + } + Some(&*replacement_paths[index]) +} + +type Lmid = c_long; + +#[no_mangle] +unsafe extern "C" fn la_objopen(map: *mut link_map, _lmid: Lmid, cookie: *mut usize) -> c_uint { + save_cookie(map, cookie); + 0 +} + +unsafe fn save_cookie(map: *mut link_map, cookie: *mut usize) -> Option<()> { + let map = map.as_ref()?; + let obj_name = CStr::from_ptr(map.l_name).to_str().ok()?; + let index = FILES_FOR_REDIRECT + .into_iter() + .enumerate() + .find_map(|(index, file)| { + if obj_name.ends_with(file) { + Some(index / 2) + } else { + None + } + })?; + GLOBAL_STATE + .cookies + .lock() + .ok()? + .get_mut(index) + .map(|saved_cookie| { + *saved_cookie = cookie as usize; + }) +} + +// Public portion of glibc's struct link_map. Additional private fields omitted. +#[allow(non_camel_case_types)] +#[repr(C)] +struct link_map { + /// Difference between the address in the ELF file and the address in memory (load bias) + l_addr: usize, + /// Absolute pathname where object was found + l_name: *const c_char, + /// Dynamic section of the shared object (opaque to us) + l_ld: *mut c_void, + /// Next object in the loaded objects chain + l_next: *mut link_map, + /// Previous object in the loaded objects chain + l_prev: *mut link_map, + // Additional private / internal glibc fields follow in the real definition. +} diff --git a/zluda_preload/README.md b/zluda_preload/README.md deleted file mode 100644 index d5b28a4..0000000 --- a/zluda_preload/README.md +++ /dev/null @@ -1,21 +0,0 @@ -This crate is a last resort Linux-specific solution. -Most of the time we can inject ourselves into a process by having users -set `LD_LIBRARY_PATH`. -Unfortunately, there is software out there which dynamically links to CUDA and -CUDA performance libraries using RPATH. On Linux, dynamic linker operates -using approximately this algorithm: -* If path contains `/` treat the name as a (possibly relative) path and just use it -* Otherwise return the first that succeeds: - * Library with this name already loaded into the process - * Try paths in `DT_RPATH` (if `DT_RUNPATH` is not present) - * Try paths in `LD_LIBRARY_PATH` - * Try paths in `DT_RUNPATH` - * Try system paths - -In order to defeat `DT_RPATH` this library needs to be preloaded with `LD_PRELOAD`. -On initialization we also preload all the performance libraries. We also hijack -`dlopen` and on every call to `dlopen` that tries to open a CUDA library we -redirect it to our libraries - -We also expose `zluda_dlopen_noredirect` for the purpose of tracing libraries -so they can load real underlying library and not just get redirected to themselves diff --git a/zluda_preload/src/lib.rs b/zluda_preload/src/lib.rs deleted file mode 100644 index 5e57be2..0000000 --- a/zluda_preload/src/lib.rs +++ /dev/null @@ -1,150 +0,0 @@ -use std::{ - ffi::{c_char, c_int, c_void, CStr}, - mem, - path::PathBuf, - ptr::{self, NonNull}, - sync::LazyLock, -}; -use unwrap_or::unwrap_some_or; - -// Definition takes from `libc` crate: -// https://github.com/rust-lang/libc/blob/cf82fdf3f22ccfa98ba120efc50d5f39ab2d52ff/src/unix/linux_like/linux/mod.rs#L2682 -const RTLD_NEXT: *mut c_void = -1isize as _; - -unsafe extern "C" { - fn dlsym(handle: *mut c_void, symbol: *const c_char) -> *mut c_void; - fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int; -} - -#[repr(C)] -struct DLInfo { - dli_fname: *const c_char, - dli_fbase: *mut c_void, - dli_sname: *const c_char, - dli_saddr: *mut c_void, -} - -static FILES_FOR_REDIRECT: [&'static str; 14] = [ - "libcublas.so", - "libcublas.so.12", - "libcublasLt.so", - "libcublasLt.so.12", - "libcuda.so", - "libcuda.so.1", - "libcudnn.so", - "libcudnn.so.9", - "libcufft.so", - "libcufft.so.11", - "libcusparse.so", - "libcusparse.so.12", - "libnvidia-ml.so", - "libnvidia-ml.so.1", -]; - -// Global state, caching some computations that would be otherwise repeated on every `dlopen` -struct GlobalState { - /// The original `dlopen` implementation from libdl. - dlopen_next: Option DlopenResult>, - /// The full paths of the file names from `FILES_FOR_REDIRECT` that will be used for redirection - replacement_paths: Option<[Vec; FILES_FOR_REDIRECT.len()]>, -} - -static GLOBAL_STATE: LazyLock = LazyLock::new(|| { - let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) }; - let mut self_dlinfo = unsafe { mem::zeroed::() }; - let replacement_paths = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 { - unsafe { CStr::from_ptr(self_dlinfo.dli_fname) } - .to_str() - .ok() - .and_then(|path| { - let mut pathbuf = PathBuf::from(path); - if !pathbuf.pop() { - return None; - } - Some(FILES_FOR_REDIRECT.map(|file| { - let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes(); - buffer.push(0); - buffer - })) - }) - } else { - None - }; - GlobalState { - dlopen_next, - replacement_paths, - } -}); - -pub const RTLD_GLOBAL: c_int = 0x100; -pub const RTLD_LAZY: c_int = 1; - -#[ctor::ctor] -unsafe fn ctor() { - let GlobalState { - dlopen_next, - replacement_paths, - } = &*GLOBAL_STATE; - let dlopen_next = unwrap_some_or!(dlopen_next, return); - let replacement_paths = unwrap_some_or!(replacement_paths, return); - // We preload the paths to the files we want to redirect, because - // * We don't control dynamic linking when loading dependencies. We hijack - // dlopen, but that only works if the dependency has been explicitly - // loaded with dlopen. It does not intercept the loading of the dependencies - // * The first step that dynamic linker does is check if the file is already - // loaded - for replacement in replacement_paths.into_iter() { - dlopen_next(replacement.as_ptr().cast(), RTLD_GLOBAL | RTLD_LAZY).ok(); - } -} - -type DlopenResult = Result, ()>; - -const _: fn() = || { - let _ = std::mem::transmute::<*mut c_void, DlopenResult>; -}; - -#[no_mangle] -unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult { - let GlobalState { - dlopen_next, - replacement_paths, - } = &*GLOBAL_STATE; - let dlopen_next = dlopen_next.ok_or(())?; - dlopen_redirect(dlopen_next, replacement_paths, filename, flags) - .or_else(|| dlopen_next(filename, flags).ok()) - .ok_or(()) -} - -#[no_mangle] -unsafe extern "C" fn zluda_dlopen_noredirect( - filename: *const c_char, - flags: c_int, -) -> DlopenResult { - let dlopen_next = GLOBAL_STATE.dlopen_next.ok_or(())?; - dlopen_next(filename, flags) -} - -unsafe fn dlopen_redirect<'a>( - dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult, - replacement_paths: &'a Option<[Vec; FILES_FOR_REDIRECT.len()]>, - input_path: *const c_char, - flags: c_int, -) -> Option> { - if input_path == ptr::null() { - return None; - } - let input_path = CStr::from_ptr(input_path).to_str().ok()?; - let replacement_paths = replacement_paths.as_ref()?; - let replacement_path = FILES_FOR_REDIRECT - .into_iter() - .zip(replacement_paths.into_iter()) - .find_map(|(file, path)| { - if input_path.ends_with(file) { - Some(path) - } else { - None - } - })?; - unsafe { dlopen_next(replacement_path.as_ptr() as _, flags) }.ok() -} diff --git a/zluda_trace_common/src/lib.rs b/zluda_trace_common/src/lib.rs index b7ef00d..547e982 100644 --- a/zluda_trace_common/src/lib.rs +++ b/zluda_trace_common/src/lib.rs @@ -47,9 +47,8 @@ pub fn dlopen_local_noredirect<'a>( #[cfg(unix)] pub(crate) mod os { - use libc::{c_char, c_int}; use libloading::os; - use std::{borrow::Cow, ffi::c_void, mem}; + use std::borrow::Cow; pub fn open_driver() -> Result { unsafe { @@ -67,29 +66,7 @@ pub(crate) mod os { pub unsafe fn dlopen_local_noredirect<'a>( path: Cow<'a, str>, ) -> Result { - fn terminate_with_nul<'a>(path: Cow<'a, str>) -> Cow<'a, str> { - let path = if !path.ends_with('\0') { - let mut path = path.into_owned(); - path.push('\0'); - Cow::Owned(path) - } else { - path - }; - path - } - let zluda_dlopen_noredirect = - unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr()) }; - let zluda_dlopen_noredirect = mem::transmute::< - _, - Option *mut c_void>, - >(zluda_dlopen_noredirect); - let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen); - let path = terminate_with_nul(path); - Ok(libloading::os::unix::Library::from_raw(dlopen( - path.as_ptr().cast(), - os::unix::RTLD_LOCAL | os::unix::RTLD_LAZY, - )) - .into()) + libloading::Library::new(&*path) } }