mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-21 16:59:04 +00:00
Use LD_AUDIT instead of LD_PRELOAD (#508)
LD_AUDIT gives us more control than LD_PRELOAD, and I've observed it to work much better.
This commit is contained in:
parent
044fab47e5
commit
262c25c76e
9 changed files with 179 additions and 250 deletions
47
Cargo.lock
generated
47
Cargo.lock
generated
|
@ -414,22 +414,6 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6"
|
||||
dependencies = [
|
||||
"ctor-proc-macro",
|
||||
"dtor 0.0.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor-proc-macro"
|
||||
version = "0.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2"
|
||||
|
||||
[[package]]
|
||||
name = "cuda_macros"
|
||||
version = "0.0.0"
|
||||
|
@ -711,30 +695,15 @@ dependencies = [
|
|||
"syn 2.0.89",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtor"
|
||||
version = "0.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8"
|
||||
dependencies = [
|
||||
"dtor-proc-macro 0.0.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtor"
|
||||
version = "0.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbc66182e62c4e716e2d70f97beceea0de798923f8ca48fb82aa3134dc3cae12"
|
||||
dependencies = [
|
||||
"dtor-proc-macro 0.0.6",
|
||||
"dtor-proc-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtor-proc-macro"
|
||||
version = "0.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055"
|
||||
|
||||
[[package]]
|
||||
name = "dtor-proc-macro"
|
||||
version = "0.0.6"
|
||||
|
@ -3720,7 +3689,7 @@ dependencies = [
|
|||
"cuda_macros",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"dtor 0.0.7",
|
||||
"dtor",
|
||||
"hip_runtime-sys",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
|
@ -3824,6 +3793,10 @@ dependencies = [
|
|||
"zluda_trace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_ld"
|
||||
version = "0.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "zluda_ml"
|
||||
version = "0.0.0"
|
||||
|
@ -3834,14 +3807,6 @@ dependencies = [
|
|||
"zluda_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_preload"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"ctor",
|
||||
"unwrap_or",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_redirect"
|
||||
version = "0.0.0"
|
||||
|
|
|
@ -35,8 +35,8 @@ members = [
|
|||
"zluda_trace_sparse",
|
||||
"zluda_fft",
|
||||
"zluda_inject",
|
||||
"zluda_ld",
|
||||
"zluda_ml",
|
||||
"zluda_preload",
|
||||
"zluda_redirect",
|
||||
"zluda_sparse",
|
||||
"compiler",
|
||||
|
|
|
@ -32,7 +32,7 @@ Run your application like this:
|
|||
|
||||
* Alternative method
|
||||
```
|
||||
LD_PRELOAD="<ZLUDA_DIRECTORY>/zluda_preload" <APPLICATION> <APPLICATION_ARGUMENTS>
|
||||
LD_AUDIT="<ZLUDA_DIRECTORY>/zluda_ld:$LD_AUDIT" <APPLICATION> <APPLICATION_ARGUMENTS>
|
||||
```
|
||||
|
||||
where `<ZLUDA_DIRECTORY>` is the directory which contains ZLUDA-provided `libcuda.so`: `zluda` if you downloaded a prebuilt package or `target/release` if you built from sources.
|
||||
|
|
|
@ -309,9 +309,7 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
|
|||
match remap {
|
||||
RemapAction::PreLdPostSt { .. } => {}
|
||||
RemapAction::LDStSpaceChange {
|
||||
name,
|
||||
new_space,
|
||||
old_space,
|
||||
name, new_space, ..
|
||||
} => {
|
||||
let generic_var = self
|
||||
.resolver
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "zluda_preload"
|
||||
name = "zluda_ld"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
@ -7,14 +7,10 @@ edition = "2021"
|
|||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
ctor = "0.4.3"
|
||||
unwrap_or = "1.0.1"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_only = true
|
||||
linux_symlinks = [
|
||||
"zluda_preload",
|
||||
"trace/zluda_preload",
|
||||
"trace_nvidia/zluda_preload",
|
||||
"zluda_ld",
|
||||
"trace/zluda_ld",
|
||||
"trace_nvidia/zluda_ld",
|
||||
]
|
164
zluda_ld/src/lib.rs
Normal file
164
zluda_ld/src/lib.rs
Normal file
|
@ -0,0 +1,164 @@
|
|||
use std::{
|
||||
ffi::{c_char, c_int, c_long, c_uint, c_void, CStr},
|
||||
mem,
|
||||
path::PathBuf,
|
||||
sync::{LazyLock, Mutex},
|
||||
};
|
||||
|
||||
unsafe extern "C" {
    // Hand-written declaration of `dladdr`. The `GLOBAL_STATE` initializer calls it with the
    // address of `la_version` to learn the on-disk path of this shared object.
    fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int;
}
|
||||
|
||||
/// Output record filled in by `dladdr`.
///
/// NOTE(review): the field order and types presumably mirror the C `Dl_info` structure from
/// `<dlfcn.h>` -- confirm against the platform headers.
#[repr(C)]
struct DLInfo {
    /// Read by the `GLOBAL_STATE` initializer as a NUL-terminated path of the object that
    /// contains the queried address.
    dli_fname: *const c_char,
    /// Not read anywhere in this file.
    dli_fbase: *mut c_void,
    /// Not read anywhere in this file.
    dli_sname: *const c_char,
    /// Not read anywhere in this file.
    dli_saddr: *mut c_void,
}
|
||||
|
||||
/// File names of the libraries whose loading gets redirected.
///
/// A requested or loaded path is considered a match when it *ends with* one of these names.
/// The replacement for each entry is the file with the same name located in the directory
/// that contains this shared object (see `GLOBAL_STATE`).
///
/// Entries are laid out in pairs -- versioned name first, unversioned name second. Both
/// entries of a pair share a single slot in `GlobalState::cookies` (slot = index / 2), so
/// the two names of one library must stay adjacent.
#[rustfmt::skip]
static FILES_FOR_REDIRECT: [&str; 14] = [
    "libcublas.so.12",   "libcublas.so",
    "libcublasLt.so.12", "libcublasLt.so",
    "libcuda.so.1",      "libcuda.so",
    "libcudnn.so.9",     "libcudnn.so",
    "libcufft.so.11",    "libcufft.so",
    "libcusparse.so.12", "libcusparse.so",
    "libnvidia-ml.so.1", "libnvidia-ml.so",
];
|
||||
|
||||
/// Global state, caching some computations that would be otherwise repeated.
struct GlobalState {
    /// The full, NUL-terminated paths of the file names from `FILES_FOR_REDIRECT` that will be
    /// used for redirection, in the same order. `None` when the location of this shared
    /// object could not be determined.
    replacement_paths: Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
    /// List of cookies saved for each redirected file, to avoid self-redirecting
    /// when e.g. zluda_trace_blas (libcuda.so) tries to load the real libcublas.so.
    ///
    /// One slot per pair of names in `FILES_FOR_REDIRECT`. Each slot holds the address of the
    /// cookie pointer passed to `la_objopen`, stored as an integer; it stays 0 until a
    /// matching object has been opened.
    cookies: Mutex<[usize; FILES_FOR_REDIRECT.len() / 2]>,
}
|
||||
|
||||
static GLOBAL_STATE: LazyLock<GlobalState> = LazyLock::new(|| {
|
||||
let mut self_dlinfo = unsafe { mem::zeroed::<DLInfo>() };
|
||||
let replacement_paths = if unsafe { dladdr(la_version as _, &mut self_dlinfo) } != 0 {
|
||||
unsafe { CStr::from_ptr(self_dlinfo.dli_fname) }
|
||||
.to_str()
|
||||
.ok()
|
||||
.and_then(|path| {
|
||||
let mut pathbuf = PathBuf::from(path);
|
||||
if !pathbuf.pop() {
|
||||
return None;
|
||||
}
|
||||
Some(FILES_FOR_REDIRECT.map(|file| {
|
||||
let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes();
|
||||
buffer.push(0);
|
||||
buffer
|
||||
}))
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
GlobalState {
|
||||
replacement_paths,
|
||||
cookies: Mutex::new([0; FILES_FOR_REDIRECT.len() / 2]),
|
||||
}
|
||||
});
|
||||
|
||||
/// Audit-interface handshake: the dynamic linker calls this first, passing the highest
/// interface version it supports, and expects back the version this library will use.
///
/// The answer is the linker's version capped at `LAV_CURRENT` (2), i.e. the newest version
/// both sides understand. Always answering 2 makes a linker that only implements version 1
/// treat the answer as "greater than supported" and silently ignore the whole audit library.
/// The hooks exported from this file (`la_objsearch`, `la_objopen`) are used the same way
/// regardless of the negotiated version, so accepting an older one costs nothing.
///
/// NOTE(review): the rejection behaviour of older linkers is taken from rtld-audit(7) and
/// glibc releases before 2.35 -- confirm on the oldest glibc that needs to be supported.
#[no_mangle]
unsafe extern "C" fn la_version(version: std::ffi::c_uint) -> std::ffi::c_uint {
    const LAV_CURRENT: std::ffi::c_uint = 2;
    version.min(LAV_CURRENT)
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn la_objsearch(
|
||||
name: *const c_char,
|
||||
cookie: *mut usize,
|
||||
_flags: std::ffi::c_uint,
|
||||
) -> *const c_char {
|
||||
match la_objsearch_impl(name, cookie) {
|
||||
Some(new_name) => new_name.as_ptr().cast(),
|
||||
None => name,
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn la_objsearch_impl(
|
||||
name: *const c_char,
|
||||
requesting_cookie: *mut usize,
|
||||
) -> Option<&'static [u8]> {
|
||||
let GlobalState {
|
||||
replacement_paths,
|
||||
cookies,
|
||||
} = &*GLOBAL_STATE;
|
||||
let requesting_cookie = requesting_cookie as usize;
|
||||
let input_path = CStr::from_ptr(name).to_str().ok()?;
|
||||
let replacement_paths = replacement_paths.as_ref()?;
|
||||
let index = FILES_FOR_REDIRECT
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.find_map(|(index, file)| {
|
||||
if input_path.ends_with(file) {
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})?;
|
||||
let known_cookie = { cookies.lock().ok()?[index / 2] };
|
||||
if known_cookie == requesting_cookie {
|
||||
return None;
|
||||
}
|
||||
Some(&*replacement_paths[index])
|
||||
}
|
||||
|
||||
// Type of the `_lmid` argument of `la_objopen`.
// NOTE(review): presumably mirrors glibc's `Lmid_t` -- confirm against `<dlfcn.h>`.
type Lmid = c_long;
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn la_objopen(map: *mut link_map, _lmid: Lmid, cookie: *mut usize) -> c_uint {
|
||||
save_cookie(map, cookie);
|
||||
0
|
||||
}
|
||||
|
||||
unsafe fn save_cookie(map: *mut link_map, cookie: *mut usize) -> Option<()> {
|
||||
let map = map.as_ref()?;
|
||||
let obj_name = CStr::from_ptr(map.l_name).to_str().ok()?;
|
||||
let index = FILES_FOR_REDIRECT
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.find_map(|(index, file)| {
|
||||
if obj_name.ends_with(file) {
|
||||
Some(index / 2)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})?;
|
||||
GLOBAL_STATE
|
||||
.cookies
|
||||
.lock()
|
||||
.ok()?
|
||||
.get_mut(index)
|
||||
.map(|saved_cookie| {
|
||||
*saved_cookie = cookie as usize;
|
||||
})
|
||||
}
|
||||
|
||||
// Public portion of glibc's struct link_map. Additional private fields omitted.
// Only ever accessed through the pointer handed to `la_objopen`; `save_cookie` reads
// `l_name` and nothing else, so values of this type must never be constructed or copied here.
#[allow(non_camel_case_types)]
#[repr(C)]
struct link_map {
    /// Difference between the address in the ELF file and the address in memory (load bias)
    l_addr: usize,
    /// Absolute pathname where object was found
    l_name: *const c_char,
    /// Dynamic section of the shared object (opaque to us)
    l_ld: *mut c_void,
    /// Next object in the loaded objects chain
    l_next: *mut link_map,
    /// Previous object in the loaded objects chain
    l_prev: *mut link_map,
    // Additional private / internal glibc fields follow in the real definition.
}
|
|
@ -1,21 +0,0 @@
|
|||
This crate is a last resort Linux-specific solution.
|
||||
Most of the time we can inject ourselves into a process by having users
|
||||
set `LD_LIBRARY_PATH`.
|
||||
Unfortunately, there is software out there which dynamically links to CUDA and
|
||||
CUDA performance libraries using RPATH. On Linux, dynamic linker operates
|
||||
using approximately this algorithm:
|
||||
* If the path contains `/`, treat the name as a (possibly relative) path and just use it
|
||||
* Otherwise return the first that succeeds:
|
||||
* Library with this name already loaded into the process
|
||||
* Try paths in `DT_RPATH` (if `DT_RUNPATH` is not present)
|
||||
* Try paths in `LD_LIBRARY_PATH`
|
||||
* Try paths in `DT_RUNPATH`
|
||||
* Try system paths
|
||||
|
||||
In order to defeat `DT_RPATH` this library needs to be preloaded with `LD_PRELOAD`.
|
||||
On initialization we also preload all the performance libraries. We also hijack
|
||||
`dlopen` and on every call to `dlopen` that tries to open a CUDA library we
|
||||
redirect it to our libraries.
|
||||
|
||||
We also expose `zluda_dlopen_noredirect` for the purpose of tracing libraries
|
||||
so they can load the real underlying library and not just get redirected to themselves.
|
|
@ -1,150 +0,0 @@
|
|||
use std::{
|
||||
ffi::{c_char, c_int, c_void, CStr},
|
||||
mem,
|
||||
path::PathBuf,
|
||||
ptr::{self, NonNull},
|
||||
sync::LazyLock,
|
||||
};
|
||||
use unwrap_or::unwrap_some_or;
|
||||
|
||||
// Definition taken from the `libc` crate:
// https://github.com/rust-lang/libc/blob/cf82fdf3f22ccfa98ba120efc50d5f39ab2d52ff/src/unix/linux_like/linux/mod.rs#L2682
// Passed to `dlsym` in the `GLOBAL_STATE` initializer to look up the `dlopen` that this
// library's own `dlopen` shadows.
const RTLD_NEXT: *mut c_void = -1isize as _;
|
||||
|
||||
unsafe extern "C" {
    // Hand-written declarations of the two dynamic-linker functions used by the
    // `GLOBAL_STATE` initializer: `dlsym` to find the next `dlopen`, `dladdr` to find the
    // on-disk path of this shared object.
    fn dlsym(handle: *mut c_void, symbol: *const c_char) -> *mut c_void;
    fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int;
}
|
||||
|
||||
/// Output record filled in by `dladdr`.
///
/// NOTE(review): the field order and types presumably mirror the C `Dl_info` structure from
/// `<dlfcn.h>` -- confirm against the platform headers.
#[repr(C)]
struct DLInfo {
    /// Read by the `GLOBAL_STATE` initializer as a NUL-terminated path of the object that
    /// contains the queried address.
    dli_fname: *const c_char,
    /// Not read anywhere in this file.
    dli_fbase: *mut c_void,
    /// Not read anywhere in this file.
    dli_sname: *const c_char,
    /// Not read anywhere in this file.
    dli_saddr: *mut c_void,
}
|
||||
|
||||
/// File names of the libraries whose loading gets redirected.
///
/// A path passed to `dlopen` is considered a match when it *ends with* one of these names.
/// The replacement for each entry is the file with the same name located in the directory
/// that contains this shared object (see `GLOBAL_STATE`).
#[rustfmt::skip]
static FILES_FOR_REDIRECT: [&str; 14] = [
    "libcublas.so",    "libcublas.so.12",
    "libcublasLt.so",  "libcublasLt.so.12",
    "libcuda.so",      "libcuda.so.1",
    "libcudnn.so",     "libcudnn.so.9",
    "libcufft.so",     "libcufft.so.11",
    "libcusparse.so",  "libcusparse.so.12",
    "libnvidia-ml.so", "libnvidia-ml.so.1",
];
|
||||
|
||||
// Global state, caching some computations that would be otherwise repeated on every `dlopen`
struct GlobalState {
    /// The original `dlopen` implementation from libdl.
    /// `None` when the `dlsym(RTLD_NEXT, "dlopen")` lookup returned a null pointer.
    dlopen_next: Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult>,
    /// The full paths of the file names from `FILES_FOR_REDIRECT` that will be used for redirection
    /// (NUL-terminated, same order). `None` when the location of this shared object could
    /// not be determined.
    replacement_paths: Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
}
|
||||
|
||||
static GLOBAL_STATE: LazyLock<GlobalState> = LazyLock::new(|| {
|
||||
let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) };
|
||||
let mut self_dlinfo = unsafe { mem::zeroed::<DLInfo>() };
|
||||
let replacement_paths = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 {
|
||||
unsafe { CStr::from_ptr(self_dlinfo.dli_fname) }
|
||||
.to_str()
|
||||
.ok()
|
||||
.and_then(|path| {
|
||||
let mut pathbuf = PathBuf::from(path);
|
||||
if !pathbuf.pop() {
|
||||
return None;
|
||||
}
|
||||
Some(FILES_FOR_REDIRECT.map(|file| {
|
||||
let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes();
|
||||
buffer.push(0);
|
||||
buffer
|
||||
}))
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
GlobalState {
|
||||
dlopen_next,
|
||||
replacement_paths,
|
||||
}
|
||||
});
|
||||
|
||||
// `dlopen` flags used by `ctor` when preloading the replacement libraries.
// NOTE(review): values presumably match the Linux `<dlfcn.h>` definitions -- confirm.
pub const RTLD_GLOBAL: c_int = 0x100;
pub const RTLD_LAZY: c_int = 1;
|
||||
|
||||
// Library constructor: eagerly opens every replacement library through the original
// `dlopen`. Does nothing if either the original `dlopen` or the replacement paths are
// unavailable. Failures to open individual libraries are ignored.
#[ctor::ctor]
unsafe fn ctor() {
    let GlobalState {
        dlopen_next,
        replacement_paths,
    } = &*GLOBAL_STATE;
    let dlopen_next = unwrap_some_or!(dlopen_next, return);
    let replacement_paths = unwrap_some_or!(replacement_paths, return);
    // We preload the paths to the files we want to redirect, because
    // * We don't control dynamic linking when loading dependencies. We hijack
    //   dlopen, but that only works if the dependency has been explicitly
    //   loaded with dlopen. It does not intercept the loading of the dependencies
    // * The first step that dynamic linker does is check if the file is already
    //   loaded
    for replacement in replacement_paths.into_iter() {
        dlopen_next(replacement.as_ptr().cast(), RTLD_GLOBAL | RTLD_LAZY).ok();
    }
}
|
||||
|
||||
// Rust-side view of the pointer returned by `dlopen`: used in this file as the return type
// of the `extern "C"` `dlopen` functions so that `?` and the `Option`/`Result` combinators
// can be applied to the returned handle.
type DlopenResult = Result<NonNull<c_void>, ()>;

// Compile-time check: naming `transmute` with these two types only compiles if
// `DlopenResult` has the same size as `*mut c_void`. The closure is never called.
const _: fn() = || {
    let _ = std::mem::transmute::<*mut c_void, DlopenResult>;
};
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult {
|
||||
let GlobalState {
|
||||
dlopen_next,
|
||||
replacement_paths,
|
||||
} = &*GLOBAL_STATE;
|
||||
let dlopen_next = dlopen_next.ok_or(())?;
|
||||
dlopen_redirect(dlopen_next, replacement_paths, filename, flags)
|
||||
.or_else(|| dlopen_next(filename, flags).ok())
|
||||
.ok_or(())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn zluda_dlopen_noredirect(
|
||||
filename: *const c_char,
|
||||
flags: c_int,
|
||||
) -> DlopenResult {
|
||||
let dlopen_next = GLOBAL_STATE.dlopen_next.ok_or(())?;
|
||||
dlopen_next(filename, flags)
|
||||
}
|
||||
|
||||
unsafe fn dlopen_redirect<'a>(
|
||||
dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult,
|
||||
replacement_paths: &'a Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
|
||||
input_path: *const c_char,
|
||||
flags: c_int,
|
||||
) -> Option<NonNull<c_void>> {
|
||||
if input_path == ptr::null() {
|
||||
return None;
|
||||
}
|
||||
let input_path = CStr::from_ptr(input_path).to_str().ok()?;
|
||||
let replacement_paths = replacement_paths.as_ref()?;
|
||||
let replacement_path = FILES_FOR_REDIRECT
|
||||
.into_iter()
|
||||
.zip(replacement_paths.into_iter())
|
||||
.find_map(|(file, path)| {
|
||||
if input_path.ends_with(file) {
|
||||
Some(path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})?;
|
||||
unsafe { dlopen_next(replacement_path.as_ptr() as _, flags) }.ok()
|
||||
}
|
|
@ -47,9 +47,8 @@ pub fn dlopen_local_noredirect<'a>(
|
|||
|
||||
#[cfg(unix)]
|
||||
pub(crate) mod os {
|
||||
use libc::{c_char, c_int};
|
||||
use libloading::os;
|
||||
use std::{borrow::Cow, ffi::c_void, mem};
|
||||
use std::borrow::Cow;
|
||||
|
||||
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||
unsafe {
|
||||
|
@ -67,29 +66,7 @@ pub(crate) mod os {
|
|||
pub unsafe fn dlopen_local_noredirect<'a>(
|
||||
path: Cow<'a, str>,
|
||||
) -> Result<libloading::Library, libloading::Error> {
|
||||
fn terminate_with_nul<'a>(path: Cow<'a, str>) -> Cow<'a, str> {
|
||||
let path = if !path.ends_with('\0') {
|
||||
let mut path = path.into_owned();
|
||||
path.push('\0');
|
||||
Cow::Owned(path)
|
||||
} else {
|
||||
path
|
||||
};
|
||||
path
|
||||
}
|
||||
let zluda_dlopen_noredirect =
|
||||
unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr()) };
|
||||
let zluda_dlopen_noredirect = mem::transmute::<
|
||||
_,
|
||||
Option<unsafe extern "C" fn(*const c_char, c_int) -> *mut c_void>,
|
||||
>(zluda_dlopen_noredirect);
|
||||
let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen);
|
||||
let path = terminate_with_nul(path);
|
||||
Ok(libloading::os::unix::Library::from_raw(dlopen(
|
||||
path.as_ptr().cast(),
|
||||
os::unix::RTLD_LOCAL | os::unix::RTLD_LAZY,
|
||||
))
|
||||
.into())
|
||||
libloading::Library::new(&*path)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue