Do a better job in zluda_trace when saving opaque ELF binaries (#486)
Some checks failed
ZLUDA / Build (Linux) (push) Has been cancelled
ZLUDA / Build (Windows) (push) Has been cancelled
ZLUDA / Build AMD GPU unit tests (push) Has been cancelled
ZLUDA / Run AMD GPU unit tests (push) Has been cancelled

This commit is contained in:
Andrzej Janik 2025-08-29 03:23:25 +02:00 committed by GitHub
commit 9d4f1699d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 167 additions and 17 deletions

14
Cargo.lock generated
View file

@ -1748,9 +1748,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "goblin"
version = "0.4.3"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32401e89c6446dcd28185931a01b1093726d0356820ac744023e6850689bf926"
checksum = "daa0a64d21a7eb230583b4c5f4e23b7e4e57974f96620f42a7e75e08ae66d745"
dependencies = [
"log",
"plain",
@ -2818,22 +2818,22 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scroll"
version = "0.10.2"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec"
checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6"
dependencies = [
"scroll_derive",
]
[[package]]
name = "scroll_derive"
version = "0.10.5"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaaae8f38bb311444cfb7f1979af0bc9240d95795f75f9ceddf6a59b79ceffa0"
checksum = "1783eabc414609e28a5ba76aee5ddd52199f7107a0b24c2e9746a1ecc34a683d"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"syn 2.0.89",
]
[[package]]

View file

@ -18,7 +18,7 @@ regex = "1.4"
dynasm = "1.2"
dynasmrt = "1.2"
# we don't need elf32, but goblin has a bug where elf64 does not build without elf32
goblin = { version = "0.4", default-features = false, features = ["elf64", "elf32", "archive"] }
goblin = { version = "0.9", default-features = false, features = ["elf64", "elf32", "archive"] }
paste = "1.0"
cuda_macros = { path = "../cuda_macros" }
cuda_types = { path = "../cuda_types" }

View file

@ -1451,11 +1451,13 @@ pub(crate) fn cuLibraryLoadData_Post(
_library_option_values: *mut *mut ::core::ffi::c_void,
_num_library_options: ::core::ffi::c_uint,
state: &mut trace::StateTracker,
_fn_logger: &mut FnCallLog,
fn_logger: &mut FnCallLog,
_result: CUresult,
) {
// TODO: this is not great, the lifetime of `code` is not guaranteed to be 'static
state
.libraries
.insert(unsafe { *library }, trace::CodePointer(code));
// TODO: this is not correct, but it's enough for now, we just want to
// save the binary to disk
state.record_new_module(unsafe { CUmodule((*library).0.cast()) }, code, fn_logger);
}

View file

@ -9,12 +9,14 @@ use cuda_types::{
use dark_api::fatbin::{
decompress_lz4, decompress_zstd, Fatbin, FatbinFileIterator, FatbinSubmodule,
};
use goblin::{elf, elf32, elf64};
use rustc_hash::{FxHashMap, FxHashSet};
use std::{
borrow::Cow,
ffi::{c_void, CStr, CString},
fs::{self, File},
io::{self, Read, Write},
mem,
path::PathBuf,
};
use unwrap_or::unwrap_some_or;
@ -121,14 +123,20 @@ impl StateTracker {
fn_logger: &mut FnCallLog,
) {
self.module_counter += 1;
if unsafe { *(raw_image as *const [u8; 4]) } == *goblin::elf64::header::ELFMAG {
if unsafe { *(raw_image as *const [u8; 4]) } == *elf64::header::ELFMAG {
self.saved_modules.insert(module);
// TODO: Parse ELF and write it to disk
fn_logger.log(log::ErrorEntry::UnsupportedModule {
module,
raw_image,
kind: "ELF",
})
match unsafe { get_elf_size(raw_image) } {
Some(len) => {
let elf_image =
unsafe { std::slice::from_raw_parts(raw_image.cast::<u8>(), len) };
self.record_new_submodule(module, elf_image, fn_logger, "elf");
}
None => fn_logger.log(log::ErrorEntry::UnsupportedModule {
module,
raw_image,
kind: "ELF",
}),
}
} else if unsafe { *(raw_image as *const [u8; 8]) } == *goblin::archive::MAGIC {
self.saved_modules.insert(module);
// TODO: Figure out how to get size of archive module and write it to disk
@ -198,6 +206,146 @@ impl StateTracker {
}
}
unsafe fn get_elf_size(start: *const c_void) -> Option<usize> {
let start = start.cast::<u8>();
let ei_class = start.add(mem::size_of_val(elf::header::ELFMAG));
let (header, header_size, is_64bit): (elf::header::Header, _, _) = match *ei_class {
elf::header::ELFCLASS32 => (
(*start.cast::<elf32::header::Header>()).into(),
mem::size_of::<elf32::header::Header>() as u64,
false,
),
elf::header::ELFCLASS64 => (
(*start.cast::<elf64::header::Header>()).into(),
mem::size_of::<elf64::header::Header>() as u64,
true,
),
_ => return None,
};
let mut max_end = header_size;
max_end = max_end.max(get_max_end_for::<elf::program_header::ProgramHeader>(
start, header, is_64bit,
)?);
max_end = max_end.max(get_max_end_for::<elf::section_header::SectionHeader>(
start, header, is_64bit,
)?);
Some(max_end as usize)
}
unsafe fn get_max_end_for<T: ElfComponent>(
elf_start: *const u8,
header: elf::header::Header,
is_64bit: bool,
) -> Option<u64> {
if is_64bit && T::header_size(&header) as usize != mem::size_of::<T::Component64>() {
return None;
}
if !is_64bit && T::header_size(&header) as usize != mem::size_of::<T::Component32>() {
return None;
}
if T::is_extended_header(&header) {
// TODO: support extended headers
return None;
}
let headers_start = T::headers_offset(&header);
if headers_start == 0 {
return Some(0);
}
let mut max_end = 0;
for entry_idx in 0..T::headers_count(&header) as u64 {
let header_start =
headers_start.checked_add(entry_idx.checked_mul(T::header_size(&header) as u64)?)?;
let header_end = header_start.checked_add(T::header_size(&header) as u64)?;
max_end = max_end.max(header_end);
let component = if is_64bit {
(*elf_start
.add(header_start as usize)
.cast::<T::Component64>())
.into()
} else {
(*elf_start
.add(header_start as usize)
.cast::<T::Component32>())
.into()
};
let component_end = component.start().checked_add(component.size())?;
max_end = max_end.max(component_end);
}
Some(max_end)
}
/// One kind of ELF header table entry, in the unified (class-independent)
/// representation, together with the accessors needed to locate its table in
/// the ELF header and to measure the file range a single entry describes.
trait ElfComponent: Sized {
    /// Entry layout used when the image is decoded as 32-bit.
    type Component32: Copy + Into<Self>;
    /// Entry layout used when the image is decoded as 64-bit.
    type Component64: Copy + Into<Self>;

    /// Offset of the table, as recorded in the ELF header.
    fn headers_offset(elf_header: &elf::header::Header) -> u64;
    /// Number of table entries, as recorded in the ELF header.
    fn headers_count(elf_header: &elf::header::Header) -> u64;
    /// Size of a single table entry, as recorded in the ELF header.
    fn header_size(elf_header: &elf::header::Header) -> u16;
    /// Whether the header signals an extended table, which callers treat as
    /// unsupported.
    fn is_extended_header(elf_header: &elf::header::Header) -> bool;

    /// Offset at which the range described by this entry begins.
    fn start(&self) -> u64;
    /// Length of the range described by this entry.
    fn size(&self) -> u64;
}
/// Program header table: located through `e_phoff` / `e_phentsize` /
/// `e_phnum`; each entry describes `p_filesz` bytes starting at `p_offset`.
impl ElfComponent for elf::program_header::ProgramHeader {
    type Component32 = elf32::program_header::program_header32::ProgramHeader;
    type Component64 = elf64::program_header::program_header64::ProgramHeader;

    fn headers_offset(ehdr: &elf::header::Header) -> u64 {
        ehdr.e_phoff
    }

    fn headers_count(ehdr: &elf::header::Header) -> u64 {
        u64::from(ehdr.e_phnum)
    }

    fn header_size(ehdr: &elf::header::Header) -> u16 {
        ehdr.e_phentsize
    }

    fn is_extended_header(ehdr: &elf::header::Header) -> bool {
        // An entry count at or above this marker is treated as "extended".
        const PN_XNUM: u16 = 0xffff;
        !(ehdr.e_phnum < PN_XNUM)
    }

    fn start(&self) -> u64 {
        self.p_offset
    }

    fn size(&self) -> u64 {
        self.p_filesz
    }
}
/// Section header table: located through `e_shoff` / `e_shentsize` /
/// `e_shnum`; each entry describes `sh_size` bytes starting at `sh_offset`,
/// except for `SHT_NOBITS` sections, which take up no bytes in the file.
impl ElfComponent for elf::section_header::SectionHeader {
    type Component32 = elf32::section_header::section_header32::SectionHeader;
    type Component64 = elf64::section_header::section_header64::SectionHeader;

    fn is_extended_header(elf_header: &elf::header::Header) -> bool {
        const SHN_LORESERVE: u16 = 0xff00;
        // This must inspect the *section* count (`e_shnum`), not the program
        // header count. Per the ELF specification, a file with SHN_LORESERVE or
        // more sections stores 0 in `e_shnum` and keeps the real count in
        // section 0, so a present table with a zero count is also treated as
        // extended rather than being measured as empty.
        elf_header.e_shnum >= SHN_LORESERVE
            || (elf_header.e_shnum == 0 && elf_header.e_shoff != 0)
    }

    fn header_size(elf_header: &elf::header::Header) -> u16 {
        elf_header.e_shentsize
    }

    fn headers_offset(elf_header: &elf::header::Header) -> u64 {
        elf_header.e_shoff
    }

    fn headers_count(elf_header: &elf::header::Header) -> u64 {
        u64::from(elf_header.e_shnum)
    }

    fn start(&self) -> u64 {
        self.sh_offset
    }

    fn size(&self) -> u64 {
        // SHT_NOBITS sections have a non-zero `sh_size` but occupy no space in
        // the file; counting them would push the computed image size past the
        // real end of the ELF.
        const SHT_NOBITS: u32 = 8;
        if self.sh_type == SHT_NOBITS {
            0
        } else {
            self.sh_size
        }
    }
}
// This struct writes out information about CUDA execution to the trace dir
struct DumpWriter {
dump_dir: Option<PathBuf>,