Redo logging to better log dark API and performance libraries (#372)

This commit is contained in:
Andrzej Janik 2025-06-09 15:29:14 -07:00 committed by GitHub
commit c790ab45ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
60 changed files with 113226 additions and 8125 deletions

335
Cargo.lock generated
View file

@ -73,10 +73,10 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.6.0",
"bitflags 2.9.1",
"cexpr",
"clang-sys",
"itertools",
"itertools 0.13.0",
"log",
"prettyplease",
"proc-macro2",
@ -93,6 +93,12 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -101,9 +107,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.6.0"
version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "bpaf"
@ -169,6 +175,8 @@ version = "1.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938"
dependencies = [
"jobserver",
"libc",
"shlex",
]
@ -187,6 +195,44 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cglue"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daea323a7771ea0187f788ab4efe8123ddbc08e62e4be2337a3cfa465530ba53"
dependencies = [
"cglue-macro",
"no-std-compat",
"rustc_version",
"tarc",
]
[[package]]
name = "cglue-gen"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffebf6faae95d4dc0903c2edb945003e6bcaef789c505b48494d0405aec8f7e2"
dependencies = [
"itertools 0.10.5",
"lazy_static",
"proc-macro-crate 3.3.0",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "cglue-macro"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61827395d8686e8cec89d1967146af840bf1a3669d72b6a4e65952a71d8cce7e"
dependencies = [
"cglue-gen",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
@ -256,10 +302,23 @@ dependencies = [
name = "cuda_types"
version = "0.0.0"
dependencies = [
"bitflags 2.9.1",
"cuda_base",
"hip_runtime-sys",
]
[[package]]
name = "dark_api"
version = "0.0.0"
dependencies = [
"bit-vec 0.8.0",
"cglue",
"cuda_types",
"format",
"paste",
"uuid",
]
[[package]]
name = "derivative"
version = "2.2.0"
@ -372,6 +431,14 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "format"
version = "0.0.0"
dependencies = [
"cuda_types",
"uuid",
]
[[package]]
name = "glob"
version = "0.3.1"
@ -434,6 +501,15 @@ dependencies = [
"version_check",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.13.0"
@ -449,6 +525,15 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "jobserver"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
dependencies = [
"libc",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -495,12 +580,22 @@ dependencies = [
name = "llvm_zluda"
version = "0.1.0"
dependencies = [
"bitflags 2.6.0",
"bitflags 2.9.1",
"cc",
"cmake",
"llvm-sys",
]
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.22"
@ -606,6 +701,12 @@ dependencies = [
"rawpointer",
]
[[package]]
name = "no-std-compat"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
[[package]]
name = "nom"
version = "7.1.3"
@ -659,7 +760,7 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffa5a33ddddfee04c0283a7653987d634e880347e96b5b2ed64de07efb59db9d"
dependencies = [
"proc-macro-crate",
"proc-macro-crate 0.1.5",
"proc-macro2",
"quote",
"syn 1.0.109",
@ -674,6 +775,29 @@ dependencies = [
"portable-atomic",
]
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "paste"
version = "1.0.15"
@ -690,6 +814,12 @@ dependencies = [
"indexmap",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plain"
version = "0.2.3"
@ -740,6 +870,15 @@ dependencies = [
"toml",
]
[[package]]
name = "proc-macro-crate"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
dependencies = [
"toml_edit",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -796,7 +935,7 @@ dependencies = [
name = "ptx"
version = "0.0.0"
dependencies = [
"bit-vec",
"bit-vec 0.6.3",
"bitflags 1.3.2",
"comgr",
"cuda-driver-sys",
@ -828,7 +967,7 @@ dependencies = [
"ptx_parser_macros",
"rustc-hash 2.0.0",
"thiserror 1.0.64",
"winnow",
"winnow 0.6.20",
]
[[package]]
@ -875,6 +1014,15 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "redox_syscall"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
dependencies = [
"bitflags 2.9.1",
]
[[package]]
name = "regex"
version = "1.11.0"
@ -922,13 +1070,22 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
dependencies = [
"bitflags 2.6.0",
"bitflags 2.9.1",
"errno",
"libc",
"linux-raw-sys",
@ -947,6 +1104,12 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scroll"
version = "0.10.2"
@ -1073,6 +1236,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tarc"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d475a3f83354f7eb0e9c207ceb1aaaed47fe9bc1dd4a42008a3cdfd0f5bb3dd"
[[package]]
name = "tempfile"
version = "3.13.0"
@ -1135,6 +1304,23 @@ dependencies = [
"serde",
]
[[package]]
name = "toml_datetime"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3"
[[package]]
name = "toml_edit"
version = "0.22.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e"
dependencies = [
"indexmap",
"toml_datetime",
"winnow 0.7.10",
]
[[package]]
name = "unicode-ident"
version = "1.0.13"
@ -1159,6 +1345,12 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f2fe1f049979528ce97d8a4672f984f8846fc9975be0cf14ca798116d724c4a"
[[package]]
name = "uuid"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
[[package]]
name = "version_check"
version = "0.9.5"
@ -1300,6 +1492,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "winnow"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec"
dependencies = [
"memchr",
]
[[package]]
name = "xtask"
version = "0.0.0"
@ -1340,6 +1541,8 @@ name = "zluda_bindgen"
version = "0.1.0"
dependencies = [
"bindgen",
"cuda_types",
"libloading",
"prettyplease",
"proc-macro2",
"quote",
@ -1375,21 +1578,114 @@ dependencies = [
name = "zluda_dump"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"detours-sys",
"dynasm",
"dynasmrt",
"format",
"goblin",
"lazy_static",
"libc",
"lz4-sys",
"parking_lot",
"paste",
"ptx",
"ptx_parser",
"regex",
"rustc-hash 1.1.0",
"unwrap_or",
"wchar",
"winapi",
"zluda_dump_common",
"zstd-safe",
]
[[package]]
name = "zluda_dump_blas"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_dump_common",
]
[[package]]
name = "zluda_dump_blaslt"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_dump_common",
]
[[package]]
name = "zluda_dump_common"
version = "0.1.0"
dependencies = [
"cglue",
"cuda_types",
"dark_api",
"format",
"libloading",
]
[[package]]
name = "zluda_dump_dnn"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_dump_common",
]
[[package]]
name = "zluda_dump_fft"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_dump_common",
]
[[package]]
name = "zluda_dump_sparse"
version = "0.0.0"
dependencies = [
"cglue",
"cuda_base",
"cuda_types",
"dark_api",
"format",
"libloading",
"paste",
"unwrap_or",
"zluda_dump_common",
]
[[package]]
@ -1437,3 +1733,22 @@ dependencies = [
"cuda_base",
"cuda_types",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.15+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
dependencies = [
"cc",
"pkg-config",
]

View file

@ -3,27 +3,35 @@
resolver = "2"
members = [
"ext/hip_runtime-sys",
"ext/amd_comgr-sys",
"comgr",
"cuda_base",
"cuda_types",
"dark_api",
"detours-sys",
"zluda",
"zluda_dump",
"zluda_inject",
"zluda_redirect",
"zluda_ml",
"ext/amd_comgr-sys",
"ext/hip_runtime-sys",
"format",
"ptx",
"ptx_parser",
"ptx_parser_macros",
"ptx_parser_macros_impl",
"xtask",
"zluda",
"zluda_bindgen",
"zluda_dnn",
"zluda_blas",
"zluda_blaslt",
"zluda_dnn",
"zluda_dump",
"zluda_dump_blas",
"zluda_dump_blaslt",
"zluda_dump_common",
"zluda_dump_dnn",
"zluda_dump_fft",
"zluda_dump_sparse",
"zluda_fft",
"zluda_inject",
"zluda_ml",
"zluda_redirect",
"zluda_sparse",
]

View file

@ -5,16 +5,16 @@ extern "system" {
#[must_use]
fn cublasLtCreate(
lightHandle: *mut cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasLtDestroy(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
fn cublasLtGetStatusName(
status: cuda_types::cublaslt::cublasStatus_t,
status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetStatusString(
status: cuda_types::cublaslt::cublasStatus_t,
status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetVersion() -> usize;
fn cublasLtGetCudartVersion() -> usize;
@ -22,15 +22,15 @@ extern "system" {
fn cublasLtGetProperty(
type_: cuda_types::cublaslt::libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheGetCapacity(
capacity: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheSetCapacity(
capacity: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
/** Restricts usage of CPU instructions (ISA) specified by the flags in the mask.
Flags can be combined with bitwise OR(|) operator. Supported flags:
@ -72,7 +72,7 @@ extern "system" {
workspace: *mut ::core::ffi::c_void,
workspaceSizeInBytes: usize,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
@ -98,7 +98,7 @@ extern "system" {
C: *mut ::core::ffi::c_void,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixLayoutInit_internal(
@ -108,7 +108,7 @@ extern "system" {
rows: u64,
cols: u64,
ld: i64,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Create new matrix layout descriptor.
@ -120,14 +120,14 @@ extern "system" {
rows: u64,
cols: u64,
ld: i64,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Destroy matrix layout descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixLayoutDestroy(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Set matrix layout descriptor attribute.
@ -144,7 +144,7 @@ extern "system" {
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get matrix layout descriptor attribute.
@ -165,7 +165,7 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulDescInit_internal(
@ -173,7 +173,7 @@ extern "system" {
size: usize,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Create new matmul operation descriptor.
@ -183,14 +183,14 @@ extern "system" {
matmulDesc: *mut cuda_types::cublaslt::cublasLtMatmulDesc_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Destroy matmul operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulDescDestroy(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Set matmul operation descriptor attribute.
@ -207,7 +207,7 @@ extern "system" {
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get matmul operation descriptor attribute.
@ -228,14 +228,14 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixTransformDescInit_internal(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
size: usize,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Create new matrix transform operation descriptor.
@ -244,14 +244,14 @@ extern "system" {
fn cublasLtMatrixTransformDescCreate(
transformDesc: *mut cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Destroy matrix transform operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixTransformDescDestroy(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Set matrix transform operation descriptor attribute.
@ -268,7 +268,7 @@ extern "system" {
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get matrix transform operation descriptor attribute.
@ -289,13 +289,13 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulPreferenceInit_internal(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
size: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Create new matmul heuristic search preference descriptor.
@ -303,14 +303,14 @@ extern "system" {
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
fn cublasLtMatmulPreferenceCreate(
pref: *mut cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Destroy matmul heuristic search preference descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulPreferenceDestroy(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Set matmul heuristic search preference descriptor attribute.
@ -327,7 +327,7 @@ extern "system" {
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get matmul heuristic search preference descriptor attribute.
@ -348,7 +348,7 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Query cublasLt heuristic for algorithm appropriate for given use case.
@ -383,7 +383,7 @@ extern "system" {
requestedAlgoCount: ::core::ffi::c_int,
heuristicResultsArray: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Routine to get all algo IDs that can potentially run
@ -405,7 +405,7 @@ extern "system" {
requestedAlgoCount: ::core::ffi::c_int,
algoIdsArray: *mut ::core::ffi::c_int,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Initialize algo structure
@ -422,7 +422,7 @@ extern "system" {
Dtype: cuda_types::cublaslt::cudaDataType_t,
algoId: ::core::ffi::c_int,
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Check configured algo descriptor for correctness and support on current device.
@ -449,7 +449,7 @@ extern "system" {
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
result: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get algo capability attribute.
@ -477,7 +477,7 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Set algo configuration attribute.
@ -494,7 +494,7 @@ extern "system" {
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Get algo configuration attribute.
@ -515,7 +515,7 @@ extern "system" {
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Logger callback setter.
@ -524,14 +524,16 @@ extern "system" {
\retval CUBLAS_STATUS_SUCCESS if callback was set successfully*/
fn cublasLtLoggerSetCallback(
callback: cuda_types::cublaslt::cublasLtLoggerCallback_t,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Log file setter.
\param[in] file an open file with write permissions
\retval CUBLAS_STATUS_SUCCESS if log file was set successfully*/
fn cublasLtLoggerSetFile(file: *mut FILE) -> cuda_types::cublaslt::cublasStatus_t;
fn cublasLtLoggerSetFile(
file: *mut cuda_types::FILE,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Open log file.
@ -540,7 +542,7 @@ extern "system" {
\retval CUBLAS_STATUS_SUCCESS if log file was created successfully*/
fn cublasLtLoggerOpenFile(
logFile: *const ::core::ffi::c_char,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Log level setter.
@ -557,7 +559,7 @@ extern "system" {
\retval CUBLAS_STATUS_SUCCESS if log level was set successfully*/
fn cublasLtLoggerSetLevel(
level: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Log mask setter.
@ -572,10 +574,10 @@ extern "system" {
\retval CUBLAS_STATUS_SUCCESS if log mask was set successfully*/
fn cublasLtLoggerSetMask(
mask: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
/** Experimental: Disable logging for the entire session.
\retval CUBLAS_STATUS_SUCCESS if disabled logging*/
fn cublasLtLoggerForceDisable() -> cuda_types::cublaslt::cublasStatus_t;
fn cublasLtLoggerForceDisable() -> cuda_types::cublas::cublasStatus_t;
}

File diff suppressed because it is too large Load diff

View file

@ -51,7 +51,9 @@ extern "system" {
callback: cuda_types::cusparse::cusparseLoggerCallback_t,
) -> cuda_types::cusparse::cusparseStatus_t;
#[must_use]
fn cusparseLoggerSetFile(file: *mut FILE) -> cuda_types::cusparse::cusparseStatus_t;
fn cusparseLoggerSetFile(
file: *mut cuda_types::FILE,
) -> cuda_types::cusparse::cusparseStatus_t;
#[must_use]
fn cusparseLoggerOpenFile(
logFile: *const ::core::ffi::c_char,

View file

@ -17,6 +17,7 @@ const CUDA_RS: &'static str = include_str! {"cuda.rs"};
const NVML_RS: &'static str = include_str! {"nvml.rs"};
const CUBLAS_RS: &'static str = include_str! {"cublas.rs"};
const CUBLASLT_RS: &'static str = include_str! {"cublaslt.rs"};
const CUBLASLT_INTERNAL_RS: &'static str = include_str! {"cublaslt_internal.rs"};
const CUFFT_RS: &'static str = include_str! {"cufft.rs"};
const CUSPARSE_RS: &'static str = include_str! {"cusparse.rs"};
const CUDNN9_RS: &'static str = include_str! {"cudnn9.rs"};
@ -50,6 +51,11 @@ pub fn cublaslt_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUBLASLT_RS)
}
#[proc_macro]
pub fn cublaslt_internal_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUBLASLT_INTERNAL_RS)
}
#[proc_macro]
pub fn cufft_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUFFT_RS)
@ -70,23 +76,25 @@ fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
let mut cuda_module = syn::parse_str::<File>(module).unwrap();
let mut choose_macro = ChooseMacro::new(input);
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
extern_
} else {
unreachable!()
};
let abi = extern_.abi.name;
for mut item in extern_.items {
if let ForeignItem::Fn(ForeignItemFn {
sig: Signature { ref ident, .. },
ref mut attrs,
..
}) = item
{
*attrs = Vec::new();
choose_macro.add(ident, quote! { #abi #item });
for item in cuda_module.items {
let extern_ = if let Item::ForeignMod(extern_) = item {
extern_
} else {
unreachable!()
};
let abi = extern_.abi.name;
for mut item in extern_.items {
if let ForeignItem::Fn(ForeignItemFn {
sig: Signature { ref ident, .. },
ref mut attrs,
..
}) = item
{
*attrs = Vec::new();
choose_macro.add(ident, quote! { #abi #item });
} else {
unreachable!()
}
}
}
let mut result = proc_macro2::TokenStream::new();

View file

@ -2,8 +2,9 @@
name = "cuda_types"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[dependencies]
cuda_base = { path = "../cuda_base" }
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
bitflags = "2.9.1"

View file

@ -33,40 +33,6 @@ pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3: u32 = 4194304;
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2: u32 = 8388608;
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK: u32 = 16711680;
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN: u64 = 4294967296;
impl cublasStatus_t {
pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_INITIALIZED: cublasStatus_t = cublasStatus_t(1);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ALLOC_FAILED: cublasStatus_t = cublasStatus_t(3);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INVALID_VALUE: cublasStatus_t = cublasStatus_t(7);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ARCH_MISMATCH: cublasStatus_t = cublasStatus_t(8);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_MAPPING_ERROR: cublasStatus_t = cublasStatus_t(11);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_EXECUTION_FAILED: cublasStatus_t = cublasStatus_t(13);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INTERNAL_ERROR: cublasStatus_t = cublasStatus_t(14);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_SUPPORTED: cublasStatus_t = cublasStatus_t(15);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_LICENSE_ERROR: cublasStatus_t = cublasStatus_t(16);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasStatus_t(pub ::core::ffi::c_uint);
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_LOWER: cublasFillMode_t = cublasFillMode_t(0);
}
@ -5116,7 +5082,7 @@ pub struct cublasLtMatmulHeuristicResult_t {
pub workspaceSize: usize,
/** Result status, other fields are only valid if after call to cublasLtMatmulAlgoGetHeuristic() this member is set to
CUBLAS_STATUS_SUCCESS.*/
pub state: cublasStatus_t,
pub state: super::cublas::cublasStatus_t,
/** Waves count - a device utilization metric.
wavesCount value of 1.0f suggests that when kernel is launched it will fully occupy the GPU.*/

View file

@ -19,24 +19,6 @@ pub const CUSPARSE_VER_BUILD: u32 = 93;
pub const CUSPARSE_VERSION: u32 = 12508;
/// Result information returned by cudaGraphExecUpdate
pub type cudaGraphExecUpdateResultInfo = cudaGraphExecUpdateResultInfo_st;
/// Information describing an async notification event
#[repr(C)]
pub struct cudaAsyncNotificationInfo {
pub type_: cudaAsyncNotificationType,
pub info: cudaAsyncNotificationInfo__bindgen_ty_1,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union cudaAsyncNotificationInfo__bindgen_ty_1 {
pub overBudget: cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1,
}
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1 {
pub bytesOverBudget: ::core::ffi::c_ulonglong,
}
/// Information describing an async notification event
pub type cudaAsyncNotificationInfo_t = cudaAsyncNotificationInfo;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseContext {

199
cuda_types/src/dark_api.rs Normal file
View file

@ -0,0 +1,199 @@
use bitflags::bitflags;
use std::ffi::{c_uint, c_ulonglong, c_ushort, c_void};
/*
fat_cubin:
typedef struct {
int magic;
int version;
const unsigned long long* data;
void *filename_or_fatbins; /* version 1: offline filename,
* version 2: array of prelinked fatbins */
} __fatBinC_Wrapper_t;
data start with this header:
#define FATBIN_MAGIC 0xBA55ED50U
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
#define FATBIN_VERSION 0x0001U
struct fatbinary_ALIGN_(8) fatBinaryHeader
{
unsigned int magic; // FATBIN_MAGIC
unsigned short version; // FATBIN_VERSION
unsigned short headerSize;
unsigned long long int fatSize; // size of the entire fat binary excluding this header
};
there's binary data after header
*/
#[repr(C)]
pub struct FatbincWrapper {
pub magic: c_uint,
pub version: c_uint,
pub data: *const FatbinHeader,
pub filename_or_fatbins: *const c_void,
}
#[repr(C, align(8))]
pub struct FatbinHeader {
pub magic: c_uint,
pub version: c_ushort,
pub header_size: c_ushort,
pub files_size: c_ulonglong, // excluding frame header, size of all blocks framed by this frame
}
#[repr(C)]
pub struct FatbinFileHeader {
pub kind: c_ushort,
pub version: c_ushort,
pub header_size: c_uint,
pub padded_payload_size: c_uint,
pub unknown0: c_uint, // check if it's written into separately
pub payload_size: c_uint,
pub unknown1: c_uint,
pub unknown2: c_uint,
pub sm_version: c_uint,
pub bit_width: c_uint,
pub unknown3: c_uint,
pub flags: FatbinFileHeaderFlags,
pub unknown5: c_ulonglong,
pub uncompressed_payload: c_ulonglong,
}
bitflags! {
pub struct FatbinFileHeaderFlags: u64 {
const Is64Bit = 0x0000000000000001;
const Debug = 0x0000000000000002;
const Linux = 0x0000000000000010;
const Mac = 0x0000000000000020;
const Windows = 0x0000000000000040;
const CompressedLz4 = 0x0000000000002000;
const CompressedZstd = 0x0000000000008000;
const _ = !0;
}
}
impl FatbincWrapper {
pub const MAGIC: c_uint = 0x466243B1;
const VERSION_V1: c_uint = 0x1;
pub const VERSION_V2: c_uint = 0x2;
pub fn new<'a, T: Sized>(ptr: &*const T) -> Result<&'a Self, ParseError> {
unsafe { ptr.cast::<Self>().as_ref() }
.ok_or(ParseError::NullPointer("FatbincWrapper"))
.and_then(|ptr| {
ParseError::check_fields("FATBINC_MAGIC", ptr.magic, [Self::MAGIC])?;
ParseError::check_fields(
"FATBINC_VERSION",
ptr.version,
[Self::VERSION_V1, Self::VERSION_V2],
)?;
Ok(ptr)
})
}
}
impl FatbinHeader {
const MAGIC: c_uint = 0xBA55ED50;
const VERSION: c_ushort = 0x01;
pub fn new<'a, T: Sized>(ptr: &'a *const T) -> Result<&'a Self, ParseError> {
unsafe { ptr.cast::<Self>().as_ref() }
.ok_or(ParseError::NullPointer("FatbinHeader"))
.and_then(|ptr| {
ParseError::check_fields("FATBIN_MAGIC", ptr.magic, [Self::MAGIC])?;
ParseError::check_fields("FATBIN_VERSION", ptr.version, [Self::VERSION])?;
Ok(ptr)
})
}
pub unsafe fn get_content<'a>(&'a self) -> &'a [u8] {
let start = std::ptr::from_ref(self)
.cast::<u8>()
.add(self.header_size as usize);
std::slice::from_raw_parts(start, self.files_size as usize)
}
}
impl FatbinFileHeader {
pub const HEADER_KIND_PTX: c_ushort = 0x01;
pub const HEADER_KIND_ELF: c_ushort = 0x02;
const HEADER_VERSION_CURRENT: c_ushort = 0x101;
pub fn new_ptx<T: Sized>(ptr: *const T) -> Result<Option<&'static Self>, ParseError> {
unsafe { ptr.cast::<Self>().as_ref() }
.ok_or(ParseError::NullPointer("FatbinFileHeader"))
.and_then(|ptr| {
ParseError::check_fields(
"FATBIN_FILE_HEADER_VERSION_CURRENT",
ptr.version,
[Self::HEADER_VERSION_CURRENT],
)?;
match ptr.kind {
Self::HEADER_KIND_PTX => Ok(Some(ptr)),
Self::HEADER_KIND_ELF => Ok(None),
_ => Err(ParseError::UnexpectedBinaryField {
field_name: "FATBIN_FILE_HEADER_KIND",
observed: ptr.kind.into(),
expected: vec![Self::HEADER_KIND_PTX.into(), Self::HEADER_KIND_ELF.into()],
}),
}
})
}
pub unsafe fn next<'a>(slice: &'a mut &[u8]) -> Result<Option<&'a Self>, ParseError> {
if slice.len() < std::mem::size_of::<Self>() {
return Ok(None);
}
let this = &*slice.as_ptr().cast::<Self>();
let next_element = slice
.split_at_checked(this.header_size as usize + this.padded_payload_size as usize)
.map(|(_, next)| next);
*slice = next_element.unwrap_or(&[]);
ParseError::check_fields(
"FATBIN_FILE_HEADER_VERSION_CURRENT",
this.version,
[Self::HEADER_VERSION_CURRENT],
)?;
Ok(Some(this))
}
pub unsafe fn get_payload<'a>(&'a self) -> &'a [u8] {
let start = std::ptr::from_ref(self)
.cast::<u8>()
.add(self.header_size as usize);
std::slice::from_raw_parts(start, self.payload_size as usize)
}
}
pub enum ParseError {
NullPointer(&'static str),
UnexpectedBinaryField {
field_name: &'static str,
observed: u32,
expected: Vec<u32>,
},
}
impl ParseError {
pub(crate) fn check_fields<const N: usize, T: Into<u32> + Eq + Copy>(
name: &'static str,
observed: T,
expected: [T; N],
) -> Result<(), Self> {
if expected.contains(&observed) {
Ok(())
} else {
let observed = observed.into();
let expected = expected.into_iter().map(Into::into).collect();
Err(ParseError::UnexpectedBinaryField {
field_name: name,
expected,
observed,
})
}
}
}

View file

@ -1,3 +1,5 @@
pub enum FILE {}
pub mod cublas;
pub mod cublaslt;
pub mod cuda;
@ -7,3 +9,4 @@ pub mod cudnn9;
pub mod cufft;
pub mod cusparse;
pub mod nvml;
pub mod dark_api;

12
dark_api/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "dark_api"
version = "0.0.0"
edition = "2021"
[dependencies]
cuda_types = { path = "../cuda_types" }
format = { path = "../format" }
uuid = "1.16"
paste = "1.0"
bit-vec = "0.8.0"
cglue = "0.3.5"

712
dark_api/src/lib.rs Normal file
View file

@ -0,0 +1,712 @@
use std::ffi::c_void;
use cuda_types::cuda::CUuuid;
macro_rules! dark_api_init {
(SIZE_OF, $table_len:literal, $type_:ty) => {
(std::mem::size_of::<usize>() * $table_len) as *const std::ffi::c_void
};
(NULL, $table_len:literal, $type_:ty) => {
std::ptr::null()
};
($fn_:ident, $table_len:literal, $type_:ty) => {
<$type_>::$fn_ as *const std::ffi::c_void
};
}
macro_rules! dark_api_fn {
(SIZE_OF) => { };
(NULL) => { };
($fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
unsafe extern "system" fn $fn_(
$($arg_id : $arg_type,)*
) -> $ret_type;
}
}
macro_rules! dark_api_entry {
($idx:literal, SIZE_OF) => { };
($idx:literal, NULL) => { };
($idx:literal, $fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
#[allow(non_snake_case)]
pub unsafe fn $fn_(
&self,
$($arg_id : $arg_type,)*
) -> $ret_type {
let ptr = self.ptr as *const *const std::ffi::c_void;
let ptr = ptr.add($idx);
let fn_ = std::mem::transmute::<_, unsafe extern "system" fn( $($arg_type,)* ) -> $ret_type >(*ptr);
(fn_)( $($arg_id,)* )
}
}
}
macro_rules! dark_api_format_args {
($writer:ident; $arg_idx:ident; $first_arg:ident $(, $arg_id:ident)*) => {
$writer.write_all(concat!(stringify!($first_arg), ": ").as_bytes())?;
format::CudaDisplay::write(& $first_arg, "", $arg_idx, $writer)?;
$(
$arg_idx += 1;
$writer.write_all(concat!(", ", stringify!($arg_id), ": ").as_bytes())?;
format::CudaDisplay::write(& $arg_id, "", $arg_idx, $writer)?;
)*
};
($writer:ident; $arg_idx:ident; ) => {
};
}
macro_rules! dark_api_is_fn {
(SIZE_OF) => {
false
};
(NULL) => {
false
};
($fn_:ident) => {
true
};
}
macro_rules! dark_api_format_fn {
(SIZE_OF) => { };
(NULL) => { };
(#[noformat] $fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => { };
($fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
pub fn $fn_ (
writer: &mut (impl std::io::Write + ?Sized),
$($arg_id: $arg_type,)*
) -> std::io::Result<()> {
#[allow(unused)]
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
dark_api_format_args!(writer; arg_idx; $($arg_id),*);
writer.write_all(b")")
}
}
}
macro_rules! dark_api {
(
$mod_name: ident;
$(
$guid:expr => $name:ident [$len:literal] {
$(
$(#[$attr:ident])?
[$index:literal] = $fn_:ident $( ( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty )?
),*
}
),+
) => {
pub mod $mod_name {
#[allow(non_snake_case)]
pub struct CudaDarkApiGlobalTable {
$(pub $name: [*const std::ffi::c_void; $len],)+
}
impl CudaDarkApiGlobalTable {
$(const $name: cuda_types::cuda::CUuuid = cuda_types::cuda::CUuuid { bytes: *uuid::uuid!($guid).as_bytes() };)+
}
unsafe impl Sync for CudaDarkApiGlobalTable {}
impl CudaDarkApiGlobalTable {
pub const fn new<T: CudaDarkApi>() -> Self {
let mut result = Self {
$(
$name: [std::ptr::null(); $len],
)+
};
$(
$( result.$name[$index] = dark_api_init!($fn_, $len, T); )*
)+
result
}
pub fn get(&self, key: &cuda_types::cuda::CUuuid) -> Option<crate::DarkApiTable> {
match key {
$(
&Self::$name => {
let fns = &self.$name[..];
let mut valid_fns = bit_vec::BitVec::from_elem($len, false);
$(
valid_fns.set($index, dark_api_is_fn!($fn_) );
)*
Some(crate::DarkApiTable {
fns,
valid_fns
})
}
)+
_ => None
}
}
}
pub trait CudaDarkApi {
$($(
dark_api_fn!($fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )?);
)*)+
}
pub fn guid_to_name(guid: &cuda_types::cuda::CUuuid, index: usize) -> Option<(&'static str, Option<&'static str>)> {
let guid = uuid::Uuid::from_bytes(guid.bytes);
$(
if guid == uuid::uuid!($guid) {
let guid = stringify!($name);
$(
if index == $index {
return Some((guid, Some(stringify!($fn_))));
}
)*
return Some((guid, None));
}
)+
None
}
$(
paste::paste! {
pub struct [<$name:camel>] {
#[allow(dead_code)]
ptr: *const std::ffi::c_void
}
impl [<$name:camel>] {
pub const GUID: cuda_types::cuda::CUuuid = CudaDarkApiGlobalTable::$name;
pub unsafe fn new(ptr: *const std::ffi::c_void) -> Self {
Self {
ptr
}
}
$(
dark_api_entry!($index, $fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )?);
)*
}
}
)+
pub mod format {
$($(
dark_api_format_fn!($(#[$attr])? $fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )? );
)*)+
}
}
};
}
pub struct DarkApiTable<'a> {
fns: &'a [*const std::ffi::c_void],
valid_fns: bit_vec::BitVec,
}
impl<'a> DarkApiTable<'a> {
pub fn len(&self) -> usize {
self.fns.len()
}
pub fn get_fn(&self, idx: usize) -> Option<*const std::ffi::c_void> {
if self.valid_fns.get(idx).unwrap_or(false) {
Some(self.fns[idx])
} else {
None
}
}
pub fn start(&self) -> *const std::ffi::c_void {
self.fns.as_ptr().cast()
}
}
dark_api! {
cuda;
"{6BD5FB6C-5BF4-E74A-8987-D93912FD9DF9}" => CUDART_INTERFACE[10] {
[0] = SIZE_OF,
[1] = get_module_from_cubin(
module: *mut cuda_types::cuda::CUmodule,
fatbinc_wrapper: *const cuda_types::dark_api::FatbincWrapper
) -> (),
[2] = cudart_interface_fn2(
pctx: *mut cuda_types::cuda::CUcontext,
dev: cuda_types::cuda::CUdevice
) -> cuda_types::cuda::CUresult,
[6] = get_module_from_cubin_ext1(
result: *mut cuda_types::cuda::CUmodule,
fatbinc_wrapper: *const cuda_types::dark_api::FatbincWrapper,
arg3: *mut std::ffi::c_void,
arg4: *mut std::ffi::c_void,
arg5: u32
) -> cuda_types::cuda::CUresult,
[7] = cudart_interface_fn7(arg1: usize) -> cuda_types::cuda::CUresult,
[8] = get_module_from_cubin_ext2(
fatbin_header: *const cuda_types::dark_api::FatbinHeader,
result: *mut cuda_types::cuda::CUmodule,
arg3: *mut std::ffi::c_void,
arg4: *mut std::ffi::c_void,
arg5: u32
) -> cuda_types::cuda::CUresult
},
"{42D85A81-23F6-CB47-8298-F6E78A3AECDC}" => TOOLS_TLS[4] {
[0] = SIZE_OF
},
"{A094798C-2E74-2E74-93F2-0800200C0A66}" => TOOLS_RUNTIME_CALLBACK_HOOKS[7] {
[0] = SIZE_OF,
[2] = runtime_callback_hooks_fn2(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> (),
[6] = runtime_callback_hooks_fn6(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> ()
},
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
[0] = context_local_storage_ctor(
context: cuda_types::cuda::CUcontext,
manager: *mut std::ffi::c_void, // ContextStateManager
ctx_state: *mut std::ffi::c_void, // ContextState
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
dtor_cb: Option<extern "system" fn(
cuda_types::cuda::CUcontext,
*mut std::ffi::c_void, // ContextStateManager
*mut std::ffi::c_void, // ContextState
)>
) -> cuda_types::cuda::CUresult,
[1] = context_local_storage_dtor(
arg1: *mut std::ffi::c_void,
arg2: *mut std::ffi::c_void
) -> cuda_types::cuda::CUresult,
[2] = context_local_storage_get_state(
ctx_state: *mut std::ffi::c_void, // ContextState
cu_ctx: cuda_types::cuda::CUcontext,
manager: *mut std::ffi::c_void // ContextStateManager
) -> cuda_types::cuda::CUresult
},
"{0CA50B8C-1004-929A-89A7-D0DF10E77286}" => CTX_CREATE_BYPASS[2] {
[0] = SIZE_OF,
[1] = ctx_create_v2_bypass(
pctx: *mut cuda_types::cuda::CUcontext,
flags: ::std::os::raw::c_uint,
dev: cuda_types::cuda::CUdevice
) -> cuda_types::cuda::CUresult
},
"{195BCBF4-D67D-024A-ACC5-1D29CEA631AE}" => HEAP_ACCESS[3] {
[0] = SIZE_OF,
[1] = heap_alloc(
heap_alloc_record_ptr: *mut *const std::ffi::c_void, // HeapAllocRecord
arg2: usize,
arg3: usize
) -> cuda_types::cuda::CUresult,
[2] = heap_free(
heap_alloc_record_ptr: *const std::ffi::c_void, // HeapAllocRecord
arg2: *mut usize
) -> cuda_types::cuda::CUresult
},
"{B10541E1-F7C7-C74A-9F64-F223BE99F1E2}" => DEVICE_EXTENDED_RT[26] {
[0] = SIZE_OF,
[5] = device_get_attribute_ext(
dev: cuda_types::cuda::CUdevice,
attribute: std::ffi::c_uint,
unknown: std::ffi::c_int,
result: *mut [usize; 2]
) -> cuda_types::cuda::CUresult,
// I don't know is this function return, but on my GTX 1060 it returns 0
[13] = device_get_something(
result: *mut std::ffi::c_uchar,
dev: cuda_types::cuda::CUdevice
) -> cuda_types::cuda::CUresult
},
"{D4082055-BDE6-704B-8D34-BA123C66E1F2}" => INTEGRITY_CHECK[3] {
[0] = SIZE_OF,
[1] = integrity_check(
version: u32,
unix_seconds: u64,
result: *mut [u64;2]
) -> cuda_types::cuda::CUresult
},
// This functions check for some bits that are never observably set
"{263E8860-7CD2-6143-92F6-BBD5006DFA7E}" => UNKNOWN_CHECKS[4] {
[0] = SIZE_OF,
[2] = context_check(
ctx_in: cuda_types::cuda::CUcontext,
result1: *mut u32, // seems to be always 0
result2: *mut *const std::ffi::c_void
) -> cuda_types::cuda::CUresult,
[3] = check_fn3() -> u32 // seeems to always return 0
}
}
// Purely for internal use by ZLUDA dump
dark_api! {
zluda_dump;
"{0B7A5827-AF98-46AB-A951-22D19BDF5C08}" => ZLUDA_DUMP_INTERNAL[1] {
#[noformat]
[0] = logged_call(
fn_name: cglue::slice::CSliceRef<'static, u8>,
args: crate::FnFfiRef<crate::ByteVecFfi>,
fn_: crate::FnFfiRef<usize>,
internal_error: usize,
format_status: extern "C" fn(usize) -> crate::ByteVecFfi
) -> usize
}
}
#[repr(C)]
pub struct ByteVecFfi {
ptr: *mut u8,
len: usize,
capacity: usize,
}
impl ByteVecFfi {
pub fn new(mut v: Vec<u8>) -> Self {
let (ptr, len, capacity) = (v.as_mut_ptr(), v.len(), v.capacity());
std::mem::forget(v);
Self { ptr, len, capacity }
}
pub fn to_vec(self) -> Vec<u8> {
let vec = unsafe { Vec::from_raw_parts(self.ptr, self.len, self.capacity) };
std::mem::forget(self);
vec
}
}
impl Drop for ByteVecFfi {
fn drop(&mut self) {
// SAFETY: We are dropping the Vec<u8> that we created in `from`
// and we know that the pointer is valid.
unsafe {
let _ = Vec::from_raw_parts(self.ptr, self.len, self.capacity);
}
}
}
#[cglue::cglue_trait]
pub trait FnFfi {
type Output;
fn call(&self) -> Self::Output;
}
// We use this wrapper instead of implementing `FnFfi` for all T that implement `Fn() -> Output`
// because cglue machinery already provided blanket implementation of `FnFfi` for its own needs
// `cglue_trait_ext` does not work with `Fn` traits because they are special
#[repr(transparent)]
pub struct FnFfiWrapper<Output, T: std::ops::Fn() -> Output>(pub T);
impl<Output, T: std::ops::Fn() -> Output> FnFfi for FnFfiWrapper<Output, T> {
type Output = Output;
fn call(&self) -> Output {
(self.0)()
}
}
pub fn integrity_check(
version: u32,
unix_seconds: u64,
driver_version: u32,
current_process: u32,
current_thread: u32,
integrity_check_table: *const c_void,
cudart_table: *const c_void,
fn_address: *const c_void,
devices: u32,
get_device: impl FnMut(u32) -> DeviceHashinfo,
) -> [u64; 2] {
match version % 10 {
0 => return [0x3341181C03CB675C, 0x8ED383AA1F4CD1E8],
1 => return [0x1841181C03CB675C, 0x8ED383AA1F4CD1E8],
_ => {}
}
// There's first computation pass, but it does not use any input and effectively computes this
let pass1_result = [
0x14u8, 0x6A, 0xDD, 0xAE, 0x53, 0xA9, 0xA7, 0x52, 0xAA, 0x08, 0x41, 0x36, 0x0B, 0xF5, 0x5A,
0x9F,
];
let mut result = [0u8; 66];
pass2(&mut result, &pass1_result);
let pass3_input = Pass3Input {
driver_version,
version,
current_process,
current_thread,
cudart_table,
integrity_check_table,
fn_address,
unix_seconds,
};
pass3(&mut result, &pass3_input);
pass4(&mut result, devices, get_device);
let pass5_1 = pass5(&mut result);
zero_result(&mut result);
pass6(&mut result, &pass1_result);
pass7(&mut result, &pass5_1);
pass5(&mut result)
}
fn pass7(accumulator: &mut [u8; 66], pass5_1: &[u64; 2]) {
hash_pass(accumulator, pass5_1, 0);
}
fn pass6(accumulator: &mut [u8; 66], pass1_result: &[u8; 16]) {
hash_pass(accumulator, pass1_result, 0x5c);
}
fn zero_result(result: &mut [u8; 66]) {
for i in 0..16 {
result[i] = 0;
}
for i in 48..66 {
result[i] = 0;
}
}
fn pass5(result: &mut [u8; 66]) -> [u64; 2] {
let temp = 16u8.wrapping_sub(result[64]);
for _ in 0..temp {
integrity_check_single_pass(result, temp);
}
let mut temp_ptr = unsafe { result.as_mut_ptr().add(0x30) };
loop {
let temp = unsafe { *temp_ptr };
temp_ptr = unsafe { temp_ptr.add(1) };
integrity_check_single_pass(result, temp);
if temp_ptr == unsafe { result.as_mut_ptr().add(0x40) } {
break;
}
}
[
u64::from_ne_bytes(result[0..8].try_into().unwrap()),
u64::from_ne_bytes(result[8..16].try_into().unwrap()),
]
}
#[repr(C)]
struct Pass3Input {
driver_version: u32,
version: u32,
current_process: u32,
current_thread: u32,
cudart_table: *const c_void,
integrity_check_table: *const c_void,
fn_address: *const c_void,
unix_seconds: u64,
}
#[repr(C)]
#[derive(Clone, Copy)]
pub struct DeviceHashinfo {
pub guid: CUuuid,
pub pci_domain: i32,
pub pci_bus: i32,
pub pci_device: i32,
}
fn pass2(accumulator: &mut [u8; 66], pass1_result: &[u8; 16]) {
hash_pass(accumulator, pass1_result, 0x36)
}
fn pass3(accumulator: &mut [u8; 66], mixin: &Pass3Input) {
hash_pass(accumulator, mixin, 0)
}
fn pass4(
accumulator: &mut [u8; 66],
devices: u32,
mut get_device: impl FnMut(u32) -> DeviceHashinfo,
) {
for dev in 0..devices {
hash_pass(accumulator, &(get_device)(dev), 0)
}
}
fn hash_pass<T: Sized>(accumulator: &mut [u8; 66], mixin: &T, xor_mask: u8) {
for i in 0..std::mem::size_of_val(mixin) {
integrity_check_single_pass(
accumulator,
unsafe { *std::ptr::from_ref(mixin).cast::<u8>().add(i) } ^ xor_mask,
);
}
}
fn integrity_check_single_pass(arg1: &mut [u8; 66], arg2: u8) {
const MIXING_TABLE: [u8; 256] = [
0x29, 0x2E, 0x43, 0xC9, 0xA2, 0xD8, 0x7C, 0x01, 0x3D, 0x36, 0x54, 0xA1, 0xEC, 0xF0, 0x06,
0x13, 0x62, 0xA7, 0x05, 0xF3, 0xC0, 0xC7, 0x73, 0x8C, 0x98, 0x93, 0x2B, 0xD9, 0xBC, 0x4C,
0x82, 0xCA, 0x1E, 0x9B, 0x57, 0x3C, 0xFD, 0xD4, 0xE0, 0x16, 0x67, 0x42, 0x6F, 0x18, 0x8A,
0x17, 0xE5, 0x12, 0xBE, 0x4E, 0xC4, 0xD6, 0xDA, 0x9E, 0xDE, 0x49, 0xA0, 0xFB, 0xF5, 0x8E,
0xBB, 0x2F, 0xEE, 0x7A, 0xA9, 0x68, 0x79, 0x91, 0x15, 0xB2, 0x07, 0x3F, 0x94, 0xC2, 0x10,
0x89, 0x0B, 0x22, 0x5F, 0x21, 0x80, 0x7F, 0x5D, 0x9A, 0x5A, 0x90, 0x32, 0x27, 0x35, 0x3E,
0xCC, 0xE7, 0xBF, 0xF7, 0x97, 0x03, 0xFF, 0x19, 0x30, 0xB3, 0x48, 0xA5, 0xB5, 0xD1, 0xD7,
0x5E, 0x92, 0x2A, 0xAC, 0x56, 0xAA, 0xC6, 0x4F, 0xB8, 0x38, 0xD2, 0x96, 0xA4, 0x7D, 0xB6,
0x76, 0xFC, 0x6B, 0xE2, 0x9C, 0x74, 0x04, 0xF1, 0x45, 0x9D, 0x70, 0x59, 0x64, 0x71, 0x87,
0x20, 0x86, 0x5B, 0xCF, 0x65, 0xE6, 0x2D, 0xA8, 0x02, 0x1B, 0x60, 0x25, 0xAD, 0xAE, 0xB0,
0xB9, 0xF6, 0x1C, 0x46, 0x61, 0x69, 0x34, 0x40, 0x7E, 0x0F, 0x55, 0x47, 0xA3, 0x23, 0xDD,
0x51, 0xAF, 0x3A, 0xC3, 0x5C, 0xF9, 0xCE, 0xBA, 0xC5, 0xEA, 0x26, 0x2C, 0x53, 0x0D, 0x6E,
0x85, 0x28, 0x84, 0x09, 0xD3, 0xDF, 0xCD, 0xF4, 0x41, 0x81, 0x4D, 0x52, 0x6A, 0xDC, 0x37,
0xC8, 0x6C, 0xC1, 0xAB, 0xFA, 0x24, 0xE1, 0x7B, 0x08, 0x0C, 0xBD, 0xB1, 0x4A, 0x78, 0x88,
0x95, 0x8B, 0xE3, 0x63, 0xE8, 0x6D, 0xE9, 0xCB, 0xD5, 0xFE, 0x3B, 0x00, 0x1D, 0x39, 0xF2,
0xEF, 0xB7, 0x0E, 0x66, 0x58, 0xD0, 0xE4, 0xA6, 0x77, 0x72, 0xF8, 0xEB, 0x75, 0x4B, 0x0A,
0x31, 0x44, 0x50, 0xB4, 0x8F, 0xED, 0x1F, 0x1A, 0xDB, 0x99, 0x8D, 0x33, 0x9F, 0x11, 0x83,
0x14,
];
let temp1 = arg1[0x40];
arg1[temp1 as usize + 0x10] = arg2;
let temp2 = temp1 as usize;
let temp3 = (temp1 + 1) & 0xf;
arg1[temp1 as usize + 0x20] = arg1[temp2] ^ arg2;
let temp4 = MIXING_TABLE[(arg2 ^ arg1[0x41]) as usize];
let temp1 = arg1[temp2 + 0x30];
arg1[temp2 + 0x30] = temp4 ^ temp1;
arg1[0x41] = temp4 ^ temp1;
arg1[0x40] = temp3;
if temp3 != 0 {
return;
}
let mut temp1 = 0x29;
let mut temp5 = 0x0;
unsafe {
loop {
temp1 = temp1 ^ arg1[0];
arg1[0] = temp1;
let mut temp6 = arg1.as_mut_ptr().add(1);
loop {
let temp7 = temp6.add(1);
temp1 = *temp6 ^ MIXING_TABLE[temp1 as usize];
*temp6 = temp1;
temp6 = temp7;
if temp7 == arg1.as_mut_ptr().add(0x30) {
break;
}
}
temp1 = temp1.wrapping_add(temp5);
temp5 = temp5.wrapping_add(0x01);
if temp5 == 0x12 {
break;
}
temp1 = MIXING_TABLE[temp1 as usize];
}
}
}
#[cfg(test)]
mod tests {
use std::mem;
#[test]
fn integrity_check_single_pass() {
let mut input = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x38, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa3, 0x61, 0xe4, 0x42,
0xf6, 0x67, 0x94, 0xff, 0x18, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa4, 0x57,
0x72, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
super::integrity_check_single_pass(&mut input, 34);
let expected = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x22, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa3, 0x61, 0xe4, 0x42,
0xf6, 0x67, 0x94, 0xff, 0x22, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa4, 0x57,
0x72, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x57,
];
assert_eq!(input, expected);
}
#[test]
fn integrity_check_pass2() {
let pass1_result = [
0x14u8, 0x6A, 0xDD, 0xAE, 0x53, 0xA9, 0xA7, 0x52, 0xAA, 0x08, 0x41, 0x36, 0x0B, 0xF5,
0x5A, 0x9F,
];
let mut result = [0u8; 66];
super::pass2(&mut result, &pass1_result);
let expected = [
0x8b, 0x21, 0x9a, 0x49, 0xe8, 0x6d, 0x1a, 0xee, 0xf2, 0x37, 0xf9, 0xb5, 0x4a, 0x8c,
0x3c, 0x75, 0xc7, 0x1e, 0xee, 0x21, 0xcf, 0x29, 0x8a, 0xe5, 0x13, 0x83, 0xf4, 0xec,
0x33, 0x04, 0xe2, 0xfd, 0xb0, 0x2f, 0x09, 0x01, 0x4f, 0xf7, 0x68, 0x6d, 0x69, 0x46,
0x43, 0x7e, 0xb6, 0x2b, 0x21, 0xed, 0x57, 0xa1, 0x10, 0x86, 0x0e, 0x60, 0x44, 0x1e,
0x70, 0x5f, 0x67, 0xd1, 0xeb, 0x67, 0xa1, 0x3d, 0x00, 0x3d,
];
assert_eq!(result, expected);
}
#[test]
fn integrity_check_pass3() {
let mut result = [
0x8b, 0x21, 0x9a, 0x49, 0xe8, 0x6d, 0x1a, 0xee, 0xf2, 0x37, 0xf9, 0xb5, 0x4a, 0x8c,
0x3c, 0x75, 0xc7, 0x1e, 0xee, 0x21, 0xcf, 0x29, 0x8a, 0xe5, 0x13, 0x83, 0xf4, 0xec,
0x33, 0x04, 0xe2, 0xfd, 0xb0, 0x2f, 0x09, 0x01, 0x4f, 0xf7, 0x68, 0x6d, 0x69, 0x46,
0x43, 0x7e, 0xb6, 0x2b, 0x21, 0xed, 0x57, 0xa1, 0x10, 0x86, 0x0e, 0x60, 0x44, 0x1e,
0x70, 0x5f, 0x67, 0xd1, 0xeb, 0x67, 0xa1, 0x3d, 0x00, 0x3d,
];
let input = super::Pass3Input {
driver_version: 0x2f30,
version: 12082,
current_process: 0x002fa423,
current_thread: 0xf79c1000,
cudart_table: 0x00007ffff6958240 as *const _,
integrity_check_table: 0x00007ffff6958220 as *const _,
fn_address: 0x00007ffff2aaf4a0 as *const _,
unix_seconds: 0x682b9cee,
};
super::pass3(&mut result, &input);
let expected = [
0x0a, 0xfd, 0xab, 0xc9, 0xff, 0x9b, 0xa0, 0xbe, 0x4d, 0x30, 0x32, 0x82, 0x74, 0x4f,
0xa7, 0x48, 0x9d, 0x23, 0x82, 0xa3, 0x87, 0xfa, 0x6c, 0xdb, 0x92, 0x49, 0xd9, 0xb5,
0x4b, 0x2b, 0x5e, 0x51, 0x6e, 0xf7, 0xf9, 0x4d, 0x28, 0x8a, 0x64, 0x06, 0x19, 0xb3,
0xe6, 0xbe, 0xa4, 0xec, 0x7e, 0x54, 0x64, 0x28, 0xd9, 0xe1, 0xd4, 0x34, 0xc0, 0xa9,
0x49, 0x88, 0xc9, 0x61, 0x58, 0xdd, 0x66, 0x74, 0x00, 0x74,
];
assert_eq!(result, expected);
}
#[test]
fn integrity_check_pass4() {
let mut result = [
0x84, 0xfd, 0x93, 0x10, 0xc6, 0xdb, 0xb3, 0xbc, 0x49, 0xc2, 0x25, 0xe7, 0xda, 0x6e,
0x22, 0x6f, 0x9b, 0xbd, 0x81, 0x59, 0xc3, 0x01, 0x9a, 0x7a, 0x26, 0x34, 0x39, 0x0f,
0x2a, 0x56, 0x13, 0xb1, 0xf6, 0xbc, 0x7f, 0xa1, 0x8f, 0x04, 0xa5, 0x4d, 0x0d, 0x78,
0xab, 0x20, 0xf8, 0x23, 0x20, 0xa5, 0x3f, 0x67, 0x36, 0xe2, 0xde, 0x8a, 0xe5, 0xdf,
0xe1, 0xf2, 0x03, 0x94, 0xad, 0xdc, 0x9a, 0xda, 0x00, 0xda,
];
super::pass4(&mut result, 1, |_| super::DeviceHashinfo {
guid: super::CUuuid {
bytes: unsafe {
std::mem::transmute([0x8a2bfe9au32, 0x382d25ac, 0xc5ae37ea, 0x5f32716d])
},
},
pci_domain: 0,
pci_bus: 2,
pci_device: 0,
});
let expected = [
0x1f, 0xd8, 0x25, 0xd2, 0xdf, 0xfa, 0x64, 0xc7, 0xb6, 0x1a, 0xaf, 0x22, 0xb8, 0x79,
0xfb, 0x96, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x7c, 0x9d, 0x46, 0xd2, 0x1f, 0xd8, 0x25, 0xd2, 0xdd, 0xfa, 0x64, 0xc7, 0xb6, 0x1a,
0xaf, 0x22, 0xe6, 0x17, 0xbd, 0x3a, 0xd7, 0xdd, 0x5f, 0x82, 0x8c, 0x87, 0xce, 0x86,
0x66, 0xaf, 0xa0, 0x50, 0x7a, 0x7d, 0xbb, 0xbc, 0x0c, 0x50,
];
assert_eq!(result, expected);
}
#[test]
fn integrity_check_pass5() {
let mut result = [
0x3e, 0x4b, 0xf2, 0x95, 0x71, 0xf5, 0x6b, 0x51, 0x07, 0xbf, 0x4b, 0xf1, 0x04, 0x0e,
0x8e, 0x0b, 0x5f, 0x4d, 0x30, 0x0c, 0x0f, 0x0c, 0xae, 0xfb, 0x48, 0xaf, 0x23, 0xb5,
0xea, 0x4c, 0xc2, 0xdb, 0xd7, 0xdf, 0x88, 0x74, 0x39, 0x58, 0x16, 0x3a, 0x1f, 0x7c,
0x9b, 0x20, 0x7e, 0x7e, 0x94, 0xc8, 0x8b, 0xc6, 0xb2, 0x38, 0x0d, 0x07, 0x7d, 0xbd,
0x90, 0xd5, 0x39, 0x63, 0xeb, 0x1d, 0x4f, 0x40, 0x00, 0x40,
];
let output = super::pass5(&mut result);
let expected_result = [
0x00, 0x23, 0x53, 0x06, 0x5e, 0x96, 0xf6, 0x9c, 0x61, 0xaa, 0x96, 0x2d, 0x2e, 0xcd,
0xa8, 0x58, 0xe9, 0xca, 0xc0, 0x2e, 0x35, 0xed, 0x5f, 0xca, 0xe1, 0x0e, 0xcd, 0x1f,
0xd0, 0x8e, 0x8b, 0x9c, 0x29, 0x4d, 0x1c, 0x94, 0x6b, 0xf7, 0x10, 0xb0, 0x07, 0x08,
0x91, 0xd6, 0x14, 0x06, 0xc0, 0xec, 0xe1, 0x9c, 0x8e, 0x33, 0xd4, 0xe9, 0x43, 0x5c,
0x86, 0x0c, 0x72, 0x4d, 0x27, 0x98, 0x91, 0x7f, 0x00, 0x7f,
];
assert_eq!(result, expected_result);
let output = unsafe { mem::transmute::<_, [u8; 16]>(output) };
let expected = [
0x00, 0x23, 0x53, 0x06, 0x5e, 0x96, 0xf6, 0x9c, 0x61, 0xaa, 0x96, 0x2d, 0x2e, 0xcd,
0xa8, 0x58,
];
assert_eq!(output, expected);
}
}

View file

@ -2,7 +2,7 @@
name = "detours-sys"
version = "0.1.2"
authors = ["Diana <5275194+DianaNites@users.noreply.github.com>"]
edition = "2018"
edition = "2021"
links = "detours"
# Package stuff
description = "Rust bindings to Microsoft Detours"

8
format/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "format"
version = "0.0.0"
edition = "2021"
[dependencies]
cuda_types = { path = "../cuda_types" }
uuid = "1.16"

40
format/src/dark_api.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::CudaDisplay;
use cuda_types::dark_api::*;
impl CudaDisplay for FatbincWrapper {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(b"{ magic: ")?;
CudaDisplay::write(&self.magic, "", 0, writer)?;
writer.write_all(b", version: ")?;
CudaDisplay::write(&self.version, "", 0, writer)?;
writer.write_all(b", data: ")?;
CudaDisplay::write(&self.data, "", 0, writer)?;
writer.write_all(b", filename_or_fatbins: ")?;
CudaDisplay::write(&self.filename_or_fatbins, "", 0, writer)?;
writer.write_all(b" }")
}
}
impl CudaDisplay for FatbinHeader {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(b"{ magic: ")?;
CudaDisplay::write(&self.magic, "", 0, writer)?;
writer.write_all(b", version: ")?;
CudaDisplay::write(&self.version, "", 0, writer)?;
writer.write_all(b", header_size: ")?;
CudaDisplay::write(&self.header_size, "", 0, writer)?;
writer.write_all(b", files_size: ")?;
CudaDisplay::write(&self.files_size, "", 0, writer)?;
writer.write_all(b" }")
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,12 @@
use cuda_types::cuda::*;
use std::{
any::TypeId,
ffi::{c_void, CStr},
fmt::LowerHex,
mem, ptr, slice,
};
pub(crate) trait CudaDisplay {
pub trait CudaDisplay {
fn write(
&self,
fn_name: &'static str,
@ -14,6 +15,17 @@ pub(crate) trait CudaDisplay {
) -> std::io::Result<()>;
}
impl CudaDisplay for () {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "()")
}
}
impl CudaDisplay for CUuuid {
fn write(
&self,
@ -22,7 +34,9 @@ impl CudaDisplay for CUuuid {
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
let guid = self.bytes;
write!(writer, "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15])
let uuid = uuid::Uuid::from_bytes(guid);
let braced = uuid.as_braced();
write!(writer, "{braced:#X}")
}
}
@ -37,6 +51,17 @@ impl CudaDisplay for CUdeviceptr_v1 {
}
}
impl CudaDisplay for bool {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "{}", *self)
}
}
impl CudaDisplay for u8 {
fn write(
&self,
@ -81,6 +106,17 @@ impl CudaDisplay for u32 {
}
}
impl CudaDisplay for i64 {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "{}", *self)
}
}
impl CudaDisplay for u64 {
fn write(
&self,
@ -125,6 +161,60 @@ impl CudaDisplay for f64 {
}
}
// user by Dark API
impl CudaDisplay
for Option<
extern "system" fn(
cuda_types::cuda::CUcontext,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
),
>
{
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if let Some(fn_ptr) = self {
write!(writer, "{:p}", *fn_ptr)
} else {
writer.write_all(b"NULL")
}
}
}
impl CudaDisplay for Option<unsafe extern "C" fn(*const i8)> {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if let Some(fn_ptr) = self {
write!(writer, "{:p}", *fn_ptr)
} else {
writer.write_all(b"NULL")
}
}
}
impl CudaDisplay for Option<unsafe extern "C" fn(i32, *const i8, *const i8)> {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if let Some(fn_ptr) = self {
write!(writer, "{:p}", *fn_ptr)
} else {
writer.write_all(b"NULL")
}
}
}
pub fn write_handle<T: LowerHex>(
this: &[T; 64],
writer: &mut (impl std::io::Write + ?Sized),
@ -198,11 +288,30 @@ impl CudaDisplay for *const i8 {
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(
writer,
"\"{}\"",
unsafe { CStr::from_ptr(*self as _) }.to_string_lossy()
)
if self.is_null() {
writer.write_all(b"NULL")
} else {
write!(
writer,
"\"{}\"",
unsafe { CStr::from_ptr(*self as _) }.to_string_lossy()
)
}
}
}
impl CudaDisplay for *mut cuda_types::FILE {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if self.is_null() {
writer.write_all(b"NULL")
} else {
write!(writer, "{:p}", *self)
}
}
}
@ -409,6 +518,44 @@ impl CudaDisplay for CUDA_RESOURCE_DESC_st {
}
}
impl crate::CudaDisplay for cuda_types::cuda::CUlaunchConfig_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(gridDimX), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.gridDimX, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(gridDimY), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.gridDimY, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(gridDimZ), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.gridDimZ, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(blockDimX), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.blockDimX, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(blockDimY), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.blockDimY, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(blockDimZ), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.blockDimZ, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(sharedMemBytes), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.sharedMemBytes, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(hStream), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.hStream, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(numAttrs), ": ").as_bytes())?;
crate::CudaDisplay::write(&self.numAttrs, "", 0, writer)?;
writer.write_all(concat!(", ", stringify!(attrs), ": ").as_bytes())?;
writer.write_all(b"[")?;
for i in 0..self.numAttrs {
if i != 0 {
writer.write_all(b", ")?;
}
crate::CudaDisplay::write(&unsafe { *self.attrs.add(i as usize) }, "", 0, writer)?;
}
writer.write_all(b"]")?;
writer.write_all(b" }")
}
}
impl CudaDisplay for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
fn write(
&self,
@ -552,17 +699,6 @@ impl CudaDisplay for CUgraphNodeParams_st {
}
}
impl CudaDisplay for CUlaunchConfig_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for CUeglFrame_st {
fn write(
&self,
@ -584,33 +720,30 @@ impl CudaDisplay for CUdevResource_st {
todo!()
}
}
impl CudaDisplay for CUlaunchAttribute_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl<T: CudaDisplay> CudaDisplay for *mut T {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if *self == ptr::null_mut() {
writer.write_all(b"NULL")
} else {
let this: &T = unsafe { &**self };
this.write(fn_name, index, writer)
}
write_launch_attribute(writer, fn_name, index, self.id, self.value)
}
}
impl<T: CudaDisplay> CudaDisplay for *const T {
impl<T: CudaDisplay + 'static> CudaDisplay for *mut T {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
CudaDisplay::write(&self.cast_const(), fn_name, index, writer)
}
}
impl<T: CudaDisplay + 'static> CudaDisplay for *const T {
fn write(
&self,
fn_name: &'static str,
@ -620,8 +753,17 @@ impl<T: CudaDisplay> CudaDisplay for *const T {
if *self == ptr::null() {
writer.write_all(b"NULL")
} else {
let this: &T = unsafe { &**self };
this.write(fn_name, index, writer)
if fn_name.len() > 2
&& fn_name.starts_with("cu")
&& fn_name.as_bytes()[2].is_ascii_lowercase()
&& (TypeId::of::<T>() == TypeId::of::<f32>()
|| TypeId::of::<T>() == TypeId::of::<f64>())
{
CudaDisplay::write(&self.cast::<c_void>(), fn_name, index, writer)
} else {
let this: &T = unsafe { &**self };
this.write(fn_name, index, writer)
}
}
}
}
@ -644,6 +786,24 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
}
}
impl<T: CudaDisplay> CudaDisplay for [T] {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(b"[")?;
for i in 0..self.len() {
CudaDisplay::write(&self[i], "", 0, writer)?;
if i != self.len() - 1 {
writer.write_all(b", ")?;
}
}
writer.write_all(b"]")
}
}
impl CudaDisplay for CUarrayMapInfo_st {
fn write(
&self,
@ -666,6 +826,17 @@ impl CudaDisplay for CUexecAffinityParam_st {
}
}
impl CudaDisplay for *mut cuda_types::cudnn9::cudnnRuntimeTag_t {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
#[allow(non_snake_case)]
pub fn write_cuGraphKernelNodeGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
@ -677,7 +848,10 @@ pub fn write_cuGraphKernelNodeGetAttribute(
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
writer.write_all(b", attr: ")?;
CudaDisplay::write(&attr, "cuGraphKernelNodeGetAttribute", 1, writer)?;
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, value_out)?;
writer.write_all(b", value_out: ")?;
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, unsafe {
*value_out
})?;
writer.write_all(b") ")
}
@ -702,7 +876,10 @@ pub fn write_cuStreamGetAttribute(
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
writer.write_all(b", attr: ")?;
CudaDisplay::write(&attr, "cuStreamGetAttribute", 1, writer)?;
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, value_out)?;
writer.write_all(b", value_out: ")?;
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, unsafe {
*value_out
})?;
writer.write_all(b") ")
}
@ -711,98 +888,78 @@ fn write_launch_attribute(
fn_name: &'static str,
index: usize,
attribute: CUlaunchAttributeID,
value_out: *mut CUstreamAttrValue,
value: CUlaunchAttributeValue,
) -> std::io::Result<()> {
match attribute {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).accessPolicyWindow },
fn_name,
index,
writer,
)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = ")?;
CudaDisplay::write(unsafe { &value.accessPolicyWindow }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_COOPERATIVE = ")?;
CudaDisplay::write(unsafe { &value.cooperative }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = ")?;
CudaDisplay::write(unsafe { &value.syncPolicy }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = ")?;
CudaDisplay::write(unsafe { &value.clusterDim }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
writer.write_all(b", value_out: ")?;
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = ")?;
CudaDisplay::write(
unsafe { &(*value_out).clusterSchedulingPolicyPreference },
unsafe { &value.clusterSchedulingPolicyPreference },
fn_name,
index,
writer,
)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
writer.write_all(b", value_out: ")?;
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = ")?;
CudaDisplay::write(
unsafe { &(*value_out).programmaticStreamSerializationAllowed },
unsafe { &value.programmaticStreamSerializationAllowed },
fn_name,
index,
writer,
)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).programmaticEvent },
fn_name,
index,
writer,
)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = ")?;
CudaDisplay::write(unsafe { &value.programmaticEvent }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PRIORITY = ")?;
CudaDisplay::write(unsafe { &value.priority }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).memSyncDomainMap },
fn_name,
index,
writer,
)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = ")?;
CudaDisplay::write(unsafe { &value.memSyncDomainMap }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).memSyncDomain },
fn_name,
index,
writer,
)
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = ")?;
CudaDisplay::write(unsafe { &value.memSyncDomain }, fn_name, index, writer)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
writer.write_all(b", value_out: ")?;
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = ")?;
CudaDisplay::write(
unsafe { &(*value_out).launchCompletionEvent },
unsafe { &value.launchCompletionEvent },
fn_name,
index,
writer,
)
}
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
writer.write_all(b", value_out: ")?;
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ")?;
CudaDisplay::write(
unsafe { &(*value_out).deviceUpdatableKernelNode },
unsafe { &value.deviceUpdatableKernelNode },
fn_name,
index,
writer,
)
}
_ => writer.write_all(b", ... "),
_ => writer.write_all(b""),
}
}
@ -858,6 +1015,302 @@ pub fn write_cuGLGetDevices_v2(
todo!()
}
#[path = "format_generated.rs"]
#[allow(non_snake_case)]
pub fn write_cudnnBackendGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
descriptor: cuda_types::cudnn9::cudnnBackendDescriptor_t,
attributeName: cuda_types::cudnn9::cudnnBackendAttributeName_t,
attributeType: cuda_types::cudnn9::cudnnBackendAttributeType_t,
requestedElementCount: i64,
elementCount: *mut i64,
arrayOfElements: *mut ::core::ffi::c_void,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(descriptor), ": ").as_bytes())?;
crate::CudaDisplay::write(&descriptor, "cudnnBackendGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributeName), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributeName, "cudnnBackendGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributeType), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributeType, "cudnnBackendGetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(requestedElementCount), ": ").as_bytes())?;
crate::CudaDisplay::write(
&requestedElementCount,
"cudnnBackendGetAttribute",
arg_idx,
writer,
)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(elementCount), ": ").as_bytes())?;
crate::CudaDisplay::write(&elementCount, "cudnnBackendGetAttribute", arg_idx, writer)?;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(arrayOfElements), ": ").as_bytes())?;
cudnn9_print_elements(
writer,
attributeType,
unsafe { elementCount.as_ref() }
.copied()
.unwrap_or(requestedElementCount),
arrayOfElements,
)?;
writer.write_all(b")")
}
#[allow(non_snake_case)]
pub fn write_cudnnBackendSetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
descriptor: cuda_types::cudnn9::cudnnBackendDescriptor_t,
attributeName: cuda_types::cudnn9::cudnnBackendAttributeName_t,
attributeType: cuda_types::cudnn9::cudnnBackendAttributeType_t,
elementCount: i64,
arrayOfElements: *const ::core::ffi::c_void,
) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
writer.write_all(concat!(stringify!(descriptor), ": ").as_bytes())?;
crate::CudaDisplay::write(&descriptor, "cudnnBackendSetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributeName), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributeName, "cudnnBackendSetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(attributeType), ": ").as_bytes())?;
crate::CudaDisplay::write(&attributeType, "cudnnBackendSetAttribute", arg_idx, writer)?;
arg_idx += 1;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(elementCount), ": ").as_bytes())?;
crate::CudaDisplay::write(&elementCount, "cudnnBackendSetAttribute", arg_idx, writer)?;
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(arrayOfElements), ": ").as_bytes())?;
cudnn9_print_elements(writer, attributeType, elementCount, arrayOfElements)?;
writer.write_all(b")")
}
fn cudnn9_print_elements(
writer: &mut (impl std::io::Write + ?Sized),
type_: cuda_types::cudnn9::cudnnBackendAttributeType_t,
element_count: i64,
array_of_elements: *const ::core::ffi::c_void,
) -> std::io::Result<()> {
fn print_typed<T: CudaDisplay>(
writer: &mut (impl std::io::Write + ?Sized),
element_count: i64,
array_of_elements: *const ::core::ffi::c_void,
) -> std::io::Result<()> {
if array_of_elements.is_null() {
return writer.write_all(b"NULL");
}
let elements =
unsafe { slice::from_raw_parts(array_of_elements as *const T, element_count as usize) };
CudaDisplay::write(elements, "", 0, writer)
}
match type_ {
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_HANDLE => {
print_typed::<cuda_types::cudnn9::cudnnHandle_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE => {
print_typed::<cuda_types::cudnn9::cudnnDataType_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BOOLEAN => {
print_typed::<bool>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64 => {
print_typed::<i64>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT => {
print_typed::<f32>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE => {
print_typed::<f64>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_VOID_PTR => {
print_typed::<*const c_void>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_CONVOLUTION_MODE => {
print_typed::<cuda_types::cudnn9::cudnnConvolutionMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_HEUR_MODE => {
print_typed::<cuda_types::cudnn9::cudnnBackendHeurMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_KNOB_TYPE => {
print_typed::<cuda_types::cudnn9::cudnnBackendKnobType_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NAN_PROPOGATION => {
print_typed::<cuda_types::cudnn9::cudnnNanPropagation_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NUMERICAL_NOTE => {
print_typed::<cuda_types::cudnn9::cudnnBackendNumericalNote_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_LAYOUT_TYPE => {
print_typed::<cuda_types::cudnn9::cudnnBackendLayoutType_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_ATTRIB_NAME => {
print_typed::<cuda_types::cudnn9::cudnnBackendAttributeName_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_POINTWISE_MODE => {
print_typed::<cuda_types::cudnn9::cudnnPointwiseMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR => {
print_typed::<cuda_types::cudnn9::cudnnBackendDescriptor_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_GENSTATS_MODE => {
print_typed::<cuda_types::cudnn9::cudnnGenStatsMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BN_FINALIZE_STATS_MODE => {
print_typed::<cuda_types::cudnn9::cudnnBnFinalizeStatsMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_REDUCTION_OPERATOR_TYPE => {
print_typed::<cuda_types::cudnn9::cudnnReduceTensorOp_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BEHAVIOR_NOTE => {
print_typed::<cuda_types::cudnn9::cudnnBackendBehaviorNote_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_TENSOR_REORDERING_MODE => {
print_typed::<cuda_types::cudnn9::cudnnBackendTensorReordering_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_RESAMPLE_MODE => {
print_typed::<cuda_types::cudnn9::cudnnResampleMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_PADDING_MODE => {
print_typed::<cuda_types::cudnn9::cudnnPaddingMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_INT32 => {
print_typed::<i32>(writer, element_count, array_of_elements)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_CHAR => {
CudaDisplay::write(&array_of_elements.cast::<i8>(), "", 0, writer)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_SIGNAL_MODE => {
print_typed::<cuda_types::cudnn9::cudnnSignalMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_FRACTION => {
print_typed::<cuda_types::cudnn9::cudnnFraction_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NORM_MODE => {
print_typed::<cuda_types::cudnn9::cudnnBackendNormMode_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NORM_FWD_PHASE => {
print_typed::<cuda_types::cudnn9::cudnnBackendNormFwdPhase_t>(
writer,
element_count,
array_of_elements,
)
}
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_RNG_DISTRIBUTION => {
print_typed::<cuda_types::cudnn9::cudnnRngDistribution_t>(
writer,
element_count,
array_of_elements,
)
}
_ => unimplemented!(),
}
}
mod dark_api;
mod format_generated;
pub(crate) use format_generated::*;
pub use format_generated::*;
mod format_generated_blas;
pub use format_generated_blas::*;
mod format_generated_blaslt;
pub use format_generated_blaslt::*;
mod format_generated_blaslt_internal;
pub use format_generated_blaslt_internal::*;
mod format_generated_dnn9;
pub use format_generated_dnn9::*;
mod format_generated_fft;
pub use format_generated_fft::*;
mod format_generated_sparse;
pub use format_generated_sparse::*;

View file

@ -427,7 +427,7 @@ fn directive<'a, 'input>(
| Token::DotFile | Token::DotSection => true,
_ => false,
}),
PtxError::UnrecognizedDirective,
|text| PtxError::UnrecognizedDirective(text.unwrap_or("")),
)
.map(Option::flatten)
.parse_next(stream)
@ -675,7 +675,7 @@ fn statement<'a, 'input>(
_ => false,
},
),
PtxError::UnrecognizedStatement,
|text| PtxError::UnrecognizedStatement(text.unwrap_or("")),
)
.map(Option::flatten)
.parse_next(stream)
@ -1285,10 +1285,10 @@ pub enum PtxError<'input> {
ArrayInitalizer,
#[error("")]
NonExternPointer,
#[error("{0:?}")]
UnrecognizedStatement(Option<&'input str>),
#[error("{0:?}")]
UnrecognizedDirective(Option<&'input str>),
#[error("Unrecognized statement {0:?}")]
UnrecognizedStatement(&'input str),
#[error("Unrecognized directive {0:?}")]
UnrecognizedDirective(&'input str),
}
#[derive(Debug)]
@ -3492,11 +3492,11 @@ mod tests {
assert_eq!(errors.len(), 2);
assert!(matches!(
errors[0],
PtxError::UnrecognizedStatement(Some("unknown_op1.asdf foobar;"))
PtxError::UnrecognizedStatement("unknown_op1.asdf foobar;")
));
assert!(matches!(
errors[1],
PtxError::UnrecognizedStatement(Some("unknown_op2 temp2, temp;"))
PtxError::UnrecognizedStatement("unknown_op2 temp2, temp;")
));
}
@ -3533,11 +3533,11 @@ mod tests {
assert_eq!(errors.len(), 2);
assert!(matches!(
errors[0],
PtxError::UnrecognizedDirective(Some(".broken_directive_fail; 34; {"))
PtxError::UnrecognizedDirective(".broken_directive_fail; 34; {")
));
assert!(matches!(
errors[1],
PtxError::UnrecognizedDirective(Some("section foobar }"))
PtxError::UnrecognizedDirective("section foobar }")
));
}
}

View file

@ -51,7 +51,7 @@ impl Project {
fn try_new(p: Package) -> Option<Project> {
let name = p.name;
let clib_name = p.targets.into_iter().find_map(|target| {
if target.is_cdylib() {
if target.is_cdylib() || target.is_dylib() {
Some(target.name)
} else {
None

View file

@ -157,32 +157,7 @@ fn cudart_interface_fn1_impl(pctx: *mut CUcontext, dev: c_int) -> hipError_t {
hipError_t::hipSuccess
}
/*
fat_cubin:
typedef struct {
int magic;
int version;
const unsigned long long* data;
void *filename_or_fatbins; /* version 1: offline filename,
* version 2: array of prelinked fatbins */
} __fatBinC_Wrapper_t;
data start with this header:
#define FATBIN_MAGIC 0xBA55ED50U
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
#define FATBIN_VERSION 0x0001U
struct fatbinary_ALIGN_(8) fatBinaryHeader
{
unsigned int magic; // FATBIN_MAGIC
unsigned short version; // FATBIN_VERSION
unsigned short headerSize;
unsigned long long int fatSize; // size of the entire fat binary excluding this header
};
there's binary data after header
*/
const FATBINC_MAGIC: c_uint = 0x466243B1;
const FATBINC_VERSION: c_uint = 0x1;

View file

@ -10,3 +10,5 @@ proc-macro2 = "1.0.89"
quote = "1.0"
prettyplease = "0.2.25"
rustc-hash = "1.1.0"
libloading = "0.8"
cuda_types = { path = "../cuda_types" }

View file

@ -0,0 +1,142 @@
// GENERATED AUTOMATICALLY BY /local/andrzej/dev/zluda/zluda_blaslt/build/decompile_internal.py. DO NOT EDIT MANUALLY
extern "C" {
#define undefined void
#define undefined1 unsigned char
#define undefined4 unsigned int
#define uint unsigned int
#define undefined8 unsigned long long
#define ulong unsigned long long
undefined4 cublasLtShutdownCtx(long param_1);
int cublasLtCtxInit(long param_1);
undefined8 cublasLtHeuristicLutSerializeEntry(undefined8 param_1,undefined8 *param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 *param_12,undefined8 *param_13,ulong param_14,undefined8 *param_15);
undefined8 cublasLtLegacyGemmACC(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmBII(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmBSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmCCC(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
void cublasLtLegacyGemmUtilizationCCC(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
undefined8 cublasLtLegacyGemmDDD(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
void cublasLtLegacyGemmUtilizationDDD(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
undefined8 cublasLtLegacyGemmHHH(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmHSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmHSH(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmSSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmTSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmTST(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
undefined8 cublasLtLegacyGemmZZZ(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
void cublasLtLegacyGemmUtilizationZZZ(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
undefined4 cublasLtAlgoCharacteristicGetAttribute(undefined8 param_1,long param_2,int param_3,int param_4,int param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8);
undefined8 cublasLtMatmulAlgoConfigGetAttributeRange(void);
undefined8 cublasLtHHHMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtHHHMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtHHHMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtHHHMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtHHHMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtHHHMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtHSHMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtHSHMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtHSHMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtHSHMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtHSHMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtHSHMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtSSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtSSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtSSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtSSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtSSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtSSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtDDDMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtDDDMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtDDDMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtDDDMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtDDDMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtDDDMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtBSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtBSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtBSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtBSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtBSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtBSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtCCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtCCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtCCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtCCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtCCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtCCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtZZZMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtZZZMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtZZZMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtZZZMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtZZZMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtZZZMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtACCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtACCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtACCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtACCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtACCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtACCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtBIIMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtBIIMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtBIIMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtBIIMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtBIIMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtBIIMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtBSBMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtBSBMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtBSBMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtBSBMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtBSBMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtBSBMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtHSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtHSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtHSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtHSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtHSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtHSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtKCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtKCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtKCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtKCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtKCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtKCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtKCKMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtKCKMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtKCKMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtKCKMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtKCKMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtKCKMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtTSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtTSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtTSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtTSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtTSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtTSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtTSTMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtTSTMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtTSTMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtTSTMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtTSTMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtTSTMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtVCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtVCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtVCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtVCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtVCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtVCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined8 cublasLtVCVMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
undefined4 cublasLtVCVMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
undefined4 cublasLtVCVMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
undefined4 cublasLtVCVMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
undefined4 cublasLtVCVMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
undefined4 cublasLtVCVMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
undefined * cublasLt_for_cublas_BII(void);
undefined * cublasLt_for_cublas_BSS(void);
undefined * cublasLt_for_cublas_CCC(void);
undefined * cublasLt_for_cublas_DDD(void);
undefined * cublasLt_for_cublas_HHH(void);
undefined * cublasLt_for_cublas_HSH(void);
undefined * cublasLt_for_cublas_HSS(void);
undefined * cublasLt_for_cublas_SSS(void);
undefined * cublasLt_for_cublas_TSS(void);
undefined * cublasLt_for_cublas_TST(void);
undefined * cublasLt_for_cublas_ZZZ(void);
}

View file

@ -0,0 +1,40 @@
# Modified from here: https://github.com/galoget/ghidra-headless-scripts/
# Usage: analyzeHeadless <PROJECT_PATH> cublaslt -import /usr/local/cuda/lib64/libcublasLt.so -scriptPath . -postScript decompile_cublaslt_internal.py
from ghidra.app.decompiler import DecompInterface
from ghidra.util.task import ConsoleTaskMonitor
EXTERNAL_HEADER = "/usr/local/cuda/include/cublasLt.h"
with open(EXTERNAL_HEADER, 'r'):
header_content = open(EXTERNAL_HEADER, 'r').read()
decompinterface = DecompInterface()
decompinterface.openProgram(currentProgram)
functions = currentProgram.getFunctionManager().getFunctions(True)
blaslt_functions = []
monitor = ConsoleTaskMonitor()
with open("cublasLt_internal.h", "w") as output_file:
output_file.write("// GENERATED AUTOMATICALLY BY decompile_cublaslt_internal.py. DO NOT EDIT MANUALLY\n")
output_file.write("extern \"C\" {\n")
output_file.write(" #define undefined void")
output_file.write(" #define undefined1 unsigned char")
output_file.write(" #define undefined4 unsigned int")
output_file.write(" #define uint unsigned int")
output_file.write(" #define undefined8 unsigned long long")
output_file.write(" #define ulong unsigned long long")
for function in functions:
function_name = function.getName()
if not function_name.startswith("cublasLt"):
continue
if function_name.format("{}(") in header_content:
continue
decompile_results = decompinterface.decompileFunction(function, 0, monitor)
signature = decompile_results.getDecompiledFunction().getSignature()
# Ghidra disssasembles cublasLtShutdownCtx to return void, but
# looking at the assembly I'm convinced it returns a value
# On the other hand, cublasLtLegacyGemmUtilization* seem to return void
# TODO: fail if there is a new void-return function
if function_name == "cublasLtShutdownCtx":
signature = signature.replace("void", "undefined4")
output_file.write(" CUBLASWINAPI {}\n".format(signature))
output_file.write("}\n")

View file

@ -2,7 +2,8 @@ use proc_macro2::Span;
use quote::{format_ident, quote, ToTokens};
use rustc_hash::{FxHashMap, FxHashSet};
use std::{
borrow::Cow, collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr,
borrow::Cow, cmp, collections::hash_map, ffi::CString, fs::File, io::Write, iter, mem,
path::PathBuf, ptr, str::FromStr,
};
use syn::{
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
@ -10,19 +11,133 @@ use syn::{
PathArguments, PathSegment, Signature, Type, TypePath, UseTree,
};
// Source: https://developer.nvidia.com/cuda-toolkit-archive
static KNOWN_CUDA_VERSIONS: &[&'static str] = &[
"12.8.1", "12.8.0", "12.6.3", "12.6.2", "12.6.1", "12.6.0", "12.5.1", "12.5.0", "12.4.1",
"12.4.0", "12.3.2", "12.3.1", "12.3.0", "12.2.2", "12.2.1", "12.2.0", "12.1.1", "12.1.0",
"12.0.1", "12.0.0", "11.8.0", "11.7.1", "11.7.0", "11.6.2", "11.6.1", "11.6.0", "11.5.2",
"11.5.1", "11.5.0", "11.4.4", "11.4.3", "11.4.2", "11.4.1", "11.4.0", "11.3.1", "11.3.0",
"11.2.2", "11.2.1", "11.2.0", "11.1.1", "11.1.0", "11.0.3", "11.0.2", "11.0.1", "11.0.0",
"10.2", "10.1", "10.0", "9.2", "9.1", "9.0", "8.0", "7.5", "7.0", "6.5", "6.0", "5.5", "5.0",
"4.2", "4.1", "4.0", "3.2", "3.1", "3.0", "2.3", "2.2", "2.1", "2.0", "1.1", "1.0",
];
fn main() {
let crate_root = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
generate_hip_runtime(
&crate_root,
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
);
generate_cuda(&crate_root);
let cuda_functions = generate_cuda(&crate_root);
generate_process_address_table(&crate_root, cuda_functions);
generate_ml(&crate_root);
generate_cublas(&crate_root);
generate_cublaslt(&crate_root);
generate_cudnn(&crate_root);
generate_cufft(&crate_root);
generate_cusparse(&crate_root);
generate_cudnn(&crate_root);
}
fn generate_process_address_table(crate_root: &PathBuf, mut cuda_fns: Vec<Ident>) {
cuda_fns.sort_unstable();
let mut versions = KNOWN_CUDA_VERSIONS
.iter()
.copied()
.map(cuda_numeric_version)
.collect::<Vec<_>>();
versions.sort_unstable();
let library =
unsafe { libloading::Library::new("/usr/lib/x86_64-linux-gnu/libcuda.so.1") }.unwrap();
let cu_get_proc_address = unsafe {
library.get::<unsafe extern "system" fn(
symbol: *const ::core::ffi::c_char,
pfn: *mut *mut ::core::ffi::c_void,
cudaVersion: ::core::ffi::c_int,
flags: cuda_types::cuda::cuuint64_t,
symbolStatus: *mut cuda_types::cuda::CUdriverProcAddressQueryResult,
) -> cuda_types::cuda::CUresult>(b"cuGetProcAddress_v2\0")
}
.unwrap();
let mut result = Vec::new();
for fn_ in cuda_fns {
let mut known_variants = FxHashMap::default();
for version in std::iter::successors(Some(1), |x| Some(x + 1)) {
let map_len = known_variants.len();
for thread_suffix in ["", "_ptds", "_ptsz"] {
let version = if version == 1 {
"".to_string()
} else {
format!("_v{}", version)
};
let fn_ = format!("{}{}{}", fn_, version, thread_suffix);
match unsafe { library.get::<*mut std::ffi::c_void>(fn_.as_bytes()) } {
Ok(symbol) => {
known_variants.insert(unsafe { symbol.into_raw() }.as_raw_ptr(), fn_);
}
Err(_) => {}
}
}
if known_variants.len() == map_len {
break;
}
}
let fn_ = fn_.to_string();
let symbol = CString::new(fn_.clone()).unwrap();
for flag in [
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_DEFAULT,
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_LEGACY_STREAM,
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM,
] {
let mut breakpoints = Vec::new();
let mut last_result = None;
for version in versions.iter().copied() {
let mut result = ptr::null_mut();
let mut status = unsafe { mem::zeroed() };
match unsafe { (cu_get_proc_address)(symbol.as_ptr(), &mut result, version, flag.0 as _, &mut status) } {
Ok(()) => {}
Err(cuda_types::cuda::CUerror::NOT_FOUND) => {
continue;
}
Err(e) => panic!("{}", e.0)
}
if status != cuda_types::cuda::CUdriverProcAddressQueryResult::CU_GET_PROC_ADDRESS_SUCCESS {
continue;
}
if Some(result) != last_result {
last_result = Some(result);
breakpoints.push((version, known_variants.get(&result).unwrap().clone()));
}
}
breakpoints.sort_unstable_by_key(|(version, _)| cmp::Reverse(*version));
if !breakpoints.is_empty() {
result.push((fn_.clone(), flag.0, breakpoints));
}
}
}
let mut path = crate_root.clone();
path.extend(["..", "zluda_bindgen", "src", "process_table.rs"]);
let mut file = File::create(path).unwrap();
writeln!(file, "match (name, flag) {{").unwrap();
for (fn_, version, breakpoints) in result {
writeln!(file, " (b\"{fn_}\", {version}) => {{").unwrap();
for (version, name) in breakpoints {
writeln!(file, " if version >= {version} {{").unwrap();
writeln!(file, " return {name} as _;").unwrap();
writeln!(file, " }}").unwrap();
}
writeln!(file, " usize::MAX as _").unwrap();
writeln!(file, " }}").unwrap();
}
writeln!(file, " _ => 0usize as _").unwrap();
writeln!(file, "}}").unwrap();
}
fn cuda_numeric_version(version: &str) -> i32 {
let mut version = version.split('.').map(|s| s.parse::<i32>().unwrap());
let major = version.next().unwrap();
let minor = version.next().unwrap();
let patch = version.next().unwrap_or(0);
major * 1000 + minor * 10 + patch
}
fn generate_cufft(crate_root: &PathBuf) {
@ -49,10 +164,36 @@ fn generate_cufft(crate_root: &PathBuf) {
&module,
);
generate_types_library(
Some(LibraryOverride::CuFft),
&crate_root,
&["..", "cuda_types", "src", "cufft.rs"],
&module,
)
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_fft.rs"],
&["cuda_types", "cufft"],
&module,
);
}
fn get_functions(module: syn::File) -> Vec<Ident> {
module
.items
.iter()
.flat_map(|item| match item {
Item::ForeignMod(extern_) => {
extern_
.items
.iter()
.filter_map(|foreign_item| match foreign_item {
ForeignItem::Fn(fn_) => Some(fn_.sig.ident.clone()),
_ => None,
})
}
_ => unreachable!(),
})
.collect::<Vec<_>>()
}
fn generate_cusparse(crate_root: &PathBuf) {
@ -61,6 +202,7 @@ fn generate_cusparse(crate_root: &PathBuf) {
.allowlist_type("^cusparse.*")
.allowlist_type(".*Info_t$")
.allowlist_type(".*Info$")
.blocklist_type("^cudaAsync.*")
.allowlist_function("^cusparse.*")
.allowlist_var("^CUSPARSE_.*")
.must_use_type("cusparseStatus_t")
@ -77,10 +219,17 @@ fn generate_cusparse(crate_root: &PathBuf) {
&module,
);
generate_types_library(
None,
&crate_root,
&["..", "cuda_types", "src", "cusparse.rs"],
&module,
)
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_sparse.rs"],
&["cuda_types", "cusparse"],
&module,
);
}
fn generate_cudnn(crate_root: &PathBuf) {
@ -135,6 +284,12 @@ fn generate_cudnn(crate_root: &PathBuf) {
&["..", "cuda_base", "src", "cudnn9.rs"],
&cudnn9_module,
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_dnn9.rs"],
&["cuda_types", "cudnn9"],
&cudnn9_module,
);
}
// This code splits types (and constants) into one of:
@ -482,14 +637,35 @@ fn generate_cublas(crate_root: &PathBuf) {
&module,
);
generate_types_library(
None,
&crate_root,
&["..", "cuda_types", "src", "cublas.rs"],
&module,
)
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_blas.rs"],
&["cuda_types", "cublas"],
&module,
);
}
fn remove_type(module: &mut syn::File, type_name: &str) {
let items = std::mem::replace(&mut module.items, Vec::new());
let items = items
.into_iter()
.filter_map(|item| match item {
Item::Enum(enum_) if enum_.ident == type_name => None,
Item::Struct(struct_) if struct_.ident == type_name => None,
Item::Impl(impl_) if impl_.self_ty.to_token_stream().to_string() == type_name => None,
_ => Some(item),
})
.collect();
module.items = items;
}
fn generate_cublaslt(crate_root: &PathBuf) {
let cublas_header = new_builder()
let cublaslt_header = new_builder()
.header("/usr/local/cuda/include/cublasLt.h")
.allowlist_type("^cublas.*")
.allowlist_function("^cublasLt.*")
@ -500,21 +676,58 @@ fn generate_cublaslt(crate_root: &PathBuf) {
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cublas_header).unwrap();
let cublaslt_internal_header = new_builder()
.header_contents(
"cublasLt_internal.h",
include_str!("../build/cublasLt_internal.h"),
)
.clang_args(["-x", "c++"])
.override_abi(bindgen::Abi::System, ".*")
.generate()
.unwrap()
.to_string()
// Simplest and dumbest way to do this
.replace("pub fn", "fn")
.replace(");", ") -> ();");
let module_blaslt_internal: syn::File = syn::parse_str(&cublaslt_internal_header).unwrap();
std::fs::write(
crate_root
.join("..")
.join("cuda_base")
.join("src")
.join("cublaslt_internal.rs"),
cublaslt_internal_header,
)
.unwrap();
let mut module_blas: syn::File = syn::parse_str(&cublaslt_header).unwrap();
remove_type(&mut module_blas, "cublasStatus_t");
generate_functions(
&crate_root,
"cublaslt",
&["..", "cuda_base", "src", "cublaslt.rs"],
&module,
&module_blas,
);
generate_types_library(
Some(LibraryOverride::CuBlasLt),
&crate_root,
&["..", "cuda_types", "src", "cublaslt.rs"],
&module,
)
&module_blas,
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_blaslt.rs"],
&["cuda_types", "cublaslt"],
&module_blas,
);
generate_display_perflib(
&crate_root,
&["..", "format", "src", "format_generated_blaslt_internal.rs"],
&["cuda_types", "cublaslt"],
&module_blaslt_internal,
);
}
fn generate_cuda(crate_root: &PathBuf) {
fn generate_cuda(crate_root: &PathBuf) -> Vec<Ident> {
let cuda_header = new_builder()
.header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h"))
.allowlist_type("^CU.*")
@ -537,23 +750,24 @@ fn generate_cuda(crate_root: &PathBuf) {
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
generate_functions(
let cuda_functions = get_functions(generate_functions(
&crate_root,
"cuda",
&["..", "cuda_base", "src", "cuda.rs"],
&module,
);
));
generate_types_cuda(
&crate_root,
&["..", "cuda_types", "src", "cuda.rs"],
&module,
);
generate_display(
generate_display_cuda(
&crate_root,
&["..", "zluda_dump", "src", "format_generated.rs"],
&["..", "format", "src", "format_generated.rs"],
&["cuda_types", "cuda"],
&module,
)
);
cuda_functions
}
fn generate_ml(crate_root: &PathBuf) {
@ -595,24 +809,44 @@ fn generate_ml(crate_root: &PathBuf) {
&module,
);
generate_types_library(
None,
&crate_root,
&["..", "cuda_types", "src", "nvml.rs"],
&module,
);
}
fn generate_types_library(crate_root: &PathBuf, path: &[&str], module: &syn::File) {
fn generate_types_library(
override_: Option<LibraryOverride>,
crate_root: &PathBuf,
path: &[&str],
module: &syn::File,
) {
let module = generate_types_library_impl(module);
let mut output = crate_root.clone();
output.extend(path);
let text = prettyplease::unparse(&module)
.replace("self::cudaDataType", "super::cuda::cudaDataType")
// complex as used by cuFFT
.replace(" cuComplex", " super::cuda::cuComplex")
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
let mut text =
prettyplease::unparse(&module).replace("self::cudaDataType", "super::cuda::cudaDataType");
match override_ {
None => {}
Some(LibraryOverride::CuBlasLt) => {
text = text.replace(" cublasStatus_t", " super::cublas::cublasStatus_t");
}
Some(LibraryOverride::CuFft) => {
text = text
.replace(" cuComplex", " super::cuda::cuComplex")
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
}
}
write_rust_to_file(output, &text)
}
#[derive(Clone, Copy)]
enum LibraryOverride {
CuBlasLt,
CuFft,
}
fn generate_types_library_impl(module: &syn::File) -> syn::File {
let known_reexports: Punctuated<syn::Item, syn::parse::Nothing> = parse_quote! {
pub type __half = u16;
@ -701,7 +935,12 @@ fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
}
}
fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module: &syn::File) {
fn generate_functions(
output: &PathBuf,
submodule: &str,
path: &[&str],
module: &syn::File,
) -> syn::File {
let fns_ = module.items.iter().filter_map(|item| match item {
Item::ForeignMod(extern_) => match &*extern_.items {
[ForeignItem::Fn(fn_)] => Some(fn_),
@ -709,18 +948,53 @@ fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module:
},
_ => None,
});
/*
let prelude = match submodule {
"cublaslt" => Some(quote! {
use cuda_types::cublas::cublasStatus_t;
}),
"cublas" => Some(quote! {
use cuda_types::cublas::cublasStatus_t;
}),
_ => None,
};
*/
let mut module: syn::File = parse_quote! {
extern "system" {
#(#fns_)*
}
};
let submodule = Ident::new(submodule, Span::call_site());
syn::visit_mut::visit_file_mut(&mut PrependCudaPath { module: submodule }, &mut module);
syn::visit_mut::visit_file_mut(
&mut PrependCudaPath {
module: vec![Ident::new("cuda_types", Span::call_site()), submodule],
},
&mut module,
);
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
let mut output = output.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
write_rust_to_file(output, &prettyplease::unparse(&module));
module
/*
module
.items
.iter()
.flat_map(|item| match item {
Item::ForeignMod(extern_) => {
extern_
.items
.iter()
.filter_map(|foreign_item| match foreign_item {
ForeignItem::Fn(fn_) => Some(fn_.sig.ident.clone()),
_ => None,
})
}
_ => unreachable!(),
})
.collect::<Vec<_>>()
*/
}
fn generate_types_cuda(output: &PathBuf, path: &[&str], module: &syn::File) {
@ -892,17 +1166,24 @@ impl VisitMut for FixAbi {
}
struct PrependCudaPath {
module: Ident,
module: Vec<Ident>,
}
impl VisitMut for PrependCudaPath {
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
if type_.path.segments.len() == 1 {
match &*type_.path.segments[0].ident.to_string() {
"usize" | "u32" | "i32" | "u64" | "i64" | "f64" | "f32" | "FILE" => {}
"usize" | "u32" | "i32" | "u64" | "i64" | "f64" | "f32" => {}
"FILE" => {
*type_ = parse_quote! { cuda_types :: FILE };
}
"cublasStatus_t" => {
let module = self.module.iter().rev().skip(1).rev();
*type_ = parse_quote! { #(#module :: )* cublas :: #type_ };
}
_ => {
let module = &self.module;
*type_ = parse_quote! { cuda_types :: #module :: #type_ };
*type_ = parse_quote! { #(#module :: )* #type_ };
}
}
}
@ -927,7 +1208,7 @@ impl VisitMut for ExplicitReturnType {
}
}
fn generate_display(
fn generate_display_cuda(
output: &PathBuf,
path: &[&str],
types_crate: &[&'static str],
@ -954,8 +1235,8 @@ fn generate_display(
"CUeglFrame_st",
"CUdevResource_st",
"CUlaunchAttribute_st",
"CUlaunchConfig_st",
"CUmemcpy3DOperand_st",
"CUlaunchConfig_st",
];
let ignore_functions = [
"cuGLGetDevices",
@ -984,7 +1265,7 @@ fn generate_display(
let mut items = module
.items
.iter()
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
.filter_map(|i| cuda_derive_display_trait_for_item(types_crate, &mut derive_state, i))
.collect::<Vec<_>>();
items.push(curesult_display_trait(&derive_state));
let mut output = output.clone();
@ -999,6 +1280,50 @@ fn generate_display(
);
}
fn generate_display_perflib(
output: &PathBuf,
path: &[&str],
types_crate: &[&'static str],
module: &syn::File,
) {
let ignore_types = [
"cublasLtMatrixLayoutOpaque_t",
"cublasLtMatmulDescOpaque_t",
"cublasLtMatrixTransformDescOpaque_t",
"cublasLtMatmulPreferenceOpaque_t",
"cublasLogCallback",
"cudnnBackendDescriptor_t",
"cublasLtLoggerCallback_t",
"cusparseLoggerCallback_t",
];
let ignore_functions = [];
let count_selectors = [
("cudnnBackendSetAttribute", 4, 3),
("cudnnBackendGetAttribute", 5, 4),
];
let mut derive_state = DeriveDisplayState::new(
&ignore_types,
types_crate,
&ignore_functions,
&count_selectors,
);
let items = module
.items
.iter()
.filter_map(|i| cuda_derive_display_trait_for_item(types_crate, &mut derive_state, i))
.collect::<Vec<_>>();
let mut output = output.clone();
output.extend(path);
write_rust_to_file(
output,
&prettyplease::unparse(&syn::File {
shebang: None,
attrs: Vec::new(),
items,
}),
);
}
struct DeriveDisplayState<'a> {
types_crate: Path,
ignore_types: FxHashSet<Ident>,
@ -1057,13 +1382,17 @@ impl<'a> DeriveDisplayState<'a> {
}
fn cuda_derive_display_trait_for_item<'a>(
path: &[&str],
state: &mut DeriveDisplayState<'a>,
item: &'a Item,
) -> Option<syn::Item> {
let path_prefix = &state.types_crate;
let path_prefix_iter = iter::repeat(&path_prefix);
let mut prepend_path = PrependCudaPath {
module: Ident::new("cuda", Span::call_site()),
module: path
.iter()
.map(|segment| Ident::new(segment, Span::call_site()))
.collect(),
};
match item {
Item::Const(const_) => {
@ -1101,14 +1430,14 @@ fn cuda_derive_display_trait_for_item<'a>(
if i != 0 {
writer.write_all(b", ")?;
}
crate::format::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
crate::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
}
writer.write_all(b"]")?;
}
} else {
quote! {
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
crate::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
}
}
});
@ -1157,7 +1486,7 @@ fn cuda_derive_display_trait_for_item<'a>(
let enum_iter = iter::repeat(&item_struct.ident);
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #enum_ {
impl crate::CudaDisplay for #path_prefix :: #enum_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
@ -1184,13 +1513,13 @@ fn cuda_derive_display_trait_for_item<'a>(
None => return None,
};
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
impl crate::CudaDisplay for #path_prefix :: #struct_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
crate::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
#(
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
crate::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
)*
writer.write_all(b" }")
}
@ -1199,7 +1528,7 @@ fn cuda_derive_display_trait_for_item<'a>(
}
Fields::Unnamed(FieldsUnnamed { ref unnamed, .. }) if unnamed.len() == 1 => {
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
impl crate::CudaDisplay for #path_prefix :: #struct_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", self.0)
}
@ -1218,9 +1547,13 @@ fn cuda_derive_display_trait_for_item<'a>(
Type::Ptr(_) => {
let type_ = &item_type.ident;
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
impl crate::CudaDisplay for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", *self)
if self.is_null() {
writer.write_all(b"NULL")
} else {
write!(writer, "{:p}", *self)
}
}
}
})
@ -1234,7 +1567,7 @@ fn cuda_derive_display_trait_for_item<'a>(
syn::GenericArgument::Type(Type::BareFn(_)) => {
let type_ = &item_type.ident;
return Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
impl crate::CudaDisplay for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
}
@ -1280,7 +1613,7 @@ fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
})
});
parse_quote! {
impl crate::format::CudaDisplay for cuda_types::cuda::CUresult {
impl crate::CudaDisplay for cuda_types::cuda::CUresult {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
use cuda_types::cublaslt::cublasStatus_t;
use cuda_types::cublas::cublasStatus_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cublasStatus_t {
@ -12,14 +12,14 @@ pub(crate) fn unimplemented() -> cublasStatus_t {
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetStatusName(
_status: cuda_types::cublaslt::cublasStatus_t,
_status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetStatusString(
_status: cuda_types::cublaslt::cublasStatus_t,
_status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}

View file

@ -1,7 +1,5 @@
mod r#impl;
pub enum FILE { }
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(

View file

@ -1,39 +1,46 @@
[package]
name = "zluda_dump"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump"
crate-type = ["cdylib"]
[dependencies]
ptx = { path = "../ptx" }
ptx_parser = { path = "../ptx_parser" }
lz4-sys = "1.9"
regex = "1.4"
dynasm = "1.2"
dynasmrt = "1.2"
lazy_static = "1.4"
# we don't need elf32, but goblin has a bug where elf64 does not build without elf32
goblin = { version = "0.4", default-features = false, features = ["elf64", "elf32", "archive"] }
paste = "1.0"
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
wchar = "0.6"
detours-sys = { path = "../detours-sys" }
[target.'cfg(not(windows))'.dependencies]
libc = "0.2"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcuda.so",
"dump/libcuda.so.1",
"dump_nvidia/libcuda.so",
"dump_nvidia/libcuda.so.1",
]
[package]
name = "zluda_dump"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump"
crate-type = ["cdylib"]
[dependencies]
ptx = { path = "../ptx" }
ptx_parser = { path = "../ptx_parser" }
zluda_dump_common = { path = "../zluda_dump_common" }
format = { path = "../format" }
dark_api = { path = "../dark_api" }
lz4-sys = "1.9"
regex = "1.4"
dynasm = "1.2"
dynasmrt = "1.2"
# we don't need elf32, but goblin has a bug where elf64 does not build without elf32
goblin = { version = "0.4", default-features = false, features = ["elf64", "elf32", "archive"] }
paste = "1.0"
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
parking_lot = "0.12.3"
rustc-hash = "1.1.0"
cglue = "0.3.5"
zstd-safe = { version = "7.2.4", features = ["std"] }
unwrap_or = "1.0.1"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
wchar = "0.6"
detours-sys = { path = "../detours-sys" }
[target.'cfg(not(windows))'.dependencies]
libc = "0.2"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcuda.so",
"dump/libcuda.so.1",
"dump_nvidia/libcuda.so",
"dump_nvidia/libcuda.so.1",
]

View file

@ -1,3 +0,0 @@
grep -E '^cu.*' log.txt | sed 's/([^)]*)//g' | sort | uniq > uniq_host.txt
cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt

View file

@ -1,25 +1,120 @@
use crate::format;
use crate::{log, os, trace::StateTracker};
use crate::{log::UInt, GlobalDelayedState};
use crate::os;
use crate::{CudaFunctionName, ErrorEntry};
use cuda_types::cuda::*;
use std::borrow::Cow;
use rustc_hash::FxHashMap;
use std::cell::RefMut;
use std::hash::Hash;
use std::{
collections::{hash_map, HashMap},
ffi::c_void,
mem,
os::raw::{c_int, c_uint, c_ulong, c_ushort},
ptr, slice,
};
use std::{collections::hash_map, ffi::c_void, mem};
pub(crate) struct DarkApiState {
pub(crate) struct DarkApiState2 {
// Key is Box<CUuuid, because thunk reporting unknown export table needs a
// stablememory location for the guid
overrides: HashMap<Box<CUuuidWrapper>, Vec<*const c_void>>,
original: OriginalExports,
// stable memory location for the guid
pub(crate) overrides: FxHashMap<Box<CUuuidWrapper>, (*const *const c_void, Vec<*const c_void>)>,
}
unsafe impl Send for DarkApiState2 {}
unsafe impl Sync for DarkApiState2 {}
impl DarkApiState2 {
pub(crate) fn new() -> Self {
DarkApiState2 {
overrides: FxHashMap::default(),
}
}
pub(crate) fn override_export_table(
&mut self,
known_exports: &::dark_api::cuda::CudaDarkApiGlobalTable,
original_export_table: *const *const c_void,
guid: &CUuuid_st,
) -> (*const *const c_void, Option<ErrorEntry>) {
let entry = match self.overrides.entry(Box::new(CUuuidWrapper(*guid))) {
hash_map::Entry::Occupied(entry) => {
let (_, override_table) = entry.get();
return (override_table.as_ptr(), None);
}
hash_map::Entry::Vacant(entry) => entry,
};
let mut error = None;
let byte_size: usize = unsafe { *(original_export_table.cast::<usize>()) };
// Some export tables don't start with a byte count, but directly with a
// pointer, and are instead terminated by 0 or MAX
let export_functions_start_idx;
let export_functions_size;
if byte_size > 0x10000 {
export_functions_start_idx = 0;
let mut i = 0;
loop {
let current_ptr = unsafe { original_export_table.add(i) };
let current_ptr_numeric = unsafe { *current_ptr } as usize;
if current_ptr_numeric == 0usize || current_ptr_numeric == usize::MAX {
export_functions_size = i;
break;
}
i += 1;
}
} else {
export_functions_start_idx = 1;
export_functions_size = byte_size / mem::size_of::<usize>();
}
let our_functions = known_exports.get(guid);
if let Some(ref our_functions) = our_functions {
if our_functions.len() != export_functions_size {
error = Some(ErrorEntry::UnexpectedExportTableSize {
expected: our_functions.len(),
computed: export_functions_size,
});
}
}
let mut override_table =
unsafe { std::slice::from_raw_parts(original_export_table, export_functions_size) }
.to_vec();
for i in export_functions_start_idx..export_functions_size {
let current_fn = (|| {
if let Some(ref our_functions) = our_functions {
if let Some(fn_) = our_functions.get_fn(i) {
return fn_;
}
}
os::get_thunk(
override_table[i],
Self::report_unknown_export_table_call,
std::ptr::from_ref(entry.key().as_ref()).cast(),
i,
)
})();
override_table[i] = current_fn;
}
(
entry
.insert((original_export_table, override_table))
.1
.as_ptr(),
error,
)
}
unsafe extern "system" fn report_unknown_export_table_call(guid: &CUuuid, index: usize) {
let global_state = crate::GLOBAL_STATE2.lock();
let global_state_ref_cell = &*global_state;
let mut global_state_ref_mut = global_state_ref_cell.borrow_mut();
let global_state = &mut *global_state_ref_mut;
let log_guard = crate::OuterCallGuard {
writer: &mut global_state.log_writer,
log_root: &global_state.log_stack,
};
{
let mut logger = RefMut::map(global_state.log_stack.borrow_mut(), |log_stack| {
log_stack.enter()
});
logger.name = CudaFunctionName::Dark { guid: *guid, index };
};
drop(log_guard);
}
}
#[derive(Eq, PartialEq)]
#[repr(transparent)]
pub(crate) struct CUuuidWrapper(pub CUuuid);
impl Hash for CUuuidWrapper {
@ -27,589 +122,3 @@ impl Hash for CUuuidWrapper {
self.0.bytes.hash(state);
}
}
#[allow(improper_ctypes_definitions)]
pub(crate) struct OriginalExports {
original_get_module_from_cubin: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult,
>,
original_get_module_from_cubin_ext1: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
_unknown: usize,
) -> CUresult,
>,
original_get_module_from_cubin_ext2: Option<
unsafe extern "system" fn(
fatbinc_wrapper: *const FatbinHeader,
result: *mut CUmodule,
ptr1: *mut c_void,
ptr2: *mut c_void,
_unknown: usize,
) -> CUresult,
>,
}
impl DarkApiState {
pub(crate) fn new() -> Self {
let original = OriginalExports {
original_get_module_from_cubin: None,
original_get_module_from_cubin_ext1: None,
original_get_module_from_cubin_ext2: None,
};
DarkApiState {
overrides: HashMap::new(),
original,
}
}
}
pub(crate) fn override_export_table(
pp_export_table: *mut *const c_void,
p_export_table_id: *const CUuuid,
state: &mut crate::trace::StateTracker,
) {
let state = &mut state.dark_api;
let export_table_mut = unsafe { &mut *pp_export_table };
let export_id = Box::new(CUuuidWrapper(unsafe { *p_export_table_id }));
*export_table_mut = match state.overrides.entry(export_id) {
hash_map::Entry::Occupied(entry) => entry.get().as_ptr() as *const _,
hash_map::Entry::Vacant(entry) => {
let guid_ptr = unsafe {
mem::transmute::<*const CUuuidWrapper, *const CUuuid>(&**entry.key() as *const _)
};
entry
.insert(unsafe {
create_new_override(*pp_export_table as *const _, guid_ptr, &mut state.original)
})
.as_ptr() as *const _
}
};
}
unsafe fn create_new_override(
export_table: *const *const c_void,
export_id: *const CUuuid,
state: &mut OriginalExports,
) -> Vec<*const c_void> {
let mut byte_length: usize = *(export_table as *const usize);
// Some export tables don't start with a byte count, but directly with a
// pointer, and are instead terminated by 0 or MAX
let export_functions_start_idx;
let mut override_table = Vec::new();
if byte_length > 0x10000 {
export_functions_start_idx = 0;
let mut i = 0;
loop {
let current_fn = export_table.add(i);
let current_fn_numeric = *current_fn as usize;
if current_fn_numeric == 0usize || current_fn_numeric == usize::MAX {
byte_length = (i + 1) * mem::size_of::<usize>();
break;
}
i += 1;
}
} else {
override_table.push(byte_length as *const _);
export_functions_start_idx = 1;
}
for i in export_functions_start_idx..(byte_length / mem::size_of::<usize>()) {
let current_fn = export_table.add(i);
override_table.push(get_export_override_fn(state, *current_fn, export_id, i));
}
override_table
}
unsafe extern "system" fn report_unknown_export_table_call(
export_table: *const CUuuid,
idx: usize,
) {
if let Ok(mut global_state) = crate::GLOBAL_STATE.lock() {
let mut logger = global_state
.log_factory
.get_logger_dark_api(*export_table, idx, None);
logger.log(log::LogEntry::UnknownExportTableFn)
}
}
const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
bytes: [
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
0xf9,
],
};
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
bytes: [
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
0x66,
],
};
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
bytes: [
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
0x93,
],
};
const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
bytes: [
0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72,
0x86,
],
};
const HEAP_ACCESS_GUID: CUuuid = CUuuid {
bytes: [
0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31,
0xAE,
],
};
const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid {
bytes: [
0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1,
0xE2,
],
};
unsafe fn get_export_override_fn(
state: &mut OriginalExports,
original_fn: *const c_void,
guid: *const CUuuid,
idx: usize,
) -> *const c_void {
match (*guid, idx) {
(TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 2)
| (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 6)
| (CUDART_INTERFACE_GUID, 2)
| (CUDART_INTERFACE_GUID, 7)
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 0)
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 1)
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 2)
| (CTX_CREATE_BYPASS_GUID, 1)
| (HEAP_ACCESS_GUID, 1)
| (HEAP_ACCESS_GUID, 2)
| (DEVICE_EXTENDED_RT_GUID, 5)
| (DEVICE_EXTENDED_RT_GUID, 13) => original_fn,
(CUDART_INTERFACE_GUID, 1) => {
state.original_get_module_from_cubin = mem::transmute(original_fn);
get_module_from_cubin as *const _
}
(CUDART_INTERFACE_GUID, 6) => {
state.original_get_module_from_cubin_ext1 = mem::transmute(original_fn);
get_module_from_cubin_ext1 as *const _
}
(CUDART_INTERFACE_GUID, 8) => {
state.original_get_module_from_cubin_ext2 = mem::transmute(original_fn);
get_module_from_cubin_ext2 as *const _
}
_ => {
// terminator if it's an export table that is not size-prefixed
if original_fn == ptr::null() || (original_fn as usize) == usize::MAX {
ptr::null()
} else {
os::get_thunk(original_fn, report_unknown_export_table_call, guid, idx)
}
}
}
}
const FATBINC_MAGIC: c_uint = 0x466243B1;
const FATBINC_VERSION_V1: c_uint = 0x1;
const FATBINC_VERSION_V2: c_uint = 0x2;
#[repr(C)]
struct FatbincWrapper {
magic: c_uint,
version: c_uint,
data: *const FatbinHeader,
filename_or_fatbins: *const c_void,
}
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
const FATBIN_VERSION: c_ushort = 0x01;
#[repr(C, align(8))]
struct FatbinHeader {
magic: c_uint,
version: c_ushort,
header_size: c_ushort,
files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame
}
const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
const FATBIN_FILE_HEADER_KIND_ELF: c_ushort = 0x02;
const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
// assembly file header is a bit different, but we don't care
#[repr(C)]
#[derive(Debug)]
struct FatbinFileHeader {
kind: c_ushort,
version: c_ushort,
header_size: c_uint,
padded_payload_size: c_uint,
unknown0: c_uint, // check if it's written into separately
payload_size: c_uint,
unknown1: c_uint,
unknown2: c_uint,
sm_version: c_uint,
bit_width: c_uint,
unknown3: c_uint,
unknown4: c_ulong,
unknown5: c_ulong,
uncompressed_payload: c_ulong,
}
unsafe fn record_submodules_from_wrapped_fatbin(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
fn_logger: &mut log::FunctionLogger,
delayed_state: &mut GlobalDelayedState,
original_fn: impl FnOnce(&OriginalExports) -> CUresult,
) -> CUresult {
let result = original_fn(&delayed_state.cuda_state.dark_api.original);
fn_logger.result = Some(result);
let magic = (*fatbinc_wrapper).magic;
if magic != FATBINC_MAGIC {
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: "FATBINC_MAGIC",
expected: vec![UInt::U32(FATBINC_MAGIC)],
observed: UInt::U32(magic),
});
}
if (*fatbinc_wrapper).version != FATBINC_VERSION_V1
&& (*fatbinc_wrapper).version != FATBINC_VERSION_V2
{
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: "FATBINC_VERSION",
expected: vec![UInt::U32(FATBINC_VERSION_V1), UInt::U32(FATBINC_VERSION_V2)],
observed: UInt::U32(magic),
});
}
let is_version_2 = (*fatbinc_wrapper).version == FATBINC_VERSION_V2;
record_submodules_from_fatbin(
*module,
(*fatbinc_wrapper).data,
if is_version_2 { Some(1) } else { None },
fn_logger,
&mut delayed_state.cuda_state,
);
if is_version_2 {
let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void;
while *current != ptr::null() {
record_submodules_from_fatbin(
*module,
*current as *const _,
Some(2),
fn_logger,
&mut delayed_state.cuda_state,
);
current = current.add(1);
}
}
result
}
unsafe fn record_submodules_from_fatbin(
module: CUmodule,
fatbin_header: *const FatbinHeader,
fatbin_version: Option<usize>,
logger: &mut log::FunctionLogger,
state: &mut StateTracker,
) {
let magic = (*fatbin_header).magic;
if magic != FATBIN_MAGIC {
logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: "FATBIN_MAGIC",
expected: vec![UInt::U32(FATBIN_MAGIC)],
observed: UInt::U32(magic),
});
return;
}
let version = (*fatbin_header).version;
if version != FATBIN_VERSION {
logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: "FATBIN_VERSION",
expected: vec![UInt::U16(FATBIN_VERSION)],
observed: UInt::U16(version),
});
return;
}
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
let end = file.add((*fatbin_header).files_size as usize);
record_submodules(
fatbin_version == Some(2),
module,
fatbin_version,
logger,
state,
file,
end,
);
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult {
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
writer.write_all(b"(")?;
writer.write_all(stringify!(module).as_bytes())?;
writer.write_all(b": ")?;
format::CudaDisplay::write(&module, "", 0, writer)?;
writer.write_all(b", ")?;
writer.write_all(stringify!(fatbinc_wrapper).as_bytes())?;
write!(writer, ": {:p})", fatbinc_wrapper)
});
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
CUDART_INTERFACE_GUID,
1,
Some(arguments_writer),
);
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
let delayed_state = global_state.delayed_state.unwrap_mut();
record_submodules_from_wrapped_fatbin(
module,
fatbinc_wrapper,
&mut fn_logger,
delayed_state,
|original_exports| {
original_exports.original_get_module_from_cubin.unwrap()(module, fatbinc_wrapper)
},
)
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin_ext1(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
_unknown: usize,
) -> CUresult {
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
writer.write_all(b"(")?;
writer.write_all(stringify!(module).as_bytes())?;
writer.write_all(b": ")?;
format::CudaDisplay::write(&module, "", 0, writer)?;
writer.write_all(b", ")?;
writer.write_all(stringify!(fatbinc_wrapper).as_bytes())?;
write!(writer, ": {:p}, ", fatbinc_wrapper)?;
writer.write_all(stringify!(ptr1).as_bytes())?;
write!(writer, ": {:p}, ", ptr1)?;
writer.write_all(stringify!(ptr2).as_bytes())?;
write!(writer, ": {:p}, ", ptr2)?;
writer.write_all(stringify!(_unknown).as_bytes())?;
write!(writer, ": {})", _unknown)
});
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
CUDART_INTERFACE_GUID,
6,
Some(arguments_writer),
);
if ptr1 != ptr::null_mut() {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(ptr1),
expected: vec![UInt::USize(0)],
observed: UInt::USize(ptr1 as usize),
});
}
if ptr2 != ptr::null_mut() {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(ptr2),
expected: vec![UInt::USize(0)],
observed: UInt::USize(ptr2 as usize),
});
}
if _unknown != 0 {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(_unknown),
expected: vec![UInt::USize(0)],
observed: UInt::USize(_unknown),
});
}
let delayed_state = global_state.delayed_state.unwrap_mut();
record_submodules_from_wrapped_fatbin(
module,
fatbinc_wrapper,
&mut fn_logger,
delayed_state,
|original_exports| {
original_exports
.original_get_module_from_cubin_ext1
.unwrap()(module, fatbinc_wrapper, ptr1, ptr2, _unknown)
},
)
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin_ext2(
fatbin_header: *const FatbinHeader,
module: *mut CUmodule,
ptr1: *mut c_void,
ptr2: *mut c_void,
_unknown: usize,
) -> CUresult {
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
writer.write_all(b"(")?;
writer.write_all(stringify!(fatbin_header).as_bytes())?;
write!(writer, ": {:p}, ", fatbin_header)?;
writer.write_all(stringify!(module).as_bytes())?;
writer.write_all(b": ")?;
format::CudaDisplay::write(&module, "", 0, writer)?;
writer.write_all(b", ")?;
writer.write_all(stringify!(ptr1).as_bytes())?;
write!(writer, ": {:p}, ", ptr1)?;
writer.write_all(stringify!(ptr2).as_bytes())?;
write!(writer, ": {:p}, ", ptr2)?;
writer.write_all(stringify!(_unknown).as_bytes())?;
write!(writer, ": {})", _unknown)
});
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
CUDART_INTERFACE_GUID,
8,
Some(arguments_writer),
);
if ptr1 != ptr::null_mut() {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(ptr1),
expected: vec![UInt::USize(0)],
observed: UInt::USize(ptr1 as usize),
});
}
if ptr2 != ptr::null_mut() {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(ptr2),
expected: vec![UInt::USize(0)],
observed: UInt::USize(ptr2 as usize),
});
}
if _unknown != 0 {
fn_logger.log(log::LogEntry::UnexpectedArgument {
arg_name: stringify!(_unknown),
expected: vec![UInt::USize(0)],
observed: UInt::USize(_unknown),
});
}
let delayed_state = global_state.delayed_state.unwrap_mut();
let result = delayed_state
.cuda_state
.dark_api
.original
.original_get_module_from_cubin_ext2
.unwrap()(fatbin_header, module, ptr1, ptr2, _unknown);
fn_logger.result = Some(result);
if result.is_err() {
return result;
}
record_submodules_from_fatbin(
*module,
fatbin_header,
None,
&mut fn_logger,
&mut delayed_state.cuda_state,
);
result
}
unsafe fn record_submodules(
should_decompress_elf: bool,
module: CUmodule,
version: Option<usize>,
fn_logger: &mut log::FunctionLogger,
state: &mut StateTracker,
start: *const u8,
end: *const u8,
) {
let mut index = start;
while index < end {
let fatbin_file = index as *const FatbinFileHeader;
let fatbin_file_version = (*fatbin_file).version;
if fatbin_file_version != FATBIN_FILE_HEADER_VERSION_CURRENT {
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: stringify!(fatbin_file_version),
expected: vec![UInt::U16(FATBIN_FILE_HEADER_VERSION_CURRENT)],
observed: UInt::U16(fatbin_file_version),
});
}
let fatbin_file_kind = (*fatbin_file).kind;
if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_PTX {
let decompressed = decompress_kernel_module(fatbin_file);
match decompressed {
Some(mut decompressed) => {
decompressed.pop(); // remove trailing zero
state.record_new_submodule(module, version, &*decompressed, fn_logger, "ptx")
}
None => fn_logger.log(log::LogEntry::Lz4DecompressionFailure),
}
} else if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_ELF {
let source_buffer = if should_decompress_elf {
let decompressed = decompress_kernel_module(fatbin_file);
match decompressed {
Some(decompressed) => Cow::Owned(decompressed),
None => {
fn_logger.log(log::LogEntry::Lz4DecompressionFailure);
continue;
}
}
} else {
Cow::Borrowed(slice::from_raw_parts(
(fatbin_file as *const u8).add((*fatbin_file).header_size as usize),
(*fatbin_file).padded_payload_size as usize,
))
};
state.record_new_submodule(module, version, &*source_buffer, fn_logger, "elf")
} else {
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
field_name: stringify!(fatbin_file_kind),
expected: vec![
UInt::U16(FATBIN_FILE_HEADER_KIND_PTX),
UInt::U16(FATBIN_FILE_HEADER_KIND_ELF),
],
observed: UInt::U16(fatbin_file_kind),
});
}
index = index
.add((*fatbin_file).header_size as usize + (*fatbin_file).padded_payload_size as usize);
}
}
const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024;
unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<u8>> {
let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
let mut decompressed_vec = vec![0u8; decompressed_size];
loop {
match lz4_sys::LZ4_decompress_safe(
(file as *const u8).add((*file).header_size as usize) as *const _,
decompressed_vec.as_mut_ptr() as *mut _,
(*file).payload_size as c_int,
decompressed_vec.len() as c_int,
) {
error if error < 0 => {
let new_size = decompressed_vec.len() * 2;
if new_size > MAX_MODULE_DECOMPRESSION_BOUND {
return None;
}
decompressed_vec.resize(decompressed_vec.len() * 2, 0);
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
return Some(decompressed_vec);
}
}
}
}

View file

@ -1,55 +0,0 @@
/*
This collection of functions is here to assist with debugging
You use it by manually pasting into a module.ptx that was generated by zluda_dump
and inspecting content of additional debug buffer in replay.py
*/
.func debug_dump_from_thread_16(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b16 value)
{
.reg.u32 local_id;
mov.u32 local_id, %tid.x;
.reg.u32 local_size;
mov.u32 local_size, %ntid.x;
.reg.u32 group_id;
mov.u32 group_id, %ctaid.x;
.reg.b32 global_id;
mad.lo.u32 global_id, group_id, local_size, local_id;
.reg.pred should_exit;
setp.ne.u32 should_exit, global_id, global_id_0;
@should_exit bra END;
.reg.b32 index;
ld.global.u32 index, [debug_addr];
st.global.u32 [debug_addr], index+1;
.reg.u64 st_offset;
cvt.u64.u32 st_offset, index;
mad.lo.u64 st_offset, st_offset, 2, 4; // sizeof(b16), sizeof(32)
add.u64 debug_addr, debug_addr, st_offset;
st.global.u16 [debug_addr], value;
END:
ret;
}
.func debug_dump_from_thread_32(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b32 value)
{
.reg.u32 local_id;
mov.u32 local_id, %tid.x;
.reg.u32 local_size;
mov.u32 local_size, %ntid.x;
.reg.u32 group_id;
mov.u32 group_id, %ctaid.x;
.reg.b32 global_id;
mad.lo.u32 global_id, group_id, local_size, local_id;
.reg.pred should_exit;
setp.ne.u32 should_exit, global_id, global_id_0;
@should_exit bra END;
.reg.b32 index;
ld.global.u32 index, [debug_addr];
st.global.u32 [debug_addr], index+1;
.reg.u64 st_offset;
cvt.u64.u32 st_offset, index;
mad.lo.u64 st_offset, st_offset, 4, 4; // sizeof(b32), sizeof(32)
add.u64 debug_addr, debug_addr, st_offset;
st.global.u32 [debug_addr], value;
END:
ret;
}

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,8 @@
use crate::format;
use cuda_types::cuda::*;
use super::Settings;
use crate::FnCallLog;
use crate::LogEntry;
use cuda_types::cuda::*;
use format::CudaDisplay;
use std::error::Error;
use std::ffi::c_void;
use std::ffi::NulError;
@ -14,18 +16,102 @@ use std::str::Utf8Error;
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
// This type holds all the relevant settings for logging like output path and
// creates objects which match those settings
pub(crate) struct Factory {
// Fallible emitter is optional emitter to file system, we might lack
pub(crate) struct Writer {
// Fallible emitter is an optional emitter to the file system, we might lack
// file permissions or be out of disk space
fallible_emitter: Option<Box<dyn WriteTrailingZeroAware>>,
fallible_emitter: Option<Box<dyn WriteTrailingZeroAware + Send>>,
// This is emitter that "always works" (and if it does not, then we don't
// care). In addition of normal logs it emits errors from fallible emitter
infallible_emitter: Box<dyn WriteTrailingZeroAware>,
infallible_emitter: Box<dyn WriteTrailingZeroAware + Send>,
// This object could be recreated every time, but it's slightly better for performance to
// reuse the allocations by keeping the object in globals
write_buffer: WriteBuffer,
// another shared buffer, so we dont't reallocate on every function call
log_queue: Vec<LogEntry>,
}
impl Writer {
pub(crate) fn new() -> Self {
let debug_emitter = os::new_debug_logger();
Self {
infallible_emitter: debug_emitter,
fallible_emitter: None,
write_buffer: WriteBuffer::new(),
}
}
pub(crate) fn late_init(&mut self, settings: &Settings) -> Result<(), ErrorEntry> {
self.fallible_emitter = settings
.dump_dir
.as_ref()
.map(|path| {
Ok::<_, std::io::Error>(Box::new(File::create(path.to_path_buf().join("log.txt"))?)
as Box<dyn WriteTrailingZeroAware + Send>)
})
.transpose()
.map_err(ErrorEntry::IoError)?;
self.write_buffer
.init(&self.fallible_emitter, &self.infallible_emitter);
Ok(())
}
pub(crate) fn write_and_flush(&mut self, log_root: &mut FnCallLog) {
self.write_all_from_depth(0, log_root);
self.write_buffer.finish();
let error_from_writing_to_fallible_emitter = match self.fallible_emitter {
Some(ref mut emitter) => self.write_buffer.send_to_and_flush(emitter),
None => Ok(()),
};
if let Err(e) = error_from_writing_to_fallible_emitter {
self.hack_squeeze_in_additional_error(ErrorEntry::IoError(e))
}
self.write_buffer
.send_to_and_flush(&mut self.infallible_emitter)
.ok();
self.write_buffer.reset();
log_root.reset();
}
fn write_all_from_depth(&mut self, depth: usize, fn_call: &FnCallLog) {
self.write_call(depth, fn_call);
for sub in fn_call.subcalls.iter() {
match sub {
LogEntry::FnCall(fn_call) => self.write_all_from_depth(depth + 1, fn_call),
LogEntry::Error(err) => self.write_error(depth + 1, err),
}
}
}
fn write_call(&mut self, depth: usize, call: &FnCallLog) {
self.write_buffer.start_line(depth);
write!(self.write_buffer, "{}", call.name).ok();
match call.args {
Some(ref args) => {
self.write_buffer.write_all(args).ok();
}
None => {
self.write_buffer.write_all(b"(...)").ok();
}
}
self.write_buffer.write_all(b" -> ").ok();
if let Some(ref result) = call.output {
self.write_buffer.write_all(result).ok();
} else {
self.write_buffer.write_all(b"UNKNOWN").ok();
};
self.write_buffer.end_line();
}
fn write_error(&mut self, depth: usize, error: &ErrorEntry) {
self.write_buffer.start_line(depth);
write!(self.write_buffer, "{}", error).ok();
self.write_buffer.end_line();
}
fn hack_squeeze_in_additional_error(&mut self, entry: ErrorEntry) {
self.write_buffer.undo_finish();
write!(self.write_buffer, " {}", entry).ok();
self.write_buffer.end_line();
self.write_buffer.finish();
}
}
// When writing out to the emitter (file, WinAPI, whatever else) instead of
@ -48,8 +134,8 @@ impl WriteBuffer {
fn init(
&mut self,
fallible_emitter: &Option<Box<dyn WriteTrailingZeroAware>>,
infallible_emitter: &Box<dyn WriteTrailingZeroAware>,
fallible_emitter: &Option<Box<dyn WriteTrailingZeroAware + Send>>,
infallible_emitter: &Box<dyn WriteTrailingZeroAware + Send>,
) {
if infallible_emitter.should_prefix() {
self.prefixed_buffer = Some(Vec::new());
@ -72,10 +158,16 @@ impl WriteBuffer {
.chain(self.unprefixed_buffer.as_mut().into_iter())
}
fn start_line(&mut self) {
fn start_line(&mut self, depth: usize) {
if let Some(buffer) = &mut self.prefixed_buffer {
buffer.extend_from_slice(LOG_PREFIX);
}
if depth == 0 {
return;
}
for buffer in self.all_buffers() {
buffer.extend(std::iter::repeat_n(b' ', depth * 4));
}
}
fn end_line(&mut self) {
@ -84,12 +176,6 @@ impl WriteBuffer {
}
}
fn write(&mut self, s: &str) {
for buffer in self.all_buffers() {
buffer.extend_from_slice(s.as_bytes());
}
}
fn finish(&mut self) {
for buffer in self.all_buffers() {
buffer.push(b'\0');
@ -102,22 +188,26 @@ impl WriteBuffer {
}
}
fn send_to(&self, log_emitter: &mut Box<dyn WriteTrailingZeroAware>) -> Result<(), io::Error> {
fn send_to_and_flush(
&self,
log_emitter: &mut Box<dyn WriteTrailingZeroAware + Send>,
) -> Result<(), io::Error> {
if log_emitter.should_prefix() {
log_emitter.write_zero_aware(
&*self
.prefixed_buffer
.as_ref()
.unwrap_or_else(|| unreachable!()),
)
)?;
} else {
log_emitter.write_zero_aware(
&*self
.unprefixed_buffer
.as_ref()
.unwrap_or_else(|| unreachable!()),
)
)?;
}
log_emitter.flush()
}
fn reset(&mut self) {
@ -143,183 +233,36 @@ impl Write for WriteBuffer {
}
}
impl Factory {
pub(crate) fn new() -> Self {
let debug_emitter = os::new_debug_logger();
Factory {
infallible_emitter: debug_emitter,
fallible_emitter: None,
write_buffer: WriteBuffer::new(),
log_queue: Vec::new(),
}
}
fn initalize_fallible_emitter(
settings: &Settings,
) -> std::io::Result<Option<Box<dyn WriteTrailingZeroAware>>> {
settings
.dump_dir
.as_ref()
.map(|path| {
Ok::<_, std::io::Error>(Box::new(File::create(path.to_path_buf().join("log.txt"))?)
as Box<dyn WriteTrailingZeroAware>)
})
.transpose()
}
// We load settings during first function call, since during that time we
// also create one of the loggers, what do we do about errors encountered
// at that time? We log them to the newly created logger, but to make it
// "nice" we do both of those in a single function
// An alternative would be to have something like this:
// let mut factory = Factory::new();
// let mut cuInitLog = factory.get_logger("cuInit");
// cuInitLog.load_settings(&settings);
// which is a bit nonsensical
pub(crate) fn get_first_logger_and_init_settings(
&mut self,
func: &'static str,
arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
) -> (FunctionLogger, Settings) {
let mut first_logger = self.get_logger(func, arguments_writer);
let settings = Settings::read_and_init(&mut first_logger);
match Self::initalize_fallible_emitter(&settings) {
Ok(fallible_emitter) => {
*first_logger.fallible_emitter = fallible_emitter;
}
Err(err) => first_logger.log(LogEntry::IoError(err)),
}
first_logger.write_buffer.init(
first_logger.fallible_emitter,
first_logger.infallible_emitter,
);
(first_logger, settings)
}
pub(crate) fn get_logger(
&mut self,
func: &'static str,
arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
) -> FunctionLogger {
FunctionLogger {
result: None,
name: CudaFunctionName::Normal(func),
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
arguments_writer: Some(arguments_writer),
}
}
pub(crate) fn get_logger_dark_api(
&mut self,
guid: CUuuid,
index: usize,
arguments_writer: Option<Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>>,
) -> FunctionLogger {
FunctionLogger {
result: None,
name: CudaFunctionName::Dark { guid, index },
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
arguments_writer,
}
}
}
enum CudaFunctionName {
#[derive(Clone)]
pub(crate) enum CudaFunctionName {
Normal(&'static str),
Dark { guid: CUuuid, index: usize },
}
// This encapsulates log output for a single function call.
// It's a separate struct and not just a plain function for two reasons:
// * While we want to always display return code before logging errors,
// logging errors might come before return code is returned
// * We want to handle panics gracefully with Drop
pub(crate) struct FunctionLogger<'a> {
pub(crate) result: Option<CUresult>,
name: CudaFunctionName,
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
arguments_writer: Option<Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>>,
write_buffer: &'a mut WriteBuffer,
log_queue: &'a mut Vec<LogEntry>,
}
impl<'a> FunctionLogger<'a> {
pub(crate) fn log(&mut self, l: LogEntry) {
self.log_queue.push(l);
}
pub(crate) fn log_io_error(&mut self, error: io::Result<()>) {
if let Err(e) = error {
self.log_queue.push(LogEntry::IoError(e));
}
}
fn flush_log_queue_to_write_buffer(&mut self) {
self.write_buffer.start_line();
match self.name {
CudaFunctionName::Normal(fn_name) => self.write_buffer.write(fn_name),
impl Display for CudaFunctionName {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CudaFunctionName::Normal(fn_) => f.write_str(fn_),
CudaFunctionName::Dark { guid, index } => {
format::CudaDisplay::write(&guid, "", 0, &mut self.write_buffer).ok();
write!(&mut self.write_buffer, "::{}", index).ok();
match ::dark_api::cuda::guid_to_name(guid, *index) {
Some((name, fn_)) => match fn_ {
Some(fn_) => write!(f, "{{{name}}}::{fn_}"),
None => write!(f, "{{{name}}}::{index}"),
},
None => {
let mut temp = Vec::new();
format::CudaDisplay::write(guid, "", 0, &mut temp)
.map_err(|_| std::fmt::Error::default())?;
let temp = String::from_utf8_lossy(&*temp);
write!(f, "{temp}::{index}")
}
}
}
}
match &mut self.arguments_writer {
Some(arg_writer) => {
arg_writer(&mut self.write_buffer).ok();
}
None => {
self.write_buffer.write_all(b"(...)").ok();
}
}
self.write_buffer.write_all(b" -> ").ok();
if let Some(result) = self.result {
format::CudaDisplay::write(&result, "", 0, self.write_buffer).ok();
} else {
self.write_buffer.write_all(b"UNKNOWN").ok();
};
self.write_buffer.end_line();
for entry in self.log_queue.iter() {
write!(self.write_buffer, " {}", entry).ok();
self.write_buffer.end_line();
}
self.write_buffer.finish();
}
// This is a dirty hack: we call it at the point where our write buffer is
// already finalized and squeeze the error produced by the previous emitter
fn hack_squeeze_in_additional_error(&mut self, entry: LogEntry) {
self.write_buffer.undo_finish();
write!(self.write_buffer, " {}", entry).unwrap_or_else(|_| unreachable!());
self.write_buffer.end_line();
self.write_buffer.finish();
}
}
impl<'a> Drop for FunctionLogger<'a> {
fn drop(&mut self) {
self.flush_log_queue_to_write_buffer();
let error_from_writing_to_fallible_emitter = match self.fallible_emitter {
Some(emitter) => self.write_buffer.send_to(emitter),
None => Ok(()),
};
if let Err(e) = error_from_writing_to_fallible_emitter {
self.hack_squeeze_in_additional_error(LogEntry::IoError(e))
}
self.write_buffer.send_to(self.infallible_emitter).ok();
self.write_buffer.reset();
self.log_queue.truncate(0);
}
}
// Structured log type. We don't want frontend to care about log formatting
pub(crate) enum LogEntry {
pub(crate) enum ErrorEntry {
IoError(io::Error),
CreatedDumpDirectory(PathBuf),
ErrorBox(Box<dyn Error>),
@ -328,12 +271,13 @@ pub(crate) enum LogEntry {
raw_image: *const c_void,
kind: &'static str,
},
FunctionNotFound(CudaFunctionName),
MalformedModulePath(Utf8Error),
NonUtf8ModuleText(Utf8Error),
NulInsideModuleText(NulError),
ModuleParsingError(String),
Lz4DecompressionFailure,
UnknownExportTableFn,
ZstdDecompressionFailure(usize),
UnexpectedArgument {
arg_name: &'static str,
expected: Vec<UInt>,
@ -344,73 +288,135 @@ pub(crate) enum LogEntry {
expected: Vec<UInt>,
observed: UInt,
},
InvalidEnvVar {
var: &'static str,
pattern: &'static str,
value: String,
},
UnexpectedExportTableSize {
expected: usize,
computed: usize,
},
IntegrityCheck {
original: [u64; 2],
overriden: [u64; 2],
},
NullPointer(&'static str),
UnknownLibrary(CUlibrary),
}
impl Display for LogEntry {
unsafe impl Send for ErrorEntry {}
unsafe impl Sync for ErrorEntry {}
impl From<cuda_types::dark_api::ParseError> for ErrorEntry {
fn from(e: cuda_types::dark_api::ParseError) -> Self {
match e {
cuda_types::dark_api::ParseError::NullPointer(s) => ErrorEntry::NullPointer(s),
cuda_types::dark_api::ParseError::UnexpectedBinaryField {
field_name,
observed,
expected,
} => ErrorEntry::UnexpectedBinaryField {
field_name,
observed: UInt::from(observed),
expected: expected.into_iter().map(UInt::from).collect(),
},
}
}
}
impl Display for ErrorEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LogEntry::IoError(e) => e.fmt(f),
LogEntry::CreatedDumpDirectory(dir) => {
write!(
f,
"Created dump directory {} ",
dir.as_os_str().to_string_lossy()
)
}
LogEntry::ErrorBox(e) => e.fmt(f),
LogEntry::UnsupportedModule {
module,
raw_image,
kind,
} => {
write!(
f,
"Unsupported {} module {:?} loaded from module image {:?}",
kind, module, raw_image
)
}
LogEntry::MalformedModulePath(e) => e.fmt(f),
LogEntry::NonUtf8ModuleText(e) => e.fmt(f),
LogEntry::ModuleParsingError(file_name) => {
write!(
f,
"Error parsing module, log has been written to {}",
file_name
)
}
LogEntry::NulInsideModuleText(e) => e.fmt(f),
LogEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"),
LogEntry::UnknownExportTableFn => write!(f, "Unknown export table function"),
LogEntry::UnexpectedBinaryField {
field_name,
expected,
observed,
} => write!(
f,
"Unexpected field {}. Expected one of: {{{}}}, observed: {}",
field_name,
expected
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", "),
observed
),
LogEntry::UnexpectedArgument {
arg_name,
expected,
observed,
} => write!(
f,
"Unexpected argument {}. Expected one of: {{{}}}, observed: {}",
arg_name,
expected
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", "),
observed
),
ErrorEntry::IoError(e) => e.fmt(f),
ErrorEntry::CreatedDumpDirectory(dir) => {
write!(
f,
"Created dump directory {} ",
dir.as_os_str().to_string_lossy()
)
}
ErrorEntry::ErrorBox(e) => e.fmt(f),
ErrorEntry::UnsupportedModule {
module,
raw_image,
kind,
} => {
write!(
f,
"Unsupported {} module {:?} loaded from module image {:?}",
kind, module, raw_image
)
}
ErrorEntry::MalformedModulePath(e) => e.fmt(f),
ErrorEntry::NonUtf8ModuleText(e) => e.fmt(f),
ErrorEntry::ModuleParsingError(file_name) => {
write!(
f,
"Error parsing module, log has been written to {}",
file_name
)
}
ErrorEntry::NulInsideModuleText(e) => e.fmt(f),
ErrorEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"),
ErrorEntry::ZstdDecompressionFailure(err_code) => write!(f, "Zstd decompression failure: {}", zstd_safe::get_error_name(*err_code)),
ErrorEntry::UnexpectedBinaryField {
field_name,
expected,
observed,
} => write!(
f,
"Unexpected field {}. Expected one of: {{{}}}, observed: {}",
field_name,
expected
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", "),
observed
),
ErrorEntry::UnexpectedArgument {
arg_name,
expected,
observed,
} => write!(
f,
"Unexpected argument {}. Expected one of: {{{}}}, observed: {}",
arg_name,
expected
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", "),
observed
),
ErrorEntry::InvalidEnvVar {
var,
pattern,
value,
} => write!(
f,
"Unexpected value of environment variable {var}. Expected pattern: {pattern}, got value: {value}"
),
ErrorEntry::FunctionNotFound(cuda_function_name) => write!(
f,
"No function {cuda_function_name} in the underlying library"
),
ErrorEntry::UnexpectedExportTableSize { expected, computed } => {
write!(f, "Table length mismatch. Expected: {expected}, got: {computed}")
}
ErrorEntry::IntegrityCheck { original, overriden } => {
write!(f, "Overriding integrity check hash. Original: {original:?}, overriden: {overriden:?}")
}
ErrorEntry::NullPointer(type_) => {
write!(f, "Null pointer of type {type_} encountered")
}
ErrorEntry::UnknownLibrary(culibrary) => {
write!(f, "Unknown library: ")?;
let mut temp_buffer = Vec::new();
CudaDisplay::write(culibrary, "", 0, &mut temp_buffer).ok();
f.write_str(&unsafe { String::from_utf8_unchecked(temp_buffer) })
}
}
}
}
@ -422,6 +428,24 @@ pub(crate) enum UInt {
USize(usize),
}
impl From<u16> for UInt {
fn from(value: u16) -> Self {
UInt::U16(value)
}
}
impl From<u32> for UInt {
fn from(value: u32) -> Self {
UInt::U32(value)
}
}
impl From<usize> for UInt {
fn from(value: usize) -> Self {
UInt::USize(value)
}
}
impl Display for UInt {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -491,7 +515,7 @@ mod os {
}
}
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware> {
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware + Send> {
let stderr = std::io::stderr();
let log_to_stderr = stderr.as_raw_handle() != ptr::null_mut();
if log_to_stderr {
@ -506,19 +530,23 @@ mod os {
mod os {
use super::WriteTrailingZeroAware;
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware> {
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware + Send> {
Box::new(std::io::stderr())
}
}
#[cfg(test)]
mod tests {
use std::{borrow::Cow, cell::RefCell, io, rc::Rc, str};
use cuda_types::cuda::CUresultConsts;
use super::{FunctionLogger, LogEntry, WriteTrailingZeroAware};
use crate::{log::{CudaFunctionName, WriteBuffer}, CUresult};
use super::{ErrorEntry, FnCallLog, WriteTrailingZeroAware};
use crate::{
log::{CudaFunctionName, WriteBuffer},
FnCallLogStack, OuterCallGuard,
};
use std::{
cell::RefCell,
io, str,
sync::{Arc, Mutex},
};
struct FailOnNthWrite {
fail_on: usize,
@ -546,11 +574,11 @@ mod tests {
// Custom type to not trigger trait coherence rules
#[derive(Clone)]
struct RcVec<T>(Rc<RefCell<Vec<T>>>);
struct ArcVec<T>(Arc<Mutex<Vec<T>>>);
impl WriteTrailingZeroAware for RcVec<u8> {
impl WriteTrailingZeroAware for ArcVec<u8> {
fn write_zero_aware(&mut self, buf: &[u8]) -> std::io::Result<()> {
let mut vec = self.0.borrow_mut();
let mut vec = self.0.lock().unwrap();
vec.extend_from_slice(buf.split_last().unwrap().1);
Ok(())
}
@ -565,37 +593,59 @@ mod tests {
}
#[test]
// TODO: fix this, it should use drop guard for testing.
// Previously FnCallLog would implement Drop and write to the log
fn error_in_fallible_emitter_is_handled_gracefully() {
let result = RcVec(Rc::new(RefCell::new(Vec::<u8>::new())));
let mut infallible_emitter = Box::new(result.clone()) as Box<dyn WriteTrailingZeroAware>;
let mut fallible_emitter = Some(Box::new(FailOnNthWrite {
let result = ArcVec(Arc::new(Mutex::new(Vec::<u8>::new())));
let infallible_emitter = Box::new(result.clone()) as Box<dyn WriteTrailingZeroAware + Send>;
let fallible_emitter = Some(Box::new(FailOnNthWrite {
fail_on: 1,
counter: 0,
}) as Box<dyn WriteTrailingZeroAware>);
}) as Box<dyn WriteTrailingZeroAware + Send>);
let mut write_buffer = WriteBuffer::new();
write_buffer.unprefixed_buffer = Some(Vec::new());
let mut log_queue = Vec::new();
let mut func_logger = FunctionLogger {
result: Some(CUresult::SUCCESS),
let mut writer = super::Writer {
fallible_emitter,
infallible_emitter,
write_buffer,
};
let func_logger = FnCallLog {
name: CudaFunctionName::Normal("cuInit"),
infallible_emitter: &mut infallible_emitter,
fallible_emitter: &mut fallible_emitter,
write_buffer: &mut write_buffer,
log_queue: &mut log_queue,
arguments_writer: None,
args: None,
output: None,
subcalls: Vec::new(),
};
let log_root = FnCallLogStack {
depth: 1,
log_root: func_logger,
};
let log_root = RefCell::new(log_root);
let drop_guard = OuterCallGuard {
writer: &mut writer,
log_root: &log_root,
};
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(1)));
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(2)));
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(3)));
drop(func_logger);
drop(infallible_emitter);
{
log_root
.borrow_mut()
.log_root
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(1)));
log_root
.borrow_mut()
.log_root
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(2)));
log_root
.borrow_mut()
.log_root
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(3)));
}
drop(drop_guard);
let result = result.0.borrow_mut();
let result = result.0.lock().unwrap();
let result_str = str::from_utf8(&*result).unwrap();
let result_lines = result_str.lines().collect::<Vec<_>>();
assert_eq!(result_lines.len(), 5);
assert_eq!(result_lines[0], "cuInit(...) -> CUDA_SUCCESS");
assert_eq!(result_lines[0], "cuInit(...) -> UNKNOWN");
assert!(result_lines[1].starts_with(" "));
assert!(result_lines[2].starts_with(" "));
assert!(result_lines[3].starts_with(" "));

View file

@ -34,18 +34,16 @@ macro_rules! os_log {
#[cfg(target_arch = "x86_64")]
pub fn get_thunk(
original_fn: *const c_void,
report_fn: unsafe extern "system" fn(*const CUuuid, usize),
report_fn: unsafe extern "system" fn(&CUuuid, usize),
guid: *const CUuuid,
idx: usize,
) -> *const c_void {
use dynasmrt::{dynasm, DynasmApi};
let mut ops = dynasmrt::x86::Assembler::new().unwrap();
let mut ops = dynasmrt::x64::Assembler::new().unwrap();
let start = ops.offset();
// Let's hope there's never more than 6 arguments
dynasm!(ops
; .arch x64
; push rbp
; mov rbp, rsp
// stack alignment
; sub rsp, 8
; push rdi
; push rsi
; push rdx
@ -62,10 +60,9 @@ pub fn get_thunk(
; pop rdx
; pop rsi
; pop rdi
; add rsp, 8
; mov rax, QWORD original_fn as i64
; call rax
; pop rbp
; ret
; jmp rax
; int 3
);
let exe_buf = ops.finalize().unwrap();
@ -73,3 +70,12 @@ pub fn get_thunk(
mem::forget(exe_buf);
result_fn as *const _
}
#[link(name = "pthread")]
unsafe extern "C" {
fn pthread_self() -> std::os::unix::thread::RawPthread;
}
pub(crate) fn current_thread() -> u32 {
(unsafe { pthread_self() }) as u32
}

View file

@ -178,3 +178,13 @@ pub fn get_thunk(
mem::forget(exe_buf);
result_fn as *const _
}
#[link(name = "kernel32")]
unsafe extern "system" {
fn GetCurrentThreadId() -> u32;
}
pub(crate) fn current_thread() -> u32 {
unsafe { GetCurrentThreadId() }
}

View file

@ -1,103 +0,0 @@
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.tools as py_tools
from pathlib import PurePath
import numpy as np
from os import path
import os
import itertools
import sys
# It's impossible to discern what is the type of a buffer, here you can override equality checks
def assert_array_equal_override(kernel_name, idx, arr1, arr2):
if kernel_name == 'knn_match' and idx == 6:
arr1_view = np.frombuffer(arr1, dtype=np.dtype([('f1', np.uint32), ('f2', np.uint32), ('f3', np.uint32)]))
np.ndarray.sort(arr1_view)
arr2_view = np.frombuffer(arr2, dtype=np.dtype([('f1', np.uint32), ('f2', np.uint32), ('f3', np.uint32)]))
np.ndarray.sort(arr2_view)
if kernel_name == 'nonmax_suppression' and idx == 7:
arr1_view = np.frombuffer(arr1, dtype=np.dtype(np.uint32))
np.ndarray.sort(arr1_view)
arr2_view = np.frombuffer(arr2, dtype=np.dtype(np.uint32))
np.ndarray.sort(arr2_view)
np.testing.assert_array_equal(arr1, arr2)
def load_arguments(arg_path):
is_buffer = arg_path.endswith(".buffer")
with open(arg_path, "rb") as f:
arg_bytes = f.read()
if not is_buffer:
if len(arg_bytes) == 1:
return np.frombuffer(arg_bytes, dtype=np.uint8)[0], None
elif len(arg_bytes) == 2:
return np.frombuffer(arg_bytes, dtype=np.uint16)[0], None
elif len(arg_bytes) == 4:
return np.frombuffer(arg_bytes, dtype=np.uint32)[0], None
elif len(arg_bytes) == 8:
return np.frombuffer(arg_bytes, dtype=np.uint64)[0], None
else:
raise Exception('Incorrect size of {}: {}'.format(arg_path, len(arg_bytes)))
else:
buff = np.frombuffer(bytearray(arg_bytes), dtype=np.uint8)
buff.setflags(write=1, align=1)
return drv.InOut(buff), buff
def parse_arguments(dump_path, prefix):
dir = path.join(dump_path, prefix)
arg_files = os.listdir(dir)
return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
def append_debug_buffer(args, grid, block):
args = list(args)
items = grid[0] * grid[1] * grid[2] * block[0] * block[1] * block[2]
debug_buff = np.zeros(items, dtype=np.uint32)
args.append((drv.InOut(debug_buff), debug_buff))
return args
def verify_single_dump(input_path, max_block_threads):
print(input_path)
kernel_name = path.basename(os.path.normpath(input_path)).split("_", 1)[1]
with open(path.join(input_path, "launch.txt"), "r") as launch_f:
launch_lines = list(map(int, launch_f.readlines()))
block = tuple(launch_lines[3:6])
launch_block_size = block[0] * block[1] * block[2]
if launch_block_size > max_block_threads:
print(
f" Skipping, launch block size ({launch_block_size}) bigger than maximum block size ({max_block_threads})")
return
module = drv.module_from_file(path.join(input_path, "module.ptx"))
kernel = module.get_function(kernel_name)
pre_args = append_debug_buffer(parse_arguments(input_path, "pre"), tuple(launch_lines[:3]), block)
kernel_pre_args, host_pre_args = zip(*pre_args)
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
post_args = parse_arguments(input_path, "post")
_, host_post_args_args = zip(*post_args)
for idx, (pre_arg, post_arg) in enumerate(zip(host_pre_args, host_post_args_args)):
if pre_arg is None:
continue
try:
assert_array_equal_override(kernel_name, idx, pre_arg, post_arg)
except Exception as e:
print(f"{idx}: {e}")
def main(argv):
device = drv.Device(0)
max_threads = device.get_attribute(drv.device_attribute.MAX_THREADS_PER_BLOCK)
print(device.name())
input_path = argv[1]
if os.path.exists(path.join(input_path, "launch.txt")):
verify_single_dump(input_path, max_threads)
else:
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
if os.path.isdir(input_subdir):
verify_single_dump(input_subdir, max_threads)
if __name__ == "__main__":
main(sys.argv)

View file

@ -1,79 +0,0 @@
use cuda_base::cuda_function_declarations;
use std::ffi::CStr;
use std::mem;
use std::ptr;
use std::ptr::NonNull;
use std::{marker::PhantomData, os::raw::c_void};
use crate::os;
struct DynamicFn<T> {
pointer: usize,
_marker: PhantomData<T>,
}
impl<T> Default for DynamicFn<T> {
fn default() -> Self {
DynamicFn {
pointer: 0,
_marker: PhantomData,
}
}
}
impl<T> DynamicFn<T> {
unsafe fn get(&mut self, lib: *mut c_void, name: &[u8]) -> Option<T> {
match self.pointer {
0 => {
let addr = os::get_proc_address(lib, CStr::from_bytes_with_nul_unchecked(name));
if addr == ptr::null_mut() {
self.pointer = 1;
return None;
} else {
self.pointer = addr as _;
}
}
1 => return None,
_ => {}
}
Some(mem::transmute_copy(&self.pointer))
}
}
pub(crate) struct CudaDynamicFns {
lib_handle: NonNull<::std::ffi::c_void>,
fn_table: CudaFnTable,
}
impl CudaDynamicFns {
pub(crate) unsafe fn load_library(path: &str) -> Option<Self> {
let lib_handle = NonNull::new(os::load_library(path));
lib_handle.map(|lib_handle| CudaDynamicFns {
lib_handle,
fn_table: CudaFnTable::default(),
})
}
}
macro_rules! emit_cuda_fn_table {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
#[derive(Default)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
struct CudaFnTable {
$($fn_name: DynamicFn<extern $abi fn ( $($arg_id : $arg_type),* ) -> $ret_type>),*
}
impl CudaDynamicFns {
$(
#[allow(dead_code)]
pub(crate) fn $fn_name(&mut self, $($arg_id : $arg_type),*) -> Option<$ret_type> {
let func = unsafe { self.fn_table.$fn_name.get(self.lib_handle.as_ptr(), concat!(stringify!($fn_name), "\0").as_bytes()) };
func.map(|f| f($($arg_id),*) )
}
)*
}
};
}
cuda_function_declarations!(emit_cuda_fn_table);

View file

@ -1,13 +1,21 @@
use crate::{dark_api, log, Settings};
use cuda_types::cuda::*;
use crate::{
log::{self, UInt},
trace, ErrorEntry, FnCallLog, Settings,
};
use cuda_types::{
cuda::*,
dark_api::{FatbinFileHeader, FatbinFileHeaderFlags, FatbinHeader, FatbincWrapper},
};
use rustc_hash::{FxHashMap, FxHashSet};
use std::{
collections::HashMap,
borrow::Cow,
ffi::{c_void, CStr, CString},
fs::{self, File},
io::{self, Read, Write},
path::PathBuf,
rc::Rc,
ptr,
};
use unwrap_or::unwrap_some_or;
// This struct is the heart of CUDA state tracking, it:
// * receives calls from the probes about changes to CUDA state
@ -15,24 +23,28 @@ use std::{
// * writes out relevant state change and details to disk and log
pub(crate) struct StateTracker {
writer: DumpWriter,
modules: HashMap<CUmodule, Option<ParsedModule>>,
pub(crate) libraries: FxHashMap<CUlibrary, CodePointer>,
saved_modules: FxHashSet<CUmodule>,
module_counter: usize,
submodule_counter: usize,
last_module_version: Option<usize>,
pub(crate) dark_api: dark_api::DarkApiState,
pub(crate) override_cc_major: Option<u32>,
pub(crate) override_cc: Option<(u32, u32)>,
}
#[derive(Clone, Copy)]
pub(crate) struct CodePointer(pub *const c_void);
unsafe impl Send for CodePointer {}
unsafe impl Sync for CodePointer {}
impl StateTracker {
pub(crate) fn new(settings: &Settings) -> Self {
StateTracker {
writer: DumpWriter::new(settings.dump_dir.clone()),
modules: HashMap::new(),
libraries: FxHashMap::default(),
saved_modules: FxHashSet::default(),
module_counter: 0,
submodule_counter: 0,
last_module_version: None,
dark_api: dark_api::DarkApiState::new(),
override_cc_major: settings.override_cc_major,
override_cc: settings.override_cc,
}
}
@ -40,12 +52,12 @@ impl StateTracker {
&mut self,
module: CUmodule,
file_name: *const i8,
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
) {
let file_name = match unsafe { CStr::from_ptr(file_name) }.to_str() {
Ok(f) => f,
Err(err) => {
fn_logger.log(log::LogEntry::MalformedModulePath(err));
fn_logger.log(log::ErrorEntry::MalformedModulePath(err));
return;
}
};
@ -56,7 +68,7 @@ impl StateTracker {
fn try_record_new_module_file(
&mut self,
module: CUmodule,
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
file_name: &str,
) -> io::Result<()> {
let mut module_file = fs::File::open(file_name)?;
@ -69,37 +81,29 @@ impl StateTracker {
pub(crate) fn record_new_submodule(
&mut self,
module: CUmodule,
version: Option<usize>,
submodule: &[u8],
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
type_: &'static str,
) {
if !self.modules.contains_key(&module) {
if self.saved_modules.insert(module) {
self.module_counter += 1;
self.submodule_counter = 0;
self.modules.insert(module, None);
}
if version != self.last_module_version {
self.submodule_counter = 0;
}
self.submodule_counter += 1;
self.last_module_version = version;
fn_logger.log_io_error(self.writer.save_module(
self.module_counter,
version,
Some(self.submodule_counter),
submodule,
type_,
));
if type_ == "ptx" {
match CString::new(submodule) {
Err(e) => fn_logger.log(log::LogEntry::NulInsideModuleText(e)),
Err(e) => fn_logger.log(log::ErrorEntry::NulInsideModuleText(e)),
Ok(submodule_cstring) => match submodule_cstring.to_str() {
Err(e) => fn_logger.log(log::LogEntry::NonUtf8ModuleText(e)),
Err(e) => fn_logger.log(log::ErrorEntry::NonUtf8ModuleText(e)),
Ok(submodule_text) => self.try_parse_and_record_kernels(
fn_logger,
self.module_counter,
version,
Some(self.submodule_counter),
submodule_text,
),
@ -112,25 +116,36 @@ impl StateTracker {
&mut self,
module: CUmodule,
raw_image: *const c_void,
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
) {
self.module_counter += 1;
if unsafe { *(raw_image as *const [u8; 4]) } == *goblin::elf64::header::ELFMAG {
self.modules.insert(module, None);
self.saved_modules.insert(module);
// TODO: Parse ELF and write it to disk
fn_logger.log(log::LogEntry::UnsupportedModule {
fn_logger.log(log::ErrorEntry::UnsupportedModule {
module,
raw_image,
kind: "ELF",
})
} else if unsafe { *(raw_image as *const [u8; 8]) } == *goblin::archive::MAGIC {
self.modules.insert(module, None);
self.saved_modules.insert(module);
// TODO: Figure out how to get size of archive module and write it to disk
fn_logger.log(log::LogEntry::UnsupportedModule {
fn_logger.log(log::ErrorEntry::UnsupportedModule {
module,
raw_image,
kind: "archive",
})
} else if unsafe { *(raw_image as *const u32) } == FatbincWrapper::MAGIC {
unsafe {
fn_logger.try_(|fn_logger| {
trace::record_submodules_from_wrapped_fatbin(
module,
raw_image as *const FatbincWrapper,
fn_logger,
self,
)
});
}
} else {
self.record_module_ptx(module, raw_image, fn_logger)
}
@ -140,44 +155,40 @@ impl StateTracker {
&mut self,
module: CUmodule,
raw_image: *const c_void,
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
) {
self.modules.insert(module, None);
self.saved_modules.insert(module);
let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str();
let module_text = match module_text {
Ok(m) => m,
Err(utf8_err) => {
fn_logger.log(log::LogEntry::NonUtf8ModuleText(utf8_err));
fn_logger.log(log::ErrorEntry::NonUtf8ModuleText(utf8_err));
return;
}
};
fn_logger.log_io_error(self.writer.save_module(
self.module_counter,
None,
None,
module_text.as_bytes(),
"ptx",
));
self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, None, module_text);
self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, module_text);
}
fn try_parse_and_record_kernels(
&mut self,
fn_logger: &mut log::FunctionLogger,
fn_logger: &mut FnCallLog,
module_index: usize,
version: Option<usize>,
submodule_index: Option<usize>,
module_text: &str,
) {
let errors = ptx_parser::parse_for_errors(module_text);
if !errors.is_empty() {
fn_logger.log(log::LogEntry::ModuleParsingError(
DumpWriter::get_file_name(module_index, version, submodule_index, "log"),
fn_logger.log(log::ErrorEntry::ModuleParsingError(
DumpWriter::get_file_name(module_index, submodule_index, "log"),
));
fn_logger.log_io_error(self.writer.save_module_error_log(
module_text,
module_index,
version,
submodule_index,
&*errors,
));
@ -185,11 +196,6 @@ impl StateTracker {
}
}
struct ParsedModule {
content: Rc<String>,
kernels_args: Option<HashMap<String, Vec<usize>>>,
}
// This structs writes out information about CUDA execution to the dump dir
struct DumpWriter {
dump_dir: Option<PathBuf>,
@ -203,7 +209,6 @@ impl DumpWriter {
fn save_module(
&self,
module_index: usize,
version: Option<usize>,
submodule_index: Option<usize>,
buffer: &[u8],
kind: &'static str,
@ -212,12 +217,7 @@ impl DumpWriter {
None => return Ok(()),
Some(d) => d.clone(),
};
dump_file.push(Self::get_file_name(
module_index,
version,
submodule_index,
kind,
));
dump_file.push(Self::get_file_name(module_index, submodule_index, kind));
let mut file = File::create(dump_file)?;
file.write_all(buffer)?;
Ok(())
@ -225,9 +225,7 @@ impl DumpWriter {
fn save_module_error_log<'input>(
&self,
module_text: &str,
module_index: usize,
version: Option<usize>,
submodule_index: Option<usize>,
errors: &[ptx_parser::PtxError<'input>],
) -> io::Result<()> {
@ -235,12 +233,7 @@ impl DumpWriter {
None => return Ok(()),
Some(d) => d.clone(),
};
log_file.push(Self::get_file_name(
module_index,
version,
submodule_index,
"log",
));
log_file.push(Self::get_file_name(module_index, submodule_index, "log"));
let mut file = File::create(log_file)?;
for error in errors {
writeln!(file, "{}", error)?;
@ -248,24 +241,131 @@ impl DumpWriter {
Ok(())
}
fn get_file_name(
module_index: usize,
version: Option<usize>,
submodule_index: Option<usize>,
kind: &str,
) -> String {
match (version, submodule_index) {
(Some(version), Some(submodule_index)) => format!(
"module_{:04}_v{}_{}.{}",
module_index, version, submodule_index, kind
),
(Some(version), None) => {
format!("module_{:04}_v{}.{}", module_index, version, kind)
fn get_file_name(module_index: usize, submodule_index: Option<usize>, kind: &str) -> String {
match submodule_index {
None => {
format!("module_{:04}.{:02}", module_index, kind)
}
(None, Some(submodule_index)) => {
format!("module_{:04}_{}.{}", module_index, submodule_index, kind)
Some(submodule_index) => {
format!("module_{:04}_{:02}.{}", module_index, submodule_index, kind)
}
(None, None) => format!("module_{:04}.{}", module_index, kind),
}
}
}
pub(crate) unsafe fn record_submodules_from_wrapped_fatbin(
module: CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
fn_logger: &mut FnCallLog,
state: &mut StateTracker,
) -> Result<(), ErrorEntry> {
let fatbinc_wrapper = FatbincWrapper::new(&fatbinc_wrapper).map_err(ErrorEntry::from)?;
let is_version_2 = fatbinc_wrapper.version == FatbincWrapper::VERSION_V2;
record_submodules_from_fatbin(module, (*fatbinc_wrapper).data, fn_logger, state)?;
if is_version_2 {
let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void;
while *current != ptr::null() {
record_submodules_from_fatbin(module, *current as *const _, fn_logger, state)?;
current = current.add(1);
}
}
Ok(())
}
pub(crate) unsafe fn record_submodules_from_fatbin(
module: CUmodule,
fatbin_header: *const FatbinHeader,
logger: &mut FnCallLog,
state: &mut StateTracker,
) -> Result<(), ErrorEntry> {
let header = FatbinHeader::new(&fatbin_header).map_err(ErrorEntry::from)?;
let file = header.get_content();
record_submodules(module, logger, state, file)?;
Ok(())
}
unsafe fn record_submodules(
module: CUmodule,
fn_logger: &mut FnCallLog,
state: &mut StateTracker,
mut file_buffer: &[u8],
) -> Result<(), ErrorEntry> {
while let Some(file) = FatbinFileHeader::next(&mut file_buffer)? {
let mut payload = if file.flags.contains(FatbinFileHeaderFlags::CompressedLz4) {
Cow::Owned(unwrap_some_or!(
fn_logger.try_return(|| decompress_lz4(file)),
continue
))
} else if file.flags.contains(FatbinFileHeaderFlags::CompressedZstd) {
Cow::Owned(unwrap_some_or!(
fn_logger.try_return(|| decompress_zstd(file)),
continue
))
} else {
Cow::Borrowed(file.get_payload())
};
match file.kind {
FatbinFileHeader::HEADER_KIND_PTX => {
while payload.last() == Some(&0) {
// remove trailing zeros
payload.to_mut().pop();
}
state.record_new_submodule(module, &*payload, fn_logger, "ptx")
}
FatbinFileHeader::HEADER_KIND_ELF => {
state.record_new_submodule(module, &*payload, fn_logger, "elf")
}
_ => {
fn_logger.log(log::ErrorEntry::UnexpectedBinaryField {
field_name: "FATBIN_FILE_HEADER_KIND",
expected: vec![
UInt::U16(FatbinFileHeader::HEADER_KIND_PTX),
UInt::U16(FatbinFileHeader::HEADER_KIND_ELF),
],
observed: UInt::U16(file.kind),
});
}
}
}
Ok(())
}
const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024;
unsafe fn decompress_lz4(file: &FatbinFileHeader) -> Result<Vec<u8>, ErrorEntry> {
let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
let mut decompressed_vec = vec![0u8; decompressed_size];
loop {
match lz4_sys::LZ4_decompress_safe(
file.get_payload().as_ptr() as *const _,
decompressed_vec.as_mut_ptr() as *mut _,
(*file).payload_size as _,
decompressed_vec.len() as _,
) {
error if error < 0 => {
let new_size = decompressed_vec.len() * 2;
if new_size > MAX_MODULE_DECOMPRESSION_BOUND {
return Err(ErrorEntry::Lz4DecompressionFailure);
}
decompressed_vec.resize(decompressed_vec.len() * 2, 0);
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
return Ok(decompressed_vec);
}
}
}
}
unsafe fn decompress_zstd(file: &FatbinFileHeader) -> Result<Vec<u8>, ErrorEntry> {
let mut result = Vec::with_capacity(file.uncompressed_payload as usize);
let payload = file.get_payload();
dbg!((payload.len(), file.uncompressed_payload, file.payload_size));
match zstd_safe::decompress(&mut result, payload) {
Ok(actual_size) => {
result.truncate(actual_size);
Ok(result)
}
Err(err) => Err(ErrorEntry::ZstdDecompressionFailure(err)),
}
}

View file

@ -0,0 +1,26 @@
[package]
name = "zluda_dump_blas"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump_blas"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
zluda_dump_common = { path = "../zluda_dump_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcublas.so",
"dump/libcublas.so.12"
]

View file

@ -0,0 +1,48 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_BLAS_LIB")
.ok()
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublas.so".to_string());
unsafe { Library::new(cuda_lib) }.ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_dump_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_base::cublas_function_declarations!(unimplemented);

View file

@ -0,0 +1,26 @@
[package]
name = "zluda_dump_blaslt"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump_blaslt"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
zluda_dump_common = { path = "../zluda_dump_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcublasLt.so",
"dump/libcublasLt.so.12"
]

View file

@ -0,0 +1,49 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_BLASLT_LIB")
.ok()
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublasLt.so".to_string());
unsafe { Library::new(cuda_lib) }.ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_dump_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_base::cublaslt_function_declarations!(unimplemented);
cuda_base::cublaslt_internal_function_declarations!(unimplemented);

View file

@ -0,0 +1,13 @@
[package]
name = "zluda_dump_common"
version = "0.1.0"
edition = "2021"
[lib]
[dependencies]
libloading = "0.8"
cuda_types = { path = "../cuda_types" }
dark_api = { path = "../dark_api" }
format = { path = "../format" }
cglue = "0.3.5"

View file

@ -0,0 +1,378 @@
use cuda_types::cuda::{CUerror, CUresult, CUresultConsts, CUuuid};
use dark_api::ByteVecFfi;
use std::{num::NonZero, ptr, sync::LazyLock};
pub fn get_export_table() -> Option<::dark_api::zluda_dump::ZludaDumpInternal> {
static CU_GET_EXPORT_TABLE: LazyLock<
Result<
unsafe extern "system" fn(*mut *const ::core::ffi::c_void, *const CUuuid) -> CUresult,
libloading::Error,
>,
> = LazyLock::new(|| unsafe { get_dump_table_impl() });
let cu_get_export_table = CU_GET_EXPORT_TABLE.as_ref().ok()?;
let mut ptr = ptr::null();
unsafe { (cu_get_export_table)(&mut ptr, &::dark_api::zluda_dump::ZludaDumpInternal::GUID) }
.ok()?;
Some(unsafe { ::dark_api::zluda_dump::ZludaDumpInternal::new(ptr) })
}
unsafe fn get_dump_table_impl() -> Result<
unsafe extern "system" fn(*mut *const ::core::ffi::c_void, *const CUuuid) -> CUresult,
libloading::Error,
> {
let driver = open_driver()?;
return Ok(
*(driver.get::<unsafe extern "system" fn(
*mut *const ::core::ffi::c_void,
*const CUuuid,
) -> CUresult>(b"cuGetExportTable\0")?),
);
}
fn open_driver() -> Result<libloading::Library, libloading::Error> {
os::open_driver()
}
#[cfg(unix)]
pub(crate) mod os {
use libloading::os;
const RTLD_NOLOAD: i32 = 0x4;
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
unsafe {
os::unix::Library::open(Some("libcuda.so.1"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
.or_else(|_| {
os::unix::Library::open(Some("libcuda.so"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
})
.map(Into::into)
}
}
}
#[cfg(windows)]
pub(crate) mod os {
use libloading::os;
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
unsafe { os::windows::Library::open_already_loaded("nvcuda").map(Into::into) }
}
}
pub trait ReprUsize {
const INTERNAL_ERROR: usize = usize::MAX;
fn to_usize(self) -> usize;
fn from_usize(x: usize) -> Self;
extern "C" fn format_status(x: usize) -> ByteVecFfi;
}
impl ReprUsize for CUresult {
const INTERNAL_ERROR: usize = CUerror::UNKNOWN.0.get() as usize;
fn to_usize(self) -> usize {
match self {
CUresult::SUCCESS => 0,
Err(err) => err.0.get() as usize,
}
}
fn from_usize(x: usize) -> Self {
match NonZero::new(x as u32) {
None => Ok(()),
Some(err) => Err(CUerror(err)),
}
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for usize {
fn to_usize(self) -> usize {
self
}
fn from_usize(x: usize) -> usize {
x
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl<T> ReprUsize for *const T {
fn to_usize(self) -> usize {
self as usize
}
fn from_usize(x: usize) -> Self {
x as Self
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&x, "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cublas::cublasStatus_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize =
cuda_types::cublas::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR.0 as usize;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cudnn9::cudnnStatus_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize =
cuda_types::cublas::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR.0 as usize;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for () {
fn to_usize(self) -> usize {
0
}
fn from_usize(_x: usize) -> Self {
()
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(
&cuda_types::cublas::cublasStatus_t::from_usize(x),
"",
0,
&mut writer,
)
.ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for u32 {
fn to_usize(self) -> usize {
self as usize
}
fn from_usize(x: usize) -> Self {
x as Self
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(
&cuda_types::cublas::cublasStatus_t::from_usize(x),
"",
0,
&mut writer,
)
.ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for i32 {
fn to_usize(self) -> usize {
self as usize
}
fn from_usize(x: usize) -> Self {
x as Self
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(
&cuda_types::cublas::cublasStatus_t::from_usize(x),
"",
0,
&mut writer,
)
.ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for u64 {
fn to_usize(self) -> usize {
self as usize
}
fn from_usize(x: usize) -> Self {
x as Self
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(
&cuda_types::cublas::cublasStatus_t::from_usize(x),
"",
0,
&mut writer,
)
.ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for *mut std::ffi::c_void {
fn to_usize(self) -> usize {
self as usize
}
fn from_usize(x: usize) -> Self {
x as Self
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(
&cuda_types::cublas::cublasStatus_t::from_usize(x),
"",
0,
&mut writer,
)
.ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cufft::cufftResult_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cusparse::cusparseStatus_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize =
cuda_types::cusparse::cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR.0 as usize;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cusparse::cusparseFillMode_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize = 0;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cusparse::cusparseIndexBase_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize = 0;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cusparse::cusparseDiagType_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize = 0;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}
impl ReprUsize for cuda_types::cusparse::cusparseMatrixType_t {
fn to_usize(self) -> usize {
self.0 as usize
}
fn from_usize(x: usize) -> Self {
Self(x as u32)
}
const INTERNAL_ERROR: usize = 0;
extern "C" fn format_status(x: usize) -> ByteVecFfi {
let mut writer = Vec::new();
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
ByteVecFfi::new(writer)
}
}

26
zluda_dump_dnn/Cargo.toml Normal file
View file

@ -0,0 +1,26 @@
[package]
name = "zluda_dump_dnn"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump_dnn"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
zluda_dump_common = { path = "../zluda_dump_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcudnn.so",
"dump/libcudnn.so.9"
]

48
zluda_dump_dnn/src/lib.rs Normal file
View file

@ -0,0 +1,48 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_DNN_LIB")
.ok()
.unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libcudnn.so.9".to_string());
unsafe { Library::new(cuda_lib) }.ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_dump_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_base::cudnn9_function_declarations!(unimplemented);

26
zluda_dump_fft/Cargo.toml Normal file
View file

@ -0,0 +1,26 @@
[package]
name = "zluda_dump_fft"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump_fft"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
zluda_dump_common = { path = "../zluda_dump_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcufft.so",
"dump/libcufft.so.11"
]

48
zluda_dump_fft/src/lib.rs Normal file
View file

@ -0,0 +1,48 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_FFT_LIB")
.ok()
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcufft.so".to_string());
unsafe { Library::new(cuda_lib) }.ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_dump_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_base::cufft_function_declarations!(unimplemented);

View file

@ -0,0 +1,26 @@
[package]
name = "zluda_dump_sparse"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[lib]
name = "zluda_dump_sparse"
crate-type = ["cdylib"]
[dependencies]
format = { path = "../format" }
dark_api = { path = "../dark_api" }
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
zluda_dump_common = { path = "../zluda_dump_common" }
libloading = "0.8"
paste = "1.0"
unwrap_or = "1.0.1"
cglue = "0.3.5"
[package.metadata.zluda]
linux_symlinks = [
"dump/libcusparse.so",
"dump/libcusparse.so.12"
]

View file

@ -0,0 +1,48 @@
use libloading::Library;
use std::sync::LazyLock;
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
fn get_library() -> Option<Library> {
let cuda_lib = std::env::var("ZLUDA_SPARSE_LIB")
.ok()
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcusparse.so".to_string());
unsafe { Library::new(cuda_lib) }.ok()
}
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
use ::zluda_dump_common::ReprUsize;
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
let format_args = dark_api::FnFfiWrapper(|| {
let mut writer = Vec::new();
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
formatter(&mut writer $(, $arg_id)* ).ok();
dark_api::ByteVecFfi::new(writer)
});
let underlying_fn = dark_api::FnFfiWrapper(|| {
let result = fn_ptr( $( $arg_id),* );
ReprUsize::to_usize(result)
});
ReprUsize::from_usize(export_table.logged_call(
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
cglue::trait_obj!(&format_args as dark_api::FnFfi),
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
internal_error_untyped,
<$ret_type as ReprUsize>::format_status)
)
}
)*
};
}
cuda_base::cusparse_function_declarations!(unimplemented);

View file

@ -2,7 +2,7 @@
name = "zluda_inject"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[[bin]]
name = "zluda_with"

View file

@ -2,7 +2,7 @@
name = "zluda_redirect"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[lib]
crate-type = ["cdylib"]

View file

@ -1,8 +1,5 @@
mod r#impl;
pub enum FILE { }
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(