mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-29 04:18:39 +00:00
Redo logging to better log dark API and performance libraries (#372)
This commit is contained in:
parent
5935cfec78
commit
c790ab45ec
60 changed files with 113226 additions and 8125 deletions
335
Cargo.lock
generated
335
Cargo.lock
generated
|
@ -73,10 +73,10 @@ version = "0.70.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"bitflags 2.9.1",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools",
|
||||
"itertools 0.13.0",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
|
@ -93,6 +93,12 @@ version = "0.6.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
|
@ -101,9 +107,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.6.0"
|
||||
version = "2.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
||||
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
||||
|
||||
[[package]]
|
||||
name = "bpaf"
|
||||
|
@ -169,6 +175,8 @@ version = "1.1.24"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
|
@ -187,6 +195,44 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "cglue"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "daea323a7771ea0187f788ab4efe8123ddbc08e62e4be2337a3cfa465530ba53"
|
||||
dependencies = [
|
||||
"cglue-macro",
|
||||
"no-std-compat",
|
||||
"rustc_version",
|
||||
"tarc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cglue-gen"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffebf6faae95d4dc0903c2edb945003e6bcaef789c505b48494d0405aec8f7e2"
|
||||
dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"proc-macro-crate 3.3.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cglue-macro"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61827395d8686e8cec89d1967146af840bf1a3669d72b6a4e65952a71d8cce7e"
|
||||
dependencies = [
|
||||
"cglue-gen",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.8.1"
|
||||
|
@ -256,10 +302,23 @@ dependencies = [
|
|||
name = "cuda_types"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"cuda_base",
|
||||
"hip_runtime-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dark_api"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bit-vec 0.8.0",
|
||||
"cglue",
|
||||
"cuda_types",
|
||||
"format",
|
||||
"paste",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derivative"
|
||||
version = "2.2.0"
|
||||
|
@ -372,6 +431,14 @@ version = "1.0.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "format"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cuda_types",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
|
@ -434,6 +501,15 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
|
@ -449,6 +525,15 @@ version = "1.0.14"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
|
@ -495,12 +580,22 @@ dependencies = [
|
|||
name = "llvm_zluda"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"bitflags 2.9.1",
|
||||
"cc",
|
||||
"cmake",
|
||||
"llvm-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.22"
|
||||
|
@ -606,6 +701,12 @@ dependencies = [
|
|||
"rawpointer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "no-std-compat"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
|
@ -659,7 +760,7 @@ version = "0.4.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffa5a33ddddfee04c0283a7653987d634e880347e96b5b2ed64de07efb59db9d"
|
||||
dependencies = [
|
||||
"proc-macro-crate",
|
||||
"proc-macro-crate 0.1.5",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
|
@ -674,6 +775,29 @@ dependencies = [
|
|||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.15"
|
||||
|
@ -690,6 +814,12 @@ dependencies = [
|
|||
"indexmap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
|
@ -740,6 +870,15 @@ dependencies = [
|
|||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-crate"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
|
||||
dependencies = [
|
||||
"toml_edit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
|
@ -796,7 +935,7 @@ dependencies = [
|
|||
name = "ptx"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
"bit-vec 0.6.3",
|
||||
"bitflags 1.3.2",
|
||||
"comgr",
|
||||
"cuda-driver-sys",
|
||||
|
@ -828,7 +967,7 @@ dependencies = [
|
|||
"ptx_parser_macros",
|
||||
"rustc-hash 2.0.0",
|
||||
"thiserror 1.0.64",
|
||||
"winnow",
|
||||
"winnow 0.6.20",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -875,6 +1014,15 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.0"
|
||||
|
@ -922,13 +1070,22 @@ version = "2.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
|
||||
dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"bitflags 2.9.1",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
|
@ -947,6 +1104,12 @@ version = "1.0.18"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scroll"
|
||||
version = "0.10.2"
|
||||
|
@ -1073,6 +1236,12 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tarc"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d475a3f83354f7eb0e9c207ceb1aaaed47fe9bc1dd4a42008a3cdfd0f5bb3dd"
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.13.0"
|
||||
|
@ -1135,6 +1304,23 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3"
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"toml_datetime",
|
||||
"winnow 0.7.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.13"
|
||||
|
@ -1159,6 +1345,12 @@ version = "1.0.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f2fe1f049979528ce97d8a4672f984f8846fc9975be0cf14ca798116d724c4a"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
|
@ -1300,6 +1492,15 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.7.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "0.0.0"
|
||||
|
@ -1340,6 +1541,8 @@ name = "zluda_bindgen"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"cuda_types",
|
||||
"libloading",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -1375,21 +1578,114 @@ dependencies = [
|
|||
name = "zluda_dump"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"detours-sys",
|
||||
"dynasm",
|
||||
"dynasmrt",
|
||||
"format",
|
||||
"goblin",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"lz4-sys",
|
||||
"parking_lot",
|
||||
"paste",
|
||||
"ptx",
|
||||
"ptx_parser",
|
||||
"regex",
|
||||
"rustc-hash 1.1.0",
|
||||
"unwrap_or",
|
||||
"wchar",
|
||||
"winapi",
|
||||
"zluda_dump_common",
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_blas"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
"paste",
|
||||
"unwrap_or",
|
||||
"zluda_dump_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_blaslt"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
"paste",
|
||||
"unwrap_or",
|
||||
"zluda_dump_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_common"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_dnn"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
"paste",
|
||||
"unwrap_or",
|
||||
"zluda_dump_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_fft"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
"paste",
|
||||
"unwrap_or",
|
||||
"zluda_dump_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zluda_dump_sparse"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"cglue",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"format",
|
||||
"libloading",
|
||||
"paste",
|
||||
"unwrap_or",
|
||||
"zluda_dump_common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1437,3 +1733,22 @@ dependencies = [
|
|||
"cuda_base",
|
||||
"cuda_types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "7.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
||||
dependencies = [
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.15+zstd.1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
|
24
Cargo.toml
24
Cargo.toml
|
@ -3,27 +3,35 @@
|
|||
resolver = "2"
|
||||
|
||||
members = [
|
||||
"ext/hip_runtime-sys",
|
||||
"ext/amd_comgr-sys",
|
||||
"comgr",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"dark_api",
|
||||
"detours-sys",
|
||||
"zluda",
|
||||
"zluda_dump",
|
||||
"zluda_inject",
|
||||
"zluda_redirect",
|
||||
"zluda_ml",
|
||||
"ext/amd_comgr-sys",
|
||||
"ext/hip_runtime-sys",
|
||||
"format",
|
||||
"ptx",
|
||||
"ptx_parser",
|
||||
"ptx_parser_macros",
|
||||
"ptx_parser_macros_impl",
|
||||
"xtask",
|
||||
"zluda",
|
||||
"zluda_bindgen",
|
||||
"zluda_dnn",
|
||||
"zluda_blas",
|
||||
"zluda_blaslt",
|
||||
"zluda_dnn",
|
||||
"zluda_dump",
|
||||
"zluda_dump_blas",
|
||||
"zluda_dump_blaslt",
|
||||
"zluda_dump_common",
|
||||
"zluda_dump_dnn",
|
||||
"zluda_dump_fft",
|
||||
"zluda_dump_sparse",
|
||||
"zluda_fft",
|
||||
"zluda_inject",
|
||||
"zluda_ml",
|
||||
"zluda_redirect",
|
||||
"zluda_sparse",
|
||||
]
|
||||
|
||||
|
|
|
@ -5,16 +5,16 @@ extern "system" {
|
|||
#[must_use]
|
||||
fn cublasLtCreate(
|
||||
lightHandle: *mut cuda_types::cublaslt::cublasLtHandle_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
fn cublasLtDestroy(
|
||||
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
fn cublasLtGetStatusName(
|
||||
status: cuda_types::cublaslt::cublasStatus_t,
|
||||
status: cuda_types::cublas::cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char;
|
||||
fn cublasLtGetStatusString(
|
||||
status: cuda_types::cublaslt::cublasStatus_t,
|
||||
status: cuda_types::cublas::cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char;
|
||||
fn cublasLtGetVersion() -> usize;
|
||||
fn cublasLtGetCudartVersion() -> usize;
|
||||
|
@ -22,15 +22,15 @@ extern "system" {
|
|||
fn cublasLtGetProperty(
|
||||
type_: cuda_types::cublaslt::libraryPropertyType,
|
||||
value: *mut ::core::ffi::c_int,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
fn cublasLtHeuristicsCacheGetCapacity(
|
||||
capacity: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
fn cublasLtHeuristicsCacheSetCapacity(
|
||||
capacity: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
/** Restricts usage of CPU instructions (ISA) specified by the flags in the mask.
|
||||
|
||||
Flags can be combined with bitwise OR(|) operator. Supported flags:
|
||||
|
@ -72,7 +72,7 @@ extern "system" {
|
|||
workspace: *mut ::core::ffi::c_void,
|
||||
workspaceSizeInBytes: usize,
|
||||
stream: cuda_types::cublaslt::cudaStream_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
|
||||
|
||||
|
@ -98,7 +98,7 @@ extern "system" {
|
|||
C: *mut ::core::ffi::c_void,
|
||||
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
|
||||
stream: cuda_types::cublaslt::cudaStream_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/// Internal. Do not use directly.
|
||||
fn cublasLtMatrixLayoutInit_internal(
|
||||
|
@ -108,7 +108,7 @@ extern "system" {
|
|||
rows: u64,
|
||||
cols: u64,
|
||||
ld: i64,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Create new matrix layout descriptor.
|
||||
|
||||
|
@ -120,14 +120,14 @@ extern "system" {
|
|||
rows: u64,
|
||||
cols: u64,
|
||||
ld: i64,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Destroy matrix layout descriptor.
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
|
||||
fn cublasLtMatrixLayoutDestroy(
|
||||
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Set matrix layout descriptor attribute.
|
||||
|
||||
|
@ -144,7 +144,7 @@ extern "system" {
|
|||
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
|
||||
buf: *const ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get matrix layout descriptor attribute.
|
||||
|
||||
|
@ -165,7 +165,7 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/// Internal. Do not use directly.
|
||||
fn cublasLtMatmulDescInit_internal(
|
||||
|
@ -173,7 +173,7 @@ extern "system" {
|
|||
size: usize,
|
||||
computeType: cuda_types::cublaslt::cublasComputeType_t,
|
||||
scaleType: cuda_types::cublaslt::cudaDataType_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Create new matmul operation descriptor.
|
||||
|
||||
|
@ -183,14 +183,14 @@ extern "system" {
|
|||
matmulDesc: *mut cuda_types::cublaslt::cublasLtMatmulDesc_t,
|
||||
computeType: cuda_types::cublaslt::cublasComputeType_t,
|
||||
scaleType: cuda_types::cublaslt::cudaDataType_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Destroy matmul operation descriptor.
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
|
||||
fn cublasLtMatmulDescDestroy(
|
||||
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Set matmul operation descriptor attribute.
|
||||
|
||||
|
@ -207,7 +207,7 @@ extern "system" {
|
|||
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
|
||||
buf: *const ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get matmul operation descriptor attribute.
|
||||
|
||||
|
@ -228,14 +228,14 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/// Internal. Do not use directly.
|
||||
fn cublasLtMatrixTransformDescInit_internal(
|
||||
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
|
||||
size: usize,
|
||||
scaleType: cuda_types::cublaslt::cudaDataType,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Create new matrix transform operation descriptor.
|
||||
|
||||
|
@ -244,14 +244,14 @@ extern "system" {
|
|||
fn cublasLtMatrixTransformDescCreate(
|
||||
transformDesc: *mut cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
|
||||
scaleType: cuda_types::cublaslt::cudaDataType,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Destroy matrix transform operation descriptor.
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
|
||||
fn cublasLtMatrixTransformDescDestroy(
|
||||
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Set matrix transform operation descriptor attribute.
|
||||
|
||||
|
@ -268,7 +268,7 @@ extern "system" {
|
|||
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
|
||||
buf: *const ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get matrix transform operation descriptor attribute.
|
||||
|
||||
|
@ -289,13 +289,13 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/// Internal. Do not use directly.
|
||||
fn cublasLtMatmulPreferenceInit_internal(
|
||||
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
|
||||
size: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Create new matmul heuristic search preference descriptor.
|
||||
|
||||
|
@ -303,14 +303,14 @@ extern "system" {
|
|||
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
|
||||
fn cublasLtMatmulPreferenceCreate(
|
||||
pref: *mut cuda_types::cublaslt::cublasLtMatmulPreference_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Destroy matmul heuristic search preference descriptor.
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
|
||||
fn cublasLtMatmulPreferenceDestroy(
|
||||
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Set matmul heuristic search preference descriptor attribute.
|
||||
|
||||
|
@ -327,7 +327,7 @@ extern "system" {
|
|||
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
|
||||
buf: *const ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get matmul heuristic search preference descriptor attribute.
|
||||
|
||||
|
@ -348,7 +348,7 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Query cublasLt heuristic for algorithm appropriate for given use case.
|
||||
|
||||
|
@ -383,7 +383,7 @@ extern "system" {
|
|||
requestedAlgoCount: ::core::ffi::c_int,
|
||||
heuristicResultsArray: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
|
||||
returnAlgoCount: *mut ::core::ffi::c_int,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Routine to get all algo IDs that can potentially run
|
||||
|
||||
|
@ -405,7 +405,7 @@ extern "system" {
|
|||
requestedAlgoCount: ::core::ffi::c_int,
|
||||
algoIdsArray: *mut ::core::ffi::c_int,
|
||||
returnAlgoCount: *mut ::core::ffi::c_int,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Initialize algo structure
|
||||
|
||||
|
@ -422,7 +422,7 @@ extern "system" {
|
|||
Dtype: cuda_types::cublaslt::cudaDataType_t,
|
||||
algoId: ::core::ffi::c_int,
|
||||
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Check configured algo descriptor for correctness and support on current device.
|
||||
|
||||
|
@ -449,7 +449,7 @@ extern "system" {
|
|||
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
|
||||
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
|
||||
result: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get algo capability attribute.
|
||||
|
||||
|
@ -477,7 +477,7 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Set algo configuration attribute.
|
||||
|
||||
|
@ -494,7 +494,7 @@ extern "system" {
|
|||
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
|
||||
buf: *const ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Get algo configuration attribute.
|
||||
|
||||
|
@ -515,7 +515,7 @@ extern "system" {
|
|||
buf: *mut ::core::ffi::c_void,
|
||||
sizeInBytes: usize,
|
||||
sizeWritten: *mut usize,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Logger callback setter.
|
||||
|
||||
|
@ -524,14 +524,16 @@ extern "system" {
|
|||
\retval CUBLAS_STATUS_SUCCESS if callback was set successfully*/
|
||||
fn cublasLtLoggerSetCallback(
|
||||
callback: cuda_types::cublaslt::cublasLtLoggerCallback_t,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Log file setter.
|
||||
|
||||
\param[in] file an open file with write permissions
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if log file was set successfully*/
|
||||
fn cublasLtLoggerSetFile(file: *mut FILE) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
fn cublasLtLoggerSetFile(
|
||||
file: *mut cuda_types::FILE,
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Open log file.
|
||||
|
||||
|
@ -540,7 +542,7 @@ extern "system" {
|
|||
\retval CUBLAS_STATUS_SUCCESS if log file was created successfully*/
|
||||
fn cublasLtLoggerOpenFile(
|
||||
logFile: *const ::core::ffi::c_char,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Log level setter.
|
||||
|
||||
|
@ -557,7 +559,7 @@ extern "system" {
|
|||
\retval CUBLAS_STATUS_SUCCESS if log level was set successfully*/
|
||||
fn cublasLtLoggerSetLevel(
|
||||
level: ::core::ffi::c_int,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Log mask setter.
|
||||
|
||||
|
@ -572,10 +574,10 @@ extern "system" {
|
|||
\retval CUBLAS_STATUS_SUCCESS if log mask was set successfully*/
|
||||
fn cublasLtLoggerSetMask(
|
||||
mask: ::core::ffi::c_int,
|
||||
) -> cuda_types::cublaslt::cublasStatus_t;
|
||||
) -> cuda_types::cublas::cublasStatus_t;
|
||||
#[must_use]
|
||||
/** Experimental: Disable logging for the entire session.
|
||||
|
||||
\retval CUBLAS_STATUS_SUCCESS if disabled logging*/
|
||||
fn cublasLtLoggerForceDisable() -> cuda_types::cublaslt::cublasStatus_t;
|
||||
fn cublasLtLoggerForceDisable() -> cuda_types::cublas::cublasStatus_t;
|
||||
}
|
||||
|
|
1892
cuda_base/src/cublaslt_internal.rs
Normal file
1892
cuda_base/src/cublaslt_internal.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -51,7 +51,9 @@ extern "system" {
|
|||
callback: cuda_types::cusparse::cusparseLoggerCallback_t,
|
||||
) -> cuda_types::cusparse::cusparseStatus_t;
|
||||
#[must_use]
|
||||
fn cusparseLoggerSetFile(file: *mut FILE) -> cuda_types::cusparse::cusparseStatus_t;
|
||||
fn cusparseLoggerSetFile(
|
||||
file: *mut cuda_types::FILE,
|
||||
) -> cuda_types::cusparse::cusparseStatus_t;
|
||||
#[must_use]
|
||||
fn cusparseLoggerOpenFile(
|
||||
logFile: *const ::core::ffi::c_char,
|
||||
|
|
|
@ -17,6 +17,7 @@ const CUDA_RS: &'static str = include_str! {"cuda.rs"};
|
|||
const NVML_RS: &'static str = include_str! {"nvml.rs"};
|
||||
const CUBLAS_RS: &'static str = include_str! {"cublas.rs"};
|
||||
const CUBLASLT_RS: &'static str = include_str! {"cublaslt.rs"};
|
||||
const CUBLASLT_INTERNAL_RS: &'static str = include_str! {"cublaslt_internal.rs"};
|
||||
const CUFFT_RS: &'static str = include_str! {"cufft.rs"};
|
||||
const CUSPARSE_RS: &'static str = include_str! {"cusparse.rs"};
|
||||
const CUDNN9_RS: &'static str = include_str! {"cudnn9.rs"};
|
||||
|
@ -50,6 +51,11 @@ pub fn cublaslt_function_declarations(tokens: TokenStream) -> TokenStream {
|
|||
function_declarations(tokens, CUBLASLT_RS)
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn cublaslt_internal_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, CUBLASLT_INTERNAL_RS)
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn cufft_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, CUFFT_RS)
|
||||
|
@ -70,23 +76,25 @@ fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
|
|||
let mut cuda_module = syn::parse_str::<File>(module).unwrap();
|
||||
let mut choose_macro = ChooseMacro::new(input);
|
||||
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
|
||||
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
|
||||
extern_
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let abi = extern_.abi.name;
|
||||
for mut item in extern_.items {
|
||||
if let ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ref ident, .. },
|
||||
ref mut attrs,
|
||||
..
|
||||
}) = item
|
||||
{
|
||||
*attrs = Vec::new();
|
||||
choose_macro.add(ident, quote! { #abi #item });
|
||||
for item in cuda_module.items {
|
||||
let extern_ = if let Item::ForeignMod(extern_) = item {
|
||||
extern_
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let abi = extern_.abi.name;
|
||||
for mut item in extern_.items {
|
||||
if let ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ref ident, .. },
|
||||
ref mut attrs,
|
||||
..
|
||||
}) = item
|
||||
{
|
||||
*attrs = Vec::new();
|
||||
choose_macro.add(ident, quote! { #abi #item });
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut result = proc_macro2::TokenStream::new();
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
name = "cuda_types"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
bitflags = "2.9.1"
|
||||
|
|
|
@ -33,40 +33,6 @@ pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3: u32 = 4194304;
|
|||
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2: u32 = 8388608;
|
||||
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK: u32 = 16711680;
|
||||
pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN: u64 = 4294967296;
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_NOT_INITIALIZED: cublasStatus_t = cublasStatus_t(1);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_ALLOC_FAILED: cublasStatus_t = cublasStatus_t(3);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_INVALID_VALUE: cublasStatus_t = cublasStatus_t(7);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_ARCH_MISMATCH: cublasStatus_t = cublasStatus_t(8);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_MAPPING_ERROR: cublasStatus_t = cublasStatus_t(11);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_EXECUTION_FAILED: cublasStatus_t = cublasStatus_t(13);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_INTERNAL_ERROR: cublasStatus_t = cublasStatus_t(14);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_NOT_SUPPORTED: cublasStatus_t = cublasStatus_t(15);
|
||||
}
|
||||
impl cublasStatus_t {
|
||||
pub const CUBLAS_STATUS_LICENSE_ERROR: cublasStatus_t = cublasStatus_t(16);
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[must_use]
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
|
||||
pub struct cublasStatus_t(pub ::core::ffi::c_uint);
|
||||
impl cublasFillMode_t {
|
||||
pub const CUBLAS_FILL_MODE_LOWER: cublasFillMode_t = cublasFillMode_t(0);
|
||||
}
|
||||
|
@ -5116,7 +5082,7 @@ pub struct cublasLtMatmulHeuristicResult_t {
|
|||
pub workspaceSize: usize,
|
||||
/** Result status, other fields are only valid if after call to cublasLtMatmulAlgoGetHeuristic() this member is set to
|
||||
CUBLAS_STATUS_SUCCESS.*/
|
||||
pub state: cublasStatus_t,
|
||||
pub state: super::cublas::cublasStatus_t,
|
||||
/** Waves count - a device utilization metric.
|
||||
|
||||
wavesCount value of 1.0f suggests that when kernel is launched it will fully occupy the GPU.*/
|
||||
|
|
|
@ -19,24 +19,6 @@ pub const CUSPARSE_VER_BUILD: u32 = 93;
|
|||
pub const CUSPARSE_VERSION: u32 = 12508;
|
||||
/// Result information returned by cudaGraphExecUpdate
|
||||
pub type cudaGraphExecUpdateResultInfo = cudaGraphExecUpdateResultInfo_st;
|
||||
/// Information describing an async notification event
|
||||
#[repr(C)]
|
||||
pub struct cudaAsyncNotificationInfo {
|
||||
pub type_: cudaAsyncNotificationType,
|
||||
pub info: cudaAsyncNotificationInfo__bindgen_ty_1,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub union cudaAsyncNotificationInfo__bindgen_ty_1 {
|
||||
pub overBudget: cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
|
||||
pub struct cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1 {
|
||||
pub bytesOverBudget: ::core::ffi::c_ulonglong,
|
||||
}
|
||||
/// Information describing an async notification event
|
||||
pub type cudaAsyncNotificationInfo_t = cudaAsyncNotificationInfo;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct cusparseContext {
|
||||
|
|
199
cuda_types/src/dark_api.rs
Normal file
199
cuda_types/src/dark_api.rs
Normal file
|
@ -0,0 +1,199 @@
|
|||
use bitflags::bitflags;
|
||||
use std::ffi::{c_uint, c_ulonglong, c_ushort, c_void};
|
||||
|
||||
/*
|
||||
fat_cubin:
|
||||
typedef struct {
|
||||
int magic;
|
||||
int version;
|
||||
const unsigned long long* data;
|
||||
void *filename_or_fatbins; /* version 1: offline filename,
|
||||
* version 2: array of prelinked fatbins */
|
||||
} __fatBinC_Wrapper_t;
|
||||
|
||||
data start with this header:
|
||||
#define FATBIN_MAGIC 0xBA55ED50U
|
||||
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
|
||||
#define FATBIN_VERSION 0x0001U
|
||||
|
||||
struct fatbinary_ALIGN_(8) fatBinaryHeader
|
||||
{
|
||||
unsigned int magic; // FATBIN_MAGIC
|
||||
unsigned short version; // FATBIN_VERSION
|
||||
unsigned short headerSize;
|
||||
unsigned long long int fatSize; // size of the entire fat binary excluding this header
|
||||
};
|
||||
|
||||
there's binary data after header
|
||||
|
||||
*/
|
||||
|
||||
#[repr(C)]
|
||||
pub struct FatbincWrapper {
|
||||
pub magic: c_uint,
|
||||
pub version: c_uint,
|
||||
pub data: *const FatbinHeader,
|
||||
pub filename_or_fatbins: *const c_void,
|
||||
}
|
||||
|
||||
#[repr(C, align(8))]
|
||||
pub struct FatbinHeader {
|
||||
pub magic: c_uint,
|
||||
pub version: c_ushort,
|
||||
pub header_size: c_ushort,
|
||||
pub files_size: c_ulonglong, // excluding frame header, size of all blocks framed by this frame
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct FatbinFileHeader {
|
||||
pub kind: c_ushort,
|
||||
pub version: c_ushort,
|
||||
pub header_size: c_uint,
|
||||
pub padded_payload_size: c_uint,
|
||||
pub unknown0: c_uint, // check if it's written into separately
|
||||
pub payload_size: c_uint,
|
||||
pub unknown1: c_uint,
|
||||
pub unknown2: c_uint,
|
||||
pub sm_version: c_uint,
|
||||
pub bit_width: c_uint,
|
||||
pub unknown3: c_uint,
|
||||
pub flags: FatbinFileHeaderFlags,
|
||||
pub unknown5: c_ulonglong,
|
||||
pub uncompressed_payload: c_ulonglong,
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
pub struct FatbinFileHeaderFlags: u64 {
|
||||
const Is64Bit = 0x0000000000000001;
|
||||
const Debug = 0x0000000000000002;
|
||||
const Linux = 0x0000000000000010;
|
||||
const Mac = 0x0000000000000020;
|
||||
const Windows = 0x0000000000000040;
|
||||
const CompressedLz4 = 0x0000000000002000;
|
||||
const CompressedZstd = 0x0000000000008000;
|
||||
|
||||
const _ = !0;
|
||||
}
|
||||
}
|
||||
|
||||
impl FatbincWrapper {
|
||||
pub const MAGIC: c_uint = 0x466243B1;
|
||||
const VERSION_V1: c_uint = 0x1;
|
||||
pub const VERSION_V2: c_uint = 0x2;
|
||||
|
||||
pub fn new<'a, T: Sized>(ptr: &*const T) -> Result<&'a Self, ParseError> {
|
||||
unsafe { ptr.cast::<Self>().as_ref() }
|
||||
.ok_or(ParseError::NullPointer("FatbincWrapper"))
|
||||
.and_then(|ptr| {
|
||||
ParseError::check_fields("FATBINC_MAGIC", ptr.magic, [Self::MAGIC])?;
|
||||
ParseError::check_fields(
|
||||
"FATBINC_VERSION",
|
||||
ptr.version,
|
||||
[Self::VERSION_V1, Self::VERSION_V2],
|
||||
)?;
|
||||
Ok(ptr)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FatbinHeader {
|
||||
const MAGIC: c_uint = 0xBA55ED50;
|
||||
const VERSION: c_ushort = 0x01;
|
||||
|
||||
pub fn new<'a, T: Sized>(ptr: &'a *const T) -> Result<&'a Self, ParseError> {
|
||||
unsafe { ptr.cast::<Self>().as_ref() }
|
||||
.ok_or(ParseError::NullPointer("FatbinHeader"))
|
||||
.and_then(|ptr| {
|
||||
ParseError::check_fields("FATBIN_MAGIC", ptr.magic, [Self::MAGIC])?;
|
||||
ParseError::check_fields("FATBIN_VERSION", ptr.version, [Self::VERSION])?;
|
||||
Ok(ptr)
|
||||
})
|
||||
}
|
||||
|
||||
pub unsafe fn get_content<'a>(&'a self) -> &'a [u8] {
|
||||
let start = std::ptr::from_ref(self)
|
||||
.cast::<u8>()
|
||||
.add(self.header_size as usize);
|
||||
std::slice::from_raw_parts(start, self.files_size as usize)
|
||||
}
|
||||
}
|
||||
|
||||
impl FatbinFileHeader {
|
||||
pub const HEADER_KIND_PTX: c_ushort = 0x01;
|
||||
pub const HEADER_KIND_ELF: c_ushort = 0x02;
|
||||
const HEADER_VERSION_CURRENT: c_ushort = 0x101;
|
||||
|
||||
pub fn new_ptx<T: Sized>(ptr: *const T) -> Result<Option<&'static Self>, ParseError> {
|
||||
unsafe { ptr.cast::<Self>().as_ref() }
|
||||
.ok_or(ParseError::NullPointer("FatbinFileHeader"))
|
||||
.and_then(|ptr| {
|
||||
ParseError::check_fields(
|
||||
"FATBIN_FILE_HEADER_VERSION_CURRENT",
|
||||
ptr.version,
|
||||
[Self::HEADER_VERSION_CURRENT],
|
||||
)?;
|
||||
match ptr.kind {
|
||||
Self::HEADER_KIND_PTX => Ok(Some(ptr)),
|
||||
Self::HEADER_KIND_ELF => Ok(None),
|
||||
_ => Err(ParseError::UnexpectedBinaryField {
|
||||
field_name: "FATBIN_FILE_HEADER_KIND",
|
||||
observed: ptr.kind.into(),
|
||||
expected: vec![Self::HEADER_KIND_PTX.into(), Self::HEADER_KIND_ELF.into()],
|
||||
}),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub unsafe fn next<'a>(slice: &'a mut &[u8]) -> Result<Option<&'a Self>, ParseError> {
|
||||
if slice.len() < std::mem::size_of::<Self>() {
|
||||
return Ok(None);
|
||||
}
|
||||
let this = &*slice.as_ptr().cast::<Self>();
|
||||
let next_element = slice
|
||||
.split_at_checked(this.header_size as usize + this.padded_payload_size as usize)
|
||||
.map(|(_, next)| next);
|
||||
*slice = next_element.unwrap_or(&[]);
|
||||
ParseError::check_fields(
|
||||
"FATBIN_FILE_HEADER_VERSION_CURRENT",
|
||||
this.version,
|
||||
[Self::HEADER_VERSION_CURRENT],
|
||||
)?;
|
||||
Ok(Some(this))
|
||||
}
|
||||
|
||||
pub unsafe fn get_payload<'a>(&'a self) -> &'a [u8] {
|
||||
let start = std::ptr::from_ref(self)
|
||||
.cast::<u8>()
|
||||
.add(self.header_size as usize);
|
||||
std::slice::from_raw_parts(start, self.payload_size as usize)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ParseError {
|
||||
NullPointer(&'static str),
|
||||
UnexpectedBinaryField {
|
||||
field_name: &'static str,
|
||||
observed: u32,
|
||||
expected: Vec<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ParseError {
|
||||
pub(crate) fn check_fields<const N: usize, T: Into<u32> + Eq + Copy>(
|
||||
name: &'static str,
|
||||
observed: T,
|
||||
expected: [T; N],
|
||||
) -> Result<(), Self> {
|
||||
if expected.contains(&observed) {
|
||||
Ok(())
|
||||
} else {
|
||||
let observed = observed.into();
|
||||
let expected = expected.into_iter().map(Into::into).collect();
|
||||
Err(ParseError::UnexpectedBinaryField {
|
||||
field_name: name,
|
||||
expected,
|
||||
observed,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,3 +1,5 @@
|
|||
pub enum FILE {}
|
||||
|
||||
pub mod cublas;
|
||||
pub mod cublaslt;
|
||||
pub mod cuda;
|
||||
|
@ -7,3 +9,4 @@ pub mod cudnn9;
|
|||
pub mod cufft;
|
||||
pub mod cusparse;
|
||||
pub mod nvml;
|
||||
pub mod dark_api;
|
||||
|
|
12
dark_api/Cargo.toml
Normal file
12
dark_api/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "dark_api"
|
||||
version = "0.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
format = { path = "../format" }
|
||||
uuid = "1.16"
|
||||
paste = "1.0"
|
||||
bit-vec = "0.8.0"
|
||||
cglue = "0.3.5"
|
712
dark_api/src/lib.rs
Normal file
712
dark_api/src/lib.rs
Normal file
|
@ -0,0 +1,712 @@
|
|||
use std::ffi::c_void;
|
||||
|
||||
use cuda_types::cuda::CUuuid;
|
||||
|
||||
macro_rules! dark_api_init {
|
||||
(SIZE_OF, $table_len:literal, $type_:ty) => {
|
||||
(std::mem::size_of::<usize>() * $table_len) as *const std::ffi::c_void
|
||||
};
|
||||
(NULL, $table_len:literal, $type_:ty) => {
|
||||
std::ptr::null()
|
||||
};
|
||||
($fn_:ident, $table_len:literal, $type_:ty) => {
|
||||
<$type_>::$fn_ as *const std::ffi::c_void
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! dark_api_fn {
|
||||
(SIZE_OF) => { };
|
||||
(NULL) => { };
|
||||
($fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
|
||||
unsafe extern "system" fn $fn_(
|
||||
$($arg_id : $arg_type,)*
|
||||
) -> $ret_type;
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! dark_api_entry {
|
||||
($idx:literal, SIZE_OF) => { };
|
||||
($idx:literal, NULL) => { };
|
||||
($idx:literal, $fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
|
||||
#[allow(non_snake_case)]
|
||||
pub unsafe fn $fn_(
|
||||
&self,
|
||||
$($arg_id : $arg_type,)*
|
||||
) -> $ret_type {
|
||||
let ptr = self.ptr as *const *const std::ffi::c_void;
|
||||
let ptr = ptr.add($idx);
|
||||
let fn_ = std::mem::transmute::<_, unsafe extern "system" fn( $($arg_type,)* ) -> $ret_type >(*ptr);
|
||||
(fn_)( $($arg_id,)* )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! dark_api_format_args {
|
||||
($writer:ident; $arg_idx:ident; $first_arg:ident $(, $arg_id:ident)*) => {
|
||||
$writer.write_all(concat!(stringify!($first_arg), ": ").as_bytes())?;
|
||||
format::CudaDisplay::write(& $first_arg, "", $arg_idx, $writer)?;
|
||||
$(
|
||||
$arg_idx += 1;
|
||||
$writer.write_all(concat!(", ", stringify!($arg_id), ": ").as_bytes())?;
|
||||
format::CudaDisplay::write(& $arg_id, "", $arg_idx, $writer)?;
|
||||
)*
|
||||
};
|
||||
($writer:ident; $arg_idx:ident; ) => {
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! dark_api_is_fn {
|
||||
(SIZE_OF) => {
|
||||
false
|
||||
};
|
||||
(NULL) => {
|
||||
false
|
||||
};
|
||||
($fn_:ident) => {
|
||||
true
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! dark_api_format_fn {
|
||||
(SIZE_OF) => { };
|
||||
(NULL) => { };
|
||||
(#[noformat] $fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => { };
|
||||
($fn_:ident ( $($arg_id:ident: $arg_type:ty),* ) -> $ret_type:ty) => {
|
||||
pub fn $fn_ (
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
$($arg_id: $arg_type,)*
|
||||
) -> std::io::Result<()> {
|
||||
#[allow(unused)]
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
dark_api_format_args!(writer; arg_idx; $($arg_id),*);
|
||||
writer.write_all(b")")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! dark_api {
|
||||
(
|
||||
$mod_name: ident;
|
||||
$(
|
||||
$guid:expr => $name:ident [$len:literal] {
|
||||
$(
|
||||
$(#[$attr:ident])?
|
||||
[$index:literal] = $fn_:ident $( ( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty )?
|
||||
),*
|
||||
}
|
||||
),+
|
||||
) => {
|
||||
pub mod $mod_name {
|
||||
#[allow(non_snake_case)]
|
||||
pub struct CudaDarkApiGlobalTable {
|
||||
$(pub $name: [*const std::ffi::c_void; $len],)+
|
||||
}
|
||||
|
||||
impl CudaDarkApiGlobalTable {
|
||||
$(const $name: cuda_types::cuda::CUuuid = cuda_types::cuda::CUuuid { bytes: *uuid::uuid!($guid).as_bytes() };)+
|
||||
}
|
||||
|
||||
unsafe impl Sync for CudaDarkApiGlobalTable {}
|
||||
|
||||
impl CudaDarkApiGlobalTable {
|
||||
pub const fn new<T: CudaDarkApi>() -> Self {
|
||||
let mut result = Self {
|
||||
$(
|
||||
$name: [std::ptr::null(); $len],
|
||||
)+
|
||||
};
|
||||
$(
|
||||
$( result.$name[$index] = dark_api_init!($fn_, $len, T); )*
|
||||
)+
|
||||
result
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &cuda_types::cuda::CUuuid) -> Option<crate::DarkApiTable> {
|
||||
match key {
|
||||
$(
|
||||
&Self::$name => {
|
||||
let fns = &self.$name[..];
|
||||
let mut valid_fns = bit_vec::BitVec::from_elem($len, false);
|
||||
$(
|
||||
valid_fns.set($index, dark_api_is_fn!($fn_) );
|
||||
)*
|
||||
Some(crate::DarkApiTable {
|
||||
fns,
|
||||
valid_fns
|
||||
})
|
||||
}
|
||||
)+
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CudaDarkApi {
|
||||
$($(
|
||||
dark_api_fn!($fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )?);
|
||||
)*)+
|
||||
}
|
||||
|
||||
pub fn guid_to_name(guid: &cuda_types::cuda::CUuuid, index: usize) -> Option<(&'static str, Option<&'static str>)> {
|
||||
let guid = uuid::Uuid::from_bytes(guid.bytes);
|
||||
$(
|
||||
if guid == uuid::uuid!($guid) {
|
||||
let guid = stringify!($name);
|
||||
$(
|
||||
if index == $index {
|
||||
return Some((guid, Some(stringify!($fn_))));
|
||||
}
|
||||
)*
|
||||
return Some((guid, None));
|
||||
}
|
||||
)+
|
||||
None
|
||||
}
|
||||
|
||||
$(
|
||||
paste::paste! {
|
||||
pub struct [<$name:camel>] {
|
||||
#[allow(dead_code)]
|
||||
ptr: *const std::ffi::c_void
|
||||
}
|
||||
|
||||
impl [<$name:camel>] {
|
||||
pub const GUID: cuda_types::cuda::CUuuid = CudaDarkApiGlobalTable::$name;
|
||||
|
||||
pub unsafe fn new(ptr: *const std::ffi::c_void) -> Self {
|
||||
Self {
|
||||
ptr
|
||||
}
|
||||
}
|
||||
$(
|
||||
dark_api_entry!($index, $fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )?);
|
||||
)*
|
||||
}
|
||||
}
|
||||
)+
|
||||
|
||||
pub mod format {
|
||||
$($(
|
||||
dark_api_format_fn!($(#[$attr])? $fn_ $( ( $($arg_id: $arg_type),* ) -> $ret_type )? );
|
||||
)*)+
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub struct DarkApiTable<'a> {
|
||||
fns: &'a [*const std::ffi::c_void],
|
||||
valid_fns: bit_vec::BitVec,
|
||||
}
|
||||
|
||||
impl<'a> DarkApiTable<'a> {
|
||||
pub fn len(&self) -> usize {
|
||||
self.fns.len()
|
||||
}
|
||||
|
||||
pub fn get_fn(&self, idx: usize) -> Option<*const std::ffi::c_void> {
|
||||
if self.valid_fns.get(idx).unwrap_or(false) {
|
||||
Some(self.fns[idx])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) -> *const std::ffi::c_void {
|
||||
self.fns.as_ptr().cast()
|
||||
}
|
||||
}
|
||||
|
||||
dark_api! {
|
||||
cuda;
|
||||
"{6BD5FB6C-5BF4-E74A-8987-D93912FD9DF9}" => CUDART_INTERFACE[10] {
|
||||
[0] = SIZE_OF,
|
||||
[1] = get_module_from_cubin(
|
||||
module: *mut cuda_types::cuda::CUmodule,
|
||||
fatbinc_wrapper: *const cuda_types::dark_api::FatbincWrapper
|
||||
) -> (),
|
||||
[2] = cudart_interface_fn2(
|
||||
pctx: *mut cuda_types::cuda::CUcontext,
|
||||
dev: cuda_types::cuda::CUdevice
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[6] = get_module_from_cubin_ext1(
|
||||
result: *mut cuda_types::cuda::CUmodule,
|
||||
fatbinc_wrapper: *const cuda_types::dark_api::FatbincWrapper,
|
||||
arg3: *mut std::ffi::c_void,
|
||||
arg4: *mut std::ffi::c_void,
|
||||
arg5: u32
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[7] = cudart_interface_fn7(arg1: usize) -> cuda_types::cuda::CUresult,
|
||||
[8] = get_module_from_cubin_ext2(
|
||||
fatbin_header: *const cuda_types::dark_api::FatbinHeader,
|
||||
result: *mut cuda_types::cuda::CUmodule,
|
||||
arg3: *mut std::ffi::c_void,
|
||||
arg4: *mut std::ffi::c_void,
|
||||
arg5: u32
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
"{42D85A81-23F6-CB47-8298-F6E78A3AECDC}" => TOOLS_TLS[4] {
|
||||
[0] = SIZE_OF
|
||||
},
|
||||
"{A094798C-2E74-2E74-93F2-0800200C0A66}" => TOOLS_RUNTIME_CALLBACK_HOOKS[7] {
|
||||
[0] = SIZE_OF,
|
||||
[2] = runtime_callback_hooks_fn2(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> (),
|
||||
[6] = runtime_callback_hooks_fn6(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> ()
|
||||
},
|
||||
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
||||
[0] = context_local_storage_ctor(
|
||||
context: cuda_types::cuda::CUcontext,
|
||||
manager: *mut std::ffi::c_void, // ContextStateManager
|
||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||
dtor_cb: Option<extern "system" fn(
|
||||
cuda_types::cuda::CUcontext,
|
||||
*mut std::ffi::c_void, // ContextStateManager
|
||||
*mut std::ffi::c_void, // ContextState
|
||||
)>
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[1] = context_local_storage_dtor(
|
||||
arg1: *mut std::ffi::c_void,
|
||||
arg2: *mut std::ffi::c_void
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[2] = context_local_storage_get_state(
|
||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
||||
cu_ctx: cuda_types::cuda::CUcontext,
|
||||
manager: *mut std::ffi::c_void // ContextStateManager
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
"{0CA50B8C-1004-929A-89A7-D0DF10E77286}" => CTX_CREATE_BYPASS[2] {
|
||||
[0] = SIZE_OF,
|
||||
[1] = ctx_create_v2_bypass(
|
||||
pctx: *mut cuda_types::cuda::CUcontext,
|
||||
flags: ::std::os::raw::c_uint,
|
||||
dev: cuda_types::cuda::CUdevice
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
"{195BCBF4-D67D-024A-ACC5-1D29CEA631AE}" => HEAP_ACCESS[3] {
|
||||
[0] = SIZE_OF,
|
||||
[1] = heap_alloc(
|
||||
heap_alloc_record_ptr: *mut *const std::ffi::c_void, // HeapAllocRecord
|
||||
arg2: usize,
|
||||
arg3: usize
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[2] = heap_free(
|
||||
heap_alloc_record_ptr: *const std::ffi::c_void, // HeapAllocRecord
|
||||
arg2: *mut usize
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
"{B10541E1-F7C7-C74A-9F64-F223BE99F1E2}" => DEVICE_EXTENDED_RT[26] {
|
||||
[0] = SIZE_OF,
|
||||
[5] = device_get_attribute_ext(
|
||||
dev: cuda_types::cuda::CUdevice,
|
||||
attribute: std::ffi::c_uint,
|
||||
unknown: std::ffi::c_int,
|
||||
result: *mut [usize; 2]
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
// I don't know is this function return, but on my GTX 1060 it returns 0
|
||||
[13] = device_get_something(
|
||||
result: *mut std::ffi::c_uchar,
|
||||
dev: cuda_types::cuda::CUdevice
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
"{D4082055-BDE6-704B-8D34-BA123C66E1F2}" => INTEGRITY_CHECK[3] {
|
||||
[0] = SIZE_OF,
|
||||
[1] = integrity_check(
|
||||
version: u32,
|
||||
unix_seconds: u64,
|
||||
result: *mut [u64;2]
|
||||
) -> cuda_types::cuda::CUresult
|
||||
},
|
||||
// This functions check for some bits that are never observably set
|
||||
"{263E8860-7CD2-6143-92F6-BBD5006DFA7E}" => UNKNOWN_CHECKS[4] {
|
||||
[0] = SIZE_OF,
|
||||
[2] = context_check(
|
||||
ctx_in: cuda_types::cuda::CUcontext,
|
||||
result1: *mut u32, // seems to be always 0
|
||||
result2: *mut *const std::ffi::c_void
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[3] = check_fn3() -> u32 // seeems to always return 0
|
||||
}
|
||||
}
|
||||
|
||||
// Purely for internal use by ZLUDA dump
|
||||
dark_api! {
|
||||
zluda_dump;
|
||||
"{0B7A5827-AF98-46AB-A951-22D19BDF5C08}" => ZLUDA_DUMP_INTERNAL[1] {
|
||||
#[noformat]
|
||||
[0] = logged_call(
|
||||
fn_name: cglue::slice::CSliceRef<'static, u8>,
|
||||
args: crate::FnFfiRef<crate::ByteVecFfi>,
|
||||
fn_: crate::FnFfiRef<usize>,
|
||||
internal_error: usize,
|
||||
format_status: extern "C" fn(usize) -> crate::ByteVecFfi
|
||||
) -> usize
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct ByteVecFfi {
|
||||
ptr: *mut u8,
|
||||
len: usize,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl ByteVecFfi {
|
||||
pub fn new(mut v: Vec<u8>) -> Self {
|
||||
let (ptr, len, capacity) = (v.as_mut_ptr(), v.len(), v.capacity());
|
||||
std::mem::forget(v);
|
||||
Self { ptr, len, capacity }
|
||||
}
|
||||
|
||||
pub fn to_vec(self) -> Vec<u8> {
|
||||
let vec = unsafe { Vec::from_raw_parts(self.ptr, self.len, self.capacity) };
|
||||
std::mem::forget(self);
|
||||
vec
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ByteVecFfi {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: We are dropping the Vec<u8> that we created in `from`
|
||||
// and we know that the pointer is valid.
|
||||
unsafe {
|
||||
let _ = Vec::from_raw_parts(self.ptr, self.len, self.capacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cglue::cglue_trait]
|
||||
pub trait FnFfi {
|
||||
type Output;
|
||||
fn call(&self) -> Self::Output;
|
||||
}
|
||||
|
||||
// We use this wrapper instead of implementing `FnFfi` for all T that implement `Fn() -> Output`
|
||||
// because cglue machinery already provided blanket implementation of `FnFfi` for its own needs
|
||||
// `cglue_trait_ext` does not work with `Fn` traits because they are special
|
||||
#[repr(transparent)]
|
||||
pub struct FnFfiWrapper<Output, T: std::ops::Fn() -> Output>(pub T);
|
||||
|
||||
impl<Output, T: std::ops::Fn() -> Output> FnFfi for FnFfiWrapper<Output, T> {
|
||||
type Output = Output;
|
||||
fn call(&self) -> Output {
|
||||
(self.0)()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn integrity_check(
|
||||
version: u32,
|
||||
unix_seconds: u64,
|
||||
driver_version: u32,
|
||||
current_process: u32,
|
||||
current_thread: u32,
|
||||
integrity_check_table: *const c_void,
|
||||
cudart_table: *const c_void,
|
||||
fn_address: *const c_void,
|
||||
devices: u32,
|
||||
get_device: impl FnMut(u32) -> DeviceHashinfo,
|
||||
) -> [u64; 2] {
|
||||
match version % 10 {
|
||||
0 => return [0x3341181C03CB675C, 0x8ED383AA1F4CD1E8],
|
||||
1 => return [0x1841181C03CB675C, 0x8ED383AA1F4CD1E8],
|
||||
_ => {}
|
||||
}
|
||||
// There's first computation pass, but it does not use any input and effectively computes this
|
||||
let pass1_result = [
|
||||
0x14u8, 0x6A, 0xDD, 0xAE, 0x53, 0xA9, 0xA7, 0x52, 0xAA, 0x08, 0x41, 0x36, 0x0B, 0xF5, 0x5A,
|
||||
0x9F,
|
||||
];
|
||||
let mut result = [0u8; 66];
|
||||
pass2(&mut result, &pass1_result);
|
||||
let pass3_input = Pass3Input {
|
||||
driver_version,
|
||||
version,
|
||||
current_process,
|
||||
current_thread,
|
||||
cudart_table,
|
||||
integrity_check_table,
|
||||
fn_address,
|
||||
unix_seconds,
|
||||
};
|
||||
pass3(&mut result, &pass3_input);
|
||||
pass4(&mut result, devices, get_device);
|
||||
let pass5_1 = pass5(&mut result);
|
||||
zero_result(&mut result);
|
||||
pass6(&mut result, &pass1_result);
|
||||
pass7(&mut result, &pass5_1);
|
||||
pass5(&mut result)
|
||||
}
|
||||
|
||||
fn pass7(accumulator: &mut [u8; 66], pass5_1: &[u64; 2]) {
|
||||
hash_pass(accumulator, pass5_1, 0);
|
||||
}
|
||||
|
||||
fn pass6(accumulator: &mut [u8; 66], pass1_result: &[u8; 16]) {
|
||||
hash_pass(accumulator, pass1_result, 0x5c);
|
||||
}
|
||||
|
||||
fn zero_result(result: &mut [u8; 66]) {
|
||||
for i in 0..16 {
|
||||
result[i] = 0;
|
||||
}
|
||||
for i in 48..66 {
|
||||
result[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
fn pass5(result: &mut [u8; 66]) -> [u64; 2] {
|
||||
let temp = 16u8.wrapping_sub(result[64]);
|
||||
for _ in 0..temp {
|
||||
integrity_check_single_pass(result, temp);
|
||||
}
|
||||
let mut temp_ptr = unsafe { result.as_mut_ptr().add(0x30) };
|
||||
loop {
|
||||
let temp = unsafe { *temp_ptr };
|
||||
temp_ptr = unsafe { temp_ptr.add(1) };
|
||||
integrity_check_single_pass(result, temp);
|
||||
if temp_ptr == unsafe { result.as_mut_ptr().add(0x40) } {
|
||||
break;
|
||||
}
|
||||
}
|
||||
[
|
||||
u64::from_ne_bytes(result[0..8].try_into().unwrap()),
|
||||
u64::from_ne_bytes(result[8..16].try_into().unwrap()),
|
||||
]
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
struct Pass3Input {
|
||||
driver_version: u32,
|
||||
version: u32,
|
||||
current_process: u32,
|
||||
current_thread: u32,
|
||||
cudart_table: *const c_void,
|
||||
integrity_check_table: *const c_void,
|
||||
fn_address: *const c_void,
|
||||
unix_seconds: u64,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct DeviceHashinfo {
|
||||
pub guid: CUuuid,
|
||||
pub pci_domain: i32,
|
||||
pub pci_bus: i32,
|
||||
pub pci_device: i32,
|
||||
}
|
||||
|
||||
fn pass2(accumulator: &mut [u8; 66], pass1_result: &[u8; 16]) {
|
||||
hash_pass(accumulator, pass1_result, 0x36)
|
||||
}
|
||||
|
||||
fn pass3(accumulator: &mut [u8; 66], mixin: &Pass3Input) {
|
||||
hash_pass(accumulator, mixin, 0)
|
||||
}
|
||||
|
||||
fn pass4(
|
||||
accumulator: &mut [u8; 66],
|
||||
devices: u32,
|
||||
mut get_device: impl FnMut(u32) -> DeviceHashinfo,
|
||||
) {
|
||||
for dev in 0..devices {
|
||||
hash_pass(accumulator, &(get_device)(dev), 0)
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_pass<T: Sized>(accumulator: &mut [u8; 66], mixin: &T, xor_mask: u8) {
|
||||
for i in 0..std::mem::size_of_val(mixin) {
|
||||
integrity_check_single_pass(
|
||||
accumulator,
|
||||
unsafe { *std::ptr::from_ref(mixin).cast::<u8>().add(i) } ^ xor_mask,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn integrity_check_single_pass(arg1: &mut [u8; 66], arg2: u8) {
|
||||
const MIXING_TABLE: [u8; 256] = [
|
||||
0x29, 0x2E, 0x43, 0xC9, 0xA2, 0xD8, 0x7C, 0x01, 0x3D, 0x36, 0x54, 0xA1, 0xEC, 0xF0, 0x06,
|
||||
0x13, 0x62, 0xA7, 0x05, 0xF3, 0xC0, 0xC7, 0x73, 0x8C, 0x98, 0x93, 0x2B, 0xD9, 0xBC, 0x4C,
|
||||
0x82, 0xCA, 0x1E, 0x9B, 0x57, 0x3C, 0xFD, 0xD4, 0xE0, 0x16, 0x67, 0x42, 0x6F, 0x18, 0x8A,
|
||||
0x17, 0xE5, 0x12, 0xBE, 0x4E, 0xC4, 0xD6, 0xDA, 0x9E, 0xDE, 0x49, 0xA0, 0xFB, 0xF5, 0x8E,
|
||||
0xBB, 0x2F, 0xEE, 0x7A, 0xA9, 0x68, 0x79, 0x91, 0x15, 0xB2, 0x07, 0x3F, 0x94, 0xC2, 0x10,
|
||||
0x89, 0x0B, 0x22, 0x5F, 0x21, 0x80, 0x7F, 0x5D, 0x9A, 0x5A, 0x90, 0x32, 0x27, 0x35, 0x3E,
|
||||
0xCC, 0xE7, 0xBF, 0xF7, 0x97, 0x03, 0xFF, 0x19, 0x30, 0xB3, 0x48, 0xA5, 0xB5, 0xD1, 0xD7,
|
||||
0x5E, 0x92, 0x2A, 0xAC, 0x56, 0xAA, 0xC6, 0x4F, 0xB8, 0x38, 0xD2, 0x96, 0xA4, 0x7D, 0xB6,
|
||||
0x76, 0xFC, 0x6B, 0xE2, 0x9C, 0x74, 0x04, 0xF1, 0x45, 0x9D, 0x70, 0x59, 0x64, 0x71, 0x87,
|
||||
0x20, 0x86, 0x5B, 0xCF, 0x65, 0xE6, 0x2D, 0xA8, 0x02, 0x1B, 0x60, 0x25, 0xAD, 0xAE, 0xB0,
|
||||
0xB9, 0xF6, 0x1C, 0x46, 0x61, 0x69, 0x34, 0x40, 0x7E, 0x0F, 0x55, 0x47, 0xA3, 0x23, 0xDD,
|
||||
0x51, 0xAF, 0x3A, 0xC3, 0x5C, 0xF9, 0xCE, 0xBA, 0xC5, 0xEA, 0x26, 0x2C, 0x53, 0x0D, 0x6E,
|
||||
0x85, 0x28, 0x84, 0x09, 0xD3, 0xDF, 0xCD, 0xF4, 0x41, 0x81, 0x4D, 0x52, 0x6A, 0xDC, 0x37,
|
||||
0xC8, 0x6C, 0xC1, 0xAB, 0xFA, 0x24, 0xE1, 0x7B, 0x08, 0x0C, 0xBD, 0xB1, 0x4A, 0x78, 0x88,
|
||||
0x95, 0x8B, 0xE3, 0x63, 0xE8, 0x6D, 0xE9, 0xCB, 0xD5, 0xFE, 0x3B, 0x00, 0x1D, 0x39, 0xF2,
|
||||
0xEF, 0xB7, 0x0E, 0x66, 0x58, 0xD0, 0xE4, 0xA6, 0x77, 0x72, 0xF8, 0xEB, 0x75, 0x4B, 0x0A,
|
||||
0x31, 0x44, 0x50, 0xB4, 0x8F, 0xED, 0x1F, 0x1A, 0xDB, 0x99, 0x8D, 0x33, 0x9F, 0x11, 0x83,
|
||||
0x14,
|
||||
];
|
||||
let temp1 = arg1[0x40];
|
||||
arg1[temp1 as usize + 0x10] = arg2;
|
||||
let temp2 = temp1 as usize;
|
||||
let temp3 = (temp1 + 1) & 0xf;
|
||||
arg1[temp1 as usize + 0x20] = arg1[temp2] ^ arg2;
|
||||
let temp4 = MIXING_TABLE[(arg2 ^ arg1[0x41]) as usize];
|
||||
let temp1 = arg1[temp2 + 0x30];
|
||||
arg1[temp2 + 0x30] = temp4 ^ temp1;
|
||||
arg1[0x41] = temp4 ^ temp1;
|
||||
arg1[0x40] = temp3;
|
||||
if temp3 != 0 {
|
||||
return;
|
||||
}
|
||||
let mut temp1 = 0x29;
|
||||
let mut temp5 = 0x0;
|
||||
unsafe {
|
||||
loop {
|
||||
temp1 = temp1 ^ arg1[0];
|
||||
arg1[0] = temp1;
|
||||
let mut temp6 = arg1.as_mut_ptr().add(1);
|
||||
loop {
|
||||
let temp7 = temp6.add(1);
|
||||
temp1 = *temp6 ^ MIXING_TABLE[temp1 as usize];
|
||||
*temp6 = temp1;
|
||||
temp6 = temp7;
|
||||
if temp7 == arg1.as_mut_ptr().add(0x30) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
temp1 = temp1.wrapping_add(temp5);
|
||||
temp5 = temp5.wrapping_add(0x01);
|
||||
if temp5 == 0x12 {
|
||||
break;
|
||||
}
|
||||
temp1 = MIXING_TABLE[temp1 as usize];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::mem;
|
||||
|
||||
#[test]
|
||||
fn integrity_check_single_pass() {
|
||||
let mut input = [
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x38, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa3, 0x61, 0xe4, 0x42,
|
||||
0xf6, 0x67, 0x94, 0xff, 0x18, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa4, 0x57,
|
||||
0x72, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
];
|
||||
super::integrity_check_single_pass(&mut input, 34);
|
||||
let expected = [
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x22, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa3, 0x61, 0xe4, 0x42,
|
||||
0xf6, 0x67, 0x94, 0xff, 0x22, 0xc0, 0x9b, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0xa4, 0x57,
|
||||
0x72, 0xf7, 0xff, 0x7f, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x57,
|
||||
];
|
||||
assert_eq!(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integrity_check_pass2() {
|
||||
let pass1_result = [
|
||||
0x14u8, 0x6A, 0xDD, 0xAE, 0x53, 0xA9, 0xA7, 0x52, 0xAA, 0x08, 0x41, 0x36, 0x0B, 0xF5,
|
||||
0x5A, 0x9F,
|
||||
];
|
||||
let mut result = [0u8; 66];
|
||||
super::pass2(&mut result, &pass1_result);
|
||||
let expected = [
|
||||
0x8b, 0x21, 0x9a, 0x49, 0xe8, 0x6d, 0x1a, 0xee, 0xf2, 0x37, 0xf9, 0xb5, 0x4a, 0x8c,
|
||||
0x3c, 0x75, 0xc7, 0x1e, 0xee, 0x21, 0xcf, 0x29, 0x8a, 0xe5, 0x13, 0x83, 0xf4, 0xec,
|
||||
0x33, 0x04, 0xe2, 0xfd, 0xb0, 0x2f, 0x09, 0x01, 0x4f, 0xf7, 0x68, 0x6d, 0x69, 0x46,
|
||||
0x43, 0x7e, 0xb6, 0x2b, 0x21, 0xed, 0x57, 0xa1, 0x10, 0x86, 0x0e, 0x60, 0x44, 0x1e,
|
||||
0x70, 0x5f, 0x67, 0xd1, 0xeb, 0x67, 0xa1, 0x3d, 0x00, 0x3d,
|
||||
];
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integrity_check_pass3() {
|
||||
let mut result = [
|
||||
0x8b, 0x21, 0x9a, 0x49, 0xe8, 0x6d, 0x1a, 0xee, 0xf2, 0x37, 0xf9, 0xb5, 0x4a, 0x8c,
|
||||
0x3c, 0x75, 0xc7, 0x1e, 0xee, 0x21, 0xcf, 0x29, 0x8a, 0xe5, 0x13, 0x83, 0xf4, 0xec,
|
||||
0x33, 0x04, 0xe2, 0xfd, 0xb0, 0x2f, 0x09, 0x01, 0x4f, 0xf7, 0x68, 0x6d, 0x69, 0x46,
|
||||
0x43, 0x7e, 0xb6, 0x2b, 0x21, 0xed, 0x57, 0xa1, 0x10, 0x86, 0x0e, 0x60, 0x44, 0x1e,
|
||||
0x70, 0x5f, 0x67, 0xd1, 0xeb, 0x67, 0xa1, 0x3d, 0x00, 0x3d,
|
||||
];
|
||||
let input = super::Pass3Input {
|
||||
driver_version: 0x2f30,
|
||||
version: 12082,
|
||||
current_process: 0x002fa423,
|
||||
current_thread: 0xf79c1000,
|
||||
cudart_table: 0x00007ffff6958240 as *const _,
|
||||
integrity_check_table: 0x00007ffff6958220 as *const _,
|
||||
fn_address: 0x00007ffff2aaf4a0 as *const _,
|
||||
unix_seconds: 0x682b9cee,
|
||||
};
|
||||
super::pass3(&mut result, &input);
|
||||
let expected = [
|
||||
0x0a, 0xfd, 0xab, 0xc9, 0xff, 0x9b, 0xa0, 0xbe, 0x4d, 0x30, 0x32, 0x82, 0x74, 0x4f,
|
||||
0xa7, 0x48, 0x9d, 0x23, 0x82, 0xa3, 0x87, 0xfa, 0x6c, 0xdb, 0x92, 0x49, 0xd9, 0xb5,
|
||||
0x4b, 0x2b, 0x5e, 0x51, 0x6e, 0xf7, 0xf9, 0x4d, 0x28, 0x8a, 0x64, 0x06, 0x19, 0xb3,
|
||||
0xe6, 0xbe, 0xa4, 0xec, 0x7e, 0x54, 0x64, 0x28, 0xd9, 0xe1, 0xd4, 0x34, 0xc0, 0xa9,
|
||||
0x49, 0x88, 0xc9, 0x61, 0x58, 0xdd, 0x66, 0x74, 0x00, 0x74,
|
||||
];
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integrity_check_pass4() {
|
||||
let mut result = [
|
||||
0x84, 0xfd, 0x93, 0x10, 0xc6, 0xdb, 0xb3, 0xbc, 0x49, 0xc2, 0x25, 0xe7, 0xda, 0x6e,
|
||||
0x22, 0x6f, 0x9b, 0xbd, 0x81, 0x59, 0xc3, 0x01, 0x9a, 0x7a, 0x26, 0x34, 0x39, 0x0f,
|
||||
0x2a, 0x56, 0x13, 0xb1, 0xf6, 0xbc, 0x7f, 0xa1, 0x8f, 0x04, 0xa5, 0x4d, 0x0d, 0x78,
|
||||
0xab, 0x20, 0xf8, 0x23, 0x20, 0xa5, 0x3f, 0x67, 0x36, 0xe2, 0xde, 0x8a, 0xe5, 0xdf,
|
||||
0xe1, 0xf2, 0x03, 0x94, 0xad, 0xdc, 0x9a, 0xda, 0x00, 0xda,
|
||||
];
|
||||
super::pass4(&mut result, 1, |_| super::DeviceHashinfo {
|
||||
guid: super::CUuuid {
|
||||
bytes: unsafe {
|
||||
std::mem::transmute([0x8a2bfe9au32, 0x382d25ac, 0xc5ae37ea, 0x5f32716d])
|
||||
},
|
||||
},
|
||||
pci_domain: 0,
|
||||
pci_bus: 2,
|
||||
pci_device: 0,
|
||||
});
|
||||
let expected = [
|
||||
0x1f, 0xd8, 0x25, 0xd2, 0xdf, 0xfa, 0x64, 0xc7, 0xb6, 0x1a, 0xaf, 0x22, 0xb8, 0x79,
|
||||
0xfb, 0x96, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x7c, 0x9d, 0x46, 0xd2, 0x1f, 0xd8, 0x25, 0xd2, 0xdd, 0xfa, 0x64, 0xc7, 0xb6, 0x1a,
|
||||
0xaf, 0x22, 0xe6, 0x17, 0xbd, 0x3a, 0xd7, 0xdd, 0x5f, 0x82, 0x8c, 0x87, 0xce, 0x86,
|
||||
0x66, 0xaf, 0xa0, 0x50, 0x7a, 0x7d, 0xbb, 0xbc, 0x0c, 0x50,
|
||||
];
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integrity_check_pass5() {
|
||||
let mut result = [
|
||||
0x3e, 0x4b, 0xf2, 0x95, 0x71, 0xf5, 0x6b, 0x51, 0x07, 0xbf, 0x4b, 0xf1, 0x04, 0x0e,
|
||||
0x8e, 0x0b, 0x5f, 0x4d, 0x30, 0x0c, 0x0f, 0x0c, 0xae, 0xfb, 0x48, 0xaf, 0x23, 0xb5,
|
||||
0xea, 0x4c, 0xc2, 0xdb, 0xd7, 0xdf, 0x88, 0x74, 0x39, 0x58, 0x16, 0x3a, 0x1f, 0x7c,
|
||||
0x9b, 0x20, 0x7e, 0x7e, 0x94, 0xc8, 0x8b, 0xc6, 0xb2, 0x38, 0x0d, 0x07, 0x7d, 0xbd,
|
||||
0x90, 0xd5, 0x39, 0x63, 0xeb, 0x1d, 0x4f, 0x40, 0x00, 0x40,
|
||||
];
|
||||
let output = super::pass5(&mut result);
|
||||
let expected_result = [
|
||||
0x00, 0x23, 0x53, 0x06, 0x5e, 0x96, 0xf6, 0x9c, 0x61, 0xaa, 0x96, 0x2d, 0x2e, 0xcd,
|
||||
0xa8, 0x58, 0xe9, 0xca, 0xc0, 0x2e, 0x35, 0xed, 0x5f, 0xca, 0xe1, 0x0e, 0xcd, 0x1f,
|
||||
0xd0, 0x8e, 0x8b, 0x9c, 0x29, 0x4d, 0x1c, 0x94, 0x6b, 0xf7, 0x10, 0xb0, 0x07, 0x08,
|
||||
0x91, 0xd6, 0x14, 0x06, 0xc0, 0xec, 0xe1, 0x9c, 0x8e, 0x33, 0xd4, 0xe9, 0x43, 0x5c,
|
||||
0x86, 0x0c, 0x72, 0x4d, 0x27, 0x98, 0x91, 0x7f, 0x00, 0x7f,
|
||||
];
|
||||
assert_eq!(result, expected_result);
|
||||
let output = unsafe { mem::transmute::<_, [u8; 16]>(output) };
|
||||
let expected = [
|
||||
0x00, 0x23, 0x53, 0x06, 0x5e, 0x96, 0xf6, 0x9c, 0x61, 0xaa, 0x96, 0x2d, 0x2e, 0xcd,
|
||||
0xa8, 0x58,
|
||||
];
|
||||
assert_eq!(output, expected);
|
||||
}
|
||||
}
|
|
@ -2,7 +2,7 @@
|
|||
name = "detours-sys"
|
||||
version = "0.1.2"
|
||||
authors = ["Diana <5275194+DianaNites@users.noreply.github.com>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
links = "detours"
|
||||
# Package stuff
|
||||
description = "Rust bindings to Microsoft Detours"
|
||||
|
|
8
format/Cargo.toml
Normal file
8
format/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "format"
|
||||
version = "0.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
uuid = "1.16"
|
40
format/src/dark_api.rs
Normal file
40
format/src/dark_api.rs
Normal file
|
@ -0,0 +1,40 @@
|
|||
use crate::CudaDisplay;
|
||||
use cuda_types::dark_api::*;
|
||||
|
||||
impl CudaDisplay for FatbincWrapper {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"{ magic: ")?;
|
||||
CudaDisplay::write(&self.magic, "", 0, writer)?;
|
||||
writer.write_all(b", version: ")?;
|
||||
CudaDisplay::write(&self.version, "", 0, writer)?;
|
||||
writer.write_all(b", data: ")?;
|
||||
CudaDisplay::write(&self.data, "", 0, writer)?;
|
||||
writer.write_all(b", filename_or_fatbins: ")?;
|
||||
CudaDisplay::write(&self.filename_or_fatbins, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for FatbinHeader {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"{ magic: ")?;
|
||||
CudaDisplay::write(&self.magic, "", 0, writer)?;
|
||||
writer.write_all(b", version: ")?;
|
||||
CudaDisplay::write(&self.version, "", 0, writer)?;
|
||||
writer.write_all(b", header_size: ")?;
|
||||
CudaDisplay::write(&self.header_size, "", 0, writer)?;
|
||||
writer.write_all(b", files_size: ")?;
|
||||
CudaDisplay::write(&self.files_size, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
30269
format/src/format_generated_blas.rs
Normal file
30269
format/src/format_generated_blas.rs
Normal file
File diff suppressed because it is too large
Load diff
4950
format/src/format_generated_blaslt.rs
Normal file
4950
format/src/format_generated_blaslt.rs
Normal file
File diff suppressed because it is too large
Load diff
9208
format/src/format_generated_blaslt_internal.rs
Normal file
9208
format/src/format_generated_blaslt_internal.rs
Normal file
File diff suppressed because it is too large
Load diff
16578
format/src/format_generated_dnn9.rs
Normal file
16578
format/src/format_generated_dnn9.rs
Normal file
File diff suppressed because it is too large
Load diff
2234
format/src/format_generated_fft.rs
Normal file
2234
format/src/format_generated_fft.rs
Normal file
File diff suppressed because it is too large
Load diff
31122
format/src/format_generated_sparse.rs
Normal file
31122
format/src/format_generated_sparse.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,11 +1,12 @@
|
|||
use cuda_types::cuda::*;
|
||||
use std::{
|
||||
any::TypeId,
|
||||
ffi::{c_void, CStr},
|
||||
fmt::LowerHex,
|
||||
mem, ptr, slice,
|
||||
};
|
||||
|
||||
pub(crate) trait CudaDisplay {
|
||||
pub trait CudaDisplay {
|
||||
fn write(
|
||||
&self,
|
||||
fn_name: &'static str,
|
||||
|
@ -14,6 +15,17 @@ pub(crate) trait CudaDisplay {
|
|||
) -> std::io::Result<()>;
|
||||
}
|
||||
|
||||
impl CudaDisplay for () {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(writer, "()")
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUuuid {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -22,7 +34,9 @@ impl CudaDisplay for CUuuid {
|
|||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
let guid = self.bytes;
|
||||
write!(writer, "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15])
|
||||
let uuid = uuid::Uuid::from_bytes(guid);
|
||||
let braced = uuid.as_braced();
|
||||
write!(writer, "{braced:#X}")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,6 +51,17 @@ impl CudaDisplay for CUdeviceptr_v1 {
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for bool {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(writer, "{}", *self)
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for u8 {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -81,6 +106,17 @@ impl CudaDisplay for u32 {
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for i64 {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(writer, "{}", *self)
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for u64 {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -125,6 +161,60 @@ impl CudaDisplay for f64 {
|
|||
}
|
||||
}
|
||||
|
||||
// user by Dark API
|
||||
impl CudaDisplay
|
||||
for Option<
|
||||
extern "system" fn(
|
||||
cuda_types::cuda::CUcontext,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
),
|
||||
>
|
||||
{
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if let Some(fn_ptr) = self {
|
||||
write!(writer, "{:p}", *fn_ptr)
|
||||
} else {
|
||||
writer.write_all(b"NULL")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for Option<unsafe extern "C" fn(*const i8)> {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if let Some(fn_ptr) = self {
|
||||
write!(writer, "{:p}", *fn_ptr)
|
||||
} else {
|
||||
writer.write_all(b"NULL")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for Option<unsafe extern "C" fn(i32, *const i8, *const i8)> {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if let Some(fn_ptr) = self {
|
||||
write!(writer, "{:p}", *fn_ptr)
|
||||
} else {
|
||||
writer.write_all(b"NULL")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_handle<T: LowerHex>(
|
||||
this: &[T; 64],
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
|
@ -198,11 +288,30 @@ impl CudaDisplay for *const i8 {
|
|||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(
|
||||
writer,
|
||||
"\"{}\"",
|
||||
unsafe { CStr::from_ptr(*self as _) }.to_string_lossy()
|
||||
)
|
||||
if self.is_null() {
|
||||
writer.write_all(b"NULL")
|
||||
} else {
|
||||
write!(
|
||||
writer,
|
||||
"\"{}\"",
|
||||
unsafe { CStr::from_ptr(*self as _) }.to_string_lossy()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for *mut cuda_types::FILE {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if self.is_null() {
|
||||
writer.write_all(b"NULL")
|
||||
} else {
|
||||
write!(writer, "{:p}", *self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -409,6 +518,44 @@ impl CudaDisplay for CUDA_RESOURCE_DESC_st {
|
|||
}
|
||||
}
|
||||
|
||||
impl crate::CudaDisplay for cuda_types::cuda::CUlaunchConfig_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(concat!("{ ", stringify!(gridDimX), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.gridDimX, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(gridDimY), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.gridDimY, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(gridDimZ), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.gridDimZ, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(blockDimX), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.blockDimX, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(blockDimY), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.blockDimY, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(blockDimZ), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.blockDimZ, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(sharedMemBytes), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.sharedMemBytes, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(hStream), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.hStream, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(numAttrs), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&self.numAttrs, "", 0, writer)?;
|
||||
writer.write_all(concat!(", ", stringify!(attrs), ": ").as_bytes())?;
|
||||
writer.write_all(b"[")?;
|
||||
for i in 0..self.numAttrs {
|
||||
if i != 0 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
crate::CudaDisplay::write(&unsafe { *self.attrs.add(i as usize) }, "", 0, writer)?;
|
||||
}
|
||||
writer.write_all(b"]")?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -552,17 +699,6 @@ impl CudaDisplay for CUgraphNodeParams_st {
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUlaunchConfig_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUeglFrame_st {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -584,33 +720,30 @@ impl CudaDisplay for CUdevResource_st {
|
|||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUlaunchAttribute_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
impl<T: CudaDisplay> CudaDisplay for *mut T {
|
||||
fn write(
|
||||
&self,
|
||||
fn_name: &'static str,
|
||||
index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if *self == ptr::null_mut() {
|
||||
writer.write_all(b"NULL")
|
||||
} else {
|
||||
let this: &T = unsafe { &**self };
|
||||
this.write(fn_name, index, writer)
|
||||
}
|
||||
write_launch_attribute(writer, fn_name, index, self.id, self.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: CudaDisplay> CudaDisplay for *const T {
|
||||
impl<T: CudaDisplay + 'static> CudaDisplay for *mut T {
|
||||
fn write(
|
||||
&self,
|
||||
fn_name: &'static str,
|
||||
index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
CudaDisplay::write(&self.cast_const(), fn_name, index, writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: CudaDisplay + 'static> CudaDisplay for *const T {
|
||||
fn write(
|
||||
&self,
|
||||
fn_name: &'static str,
|
||||
|
@ -620,8 +753,17 @@ impl<T: CudaDisplay> CudaDisplay for *const T {
|
|||
if *self == ptr::null() {
|
||||
writer.write_all(b"NULL")
|
||||
} else {
|
||||
let this: &T = unsafe { &**self };
|
||||
this.write(fn_name, index, writer)
|
||||
if fn_name.len() > 2
|
||||
&& fn_name.starts_with("cu")
|
||||
&& fn_name.as_bytes()[2].is_ascii_lowercase()
|
||||
&& (TypeId::of::<T>() == TypeId::of::<f32>()
|
||||
|| TypeId::of::<T>() == TypeId::of::<f64>())
|
||||
{
|
||||
CudaDisplay::write(&self.cast::<c_void>(), fn_name, index, writer)
|
||||
} else {
|
||||
let this: &T = unsafe { &**self };
|
||||
this.write(fn_name, index, writer)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -644,6 +786,24 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: CudaDisplay> CudaDisplay for [T] {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"[")?;
|
||||
for i in 0..self.len() {
|
||||
CudaDisplay::write(&self[i], "", 0, writer)?;
|
||||
if i != self.len() - 1 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
}
|
||||
writer.write_all(b"]")
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for CUarrayMapInfo_st {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -666,6 +826,17 @@ impl CudaDisplay for CUexecAffinityParam_st {
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for *mut cuda_types::cudnn9::cudnnRuntimeTag_t {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuGraphKernelNodeGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
|
@ -677,7 +848,10 @@ pub fn write_cuGraphKernelNodeGetAttribute(
|
|||
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
|
||||
writer.write_all(b", attr: ")?;
|
||||
CudaDisplay::write(&attr, "cuGraphKernelNodeGetAttribute", 1, writer)?;
|
||||
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, value_out)?;
|
||||
writer.write_all(b", value_out: ")?;
|
||||
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, unsafe {
|
||||
*value_out
|
||||
})?;
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
|
@ -702,7 +876,10 @@ pub fn write_cuStreamGetAttribute(
|
|||
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
|
||||
writer.write_all(b", attr: ")?;
|
||||
CudaDisplay::write(&attr, "cuStreamGetAttribute", 1, writer)?;
|
||||
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, value_out)?;
|
||||
writer.write_all(b", value_out: ")?;
|
||||
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, unsafe {
|
||||
*value_out
|
||||
})?;
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
|
@ -711,98 +888,78 @@ fn write_launch_attribute(
|
|||
fn_name: &'static str,
|
||||
index: usize,
|
||||
attribute: CUlaunchAttributeID,
|
||||
value_out: *mut CUstreamAttrValue,
|
||||
value: CUlaunchAttributeValue,
|
||||
) -> std::io::Result<()> {
|
||||
match attribute {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).accessPolicyWindow },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = ")?;
|
||||
CudaDisplay::write(unsafe { &value.accessPolicyWindow }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_COOPERATIVE = ")?;
|
||||
CudaDisplay::write(unsafe { &value.cooperative }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = ")?;
|
||||
CudaDisplay::write(unsafe { &value.syncPolicy }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = ")?;
|
||||
CudaDisplay::write(unsafe { &value.clusterDim }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).clusterSchedulingPolicyPreference },
|
||||
unsafe { &value.clusterSchedulingPolicyPreference },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).programmaticStreamSerializationAllowed },
|
||||
unsafe { &value.programmaticStreamSerializationAllowed },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).programmaticEvent },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = ")?;
|
||||
CudaDisplay::write(unsafe { &value.programmaticEvent }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_PRIORITY = ")?;
|
||||
CudaDisplay::write(unsafe { &value.priority }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).memSyncDomainMap },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = ")?;
|
||||
CudaDisplay::write(unsafe { &value.memSyncDomainMap }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).memSyncDomain },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = ")?;
|
||||
CudaDisplay::write(unsafe { &value.memSyncDomain }, fn_name, index, writer)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).launchCompletionEvent },
|
||||
unsafe { &value.launchCompletionEvent },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
}
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
writer.write_all(b"CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).deviceUpdatableKernelNode },
|
||||
unsafe { &value.deviceUpdatableKernelNode },
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)
|
||||
}
|
||||
_ => writer.write_all(b", ... "),
|
||||
_ => writer.write_all(b""),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -858,6 +1015,302 @@ pub fn write_cuGLGetDevices_v2(
|
|||
todo!()
|
||||
}
|
||||
|
||||
#[path = "format_generated.rs"]
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cudnnBackendGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
descriptor: cuda_types::cudnn9::cudnnBackendDescriptor_t,
|
||||
attributeName: cuda_types::cudnn9::cudnnBackendAttributeName_t,
|
||||
attributeType: cuda_types::cudnn9::cudnnBackendAttributeType_t,
|
||||
requestedElementCount: i64,
|
||||
elementCount: *mut i64,
|
||||
arrayOfElements: *mut ::core::ffi::c_void,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(descriptor), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&descriptor, "cudnnBackendGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributeName), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributeName, "cudnnBackendGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributeType), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributeType, "cudnnBackendGetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(requestedElementCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(
|
||||
&requestedElementCount,
|
||||
"cudnnBackendGetAttribute",
|
||||
arg_idx,
|
||||
writer,
|
||||
)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(elementCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&elementCount, "cudnnBackendGetAttribute", arg_idx, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(arrayOfElements), ": ").as_bytes())?;
|
||||
cudnn9_print_elements(
|
||||
writer,
|
||||
attributeType,
|
||||
unsafe { elementCount.as_ref() }
|
||||
.copied()
|
||||
.unwrap_or(requestedElementCount),
|
||||
arrayOfElements,
|
||||
)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cudnnBackendSetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
descriptor: cuda_types::cudnn9::cudnnBackendDescriptor_t,
|
||||
attributeName: cuda_types::cudnn9::cudnnBackendAttributeName_t,
|
||||
attributeType: cuda_types::cudnn9::cudnnBackendAttributeType_t,
|
||||
elementCount: i64,
|
||||
arrayOfElements: *const ::core::ffi::c_void,
|
||||
) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(concat!(stringify!(descriptor), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&descriptor, "cudnnBackendSetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributeName), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributeName, "cudnnBackendSetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(attributeType), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&attributeType, "cudnnBackendSetAttribute", arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(elementCount), ": ").as_bytes())?;
|
||||
crate::CudaDisplay::write(&elementCount, "cudnnBackendSetAttribute", arg_idx, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(arrayOfElements), ": ").as_bytes())?;
|
||||
cudnn9_print_elements(writer, attributeType, elementCount, arrayOfElements)?;
|
||||
writer.write_all(b")")
|
||||
}
|
||||
|
||||
fn cudnn9_print_elements(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
type_: cuda_types::cudnn9::cudnnBackendAttributeType_t,
|
||||
element_count: i64,
|
||||
array_of_elements: *const ::core::ffi::c_void,
|
||||
) -> std::io::Result<()> {
|
||||
fn print_typed<T: CudaDisplay>(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
element_count: i64,
|
||||
array_of_elements: *const ::core::ffi::c_void,
|
||||
) -> std::io::Result<()> {
|
||||
if array_of_elements.is_null() {
|
||||
return writer.write_all(b"NULL");
|
||||
}
|
||||
let elements =
|
||||
unsafe { slice::from_raw_parts(array_of_elements as *const T, element_count as usize) };
|
||||
CudaDisplay::write(elements, "", 0, writer)
|
||||
}
|
||||
match type_ {
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_HANDLE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnHandle_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnDataType_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BOOLEAN => {
|
||||
print_typed::<bool>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64 => {
|
||||
print_typed::<i64>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT => {
|
||||
print_typed::<f32>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE => {
|
||||
print_typed::<f64>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_VOID_PTR => {
|
||||
print_typed::<*const c_void>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_CONVOLUTION_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnConvolutionMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_HEUR_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendHeurMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_KNOB_TYPE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendKnobType_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NAN_PROPOGATION => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnNanPropagation_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NUMERICAL_NOTE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendNumericalNote_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_LAYOUT_TYPE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendLayoutType_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_ATTRIB_NAME => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendAttributeName_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_POINTWISE_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnPointwiseMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendDescriptor_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_GENSTATS_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnGenStatsMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BN_FINALIZE_STATS_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBnFinalizeStatsMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_REDUCTION_OPERATOR_TYPE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnReduceTensorOp_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_BEHAVIOR_NOTE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendBehaviorNote_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_TENSOR_REORDERING_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendTensorReordering_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_RESAMPLE_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnResampleMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_PADDING_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnPaddingMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_INT32 => {
|
||||
print_typed::<i32>(writer, element_count, array_of_elements)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_CHAR => {
|
||||
CudaDisplay::write(&array_of_elements.cast::<i8>(), "", 0, writer)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_SIGNAL_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnSignalMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_FRACTION => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnFraction_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NORM_MODE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendNormMode_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_NORM_FWD_PHASE => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnBackendNormFwdPhase_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
cuda_types::cudnn9::cudnnBackendAttributeType_t::CUDNN_TYPE_RNG_DISTRIBUTION => {
|
||||
print_typed::<cuda_types::cudnn9::cudnnRngDistribution_t>(
|
||||
writer,
|
||||
element_count,
|
||||
array_of_elements,
|
||||
)
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
mod dark_api;
|
||||
mod format_generated;
|
||||
pub(crate) use format_generated::*;
|
||||
pub use format_generated::*;
|
||||
mod format_generated_blas;
|
||||
pub use format_generated_blas::*;
|
||||
mod format_generated_blaslt;
|
||||
pub use format_generated_blaslt::*;
|
||||
mod format_generated_blaslt_internal;
|
||||
pub use format_generated_blaslt_internal::*;
|
||||
mod format_generated_dnn9;
|
||||
pub use format_generated_dnn9::*;
|
||||
mod format_generated_fft;
|
||||
pub use format_generated_fft::*;
|
||||
mod format_generated_sparse;
|
||||
pub use format_generated_sparse::*;
|
|
@ -427,7 +427,7 @@ fn directive<'a, 'input>(
|
|||
| Token::DotFile | Token::DotSection => true,
|
||||
_ => false,
|
||||
}),
|
||||
PtxError::UnrecognizedDirective,
|
||||
|text| PtxError::UnrecognizedDirective(text.unwrap_or("")),
|
||||
)
|
||||
.map(Option::flatten)
|
||||
.parse_next(stream)
|
||||
|
@ -675,7 +675,7 @@ fn statement<'a, 'input>(
|
|||
_ => false,
|
||||
},
|
||||
),
|
||||
PtxError::UnrecognizedStatement,
|
||||
|text| PtxError::UnrecognizedStatement(text.unwrap_or("")),
|
||||
)
|
||||
.map(Option::flatten)
|
||||
.parse_next(stream)
|
||||
|
@ -1285,10 +1285,10 @@ pub enum PtxError<'input> {
|
|||
ArrayInitalizer,
|
||||
#[error("")]
|
||||
NonExternPointer,
|
||||
#[error("{0:?}")]
|
||||
UnrecognizedStatement(Option<&'input str>),
|
||||
#[error("{0:?}")]
|
||||
UnrecognizedDirective(Option<&'input str>),
|
||||
#[error("Unrecognized statement {0:?}")]
|
||||
UnrecognizedStatement(&'input str),
|
||||
#[error("Unrecognized directive {0:?}")]
|
||||
UnrecognizedDirective(&'input str),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -3492,11 +3492,11 @@ mod tests {
|
|||
assert_eq!(errors.len(), 2);
|
||||
assert!(matches!(
|
||||
errors[0],
|
||||
PtxError::UnrecognizedStatement(Some("unknown_op1.asdf foobar;"))
|
||||
PtxError::UnrecognizedStatement("unknown_op1.asdf foobar;")
|
||||
));
|
||||
assert!(matches!(
|
||||
errors[1],
|
||||
PtxError::UnrecognizedStatement(Some("unknown_op2 temp2, temp;"))
|
||||
PtxError::UnrecognizedStatement("unknown_op2 temp2, temp;")
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -3533,11 +3533,11 @@ mod tests {
|
|||
assert_eq!(errors.len(), 2);
|
||||
assert!(matches!(
|
||||
errors[0],
|
||||
PtxError::UnrecognizedDirective(Some(".broken_directive_fail; 34; {"))
|
||||
PtxError::UnrecognizedDirective(".broken_directive_fail; 34; {")
|
||||
));
|
||||
assert!(matches!(
|
||||
errors[1],
|
||||
PtxError::UnrecognizedDirective(Some("section foobar }"))
|
||||
PtxError::UnrecognizedDirective("section foobar }")
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ impl Project {
|
|||
fn try_new(p: Package) -> Option<Project> {
|
||||
let name = p.name;
|
||||
let clib_name = p.targets.into_iter().find_map(|target| {
|
||||
if target.is_cdylib() {
|
||||
if target.is_cdylib() || target.is_dylib() {
|
||||
Some(target.name)
|
||||
} else {
|
||||
None
|
||||
|
|
|
@ -157,32 +157,7 @@ fn cudart_interface_fn1_impl(pctx: *mut CUcontext, dev: c_int) -> hipError_t {
|
|||
hipError_t::hipSuccess
|
||||
}
|
||||
|
||||
/*
|
||||
fat_cubin:
|
||||
typedef struct {
|
||||
int magic;
|
||||
int version;
|
||||
const unsigned long long* data;
|
||||
void *filename_or_fatbins; /* version 1: offline filename,
|
||||
* version 2: array of prelinked fatbins */
|
||||
} __fatBinC_Wrapper_t;
|
||||
|
||||
data start with this header:
|
||||
#define FATBIN_MAGIC 0xBA55ED50U
|
||||
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
|
||||
#define FATBIN_VERSION 0x0001U
|
||||
|
||||
struct fatbinary_ALIGN_(8) fatBinaryHeader
|
||||
{
|
||||
unsigned int magic; // FATBIN_MAGIC
|
||||
unsigned short version; // FATBIN_VERSION
|
||||
unsigned short headerSize;
|
||||
unsigned long long int fatSize; // size of the entire fat binary excluding this header
|
||||
};
|
||||
|
||||
there's binary data after header
|
||||
|
||||
*/
|
||||
|
||||
const FATBINC_MAGIC: c_uint = 0x466243B1;
|
||||
const FATBINC_VERSION: c_uint = 0x1;
|
||||
|
|
|
@ -10,3 +10,5 @@ proc-macro2 = "1.0.89"
|
|||
quote = "1.0"
|
||||
prettyplease = "0.2.25"
|
||||
rustc-hash = "1.1.0"
|
||||
libloading = "0.8"
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
|
|
142
zluda_bindgen/build/cublasLt_internal.h
Normal file
142
zluda_bindgen/build/cublasLt_internal.h
Normal file
|
@ -0,0 +1,142 @@
|
|||
// GENERATED AUTOMATICALLY BY /local/andrzej/dev/zluda/zluda_blaslt/build/decompile_internal.py. DO NOT EDIT MANUALLY
|
||||
extern "C" {
|
||||
#define undefined void
|
||||
#define undefined1 unsigned char
|
||||
#define undefined4 unsigned int
|
||||
#define uint unsigned int
|
||||
#define undefined8 unsigned long long
|
||||
#define ulong unsigned long long
|
||||
undefined4 cublasLtShutdownCtx(long param_1);
|
||||
int cublasLtCtxInit(long param_1);
|
||||
undefined8 cublasLtHeuristicLutSerializeEntry(undefined8 param_1,undefined8 *param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 *param_12,undefined8 *param_13,ulong param_14,undefined8 *param_15);
|
||||
undefined8 cublasLtLegacyGemmACC(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmBII(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmBSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmCCC(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
void cublasLtLegacyGemmUtilizationCCC(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
|
||||
undefined8 cublasLtLegacyGemmDDD(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
void cublasLtLegacyGemmUtilizationDDD(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
|
||||
undefined8 cublasLtLegacyGemmHHH(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmHSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmHSH(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmSSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmTSS(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmTST(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
undefined8 cublasLtLegacyGemmZZZ(undefined8 param_1,undefined8 param_2,int *param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined4 param_13,undefined4 param_14,undefined8 param_15,undefined4 param_16,undefined4 param_17,undefined8 param_18,undefined4 param_19,undefined4 param_20,undefined4 param_21,undefined4 param_22,undefined4 param_23,undefined1 param_24,undefined8 param_25,undefined8 param_26,undefined8 param_27);
|
||||
void cublasLtLegacyGemmUtilizationZZZ(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined4 param_9,undefined4 param_10,undefined4 param_11,undefined8 param_12,undefined8 param_13);
|
||||
undefined4 cublasLtAlgoCharacteristicGetAttribute(undefined8 param_1,long param_2,int param_3,int param_4,int param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined8 cublasLtMatmulAlgoConfigGetAttributeRange(void);
|
||||
undefined8 cublasLtHHHMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtHHHMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtHHHMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtHHHMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtHHHMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtHHHMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtHSHMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtHSHMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtHSHMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtHSHMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtHSHMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtHSHMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtSSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtSSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtSSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtSSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtSSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtSSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtDDDMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtDDDMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtDDDMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtDDDMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtDDDMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtDDDMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtBSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtBSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtBSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtBSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtBSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtBSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtCCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtCCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtCCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtCCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtCCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtCCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtZZZMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtZZZMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtZZZMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtZZZMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtZZZMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtZZZMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtACCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtACCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtACCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtACCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtACCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtACCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtBIIMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtBIIMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtBIIMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtBIIMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtBIIMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtBIIMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtBSBMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtBSBMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtBSBMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtBSBMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtBSBMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtBSBMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtHSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtHSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtHSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtHSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtHSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtHSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtKCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtKCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtKCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtKCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtKCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtKCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtKCKMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtKCKMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtKCKMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtKCKMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtKCKMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtKCKMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtTSSMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtTSSMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtTSSMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtTSSMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtTSSMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtTSSMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtTSTMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtTSTMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtTSTMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtTSTMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtTSTMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtTSTMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtVCCMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtVCCMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtVCCMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtVCCMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtVCCMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtVCCMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined8 cublasLtVCVMatmulAlgoGetIds(undefined8 param_1,undefined4 param_2,undefined4 param_3,int param_4,int param_5,uint param_6,uint param_7,int param_8,long param_9,int *param_10);
|
||||
undefined4 cublasLtVCVMatmulAlgoInit(undefined8 param_1,undefined4 param_2,undefined4 param_3,undefined4 param_4,undefined4 param_5,undefined4 param_6,undefined4 param_7,undefined4 param_8,undefined8 param_9);
|
||||
undefined4 cublasLtVCVMatmulAlgoCapGetAttribute(undefined8 param_1,undefined4 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5);
|
||||
undefined4 cublasLtVCVMatmul(undefined8 param_1,undefined8 param_2,undefined8 param_3,undefined8 param_4,undefined8 param_5,undefined8 param_6,undefined8 param_7,undefined8 param_8,undefined8 param_9,undefined8 param_10,undefined8 param_11,undefined8 param_12,undefined8 param_13,undefined8 param_14,undefined8 param_15,undefined8 param_16);
|
||||
undefined4 cublasLtVCVMatmulAlgoCheck(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 param_7,undefined8 param_8);
|
||||
undefined4 cublasLtVCVMatmulAlgoGetHeuristic(undefined8 param_1,long param_2,long param_3,long param_4,long param_5,long param_6,undefined8 *param_7,undefined4 param_8,undefined8 param_9,undefined8 param_10);
|
||||
undefined * cublasLt_for_cublas_BII(void);
|
||||
undefined * cublasLt_for_cublas_BSS(void);
|
||||
undefined * cublasLt_for_cublas_CCC(void);
|
||||
undefined * cublasLt_for_cublas_DDD(void);
|
||||
undefined * cublasLt_for_cublas_HHH(void);
|
||||
undefined * cublasLt_for_cublas_HSH(void);
|
||||
undefined * cublasLt_for_cublas_HSS(void);
|
||||
undefined * cublasLt_for_cublas_SSS(void);
|
||||
undefined * cublasLt_for_cublas_TSS(void);
|
||||
undefined * cublasLt_for_cublas_TST(void);
|
||||
undefined * cublasLt_for_cublas_ZZZ(void);
|
||||
}
|
40
zluda_bindgen/build/decompile_cublaslt_internal.py
Normal file
40
zluda_bindgen/build/decompile_cublaslt_internal.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Modified from here: https://github.com/galoget/ghidra-headless-scripts/
|
||||
# Usage: analyzeHeadless <PROJECT_PATH> cublaslt -import /usr/local/cuda/lib64/libcublasLt.so -scriptPath . -postScript decompile_cublaslt_internal.py
|
||||
|
||||
from ghidra.app.decompiler import DecompInterface
|
||||
from ghidra.util.task import ConsoleTaskMonitor
|
||||
|
||||
EXTERNAL_HEADER = "/usr/local/cuda/include/cublasLt.h"
|
||||
|
||||
with open(EXTERNAL_HEADER, 'r'):
|
||||
header_content = open(EXTERNAL_HEADER, 'r').read()
|
||||
decompinterface = DecompInterface()
|
||||
decompinterface.openProgram(currentProgram)
|
||||
functions = currentProgram.getFunctionManager().getFunctions(True)
|
||||
blaslt_functions = []
|
||||
monitor = ConsoleTaskMonitor()
|
||||
with open("cublasLt_internal.h", "w") as output_file:
|
||||
output_file.write("// GENERATED AUTOMATICALLY BY decompile_cublaslt_internal.py. DO NOT EDIT MANUALLY\n")
|
||||
output_file.write("extern \"C\" {\n")
|
||||
output_file.write(" #define undefined void")
|
||||
output_file.write(" #define undefined1 unsigned char")
|
||||
output_file.write(" #define undefined4 unsigned int")
|
||||
output_file.write(" #define uint unsigned int")
|
||||
output_file.write(" #define undefined8 unsigned long long")
|
||||
output_file.write(" #define ulong unsigned long long")
|
||||
for function in functions:
|
||||
function_name = function.getName()
|
||||
if not function_name.startswith("cublasLt"):
|
||||
continue
|
||||
if function_name.format("{}(") in header_content:
|
||||
continue
|
||||
decompile_results = decompinterface.decompileFunction(function, 0, monitor)
|
||||
signature = decompile_results.getDecompiledFunction().getSignature()
|
||||
# Ghidra disssasembles cublasLtShutdownCtx to return void, but
|
||||
# looking at the assembly I'm convinced it returns a value
|
||||
# On the other hand, cublasLtLegacyGemmUtilization* seem to return void
|
||||
# TODO: fail if there is a new void-return function
|
||||
if function_name == "cublasLtShutdownCtx":
|
||||
signature = signature.replace("void", "undefined4")
|
||||
output_file.write(" CUBLASWINAPI {}\n".format(signature))
|
||||
output_file.write("}\n")
|
|
@ -2,7 +2,8 @@ use proc_macro2::Span;
|
|||
use quote::{format_ident, quote, ToTokens};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use std::{
|
||||
borrow::Cow, collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr,
|
||||
borrow::Cow, cmp, collections::hash_map, ffi::CString, fs::File, io::Write, iter, mem,
|
||||
path::PathBuf, ptr, str::FromStr,
|
||||
};
|
||||
use syn::{
|
||||
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
|
||||
|
@ -10,19 +11,133 @@ use syn::{
|
|||
PathArguments, PathSegment, Signature, Type, TypePath, UseTree,
|
||||
};
|
||||
|
||||
// Source: https://developer.nvidia.com/cuda-toolkit-archive
|
||||
static KNOWN_CUDA_VERSIONS: &[&'static str] = &[
|
||||
"12.8.1", "12.8.0", "12.6.3", "12.6.2", "12.6.1", "12.6.0", "12.5.1", "12.5.0", "12.4.1",
|
||||
"12.4.0", "12.3.2", "12.3.1", "12.3.0", "12.2.2", "12.2.1", "12.2.0", "12.1.1", "12.1.0",
|
||||
"12.0.1", "12.0.0", "11.8.0", "11.7.1", "11.7.0", "11.6.2", "11.6.1", "11.6.0", "11.5.2",
|
||||
"11.5.1", "11.5.0", "11.4.4", "11.4.3", "11.4.2", "11.4.1", "11.4.0", "11.3.1", "11.3.0",
|
||||
"11.2.2", "11.2.1", "11.2.0", "11.1.1", "11.1.0", "11.0.3", "11.0.2", "11.0.1", "11.0.0",
|
||||
"10.2", "10.1", "10.0", "9.2", "9.1", "9.0", "8.0", "7.5", "7.0", "6.5", "6.0", "5.5", "5.0",
|
||||
"4.2", "4.1", "4.0", "3.2", "3.1", "3.0", "2.3", "2.2", "2.1", "2.0", "1.1", "1.0",
|
||||
];
|
||||
|
||||
fn main() {
|
||||
let crate_root = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
|
||||
generate_hip_runtime(
|
||||
&crate_root,
|
||||
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
|
||||
);
|
||||
generate_cuda(&crate_root);
|
||||
let cuda_functions = generate_cuda(&crate_root);
|
||||
generate_process_address_table(&crate_root, cuda_functions);
|
||||
generate_ml(&crate_root);
|
||||
generate_cublas(&crate_root);
|
||||
generate_cublaslt(&crate_root);
|
||||
generate_cudnn(&crate_root);
|
||||
generate_cufft(&crate_root);
|
||||
generate_cusparse(&crate_root);
|
||||
generate_cudnn(&crate_root);
|
||||
}
|
||||
|
||||
fn generate_process_address_table(crate_root: &PathBuf, mut cuda_fns: Vec<Ident>) {
|
||||
cuda_fns.sort_unstable();
|
||||
let mut versions = KNOWN_CUDA_VERSIONS
|
||||
.iter()
|
||||
.copied()
|
||||
.map(cuda_numeric_version)
|
||||
.collect::<Vec<_>>();
|
||||
versions.sort_unstable();
|
||||
let library =
|
||||
unsafe { libloading::Library::new("/usr/lib/x86_64-linux-gnu/libcuda.so.1") }.unwrap();
|
||||
let cu_get_proc_address = unsafe {
|
||||
library.get::<unsafe extern "system" fn(
|
||||
symbol: *const ::core::ffi::c_char,
|
||||
pfn: *mut *mut ::core::ffi::c_void,
|
||||
cudaVersion: ::core::ffi::c_int,
|
||||
flags: cuda_types::cuda::cuuint64_t,
|
||||
symbolStatus: *mut cuda_types::cuda::CUdriverProcAddressQueryResult,
|
||||
) -> cuda_types::cuda::CUresult>(b"cuGetProcAddress_v2\0")
|
||||
}
|
||||
.unwrap();
|
||||
let mut result = Vec::new();
|
||||
for fn_ in cuda_fns {
|
||||
let mut known_variants = FxHashMap::default();
|
||||
for version in std::iter::successors(Some(1), |x| Some(x + 1)) {
|
||||
let map_len = known_variants.len();
|
||||
for thread_suffix in ["", "_ptds", "_ptsz"] {
|
||||
let version = if version == 1 {
|
||||
"".to_string()
|
||||
} else {
|
||||
format!("_v{}", version)
|
||||
};
|
||||
let fn_ = format!("{}{}{}", fn_, version, thread_suffix);
|
||||
match unsafe { library.get::<*mut std::ffi::c_void>(fn_.as_bytes()) } {
|
||||
Ok(symbol) => {
|
||||
known_variants.insert(unsafe { symbol.into_raw() }.as_raw_ptr(), fn_);
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
if known_variants.len() == map_len {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let fn_ = fn_.to_string();
|
||||
let symbol = CString::new(fn_.clone()).unwrap();
|
||||
for flag in [
|
||||
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_DEFAULT,
|
||||
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_LEGACY_STREAM,
|
||||
cuda_types::cuda::CUdriverProcAddress_flags::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM,
|
||||
] {
|
||||
let mut breakpoints = Vec::new();
|
||||
let mut last_result = None;
|
||||
for version in versions.iter().copied() {
|
||||
let mut result = ptr::null_mut();
|
||||
let mut status = unsafe { mem::zeroed() };
|
||||
match unsafe { (cu_get_proc_address)(symbol.as_ptr(), &mut result, version, flag.0 as _, &mut status) } {
|
||||
Ok(()) => {}
|
||||
Err(cuda_types::cuda::CUerror::NOT_FOUND) => {
|
||||
continue;
|
||||
}
|
||||
Err(e) => panic!("{}", e.0)
|
||||
}
|
||||
if status != cuda_types::cuda::CUdriverProcAddressQueryResult::CU_GET_PROC_ADDRESS_SUCCESS {
|
||||
continue;
|
||||
}
|
||||
if Some(result) != last_result {
|
||||
last_result = Some(result);
|
||||
breakpoints.push((version, known_variants.get(&result).unwrap().clone()));
|
||||
}
|
||||
}
|
||||
breakpoints.sort_unstable_by_key(|(version, _)| cmp::Reverse(*version));
|
||||
if !breakpoints.is_empty() {
|
||||
result.push((fn_.clone(), flag.0, breakpoints));
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut path = crate_root.clone();
|
||||
path.extend(["..", "zluda_bindgen", "src", "process_table.rs"]);
|
||||
let mut file = File::create(path).unwrap();
|
||||
writeln!(file, "match (name, flag) {{").unwrap();
|
||||
for (fn_, version, breakpoints) in result {
|
||||
writeln!(file, " (b\"{fn_}\", {version}) => {{").unwrap();
|
||||
for (version, name) in breakpoints {
|
||||
writeln!(file, " if version >= {version} {{").unwrap();
|
||||
writeln!(file, " return {name} as _;").unwrap();
|
||||
writeln!(file, " }}").unwrap();
|
||||
}
|
||||
writeln!(file, " usize::MAX as _").unwrap();
|
||||
writeln!(file, " }}").unwrap();
|
||||
}
|
||||
writeln!(file, " _ => 0usize as _").unwrap();
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
|
||||
fn cuda_numeric_version(version: &str) -> i32 {
|
||||
let mut version = version.split('.').map(|s| s.parse::<i32>().unwrap());
|
||||
let major = version.next().unwrap();
|
||||
let minor = version.next().unwrap();
|
||||
let patch = version.next().unwrap_or(0);
|
||||
major * 1000 + minor * 10 + patch
|
||||
}
|
||||
|
||||
fn generate_cufft(crate_root: &PathBuf) {
|
||||
|
@ -49,10 +164,36 @@ fn generate_cufft(crate_root: &PathBuf) {
|
|||
&module,
|
||||
);
|
||||
generate_types_library(
|
||||
Some(LibraryOverride::CuFft),
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cufft.rs"],
|
||||
&module,
|
||||
)
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_fft.rs"],
|
||||
&["cuda_types", "cufft"],
|
||||
&module,
|
||||
);
|
||||
}
|
||||
|
||||
fn get_functions(module: syn::File) -> Vec<Ident> {
|
||||
module
|
||||
.items
|
||||
.iter()
|
||||
.flat_map(|item| match item {
|
||||
Item::ForeignMod(extern_) => {
|
||||
extern_
|
||||
.items
|
||||
.iter()
|
||||
.filter_map(|foreign_item| match foreign_item {
|
||||
ForeignItem::Fn(fn_) => Some(fn_.sig.ident.clone()),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn generate_cusparse(crate_root: &PathBuf) {
|
||||
|
@ -61,6 +202,7 @@ fn generate_cusparse(crate_root: &PathBuf) {
|
|||
.allowlist_type("^cusparse.*")
|
||||
.allowlist_type(".*Info_t$")
|
||||
.allowlist_type(".*Info$")
|
||||
.blocklist_type("^cudaAsync.*")
|
||||
.allowlist_function("^cusparse.*")
|
||||
.allowlist_var("^CUSPARSE_.*")
|
||||
.must_use_type("cusparseStatus_t")
|
||||
|
@ -77,10 +219,17 @@ fn generate_cusparse(crate_root: &PathBuf) {
|
|||
&module,
|
||||
);
|
||||
generate_types_library(
|
||||
None,
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cusparse.rs"],
|
||||
&module,
|
||||
)
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_sparse.rs"],
|
||||
&["cuda_types", "cusparse"],
|
||||
&module,
|
||||
);
|
||||
}
|
||||
|
||||
fn generate_cudnn(crate_root: &PathBuf) {
|
||||
|
@ -135,6 +284,12 @@ fn generate_cudnn(crate_root: &PathBuf) {
|
|||
&["..", "cuda_base", "src", "cudnn9.rs"],
|
||||
&cudnn9_module,
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_dnn9.rs"],
|
||||
&["cuda_types", "cudnn9"],
|
||||
&cudnn9_module,
|
||||
);
|
||||
}
|
||||
|
||||
// This code splits types (and constants) into one of:
|
||||
|
@ -482,14 +637,35 @@ fn generate_cublas(crate_root: &PathBuf) {
|
|||
&module,
|
||||
);
|
||||
generate_types_library(
|
||||
None,
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cublas.rs"],
|
||||
&module,
|
||||
)
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_blas.rs"],
|
||||
&["cuda_types", "cublas"],
|
||||
&module,
|
||||
);
|
||||
}
|
||||
|
||||
fn remove_type(module: &mut syn::File, type_name: &str) {
|
||||
let items = std::mem::replace(&mut module.items, Vec::new());
|
||||
let items = items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::Enum(enum_) if enum_.ident == type_name => None,
|
||||
Item::Struct(struct_) if struct_.ident == type_name => None,
|
||||
Item::Impl(impl_) if impl_.self_ty.to_token_stream().to_string() == type_name => None,
|
||||
_ => Some(item),
|
||||
})
|
||||
.collect();
|
||||
module.items = items;
|
||||
}
|
||||
|
||||
fn generate_cublaslt(crate_root: &PathBuf) {
|
||||
let cublas_header = new_builder()
|
||||
let cublaslt_header = new_builder()
|
||||
.header("/usr/local/cuda/include/cublasLt.h")
|
||||
.allowlist_type("^cublas.*")
|
||||
.allowlist_function("^cublasLt.*")
|
||||
|
@ -500,21 +676,58 @@ fn generate_cublaslt(crate_root: &PathBuf) {
|
|||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let module: syn::File = syn::parse_str(&cublas_header).unwrap();
|
||||
let cublaslt_internal_header = new_builder()
|
||||
.header_contents(
|
||||
"cublasLt_internal.h",
|
||||
include_str!("../build/cublasLt_internal.h"),
|
||||
)
|
||||
.clang_args(["-x", "c++"])
|
||||
.override_abi(bindgen::Abi::System, ".*")
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
// Simplest and dumbest way to do this
|
||||
.replace("pub fn", "fn")
|
||||
.replace(");", ") -> ();");
|
||||
let module_blaslt_internal: syn::File = syn::parse_str(&cublaslt_internal_header).unwrap();
|
||||
std::fs::write(
|
||||
crate_root
|
||||
.join("..")
|
||||
.join("cuda_base")
|
||||
.join("src")
|
||||
.join("cublaslt_internal.rs"),
|
||||
cublaslt_internal_header,
|
||||
)
|
||||
.unwrap();
|
||||
let mut module_blas: syn::File = syn::parse_str(&cublaslt_header).unwrap();
|
||||
remove_type(&mut module_blas, "cublasStatus_t");
|
||||
generate_functions(
|
||||
&crate_root,
|
||||
"cublaslt",
|
||||
&["..", "cuda_base", "src", "cublaslt.rs"],
|
||||
&module,
|
||||
&module_blas,
|
||||
);
|
||||
generate_types_library(
|
||||
Some(LibraryOverride::CuBlasLt),
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cublaslt.rs"],
|
||||
&module,
|
||||
)
|
||||
&module_blas,
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_blaslt.rs"],
|
||||
&["cuda_types", "cublaslt"],
|
||||
&module_blas,
|
||||
);
|
||||
generate_display_perflib(
|
||||
&crate_root,
|
||||
&["..", "format", "src", "format_generated_blaslt_internal.rs"],
|
||||
&["cuda_types", "cublaslt"],
|
||||
&module_blaslt_internal,
|
||||
);
|
||||
}
|
||||
|
||||
fn generate_cuda(crate_root: &PathBuf) {
|
||||
fn generate_cuda(crate_root: &PathBuf) -> Vec<Ident> {
|
||||
let cuda_header = new_builder()
|
||||
.header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h"))
|
||||
.allowlist_type("^CU.*")
|
||||
|
@ -537,23 +750,24 @@ fn generate_cuda(crate_root: &PathBuf) {
|
|||
.unwrap()
|
||||
.to_string();
|
||||
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
|
||||
generate_functions(
|
||||
let cuda_functions = get_functions(generate_functions(
|
||||
&crate_root,
|
||||
"cuda",
|
||||
&["..", "cuda_base", "src", "cuda.rs"],
|
||||
&module,
|
||||
);
|
||||
));
|
||||
generate_types_cuda(
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cuda.rs"],
|
||||
&module,
|
||||
);
|
||||
generate_display(
|
||||
generate_display_cuda(
|
||||
&crate_root,
|
||||
&["..", "zluda_dump", "src", "format_generated.rs"],
|
||||
&["..", "format", "src", "format_generated.rs"],
|
||||
&["cuda_types", "cuda"],
|
||||
&module,
|
||||
)
|
||||
);
|
||||
cuda_functions
|
||||
}
|
||||
|
||||
fn generate_ml(crate_root: &PathBuf) {
|
||||
|
@ -595,24 +809,44 @@ fn generate_ml(crate_root: &PathBuf) {
|
|||
&module,
|
||||
);
|
||||
generate_types_library(
|
||||
None,
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "nvml.rs"],
|
||||
&module,
|
||||
);
|
||||
}
|
||||
|
||||
fn generate_types_library(crate_root: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
fn generate_types_library(
|
||||
override_: Option<LibraryOverride>,
|
||||
crate_root: &PathBuf,
|
||||
path: &[&str],
|
||||
module: &syn::File,
|
||||
) {
|
||||
let module = generate_types_library_impl(module);
|
||||
let mut output = crate_root.clone();
|
||||
output.extend(path);
|
||||
let text = prettyplease::unparse(&module)
|
||||
.replace("self::cudaDataType", "super::cuda::cudaDataType")
|
||||
// complex as used by cuFFT
|
||||
.replace(" cuComplex", " super::cuda::cuComplex")
|
||||
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
|
||||
let mut text =
|
||||
prettyplease::unparse(&module).replace("self::cudaDataType", "super::cuda::cudaDataType");
|
||||
match override_ {
|
||||
None => {}
|
||||
Some(LibraryOverride::CuBlasLt) => {
|
||||
text = text.replace(" cublasStatus_t", " super::cublas::cublasStatus_t");
|
||||
}
|
||||
Some(LibraryOverride::CuFft) => {
|
||||
text = text
|
||||
.replace(" cuComplex", " super::cuda::cuComplex")
|
||||
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
|
||||
}
|
||||
}
|
||||
write_rust_to_file(output, &text)
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum LibraryOverride {
|
||||
CuBlasLt,
|
||||
CuFft,
|
||||
}
|
||||
|
||||
fn generate_types_library_impl(module: &syn::File) -> syn::File {
|
||||
let known_reexports: Punctuated<syn::Item, syn::parse::Nothing> = parse_quote! {
|
||||
pub type __half = u16;
|
||||
|
@ -701,7 +935,12 @@ fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module: &syn::File) {
|
||||
fn generate_functions(
|
||||
output: &PathBuf,
|
||||
submodule: &str,
|
||||
path: &[&str],
|
||||
module: &syn::File,
|
||||
) -> syn::File {
|
||||
let fns_ = module.items.iter().filter_map(|item| match item {
|
||||
Item::ForeignMod(extern_) => match &*extern_.items {
|
||||
[ForeignItem::Fn(fn_)] => Some(fn_),
|
||||
|
@ -709,18 +948,53 @@ fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module:
|
|||
},
|
||||
_ => None,
|
||||
});
|
||||
/*
|
||||
let prelude = match submodule {
|
||||
"cublaslt" => Some(quote! {
|
||||
use cuda_types::cublas::cublasStatus_t;
|
||||
}),
|
||||
"cublas" => Some(quote! {
|
||||
use cuda_types::cublas::cublasStatus_t;
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
*/
|
||||
let mut module: syn::File = parse_quote! {
|
||||
extern "system" {
|
||||
#(#fns_)*
|
||||
}
|
||||
};
|
||||
let submodule = Ident::new(submodule, Span::call_site());
|
||||
syn::visit_mut::visit_file_mut(&mut PrependCudaPath { module: submodule }, &mut module);
|
||||
syn::visit_mut::visit_file_mut(
|
||||
&mut PrependCudaPath {
|
||||
module: vec![Ident::new("cuda_types", Span::call_site()), submodule],
|
||||
},
|
||||
&mut module,
|
||||
);
|
||||
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
|
||||
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module));
|
||||
module
|
||||
/*
|
||||
module
|
||||
.items
|
||||
.iter()
|
||||
.flat_map(|item| match item {
|
||||
Item::ForeignMod(extern_) => {
|
||||
extern_
|
||||
.items
|
||||
.iter()
|
||||
.filter_map(|foreign_item| match foreign_item {
|
||||
ForeignItem::Fn(fn_) => Some(fn_.sig.ident.clone()),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
*/
|
||||
}
|
||||
|
||||
fn generate_types_cuda(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
|
@ -892,17 +1166,24 @@ impl VisitMut for FixAbi {
|
|||
}
|
||||
|
||||
struct PrependCudaPath {
|
||||
module: Ident,
|
||||
module: Vec<Ident>,
|
||||
}
|
||||
|
||||
impl VisitMut for PrependCudaPath {
|
||||
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
|
||||
if type_.path.segments.len() == 1 {
|
||||
match &*type_.path.segments[0].ident.to_string() {
|
||||
"usize" | "u32" | "i32" | "u64" | "i64" | "f64" | "f32" | "FILE" => {}
|
||||
"usize" | "u32" | "i32" | "u64" | "i64" | "f64" | "f32" => {}
|
||||
"FILE" => {
|
||||
*type_ = parse_quote! { cuda_types :: FILE };
|
||||
}
|
||||
"cublasStatus_t" => {
|
||||
let module = self.module.iter().rev().skip(1).rev();
|
||||
*type_ = parse_quote! { #(#module :: )* cublas :: #type_ };
|
||||
}
|
||||
_ => {
|
||||
let module = &self.module;
|
||||
*type_ = parse_quote! { cuda_types :: #module :: #type_ };
|
||||
*type_ = parse_quote! { #(#module :: )* #type_ };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -927,7 +1208,7 @@ impl VisitMut for ExplicitReturnType {
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_display(
|
||||
fn generate_display_cuda(
|
||||
output: &PathBuf,
|
||||
path: &[&str],
|
||||
types_crate: &[&'static str],
|
||||
|
@ -954,8 +1235,8 @@ fn generate_display(
|
|||
"CUeglFrame_st",
|
||||
"CUdevResource_st",
|
||||
"CUlaunchAttribute_st",
|
||||
"CUlaunchConfig_st",
|
||||
"CUmemcpy3DOperand_st",
|
||||
"CUlaunchConfig_st",
|
||||
];
|
||||
let ignore_functions = [
|
||||
"cuGLGetDevices",
|
||||
|
@ -984,7 +1265,7 @@ fn generate_display(
|
|||
let mut items = module
|
||||
.items
|
||||
.iter()
|
||||
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
|
||||
.filter_map(|i| cuda_derive_display_trait_for_item(types_crate, &mut derive_state, i))
|
||||
.collect::<Vec<_>>();
|
||||
items.push(curesult_display_trait(&derive_state));
|
||||
let mut output = output.clone();
|
||||
|
@ -999,6 +1280,50 @@ fn generate_display(
|
|||
);
|
||||
}
|
||||
|
||||
fn generate_display_perflib(
|
||||
output: &PathBuf,
|
||||
path: &[&str],
|
||||
types_crate: &[&'static str],
|
||||
module: &syn::File,
|
||||
) {
|
||||
let ignore_types = [
|
||||
"cublasLtMatrixLayoutOpaque_t",
|
||||
"cublasLtMatmulDescOpaque_t",
|
||||
"cublasLtMatrixTransformDescOpaque_t",
|
||||
"cublasLtMatmulPreferenceOpaque_t",
|
||||
"cublasLogCallback",
|
||||
"cudnnBackendDescriptor_t",
|
||||
"cublasLtLoggerCallback_t",
|
||||
"cusparseLoggerCallback_t",
|
||||
];
|
||||
let ignore_functions = [];
|
||||
let count_selectors = [
|
||||
("cudnnBackendSetAttribute", 4, 3),
|
||||
("cudnnBackendGetAttribute", 5, 4),
|
||||
];
|
||||
let mut derive_state = DeriveDisplayState::new(
|
||||
&ignore_types,
|
||||
types_crate,
|
||||
&ignore_functions,
|
||||
&count_selectors,
|
||||
);
|
||||
let items = module
|
||||
.items
|
||||
.iter()
|
||||
.filter_map(|i| cuda_derive_display_trait_for_item(types_crate, &mut derive_state, i))
|
||||
.collect::<Vec<_>>();
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(
|
||||
output,
|
||||
&prettyplease::unparse(&syn::File {
|
||||
shebang: None,
|
||||
attrs: Vec::new(),
|
||||
items,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
struct DeriveDisplayState<'a> {
|
||||
types_crate: Path,
|
||||
ignore_types: FxHashSet<Ident>,
|
||||
|
@ -1057,13 +1382,17 @@ impl<'a> DeriveDisplayState<'a> {
|
|||
}
|
||||
|
||||
fn cuda_derive_display_trait_for_item<'a>(
|
||||
path: &[&str],
|
||||
state: &mut DeriveDisplayState<'a>,
|
||||
item: &'a Item,
|
||||
) -> Option<syn::Item> {
|
||||
let path_prefix = &state.types_crate;
|
||||
let path_prefix_iter = iter::repeat(&path_prefix);
|
||||
let mut prepend_path = PrependCudaPath {
|
||||
module: Ident::new("cuda", Span::call_site()),
|
||||
module: path
|
||||
.iter()
|
||||
.map(|segment| Ident::new(segment, Span::call_site()))
|
||||
.collect(),
|
||||
};
|
||||
match item {
|
||||
Item::Const(const_) => {
|
||||
|
@ -1101,14 +1430,14 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
if i != 0 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
crate::format::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
|
||||
crate::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
|
||||
}
|
||||
writer.write_all(b"]")?;
|
||||
}
|
||||
} else {
|
||||
quote! {
|
||||
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
|
||||
crate::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -1157,7 +1486,7 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
let enum_iter = iter::repeat(&item_struct.ident);
|
||||
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #enum_ {
|
||||
impl crate::CudaDisplay for #path_prefix :: #enum_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
|
||||
|
@ -1184,13 +1513,13 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
None => return None,
|
||||
};
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
|
||||
impl crate::CudaDisplay for #path_prefix :: #struct_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
|
||||
crate::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
|
||||
#(
|
||||
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
|
||||
crate::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
|
||||
)*
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
|
@ -1199,7 +1528,7 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
}
|
||||
Fields::Unnamed(FieldsUnnamed { ref unnamed, .. }) if unnamed.len() == 1 => {
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
|
||||
impl crate::CudaDisplay for #path_prefix :: #struct_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", self.0)
|
||||
}
|
||||
|
@ -1218,9 +1547,13 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
Type::Ptr(_) => {
|
||||
let type_ = &item_type.ident;
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
|
||||
impl crate::CudaDisplay for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", *self)
|
||||
if self.is_null() {
|
||||
writer.write_all(b"NULL")
|
||||
} else {
|
||||
write!(writer, "{:p}", *self)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
@ -1234,7 +1567,7 @@ fn cuda_derive_display_trait_for_item<'a>(
|
|||
syn::GenericArgument::Type(Type::BareFn(_)) => {
|
||||
let type_ = &item_type.ident;
|
||||
return Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
|
||||
impl crate::CudaDisplay for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
|
||||
}
|
||||
|
@ -1280,7 +1613,7 @@ fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
|
|||
})
|
||||
});
|
||||
parse_quote! {
|
||||
impl crate::format::CudaDisplay for cuda_types::cuda::CUresult {
|
||||
impl crate::CudaDisplay for cuda_types::cuda::CUresult {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),
|
||||
|
|
9078
zluda_bindgen/src/process_table.rs
Normal file
9078
zluda_bindgen/src/process_table.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
use cuda_types::cublaslt::cublasStatus_t;
|
||||
use cuda_types::cublas::cublasStatus_t;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> cublasStatus_t {
|
||||
|
@ -12,14 +12,14 @@ pub(crate) fn unimplemented() -> cublasStatus_t {
|
|||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn cublasLtGetStatusName(
|
||||
_status: cuda_types::cublaslt::cublasStatus_t,
|
||||
_status: cuda_types::cublas::cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn cublasLtGetStatusString(
|
||||
_status: cuda_types::cublaslt::cublasStatus_t,
|
||||
_status: cuda_types::cublas::cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
todo!()
|
||||
}
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
mod r#impl;
|
||||
|
||||
pub enum FILE { }
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
|
|
|
@ -1,39 +1,46 @@
|
|||
[package]
|
||||
name = "zluda_dump"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
ptx = { path = "../ptx" }
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
lz4-sys = "1.9"
|
||||
regex = "1.4"
|
||||
dynasm = "1.2"
|
||||
dynasmrt = "1.2"
|
||||
lazy_static = "1.4"
|
||||
# we don't need elf32, but goblin has a bug where elf64 does not build without elf32
|
||||
goblin = { version = "0.4", default-features = false, features = ["elf64", "elf32", "archive"] }
|
||||
paste = "1.0"
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
|
||||
wchar = "0.6"
|
||||
detours-sys = { path = "../detours-sys" }
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcuda.so",
|
||||
"dump/libcuda.so.1",
|
||||
"dump_nvidia/libcuda.so",
|
||||
"dump_nvidia/libcuda.so.1",
|
||||
]
|
||||
[package]
|
||||
name = "zluda_dump"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
ptx = { path = "../ptx" }
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
lz4-sys = "1.9"
|
||||
regex = "1.4"
|
||||
dynasm = "1.2"
|
||||
dynasmrt = "1.2"
|
||||
# we don't need elf32, but goblin has a bug where elf64 does not build without elf32
|
||||
goblin = { version = "0.4", default-features = false, features = ["elf64", "elf32", "archive"] }
|
||||
paste = "1.0"
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
parking_lot = "0.12.3"
|
||||
rustc-hash = "1.1.0"
|
||||
cglue = "0.3.5"
|
||||
zstd-safe = { version = "7.2.4", features = ["std"] }
|
||||
unwrap_or = "1.0.1"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
|
||||
wchar = "0.6"
|
||||
detours-sys = { path = "../detours-sys" }
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcuda.so",
|
||||
"dump/libcuda.so.1",
|
||||
"dump_nvidia/libcuda.so",
|
||||
"dump_nvidia/libcuda.so.1",
|
||||
]
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
grep -E '^cu.*' log.txt | sed 's/([^)]*)//g' | sort | uniq > uniq_host.txt
|
||||
cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
|
||||
cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt
|
|
@ -1,25 +1,120 @@
|
|||
use crate::format;
|
||||
use crate::{log, os, trace::StateTracker};
|
||||
use crate::{log::UInt, GlobalDelayedState};
|
||||
use crate::os;
|
||||
use crate::{CudaFunctionName, ErrorEntry};
|
||||
use cuda_types::cuda::*;
|
||||
use std::borrow::Cow;
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::cell::RefMut;
|
||||
use std::hash::Hash;
|
||||
use std::{
|
||||
collections::{hash_map, HashMap},
|
||||
ffi::c_void,
|
||||
mem,
|
||||
os::raw::{c_int, c_uint, c_ulong, c_ushort},
|
||||
ptr, slice,
|
||||
};
|
||||
use std::{collections::hash_map, ffi::c_void, mem};
|
||||
|
||||
pub(crate) struct DarkApiState {
|
||||
pub(crate) struct DarkApiState2 {
|
||||
// Key is Box<CUuuid, because thunk reporting unknown export table needs a
|
||||
// stablememory location for the guid
|
||||
overrides: HashMap<Box<CUuuidWrapper>, Vec<*const c_void>>,
|
||||
original: OriginalExports,
|
||||
// stable memory location for the guid
|
||||
pub(crate) overrides: FxHashMap<Box<CUuuidWrapper>, (*const *const c_void, Vec<*const c_void>)>,
|
||||
}
|
||||
|
||||
unsafe impl Send for DarkApiState2 {}
|
||||
unsafe impl Sync for DarkApiState2 {}
|
||||
|
||||
impl DarkApiState2 {
|
||||
pub(crate) fn new() -> Self {
|
||||
DarkApiState2 {
|
||||
overrides: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn override_export_table(
|
||||
&mut self,
|
||||
known_exports: &::dark_api::cuda::CudaDarkApiGlobalTable,
|
||||
original_export_table: *const *const c_void,
|
||||
guid: &CUuuid_st,
|
||||
) -> (*const *const c_void, Option<ErrorEntry>) {
|
||||
let entry = match self.overrides.entry(Box::new(CUuuidWrapper(*guid))) {
|
||||
hash_map::Entry::Occupied(entry) => {
|
||||
let (_, override_table) = entry.get();
|
||||
return (override_table.as_ptr(), None);
|
||||
}
|
||||
hash_map::Entry::Vacant(entry) => entry,
|
||||
};
|
||||
let mut error = None;
|
||||
let byte_size: usize = unsafe { *(original_export_table.cast::<usize>()) };
|
||||
// Some export tables don't start with a byte count, but directly with a
|
||||
// pointer, and are instead terminated by 0 or MAX
|
||||
let export_functions_start_idx;
|
||||
let export_functions_size;
|
||||
if byte_size > 0x10000 {
|
||||
export_functions_start_idx = 0;
|
||||
let mut i = 0;
|
||||
loop {
|
||||
let current_ptr = unsafe { original_export_table.add(i) };
|
||||
let current_ptr_numeric = unsafe { *current_ptr } as usize;
|
||||
if current_ptr_numeric == 0usize || current_ptr_numeric == usize::MAX {
|
||||
export_functions_size = i;
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
export_functions_start_idx = 1;
|
||||
export_functions_size = byte_size / mem::size_of::<usize>();
|
||||
}
|
||||
let our_functions = known_exports.get(guid);
|
||||
if let Some(ref our_functions) = our_functions {
|
||||
if our_functions.len() != export_functions_size {
|
||||
error = Some(ErrorEntry::UnexpectedExportTableSize {
|
||||
expected: our_functions.len(),
|
||||
computed: export_functions_size,
|
||||
});
|
||||
}
|
||||
}
|
||||
let mut override_table =
|
||||
unsafe { std::slice::from_raw_parts(original_export_table, export_functions_size) }
|
||||
.to_vec();
|
||||
for i in export_functions_start_idx..export_functions_size {
|
||||
let current_fn = (|| {
|
||||
if let Some(ref our_functions) = our_functions {
|
||||
if let Some(fn_) = our_functions.get_fn(i) {
|
||||
return fn_;
|
||||
}
|
||||
}
|
||||
os::get_thunk(
|
||||
override_table[i],
|
||||
Self::report_unknown_export_table_call,
|
||||
std::ptr::from_ref(entry.key().as_ref()).cast(),
|
||||
i,
|
||||
)
|
||||
})();
|
||||
override_table[i] = current_fn;
|
||||
}
|
||||
(
|
||||
entry
|
||||
.insert((original_export_table, override_table))
|
||||
.1
|
||||
.as_ptr(),
|
||||
error,
|
||||
)
|
||||
}
|
||||
|
||||
unsafe extern "system" fn report_unknown_export_table_call(guid: &CUuuid, index: usize) {
|
||||
let global_state = crate::GLOBAL_STATE2.lock();
|
||||
let global_state_ref_cell = &*global_state;
|
||||
let mut global_state_ref_mut = global_state_ref_cell.borrow_mut();
|
||||
let global_state = &mut *global_state_ref_mut;
|
||||
let log_guard = crate::OuterCallGuard {
|
||||
writer: &mut global_state.log_writer,
|
||||
log_root: &global_state.log_stack,
|
||||
};
|
||||
{
|
||||
let mut logger = RefMut::map(global_state.log_stack.borrow_mut(), |log_stack| {
|
||||
log_stack.enter()
|
||||
});
|
||||
logger.name = CudaFunctionName::Dark { guid: *guid, index };
|
||||
};
|
||||
drop(log_guard);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
#[repr(transparent)]
|
||||
pub(crate) struct CUuuidWrapper(pub CUuuid);
|
||||
|
||||
impl Hash for CUuuidWrapper {
|
||||
|
@ -27,589 +122,3 @@ impl Hash for CUuuidWrapper {
|
|||
self.0.bytes.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub(crate) struct OriginalExports {
|
||||
original_get_module_from_cubin: Option<
|
||||
unsafe extern "system" fn(
|
||||
result: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
) -> CUresult,
|
||||
>,
|
||||
original_get_module_from_cubin_ext1: Option<
|
||||
unsafe extern "system" fn(
|
||||
result: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
ptr1: *mut c_void,
|
||||
ptr2: *mut c_void,
|
||||
_unknown: usize,
|
||||
) -> CUresult,
|
||||
>,
|
||||
original_get_module_from_cubin_ext2: Option<
|
||||
unsafe extern "system" fn(
|
||||
fatbinc_wrapper: *const FatbinHeader,
|
||||
result: *mut CUmodule,
|
||||
ptr1: *mut c_void,
|
||||
ptr2: *mut c_void,
|
||||
_unknown: usize,
|
||||
) -> CUresult,
|
||||
>,
|
||||
}
|
||||
|
||||
impl DarkApiState {
|
||||
pub(crate) fn new() -> Self {
|
||||
let original = OriginalExports {
|
||||
original_get_module_from_cubin: None,
|
||||
original_get_module_from_cubin_ext1: None,
|
||||
original_get_module_from_cubin_ext2: None,
|
||||
};
|
||||
DarkApiState {
|
||||
overrides: HashMap::new(),
|
||||
original,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn override_export_table(
|
||||
pp_export_table: *mut *const c_void,
|
||||
p_export_table_id: *const CUuuid,
|
||||
state: &mut crate::trace::StateTracker,
|
||||
) {
|
||||
let state = &mut state.dark_api;
|
||||
let export_table_mut = unsafe { &mut *pp_export_table };
|
||||
let export_id = Box::new(CUuuidWrapper(unsafe { *p_export_table_id }));
|
||||
*export_table_mut = match state.overrides.entry(export_id) {
|
||||
hash_map::Entry::Occupied(entry) => entry.get().as_ptr() as *const _,
|
||||
hash_map::Entry::Vacant(entry) => {
|
||||
let guid_ptr = unsafe {
|
||||
mem::transmute::<*const CUuuidWrapper, *const CUuuid>(&**entry.key() as *const _)
|
||||
};
|
||||
entry
|
||||
.insert(unsafe {
|
||||
create_new_override(*pp_export_table as *const _, guid_ptr, &mut state.original)
|
||||
})
|
||||
.as_ptr() as *const _
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
unsafe fn create_new_override(
|
||||
export_table: *const *const c_void,
|
||||
export_id: *const CUuuid,
|
||||
state: &mut OriginalExports,
|
||||
) -> Vec<*const c_void> {
|
||||
let mut byte_length: usize = *(export_table as *const usize);
|
||||
// Some export tables don't start with a byte count, but directly with a
|
||||
// pointer, and are instead terminated by 0 or MAX
|
||||
let export_functions_start_idx;
|
||||
let mut override_table = Vec::new();
|
||||
if byte_length > 0x10000 {
|
||||
export_functions_start_idx = 0;
|
||||
let mut i = 0;
|
||||
loop {
|
||||
let current_fn = export_table.add(i);
|
||||
let current_fn_numeric = *current_fn as usize;
|
||||
if current_fn_numeric == 0usize || current_fn_numeric == usize::MAX {
|
||||
byte_length = (i + 1) * mem::size_of::<usize>();
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
override_table.push(byte_length as *const _);
|
||||
export_functions_start_idx = 1;
|
||||
}
|
||||
for i in export_functions_start_idx..(byte_length / mem::size_of::<usize>()) {
|
||||
let current_fn = export_table.add(i);
|
||||
override_table.push(get_export_override_fn(state, *current_fn, export_id, i));
|
||||
}
|
||||
override_table
|
||||
}
|
||||
|
||||
unsafe extern "system" fn report_unknown_export_table_call(
|
||||
export_table: *const CUuuid,
|
||||
idx: usize,
|
||||
) {
|
||||
if let Ok(mut global_state) = crate::GLOBAL_STATE.lock() {
|
||||
let mut logger = global_state
|
||||
.log_factory
|
||||
.get_logger_dark_api(*export_table, idx, None);
|
||||
logger.log(log::LogEntry::UnknownExportTableFn)
|
||||
}
|
||||
}
|
||||
|
||||
const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
|
||||
0xf9,
|
||||
],
|
||||
};
|
||||
|
||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
|
||||
0x66,
|
||||
],
|
||||
};
|
||||
|
||||
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
|
||||
0x93,
|
||||
],
|
||||
};
|
||||
|
||||
const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72,
|
||||
0x86,
|
||||
],
|
||||
};
|
||||
|
||||
const HEAP_ACCESS_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31,
|
||||
0xAE,
|
||||
],
|
||||
};
|
||||
|
||||
const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid {
|
||||
bytes: [
|
||||
0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1,
|
||||
0xE2,
|
||||
],
|
||||
};
|
||||
|
||||
unsafe fn get_export_override_fn(
|
||||
state: &mut OriginalExports,
|
||||
original_fn: *const c_void,
|
||||
guid: *const CUuuid,
|
||||
idx: usize,
|
||||
) -> *const c_void {
|
||||
match (*guid, idx) {
|
||||
(TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 2)
|
||||
| (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 6)
|
||||
| (CUDART_INTERFACE_GUID, 2)
|
||||
| (CUDART_INTERFACE_GUID, 7)
|
||||
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 0)
|
||||
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 1)
|
||||
| (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 2)
|
||||
| (CTX_CREATE_BYPASS_GUID, 1)
|
||||
| (HEAP_ACCESS_GUID, 1)
|
||||
| (HEAP_ACCESS_GUID, 2)
|
||||
| (DEVICE_EXTENDED_RT_GUID, 5)
|
||||
| (DEVICE_EXTENDED_RT_GUID, 13) => original_fn,
|
||||
(CUDART_INTERFACE_GUID, 1) => {
|
||||
state.original_get_module_from_cubin = mem::transmute(original_fn);
|
||||
get_module_from_cubin as *const _
|
||||
}
|
||||
(CUDART_INTERFACE_GUID, 6) => {
|
||||
state.original_get_module_from_cubin_ext1 = mem::transmute(original_fn);
|
||||
get_module_from_cubin_ext1 as *const _
|
||||
}
|
||||
(CUDART_INTERFACE_GUID, 8) => {
|
||||
state.original_get_module_from_cubin_ext2 = mem::transmute(original_fn);
|
||||
get_module_from_cubin_ext2 as *const _
|
||||
}
|
||||
_ => {
|
||||
// terminator if it's an export table that is not size-prefixed
|
||||
if original_fn == ptr::null() || (original_fn as usize) == usize::MAX {
|
||||
ptr::null()
|
||||
} else {
|
||||
os::get_thunk(original_fn, report_unknown_export_table_call, guid, idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const FATBINC_MAGIC: c_uint = 0x466243B1;
|
||||
const FATBINC_VERSION_V1: c_uint = 0x1;
|
||||
const FATBINC_VERSION_V2: c_uint = 0x2;
|
||||
|
||||
#[repr(C)]
|
||||
struct FatbincWrapper {
|
||||
magic: c_uint,
|
||||
version: c_uint,
|
||||
data: *const FatbinHeader,
|
||||
filename_or_fatbins: *const c_void,
|
||||
}
|
||||
|
||||
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
|
||||
const FATBIN_VERSION: c_ushort = 0x01;
|
||||
|
||||
#[repr(C, align(8))]
|
||||
struct FatbinHeader {
|
||||
magic: c_uint,
|
||||
version: c_ushort,
|
||||
header_size: c_ushort,
|
||||
files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame
|
||||
}
|
||||
|
||||
const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
|
||||
const FATBIN_FILE_HEADER_KIND_ELF: c_ushort = 0x02;
|
||||
const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
|
||||
|
||||
// assembly file header is a bit different, but we don't care
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
struct FatbinFileHeader {
|
||||
kind: c_ushort,
|
||||
version: c_ushort,
|
||||
header_size: c_uint,
|
||||
padded_payload_size: c_uint,
|
||||
unknown0: c_uint, // check if it's written into separately
|
||||
payload_size: c_uint,
|
||||
unknown1: c_uint,
|
||||
unknown2: c_uint,
|
||||
sm_version: c_uint,
|
||||
bit_width: c_uint,
|
||||
unknown3: c_uint,
|
||||
unknown4: c_ulong,
|
||||
unknown5: c_ulong,
|
||||
uncompressed_payload: c_ulong,
|
||||
}
|
||||
|
||||
unsafe fn record_submodules_from_wrapped_fatbin(
|
||||
module: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
delayed_state: &mut GlobalDelayedState,
|
||||
original_fn: impl FnOnce(&OriginalExports) -> CUresult,
|
||||
) -> CUresult {
|
||||
let result = original_fn(&delayed_state.cuda_state.dark_api.original);
|
||||
fn_logger.result = Some(result);
|
||||
let magic = (*fatbinc_wrapper).magic;
|
||||
if magic != FATBINC_MAGIC {
|
||||
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: "FATBINC_MAGIC",
|
||||
expected: vec![UInt::U32(FATBINC_MAGIC)],
|
||||
observed: UInt::U32(magic),
|
||||
});
|
||||
}
|
||||
if (*fatbinc_wrapper).version != FATBINC_VERSION_V1
|
||||
&& (*fatbinc_wrapper).version != FATBINC_VERSION_V2
|
||||
{
|
||||
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: "FATBINC_VERSION",
|
||||
expected: vec![UInt::U32(FATBINC_VERSION_V1), UInt::U32(FATBINC_VERSION_V2)],
|
||||
observed: UInt::U32(magic),
|
||||
});
|
||||
}
|
||||
let is_version_2 = (*fatbinc_wrapper).version == FATBINC_VERSION_V2;
|
||||
record_submodules_from_fatbin(
|
||||
*module,
|
||||
(*fatbinc_wrapper).data,
|
||||
if is_version_2 { Some(1) } else { None },
|
||||
fn_logger,
|
||||
&mut delayed_state.cuda_state,
|
||||
);
|
||||
if is_version_2 {
|
||||
let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void;
|
||||
while *current != ptr::null() {
|
||||
record_submodules_from_fatbin(
|
||||
*module,
|
||||
*current as *const _,
|
||||
Some(2),
|
||||
fn_logger,
|
||||
&mut delayed_state.cuda_state,
|
||||
);
|
||||
current = current.add(1);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
unsafe fn record_submodules_from_fatbin(
|
||||
module: CUmodule,
|
||||
fatbin_header: *const FatbinHeader,
|
||||
fatbin_version: Option<usize>,
|
||||
logger: &mut log::FunctionLogger,
|
||||
state: &mut StateTracker,
|
||||
) {
|
||||
let magic = (*fatbin_header).magic;
|
||||
if magic != FATBIN_MAGIC {
|
||||
logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: "FATBIN_MAGIC",
|
||||
expected: vec![UInt::U32(FATBIN_MAGIC)],
|
||||
observed: UInt::U32(magic),
|
||||
});
|
||||
return;
|
||||
}
|
||||
let version = (*fatbin_header).version;
|
||||
if version != FATBIN_VERSION {
|
||||
logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: "FATBIN_VERSION",
|
||||
expected: vec![UInt::U16(FATBIN_VERSION)],
|
||||
observed: UInt::U16(version),
|
||||
});
|
||||
return;
|
||||
}
|
||||
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
|
||||
let end = file.add((*fatbin_header).files_size as usize);
|
||||
record_submodules(
|
||||
fatbin_version == Some(2),
|
||||
module,
|
||||
fatbin_version,
|
||||
logger,
|
||||
state,
|
||||
file,
|
||||
end,
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin(
|
||||
module: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
) -> CUresult {
|
||||
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(stringify!(module).as_bytes())?;
|
||||
writer.write_all(b": ")?;
|
||||
format::CudaDisplay::write(&module, "", 0, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(stringify!(fatbinc_wrapper).as_bytes())?;
|
||||
write!(writer, ": {:p})", fatbinc_wrapper)
|
||||
});
|
||||
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
|
||||
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
|
||||
CUDART_INTERFACE_GUID,
|
||||
1,
|
||||
Some(arguments_writer),
|
||||
);
|
||||
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
|
||||
let delayed_state = global_state.delayed_state.unwrap_mut();
|
||||
record_submodules_from_wrapped_fatbin(
|
||||
module,
|
||||
fatbinc_wrapper,
|
||||
&mut fn_logger,
|
||||
delayed_state,
|
||||
|original_exports| {
|
||||
original_exports.original_get_module_from_cubin.unwrap()(module, fatbinc_wrapper)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin_ext1(
|
||||
module: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
ptr1: *mut c_void,
|
||||
ptr2: *mut c_void,
|
||||
_unknown: usize,
|
||||
) -> CUresult {
|
||||
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(stringify!(module).as_bytes())?;
|
||||
writer.write_all(b": ")?;
|
||||
format::CudaDisplay::write(&module, "", 0, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(stringify!(fatbinc_wrapper).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", fatbinc_wrapper)?;
|
||||
writer.write_all(stringify!(ptr1).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", ptr1)?;
|
||||
writer.write_all(stringify!(ptr2).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", ptr2)?;
|
||||
writer.write_all(stringify!(_unknown).as_bytes())?;
|
||||
write!(writer, ": {})", _unknown)
|
||||
});
|
||||
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
|
||||
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
|
||||
CUDART_INTERFACE_GUID,
|
||||
6,
|
||||
Some(arguments_writer),
|
||||
);
|
||||
if ptr1 != ptr::null_mut() {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(ptr1),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(ptr1 as usize),
|
||||
});
|
||||
}
|
||||
if ptr2 != ptr::null_mut() {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(ptr2),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(ptr2 as usize),
|
||||
});
|
||||
}
|
||||
if _unknown != 0 {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(_unknown),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(_unknown),
|
||||
});
|
||||
}
|
||||
let delayed_state = global_state.delayed_state.unwrap_mut();
|
||||
record_submodules_from_wrapped_fatbin(
|
||||
module,
|
||||
fatbinc_wrapper,
|
||||
&mut fn_logger,
|
||||
delayed_state,
|
||||
|original_exports| {
|
||||
original_exports
|
||||
.original_get_module_from_cubin_ext1
|
||||
.unwrap()(module, fatbinc_wrapper, ptr1, ptr2, _unknown)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin_ext2(
|
||||
fatbin_header: *const FatbinHeader,
|
||||
module: *mut CUmodule,
|
||||
ptr1: *mut c_void,
|
||||
ptr2: *mut c_void,
|
||||
_unknown: usize,
|
||||
) -> CUresult {
|
||||
let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
|
||||
writer.write_all(b"(")?;
|
||||
writer.write_all(stringify!(fatbin_header).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", fatbin_header)?;
|
||||
writer.write_all(stringify!(module).as_bytes())?;
|
||||
writer.write_all(b": ")?;
|
||||
format::CudaDisplay::write(&module, "", 0, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(stringify!(ptr1).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", ptr1)?;
|
||||
writer.write_all(stringify!(ptr2).as_bytes())?;
|
||||
write!(writer, ": {:p}, ", ptr2)?;
|
||||
writer.write_all(stringify!(_unknown).as_bytes())?;
|
||||
write!(writer, ": {})", _unknown)
|
||||
});
|
||||
let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
|
||||
let mut fn_logger = global_state.log_factory.get_logger_dark_api(
|
||||
CUDART_INTERFACE_GUID,
|
||||
8,
|
||||
Some(arguments_writer),
|
||||
);
|
||||
if ptr1 != ptr::null_mut() {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(ptr1),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(ptr1 as usize),
|
||||
});
|
||||
}
|
||||
if ptr2 != ptr::null_mut() {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(ptr2),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(ptr2 as usize),
|
||||
});
|
||||
}
|
||||
if _unknown != 0 {
|
||||
fn_logger.log(log::LogEntry::UnexpectedArgument {
|
||||
arg_name: stringify!(_unknown),
|
||||
expected: vec![UInt::USize(0)],
|
||||
observed: UInt::USize(_unknown),
|
||||
});
|
||||
}
|
||||
let delayed_state = global_state.delayed_state.unwrap_mut();
|
||||
let result = delayed_state
|
||||
.cuda_state
|
||||
.dark_api
|
||||
.original
|
||||
.original_get_module_from_cubin_ext2
|
||||
.unwrap()(fatbin_header, module, ptr1, ptr2, _unknown);
|
||||
fn_logger.result = Some(result);
|
||||
if result.is_err() {
|
||||
return result;
|
||||
}
|
||||
record_submodules_from_fatbin(
|
||||
*module,
|
||||
fatbin_header,
|
||||
None,
|
||||
&mut fn_logger,
|
||||
&mut delayed_state.cuda_state,
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
unsafe fn record_submodules(
|
||||
should_decompress_elf: bool,
|
||||
module: CUmodule,
|
||||
version: Option<usize>,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
state: &mut StateTracker,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) {
|
||||
let mut index = start;
|
||||
while index < end {
|
||||
let fatbin_file = index as *const FatbinFileHeader;
|
||||
let fatbin_file_version = (*fatbin_file).version;
|
||||
if fatbin_file_version != FATBIN_FILE_HEADER_VERSION_CURRENT {
|
||||
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: stringify!(fatbin_file_version),
|
||||
expected: vec![UInt::U16(FATBIN_FILE_HEADER_VERSION_CURRENT)],
|
||||
observed: UInt::U16(fatbin_file_version),
|
||||
});
|
||||
}
|
||||
let fatbin_file_kind = (*fatbin_file).kind;
|
||||
if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_PTX {
|
||||
let decompressed = decompress_kernel_module(fatbin_file);
|
||||
match decompressed {
|
||||
Some(mut decompressed) => {
|
||||
decompressed.pop(); // remove trailing zero
|
||||
state.record_new_submodule(module, version, &*decompressed, fn_logger, "ptx")
|
||||
}
|
||||
None => fn_logger.log(log::LogEntry::Lz4DecompressionFailure),
|
||||
}
|
||||
} else if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_ELF {
|
||||
let source_buffer = if should_decompress_elf {
|
||||
let decompressed = decompress_kernel_module(fatbin_file);
|
||||
match decompressed {
|
||||
Some(decompressed) => Cow::Owned(decompressed),
|
||||
None => {
|
||||
fn_logger.log(log::LogEntry::Lz4DecompressionFailure);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Cow::Borrowed(slice::from_raw_parts(
|
||||
(fatbin_file as *const u8).add((*fatbin_file).header_size as usize),
|
||||
(*fatbin_file).padded_payload_size as usize,
|
||||
))
|
||||
};
|
||||
state.record_new_submodule(module, version, &*source_buffer, fn_logger, "elf")
|
||||
} else {
|
||||
fn_logger.log(log::LogEntry::UnexpectedBinaryField {
|
||||
field_name: stringify!(fatbin_file_kind),
|
||||
expected: vec![
|
||||
UInt::U16(FATBIN_FILE_HEADER_KIND_PTX),
|
||||
UInt::U16(FATBIN_FILE_HEADER_KIND_ELF),
|
||||
],
|
||||
observed: UInt::U16(fatbin_file_kind),
|
||||
});
|
||||
}
|
||||
index = index
|
||||
.add((*fatbin_file).header_size as usize + (*fatbin_file).padded_payload_size as usize);
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024;
|
||||
|
||||
unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<u8>> {
|
||||
let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
|
||||
let mut decompressed_vec = vec![0u8; decompressed_size];
|
||||
loop {
|
||||
match lz4_sys::LZ4_decompress_safe(
|
||||
(file as *const u8).add((*file).header_size as usize) as *const _,
|
||||
decompressed_vec.as_mut_ptr() as *mut _,
|
||||
(*file).payload_size as c_int,
|
||||
decompressed_vec.len() as c_int,
|
||||
) {
|
||||
error if error < 0 => {
|
||||
let new_size = decompressed_vec.len() * 2;
|
||||
if new_size > MAX_MODULE_DECOMPRESSION_BOUND {
|
||||
return None;
|
||||
}
|
||||
decompressed_vec.resize(decompressed_vec.len() * 2, 0);
|
||||
}
|
||||
real_decompressed_size => {
|
||||
decompressed_vec.truncate(real_decompressed_size as usize);
|
||||
return Some(decompressed_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,55 +0,0 @@
|
|||
/*
|
||||
This collection of functions is here to assist with debugging
|
||||
You use it by manually pasting into a module.ptx that was generated by zluda_dump
|
||||
and inspecting content of additional debug buffer in replay.py
|
||||
*/
|
||||
|
||||
.func debug_dump_from_thread_16(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b16 value)
|
||||
{
|
||||
.reg.u32 local_id;
|
||||
mov.u32 local_id, %tid.x;
|
||||
.reg.u32 local_size;
|
||||
mov.u32 local_size, %ntid.x;
|
||||
.reg.u32 group_id;
|
||||
mov.u32 group_id, %ctaid.x;
|
||||
.reg.b32 global_id;
|
||||
mad.lo.u32 global_id, group_id, local_size, local_id;
|
||||
.reg.pred should_exit;
|
||||
setp.ne.u32 should_exit, global_id, global_id_0;
|
||||
@should_exit bra END;
|
||||
.reg.b32 index;
|
||||
ld.global.u32 index, [debug_addr];
|
||||
st.global.u32 [debug_addr], index+1;
|
||||
.reg.u64 st_offset;
|
||||
cvt.u64.u32 st_offset, index;
|
||||
mad.lo.u64 st_offset, st_offset, 2, 4; // sizeof(b16), sizeof(32)
|
||||
add.u64 debug_addr, debug_addr, st_offset;
|
||||
st.global.u16 [debug_addr], value;
|
||||
END:
|
||||
ret;
|
||||
}
|
||||
|
||||
.func debug_dump_from_thread_32(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b32 value)
|
||||
{
|
||||
.reg.u32 local_id;
|
||||
mov.u32 local_id, %tid.x;
|
||||
.reg.u32 local_size;
|
||||
mov.u32 local_size, %ntid.x;
|
||||
.reg.u32 group_id;
|
||||
mov.u32 group_id, %ctaid.x;
|
||||
.reg.b32 global_id;
|
||||
mad.lo.u32 global_id, group_id, local_size, local_id;
|
||||
.reg.pred should_exit;
|
||||
setp.ne.u32 should_exit, global_id, global_id_0;
|
||||
@should_exit bra END;
|
||||
.reg.b32 index;
|
||||
ld.global.u32 index, [debug_addr];
|
||||
st.global.u32 [debug_addr], index+1;
|
||||
.reg.u64 st_offset;
|
||||
cvt.u64.u32 st_offset, index;
|
||||
mad.lo.u64 st_offset, st_offset, 4, 4; // sizeof(b32), sizeof(32)
|
||||
add.u64 debug_addr, debug_addr, st_offset;
|
||||
st.global.u32 [debug_addr], value;
|
||||
END:
|
||||
ret;
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,8 @@
|
|||
use crate::format;
|
||||
use cuda_types::cuda::*;
|
||||
use super::Settings;
|
||||
use crate::FnCallLog;
|
||||
use crate::LogEntry;
|
||||
use cuda_types::cuda::*;
|
||||
use format::CudaDisplay;
|
||||
use std::error::Error;
|
||||
use std::ffi::c_void;
|
||||
use std::ffi::NulError;
|
||||
|
@ -14,18 +16,102 @@ use std::str::Utf8Error;
|
|||
|
||||
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
|
||||
|
||||
// This type holds all the relevant settings for logging like output path and
|
||||
// creates objects which match those settings
|
||||
pub(crate) struct Factory {
|
||||
// Fallible emitter is optional emitter to file system, we might lack
|
||||
pub(crate) struct Writer {
|
||||
// Fallible emitter is an optional emitter to the file system, we might lack
|
||||
// file permissions or be out of disk space
|
||||
fallible_emitter: Option<Box<dyn WriteTrailingZeroAware>>,
|
||||
fallible_emitter: Option<Box<dyn WriteTrailingZeroAware + Send>>,
|
||||
// This is emitter that "always works" (and if it does not, then we don't
|
||||
// care). In addition of normal logs it emits errors from fallible emitter
|
||||
infallible_emitter: Box<dyn WriteTrailingZeroAware>,
|
||||
infallible_emitter: Box<dyn WriteTrailingZeroAware + Send>,
|
||||
// This object could be recreated every time, but it's slightly better for performance to
|
||||
// reuse the allocations by keeping the object in globals
|
||||
write_buffer: WriteBuffer,
|
||||
// another shared buffer, so we dont't reallocate on every function call
|
||||
log_queue: Vec<LogEntry>,
|
||||
}
|
||||
|
||||
impl Writer {
|
||||
pub(crate) fn new() -> Self {
|
||||
let debug_emitter = os::new_debug_logger();
|
||||
Self {
|
||||
infallible_emitter: debug_emitter,
|
||||
fallible_emitter: None,
|
||||
write_buffer: WriteBuffer::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn late_init(&mut self, settings: &Settings) -> Result<(), ErrorEntry> {
|
||||
self.fallible_emitter = settings
|
||||
.dump_dir
|
||||
.as_ref()
|
||||
.map(|path| {
|
||||
Ok::<_, std::io::Error>(Box::new(File::create(path.to_path_buf().join("log.txt"))?)
|
||||
as Box<dyn WriteTrailingZeroAware + Send>)
|
||||
})
|
||||
.transpose()
|
||||
.map_err(ErrorEntry::IoError)?;
|
||||
self.write_buffer
|
||||
.init(&self.fallible_emitter, &self.infallible_emitter);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_and_flush(&mut self, log_root: &mut FnCallLog) {
|
||||
self.write_all_from_depth(0, log_root);
|
||||
self.write_buffer.finish();
|
||||
let error_from_writing_to_fallible_emitter = match self.fallible_emitter {
|
||||
Some(ref mut emitter) => self.write_buffer.send_to_and_flush(emitter),
|
||||
None => Ok(()),
|
||||
};
|
||||
if let Err(e) = error_from_writing_to_fallible_emitter {
|
||||
self.hack_squeeze_in_additional_error(ErrorEntry::IoError(e))
|
||||
}
|
||||
self.write_buffer
|
||||
.send_to_and_flush(&mut self.infallible_emitter)
|
||||
.ok();
|
||||
self.write_buffer.reset();
|
||||
log_root.reset();
|
||||
}
|
||||
|
||||
fn write_all_from_depth(&mut self, depth: usize, fn_call: &FnCallLog) {
|
||||
self.write_call(depth, fn_call);
|
||||
for sub in fn_call.subcalls.iter() {
|
||||
match sub {
|
||||
LogEntry::FnCall(fn_call) => self.write_all_from_depth(depth + 1, fn_call),
|
||||
LogEntry::Error(err) => self.write_error(depth + 1, err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_call(&mut self, depth: usize, call: &FnCallLog) {
|
||||
self.write_buffer.start_line(depth);
|
||||
write!(self.write_buffer, "{}", call.name).ok();
|
||||
match call.args {
|
||||
Some(ref args) => {
|
||||
self.write_buffer.write_all(args).ok();
|
||||
}
|
||||
None => {
|
||||
self.write_buffer.write_all(b"(...)").ok();
|
||||
}
|
||||
}
|
||||
self.write_buffer.write_all(b" -> ").ok();
|
||||
if let Some(ref result) = call.output {
|
||||
self.write_buffer.write_all(result).ok();
|
||||
} else {
|
||||
self.write_buffer.write_all(b"UNKNOWN").ok();
|
||||
};
|
||||
self.write_buffer.end_line();
|
||||
}
|
||||
|
||||
fn write_error(&mut self, depth: usize, error: &ErrorEntry) {
|
||||
self.write_buffer.start_line(depth);
|
||||
write!(self.write_buffer, "{}", error).ok();
|
||||
self.write_buffer.end_line();
|
||||
}
|
||||
|
||||
fn hack_squeeze_in_additional_error(&mut self, entry: ErrorEntry) {
|
||||
self.write_buffer.undo_finish();
|
||||
write!(self.write_buffer, " {}", entry).ok();
|
||||
self.write_buffer.end_line();
|
||||
self.write_buffer.finish();
|
||||
}
|
||||
}
|
||||
|
||||
// When writing out to the emitter (file, WinAPI, whatever else) instead of
|
||||
|
@ -48,8 +134,8 @@ impl WriteBuffer {
|
|||
|
||||
fn init(
|
||||
&mut self,
|
||||
fallible_emitter: &Option<Box<dyn WriteTrailingZeroAware>>,
|
||||
infallible_emitter: &Box<dyn WriteTrailingZeroAware>,
|
||||
fallible_emitter: &Option<Box<dyn WriteTrailingZeroAware + Send>>,
|
||||
infallible_emitter: &Box<dyn WriteTrailingZeroAware + Send>,
|
||||
) {
|
||||
if infallible_emitter.should_prefix() {
|
||||
self.prefixed_buffer = Some(Vec::new());
|
||||
|
@ -72,10 +158,16 @@ impl WriteBuffer {
|
|||
.chain(self.unprefixed_buffer.as_mut().into_iter())
|
||||
}
|
||||
|
||||
fn start_line(&mut self) {
|
||||
fn start_line(&mut self, depth: usize) {
|
||||
if let Some(buffer) = &mut self.prefixed_buffer {
|
||||
buffer.extend_from_slice(LOG_PREFIX);
|
||||
}
|
||||
if depth == 0 {
|
||||
return;
|
||||
}
|
||||
for buffer in self.all_buffers() {
|
||||
buffer.extend(std::iter::repeat_n(b' ', depth * 4));
|
||||
}
|
||||
}
|
||||
|
||||
fn end_line(&mut self) {
|
||||
|
@ -84,12 +176,6 @@ impl WriteBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
fn write(&mut self, s: &str) {
|
||||
for buffer in self.all_buffers() {
|
||||
buffer.extend_from_slice(s.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
for buffer in self.all_buffers() {
|
||||
buffer.push(b'\0');
|
||||
|
@ -102,22 +188,26 @@ impl WriteBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
fn send_to(&self, log_emitter: &mut Box<dyn WriteTrailingZeroAware>) -> Result<(), io::Error> {
|
||||
fn send_to_and_flush(
|
||||
&self,
|
||||
log_emitter: &mut Box<dyn WriteTrailingZeroAware + Send>,
|
||||
) -> Result<(), io::Error> {
|
||||
if log_emitter.should_prefix() {
|
||||
log_emitter.write_zero_aware(
|
||||
&*self
|
||||
.prefixed_buffer
|
||||
.as_ref()
|
||||
.unwrap_or_else(|| unreachable!()),
|
||||
)
|
||||
)?;
|
||||
} else {
|
||||
log_emitter.write_zero_aware(
|
||||
&*self
|
||||
.unprefixed_buffer
|
||||
.as_ref()
|
||||
.unwrap_or_else(|| unreachable!()),
|
||||
)
|
||||
)?;
|
||||
}
|
||||
log_emitter.flush()
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
|
@ -143,183 +233,36 @@ impl Write for WriteBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
impl Factory {
|
||||
pub(crate) fn new() -> Self {
|
||||
let debug_emitter = os::new_debug_logger();
|
||||
Factory {
|
||||
infallible_emitter: debug_emitter,
|
||||
fallible_emitter: None,
|
||||
write_buffer: WriteBuffer::new(),
|
||||
log_queue: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn initalize_fallible_emitter(
|
||||
settings: &Settings,
|
||||
) -> std::io::Result<Option<Box<dyn WriteTrailingZeroAware>>> {
|
||||
settings
|
||||
.dump_dir
|
||||
.as_ref()
|
||||
.map(|path| {
|
||||
Ok::<_, std::io::Error>(Box::new(File::create(path.to_path_buf().join("log.txt"))?)
|
||||
as Box<dyn WriteTrailingZeroAware>)
|
||||
})
|
||||
.transpose()
|
||||
}
|
||||
|
||||
// We load settings during first function call, since during that time we
|
||||
// also create one of the loggers, what do we do about errors encountered
|
||||
// at that time? We log them to the newly created logger, but to make it
|
||||
// "nice" we do both of those in a single function
|
||||
// An alternative would be to have something like this:
|
||||
// let mut factory = Factory::new();
|
||||
// let mut cuInitLog = factory.get_logger("cuInit");
|
||||
// cuInitLog.load_settings(&settings);
|
||||
// which is a bit nonsensical
|
||||
pub(crate) fn get_first_logger_and_init_settings(
|
||||
&mut self,
|
||||
func: &'static str,
|
||||
arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
|
||||
) -> (FunctionLogger, Settings) {
|
||||
let mut first_logger = self.get_logger(func, arguments_writer);
|
||||
let settings = Settings::read_and_init(&mut first_logger);
|
||||
match Self::initalize_fallible_emitter(&settings) {
|
||||
Ok(fallible_emitter) => {
|
||||
*first_logger.fallible_emitter = fallible_emitter;
|
||||
}
|
||||
Err(err) => first_logger.log(LogEntry::IoError(err)),
|
||||
}
|
||||
first_logger.write_buffer.init(
|
||||
first_logger.fallible_emitter,
|
||||
first_logger.infallible_emitter,
|
||||
);
|
||||
(first_logger, settings)
|
||||
}
|
||||
|
||||
pub(crate) fn get_logger(
|
||||
&mut self,
|
||||
func: &'static str,
|
||||
arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
|
||||
) -> FunctionLogger {
|
||||
FunctionLogger {
|
||||
result: None,
|
||||
name: CudaFunctionName::Normal(func),
|
||||
fallible_emitter: &mut self.fallible_emitter,
|
||||
infallible_emitter: &mut self.infallible_emitter,
|
||||
write_buffer: &mut self.write_buffer,
|
||||
log_queue: &mut self.log_queue,
|
||||
arguments_writer: Some(arguments_writer),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_logger_dark_api(
|
||||
&mut self,
|
||||
guid: CUuuid,
|
||||
index: usize,
|
||||
arguments_writer: Option<Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>>,
|
||||
) -> FunctionLogger {
|
||||
FunctionLogger {
|
||||
result: None,
|
||||
name: CudaFunctionName::Dark { guid, index },
|
||||
fallible_emitter: &mut self.fallible_emitter,
|
||||
infallible_emitter: &mut self.infallible_emitter,
|
||||
write_buffer: &mut self.write_buffer,
|
||||
log_queue: &mut self.log_queue,
|
||||
arguments_writer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum CudaFunctionName {
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum CudaFunctionName {
|
||||
Normal(&'static str),
|
||||
Dark { guid: CUuuid, index: usize },
|
||||
}
|
||||
|
||||
// This encapsulates log output for a single function call.
|
||||
// It's a separate struct and not just a plain function for two reasons:
|
||||
// * While we want to always display return code before logging errors,
|
||||
// logging errors might come before return code is returned
|
||||
// * We want to handle panics gracefully with Drop
|
||||
pub(crate) struct FunctionLogger<'a> {
|
||||
pub(crate) result: Option<CUresult>,
|
||||
name: CudaFunctionName,
|
||||
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
|
||||
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
|
||||
arguments_writer: Option<Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>>,
|
||||
write_buffer: &'a mut WriteBuffer,
|
||||
log_queue: &'a mut Vec<LogEntry>,
|
||||
}
|
||||
|
||||
impl<'a> FunctionLogger<'a> {
|
||||
pub(crate) fn log(&mut self, l: LogEntry) {
|
||||
self.log_queue.push(l);
|
||||
}
|
||||
|
||||
pub(crate) fn log_io_error(&mut self, error: io::Result<()>) {
|
||||
if let Err(e) = error {
|
||||
self.log_queue.push(LogEntry::IoError(e));
|
||||
}
|
||||
}
|
||||
|
||||
fn flush_log_queue_to_write_buffer(&mut self) {
|
||||
self.write_buffer.start_line();
|
||||
match self.name {
|
||||
CudaFunctionName::Normal(fn_name) => self.write_buffer.write(fn_name),
|
||||
impl Display for CudaFunctionName {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
CudaFunctionName::Normal(fn_) => f.write_str(fn_),
|
||||
CudaFunctionName::Dark { guid, index } => {
|
||||
format::CudaDisplay::write(&guid, "", 0, &mut self.write_buffer).ok();
|
||||
write!(&mut self.write_buffer, "::{}", index).ok();
|
||||
match ::dark_api::cuda::guid_to_name(guid, *index) {
|
||||
Some((name, fn_)) => match fn_ {
|
||||
Some(fn_) => write!(f, "{{{name}}}::{fn_}"),
|
||||
None => write!(f, "{{{name}}}::{index}"),
|
||||
},
|
||||
None => {
|
||||
let mut temp = Vec::new();
|
||||
format::CudaDisplay::write(guid, "", 0, &mut temp)
|
||||
.map_err(|_| std::fmt::Error::default())?;
|
||||
let temp = String::from_utf8_lossy(&*temp);
|
||||
write!(f, "{temp}::{index}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match &mut self.arguments_writer {
|
||||
Some(arg_writer) => {
|
||||
arg_writer(&mut self.write_buffer).ok();
|
||||
}
|
||||
None => {
|
||||
self.write_buffer.write_all(b"(...)").ok();
|
||||
}
|
||||
}
|
||||
self.write_buffer.write_all(b" -> ").ok();
|
||||
if let Some(result) = self.result {
|
||||
format::CudaDisplay::write(&result, "", 0, self.write_buffer).ok();
|
||||
} else {
|
||||
self.write_buffer.write_all(b"UNKNOWN").ok();
|
||||
};
|
||||
self.write_buffer.end_line();
|
||||
for entry in self.log_queue.iter() {
|
||||
write!(self.write_buffer, " {}", entry).ok();
|
||||
self.write_buffer.end_line();
|
||||
}
|
||||
self.write_buffer.finish();
|
||||
}
|
||||
|
||||
// This is a dirty hack: we call it at the point where our write buffer is
|
||||
// already finalized and squeeze the error produced by the previous emitter
|
||||
fn hack_squeeze_in_additional_error(&mut self, entry: LogEntry) {
|
||||
self.write_buffer.undo_finish();
|
||||
write!(self.write_buffer, " {}", entry).unwrap_or_else(|_| unreachable!());
|
||||
self.write_buffer.end_line();
|
||||
self.write_buffer.finish();
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for FunctionLogger<'a> {
|
||||
fn drop(&mut self) {
|
||||
self.flush_log_queue_to_write_buffer();
|
||||
let error_from_writing_to_fallible_emitter = match self.fallible_emitter {
|
||||
Some(emitter) => self.write_buffer.send_to(emitter),
|
||||
None => Ok(()),
|
||||
};
|
||||
if let Err(e) = error_from_writing_to_fallible_emitter {
|
||||
self.hack_squeeze_in_additional_error(LogEntry::IoError(e))
|
||||
}
|
||||
self.write_buffer.send_to(self.infallible_emitter).ok();
|
||||
self.write_buffer.reset();
|
||||
self.log_queue.truncate(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Structured log type. We don't want frontend to care about log formatting
|
||||
pub(crate) enum LogEntry {
|
||||
pub(crate) enum ErrorEntry {
|
||||
IoError(io::Error),
|
||||
CreatedDumpDirectory(PathBuf),
|
||||
ErrorBox(Box<dyn Error>),
|
||||
|
@ -328,12 +271,13 @@ pub(crate) enum LogEntry {
|
|||
raw_image: *const c_void,
|
||||
kind: &'static str,
|
||||
},
|
||||
FunctionNotFound(CudaFunctionName),
|
||||
MalformedModulePath(Utf8Error),
|
||||
NonUtf8ModuleText(Utf8Error),
|
||||
NulInsideModuleText(NulError),
|
||||
ModuleParsingError(String),
|
||||
Lz4DecompressionFailure,
|
||||
UnknownExportTableFn,
|
||||
ZstdDecompressionFailure(usize),
|
||||
UnexpectedArgument {
|
||||
arg_name: &'static str,
|
||||
expected: Vec<UInt>,
|
||||
|
@ -344,73 +288,135 @@ pub(crate) enum LogEntry {
|
|||
expected: Vec<UInt>,
|
||||
observed: UInt,
|
||||
},
|
||||
InvalidEnvVar {
|
||||
var: &'static str,
|
||||
pattern: &'static str,
|
||||
value: String,
|
||||
},
|
||||
UnexpectedExportTableSize {
|
||||
expected: usize,
|
||||
computed: usize,
|
||||
},
|
||||
IntegrityCheck {
|
||||
original: [u64; 2],
|
||||
overriden: [u64; 2],
|
||||
},
|
||||
NullPointer(&'static str),
|
||||
UnknownLibrary(CUlibrary),
|
||||
}
|
||||
|
||||
impl Display for LogEntry {
|
||||
unsafe impl Send for ErrorEntry {}
|
||||
unsafe impl Sync for ErrorEntry {}
|
||||
|
||||
impl From<cuda_types::dark_api::ParseError> for ErrorEntry {
|
||||
fn from(e: cuda_types::dark_api::ParseError) -> Self {
|
||||
match e {
|
||||
cuda_types::dark_api::ParseError::NullPointer(s) => ErrorEntry::NullPointer(s),
|
||||
cuda_types::dark_api::ParseError::UnexpectedBinaryField {
|
||||
field_name,
|
||||
observed,
|
||||
expected,
|
||||
} => ErrorEntry::UnexpectedBinaryField {
|
||||
field_name,
|
||||
observed: UInt::from(observed),
|
||||
expected: expected.into_iter().map(UInt::from).collect(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ErrorEntry {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LogEntry::IoError(e) => e.fmt(f),
|
||||
LogEntry::CreatedDumpDirectory(dir) => {
|
||||
write!(
|
||||
f,
|
||||
"Created dump directory {} ",
|
||||
dir.as_os_str().to_string_lossy()
|
||||
)
|
||||
}
|
||||
LogEntry::ErrorBox(e) => e.fmt(f),
|
||||
LogEntry::UnsupportedModule {
|
||||
module,
|
||||
raw_image,
|
||||
kind,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"Unsupported {} module {:?} loaded from module image {:?}",
|
||||
kind, module, raw_image
|
||||
)
|
||||
}
|
||||
LogEntry::MalformedModulePath(e) => e.fmt(f),
|
||||
LogEntry::NonUtf8ModuleText(e) => e.fmt(f),
|
||||
LogEntry::ModuleParsingError(file_name) => {
|
||||
write!(
|
||||
f,
|
||||
"Error parsing module, log has been written to {}",
|
||||
file_name
|
||||
)
|
||||
}
|
||||
LogEntry::NulInsideModuleText(e) => e.fmt(f),
|
||||
LogEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"),
|
||||
LogEntry::UnknownExportTableFn => write!(f, "Unknown export table function"),
|
||||
LogEntry::UnexpectedBinaryField {
|
||||
field_name,
|
||||
expected,
|
||||
observed,
|
||||
} => write!(
|
||||
f,
|
||||
"Unexpected field {}. Expected one of: {{{}}}, observed: {}",
|
||||
field_name,
|
||||
expected
|
||||
.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
observed
|
||||
),
|
||||
LogEntry::UnexpectedArgument {
|
||||
arg_name,
|
||||
expected,
|
||||
observed,
|
||||
} => write!(
|
||||
f,
|
||||
"Unexpected argument {}. Expected one of: {{{}}}, observed: {}",
|
||||
arg_name,
|
||||
expected
|
||||
.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
observed
|
||||
),
|
||||
ErrorEntry::IoError(e) => e.fmt(f),
|
||||
ErrorEntry::CreatedDumpDirectory(dir) => {
|
||||
write!(
|
||||
f,
|
||||
"Created dump directory {} ",
|
||||
dir.as_os_str().to_string_lossy()
|
||||
)
|
||||
}
|
||||
ErrorEntry::ErrorBox(e) => e.fmt(f),
|
||||
ErrorEntry::UnsupportedModule {
|
||||
module,
|
||||
raw_image,
|
||||
kind,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"Unsupported {} module {:?} loaded from module image {:?}",
|
||||
kind, module, raw_image
|
||||
)
|
||||
}
|
||||
ErrorEntry::MalformedModulePath(e) => e.fmt(f),
|
||||
ErrorEntry::NonUtf8ModuleText(e) => e.fmt(f),
|
||||
ErrorEntry::ModuleParsingError(file_name) => {
|
||||
write!(
|
||||
f,
|
||||
"Error parsing module, log has been written to {}",
|
||||
file_name
|
||||
)
|
||||
}
|
||||
ErrorEntry::NulInsideModuleText(e) => e.fmt(f),
|
||||
ErrorEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"),
|
||||
ErrorEntry::ZstdDecompressionFailure(err_code) => write!(f, "Zstd decompression failure: {}", zstd_safe::get_error_name(*err_code)),
|
||||
ErrorEntry::UnexpectedBinaryField {
|
||||
field_name,
|
||||
expected,
|
||||
observed,
|
||||
} => write!(
|
||||
f,
|
||||
"Unexpected field {}. Expected one of: {{{}}}, observed: {}",
|
||||
field_name,
|
||||
expected
|
||||
.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
observed
|
||||
),
|
||||
ErrorEntry::UnexpectedArgument {
|
||||
arg_name,
|
||||
expected,
|
||||
observed,
|
||||
} => write!(
|
||||
f,
|
||||
"Unexpected argument {}. Expected one of: {{{}}}, observed: {}",
|
||||
arg_name,
|
||||
expected
|
||||
.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
observed
|
||||
),
|
||||
ErrorEntry::InvalidEnvVar {
|
||||
var,
|
||||
pattern,
|
||||
value,
|
||||
} => write!(
|
||||
f,
|
||||
"Unexpected value of environment variable {var}. Expected pattern: {pattern}, got value: {value}"
|
||||
),
|
||||
ErrorEntry::FunctionNotFound(cuda_function_name) => write!(
|
||||
f,
|
||||
"No function {cuda_function_name} in the underlying library"
|
||||
),
|
||||
ErrorEntry::UnexpectedExportTableSize { expected, computed } => {
|
||||
write!(f, "Table length mismatch. Expected: {expected}, got: {computed}")
|
||||
}
|
||||
ErrorEntry::IntegrityCheck { original, overriden } => {
|
||||
write!(f, "Overriding integrity check hash. Original: {original:?}, overriden: {overriden:?}")
|
||||
}
|
||||
ErrorEntry::NullPointer(type_) => {
|
||||
write!(f, "Null pointer of type {type_} encountered")
|
||||
}
|
||||
ErrorEntry::UnknownLibrary(culibrary) => {
|
||||
write!(f, "Unknown library: ")?;
|
||||
let mut temp_buffer = Vec::new();
|
||||
CudaDisplay::write(culibrary, "", 0, &mut temp_buffer).ok();
|
||||
f.write_str(&unsafe { String::from_utf8_unchecked(temp_buffer) })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -422,6 +428,24 @@ pub(crate) enum UInt {
|
|||
USize(usize),
|
||||
}
|
||||
|
||||
impl From<u16> for UInt {
|
||||
fn from(value: u16) -> Self {
|
||||
UInt::U16(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u32> for UInt {
|
||||
fn from(value: u32) -> Self {
|
||||
UInt::U32(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<usize> for UInt {
|
||||
fn from(value: usize) -> Self {
|
||||
UInt::USize(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for UInt {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
|
@ -491,7 +515,7 @@ mod os {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware> {
|
||||
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware + Send> {
|
||||
let stderr = std::io::stderr();
|
||||
let log_to_stderr = stderr.as_raw_handle() != ptr::null_mut();
|
||||
if log_to_stderr {
|
||||
|
@ -506,19 +530,23 @@ mod os {
|
|||
mod os {
|
||||
use super::WriteTrailingZeroAware;
|
||||
|
||||
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware> {
|
||||
pub(crate) fn new_debug_logger() -> Box<dyn WriteTrailingZeroAware + Send> {
|
||||
Box::new(std::io::stderr())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{borrow::Cow, cell::RefCell, io, rc::Rc, str};
|
||||
|
||||
use cuda_types::cuda::CUresultConsts;
|
||||
|
||||
use super::{FunctionLogger, LogEntry, WriteTrailingZeroAware};
|
||||
use crate::{log::{CudaFunctionName, WriteBuffer}, CUresult};
|
||||
use super::{ErrorEntry, FnCallLog, WriteTrailingZeroAware};
|
||||
use crate::{
|
||||
log::{CudaFunctionName, WriteBuffer},
|
||||
FnCallLogStack, OuterCallGuard,
|
||||
};
|
||||
use std::{
|
||||
cell::RefCell,
|
||||
io, str,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
struct FailOnNthWrite {
|
||||
fail_on: usize,
|
||||
|
@ -546,11 +574,11 @@ mod tests {
|
|||
|
||||
// Custom type to not trigger trait coherence rules
|
||||
#[derive(Clone)]
|
||||
struct RcVec<T>(Rc<RefCell<Vec<T>>>);
|
||||
struct ArcVec<T>(Arc<Mutex<Vec<T>>>);
|
||||
|
||||
impl WriteTrailingZeroAware for RcVec<u8> {
|
||||
impl WriteTrailingZeroAware for ArcVec<u8> {
|
||||
fn write_zero_aware(&mut self, buf: &[u8]) -> std::io::Result<()> {
|
||||
let mut vec = self.0.borrow_mut();
|
||||
let mut vec = self.0.lock().unwrap();
|
||||
vec.extend_from_slice(buf.split_last().unwrap().1);
|
||||
Ok(())
|
||||
}
|
||||
|
@ -565,37 +593,59 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
// TODO: fix this, it should use drop guard for testing.
|
||||
// Previously FnCallLog would implement Drop and write to the log
|
||||
fn error_in_fallible_emitter_is_handled_gracefully() {
|
||||
let result = RcVec(Rc::new(RefCell::new(Vec::<u8>::new())));
|
||||
let mut infallible_emitter = Box::new(result.clone()) as Box<dyn WriteTrailingZeroAware>;
|
||||
let mut fallible_emitter = Some(Box::new(FailOnNthWrite {
|
||||
let result = ArcVec(Arc::new(Mutex::new(Vec::<u8>::new())));
|
||||
let infallible_emitter = Box::new(result.clone()) as Box<dyn WriteTrailingZeroAware + Send>;
|
||||
let fallible_emitter = Some(Box::new(FailOnNthWrite {
|
||||
fail_on: 1,
|
||||
counter: 0,
|
||||
}) as Box<dyn WriteTrailingZeroAware>);
|
||||
}) as Box<dyn WriteTrailingZeroAware + Send>);
|
||||
let mut write_buffer = WriteBuffer::new();
|
||||
write_buffer.unprefixed_buffer = Some(Vec::new());
|
||||
let mut log_queue = Vec::new();
|
||||
let mut func_logger = FunctionLogger {
|
||||
result: Some(CUresult::SUCCESS),
|
||||
let mut writer = super::Writer {
|
||||
fallible_emitter,
|
||||
infallible_emitter,
|
||||
write_buffer,
|
||||
};
|
||||
let func_logger = FnCallLog {
|
||||
name: CudaFunctionName::Normal("cuInit"),
|
||||
infallible_emitter: &mut infallible_emitter,
|
||||
fallible_emitter: &mut fallible_emitter,
|
||||
write_buffer: &mut write_buffer,
|
||||
log_queue: &mut log_queue,
|
||||
arguments_writer: None,
|
||||
args: None,
|
||||
output: None,
|
||||
subcalls: Vec::new(),
|
||||
};
|
||||
let log_root = FnCallLogStack {
|
||||
depth: 1,
|
||||
log_root: func_logger,
|
||||
};
|
||||
let log_root = RefCell::new(log_root);
|
||||
let drop_guard = OuterCallGuard {
|
||||
writer: &mut writer,
|
||||
log_root: &log_root,
|
||||
};
|
||||
|
||||
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(1)));
|
||||
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(2)));
|
||||
func_logger.log(LogEntry::IoError(io::Error::from_raw_os_error(3)));
|
||||
drop(func_logger);
|
||||
drop(infallible_emitter);
|
||||
{
|
||||
log_root
|
||||
.borrow_mut()
|
||||
.log_root
|
||||
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(1)));
|
||||
log_root
|
||||
.borrow_mut()
|
||||
.log_root
|
||||
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(2)));
|
||||
log_root
|
||||
.borrow_mut()
|
||||
.log_root
|
||||
.log(ErrorEntry::IoError(io::Error::from_raw_os_error(3)));
|
||||
}
|
||||
drop(drop_guard);
|
||||
|
||||
let result = result.0.borrow_mut();
|
||||
let result = result.0.lock().unwrap();
|
||||
let result_str = str::from_utf8(&*result).unwrap();
|
||||
let result_lines = result_str.lines().collect::<Vec<_>>();
|
||||
assert_eq!(result_lines.len(), 5);
|
||||
assert_eq!(result_lines[0], "cuInit(...) -> CUDA_SUCCESS");
|
||||
assert_eq!(result_lines[0], "cuInit(...) -> UNKNOWN");
|
||||
assert!(result_lines[1].starts_with(" "));
|
||||
assert!(result_lines[2].starts_with(" "));
|
||||
assert!(result_lines[3].starts_with(" "));
|
||||
|
|
|
@ -34,18 +34,16 @@ macro_rules! os_log {
|
|||
#[cfg(target_arch = "x86_64")]
|
||||
pub fn get_thunk(
|
||||
original_fn: *const c_void,
|
||||
report_fn: unsafe extern "system" fn(*const CUuuid, usize),
|
||||
report_fn: unsafe extern "system" fn(&CUuuid, usize),
|
||||
guid: *const CUuuid,
|
||||
idx: usize,
|
||||
) -> *const c_void {
|
||||
use dynasmrt::{dynasm, DynasmApi};
|
||||
let mut ops = dynasmrt::x86::Assembler::new().unwrap();
|
||||
let mut ops = dynasmrt::x64::Assembler::new().unwrap();
|
||||
let start = ops.offset();
|
||||
// Let's hope there's never more than 6 arguments
|
||||
dynasm!(ops
|
||||
; .arch x64
|
||||
; push rbp
|
||||
; mov rbp, rsp
|
||||
// stack alignment
|
||||
; sub rsp, 8
|
||||
; push rdi
|
||||
; push rsi
|
||||
; push rdx
|
||||
|
@ -62,10 +60,9 @@ pub fn get_thunk(
|
|||
; pop rdx
|
||||
; pop rsi
|
||||
; pop rdi
|
||||
; add rsp, 8
|
||||
; mov rax, QWORD original_fn as i64
|
||||
; call rax
|
||||
; pop rbp
|
||||
; ret
|
||||
; jmp rax
|
||||
; int 3
|
||||
);
|
||||
let exe_buf = ops.finalize().unwrap();
|
||||
|
@ -73,3 +70,12 @@ pub fn get_thunk(
|
|||
mem::forget(exe_buf);
|
||||
result_fn as *const _
|
||||
}
|
||||
|
||||
#[link(name = "pthread")]
|
||||
unsafe extern "C" {
|
||||
fn pthread_self() -> std::os::unix::thread::RawPthread;
|
||||
}
|
||||
|
||||
pub(crate) fn current_thread() -> u32 {
|
||||
(unsafe { pthread_self() }) as u32
|
||||
}
|
||||
|
|
|
@ -178,3 +178,13 @@ pub fn get_thunk(
|
|||
mem::forget(exe_buf);
|
||||
result_fn as *const _
|
||||
}
|
||||
|
||||
#[link(name = "kernel32")]
|
||||
unsafe extern "system" {
|
||||
fn GetCurrentThreadId() -> u32;
|
||||
}
|
||||
|
||||
pub(crate) fn current_thread() -> u32 {
|
||||
unsafe { GetCurrentThreadId() }
|
||||
}
|
||||
|
||||
|
|
|
@ -1,103 +0,0 @@
|
|||
import pycuda.autoinit
|
||||
import pycuda.driver as drv
|
||||
import pycuda.tools as py_tools
|
||||
from pathlib import PurePath
|
||||
import numpy as np
|
||||
from os import path
|
||||
import os
|
||||
import itertools
|
||||
import sys
|
||||
|
||||
|
||||
# It's impossible to discern what is the type of a buffer, here you can override equality checks
|
||||
def assert_array_equal_override(kernel_name, idx, arr1, arr2):
|
||||
if kernel_name == 'knn_match' and idx == 6:
|
||||
arr1_view = np.frombuffer(arr1, dtype=np.dtype([('f1', np.uint32), ('f2', np.uint32), ('f3', np.uint32)]))
|
||||
np.ndarray.sort(arr1_view)
|
||||
arr2_view = np.frombuffer(arr2, dtype=np.dtype([('f1', np.uint32), ('f2', np.uint32), ('f3', np.uint32)]))
|
||||
np.ndarray.sort(arr2_view)
|
||||
if kernel_name == 'nonmax_suppression' and idx == 7:
|
||||
arr1_view = np.frombuffer(arr1, dtype=np.dtype(np.uint32))
|
||||
np.ndarray.sort(arr1_view)
|
||||
arr2_view = np.frombuffer(arr2, dtype=np.dtype(np.uint32))
|
||||
np.ndarray.sort(arr2_view)
|
||||
np.testing.assert_array_equal(arr1, arr2)
|
||||
|
||||
|
||||
def load_arguments(arg_path):
|
||||
is_buffer = arg_path.endswith(".buffer")
|
||||
with open(arg_path, "rb") as f:
|
||||
arg_bytes = f.read()
|
||||
if not is_buffer:
|
||||
if len(arg_bytes) == 1:
|
||||
return np.frombuffer(arg_bytes, dtype=np.uint8)[0], None
|
||||
elif len(arg_bytes) == 2:
|
||||
return np.frombuffer(arg_bytes, dtype=np.uint16)[0], None
|
||||
elif len(arg_bytes) == 4:
|
||||
return np.frombuffer(arg_bytes, dtype=np.uint32)[0], None
|
||||
elif len(arg_bytes) == 8:
|
||||
return np.frombuffer(arg_bytes, dtype=np.uint64)[0], None
|
||||
else:
|
||||
raise Exception('Incorrect size of {}: {}'.format(arg_path, len(arg_bytes)))
|
||||
else:
|
||||
buff = np.frombuffer(bytearray(arg_bytes), dtype=np.uint8)
|
||||
buff.setflags(write=1, align=1)
|
||||
return drv.InOut(buff), buff
|
||||
|
||||
|
||||
def parse_arguments(dump_path, prefix):
|
||||
dir = path.join(dump_path, prefix)
|
||||
arg_files = os.listdir(dir)
|
||||
return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
|
||||
|
||||
|
||||
def append_debug_buffer(args, grid, block):
|
||||
args = list(args)
|
||||
items = grid[0] * grid[1] * grid[2] * block[0] * block[1] * block[2]
|
||||
debug_buff = np.zeros(items, dtype=np.uint32)
|
||||
args.append((drv.InOut(debug_buff), debug_buff))
|
||||
return args
|
||||
|
||||
|
||||
def verify_single_dump(input_path, max_block_threads):
|
||||
print(input_path)
|
||||
kernel_name = path.basename(os.path.normpath(input_path)).split("_", 1)[1]
|
||||
with open(path.join(input_path, "launch.txt"), "r") as launch_f:
|
||||
launch_lines = list(map(int, launch_f.readlines()))
|
||||
block = tuple(launch_lines[3:6])
|
||||
launch_block_size = block[0] * block[1] * block[2]
|
||||
if launch_block_size > max_block_threads:
|
||||
print(
|
||||
f" Skipping, launch block size ({launch_block_size}) bigger than maximum block size ({max_block_threads})")
|
||||
return
|
||||
module = drv.module_from_file(path.join(input_path, "module.ptx"))
|
||||
kernel = module.get_function(kernel_name)
|
||||
pre_args = append_debug_buffer(parse_arguments(input_path, "pre"), tuple(launch_lines[:3]), block)
|
||||
kernel_pre_args, host_pre_args = zip(*pre_args)
|
||||
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
|
||||
post_args = parse_arguments(input_path, "post")
|
||||
_, host_post_args_args = zip(*post_args)
|
||||
for idx, (pre_arg, post_arg) in enumerate(zip(host_pre_args, host_post_args_args)):
|
||||
if pre_arg is None:
|
||||
continue
|
||||
try:
|
||||
assert_array_equal_override(kernel_name, idx, pre_arg, post_arg)
|
||||
except Exception as e:
|
||||
print(f"{idx}: {e}")
|
||||
|
||||
|
||||
def main(argv):
|
||||
device = drv.Device(0)
|
||||
max_threads = device.get_attribute(drv.device_attribute.MAX_THREADS_PER_BLOCK)
|
||||
print(device.name())
|
||||
input_path = argv[1]
|
||||
if os.path.exists(path.join(input_path, "launch.txt")):
|
||||
verify_single_dump(input_path, max_threads)
|
||||
else:
|
||||
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
|
||||
if os.path.isdir(input_subdir):
|
||||
verify_single_dump(input_subdir, max_threads)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
|
@ -1,79 +0,0 @@
|
|||
use cuda_base::cuda_function_declarations;
|
||||
use std::ffi::CStr;
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
use std::ptr::NonNull;
|
||||
use std::{marker::PhantomData, os::raw::c_void};
|
||||
|
||||
use crate::os;
|
||||
|
||||
struct DynamicFn<T> {
|
||||
pointer: usize,
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T> Default for DynamicFn<T> {
|
||||
fn default() -> Self {
|
||||
DynamicFn {
|
||||
pointer: 0,
|
||||
_marker: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DynamicFn<T> {
|
||||
unsafe fn get(&mut self, lib: *mut c_void, name: &[u8]) -> Option<T> {
|
||||
match self.pointer {
|
||||
0 => {
|
||||
let addr = os::get_proc_address(lib, CStr::from_bytes_with_nul_unchecked(name));
|
||||
if addr == ptr::null_mut() {
|
||||
self.pointer = 1;
|
||||
return None;
|
||||
} else {
|
||||
self.pointer = addr as _;
|
||||
}
|
||||
}
|
||||
1 => return None,
|
||||
_ => {}
|
||||
}
|
||||
Some(mem::transmute_copy(&self.pointer))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct CudaDynamicFns {
|
||||
lib_handle: NonNull<::std::ffi::c_void>,
|
||||
fn_table: CudaFnTable,
|
||||
}
|
||||
|
||||
impl CudaDynamicFns {
|
||||
pub(crate) unsafe fn load_library(path: &str) -> Option<Self> {
|
||||
let lib_handle = NonNull::new(os::load_library(path));
|
||||
lib_handle.map(|lib_handle| CudaDynamicFns {
|
||||
lib_handle,
|
||||
fn_table: CudaFnTable::default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! emit_cuda_fn_table {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
#[derive(Default)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
struct CudaFnTable {
|
||||
$($fn_name: DynamicFn<extern $abi fn ( $($arg_id : $arg_type),* ) -> $ret_type>),*
|
||||
}
|
||||
|
||||
impl CudaDynamicFns {
|
||||
$(
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn $fn_name(&mut self, $($arg_id : $arg_type),*) -> Option<$ret_type> {
|
||||
let func = unsafe { self.fn_table.$fn_name.get(self.lib_handle.as_ptr(), concat!(stringify!($fn_name), "\0").as_bytes()) };
|
||||
func.map(|f| f($($arg_id),*) )
|
||||
}
|
||||
)*
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
cuda_function_declarations!(emit_cuda_fn_table);
|
|
@ -1,13 +1,21 @@
|
|||
use crate::{dark_api, log, Settings};
|
||||
use cuda_types::cuda::*;
|
||||
use crate::{
|
||||
log::{self, UInt},
|
||||
trace, ErrorEntry, FnCallLog, Settings,
|
||||
};
|
||||
use cuda_types::{
|
||||
cuda::*,
|
||||
dark_api::{FatbinFileHeader, FatbinFileHeaderFlags, FatbinHeader, FatbincWrapper},
|
||||
};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
borrow::Cow,
|
||||
ffi::{c_void, CStr, CString},
|
||||
fs::{self, File},
|
||||
io::{self, Read, Write},
|
||||
path::PathBuf,
|
||||
rc::Rc,
|
||||
ptr,
|
||||
};
|
||||
use unwrap_or::unwrap_some_or;
|
||||
|
||||
// This struct is the heart of CUDA state tracking, it:
|
||||
// * receives calls from the probes about changes to CUDA state
|
||||
|
@ -15,24 +23,28 @@ use std::{
|
|||
// * writes out relevant state change and details to disk and log
|
||||
pub(crate) struct StateTracker {
|
||||
writer: DumpWriter,
|
||||
modules: HashMap<CUmodule, Option<ParsedModule>>,
|
||||
pub(crate) libraries: FxHashMap<CUlibrary, CodePointer>,
|
||||
saved_modules: FxHashSet<CUmodule>,
|
||||
module_counter: usize,
|
||||
submodule_counter: usize,
|
||||
last_module_version: Option<usize>,
|
||||
pub(crate) dark_api: dark_api::DarkApiState,
|
||||
pub(crate) override_cc_major: Option<u32>,
|
||||
pub(crate) override_cc: Option<(u32, u32)>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct CodePointer(pub *const c_void);
|
||||
|
||||
unsafe impl Send for CodePointer {}
|
||||
unsafe impl Sync for CodePointer {}
|
||||
|
||||
impl StateTracker {
|
||||
pub(crate) fn new(settings: &Settings) -> Self {
|
||||
StateTracker {
|
||||
writer: DumpWriter::new(settings.dump_dir.clone()),
|
||||
modules: HashMap::new(),
|
||||
libraries: FxHashMap::default(),
|
||||
saved_modules: FxHashSet::default(),
|
||||
module_counter: 0,
|
||||
submodule_counter: 0,
|
||||
last_module_version: None,
|
||||
dark_api: dark_api::DarkApiState::new(),
|
||||
override_cc_major: settings.override_cc_major,
|
||||
override_cc: settings.override_cc,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,12 +52,12 @@ impl StateTracker {
|
|||
&mut self,
|
||||
module: CUmodule,
|
||||
file_name: *const i8,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
) {
|
||||
let file_name = match unsafe { CStr::from_ptr(file_name) }.to_str() {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
fn_logger.log(log::LogEntry::MalformedModulePath(err));
|
||||
fn_logger.log(log::ErrorEntry::MalformedModulePath(err));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
@ -56,7 +68,7 @@ impl StateTracker {
|
|||
fn try_record_new_module_file(
|
||||
&mut self,
|
||||
module: CUmodule,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
file_name: &str,
|
||||
) -> io::Result<()> {
|
||||
let mut module_file = fs::File::open(file_name)?;
|
||||
|
@ -69,37 +81,29 @@ impl StateTracker {
|
|||
pub(crate) fn record_new_submodule(
|
||||
&mut self,
|
||||
module: CUmodule,
|
||||
version: Option<usize>,
|
||||
submodule: &[u8],
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
type_: &'static str,
|
||||
) {
|
||||
if !self.modules.contains_key(&module) {
|
||||
if self.saved_modules.insert(module) {
|
||||
self.module_counter += 1;
|
||||
self.submodule_counter = 0;
|
||||
self.modules.insert(module, None);
|
||||
}
|
||||
if version != self.last_module_version {
|
||||
self.submodule_counter = 0;
|
||||
}
|
||||
self.submodule_counter += 1;
|
||||
self.last_module_version = version;
|
||||
fn_logger.log_io_error(self.writer.save_module(
|
||||
self.module_counter,
|
||||
version,
|
||||
Some(self.submodule_counter),
|
||||
submodule,
|
||||
type_,
|
||||
));
|
||||
if type_ == "ptx" {
|
||||
match CString::new(submodule) {
|
||||
Err(e) => fn_logger.log(log::LogEntry::NulInsideModuleText(e)),
|
||||
Err(e) => fn_logger.log(log::ErrorEntry::NulInsideModuleText(e)),
|
||||
Ok(submodule_cstring) => match submodule_cstring.to_str() {
|
||||
Err(e) => fn_logger.log(log::LogEntry::NonUtf8ModuleText(e)),
|
||||
Err(e) => fn_logger.log(log::ErrorEntry::NonUtf8ModuleText(e)),
|
||||
Ok(submodule_text) => self.try_parse_and_record_kernels(
|
||||
fn_logger,
|
||||
self.module_counter,
|
||||
version,
|
||||
Some(self.submodule_counter),
|
||||
submodule_text,
|
||||
),
|
||||
|
@ -112,25 +116,36 @@ impl StateTracker {
|
|||
&mut self,
|
||||
module: CUmodule,
|
||||
raw_image: *const c_void,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
) {
|
||||
self.module_counter += 1;
|
||||
if unsafe { *(raw_image as *const [u8; 4]) } == *goblin::elf64::header::ELFMAG {
|
||||
self.modules.insert(module, None);
|
||||
self.saved_modules.insert(module);
|
||||
// TODO: Parse ELF and write it to disk
|
||||
fn_logger.log(log::LogEntry::UnsupportedModule {
|
||||
fn_logger.log(log::ErrorEntry::UnsupportedModule {
|
||||
module,
|
||||
raw_image,
|
||||
kind: "ELF",
|
||||
})
|
||||
} else if unsafe { *(raw_image as *const [u8; 8]) } == *goblin::archive::MAGIC {
|
||||
self.modules.insert(module, None);
|
||||
self.saved_modules.insert(module);
|
||||
// TODO: Figure out how to get size of archive module and write it to disk
|
||||
fn_logger.log(log::LogEntry::UnsupportedModule {
|
||||
fn_logger.log(log::ErrorEntry::UnsupportedModule {
|
||||
module,
|
||||
raw_image,
|
||||
kind: "archive",
|
||||
})
|
||||
} else if unsafe { *(raw_image as *const u32) } == FatbincWrapper::MAGIC {
|
||||
unsafe {
|
||||
fn_logger.try_(|fn_logger| {
|
||||
trace::record_submodules_from_wrapped_fatbin(
|
||||
module,
|
||||
raw_image as *const FatbincWrapper,
|
||||
fn_logger,
|
||||
self,
|
||||
)
|
||||
});
|
||||
}
|
||||
} else {
|
||||
self.record_module_ptx(module, raw_image, fn_logger)
|
||||
}
|
||||
|
@ -140,44 +155,40 @@ impl StateTracker {
|
|||
&mut self,
|
||||
module: CUmodule,
|
||||
raw_image: *const c_void,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
) {
|
||||
self.modules.insert(module, None);
|
||||
self.saved_modules.insert(module);
|
||||
let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str();
|
||||
let module_text = match module_text {
|
||||
Ok(m) => m,
|
||||
Err(utf8_err) => {
|
||||
fn_logger.log(log::LogEntry::NonUtf8ModuleText(utf8_err));
|
||||
fn_logger.log(log::ErrorEntry::NonUtf8ModuleText(utf8_err));
|
||||
return;
|
||||
}
|
||||
};
|
||||
fn_logger.log_io_error(self.writer.save_module(
|
||||
self.module_counter,
|
||||
None,
|
||||
None,
|
||||
module_text.as_bytes(),
|
||||
"ptx",
|
||||
));
|
||||
self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, None, module_text);
|
||||
self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, module_text);
|
||||
}
|
||||
|
||||
fn try_parse_and_record_kernels(
|
||||
&mut self,
|
||||
fn_logger: &mut log::FunctionLogger,
|
||||
fn_logger: &mut FnCallLog,
|
||||
module_index: usize,
|
||||
version: Option<usize>,
|
||||
submodule_index: Option<usize>,
|
||||
module_text: &str,
|
||||
) {
|
||||
let errors = ptx_parser::parse_for_errors(module_text);
|
||||
if !errors.is_empty() {
|
||||
fn_logger.log(log::LogEntry::ModuleParsingError(
|
||||
DumpWriter::get_file_name(module_index, version, submodule_index, "log"),
|
||||
fn_logger.log(log::ErrorEntry::ModuleParsingError(
|
||||
DumpWriter::get_file_name(module_index, submodule_index, "log"),
|
||||
));
|
||||
fn_logger.log_io_error(self.writer.save_module_error_log(
|
||||
module_text,
|
||||
module_index,
|
||||
version,
|
||||
submodule_index,
|
||||
&*errors,
|
||||
));
|
||||
|
@ -185,11 +196,6 @@ impl StateTracker {
|
|||
}
|
||||
}
|
||||
|
||||
struct ParsedModule {
|
||||
content: Rc<String>,
|
||||
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||
}
|
||||
|
||||
// This structs writes out information about CUDA execution to the dump dir
|
||||
struct DumpWriter {
|
||||
dump_dir: Option<PathBuf>,
|
||||
|
@ -203,7 +209,6 @@ impl DumpWriter {
|
|||
fn save_module(
|
||||
&self,
|
||||
module_index: usize,
|
||||
version: Option<usize>,
|
||||
submodule_index: Option<usize>,
|
||||
buffer: &[u8],
|
||||
kind: &'static str,
|
||||
|
@ -212,12 +217,7 @@ impl DumpWriter {
|
|||
None => return Ok(()),
|
||||
Some(d) => d.clone(),
|
||||
};
|
||||
dump_file.push(Self::get_file_name(
|
||||
module_index,
|
||||
version,
|
||||
submodule_index,
|
||||
kind,
|
||||
));
|
||||
dump_file.push(Self::get_file_name(module_index, submodule_index, kind));
|
||||
let mut file = File::create(dump_file)?;
|
||||
file.write_all(buffer)?;
|
||||
Ok(())
|
||||
|
@ -225,9 +225,7 @@ impl DumpWriter {
|
|||
|
||||
fn save_module_error_log<'input>(
|
||||
&self,
|
||||
module_text: &str,
|
||||
module_index: usize,
|
||||
version: Option<usize>,
|
||||
submodule_index: Option<usize>,
|
||||
errors: &[ptx_parser::PtxError<'input>],
|
||||
) -> io::Result<()> {
|
||||
|
@ -235,12 +233,7 @@ impl DumpWriter {
|
|||
None => return Ok(()),
|
||||
Some(d) => d.clone(),
|
||||
};
|
||||
log_file.push(Self::get_file_name(
|
||||
module_index,
|
||||
version,
|
||||
submodule_index,
|
||||
"log",
|
||||
));
|
||||
log_file.push(Self::get_file_name(module_index, submodule_index, "log"));
|
||||
let mut file = File::create(log_file)?;
|
||||
for error in errors {
|
||||
writeln!(file, "{}", error)?;
|
||||
|
@ -248,24 +241,131 @@ impl DumpWriter {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn get_file_name(
|
||||
module_index: usize,
|
||||
version: Option<usize>,
|
||||
submodule_index: Option<usize>,
|
||||
kind: &str,
|
||||
) -> String {
|
||||
match (version, submodule_index) {
|
||||
(Some(version), Some(submodule_index)) => format!(
|
||||
"module_{:04}_v{}_{}.{}",
|
||||
module_index, version, submodule_index, kind
|
||||
),
|
||||
(Some(version), None) => {
|
||||
format!("module_{:04}_v{}.{}", module_index, version, kind)
|
||||
fn get_file_name(module_index: usize, submodule_index: Option<usize>, kind: &str) -> String {
|
||||
match submodule_index {
|
||||
None => {
|
||||
format!("module_{:04}.{:02}", module_index, kind)
|
||||
}
|
||||
(None, Some(submodule_index)) => {
|
||||
format!("module_{:04}_{}.{}", module_index, submodule_index, kind)
|
||||
Some(submodule_index) => {
|
||||
format!("module_{:04}_{:02}.{}", module_index, submodule_index, kind)
|
||||
}
|
||||
(None, None) => format!("module_{:04}.{}", module_index, kind),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn record_submodules_from_wrapped_fatbin(
|
||||
module: CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
fn_logger: &mut FnCallLog,
|
||||
state: &mut StateTracker,
|
||||
) -> Result<(), ErrorEntry> {
|
||||
let fatbinc_wrapper = FatbincWrapper::new(&fatbinc_wrapper).map_err(ErrorEntry::from)?;
|
||||
let is_version_2 = fatbinc_wrapper.version == FatbincWrapper::VERSION_V2;
|
||||
record_submodules_from_fatbin(module, (*fatbinc_wrapper).data, fn_logger, state)?;
|
||||
if is_version_2 {
|
||||
let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void;
|
||||
while *current != ptr::null() {
|
||||
record_submodules_from_fatbin(module, *current as *const _, fn_logger, state)?;
|
||||
current = current.add(1);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn record_submodules_from_fatbin(
|
||||
module: CUmodule,
|
||||
fatbin_header: *const FatbinHeader,
|
||||
logger: &mut FnCallLog,
|
||||
state: &mut StateTracker,
|
||||
) -> Result<(), ErrorEntry> {
|
||||
let header = FatbinHeader::new(&fatbin_header).map_err(ErrorEntry::from)?;
|
||||
let file = header.get_content();
|
||||
record_submodules(module, logger, state, file)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn record_submodules(
|
||||
module: CUmodule,
|
||||
fn_logger: &mut FnCallLog,
|
||||
state: &mut StateTracker,
|
||||
mut file_buffer: &[u8],
|
||||
) -> Result<(), ErrorEntry> {
|
||||
while let Some(file) = FatbinFileHeader::next(&mut file_buffer)? {
|
||||
let mut payload = if file.flags.contains(FatbinFileHeaderFlags::CompressedLz4) {
|
||||
Cow::Owned(unwrap_some_or!(
|
||||
fn_logger.try_return(|| decompress_lz4(file)),
|
||||
continue
|
||||
))
|
||||
} else if file.flags.contains(FatbinFileHeaderFlags::CompressedZstd) {
|
||||
Cow::Owned(unwrap_some_or!(
|
||||
fn_logger.try_return(|| decompress_zstd(file)),
|
||||
continue
|
||||
))
|
||||
} else {
|
||||
Cow::Borrowed(file.get_payload())
|
||||
};
|
||||
match file.kind {
|
||||
FatbinFileHeader::HEADER_KIND_PTX => {
|
||||
while payload.last() == Some(&0) {
|
||||
// remove trailing zeros
|
||||
payload.to_mut().pop();
|
||||
}
|
||||
state.record_new_submodule(module, &*payload, fn_logger, "ptx")
|
||||
}
|
||||
FatbinFileHeader::HEADER_KIND_ELF => {
|
||||
state.record_new_submodule(module, &*payload, fn_logger, "elf")
|
||||
}
|
||||
_ => {
|
||||
fn_logger.log(log::ErrorEntry::UnexpectedBinaryField {
|
||||
field_name: "FATBIN_FILE_HEADER_KIND",
|
||||
expected: vec![
|
||||
UInt::U16(FatbinFileHeader::HEADER_KIND_PTX),
|
||||
UInt::U16(FatbinFileHeader::HEADER_KIND_ELF),
|
||||
],
|
||||
observed: UInt::U16(file.kind),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024;
|
||||
|
||||
unsafe fn decompress_lz4(file: &FatbinFileHeader) -> Result<Vec<u8>, ErrorEntry> {
|
||||
let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
|
||||
let mut decompressed_vec = vec![0u8; decompressed_size];
|
||||
loop {
|
||||
match lz4_sys::LZ4_decompress_safe(
|
||||
file.get_payload().as_ptr() as *const _,
|
||||
decompressed_vec.as_mut_ptr() as *mut _,
|
||||
(*file).payload_size as _,
|
||||
decompressed_vec.len() as _,
|
||||
) {
|
||||
error if error < 0 => {
|
||||
let new_size = decompressed_vec.len() * 2;
|
||||
if new_size > MAX_MODULE_DECOMPRESSION_BOUND {
|
||||
return Err(ErrorEntry::Lz4DecompressionFailure);
|
||||
}
|
||||
decompressed_vec.resize(decompressed_vec.len() * 2, 0);
|
||||
}
|
||||
real_decompressed_size => {
|
||||
decompressed_vec.truncate(real_decompressed_size as usize);
|
||||
return Ok(decompressed_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn decompress_zstd(file: &FatbinFileHeader) -> Result<Vec<u8>, ErrorEntry> {
|
||||
let mut result = Vec::with_capacity(file.uncompressed_payload as usize);
|
||||
let payload = file.get_payload();
|
||||
dbg!((payload.len(), file.uncompressed_payload, file.payload_size));
|
||||
match zstd_safe::decompress(&mut result, payload) {
|
||||
Ok(actual_size) => {
|
||||
result.truncate(actual_size);
|
||||
Ok(result)
|
||||
}
|
||||
Err(err) => Err(ErrorEntry::ZstdDecompressionFailure(err)),
|
||||
}
|
||||
}
|
||||
|
|
26
zluda_dump_blas/Cargo.toml
Normal file
26
zluda_dump_blas/Cargo.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "zluda_dump_blas"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump_blas"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
libloading = "0.8"
|
||||
paste = "1.0"
|
||||
unwrap_or = "1.0.1"
|
||||
cglue = "0.3.5"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcublas.so",
|
||||
"dump/libcublas.so.12"
|
||||
]
|
48
zluda_dump_blas/src/lib.rs
Normal file
48
zluda_dump_blas/src/lib.rs
Normal file
|
@ -0,0 +1,48 @@
|
|||
use libloading::Library;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
|
||||
|
||||
fn get_library() -> Option<Library> {
|
||||
let cuda_lib = std::env::var("ZLUDA_BLAS_LIB")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublas.so".to_string());
|
||||
unsafe { Library::new(cuda_lib) }.ok()
|
||||
}
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
use ::zluda_dump_common::ReprUsize;
|
||||
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
|
||||
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
|
||||
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
|
||||
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
|
||||
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
|
||||
let format_args = dark_api::FnFfiWrapper(|| {
|
||||
let mut writer = Vec::new();
|
||||
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
|
||||
formatter(&mut writer $(, $arg_id)* ).ok();
|
||||
dark_api::ByteVecFfi::new(writer)
|
||||
});
|
||||
let underlying_fn = dark_api::FnFfiWrapper(|| {
|
||||
let result = fn_ptr( $( $arg_id),* );
|
||||
ReprUsize::to_usize(result)
|
||||
});
|
||||
ReprUsize::from_usize(export_table.logged_call(
|
||||
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
|
||||
cglue::trait_obj!(&format_args as dark_api::FnFfi),
|
||||
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
|
||||
internal_error_untyped,
|
||||
<$ret_type as ReprUsize>::format_status)
|
||||
)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cublas_function_declarations!(unimplemented);
|
26
zluda_dump_blaslt/Cargo.toml
Normal file
26
zluda_dump_blaslt/Cargo.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "zluda_dump_blaslt"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump_blaslt"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
libloading = "0.8"
|
||||
paste = "1.0"
|
||||
unwrap_or = "1.0.1"
|
||||
cglue = "0.3.5"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcublasLt.so",
|
||||
"dump/libcublasLt.so.12"
|
||||
]
|
49
zluda_dump_blaslt/src/lib.rs
Normal file
49
zluda_dump_blaslt/src/lib.rs
Normal file
|
@ -0,0 +1,49 @@
|
|||
use libloading::Library;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
|
||||
|
||||
fn get_library() -> Option<Library> {
|
||||
let cuda_lib = std::env::var("ZLUDA_BLASLT_LIB")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublasLt.so".to_string());
|
||||
unsafe { Library::new(cuda_lib) }.ok()
|
||||
}
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
use ::zluda_dump_common::ReprUsize;
|
||||
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
|
||||
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
|
||||
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
|
||||
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
|
||||
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
|
||||
let format_args = dark_api::FnFfiWrapper(|| {
|
||||
let mut writer = Vec::new();
|
||||
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
|
||||
formatter(&mut writer $(, $arg_id)* ).ok();
|
||||
dark_api::ByteVecFfi::new(writer)
|
||||
});
|
||||
let underlying_fn = dark_api::FnFfiWrapper(|| {
|
||||
let result = fn_ptr( $( $arg_id),* );
|
||||
ReprUsize::to_usize(result)
|
||||
});
|
||||
ReprUsize::from_usize(export_table.logged_call(
|
||||
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
|
||||
cglue::trait_obj!(&format_args as dark_api::FnFfi),
|
||||
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
|
||||
internal_error_untyped,
|
||||
<$ret_type as ReprUsize>::format_status)
|
||||
)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cublaslt_function_declarations!(unimplemented);
|
||||
cuda_base::cublaslt_internal_function_declarations!(unimplemented);
|
13
zluda_dump_common/Cargo.toml
Normal file
13
zluda_dump_common/Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "zluda_dump_common"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
libloading = "0.8"
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
format = { path = "../format" }
|
||||
cglue = "0.3.5"
|
378
zluda_dump_common/src/lib.rs
Normal file
378
zluda_dump_common/src/lib.rs
Normal file
|
@ -0,0 +1,378 @@
|
|||
use cuda_types::cuda::{CUerror, CUresult, CUresultConsts, CUuuid};
|
||||
use dark_api::ByteVecFfi;
|
||||
use std::{num::NonZero, ptr, sync::LazyLock};
|
||||
|
||||
pub fn get_export_table() -> Option<::dark_api::zluda_dump::ZludaDumpInternal> {
|
||||
static CU_GET_EXPORT_TABLE: LazyLock<
|
||||
Result<
|
||||
unsafe extern "system" fn(*mut *const ::core::ffi::c_void, *const CUuuid) -> CUresult,
|
||||
libloading::Error,
|
||||
>,
|
||||
> = LazyLock::new(|| unsafe { get_dump_table_impl() });
|
||||
let cu_get_export_table = CU_GET_EXPORT_TABLE.as_ref().ok()?;
|
||||
let mut ptr = ptr::null();
|
||||
unsafe { (cu_get_export_table)(&mut ptr, &::dark_api::zluda_dump::ZludaDumpInternal::GUID) }
|
||||
.ok()?;
|
||||
Some(unsafe { ::dark_api::zluda_dump::ZludaDumpInternal::new(ptr) })
|
||||
}
|
||||
|
||||
unsafe fn get_dump_table_impl() -> Result<
|
||||
unsafe extern "system" fn(*mut *const ::core::ffi::c_void, *const CUuuid) -> CUresult,
|
||||
libloading::Error,
|
||||
> {
|
||||
let driver = open_driver()?;
|
||||
return Ok(
|
||||
*(driver.get::<unsafe extern "system" fn(
|
||||
*mut *const ::core::ffi::c_void,
|
||||
*const CUuuid,
|
||||
) -> CUresult>(b"cuGetExportTable\0")?),
|
||||
);
|
||||
}
|
||||
|
||||
fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||
os::open_driver()
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub(crate) mod os {
|
||||
use libloading::os;
|
||||
|
||||
const RTLD_NOLOAD: i32 = 0x4;
|
||||
|
||||
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||
unsafe {
|
||||
os::unix::Library::open(Some("libcuda.so.1"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
|
||||
.or_else(|_| {
|
||||
os::unix::Library::open(Some("libcuda.so"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
|
||||
})
|
||||
.map(Into::into)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub(crate) mod os {
|
||||
use libloading::os;
|
||||
|
||||
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||
unsafe { os::windows::Library::open_already_loaded("nvcuda").map(Into::into) }
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ReprUsize {
|
||||
const INTERNAL_ERROR: usize = usize::MAX;
|
||||
fn to_usize(self) -> usize;
|
||||
fn from_usize(x: usize) -> Self;
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi;
|
||||
}
|
||||
|
||||
impl ReprUsize for CUresult {
|
||||
const INTERNAL_ERROR: usize = CUerror::UNKNOWN.0.get() as usize;
|
||||
|
||||
fn to_usize(self) -> usize {
|
||||
match self {
|
||||
CUresult::SUCCESS => 0,
|
||||
Err(err) => err.0.get() as usize,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
match NonZero::new(x as u32) {
|
||||
None => Ok(()),
|
||||
Some(err) => Err(CUerror(err)),
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for usize {
|
||||
fn to_usize(self) -> usize {
|
||||
self
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> usize {
|
||||
x
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ReprUsize for *const T {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
x as Self
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&x, "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cublas::cublasStatus_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize =
|
||||
cuda_types::cublas::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR.0 as usize;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cudnn9::cudnnStatus_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize =
|
||||
cuda_types::cublas::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR.0 as usize;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for () {
|
||||
fn to_usize(self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn from_usize(_x: usize) -> Self {
|
||||
()
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(
|
||||
&cuda_types::cublas::cublasStatus_t::from_usize(x),
|
||||
"",
|
||||
0,
|
||||
&mut writer,
|
||||
)
|
||||
.ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for u32 {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
x as Self
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(
|
||||
&cuda_types::cublas::cublasStatus_t::from_usize(x),
|
||||
"",
|
||||
0,
|
||||
&mut writer,
|
||||
)
|
||||
.ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for i32 {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
x as Self
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(
|
||||
&cuda_types::cublas::cublasStatus_t::from_usize(x),
|
||||
"",
|
||||
0,
|
||||
&mut writer,
|
||||
)
|
||||
.ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for u64 {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
x as Self
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(
|
||||
&cuda_types::cublas::cublasStatus_t::from_usize(x),
|
||||
"",
|
||||
0,
|
||||
&mut writer,
|
||||
)
|
||||
.ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for *mut std::ffi::c_void {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
x as Self
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(
|
||||
&cuda_types::cublas::cublasStatus_t::from_usize(x),
|
||||
"",
|
||||
0,
|
||||
&mut writer,
|
||||
)
|
||||
.ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cufft::cufftResult_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cusparse::cusparseStatus_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize =
|
||||
cuda_types::cusparse::cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR.0 as usize;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cusparse::cusparseFillMode_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize = 0;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cusparse::cusparseIndexBase_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize = 0;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cusparse::cusparseDiagType_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize = 0;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReprUsize for cuda_types::cusparse::cusparseMatrixType_t {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(x: usize) -> Self {
|
||||
Self(x as u32)
|
||||
}
|
||||
|
||||
const INTERNAL_ERROR: usize = 0;
|
||||
|
||||
extern "C" fn format_status(x: usize) -> ByteVecFfi {
|
||||
let mut writer = Vec::new();
|
||||
format::CudaDisplay::write(&Self::from_usize(x), "", 0, &mut writer).ok();
|
||||
ByteVecFfi::new(writer)
|
||||
}
|
||||
}
|
26
zluda_dump_dnn/Cargo.toml
Normal file
26
zluda_dump_dnn/Cargo.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "zluda_dump_dnn"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump_dnn"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
libloading = "0.8"
|
||||
paste = "1.0"
|
||||
unwrap_or = "1.0.1"
|
||||
cglue = "0.3.5"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcudnn.so",
|
||||
"dump/libcudnn.so.9"
|
||||
]
|
48
zluda_dump_dnn/src/lib.rs
Normal file
48
zluda_dump_dnn/src/lib.rs
Normal file
|
@ -0,0 +1,48 @@
|
|||
use libloading::Library;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
|
||||
|
||||
fn get_library() -> Option<Library> {
|
||||
let cuda_lib = std::env::var("ZLUDA_DNN_LIB")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libcudnn.so.9".to_string());
|
||||
unsafe { Library::new(cuda_lib) }.ok()
|
||||
}
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
use ::zluda_dump_common::ReprUsize;
|
||||
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
|
||||
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
|
||||
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
|
||||
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
|
||||
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
|
||||
let format_args = dark_api::FnFfiWrapper(|| {
|
||||
let mut writer = Vec::new();
|
||||
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
|
||||
formatter(&mut writer $(, $arg_id)* ).ok();
|
||||
dark_api::ByteVecFfi::new(writer)
|
||||
});
|
||||
let underlying_fn = dark_api::FnFfiWrapper(|| {
|
||||
let result = fn_ptr( $( $arg_id),* );
|
||||
ReprUsize::to_usize(result)
|
||||
});
|
||||
ReprUsize::from_usize(export_table.logged_call(
|
||||
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
|
||||
cglue::trait_obj!(&format_args as dark_api::FnFfi),
|
||||
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
|
||||
internal_error_untyped,
|
||||
<$ret_type as ReprUsize>::format_status)
|
||||
)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cudnn9_function_declarations!(unimplemented);
|
26
zluda_dump_fft/Cargo.toml
Normal file
26
zluda_dump_fft/Cargo.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "zluda_dump_fft"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump_fft"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
libloading = "0.8"
|
||||
paste = "1.0"
|
||||
unwrap_or = "1.0.1"
|
||||
cglue = "0.3.5"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcufft.so",
|
||||
"dump/libcufft.so.11"
|
||||
]
|
48
zluda_dump_fft/src/lib.rs
Normal file
48
zluda_dump_fft/src/lib.rs
Normal file
|
@ -0,0 +1,48 @@
|
|||
use libloading::Library;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
|
||||
|
||||
fn get_library() -> Option<Library> {
|
||||
let cuda_lib = std::env::var("ZLUDA_FFT_LIB")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcufft.so".to_string());
|
||||
unsafe { Library::new(cuda_lib) }.ok()
|
||||
}
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
use ::zluda_dump_common::ReprUsize;
|
||||
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
|
||||
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
|
||||
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
|
||||
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
|
||||
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
|
||||
let format_args = dark_api::FnFfiWrapper(|| {
|
||||
let mut writer = Vec::new();
|
||||
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
|
||||
formatter(&mut writer $(, $arg_id)* ).ok();
|
||||
dark_api::ByteVecFfi::new(writer)
|
||||
});
|
||||
let underlying_fn = dark_api::FnFfiWrapper(|| {
|
||||
let result = fn_ptr( $( $arg_id),* );
|
||||
ReprUsize::to_usize(result)
|
||||
});
|
||||
ReprUsize::from_usize(export_table.logged_call(
|
||||
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
|
||||
cglue::trait_obj!(&format_args as dark_api::FnFfi),
|
||||
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
|
||||
internal_error_untyped,
|
||||
<$ret_type as ReprUsize>::format_status)
|
||||
)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cufft_function_declarations!(unimplemented);
|
26
zluda_dump_sparse/Cargo.toml
Normal file
26
zluda_dump_sparse/Cargo.toml
Normal file
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "zluda_dump_sparse"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump_sparse"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
format = { path = "../format" }
|
||||
dark_api = { path = "../dark_api" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
zluda_dump_common = { path = "../zluda_dump_common" }
|
||||
libloading = "0.8"
|
||||
paste = "1.0"
|
||||
unwrap_or = "1.0.1"
|
||||
cglue = "0.3.5"
|
||||
|
||||
[package.metadata.zluda]
|
||||
linux_symlinks = [
|
||||
"dump/libcusparse.so",
|
||||
"dump/libcusparse.so.12"
|
||||
]
|
48
zluda_dump_sparse/src/lib.rs
Normal file
48
zluda_dump_sparse/src/lib.rs
Normal file
|
@ -0,0 +1,48 @@
|
|||
use libloading::Library;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static LIBRARY: LazyLock<Option<Library>> = LazyLock::new(get_library);
|
||||
|
||||
fn get_library() -> Option<Library> {
|
||||
let cuda_lib = std::env::var("ZLUDA_SPARSE_LIB")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcusparse.so".to_string());
|
||||
unsafe { Library::new(cuda_lib) }.ok()
|
||||
}
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
use ::zluda_dump_common::ReprUsize;
|
||||
let internal_error_untyped: usize = <$ret_type as ReprUsize>::INTERNAL_ERROR;
|
||||
let internal_error: $ret_type = ReprUsize::from_usize(internal_error_untyped);
|
||||
let maybe_fn_ptr = (&*LIBRARY).as_ref().and_then(|lib| lib.get::<unsafe extern $abi fn ( $($arg_type),* ) -> $ret_type>(concat!( stringify!($fn_name), "\0").as_bytes()).ok());
|
||||
let fn_ptr = unwrap_or::unwrap_some_or!(maybe_fn_ptr, return internal_error);
|
||||
let export_table = unwrap_or::unwrap_some_or!(::zluda_dump_common::get_export_table(), return internal_error);
|
||||
let format_args = dark_api::FnFfiWrapper(|| {
|
||||
let mut writer = Vec::new();
|
||||
let formatter = paste::paste! { ::format:: [< write_ $fn_name>] };
|
||||
formatter(&mut writer $(, $arg_id)* ).ok();
|
||||
dark_api::ByteVecFfi::new(writer)
|
||||
});
|
||||
let underlying_fn = dark_api::FnFfiWrapper(|| {
|
||||
let result = fn_ptr( $( $arg_id),* );
|
||||
ReprUsize::to_usize(result)
|
||||
});
|
||||
ReprUsize::from_usize(export_table.logged_call(
|
||||
cglue::slice::CSliceRef::from_str(stringify!($fn_name)),
|
||||
cglue::trait_obj!(&format_args as dark_api::FnFfi),
|
||||
cglue::trait_obj!(&underlying_fn as dark_api::FnFfi),
|
||||
internal_error_untyped,
|
||||
<$ret_type as ReprUsize>::format_status)
|
||||
)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cusparse_function_declarations!(unimplemented);
|
|
@ -2,7 +2,7 @@
|
|||
name = "zluda_inject"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "zluda_with"
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
name = "zluda_redirect"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
mod r#impl;
|
||||
|
||||
pub enum FILE { }
|
||||
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue