mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-19 16:04:44 +00:00
Fix host code and update to CUDA 12.4 (#299)
This commit is contained in:
parent
870fed4bb6
commit
7a6df9dcbf
71 changed files with 68561 additions and 56362 deletions
|
@ -8,16 +8,28 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
|
|||
python3 \
|
||||
ripgrep \
|
||||
git \
|
||||
ltrace
|
||||
ltrace \
|
||||
# required by llvm 17
|
||||
lsb-release software-properties-common gnupg
|
||||
|
||||
ARG LLVM_VERSION=17
|
||||
RUN wget https://apt.llvm.org/llvm.sh && \
|
||||
chmod +x llvm.sh && \
|
||||
./llvm.sh ${LLVM_VERSION}
|
||||
|
||||
# Feel free to change to a newer version if you have a newer verison on your host
|
||||
ARG CUDA_VERSION=12-4
|
||||
ARG CUDA_PKG_VERSION=12-4
|
||||
# Docker <-> host driver version compatiblity is newer host <-> older docker
|
||||
# We don't care about a specific driver version, so pick oldest 5XX
|
||||
ARG CUDA_DRIVER=515
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
# CUDA headers need it for interop
|
||||
libgl-dev libegl-dev libvdpau-dev \
|
||||
nvidia-utils-${CUDA_DRIVER} \
|
||||
cuda-cudart-${CUDA_VERSION}
|
||||
cuda-cudart-dev-${CUDA_PKG_VERSION} \
|
||||
cuda-cudart-${CUDA_PKG_VERSION} \
|
||||
cuda-profiler-api-${CUDA_PKG_VERSION} \
|
||||
cuda-nvcc-${CUDA_PKG_VERSION}
|
||||
|
||||
ARG ROCM_VERSION=6.2.2
|
||||
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
|
||||
|
@ -29,9 +41,11 @@ RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
|
|||
rocminfo \
|
||||
rocm-gdb \
|
||||
rocm-smi-lib \
|
||||
hip-runtime-amd && \
|
||||
rocm-llvm-dev \
|
||||
hip-runtime-amd && \
|
||||
hip-dev && \
|
||||
echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
|
||||
ldconfig
|
||||
|
||||
ENV PATH=$PATH:/opt/rocm-6.2.2/bin
|
||||
ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
//"hostRequirements": { "gpu": "optional" }
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [ "mhutchie.git-graph" ],
|
||||
"extensions": [ "mhutchie.git-graph" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
7
.gitmodules
vendored
7
.gitmodules
vendored
|
@ -1,10 +1,3 @@
|
|||
[submodule "ext/spirv-tools"]
|
||||
path = ext/spirv-tools
|
||||
url = https://github.com/KhronosGroup/SPIRV-Tools
|
||||
branch = master
|
||||
[submodule "ext/spirv-headers"]
|
||||
path = ext/spirv-headers
|
||||
url = https://github.com/KhronosGroup/SPIRV-Headers
|
||||
[submodule "ext/llvm-project"]
|
||||
path = ext/llvm-project
|
||||
url = https://github.com/llvm/llvm-project
|
||||
|
|
51
Cargo.toml
51
Cargo.toml
|
@ -1,27 +1,24 @@
|
|||
[workspace]
|
||||
|
||||
resolver = "2"
|
||||
|
||||
members = [
|
||||
"ext/hip_runtime-sys",
|
||||
"ext/amd_comgr-sys",
|
||||
"comgr",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"detours-sys",
|
||||
"level_zero-sys",
|
||||
"level_zero",
|
||||
"spirv_tools-sys",
|
||||
"zluda",
|
||||
"zluda_dump",
|
||||
"zluda_lib",
|
||||
"zluda_inject",
|
||||
"zluda_redirect",
|
||||
"zluda_ml",
|
||||
"ptx",
|
||||
"ptx_parser",
|
||||
"ptx_parser_macros",
|
||||
"ptx_parser_macros_impl",
|
||||
]
|
||||
|
||||
default-members = ["zluda_lib", "zluda_ml", "zluda_inject", "zluda_redirect"]
|
||||
[workspace]
|
||||
|
||||
resolver = "2"
|
||||
|
||||
members = [
|
||||
"ext/hip_runtime-sys",
|
||||
"ext/amd_comgr-sys",
|
||||
"comgr",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"detours-sys",
|
||||
"zluda",
|
||||
"zluda_dump",
|
||||
"zluda_inject",
|
||||
"zluda_redirect",
|
||||
"zluda_ml",
|
||||
"ptx",
|
||||
"ptx_parser",
|
||||
"ptx_parser_macros",
|
||||
"ptx_parser_macros_impl",
|
||||
"zluda_bindgen",
|
||||
]
|
||||
|
||||
default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"]
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
name = "cuda_base"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
quote = "1.0"
|
||||
syn = { version = "1.0", features = ["full", "visit-mut"] }
|
||||
syn = { version = "2.0", features = ["full", "visit-mut"] }
|
||||
proc-macro2 = "1.0"
|
||||
rustc-hash = "1.1.0"
|
||||
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
bindgen build/wrapper.h -o src/cuda.rs --no-partialeq "CUDA_HOST_NODE_PARAMS_st" --with-derive-eq --whitelist-function="^cu.*" --whitelist-var="^CU.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice_v\d+$|^CUdeviceptr_v\d+$" --must-use-type "cudaError_enum" -- -I/usr/local/cuda/include
|
|
@ -1,3 +1,7 @@
|
|||
#define __CUDA_API_VERSION_INTERNAL
|
||||
#include <cuda.h>
|
||||
#include <cudaGL.h>
|
||||
#include <cudaProfiler.h>
|
||||
#include <cudaGL.h>
|
||||
#include <cudaEGL.h>
|
||||
#include <vdpau/vdpau.h>
|
||||
#include <cudaVDPAU.h>
|
||||
|
|
26686
cuda_base/src/cuda.rs
26686
cuda_base/src/cuda.rs
File diff suppressed because it is too large
Load diff
|
@ -1,110 +1,25 @@
|
|||
extern crate proc_macro;
|
||||
|
||||
use std::collections::hash_map;
|
||||
use std::iter;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
use proc_macro2::Span;
|
||||
use quote::{format_ident, quote, ToTokens};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use quote::{quote, ToTokens};
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::iter;
|
||||
use syn::parse::{Parse, ParseStream};
|
||||
use syn::punctuated::Punctuated;
|
||||
use syn::visit_mut::VisitMut;
|
||||
use syn::{
|
||||
bracketed, parse_macro_input, Abi, Fields, File, FnArg, ForeignItem, ForeignItemFn, Ident,
|
||||
Item, ItemForeignMod, LitStr, PatType, Path, PathArguments, PathSegment, ReturnType, Signature,
|
||||
Token, Type, TypeArray, TypePath, TypePtr,
|
||||
bracketed, parse_macro_input, File, ForeignItem, ForeignItemFn, Ident, Item, Path, Signature,
|
||||
Token,
|
||||
};
|
||||
|
||||
const CUDA_RS: &'static str = include_str! {"cuda.rs"};
|
||||
|
||||
// This macro copies cuda.rs as-is with some changes:
|
||||
// * All function declarations are filtered out
|
||||
// * CUdeviceptr_v2 is redefined from `unsigned long long` to `*void`
|
||||
// * `extern "C"` gets replaced by `extern "system"`
|
||||
// * CUuuid_st is redefined to use uchar instead of char
|
||||
#[proc_macro]
|
||||
pub fn cuda_type_declarations(_: TokenStream) -> TokenStream {
|
||||
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
|
||||
cuda_module.items = cuda_module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::ForeignMod(_) => None,
|
||||
Item::Struct(mut struct_) => {
|
||||
if "CUdeviceptr_v2" == struct_.ident.to_string() {
|
||||
match &mut struct_.fields {
|
||||
Fields::Unnamed(ref mut fields) => {
|
||||
fields.unnamed[0].ty =
|
||||
absolute_path_to_mut_ptr(&["std", "os", "raw", "c_void"])
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
} else if "CUuuid_st" == struct_.ident.to_string() {
|
||||
match &mut struct_.fields {
|
||||
Fields::Named(ref mut fields) => match fields.named[0].ty {
|
||||
Type::Array(TypeArray { ref mut elem, .. }) => {
|
||||
*elem = Box::new(Type::Path(TypePath {
|
||||
qself: None,
|
||||
path: segments_to_path(&["std", "os", "raw", "c_uchar"]),
|
||||
}))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
Some(Item::Struct(struct_))
|
||||
}
|
||||
i => Some(i),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
syn::visit_mut::visit_file_mut(&mut FixAbi, &mut cuda_module);
|
||||
cuda_module.into_token_stream().into()
|
||||
}
|
||||
|
||||
fn segments_to_path(path: &[&'static str]) -> Path {
|
||||
let mut segments = Punctuated::new();
|
||||
for ident in path {
|
||||
let ident = PathSegment {
|
||||
ident: Ident::new(ident, Span::call_site()),
|
||||
arguments: PathArguments::None,
|
||||
};
|
||||
segments.push(ident);
|
||||
}
|
||||
Path {
|
||||
leading_colon: Some(Token)),
|
||||
segments,
|
||||
}
|
||||
}
|
||||
|
||||
fn absolute_path_to_mut_ptr(path: &[&'static str]) -> Type {
|
||||
Type::Ptr(TypePtr {
|
||||
star_token: Token),
|
||||
const_token: None,
|
||||
mutability: Some(Token)),
|
||||
elem: Box::new(Type::Path(TypePath {
|
||||
qself: None,
|
||||
path: segments_to_path(path),
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
struct FixAbi;
|
||||
|
||||
impl VisitMut for FixAbi {
|
||||
fn visit_abi_mut(&mut self, i: &mut Abi) {
|
||||
if let Some(ref mut name) = i.name {
|
||||
*name = LitStr::new("system", Span::call_site());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This macro accepts following arguments:
|
||||
// * `type_path`: path to the module with type definitions (in the module tree)
|
||||
// * `normal_macro`: ident for a normal macro
|
||||
// * `override_macro`: ident for an override macro
|
||||
// * `override_fns`: list of override functions
|
||||
// * zero or more:
|
||||
// * `override_macro`: ident for an override macro
|
||||
// * `override_fns`: list of override functions
|
||||
// Then macro goes through every function in rust.rs, and for every fn `foo`:
|
||||
// * if `foo` is contained in `override_fns` then pass it into `override_macro`
|
||||
// * if `foo` is not contained in `override_fns` pass it to `normal_macro`
|
||||
|
@ -117,390 +32,191 @@ impl VisitMut for FixAbi {
|
|||
#[proc_macro]
|
||||
pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
let input = parse_macro_input!(tokens as FnDeclInput);
|
||||
let cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
|
||||
let override_fns = input
|
||||
.override_fns
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<FxHashSet<_>>();
|
||||
let (normal_macro_args, override_macro_args): (Vec<_>, Vec<_>) = cuda_module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::ForeignMod(ItemForeignMod { mut items, .. }) => match items.pop().unwrap() {
|
||||
ForeignItem::Fn(ForeignItemFn {
|
||||
sig:
|
||||
Signature {
|
||||
ident,
|
||||
inputs,
|
||||
output,
|
||||
..
|
||||
},
|
||||
..
|
||||
}) => {
|
||||
let use_normal_macro = !override_fns.contains(&ident.to_string());
|
||||
let inputs = inputs
|
||||
.into_iter()
|
||||
.map(|fn_arg| match fn_arg {
|
||||
FnArg::Typed(mut pat_type) => {
|
||||
pat_type.ty =
|
||||
prepend_cuda_path_to_type(&input.type_path, pat_type.ty);
|
||||
FnArg::Typed(pat_type)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.collect::<Punctuated<_, Token![,]>>();
|
||||
let output = match output {
|
||||
ReturnType::Type(_, type_) => type_,
|
||||
ReturnType::Default => unreachable!(),
|
||||
};
|
||||
let type_path = input.type_path.clone();
|
||||
Some((
|
||||
quote! {
|
||||
"system" fn #ident(#inputs) -> #type_path :: #output
|
||||
},
|
||||
use_normal_macro,
|
||||
))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => None,
|
||||
})
|
||||
.partition(|(_, use_normal_macro)| *use_normal_macro);
|
||||
let mut result = proc_macro2::TokenStream::new();
|
||||
if !normal_macro_args.is_empty() {
|
||||
let punctuated_normal_macro_args = to_punctuated::<Token![;]>(normal_macro_args);
|
||||
let macro_ = &input.normal_macro;
|
||||
result.extend(iter::once(quote! {
|
||||
#macro_ ! (#punctuated_normal_macro_args);
|
||||
}));
|
||||
let mut choose_macro = ChooseMacro::new(input);
|
||||
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
|
||||
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
|
||||
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
|
||||
extern_
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let abi = extern_.abi.name;
|
||||
for mut item in extern_.items {
|
||||
if let ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ref ident, .. },
|
||||
ref mut attrs,
|
||||
..
|
||||
}) = item
|
||||
{
|
||||
*attrs = Vec::new();
|
||||
choose_macro.add(ident, quote! { #abi #item });
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
if !override_macro_args.is_empty() {
|
||||
let punctuated_override_macro_args = to_punctuated::<Token![;]>(override_macro_args);
|
||||
let macro_ = &input.override_macro;
|
||||
result.extend(iter::once(quote! {
|
||||
#macro_ ! (#punctuated_override_macro_args);
|
||||
}));
|
||||
let mut result = proc_macro2::TokenStream::new();
|
||||
for (path, items) in
|
||||
iter::once(choose_macro.default).chain(choose_macro.override_sets.into_iter())
|
||||
{
|
||||
if items.is_empty() {
|
||||
continue;
|
||||
}
|
||||
quote! {
|
||||
#path ! { #(#items)* }
|
||||
}
|
||||
.to_tokens(&mut result);
|
||||
}
|
||||
result.into()
|
||||
}
|
||||
|
||||
fn to_punctuated<P: ToTokens + Default>(
|
||||
elms: Vec<(proc_macro2::TokenStream, bool)>,
|
||||
) -> proc_macro2::TokenStream {
|
||||
let mut collection = Punctuated::<proc_macro2::TokenStream, P>::new();
|
||||
collection.extend(elms.into_iter().map(|(token_stream, _)| token_stream));
|
||||
collection.into_token_stream()
|
||||
}
|
||||
|
||||
fn prepend_cuda_path_to_type(base_path: &Path, type_: Box<Type>) -> Box<Type> {
|
||||
match *type_ {
|
||||
Type::Path(mut type_path) => {
|
||||
type_path.path = prepend_cuda_path_to_path(base_path, type_path.path);
|
||||
Box::new(Type::Path(type_path))
|
||||
}
|
||||
Type::Ptr(mut type_ptr) => {
|
||||
type_ptr.elem = prepend_cuda_path_to_type(base_path, type_ptr.elem);
|
||||
Box::new(Type::Ptr(type_ptr))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn prepend_cuda_path_to_path(base_path: &Path, path: Path) -> Path {
|
||||
if path.leading_colon.is_some() {
|
||||
return path;
|
||||
}
|
||||
if path.segments.len() == 1 {
|
||||
let ident = path.segments[0].ident.to_string();
|
||||
if ident.starts_with("CU")
|
||||
|| ident.starts_with("cu")
|
||||
|| ident.starts_with("GL")
|
||||
|| ident == "HGPUNV"
|
||||
{
|
||||
let mut base_path = base_path.clone();
|
||||
base_path.segments.extend(path.segments);
|
||||
return base_path;
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
|
||||
struct FnDeclInput {
|
||||
type_path: Path,
|
||||
normal_macro: Path,
|
||||
override_macro: Path,
|
||||
override_fns: Punctuated<Ident, Token![,]>,
|
||||
overrides: Punctuated<OverrideMacro, Token![,]>,
|
||||
}
|
||||
|
||||
impl Parse for FnDeclInput {
|
||||
fn parse(input: ParseStream) -> syn::Result<Self> {
|
||||
let type_path = input.parse::<Path>()?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let normal_macro = input.parse::<Path>()?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let override_macro = input.parse::<Path>()?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let override_fns_content;
|
||||
bracketed!(override_fns_content in input);
|
||||
let override_fns = override_fns_content.parse_terminated(Ident::parse)?;
|
||||
let overrides = if input.is_empty() {
|
||||
Punctuated::new()
|
||||
} else {
|
||||
input.parse::<Token![,]>()?;
|
||||
input.parse_terminated(OverrideMacro::parse, Token![,])?
|
||||
};
|
||||
Ok(Self {
|
||||
type_path,
|
||||
normal_macro,
|
||||
override_macro,
|
||||
override_fns,
|
||||
overrides,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// This trait accepts following parameters:
|
||||
// * `type_path`: path to the module with type definitions (in the module tree)
|
||||
// * `trait_`: name of the trait to be derived
|
||||
// * `ignore_types`: bracketed list of types to ignore
|
||||
// * `ignore_fns`: bracketed list of fns to ignore
|
||||
#[proc_macro]
|
||||
pub fn cuda_derive_display_trait(tokens: TokenStream) -> TokenStream {
|
||||
let input = parse_macro_input!(tokens as DeriveDisplayInput);
|
||||
let cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
|
||||
let mut derive_state = DeriveDisplayState::new(input);
|
||||
cuda_module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
|
||||
.collect::<proc_macro2::TokenStream>()
|
||||
.into()
|
||||
struct OverrideMacro {
|
||||
macro_: Path,
|
||||
functions: Punctuated<Ident, Token![,]>,
|
||||
}
|
||||
|
||||
fn cuda_derive_display_trait_for_item(
|
||||
state: &mut DeriveDisplayState,
|
||||
item: Item,
|
||||
) -> Option<proc_macro2::TokenStream> {
|
||||
let path_prefix = &state.type_path;
|
||||
let path_prefix_iter = iter::repeat(&path_prefix);
|
||||
let trait_ = &state.trait_;
|
||||
let trait_iter = iter::repeat(&state.trait_);
|
||||
match item {
|
||||
Item::Const(_) => None,
|
||||
Item::ForeignMod(ItemForeignMod { mut items, .. }) => match items.pop().unwrap() {
|
||||
ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ident, inputs, .. },
|
||||
..
|
||||
}) => {
|
||||
if state.ignore_fns.contains(&ident) {
|
||||
return None;
|
||||
}
|
||||
let inputs = inputs
|
||||
.into_iter()
|
||||
.map(|fn_arg| match fn_arg {
|
||||
FnArg::Typed(mut pat_type) => {
|
||||
pat_type.ty = prepend_cuda_path_to_type(path_prefix, pat_type.ty);
|
||||
FnArg::Typed(pat_type)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let inputs_iter = inputs.iter();
|
||||
let mut arg_name_iter = inputs.iter().map(|fn_arg| match fn_arg {
|
||||
FnArg::Typed(PatType { pat, .. }) => pat,
|
||||
_ => unreachable!(),
|
||||
});
|
||||
let fn_name = format_ident!("write_{}", ident);
|
||||
let original_fn_name = ident.to_string();
|
||||
Some(match arg_name_iter.next() {
|
||||
Some(first_arg_name) => quote! {
|
||||
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized), #(#inputs_iter,)*) -> std::io::Result<()> {
|
||||
writer.write_all(concat!("(", stringify!(#first_arg_name), ": ").as_bytes())?;
|
||||
let mut arg_idx = 0usize;
|
||||
CudaDisplay::write(&#first_arg_name, #original_fn_name, arg_idx, writer)?;
|
||||
#(
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(concat!(stringify!(#arg_name_iter), ": ").as_bytes())?;
|
||||
CudaDisplay::write(&#arg_name_iter, #original_fn_name, arg_idx, writer)?;
|
||||
arg_idx += 1;
|
||||
)*
|
||||
writer.write_all(b")")
|
||||
}
|
||||
},
|
||||
None => quote! {
|
||||
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
writer.write_all(b"()")
|
||||
}
|
||||
},
|
||||
})
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Item::Impl(mut item_impl) => {
|
||||
let enum_ = match *(item_impl.self_ty) {
|
||||
Type::Path(mut path) => path.path.segments.pop().unwrap().into_value().ident,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let variant_ = match item_impl.items.pop().unwrap() {
|
||||
syn::ImplItem::Const(item_const) => item_const.ident,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
state.record_enum_variant(enum_, variant_);
|
||||
None
|
||||
}
|
||||
Item::Struct(item_struct) => {
|
||||
let item_struct_name = item_struct.ident.to_string();
|
||||
if state.ignore_types.contains(&item_struct.ident) {
|
||||
return None;
|
||||
}
|
||||
if item_struct_name.ends_with("_enum") {
|
||||
let enum_ = &item_struct.ident;
|
||||
let enum_iter = iter::repeat(&item_struct.ident);
|
||||
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
|
||||
Some(quote! {
|
||||
impl #trait_ for #path_prefix :: #enum_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
|
||||
_ => write!(writer, "{}", self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
} else {
|
||||
let struct_ = &item_struct.ident;
|
||||
let (first_field, rest_of_fields) = match item_struct.fields {
|
||||
Fields::Named(fields) => {
|
||||
let mut all_idents = fields.named.into_iter().filter_map(|f| {
|
||||
let f_ident = f.ident.unwrap();
|
||||
let name = f_ident.to_string();
|
||||
if name.starts_with("reserved") || name == "_unused" {
|
||||
None
|
||||
} else {
|
||||
Some(f_ident)
|
||||
}
|
||||
});
|
||||
let first = match all_idents.next() {
|
||||
Some(f) => f,
|
||||
None => return None,
|
||||
};
|
||||
(first, all_idents)
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
Some(quote! {
|
||||
impl #trait_ for #path_prefix :: #struct_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
|
||||
#trait_::write(&self.#first_field, "", 0, writer)?;
|
||||
#(
|
||||
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
|
||||
#trait_iter::write(&self.#rest_of_fields, "", 0, writer)?;
|
||||
)*
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Item::Type(item_type) => {
|
||||
if state.ignore_types.contains(&item_type.ident) {
|
||||
return None;
|
||||
};
|
||||
match *(item_type.ty) {
|
||||
Type::Ptr(_) => {
|
||||
let type_ = item_type.ident;
|
||||
Some(quote! {
|
||||
impl #trait_ for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", *self)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
Type::Path(type_path) => {
|
||||
if type_path.path.leading_colon.is_some() {
|
||||
let option_seg = type_path.path.segments.last().unwrap();
|
||||
if option_seg.ident == "Option" {
|
||||
match &option_seg.arguments {
|
||||
PathArguments::AngleBracketed(generic) => match generic.args[0] {
|
||||
syn::GenericArgument::Type(Type::BareFn(_)) => {
|
||||
let type_ = &item_type.ident;
|
||||
return Some(quote! {
|
||||
impl #trait_ for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Item::Union(_) => None,
|
||||
Item::Use(_) => None,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
struct DeriveDisplayState {
|
||||
type_path: Path,
|
||||
trait_: Path,
|
||||
ignore_types: FxHashSet<Ident>,
|
||||
ignore_fns: FxHashSet<Ident>,
|
||||
enums: FxHashMap<Ident, Vec<Ident>>,
|
||||
}
|
||||
|
||||
impl DeriveDisplayState {
|
||||
fn new(input: DeriveDisplayInput) -> Self {
|
||||
DeriveDisplayState {
|
||||
type_path: input.type_path,
|
||||
trait_: input.trait_,
|
||||
ignore_types: input.ignore_types.into_iter().collect(),
|
||||
ignore_fns: input.ignore_fns.into_iter().collect(),
|
||||
enums: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn record_enum_variant(&mut self, enum_: Ident, variant: Ident) {
|
||||
match self.enums.entry(enum_) {
|
||||
hash_map::Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().push(variant);
|
||||
}
|
||||
hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(vec![variant]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DeriveDisplayInput {
|
||||
type_path: Path,
|
||||
trait_: Path,
|
||||
ignore_types: Punctuated<Ident, Token![,]>,
|
||||
ignore_fns: Punctuated<Ident, Token![,]>,
|
||||
}
|
||||
|
||||
impl Parse for DeriveDisplayInput {
|
||||
impl Parse for OverrideMacro {
|
||||
fn parse(input: ParseStream) -> syn::Result<Self> {
|
||||
let type_path = input.parse::<Path>()?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let trait_ = input.parse::<Path>()?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let ignore_types_buffer;
|
||||
bracketed!(ignore_types_buffer in input);
|
||||
let ignore_types = ignore_types_buffer.parse_terminated(Ident::parse)?;
|
||||
input.parse::<Token![,]>()?;
|
||||
let ignore_fns_buffer;
|
||||
bracketed!(ignore_fns_buffer in input);
|
||||
let ignore_fns = ignore_fns_buffer.parse_terminated(Ident::parse)?;
|
||||
Ok(Self {
|
||||
type_path,
|
||||
trait_,
|
||||
ignore_types,
|
||||
ignore_fns,
|
||||
})
|
||||
let macro_ = input.parse::<Path>()?;
|
||||
input.parse::<Token![<=]>()?;
|
||||
let functions_content;
|
||||
bracketed!(functions_content in input);
|
||||
let functions = functions_content.parse_terminated(Ident::parse, Token![,])?;
|
||||
Ok(Self { macro_, functions })
|
||||
}
|
||||
}
|
||||
|
||||
struct ChooseMacro {
|
||||
default: (Path, Vec<proc_macro2::TokenStream>),
|
||||
override_lookup: FxHashMap<Ident, Path>,
|
||||
override_sets: FxHashMap<Path, Vec<proc_macro2::TokenStream>>,
|
||||
}
|
||||
|
||||
impl ChooseMacro {
|
||||
fn new(input: FnDeclInput) -> Self {
|
||||
let mut override_lookup = FxHashMap::default();
|
||||
let mut override_sets = FxHashMap::default();
|
||||
for OverrideMacro { macro_, functions } in input.overrides {
|
||||
for ident in functions {
|
||||
override_lookup.insert(ident, macro_.clone());
|
||||
override_sets.insert(macro_.clone(), Vec::new());
|
||||
}
|
||||
}
|
||||
Self {
|
||||
default: (input.normal_macro, Vec::new()),
|
||||
override_lookup,
|
||||
override_sets,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, ident: &Ident, tokens: proc_macro2::TokenStream) {
|
||||
match self.override_lookup.get(ident) {
|
||||
Some(override_macro) => {
|
||||
self.override_sets
|
||||
.get_mut(override_macro)
|
||||
.unwrap()
|
||||
.push(tokens);
|
||||
}
|
||||
None => self.default.1.push(tokens),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For some reason prettyplease will append trailing comma *only*
|
||||
// if there are two or more arguments
|
||||
struct FixFnSignatures;
|
||||
|
||||
impl VisitMut for FixFnSignatures {
|
||||
fn visit_signature_mut(&mut self, s: &mut syn::Signature) {
|
||||
s.inputs.pop_punct();
|
||||
}
|
||||
}
|
||||
|
||||
const MODULES: &[&str] = &[
|
||||
"context", "device", "driver", "function", "link", "memory", "module", "pointer",
|
||||
];
|
||||
|
||||
#[proc_macro]
|
||||
pub fn cuda_normalize_fn(tokens: TokenStream) -> TokenStream {
|
||||
let mut path = parse_macro_input!(tokens as syn::Path);
|
||||
let fn_ = path
|
||||
.segments
|
||||
.pop()
|
||||
.unwrap()
|
||||
.into_tuple()
|
||||
.0
|
||||
.ident
|
||||
.to_string();
|
||||
let already_has_module = MODULES.contains(&&*path.segments.last().unwrap().ident.to_string());
|
||||
let segments: Vec<String> = split(&fn_[2..]); // skip "cu"
|
||||
let fn_path = join(segments, !already_has_module);
|
||||
quote! {
|
||||
#path #fn_path
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
fn split(fn_: &str) -> Vec<String> {
|
||||
let mut result = Vec::new();
|
||||
for c in fn_.chars() {
|
||||
if c.is_ascii_uppercase() {
|
||||
result.push(c.to_ascii_lowercase().to_string());
|
||||
} else {
|
||||
result.last_mut().unwrap().push(c);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
|
||||
fn full_form(segment: &str) -> Option<&[&str]> {
|
||||
Some(match segment {
|
||||
"ctx" => &["context"],
|
||||
"func" => &["function"],
|
||||
"mem" => &["memory"],
|
||||
"memcpy" => &["memory", "copy"],
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
let mut normalized: Vec<&str> = Vec::new();
|
||||
for segment in fn_.iter() {
|
||||
match full_form(segment) {
|
||||
Some(segments) => normalized.extend(segments.into_iter()),
|
||||
None => normalized.push(&*segment),
|
||||
}
|
||||
}
|
||||
if !find_module {
|
||||
return [Ident::new(&normalized.join("_"), Span::call_site())]
|
||||
.into_iter()
|
||||
.collect();
|
||||
}
|
||||
if !MODULES.contains(&normalized[0]) {
|
||||
let mut globalized = vec!["driver"];
|
||||
globalized.extend(normalized);
|
||||
normalized = globalized;
|
||||
}
|
||||
let (module, path) = normalized.split_first().unwrap();
|
||||
let path = path.join("_");
|
||||
[module, &&*path]
|
||||
.into_iter()
|
||||
.map(|s| Ident::new(s, Span::call_site()))
|
||||
.collect()
|
||||
}
|
||||
|
|
|
@ -6,3 +6,4 @@ edition = "2018"
|
|||
|
||||
[dependencies]
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
|
|
File diff suppressed because it is too large
Load diff
1
ext/hip_runtime-sys/README
vendored
1
ext/hip_runtime-sys/README
vendored
|
@ -1 +0,0 @@
|
|||
bindgen --rust-target 1.77 /opt/rocm/include/hip/hip_runtime_api.h -o hip_runtime_api.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "hip.*" --allowlist-type "hip.*" --no-derive-debug --must-use-type hipError_t --new-type-alias "^hipDeviceptr_t$" --allowlist-var "^hip.*$" -- -I/opt/rocm/include -D__HIP_PLATFORM_AMD__
|
|
@ -1,2 +0,0 @@
|
|||
#define __HIP_PLATFORM_HCC__
|
||||
#include <hip/hip_runtime_api.h>
|
7422
ext/hip_runtime-sys/src/hip_runtime_api.rs
vendored
7422
ext/hip_runtime-sys/src/hip_runtime_api.rs
vendored
File diff suppressed because it is too large
Load diff
12669
ext/hip_runtime-sys/src/lib.rs
vendored
12669
ext/hip_runtime-sys/src/lib.rs
vendored
File diff suppressed because it is too large
Load diff
1
ext/spirv-headers
vendored
1
ext/spirv-headers
vendored
|
@ -1 +0,0 @@
|
|||
Subproject commit 308bd07424350a6000f35a77b5f85cd4f3da319e
|
1
ext/spirv-tools
vendored
1
ext/spirv-tools
vendored
|
@ -1 +0,0 @@
|
|||
Subproject commit e128ab0d624ce7beb08eb9656bb260c597a46d0a
|
|
@ -1,8 +0,0 @@
|
|||
[package]
|
||||
name = "level_zero-sys"
|
||||
version = "1.0.4"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
links = "ze_loader"
|
||||
|
||||
[lib]
|
|
@ -1,4 +0,0 @@
|
|||
sed 's/^typedef uint32_t ze_.*flags_t;$//g' include/ze_api.h > include/level_zero/ze_api.h
|
||||
sed -i -r 's/ze_(.*)_flag_t/ze_\1_flags_t/g' include/level_zero/ze_api.h
|
||||
bindgen --with-derive-default --no-default ".*format_t" --no-default ".*fd_t" --no-default ".*constants_t" --no-default ".*handle_t" --no-default ".*desc_t" --no-default ".*params_t" --size_t-is-usize --default-enum-style=newtype --bitfield-enum ".*flags_t" --whitelist-function "ze.*" --whitelist-type "ze.*" include/level_zero/ze_api.h -o src/ze_api.rs -- -Iinclude
|
||||
sed -i 's/pub struct _ze_result_t/#[must_use]\npub struct _ze_result_t/g' src/ze_api.rs
|
|
@ -1,17 +0,0 @@
|
|||
use env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
fn main() -> Result<(), VarError> {
|
||||
println!("cargo:rustc-link-lib=dylib=ze_loader");
|
||||
if cfg!(windows) {
|
||||
let env = env::var("CARGO_CFG_TARGET_ENV")?;
|
||||
if env == "msvc" {
|
||||
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
|
||||
path.push("lib");
|
||||
println!("cargo:rustc-link-search=native={}", path.display());
|
||||
} else {
|
||||
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
|
||||
};
|
||||
}
|
||||
Ok(())
|
||||
}
|
1
level_zero-sys/include/level_zero/.gitignore
vendored
1
level_zero-sys/include/level_zero/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
ze_api.h
|
File diff suppressed because it is too large
Load diff
|
@ -1,316 +0,0 @@
|
|||
EXPORTS
|
||||
zeCommandListAppendBarrier
|
||||
zeCommandListAppendEventReset
|
||||
zeCommandListAppendImageCopy
|
||||
zeCommandListAppendImageCopyFromMemory
|
||||
zeCommandListAppendImageCopyRegion
|
||||
zeCommandListAppendImageCopyToMemory
|
||||
zeCommandListAppendLaunchCooperativeKernel
|
||||
zeCommandListAppendLaunchKernel
|
||||
zeCommandListAppendLaunchKernelIndirect
|
||||
zeCommandListAppendLaunchMultipleKernelsIndirect
|
||||
zeCommandListAppendMemAdvise
|
||||
zeCommandListAppendMemoryCopy
|
||||
zeCommandListAppendMemoryCopyFromContext
|
||||
zeCommandListAppendMemoryCopyRegion
|
||||
zeCommandListAppendMemoryFill
|
||||
zeCommandListAppendMemoryPrefetch
|
||||
zeCommandListAppendMemoryRangesBarrier
|
||||
zeCommandListAppendQueryKernelTimestamps
|
||||
zeCommandListAppendSignalEvent
|
||||
zeCommandListAppendWaitOnEvents
|
||||
zeCommandListAppendWriteGlobalTimestamp
|
||||
zeCommandListClose
|
||||
zeCommandListCreate
|
||||
zeCommandListCreateImmediate
|
||||
zeCommandListDestroy
|
||||
zeCommandListReset
|
||||
zeCommandQueueCreate
|
||||
zeCommandQueueDestroy
|
||||
zeCommandQueueExecuteCommandLists
|
||||
zeCommandQueueSynchronize
|
||||
zeContextCreate
|
||||
zeContextCreateEx
|
||||
zeContextDestroy
|
||||
zeContextEvictImage
|
||||
zeContextEvictMemory
|
||||
zeContextGetStatus
|
||||
zeContextMakeImageResident
|
||||
zeContextMakeMemoryResident
|
||||
zeContextSystemBarrier
|
||||
zeDeviceCanAccessPeer
|
||||
zeDeviceGet
|
||||
zeDeviceGetCacheProperties
|
||||
zeDeviceGetCommandQueueGroupProperties
|
||||
zeDeviceGetComputeProperties
|
||||
zeDeviceGetExternalMemoryProperties
|
||||
zeDeviceGetGlobalTimestamps
|
||||
zeDeviceGetImageProperties
|
||||
zeDeviceGetMemoryAccessProperties
|
||||
zeDeviceGetMemoryProperties
|
||||
zeDeviceGetModuleProperties
|
||||
zeDeviceGetP2PProperties
|
||||
zeDeviceGetProperties
|
||||
zeDeviceGetStatus
|
||||
zeDeviceGetSubDevices
|
||||
zeDriverGet
|
||||
zeDriverGetApiVersion
|
||||
zeDriverGetExtensionFunctionAddress
|
||||
zeDriverGetExtensionProperties
|
||||
zeDriverGetIpcProperties
|
||||
zeDriverGetProperties
|
||||
zeEventCreate
|
||||
zeEventDestroy
|
||||
zeEventHostReset
|
||||
zeEventHostSignal
|
||||
zeEventHostSynchronize
|
||||
zeEventPoolCloseIpcHandle
|
||||
zeEventPoolCreate
|
||||
zeEventPoolDestroy
|
||||
zeEventPoolGetIpcHandle
|
||||
zeEventPoolOpenIpcHandle
|
||||
zeEventQueryKernelTimestamp
|
||||
zeEventQueryStatus
|
||||
zeFenceCreate
|
||||
zeFenceDestroy
|
||||
zeFenceHostSynchronize
|
||||
zeFenceQueryStatus
|
||||
zeFenceReset
|
||||
zeGetCommandListProcAddrTable
|
||||
zeGetCommandQueueProcAddrTable
|
||||
zeGetContextProcAddrTable
|
||||
zeGetDeviceProcAddrTable
|
||||
zeGetDriverProcAddrTable
|
||||
zeGetEventPoolProcAddrTable
|
||||
zeGetEventProcAddrTable
|
||||
zeGetFenceProcAddrTable
|
||||
zeGetGlobalProcAddrTable
|
||||
zeGetImageProcAddrTable
|
||||
zeGetKernelExpProcAddrTable
|
||||
zeGetKernelProcAddrTable
|
||||
zeGetMemProcAddrTable
|
||||
zeGetModuleBuildLogProcAddrTable
|
||||
zeGetModuleProcAddrTable
|
||||
zeGetPhysicalMemProcAddrTable
|
||||
zeGetSamplerProcAddrTable
|
||||
zeGetVirtualMemProcAddrTable
|
||||
zeImageCreate
|
||||
zeImageDestroy
|
||||
zeImageGetProperties
|
||||
zeInit
|
||||
zeKernelCreate
|
||||
zeKernelDestroy
|
||||
zeKernelGetIndirectAccess
|
||||
zeKernelGetName
|
||||
zeKernelGetProperties
|
||||
zeKernelGetSourceAttributes
|
||||
zeKernelSetArgumentValue
|
||||
zeKernelSetCacheConfig
|
||||
zeKernelSetGlobalOffsetExp
|
||||
zeKernelSetGroupSize
|
||||
zeKernelSetIndirectAccess
|
||||
zeKernelSuggestGroupSize
|
||||
zeKernelSuggestMaxCooperativeGroupCount
|
||||
zeLoaderInit
|
||||
zeMemAllocDevice
|
||||
zeMemAllocHost
|
||||
zeMemAllocShared
|
||||
zeMemCloseIpcHandle
|
||||
zeMemFree
|
||||
zeMemGetAddressRange
|
||||
zeMemGetAllocProperties
|
||||
zeMemGetIpcHandle
|
||||
zeMemOpenIpcHandle
|
||||
zeModuleBuildLogDestroy
|
||||
zeModuleBuildLogGetString
|
||||
zeModuleCreate
|
||||
zeModuleDestroy
|
||||
zeModuleDynamicLink
|
||||
zeModuleGetFunctionPointer
|
||||
zeModuleGetGlobalPointer
|
||||
zeModuleGetKernelNames
|
||||
zeModuleGetNativeBinary
|
||||
zeModuleGetProperties
|
||||
zePhysicalMemCreate
|
||||
zePhysicalMemDestroy
|
||||
zeSamplerCreate
|
||||
zeSamplerDestroy
|
||||
zeVirtualMemFree
|
||||
zeVirtualMemGetAccessAttribute
|
||||
zeVirtualMemMap
|
||||
zeVirtualMemQueryPageSize
|
||||
zeVirtualMemReserve
|
||||
zeVirtualMemSetAccessAttribute
|
||||
zeVirtualMemUnmap
|
||||
zelGetTracerApiProcAddrTable
|
||||
zelTracerCreate
|
||||
zelTracerDestroy
|
||||
zelTracerSetEnabled
|
||||
zelTracerSetEpilogues
|
||||
zelTracerSetPrologues
|
||||
zesDeviceEnumDiagnosticTestSuites
|
||||
zesDeviceEnumEngineGroups
|
||||
zesDeviceEnumFabricPorts
|
||||
zesDeviceEnumFans
|
||||
zesDeviceEnumFirmwares
|
||||
zesDeviceEnumFrequencyDomains
|
||||
zesDeviceEnumLeds
|
||||
zesDeviceEnumMemoryModules
|
||||
zesDeviceEnumPerformanceFactorDomains
|
||||
zesDeviceEnumPowerDomains
|
||||
zesDeviceEnumPsus
|
||||
zesDeviceEnumRasErrorSets
|
||||
zesDeviceEnumSchedulers
|
||||
zesDeviceEnumStandbyDomains
|
||||
zesDeviceEnumTemperatureSensors
|
||||
zesDeviceEventRegister
|
||||
zesDeviceGetProperties
|
||||
zesDeviceGetState
|
||||
zesDevicePciGetBars
|
||||
zesDevicePciGetProperties
|
||||
zesDevicePciGetState
|
||||
zesDevicePciGetStats
|
||||
zesDeviceProcessesGetState
|
||||
zesDeviceReset
|
||||
zesDiagnosticsGetProperties
|
||||
zesDiagnosticsGetTests
|
||||
zesDiagnosticsRunTests
|
||||
zesDriverEventListen
|
||||
zesDriverEventListenEx
|
||||
zesEngineGetActivity
|
||||
zesEngineGetProperties
|
||||
zesFabricPortGetConfig
|
||||
zesFabricPortGetLinkType
|
||||
zesFabricPortGetProperties
|
||||
zesFabricPortGetState
|
||||
zesFabricPortGetThroughput
|
||||
zesFabricPortSetConfig
|
||||
zesFanGetConfig
|
||||
zesFanGetProperties
|
||||
zesFanGetState
|
||||
zesFanSetDefaultMode
|
||||
zesFanSetFixedSpeedMode
|
||||
zesFanSetSpeedTableMode
|
||||
zesFirmwareFlash
|
||||
zesFirmwareGetProperties
|
||||
zesFrequencyGetAvailableClocks
|
||||
zesFrequencyGetProperties
|
||||
zesFrequencyGetRange
|
||||
zesFrequencyGetState
|
||||
zesFrequencyGetThrottleTime
|
||||
zesFrequencyOcGetCapabilities
|
||||
zesFrequencyOcGetFrequencyTarget
|
||||
zesFrequencyOcGetIccMax
|
||||
zesFrequencyOcGetMode
|
||||
zesFrequencyOcGetTjMax
|
||||
zesFrequencyOcGetVoltageTarget
|
||||
zesFrequencyOcSetFrequencyTarget
|
||||
zesFrequencyOcSetIccMax
|
||||
zesFrequencyOcSetMode
|
||||
zesFrequencyOcSetTjMax
|
||||
zesFrequencyOcSetVoltageTarget
|
||||
zesFrequencySetRange
|
||||
zesGetDeviceProcAddrTable
|
||||
zesGetDiagnosticsProcAddrTable
|
||||
zesGetDriverProcAddrTable
|
||||
zesGetEngineProcAddrTable
|
||||
zesGetFabricPortProcAddrTable
|
||||
zesGetFanProcAddrTable
|
||||
zesGetFirmwareProcAddrTable
|
||||
zesGetFrequencyProcAddrTable
|
||||
zesGetLedProcAddrTable
|
||||
zesGetMemoryProcAddrTable
|
||||
zesGetPerformanceFactorProcAddrTable
|
||||
zesGetPowerProcAddrTable
|
||||
zesGetPsuProcAddrTable
|
||||
zesGetRasProcAddrTable
|
||||
zesGetSchedulerProcAddrTable
|
||||
zesGetStandbyProcAddrTable
|
||||
zesGetTemperatureProcAddrTable
|
||||
zesLedGetProperties
|
||||
zesLedGetState
|
||||
zesLedSetColor
|
||||
zesLedSetState
|
||||
zesMemoryGetBandwidth
|
||||
zesMemoryGetProperties
|
||||
zesMemoryGetState
|
||||
zesPerformanceFactorGetConfig
|
||||
zesPerformanceFactorGetProperties
|
||||
zesPerformanceFactorSetConfig
|
||||
zesPowerGetEnergyCounter
|
||||
zesPowerGetEnergyThreshold
|
||||
zesPowerGetLimits
|
||||
zesPowerGetProperties
|
||||
zesPowerSetEnergyThreshold
|
||||
zesPowerSetLimits
|
||||
zesPsuGetProperties
|
||||
zesPsuGetState
|
||||
zesRasGetConfig
|
||||
zesRasGetProperties
|
||||
zesRasGetState
|
||||
zesRasSetConfig
|
||||
zesSchedulerGetCurrentMode
|
||||
zesSchedulerGetProperties
|
||||
zesSchedulerGetTimeoutModeProperties
|
||||
zesSchedulerGetTimesliceModeProperties
|
||||
zesSchedulerSetComputeUnitDebugMode
|
||||
zesSchedulerSetExclusiveMode
|
||||
zesSchedulerSetTimeoutMode
|
||||
zesSchedulerSetTimesliceMode
|
||||
zesStandbyGetMode
|
||||
zesStandbyGetProperties
|
||||
zesStandbySetMode
|
||||
zesTemperatureGetConfig
|
||||
zesTemperatureGetProperties
|
||||
zesTemperatureGetState
|
||||
zesTemperatureSetConfig
|
||||
zetCommandListAppendMetricMemoryBarrier
|
||||
zetCommandListAppendMetricQueryBegin
|
||||
zetCommandListAppendMetricQueryEnd
|
||||
zetCommandListAppendMetricStreamerMarker
|
||||
zetContextActivateMetricGroups
|
||||
zetDebugAcknowledgeEvent
|
||||
zetDebugAttach
|
||||
zetDebugDetach
|
||||
zetDebugGetRegisterSetProperties
|
||||
zetDebugInterrupt
|
||||
zetDebugReadEvent
|
||||
zetDebugReadMemory
|
||||
zetDebugReadRegisters
|
||||
zetDebugResume
|
||||
zetDebugWriteMemory
|
||||
zetDebugWriteRegisters
|
||||
zetDeviceGetDebugProperties
|
||||
zetGetCommandListProcAddrTable
|
||||
zetGetContextProcAddrTable
|
||||
zetGetDebugProcAddrTable
|
||||
zetGetDeviceProcAddrTable
|
||||
zetGetKernelProcAddrTable
|
||||
zetGetMetricGroupProcAddrTable
|
||||
zetGetMetricProcAddrTable
|
||||
zetGetMetricQueryPoolProcAddrTable
|
||||
zetGetMetricQueryProcAddrTable
|
||||
zetGetMetricStreamerProcAddrTable
|
||||
zetGetModuleProcAddrTable
|
||||
zetGetTracerExpProcAddrTable
|
||||
zetKernelGetProfileInfo
|
||||
zetMetricGet
|
||||
zetMetricGetProperties
|
||||
zetMetricGroupCalculateMetricValues
|
||||
zetMetricGroupGet
|
||||
zetMetricGroupGetProperties
|
||||
zetMetricQueryCreate
|
||||
zetMetricQueryDestroy
|
||||
zetMetricQueryGetData
|
||||
zetMetricQueryPoolCreate
|
||||
zetMetricQueryPoolDestroy
|
||||
zetMetricQueryReset
|
||||
zetMetricStreamerClose
|
||||
zetMetricStreamerOpen
|
||||
zetMetricStreamerReadData
|
||||
zetModuleGetDebugInfo
|
||||
zetTracerExpCreate
|
||||
zetTracerExpDestroy
|
||||
zetTracerExpSetEnabled
|
||||
zetTracerExpSetEpilogues
|
||||
zetTracerExpSetPrologues
|
Binary file not shown.
|
@ -1,3 +0,0 @@
|
|||
#![allow(warnings)]
|
||||
pub mod ze_api;
|
||||
pub use ze_api::*;
|
File diff suppressed because it is too large
Load diff
|
@ -1,14 +0,0 @@
|
|||
[package]
|
||||
name = "level_zero"
|
||||
version = "0.1.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
level_zero-sys = { path = "../level_zero-sys" }
|
||||
|
||||
[dependencies.ocl-core]
|
||||
version = "0.11"
|
||||
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
|
|
@ -1 +0,0 @@
|
|||
More ergonomic bindings for oneAPI Level Zero
|
|
@ -1,4 +0,0 @@
|
|||
pub use level_zero_sys as sys;
|
||||
|
||||
pub mod ze;
|
||||
pub use ze::*;
|
1289
level_zero/src/ze.rs
1289
level_zero/src/ze.rs
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,10 @@
|
|||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -189,7 +192,8 @@ void LLVMZludaBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
|
|||
auto builder = llvm::unwrap(B);
|
||||
LLVMContext &context = builder->getContext();
|
||||
builder->CreateFence(mapFromLLVMOrdering(Ordering),
|
||||
context.getOrInsertSyncScopeID(scope));
|
||||
context.getOrInsertSyncScopeID(scope),
|
||||
Name);
|
||||
}
|
||||
|
||||
LLVM_C_EXTERN_C_END
|
Binary file not shown.
|
@ -25,16 +25,16 @@ extern "C"
|
|||
return (uint32_t)__ockl_get_local_size(member);
|
||||
}
|
||||
|
||||
size_t __ockl_get_global_id(uint32_t) __device__;
|
||||
size_t __ockl_get_group_id(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_ctaid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_global_id(member);
|
||||
return (uint32_t)__ockl_get_group_id(member);
|
||||
}
|
||||
|
||||
size_t __ockl_get_global_size(uint32_t) __device__;
|
||||
size_t __ockl_get_num_groups(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_nctaid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_global_size(member);
|
||||
return (uint32_t)__ockl_get_num_groups(member);
|
||||
}
|
||||
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
|
|
|
@ -284,20 +284,40 @@ fn immediate_value<'a, 'input>(stream: &mut PtxParser<'a, 'input>) -> PResult<as
|
|||
.parse_next(stream)
|
||||
}
|
||||
|
||||
pub fn parse_module_unchecked<'input>(text: &'input str) -> Option<ast::Module<'input>> {
|
||||
let input = lex_with_span(text).ok()?;
|
||||
let mut errors = Vec::new();
|
||||
let state = PtxParserState::new(text, &mut errors);
|
||||
let parser = PtxParser {
|
||||
state,
|
||||
input: &input[..],
|
||||
pub fn parse_for_errors<'input>(text: &'input str) -> Vec<PtxError> {
|
||||
let (tokens, mut errors) = lex_with_span_unchecked(text);
|
||||
let parse_result = {
|
||||
let state = PtxParserState::new(text, &mut errors);
|
||||
let parser = PtxParser {
|
||||
state,
|
||||
input: &tokens[..],
|
||||
};
|
||||
module
|
||||
.parse(parser)
|
||||
.map_err(|err| PtxError::Parser(err.into_inner()))
|
||||
};
|
||||
let parsing_result = module.parse(parser).ok();
|
||||
if !errors.is_empty() {
|
||||
None
|
||||
} else {
|
||||
parsing_result
|
||||
match parse_result {
|
||||
Ok(_) => {}
|
||||
Err(err) => {
|
||||
errors.push(err);
|
||||
}
|
||||
}
|
||||
errors
|
||||
}
|
||||
|
||||
fn lex_with_span_unchecked<'input>(
|
||||
text: &'input str,
|
||||
) -> (Vec<(Token<'input>, logos::Span)>, Vec<PtxError>) {
|
||||
let lexer = Token::lexer(text);
|
||||
let mut result = Vec::new();
|
||||
let mut errors = Vec::new();
|
||||
for (token, span) in lexer.spanned() {
|
||||
match token {
|
||||
Ok(t) => result.push((t, span)),
|
||||
Err(err) => errors.push(PtxError::Lexer { source: err }),
|
||||
}
|
||||
}
|
||||
(result, errors)
|
||||
}
|
||||
|
||||
pub fn parse_module_checked<'input>(
|
||||
|
@ -342,17 +362,6 @@ pub fn parse_module_checked<'input>(
|
|||
}
|
||||
}
|
||||
|
||||
fn lex_with_span<'input>(
|
||||
text: &'input str,
|
||||
) -> Result<Vec<(Token<'input>, logos::Span)>, TokenError> {
|
||||
let lexer = Token::lexer(text);
|
||||
let mut result = Vec::new();
|
||||
for (token, span) in lexer.spanned() {
|
||||
result.push((token?, span));
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn module<'a, 'input>(stream: &mut PtxParser<'a, 'input>) -> PResult<ast::Module<'input>> {
|
||||
(
|
||||
version,
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
[package]
|
||||
name = "spirv_tools-sys"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1"
|
|
@ -1 +0,0 @@
|
|||
bindgen --whitelist-type="spv.*" --whitelist-function="spv.*" --size_t-is-usize --default-enum-style=rust --bitfield-enum="spv_text_to_binary_options_t|spv_binary_to_text_options_t" ../ext/SPIRV-Tools/include/spirv-tools/libspirv.h -o src/spirv_tools.rs
|
|
@ -1,28 +0,0 @@
|
|||
extern crate cmake;
|
||||
|
||||
use cmake::Config;
|
||||
use std::{env::VarError, path::PathBuf};
|
||||
|
||||
fn main() -> Result<(), VarError> {
|
||||
let root_path = std::env::var("CARGO_MANIFEST_DIR")?;
|
||||
let mut headers_path = PathBuf::new();
|
||||
headers_path.push(root_path);
|
||||
headers_path.push("../ext/spirv-headers");
|
||||
let spirv_tools_dir = Config::new("../ext/spirv-tools")
|
||||
.always_configure(false)
|
||||
.define("SPIRV-Headers_SOURCE_DIR", headers_path)
|
||||
.define("SPIRV_SKIP_EXECUTABLES", "ON")
|
||||
.define("SPIRV_SKIP_TESTS", "ON")
|
||||
.build();
|
||||
println!(
|
||||
"cargo:rustc-link-search=native={}/bin",
|
||||
spirv_tools_dir.display()
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-link-search=native={}/lib",
|
||||
spirv_tools_dir.display()
|
||||
);
|
||||
// dynamic linking to avoid linking to C++ runtime
|
||||
println!("cargo:rustc-link-lib=dylib=SPIRV-Tools-shared");
|
||||
Ok(())
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
#[allow(warnings)]
|
||||
mod spirv_tools;
|
||||
pub use spirv_tools::*;
|
|
@ -1,972 +0,0 @@
|
|||
/* automatically generated by rust-bindgen 0.54.1 */
|
||||
|
||||
pub type __uint16_t = ::std::os::raw::c_ushort;
|
||||
pub type __uint32_t = ::std::os::raw::c_uint;
|
||||
#[repr(i32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_result_t {
|
||||
SPV_SUCCESS = 0,
|
||||
SPV_UNSUPPORTED = 1,
|
||||
SPV_END_OF_STREAM = 2,
|
||||
SPV_WARNING = 3,
|
||||
SPV_FAILED_MATCH = 4,
|
||||
SPV_REQUESTED_TERMINATION = 5,
|
||||
SPV_ERROR_INTERNAL = -1,
|
||||
SPV_ERROR_OUT_OF_MEMORY = -2,
|
||||
SPV_ERROR_INVALID_POINTER = -3,
|
||||
SPV_ERROR_INVALID_BINARY = -4,
|
||||
SPV_ERROR_INVALID_TEXT = -5,
|
||||
SPV_ERROR_INVALID_TABLE = -6,
|
||||
SPV_ERROR_INVALID_VALUE = -7,
|
||||
SPV_ERROR_INVALID_DIAGNOSTIC = -8,
|
||||
SPV_ERROR_INVALID_LOOKUP = -9,
|
||||
SPV_ERROR_INVALID_ID = -10,
|
||||
SPV_ERROR_INVALID_CFG = -11,
|
||||
SPV_ERROR_INVALID_LAYOUT = -12,
|
||||
SPV_ERROR_INVALID_CAPABILITY = -13,
|
||||
SPV_ERROR_INVALID_DATA = -14,
|
||||
SPV_ERROR_MISSING_EXTENSION = -15,
|
||||
SPV_ERROR_WRONG_VERSION = -16,
|
||||
_spv_result_t = 2147483647,
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_message_level_t {
|
||||
SPV_MSG_FATAL = 0,
|
||||
SPV_MSG_INTERNAL_ERROR = 1,
|
||||
SPV_MSG_ERROR = 2,
|
||||
SPV_MSG_WARNING = 3,
|
||||
SPV_MSG_INFO = 4,
|
||||
SPV_MSG_DEBUG = 5,
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_endianness_t {
|
||||
SPV_ENDIANNESS_LITTLE = 0,
|
||||
SPV_ENDIANNESS_BIG = 1,
|
||||
_spv_endianness_t = 2147483647,
|
||||
}
|
||||
impl spv_operand_type_t {
|
||||
pub const SPV_OPERAND_TYPE_FIRST_OPTIONAL_TYPE: spv_operand_type_t =
|
||||
spv_operand_type_t::SPV_OPERAND_TYPE_OPTIONAL_ID;
|
||||
}
|
||||
impl spv_operand_type_t {
|
||||
pub const SPV_OPERAND_TYPE_FIRST_VARIABLE_TYPE: spv_operand_type_t =
|
||||
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID;
|
||||
}
|
||||
impl spv_operand_type_t {
|
||||
pub const SPV_OPERAND_TYPE_LAST_VARIABLE_TYPE: spv_operand_type_t =
|
||||
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER;
|
||||
}
|
||||
impl spv_operand_type_t {
|
||||
pub const SPV_OPERAND_TYPE_LAST_OPTIONAL_TYPE: spv_operand_type_t =
|
||||
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER;
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_operand_type_t {
|
||||
SPV_OPERAND_TYPE_NONE = 0,
|
||||
SPV_OPERAND_TYPE_ID = 1,
|
||||
SPV_OPERAND_TYPE_TYPE_ID = 2,
|
||||
SPV_OPERAND_TYPE_RESULT_ID = 3,
|
||||
SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID = 4,
|
||||
SPV_OPERAND_TYPE_SCOPE_ID = 5,
|
||||
SPV_OPERAND_TYPE_LITERAL_INTEGER = 6,
|
||||
SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER = 7,
|
||||
SPV_OPERAND_TYPE_SPEC_CONSTANT_OP_NUMBER = 8,
|
||||
SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER = 9,
|
||||
SPV_OPERAND_TYPE_LITERAL_STRING = 10,
|
||||
SPV_OPERAND_TYPE_SOURCE_LANGUAGE = 11,
|
||||
SPV_OPERAND_TYPE_EXECUTION_MODEL = 12,
|
||||
SPV_OPERAND_TYPE_ADDRESSING_MODEL = 13,
|
||||
SPV_OPERAND_TYPE_MEMORY_MODEL = 14,
|
||||
SPV_OPERAND_TYPE_EXECUTION_MODE = 15,
|
||||
SPV_OPERAND_TYPE_STORAGE_CLASS = 16,
|
||||
SPV_OPERAND_TYPE_DIMENSIONALITY = 17,
|
||||
SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE = 18,
|
||||
SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE = 19,
|
||||
SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT = 20,
|
||||
SPV_OPERAND_TYPE_IMAGE_CHANNEL_ORDER = 21,
|
||||
SPV_OPERAND_TYPE_IMAGE_CHANNEL_DATA_TYPE = 22,
|
||||
SPV_OPERAND_TYPE_FP_ROUNDING_MODE = 23,
|
||||
SPV_OPERAND_TYPE_LINKAGE_TYPE = 24,
|
||||
SPV_OPERAND_TYPE_ACCESS_QUALIFIER = 25,
|
||||
SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE = 26,
|
||||
SPV_OPERAND_TYPE_DECORATION = 27,
|
||||
SPV_OPERAND_TYPE_BUILT_IN = 28,
|
||||
SPV_OPERAND_TYPE_GROUP_OPERATION = 29,
|
||||
SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS = 30,
|
||||
SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO = 31,
|
||||
SPV_OPERAND_TYPE_CAPABILITY = 32,
|
||||
SPV_OPERAND_TYPE_RAY_FLAGS = 33,
|
||||
SPV_OPERAND_TYPE_RAY_QUERY_INTERSECTION = 34,
|
||||
SPV_OPERAND_TYPE_RAY_QUERY_COMMITTED_INTERSECTION_TYPE = 35,
|
||||
SPV_OPERAND_TYPE_RAY_QUERY_CANDIDATE_INTERSECTION_TYPE = 36,
|
||||
SPV_OPERAND_TYPE_IMAGE = 37,
|
||||
SPV_OPERAND_TYPE_FP_FAST_MATH_MODE = 38,
|
||||
SPV_OPERAND_TYPE_SELECTION_CONTROL = 39,
|
||||
SPV_OPERAND_TYPE_LOOP_CONTROL = 40,
|
||||
SPV_OPERAND_TYPE_FUNCTION_CONTROL = 41,
|
||||
SPV_OPERAND_TYPE_MEMORY_ACCESS = 42,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_ID = 43,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_IMAGE = 44,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS = 45,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER = 46,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_NUMBER = 47,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER = 48,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING = 49,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER = 50,
|
||||
SPV_OPERAND_TYPE_OPTIONAL_CIV = 51,
|
||||
SPV_OPERAND_TYPE_VARIABLE_ID = 52,
|
||||
SPV_OPERAND_TYPE_VARIABLE_LITERAL_INTEGER = 53,
|
||||
SPV_OPERAND_TYPE_VARIABLE_LITERAL_INTEGER_ID = 54,
|
||||
SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER = 55,
|
||||
SPV_OPERAND_TYPE_DEBUG_INFO_FLAGS = 56,
|
||||
SPV_OPERAND_TYPE_DEBUG_BASE_TYPE_ATTRIBUTE_ENCODING = 57,
|
||||
SPV_OPERAND_TYPE_DEBUG_COMPOSITE_TYPE = 58,
|
||||
SPV_OPERAND_TYPE_DEBUG_TYPE_QUALIFIER = 59,
|
||||
SPV_OPERAND_TYPE_DEBUG_OPERATION = 60,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_INFO_FLAGS = 61,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_BASE_TYPE_ATTRIBUTE_ENCODING = 62,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_COMPOSITE_TYPE = 63,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_TYPE_QUALIFIER = 64,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_OPERATION = 65,
|
||||
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_IMPORTED_ENTITY = 66,
|
||||
SPV_OPERAND_TYPE_NUM_OPERAND_TYPES = 67,
|
||||
_spv_operand_type_t = 2147483647,
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_ext_inst_type_t {
|
||||
SPV_EXT_INST_TYPE_NONE = 0,
|
||||
SPV_EXT_INST_TYPE_GLSL_STD_450 = 1,
|
||||
SPV_EXT_INST_TYPE_OPENCL_STD = 2,
|
||||
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER = 3,
|
||||
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_TRINARY_MINMAX = 4,
|
||||
SPV_EXT_INST_TYPE_SPV_AMD_GCN_SHADER = 5,
|
||||
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_BALLOT = 6,
|
||||
SPV_EXT_INST_TYPE_DEBUGINFO = 7,
|
||||
SPV_EXT_INST_TYPE_OPENCL_DEBUGINFO_100 = 8,
|
||||
SPV_EXT_INST_TYPE_NONSEMANTIC_UNKNOWN = 9,
|
||||
_spv_ext_inst_type_t = 2147483647,
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_number_kind_t {
|
||||
SPV_NUMBER_NONE = 0,
|
||||
SPV_NUMBER_UNSIGNED_INT = 1,
|
||||
SPV_NUMBER_SIGNED_INT = 2,
|
||||
SPV_NUMBER_FLOATING = 3,
|
||||
}
|
||||
impl spv_text_to_binary_options_t {
|
||||
pub const SPV_TEXT_TO_BINARY_OPTION_NONE: spv_text_to_binary_options_t =
|
||||
spv_text_to_binary_options_t(1);
|
||||
}
|
||||
impl spv_text_to_binary_options_t {
|
||||
pub const SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS: spv_text_to_binary_options_t =
|
||||
spv_text_to_binary_options_t(2);
|
||||
}
|
||||
impl spv_text_to_binary_options_t {
|
||||
pub const _spv_text_to_binary_options_t: spv_text_to_binary_options_t =
|
||||
spv_text_to_binary_options_t(2147483647);
|
||||
}
|
||||
impl ::std::ops::BitOr<spv_text_to_binary_options_t> for spv_text_to_binary_options_t {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitor(self, other: Self) -> Self {
|
||||
spv_text_to_binary_options_t(self.0 | other.0)
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitOrAssign for spv_text_to_binary_options_t {
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: spv_text_to_binary_options_t) {
|
||||
self.0 |= rhs.0;
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitAnd<spv_text_to_binary_options_t> for spv_text_to_binary_options_t {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitand(self, other: Self) -> Self {
|
||||
spv_text_to_binary_options_t(self.0 & other.0)
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitAndAssign for spv_text_to_binary_options_t {
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: spv_text_to_binary_options_t) {
|
||||
self.0 &= rhs.0;
|
||||
}
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct spv_text_to_binary_options_t(pub u32);
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_NONE: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(1);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_PRINT: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(2);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_COLOR: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(4);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_INDENT: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(8);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_SHOW_BYTE_OFFSET: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(16);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_NO_HEADER: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(32);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(64);
|
||||
}
|
||||
impl spv_binary_to_text_options_t {
|
||||
pub const _spv_binary_to_text_options_t: spv_binary_to_text_options_t =
|
||||
spv_binary_to_text_options_t(2147483647);
|
||||
}
|
||||
impl ::std::ops::BitOr<spv_binary_to_text_options_t> for spv_binary_to_text_options_t {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitor(self, other: Self) -> Self {
|
||||
spv_binary_to_text_options_t(self.0 | other.0)
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitOrAssign for spv_binary_to_text_options_t {
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: spv_binary_to_text_options_t) {
|
||||
self.0 |= rhs.0;
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitAnd<spv_binary_to_text_options_t> for spv_binary_to_text_options_t {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitand(self, other: Self) -> Self {
|
||||
spv_binary_to_text_options_t(self.0 & other.0)
|
||||
}
|
||||
}
|
||||
impl ::std::ops::BitAndAssign for spv_binary_to_text_options_t {
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: spv_binary_to_text_options_t) {
|
||||
self.0 &= rhs.0;
|
||||
}
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct spv_binary_to_text_options_t(pub u32);
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_parsed_operand_t {
|
||||
pub offset: u16,
|
||||
pub num_words: u16,
|
||||
pub type_: spv_operand_type_t,
|
||||
pub number_kind: spv_number_kind_t,
|
||||
pub number_bit_width: u32,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_parsed_operand_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_parsed_operand_t>(),
|
||||
16usize,
|
||||
concat!("Size of: ", stringify!(spv_parsed_operand_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_parsed_operand_t>(),
|
||||
4usize,
|
||||
concat!("Alignment of ", stringify!(spv_parsed_operand_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).offset as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_operand_t),
|
||||
"::",
|
||||
stringify!(offset)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).num_words as *const _ as usize },
|
||||
2usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_operand_t),
|
||||
"::",
|
||||
stringify!(num_words)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).type_ as *const _ as usize },
|
||||
4usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_operand_t),
|
||||
"::",
|
||||
stringify!(type_)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_operand_t>())).number_kind as *const _ as usize
|
||||
},
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_operand_t),
|
||||
"::",
|
||||
stringify!(number_kind)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_operand_t>())).number_bit_width as *const _ as usize
|
||||
},
|
||||
12usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_operand_t),
|
||||
"::",
|
||||
stringify!(number_bit_width)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_parsed_instruction_t {
|
||||
pub words: *const u32,
|
||||
pub num_words: u16,
|
||||
pub opcode: u16,
|
||||
pub ext_inst_type: spv_ext_inst_type_t,
|
||||
pub type_id: u32,
|
||||
pub result_id: u32,
|
||||
pub operands: *const spv_parsed_operand_t,
|
||||
pub num_operands: u16,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_parsed_instruction_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_parsed_instruction_t>(),
|
||||
40usize,
|
||||
concat!("Size of: ", stringify!(spv_parsed_instruction_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_parsed_instruction_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_parsed_instruction_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_parsed_instruction_t>())).words as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(words)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).num_words as *const _ as usize
|
||||
},
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(num_words)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_parsed_instruction_t>())).opcode as *const _ as usize },
|
||||
10usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(opcode)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).ext_inst_type as *const _ as usize
|
||||
},
|
||||
12usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(ext_inst_type)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).type_id as *const _ as usize
|
||||
},
|
||||
16usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(type_id)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).result_id as *const _ as usize
|
||||
},
|
||||
20usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(result_id)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).operands as *const _ as usize
|
||||
},
|
||||
24usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(operands)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).num_operands as *const _ as usize
|
||||
},
|
||||
32usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_parsed_instruction_t),
|
||||
"::",
|
||||
stringify!(num_operands)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_const_binary_t {
|
||||
pub code: *const u32,
|
||||
pub wordCount: usize,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_const_binary_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_const_binary_t>(),
|
||||
16usize,
|
||||
concat!("Size of: ", stringify!(spv_const_binary_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_const_binary_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_const_binary_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_const_binary_t>())).code as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_const_binary_t),
|
||||
"::",
|
||||
stringify!(code)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_const_binary_t>())).wordCount as *const _ as usize },
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_const_binary_t),
|
||||
"::",
|
||||
stringify!(wordCount)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_binary_t {
|
||||
pub code: *mut u32,
|
||||
pub wordCount: usize,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_binary_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_binary_t>(),
|
||||
16usize,
|
||||
concat!("Size of: ", stringify!(spv_binary_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_binary_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_binary_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_binary_t>())).code as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_binary_t),
|
||||
"::",
|
||||
stringify!(code)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_binary_t>())).wordCount as *const _ as usize },
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_binary_t),
|
||||
"::",
|
||||
stringify!(wordCount)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_text_t {
|
||||
pub str_: *const ::std::os::raw::c_char,
|
||||
pub length: usize,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_text_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_text_t>(),
|
||||
16usize,
|
||||
concat!("Size of: ", stringify!(spv_text_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_text_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_text_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_text_t>())).str_ as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_text_t),
|
||||
"::",
|
||||
stringify!(str_)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_text_t>())).length as *const _ as usize },
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_text_t),
|
||||
"::",
|
||||
stringify!(length)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_position_t {
|
||||
pub line: usize,
|
||||
pub column: usize,
|
||||
pub index: usize,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_position_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_position_t>(),
|
||||
24usize,
|
||||
concat!("Size of: ", stringify!(spv_position_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_position_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_position_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_position_t>())).line as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_position_t),
|
||||
"::",
|
||||
stringify!(line)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_position_t>())).column as *const _ as usize },
|
||||
8usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_position_t),
|
||||
"::",
|
||||
stringify!(column)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_position_t>())).index as *const _ as usize },
|
||||
16usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_position_t),
|
||||
"::",
|
||||
stringify!(index)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_diagnostic_t {
|
||||
pub position: spv_position_t,
|
||||
pub error: *mut ::std::os::raw::c_char,
|
||||
pub isTextSource: bool,
|
||||
}
|
||||
#[test]
|
||||
fn bindgen_test_layout_spv_diagnostic_t() {
|
||||
assert_eq!(
|
||||
::std::mem::size_of::<spv_diagnostic_t>(),
|
||||
40usize,
|
||||
concat!("Size of: ", stringify!(spv_diagnostic_t))
|
||||
);
|
||||
assert_eq!(
|
||||
::std::mem::align_of::<spv_diagnostic_t>(),
|
||||
8usize,
|
||||
concat!("Alignment of ", stringify!(spv_diagnostic_t))
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).position as *const _ as usize },
|
||||
0usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_diagnostic_t),
|
||||
"::",
|
||||
stringify!(position)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).error as *const _ as usize },
|
||||
24usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_diagnostic_t),
|
||||
"::",
|
||||
stringify!(error)
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).isTextSource as *const _ as usize },
|
||||
32usize,
|
||||
concat!(
|
||||
"Offset of field: ",
|
||||
stringify!(spv_diagnostic_t),
|
||||
"::",
|
||||
stringify!(isTextSource)
|
||||
)
|
||||
);
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_context_t {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_validator_options_t {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_optimizer_options_t {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_reducer_options_t {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct spv_fuzzer_options_t {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
pub type spv_const_binary = *mut spv_const_binary_t;
|
||||
pub type spv_binary = *mut spv_binary_t;
|
||||
pub type spv_text = *mut spv_text_t;
|
||||
pub type spv_position = *mut spv_position_t;
|
||||
pub type spv_diagnostic = *mut spv_diagnostic_t;
|
||||
pub type spv_const_context = *const spv_context_t;
|
||||
pub type spv_context = *mut spv_context_t;
|
||||
pub type spv_validator_options = *mut spv_validator_options_t;
|
||||
pub type spv_const_validator_options = *const spv_validator_options_t;
|
||||
pub type spv_optimizer_options = *mut spv_optimizer_options_t;
|
||||
pub type spv_const_optimizer_options = *const spv_optimizer_options_t;
|
||||
pub type spv_reducer_options = *mut spv_reducer_options_t;
|
||||
pub type spv_const_reducer_options = *const spv_reducer_options_t;
|
||||
pub type spv_fuzzer_options = *mut spv_fuzzer_options_t;
|
||||
pub type spv_const_fuzzer_options = *const spv_fuzzer_options_t;
|
||||
extern "C" {
|
||||
pub fn spvSoftwareVersionString() -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvSoftwareVersionDetailsString() -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_target_env {
|
||||
SPV_ENV_UNIVERSAL_1_0 = 0,
|
||||
SPV_ENV_VULKAN_1_0 = 1,
|
||||
SPV_ENV_UNIVERSAL_1_1 = 2,
|
||||
SPV_ENV_OPENCL_2_1 = 3,
|
||||
SPV_ENV_OPENCL_2_2 = 4,
|
||||
SPV_ENV_OPENGL_4_0 = 5,
|
||||
SPV_ENV_OPENGL_4_1 = 6,
|
||||
SPV_ENV_OPENGL_4_2 = 7,
|
||||
SPV_ENV_OPENGL_4_3 = 8,
|
||||
SPV_ENV_OPENGL_4_5 = 9,
|
||||
SPV_ENV_UNIVERSAL_1_2 = 10,
|
||||
SPV_ENV_OPENCL_1_2 = 11,
|
||||
SPV_ENV_OPENCL_EMBEDDED_1_2 = 12,
|
||||
SPV_ENV_OPENCL_2_0 = 13,
|
||||
SPV_ENV_OPENCL_EMBEDDED_2_0 = 14,
|
||||
SPV_ENV_OPENCL_EMBEDDED_2_1 = 15,
|
||||
SPV_ENV_OPENCL_EMBEDDED_2_2 = 16,
|
||||
SPV_ENV_UNIVERSAL_1_3 = 17,
|
||||
SPV_ENV_VULKAN_1_1 = 18,
|
||||
SPV_ENV_WEBGPU_0 = 19,
|
||||
SPV_ENV_UNIVERSAL_1_4 = 20,
|
||||
SPV_ENV_VULKAN_1_1_SPIRV_1_4 = 21,
|
||||
SPV_ENV_UNIVERSAL_1_5 = 22,
|
||||
SPV_ENV_VULKAN_1_2 = 23,
|
||||
}
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum spv_validator_limit {
|
||||
spv_validator_limit_max_struct_members = 0,
|
||||
spv_validator_limit_max_struct_depth = 1,
|
||||
spv_validator_limit_max_local_variables = 2,
|
||||
spv_validator_limit_max_global_variables = 3,
|
||||
spv_validator_limit_max_switch_branches = 4,
|
||||
spv_validator_limit_max_function_args = 5,
|
||||
spv_validator_limit_max_control_flow_nesting_depth = 6,
|
||||
spv_validator_limit_max_access_chain_indexes = 7,
|
||||
spv_validator_limit_max_id_bound = 8,
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvTargetEnvDescription(env: spv_target_env) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvParseTargetEnv(s: *const ::std::os::raw::c_char, env: *mut spv_target_env) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvParseVulkanEnv(vulkan_ver: u32, spirv_ver: u32, env: *mut spv_target_env) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvContextCreate(env: spv_target_env) -> spv_context;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvContextDestroy(context: spv_context);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsCreate() -> spv_validator_options;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsDestroy(options: spv_validator_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetUniversalLimit(
|
||||
options: spv_validator_options,
|
||||
limit_type: spv_validator_limit,
|
||||
limit: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetRelaxStoreStruct(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetRelaxLogicalPointer(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetBeforeHlslLegalization(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetRelaxBlockLayout(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetUniformBufferStandardLayout(
|
||||
options: spv_validator_options,
|
||||
val: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetScalarBlockLayout(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidatorOptionsSetSkipBlockLayout(options: spv_validator_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsCreate() -> spv_optimizer_options;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsDestroy(options: spv_optimizer_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsSetRunValidator(options: spv_optimizer_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsSetValidatorOptions(
|
||||
options: spv_optimizer_options,
|
||||
val: spv_validator_options,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsSetMaxIdBound(options: spv_optimizer_options, val: u32);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsSetPreserveBindings(options: spv_optimizer_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOptimizerOptionsSetPreserveSpecConstants(options: spv_optimizer_options, val: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvReducerOptionsCreate() -> spv_reducer_options;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvReducerOptionsDestroy(options: spv_reducer_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvReducerOptionsSetStepLimit(options: spv_reducer_options, step_limit: u32);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvReducerOptionsSetFailOnValidationError(
|
||||
options: spv_reducer_options,
|
||||
fail_on_validation_error: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsCreate() -> spv_fuzzer_options;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsDestroy(options: spv_fuzzer_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsEnableReplayValidation(options: spv_fuzzer_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsSetRandomSeed(options: spv_fuzzer_options, seed: u32);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsSetShrinkerStepLimit(
|
||||
options: spv_fuzzer_options,
|
||||
shrinker_step_limit: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvFuzzerOptionsEnableFuzzerPassValidation(options: spv_fuzzer_options);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvTextToBinary(
|
||||
context: spv_const_context,
|
||||
text: *const ::std::os::raw::c_char,
|
||||
length: usize,
|
||||
binary: *mut spv_binary,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvTextToBinaryWithOptions(
|
||||
context: spv_const_context,
|
||||
text: *const ::std::os::raw::c_char,
|
||||
length: usize,
|
||||
options: u32,
|
||||
binary: *mut spv_binary,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvTextDestroy(text: spv_text);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvBinaryToText(
|
||||
context: spv_const_context,
|
||||
binary: *const u32,
|
||||
word_count: usize,
|
||||
options: u32,
|
||||
text: *mut spv_text,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvBinaryDestroy(binary: spv_binary);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidate(
|
||||
context: spv_const_context,
|
||||
binary: spv_const_binary,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidateWithOptions(
|
||||
context: spv_const_context,
|
||||
options: spv_const_validator_options,
|
||||
binary: spv_const_binary,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvValidateBinary(
|
||||
context: spv_const_context,
|
||||
words: *const u32,
|
||||
num_words: usize,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvDiagnosticCreate(
|
||||
position: spv_position,
|
||||
message: *const ::std::os::raw::c_char,
|
||||
) -> spv_diagnostic;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvDiagnosticDestroy(diagnostic: spv_diagnostic);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvDiagnosticPrint(diagnostic: spv_diagnostic) -> spv_result_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn spvOpcodeString(opcode: u32) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
pub type spv_parsed_header_fn_t = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
endian: spv_endianness_t,
|
||||
magic: u32,
|
||||
version: u32,
|
||||
generator: u32,
|
||||
id_bound: u32,
|
||||
reserved: u32,
|
||||
) -> spv_result_t,
|
||||
>;
|
||||
pub type spv_parsed_instruction_fn_t = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
parsed_instruction: *const spv_parsed_instruction_t,
|
||||
) -> spv_result_t,
|
||||
>;
|
||||
extern "C" {
|
||||
pub fn spvBinaryParse(
|
||||
context: spv_const_context,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
words: *const u32,
|
||||
num_words: usize,
|
||||
parse_header: spv_parsed_header_fn_t,
|
||||
parse_instruction: spv_parsed_instruction_fn_t,
|
||||
diagnostic: *mut spv_diagnostic,
|
||||
) -> spv_result_t;
|
||||
}
|
|
@ -2,26 +2,25 @@
|
|||
name = "zluda"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda"
|
||||
name = "nvcuda"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
comgr = { path = "../comgr" }
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
ptx = { path = "../ptx" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
lazy_static = "1.4"
|
||||
num_enum = "0.4"
|
||||
lz4-sys = "1.9"
|
||||
tempfile = "3"
|
||||
paste = "1.0"
|
||||
|
||||
[dependencies.ocl-core]
|
||||
version = "0.11"
|
||||
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
|
||||
rustc-hash = "1.1"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["heapapi", "std"] }
|
||||
|
||||
[dev-dependencies]
|
||||
cuda-driver-sys = "0.3.0"
|
|
@ -1,3 +0,0 @@
|
|||
bindgen /usr/local/cuda/include/cuda.h -o cuda.rs --whitelist-function="^cu.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice$|^CUdeviceptr$"
|
||||
sed -i -e 's/extern "C" {//g' -e 's/-> CUresult;/-> CUresult { impl_::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' cuda.rs
|
||||
rustfmt cuda.rs
|
|
@ -1,20 +0,0 @@
|
|||
use env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
// HACK ALERT
|
||||
// This is a temporary hack to to make sure that linker does not pick up
|
||||
// NVIDIA OpenCL .lib using paths injected by cl-sys
|
||||
|
||||
fn main() -> Result<(), VarError> {
|
||||
if cfg!(windows) {
|
||||
let env = env::var("CARGO_CFG_TARGET_ENV")?;
|
||||
if env == "msvc" {
|
||||
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
|
||||
path.push("lib");
|
||||
println!("cargo:rustc-link-search=native={}", path.display());
|
||||
} else {
|
||||
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
|
||||
};
|
||||
}
|
||||
Ok(())
|
||||
}
|
4720
zluda/src/cuda.rs
4720
zluda/src/cuda.rs
File diff suppressed because it is too large
Load diff
|
@ -1,24 +1,93 @@
|
|||
use std::ptr;
|
||||
use super::{driver, FromCuda, ZludaObject};
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use std::{cell::RefCell, ptr, sync::Mutex};
|
||||
|
||||
use crate::cuda::CUlimit;
|
||||
use crate::cuda::CUresult;
|
||||
thread_local! {
|
||||
pub(crate) static CONTEXT_STACK: RefCell<Vec<(CUcontext, hipDevice_t)>> = RefCell::new(Vec::new());
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
|
||||
if pvalue == ptr::null_mut() {
|
||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||
}
|
||||
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
|
||||
*pvalue = 512; // GTX 1060 reports 1024
|
||||
CUresult::CUDA_SUCCESS
|
||||
} else {
|
||||
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
||||
pub(crate) struct Context {
|
||||
pub(crate) device: hipDevice_t,
|
||||
pub(crate) mutable: Mutex<OwnedByContext>,
|
||||
}
|
||||
|
||||
pub(crate) struct OwnedByContext {
|
||||
pub(crate) ref_count: usize, // only used by primary context
|
||||
pub(crate) _memory: FxHashSet<hipDeviceptr_t>,
|
||||
pub(crate) _streams: FxHashSet<hipStream_t>,
|
||||
pub(crate) _modules: FxHashSet<CUmodule>,
|
||||
}
|
||||
|
||||
impl ZludaObject for Context {
|
||||
const COOKIE: usize = 0x5f867c6d9cb73315;
|
||||
|
||||
type CudaHandle = CUcontext;
|
||||
|
||||
fn drop_checked(&mut self) -> CUresult {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_limit(limit: CUlimit, value: usize) -> CUresult {
|
||||
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
|
||||
CUresult::CUDA_SUCCESS
|
||||
} else {
|
||||
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
||||
pub(crate) fn new(device: hipDevice_t) -> Context {
|
||||
Context {
|
||||
device,
|
||||
mutable: Mutex::new(OwnedByContext {
|
||||
ref_count: 0,
|
||||
_memory: FxHashSet::default(),
|
||||
_streams: FxHashSet::default(),
|
||||
_modules: FxHashSet::default(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipError_t {
|
||||
unsafe { hipDeviceGetLimit(pvalue, limit) }
|
||||
}
|
||||
|
||||
pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> hipError_t {
|
||||
unsafe { hipDeviceSetLimit(limit, value) }
|
||||
}
|
||||
|
||||
pub(crate) fn synchronize() -> hipError_t {
|
||||
unsafe { hipDeviceSynchronize() }
|
||||
}
|
||||
|
||||
pub(crate) fn get_primary(hip_dev: hipDevice_t) -> Result<(&'static Context, CUcontext), CUerror> {
|
||||
let dev = driver::device(hip_dev)?;
|
||||
Ok(dev.primary_context())
|
||||
}
|
||||
|
||||
pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
|
||||
let new_device = if raw_ctx.0 == ptr::null_mut() {
|
||||
CONTEXT_STACK.with(|stack| {
|
||||
let mut stack = stack.borrow_mut();
|
||||
if let Some((_, old_device)) = stack.pop() {
|
||||
if let Some((_, new_device)) = stack.last() {
|
||||
if old_device != *new_device {
|
||||
return Some(*new_device);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
} else {
|
||||
let ctx: &Context = FromCuda::from_cuda(&raw_ctx)?;
|
||||
let device = ctx.device;
|
||||
CONTEXT_STACK.with(move |stack| {
|
||||
let mut stack = stack.borrow_mut();
|
||||
let last_device = stack.last().map(|(_, dev)| *dev);
|
||||
stack.push((raw_ctx, device));
|
||||
match last_device {
|
||||
None => Some(device),
|
||||
Some(last_device) if last_device != device => Some(device),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
};
|
||||
if let Some(dev) = new_device {
|
||||
unsafe { hipSetDevice(dev)? };
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,29 +1,27 @@
|
|||
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
|
||||
use crate::{
|
||||
cuda::{self, CUdevice, CUdevprop},
|
||||
hip_call,
|
||||
};
|
||||
use cuda::{CUdevice_attribute, CUuuid_st};
|
||||
use hip_runtime_sys::{
|
||||
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
|
||||
};
|
||||
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
||||
use paste::paste;
|
||||
use std::{
|
||||
cmp,
|
||||
collections::HashSet,
|
||||
ffi::c_void,
|
||||
mem,
|
||||
os::raw::{c_char, c_int, c_uint},
|
||||
ptr,
|
||||
sync::atomic::{AtomicU32, Ordering},
|
||||
};
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
|
||||
const PROJECT_URL_SUFFIX_SHORT: &'static str = " [ZLUDA]";
|
||||
const PROJECT_URL_SUFFIX_LONG: &'static str = " [github.com/vosen/ZLUDA]";
|
||||
use super::context;
|
||||
|
||||
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||
pub const COMPUTE_CAPABILITY_MINOR: i32 = 8;
|
||||
|
||||
pub(crate) fn compute_capability(major: &mut i32, minor: &mut i32, _dev: hipDevice_t) -> CUresult {
|
||||
*major = COMPUTE_CAPABILITY_MAJOR;
|
||||
*minor = COMPUTE_CAPABILITY_MINOR;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get(device: *mut hipDevice_t, ordinal: i32) -> hipError_t {
|
||||
unsafe { hipDeviceGet(device, ordinal) }
|
||||
}
|
||||
|
||||
#[allow(warnings)]
|
||||
trait hipDeviceAttribute_t_ext {
|
||||
trait DeviceAttributeNames {
|
||||
const hipDeviceAttributeGpuOverlap: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeDeviceOverlap;
|
||||
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
|
||||
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
|
||||
|
@ -42,307 +40,300 @@ trait hipDeviceAttribute_t_ext {
|
|||
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
|
||||
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
|
||||
const hipDeviceAttributePciDomainId: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributePciDomainID;
|
||||
const hipDeviceAttributeMultiGpuBoard: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeIsMultiGpuBoard;
|
||||
const hipDeviceAttributeMultiGpuBoardGroupId: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMultiGpuBoardGroupID;
|
||||
const hipDeviceAttributeMaxSharedMemoryPerBlockOptin: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeSharedMemPerBlockOptin;
|
||||
}
|
||||
|
||||
impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {}
|
||||
impl DeviceAttributeNames for hipDeviceAttribute_t {}
|
||||
|
||||
macro_rules! remap_attribute {
|
||||
($attrib:expr => $([ $($word:expr)* ]),*,) => {
|
||||
match $attrib {
|
||||
$(
|
||||
paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||
paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||
}
|
||||
)*
|
||||
_ => return hipError_t::hipErrorInvalidValue
|
||||
_ => return Err(hipErrorCode_t::NotSupported)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t {
|
||||
if pi == ptr::null_mut() {
|
||||
return hipError_t::hipErrorInvalidValue;
|
||||
}
|
||||
//let mut props = unsafe { mem::zeroed() };
|
||||
let hip_attrib = match attrib {
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
|
||||
unsafe { *pi = 1 };
|
||||
return hipError_t::hipSuccess;
|
||||
pub(crate) fn get_attribute(
|
||||
pi: &mut i32,
|
||||
attrib: CUdevice_attribute,
|
||||
dev_idx: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
match attrib {
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
|
||||
*pi = 32;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => {
|
||||
unsafe { *pi = 1 };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => {
|
||||
unsafe { *pi = 0 };
|
||||
return hipError_t::hipSuccess;
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER => {
|
||||
*pi = 0;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
||||
unsafe { *pi = 8 };
|
||||
return hipError_t::hipSuccess;
|
||||
*pi = COMPUTE_CAPABILITY_MAJOR;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
|
||||
unsafe { *pi = 0 };
|
||||
return hipError_t::hipSuccess;
|
||||
*pi = COMPUTE_CAPABILITY_MINOR;
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let attrib = remap_attribute! {
|
||||
attrib =>
|
||||
[MAX THREADS PER BLOCK],
|
||||
[MAX BLOCK DIM X],
|
||||
[MAX BLOCK DIM Y],
|
||||
[MAX BLOCK DIM Z],
|
||||
[MAX GRID DIM X],
|
||||
[MAX GRID DIM Y],
|
||||
[MAX GRID DIM Z],
|
||||
[MAX SHARED MEMORY PER BLOCK],
|
||||
[TOTAL CONSTANT MEMORY],
|
||||
//[WARP SIZE],
|
||||
[MAX PITCH],
|
||||
[MAX REGISTERS PER BLOCK],
|
||||
[CLOCK RATE],
|
||||
[TEXTURE ALIGNMENT],
|
||||
[GPU OVERLAP],
|
||||
[MULTIPROCESSOR COUNT],
|
||||
[KERNEL EXEC TIMEOUT],
|
||||
[INTEGRATED],
|
||||
[CAN MAP HOST MEMORY],
|
||||
[COMPUTE MODE],
|
||||
[MAXIMUM TEXTURE1D WIDTH],
|
||||
[MAXIMUM TEXTURE2D WIDTH],
|
||||
[MAXIMUM TEXTURE2D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D WIDTH],
|
||||
[MAXIMUM TEXTURE3D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D DEPTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||
[SURFACE ALIGNMENT],
|
||||
[CONCURRENT KERNELS],
|
||||
[ECC ENABLED],
|
||||
[PCI BUS ID],
|
||||
[PCI DEVICE ID],
|
||||
//[TCC DRIVER],
|
||||
[MEMORY CLOCK RATE],
|
||||
[GLOBAL MEMORY BUS WIDTH],
|
||||
[L2 CACHE SIZE],
|
||||
[MAX THREADS PER MULTIPROCESSOR],
|
||||
[ASYNC ENGINE COUNT],
|
||||
[UNIFIED ADDRESSING],
|
||||
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||
//[CAN TEX2D GATHER],
|
||||
//[MAXIMUM TEXTURE2D GATHER WIDTH],
|
||||
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
|
||||
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||
[PCI DOMAIN ID],
|
||||
[TEXTURE PITCH ALIGNMENT],
|
||||
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE1D WIDTH],
|
||||
//[MAXIMUM SURFACE2D WIDTH],
|
||||
//[MAXIMUM SURFACE2D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D WIDTH],
|
||||
//[MAXIMUM SURFACE3D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D DEPTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE2D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM SURFACE2D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACECUBEMAP WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LINEAR PITCH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
|
||||
//[COMPUTE CAPABILITY MAJOR],
|
||||
//[COMPUTE CAPABILITY MINOR],
|
||||
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||
[STREAM PRIORITIES SUPPORTED],
|
||||
[GLOBAL L1 CACHE SUPPORTED],
|
||||
[LOCAL L1 CACHE SUPPORTED],
|
||||
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||
[MAX REGISTERS PER MULTIPROCESSOR],
|
||||
[MANAGED MEMORY],
|
||||
[MULTI GPU BOARD],
|
||||
[MULTI GPU BOARD GROUP ID],
|
||||
[HOST NATIVE ATOMIC SUPPORTED],
|
||||
[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||
[PAGEABLE MEMORY ACCESS],
|
||||
[CONCURRENT MANAGED ACCESS],
|
||||
[COMPUTE PREEMPTION SUPPORTED],
|
||||
[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||
//[CAN USE STREAM MEM OPS],
|
||||
[COOPERATIVE LAUNCH],
|
||||
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||
[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||
//[CAN FLUSH REMOTE WRITES],
|
||||
[HOST REGISTER SUPPORTED],
|
||||
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||
[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||
//[MAX BLOCKS PER MULTIPROCESSOR],
|
||||
//[GENERIC COMPRESSION SUPPORTED],
|
||||
//[MAX PERSISTING L2 CACHE SIZE],
|
||||
//[MAX ACCESS POLICY WINDOW SIZE],
|
||||
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
|
||||
//[RESERVED SHARED MEMORY PER BLOCK],
|
||||
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||
[MEMORY POOLS SUPPORTED],
|
||||
//[GPU DIRECT RDMA SUPPORTED],
|
||||
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS],
|
||||
//[GPU DIRECT RDMA WRITES ORDERING],
|
||||
//[MEMPOOL SUPPORTED HANDLE TYPES],
|
||||
//[CLUSTER LAUNCH],
|
||||
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED],
|
||||
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||
//[CAN USE STREAM WAIT VALUE NOR],
|
||||
//[DMA BUF SUPPORTED],
|
||||
//[IPC EVENT SUPPORTED],
|
||||
//[MEM SYNC DOMAIN COUNT],
|
||||
//[TENSOR MAP ACCESS SUPPORTED],
|
||||
//[HANDLE TYPE FABRIC SUPPORTED],
|
||||
//[UNIFIED FUNCTION POINTERS],
|
||||
//[NUMA CONFIG],
|
||||
//[NUMA ID],
|
||||
//[MULTICAST SUPPORTED],
|
||||
//[MPS ENABLED],
|
||||
//[HOST NUMA ID],
|
||||
};
|
||||
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_uuid(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||
unsafe { hipDeviceGetUuid(uuid, device) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_uuid_v2(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||
get_uuid(uuid, device)
|
||||
}
|
||||
|
||||
pub(crate) fn get_luid(
|
||||
luid: *mut ::core::ffi::c_char,
|
||||
device_node_mask: &mut ::core::ffi::c_uint,
|
||||
dev: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
let luid = unsafe {
|
||||
luid.cast::<[i8; 8]>()
|
||||
.as_mut()
|
||||
.ok_or(hipErrorCode_t::InvalidValue)
|
||||
}?;
|
||||
let mut properties = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut properties, dev) }?;
|
||||
*luid = properties.luid;
|
||||
*device_node_mask = properties.luidDeviceNodeMask;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_name(
|
||||
name: *mut ::core::ffi::c_char,
|
||||
len: ::core::ffi::c_int,
|
||||
dev: hipDevice_t,
|
||||
) -> cuda_types::CUresult {
|
||||
unsafe { hipDeviceGetName(name, len, dev) }?;
|
||||
let len = len as usize;
|
||||
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
|
||||
let first_zero = buffer.iter().position(|c| *c == 0);
|
||||
let first_zero = if let Some(x) = first_zero {
|
||||
x
|
||||
} else {
|
||||
return Ok(());
|
||||
};
|
||||
if (first_zero + PROJECT_SUFFIX.len()) > len {
|
||||
return Ok(());
|
||||
}
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
PROJECT_SUFFIX.as_ptr() as _,
|
||||
name.add(first_zero),
|
||||
PROJECT_SUFFIX.len(),
|
||||
)
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
|
||||
unsafe { hipDeviceTotalMem(bytes, dev) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
|
||||
let mut hip_props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
|
||||
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||
prop.maxThreadsDim = hip_props.maxThreadsDim;
|
||||
prop.maxGridSize = hip_props.maxGridSize;
|
||||
prop.totalConstantMemory = clamp_usize(hip_props.totalConstMem);
|
||||
prop.SIMDWidth = 32;
|
||||
prop.memPitch = clamp_usize(hip_props.memPitch);
|
||||
prop.regsPerBlock = hip_props.regsPerBlock;
|
||||
prop.clockRate = hip_props.clockRate;
|
||||
prop.textureAlign = clamp_usize(hip_props.textureAlignment);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_count(count: &mut ::core::ffi::c_int) -> hipError_t {
|
||||
unsafe { hipGetDeviceCount(count) }
|
||||
}
|
||||
|
||||
fn clamp_usize(x: usize) -> i32 {
|
||||
usize::min(x, i32::MAX as usize) as i32
|
||||
}
|
||||
|
||||
pub(crate) fn primary_context_retain(
|
||||
pctx: &mut CUcontext,
|
||||
hip_dev: hipDevice_t,
|
||||
) -> Result<(), CUerror> {
|
||||
let (ctx, raw_ctx) = context::get_primary(hip_dev)?;
|
||||
{
|
||||
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
|
||||
mutable_ctx.ref_count += 1;
|
||||
}
|
||||
*pctx = raw_ctx;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> Result<(), CUerror> {
|
||||
let (ctx, _) = context::get_primary(hip_dev)?;
|
||||
{
|
||||
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
|
||||
if mutable_ctx.ref_count == 0 {
|
||||
return Err(CUerror::INVALID_CONTEXT);
|
||||
}
|
||||
mutable_ctx.ref_count -= 1;
|
||||
if mutable_ctx.ref_count == 0 {
|
||||
// TODO: drop all children
|
||||
}
|
||||
// we assume that arrayed texts have the same limits
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
// we treat surface the same as texture
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
// Totally made up
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
|
||||
unsafe { *pi = u16::MAX as i32 };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
// linear sizes
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
|
||||
let mut prop = unsafe { mem::zeroed() };
|
||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return err;
|
||||
}
|
||||
unsafe { *pi = prop.maxTexture1DLinear };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
|
||||
let mut prop = unsafe { mem::zeroed() };
|
||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return err;
|
||||
}
|
||||
unsafe { *pi = prop.pciDomainID };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
attrib => remap_attribute! {
|
||||
attrib =>
|
||||
[MAX THREADS PER BLOCK],
|
||||
[MAX BLOCK DIM X],
|
||||
[MAX BLOCK DIM Y],
|
||||
[MAX BLOCK DIM Z],
|
||||
[MAX GRID DIM X],
|
||||
[MAX GRID DIM Y],
|
||||
[MAX GRID DIM Z],
|
||||
[MAX SHARED MEMORY PER BLOCK],
|
||||
[TOTAL CONSTANT MEMORY],
|
||||
[WARP SIZE],
|
||||
[MAX PITCH],
|
||||
[MAX REGISTERS PER BLOCK],
|
||||
[CLOCK RATE],
|
||||
[TEXTURE ALIGNMENT],
|
||||
//[GPU OVERLAP],
|
||||
[MULTIPROCESSOR COUNT],
|
||||
[KERNEL EXEC TIMEOUT],
|
||||
[INTEGRATED],
|
||||
[CAN MAP HOST MEMORY],
|
||||
[COMPUTE MODE],
|
||||
[MAXIMUM TEXTURE1D WIDTH],
|
||||
[MAXIMUM TEXTURE2D WIDTH],
|
||||
[MAXIMUM TEXTURE2D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D WIDTH],
|
||||
[MAXIMUM TEXTURE3D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D DEPTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||
//[SURFACE ALIGNMENT],
|
||||
[CONCURRENT KERNELS],
|
||||
[ECC ENABLED],
|
||||
[PCI BUS ID],
|
||||
[PCI DEVICE ID],
|
||||
//[TCC DRIVER],
|
||||
[MEMORY CLOCK RATE],
|
||||
[GLOBAL MEMORY BUS WIDTH],
|
||||
[L2 CACHE SIZE],
|
||||
[MAX THREADS PER MULTIPROCESSOR],
|
||||
[ASYNC ENGINE COUNT],
|
||||
//[UNIFIED ADDRESSING],
|
||||
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||
//[CAN TEX2D GATHER],
|
||||
//[MAXIMUM TEXTURE2D GATHER WIDTH],
|
||||
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
|
||||
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||
//[PCI DOMAIN ID],
|
||||
[TEXTURE PITCH ALIGNMENT],
|
||||
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE1D WIDTH],
|
||||
//[MAXIMUM SURFACE2D WIDTH],
|
||||
//[MAXIMUM SURFACE2D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D WIDTH],
|
||||
//[MAXIMUM SURFACE3D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D DEPTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE2D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM SURFACE2D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACECUBEMAP WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LINEAR PITCH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
|
||||
//[COMPUTE CAPABILITY MAJOR],
|
||||
//[COMPUTE CAPABILITY MINOR],
|
||||
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||
//[STREAM PRIORITIES SUPPORTED],
|
||||
//[GLOBAL L1 CACHE SUPPORTED],
|
||||
//[LOCAL L1 CACHE SUPPORTED],
|
||||
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||
//[MAX REGISTERS PER MULTIPROCESSOR],
|
||||
[MANAGED MEMORY],
|
||||
//[MULTI GPU BOARD],
|
||||
//[MULTI GPU BOARD GROUP ID],
|
||||
//[HOST NATIVE ATOMIC SUPPORTED],
|
||||
//[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||
[PAGEABLE MEMORY ACCESS],
|
||||
[CONCURRENT MANAGED ACCESS],
|
||||
//[COMPUTE PREEMPTION SUPPORTED],
|
||||
//[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||
//[CAN USE STREAM MEM OPS],
|
||||
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||
//[CAN USE STREAM WAIT VALUE NOR],
|
||||
[COOPERATIVE LAUNCH],
|
||||
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||
//[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||
//[CAN FLUSH REMOTE WRITES],
|
||||
//[HOST REGISTER SUPPORTED],
|
||||
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||
//[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||
//[MAX BLOCKS PER MULTIPROCESSOR],
|
||||
//[GENERIC COMPRESSION SUPPORTED],
|
||||
//[MAX PERSISTING L2 CACHE SIZE],
|
||||
//[MAX ACCESS POLICY WINDOW SIZE],
|
||||
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
|
||||
//[RESERVED SHARED MEMORY PER BLOCK],
|
||||
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||
//[MEMORY POOLS SUPPORTED],
|
||||
},
|
||||
};
|
||||
unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) }
|
||||
}
|
||||
|
||||
pub fn get_uuid(uuid: *mut CUuuid_st, _dev_idx: c_int) -> Result<(), CUresult> {
|
||||
unsafe {
|
||||
*uuid = CUuuid_st {
|
||||
bytes: mem::zeroed(),
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: add support if Level 0 exposes it
|
||||
pub fn get_luid(
|
||||
luid: *mut c_char,
|
||||
dev_node_mask: *mut c_uint,
|
||||
_dev_idx: c_int,
|
||||
) -> Result<(), CUresult> {
|
||||
unsafe { ptr::write_bytes(luid, 0u8, 8) };
|
||||
unsafe { *dev_node_mask = 0 };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
|
||||
if prop == ptr::null_mut() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
}
|
||||
let mut hip_props = mem::zeroed();
|
||||
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
|
||||
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
|
||||
(*prop).maxGridSize = hip_props.maxGridSize;
|
||||
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
|
||||
(*prop).SIMDWidth = hip_props.warpSize;
|
||||
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
|
||||
(*prop).regsPerBlock = hip_props.regsPerBlock;
|
||||
(*prop).clockRate = hip_props.clockRate;
|
||||
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
|
||||
Ok(())
|
||||
}
|
||||
|
|
79
zluda/src/impl/driver.rs
Normal file
79
zluda/src/impl/driver.rs
Normal file
|
@ -0,0 +1,79 @@
|
|||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{
|
||||
ffi::{CStr, CString},
|
||||
mem, slice,
|
||||
sync::OnceLock,
|
||||
};
|
||||
|
||||
use crate::r#impl::context;
|
||||
|
||||
use super::LiveCheck;
|
||||
|
||||
pub(crate) struct GlobalState {
|
||||
pub devices: Vec<Device>,
|
||||
}
|
||||
|
||||
pub(crate) struct Device {
|
||||
pub(crate) _comgr_isa: CString,
|
||||
primary_context: LiveCheck<context::Context>,
|
||||
}
|
||||
|
||||
impl Device {
|
||||
pub(crate) fn primary_context<'a>(&'a self) -> (&'a context::Context, CUcontext) {
|
||||
unsafe {
|
||||
(
|
||||
self.primary_context.data.assume_init_ref(),
|
||||
self.primary_context.as_handle(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn device(dev: i32) -> Result<&'static Device, CUerror> {
|
||||
global_state()?
|
||||
.devices
|
||||
.get(dev as usize)
|
||||
.ok_or(CUerror::INVALID_DEVICE)
|
||||
}
|
||||
|
||||
pub(crate) fn global_state() -> Result<&'static GlobalState, CUerror> {
|
||||
static GLOBAL_STATE: OnceLock<Result<GlobalState, CUerror>> = OnceLock::new();
|
||||
fn cast_slice<'a>(bytes: &'a [i8]) -> &'a [u8] {
|
||||
unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), bytes.len()) }
|
||||
}
|
||||
GLOBAL_STATE
|
||||
.get_or_init(|| {
|
||||
let mut device_count = 0;
|
||||
unsafe { hipGetDeviceCount(&mut device_count) }?;
|
||||
Ok(GlobalState {
|
||||
devices: (0..device_count)
|
||||
.map(|i| {
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut props, i) }?;
|
||||
Ok::<_, CUerror>(Device {
|
||||
_comgr_isa: CStr::from_bytes_until_nul(cast_slice(
|
||||
&props.gcnArchName[..],
|
||||
))
|
||||
.map_err(|_| CUerror::UNKNOWN)?
|
||||
.to_owned(),
|
||||
primary_context: LiveCheck::new(context::new(i)),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
})
|
||||
})
|
||||
.as_ref()
|
||||
.map_err(|e| *e)
|
||||
}
|
||||
|
||||
pub(crate) fn init(flags: ::core::ffi::c_uint) -> CUresult {
|
||||
unsafe { hipInit(flags) }?;
|
||||
global_state()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_version(version: &mut ::core::ffi::c_int) -> CUresult {
|
||||
*version = cuda_types::CUDA_VERSION as i32;
|
||||
Ok(())
|
||||
}
|
|
@ -1,26 +1,46 @@
|
|||
use hip_runtime_sys::{hipError_t, hipFuncAttribute, hipFuncGetAttribute, hipFuncGetAttributes, hipFunction_attribute, hipLaunchKernel, hipModuleLaunchKernel};
|
||||
|
||||
use super::{CUresult, HasLivenessCookie, LiveCheck};
|
||||
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
|
||||
use ::std::os::raw::{c_uint, c_void};
|
||||
use std::{mem, ptr};
|
||||
use hip_runtime_sys::*;
|
||||
|
||||
pub(crate) fn get_attribute(
|
||||
pi: *mut i32,
|
||||
cu_attrib: CUfunction_attribute,
|
||||
func: CUfunction,
|
||||
pi: &mut i32,
|
||||
cu_attrib: hipFunction_attribute,
|
||||
func: hipFunction_t,
|
||||
) -> hipError_t {
|
||||
if pi == ptr::null_mut() || func == ptr::null_mut() {
|
||||
return hipError_t::hipErrorInvalidValue;
|
||||
// TODO: implement HIP_FUNC_ATTRIBUTE_PTX_VERSION
|
||||
// TODO: implement HIP_FUNC_ATTRIBUTE_BINARY_VERSION
|
||||
unsafe { hipFuncGetAttribute(pi, cu_attrib, func) }?;
|
||||
if cu_attrib == hipFunction_attribute::HIP_FUNC_ATTRIBUTE_NUM_REGS {
|
||||
*pi = (*pi).max(1);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn launch_kernel(
|
||||
f: hipFunction_t,
|
||||
grid_dim_x: ::core::ffi::c_uint,
|
||||
grid_dim_y: ::core::ffi::c_uint,
|
||||
grid_dim_z: ::core::ffi::c_uint,
|
||||
block_dim_x: ::core::ffi::c_uint,
|
||||
block_dim_y: ::core::ffi::c_uint,
|
||||
block_dim_z: ::core::ffi::c_uint,
|
||||
shared_mem_bytes: ::core::ffi::c_uint,
|
||||
stream: hipStream_t,
|
||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||
extra: *mut *mut ::core::ffi::c_void,
|
||||
) -> hipError_t {
|
||||
// TODO: fix constants in extra
|
||||
unsafe {
|
||||
hipModuleLaunchKernel(
|
||||
f,
|
||||
grid_dim_x,
|
||||
grid_dim_y,
|
||||
grid_dim_z,
|
||||
block_dim_x,
|
||||
block_dim_y,
|
||||
block_dim_z,
|
||||
shared_mem_bytes,
|
||||
stream,
|
||||
kernel_params,
|
||||
extra,
|
||||
)
|
||||
}
|
||||
let attrib = match cu_attrib {
|
||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
||||
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
|
||||
}
|
||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
|
||||
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
|
||||
}
|
||||
_ => return hipError_t::hipErrorInvalidValue,
|
||||
};
|
||||
unsafe { hipFuncGetAttribute(pi, attrib, func as _) }
|
||||
}
|
||||
|
|
|
@ -1,86 +0,0 @@
|
|||
use std::{
|
||||
ffi::{c_void, CStr},
|
||||
mem, ptr, slice,
|
||||
};
|
||||
|
||||
use hip_runtime_sys::{hipCtxGetDevice, hipError_t, hipGetDeviceProperties};
|
||||
|
||||
use crate::{
|
||||
cuda::{CUjitInputType, CUjit_option, CUlinkState, CUresult},
|
||||
hip_call,
|
||||
};
|
||||
|
||||
use super::module::{self, SpirvModule};
|
||||
|
||||
struct LinkState {
|
||||
modules: Vec<SpirvModule>,
|
||||
result: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn create(
|
||||
num_options: u32,
|
||||
options: *mut CUjit_option,
|
||||
option_values: *mut *mut c_void,
|
||||
state_out: *mut CUlinkState,
|
||||
) -> CUresult {
|
||||
if state_out == ptr::null_mut() {
|
||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||
}
|
||||
let state = Box::new(LinkState {
|
||||
modules: Vec::new(),
|
||||
result: None,
|
||||
});
|
||||
*state_out = mem::transmute(state);
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn add_data(
|
||||
state: CUlinkState,
|
||||
type_: CUjitInputType,
|
||||
data: *mut c_void,
|
||||
size: usize,
|
||||
name: *const i8,
|
||||
num_options: u32,
|
||||
options: *mut CUjit_option,
|
||||
option_values: *mut *mut c_void,
|
||||
) -> Result<(), hipError_t> {
|
||||
if state == ptr::null_mut() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
}
|
||||
let state: *mut LinkState = mem::transmute(state);
|
||||
let state = &mut *state;
|
||||
// V-RAY specific hack
|
||||
if state.modules.len() == 2 {
|
||||
return Err(hipError_t::hipSuccess);
|
||||
}
|
||||
let spirv_data = SpirvModule::new_raw(data as *const _)?;
|
||||
state.modules.push(spirv_data);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn complete(
|
||||
state: CUlinkState,
|
||||
cubin_out: *mut *mut c_void,
|
||||
size_out: *mut usize,
|
||||
) -> Result<(), hipError_t> {
|
||||
let mut dev = 0;
|
||||
hip_call! { hipCtxGetDevice(&mut dev) };
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
hip_call! { hipGetDeviceProperties(&mut props, dev) };
|
||||
let state: &mut LinkState = mem::transmute(state);
|
||||
let spirv_bins = state.modules.iter().map(|m| &m.binaries[..]);
|
||||
let should_link_ptx_impl = state.modules.iter().find_map(|m| m.should_link_ptx_impl);
|
||||
let mut arch_binary = module::compile_amd(&props, spirv_bins, should_link_ptx_impl)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
let ptr = arch_binary.as_mut_ptr();
|
||||
let size = arch_binary.len();
|
||||
state.result = Some(arch_binary);
|
||||
*cubin_out = ptr as _;
|
||||
*size_out = size;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn destroy(state: CUlinkState) -> CUresult {
|
||||
let state: Box<LinkState> = mem::transmute(state);
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
|
@ -1,55 +1,35 @@
|
|||
use hip_runtime_sys::{
|
||||
hipDrvMemcpy3D, hipError_t, hipMemcpy3D, hipMemcpy3DParms, hipMemoryType, hipPitchedPtr,
|
||||
hipPos, HIP_MEMCPY3D,
|
||||
};
|
||||
use std::ptr;
|
||||
use hip_runtime_sys::*;
|
||||
|
||||
use crate::{
|
||||
cuda::{CUDA_MEMCPY3D_st, CUdeviceptr, CUmemorytype, CUresult},
|
||||
hip_call,
|
||||
};
|
||||
|
||||
// TODO change HIP impl to 64 bits
|
||||
pub(crate) unsafe fn copy_3d(cu_copy: *const CUDA_MEMCPY3D_st) -> Result<(), hipError_t> {
|
||||
if cu_copy == ptr::null() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
}
|
||||
let cu_copy = *cu_copy;
|
||||
let hip_copy = HIP_MEMCPY3D {
|
||||
srcXInBytes: cu_copy.srcXInBytes as u32,
|
||||
srcY: cu_copy.srcY as u32,
|
||||
srcZ: cu_copy.srcZ as u32,
|
||||
srcLOD: cu_copy.srcLOD as u32,
|
||||
srcMemoryType: memory_type(cu_copy.srcMemoryType)?,
|
||||
srcHost: cu_copy.srcHost,
|
||||
srcDevice: cu_copy.srcDevice.0 as _,
|
||||
srcArray: cu_copy.srcArray as _,
|
||||
srcPitch: cu_copy.srcPitch as u32,
|
||||
srcHeight: cu_copy.srcHeight as u32,
|
||||
dstXInBytes: cu_copy.dstXInBytes as u32,
|
||||
dstY: cu_copy.dstY as u32,
|
||||
dstZ: cu_copy.dstZ as u32,
|
||||
dstLOD: cu_copy.dstLOD as u32,
|
||||
dstMemoryType: memory_type(cu_copy.dstMemoryType)?,
|
||||
dstHost: cu_copy.dstHost,
|
||||
dstDevice: cu_copy.dstDevice.0 as _,
|
||||
dstArray: cu_copy.dstArray as _,
|
||||
dstPitch: cu_copy.dstPitch as u32,
|
||||
dstHeight: cu_copy.dstHeight as u32,
|
||||
WidthInBytes: cu_copy.WidthInBytes as u32,
|
||||
Height: cu_copy.Height as u32,
|
||||
Depth: cu_copy.Depth as u32,
|
||||
};
|
||||
hip_call! { hipDrvMemcpy3D(&hip_copy) };
|
||||
Ok(())
|
||||
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
||||
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
||||
// TODO: parametrize for non-Geekbench
|
||||
unsafe { hipMemsetD8(*dptr, 0, bytesize) }
|
||||
}
|
||||
|
||||
pub(crate) fn memory_type(cu: CUmemorytype) -> Result<hipMemoryType, hipError_t> {
|
||||
match cu {
|
||||
CUmemorytype::CU_MEMORYTYPE_HOST => Ok(hipMemoryType::hipMemoryTypeHost),
|
||||
CUmemorytype::CU_MEMORYTYPE_DEVICE => Ok(hipMemoryType::hipMemoryTypeDevice),
|
||||
CUmemorytype::CU_MEMORYTYPE_ARRAY => Ok(hipMemoryType::hipMemoryTypeArray),
|
||||
CUmemorytype::CU_MEMORYTYPE_UNIFIED => Ok(hipMemoryType::hipMemoryTypeUnified),
|
||||
_ => Err(hipError_t::hipErrorInvalidValue),
|
||||
}
|
||||
pub(crate) fn free_v2(dptr: hipDeviceptr_t) -> hipError_t {
|
||||
unsafe { hipFree(dptr.0) }
|
||||
}
|
||||
|
||||
pub(crate) fn copy_dto_h_v2(
|
||||
dst_host: *mut ::core::ffi::c_void,
|
||||
src_device: hipDeviceptr_t,
|
||||
byte_count: usize,
|
||||
) -> hipError_t {
|
||||
unsafe { hipMemcpyDtoH(dst_host, src_device, byte_count) }
|
||||
}
|
||||
|
||||
pub(crate) fn copy_hto_d_v2(
|
||||
dst_device: hipDeviceptr_t,
|
||||
src_host: *const ::core::ffi::c_void,
|
||||
byte_count: usize,
|
||||
) -> hipError_t {
|
||||
unsafe { hipMemcpyHtoD(dst_device, src_host.cast_mut(), byte_count) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_address_range_v2(
|
||||
pbase: *mut hipDeviceptr_t,
|
||||
psize: *mut usize,
|
||||
dptr: hipDeviceptr_t,
|
||||
) -> hipError_t {
|
||||
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
|
||||
}
|
||||
|
|
|
@ -1,230 +1,209 @@
|
|||
use hip_runtime_sys::hipError_t;
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::mem::{self, ManuallyDrop, MaybeUninit};
|
||||
|
||||
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
|
||||
use std::{
|
||||
ffi::c_void,
|
||||
mem::{self, ManuallyDrop},
|
||||
os::raw::c_int,
|
||||
ptr,
|
||||
sync::Mutex,
|
||||
sync::TryLockError,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
pub mod test;
|
||||
pub mod device;
|
||||
pub mod export_table;
|
||||
pub mod function;
|
||||
#[cfg_attr(windows, path = "os_win.rs")]
|
||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||
pub(crate) mod os;
|
||||
pub(crate) mod module;
|
||||
pub(crate) mod context;
|
||||
pub(crate) mod memory;
|
||||
pub(crate) mod link;
|
||||
pub(crate) mod pointer;
|
||||
pub(super) mod context;
|
||||
pub(super) mod device;
|
||||
pub(super) mod driver;
|
||||
pub(super) mod function;
|
||||
pub(super) mod memory;
|
||||
pub(super) mod module;
|
||||
pub(super) mod pointer;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub fn unimplemented() -> CUresult {
|
||||
pub(crate) fn unimplemented() -> CUresult {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
pub fn unimplemented() -> CUresult {
|
||||
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
||||
pub(crate) fn unimplemented() -> CUresult {
|
||||
CUresult::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! hip_call {
|
||||
($expr:expr) => {
|
||||
#[allow(unused_unsafe)]
|
||||
{
|
||||
let err = unsafe { $expr };
|
||||
if err != hip_runtime_sys::hipError_t::hipSuccess {
|
||||
return Result::Err(err);
|
||||
pub(crate) trait FromCuda<'a, T>: Sized {
|
||||
fn from_cuda(t: &'a T) -> Result<Self, CUerror>;
|
||||
}
|
||||
|
||||
macro_rules! from_cuda_nop {
|
||||
($($type_:ty),*) => {
|
||||
$(
|
||||
impl<'a> FromCuda<'a, $type_> for $type_ {
|
||||
fn from_cuda(x: &'a $type_) -> Result<Self, CUerror> {
|
||||
Ok(*x)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromCuda<'a, *mut $type_> for &'a mut $type_ {
|
||||
fn from_cuda(x: &'a *mut $type_) -> Result<Self, CUerror> {
|
||||
match unsafe { x.as_mut() } {
|
||||
Some(x) => Ok(x),
|
||||
None => Err(CUerror::INVALID_VALUE),
|
||||
}
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
pub trait HasLivenessCookie: Sized {
|
||||
macro_rules! from_cuda_transmute {
|
||||
($($from:ty => $to:ty),*) => {
|
||||
$(
|
||||
impl<'a> FromCuda<'a, $from> for $to {
|
||||
fn from_cuda(x: &'a $from) -> Result<Self, CUerror> {
|
||||
Ok(unsafe { std::mem::transmute(*x) })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromCuda<'a, *mut $from> for &'a mut $to {
|
||||
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
|
||||
match unsafe { x.cast::<$to>().as_mut() } {
|
||||
Some(x) => Ok(x),
|
||||
None => Err(CUerror::INVALID_VALUE),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromCuda<'a, *mut $from> for * mut $to {
|
||||
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
|
||||
Ok(x.cast::<$to>())
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! from_cuda_object {
|
||||
($($type_:ty),*) => {
|
||||
$(
|
||||
impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for <$type_ as ZludaObject>::CudaHandle {
|
||||
fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<<$type_ as ZludaObject>::CudaHandle, CUerror> {
|
||||
Ok(*handle)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromCuda<'a, *mut <$type_ as ZludaObject>::CudaHandle> for &'a mut <$type_ as ZludaObject>::CudaHandle {
|
||||
fn from_cuda(handle: &'a *mut <$type_ as ZludaObject>::CudaHandle) -> Result<&'a mut <$type_ as ZludaObject>::CudaHandle, CUerror> {
|
||||
match unsafe { handle.as_mut() } {
|
||||
Some(x) => Ok(x),
|
||||
None => Err(CUerror::INVALID_VALUE),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for &'a $type_ {
|
||||
fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<&'a $type_, CUerror> {
|
||||
Ok(as_ref(handle).as_result()?)
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
from_cuda_nop!(
|
||||
*mut i8,
|
||||
*mut i32,
|
||||
*mut usize,
|
||||
*const ::core::ffi::c_void,
|
||||
*const ::core::ffi::c_char,
|
||||
*mut ::core::ffi::c_void,
|
||||
*mut *mut ::core::ffi::c_void,
|
||||
i32,
|
||||
u32,
|
||||
usize,
|
||||
cuda_types::CUdevprop,
|
||||
CUdevice_attribute
|
||||
);
|
||||
from_cuda_transmute!(
|
||||
CUuuid => hipUUID,
|
||||
CUfunction => hipFunction_t,
|
||||
CUfunction_attribute => hipFunction_attribute,
|
||||
CUstream => hipStream_t,
|
||||
CUpointer_attribute => hipPointer_attribute,
|
||||
CUdeviceptr_v2 => hipDeviceptr_t
|
||||
);
|
||||
from_cuda_object!(module::Module, context::Context);
|
||||
|
||||
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
||||
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
|
||||
Ok(match *limit {
|
||||
CUlimit::CU_LIMIT_STACK_SIZE => hipLimit_t::hipLimitStackSize,
|
||||
CUlimit::CU_LIMIT_PRINTF_FIFO_SIZE => hipLimit_t::hipLimitPrintfFifoSize,
|
||||
CUlimit::CU_LIMIT_MALLOC_HEAP_SIZE => hipLimit_t::hipLimitMallocHeapSize,
|
||||
_ => return Err(CUerror::NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait ZludaObject: Sized + Send + Sync {
|
||||
const COOKIE: usize;
|
||||
const LIVENESS_FAIL: CUresult;
|
||||
const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
|
||||
|
||||
fn try_drop(&mut self) -> Result<(), CUresult>;
|
||||
type CudaHandle: Sized;
|
||||
|
||||
fn drop_checked(&mut self) -> CUresult;
|
||||
|
||||
fn wrap(self) -> Self::CudaHandle {
|
||||
unsafe { mem::transmute_copy(&LiveCheck::wrap(self)) }
|
||||
}
|
||||
}
|
||||
|
||||
// This struct is a best-effort check if wrapped value has been dropped,
|
||||
// while it's inherently safe, its use coming from FFI is very unsafe
|
||||
#[repr(C)]
|
||||
pub struct LiveCheck<T: HasLivenessCookie> {
|
||||
pub(crate) struct LiveCheck<T: ZludaObject> {
|
||||
cookie: usize,
|
||||
data: ManuallyDrop<T>,
|
||||
data: MaybeUninit<T>,
|
||||
}
|
||||
|
||||
impl<T: HasLivenessCookie> LiveCheck<T> {
|
||||
pub fn new(data: T) -> Self {
|
||||
impl<T: ZludaObject> LiveCheck<T> {
|
||||
fn new(data: T) -> Self {
|
||||
LiveCheck {
|
||||
cookie: T::COOKIE,
|
||||
data: ManuallyDrop::new(data),
|
||||
data: MaybeUninit::new(data),
|
||||
}
|
||||
}
|
||||
|
||||
fn destroy_impl(this: *mut Self) -> Result<(), CUresult> {
|
||||
let mut ctx_box = ManuallyDrop::new(unsafe { Box::from_raw(this) });
|
||||
ctx_box.try_drop()?;
|
||||
unsafe { ManuallyDrop::drop(&mut ctx_box) };
|
||||
Ok(())
|
||||
fn as_handle(&self) -> T::CudaHandle {
|
||||
unsafe { mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
unsafe fn ptr_from_inner(this: *mut T) -> *mut Self {
|
||||
let outer_ptr = (this as *mut u8).sub(mem::size_of::<usize>());
|
||||
outer_ptr as *mut Self
|
||||
fn wrap(data: T) -> *mut Self {
|
||||
Box::into_raw(Box::new(Self::new(data)))
|
||||
}
|
||||
|
||||
pub unsafe fn as_ref_unchecked(&self) -> &T {
|
||||
&self.data
|
||||
}
|
||||
|
||||
pub fn as_option_mut(&mut self) -> Option<&mut T> {
|
||||
fn as_result(&self) -> Result<&T, CUerror> {
|
||||
if self.cookie == T::COOKIE {
|
||||
Some(&mut self.data)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_result(&self) -> Result<&T, CUresult> {
|
||||
if self.cookie == T::COOKIE {
|
||||
Ok(&self.data)
|
||||
} else {
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_result_mut(&mut self) -> Result<&mut T, CUresult> {
|
||||
if self.cookie == T::COOKIE {
|
||||
Ok(&mut self.data)
|
||||
Ok(unsafe { self.data.assume_init_ref() })
|
||||
} else {
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
|
||||
// This looks like nonsense, but it's not. There are two cases:
|
||||
// Err(CUerror) -> meaning that the object is invalid, this pointer does not point into valid memory
|
||||
// Ok(maybe_error) -> meaning that the object is valid, we dropped everything, but there *might*
|
||||
// an error in the underlying runtime that we want to propagate
|
||||
#[must_use]
|
||||
pub fn try_drop(&mut self) -> Result<(), CUresult> {
|
||||
fn drop_checked(&mut self) -> Result<Result<(), CUerror>, CUerror> {
|
||||
if self.cookie == T::COOKIE {
|
||||
self.cookie = 0;
|
||||
self.data.try_drop()?;
|
||||
unsafe { ManuallyDrop::drop(&mut self.data) };
|
||||
return Ok(());
|
||||
}
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: HasLivenessCookie> Drop for LiveCheck<T> {
|
||||
fn drop(&mut self) {
|
||||
self.cookie = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CudaRepr: Sized {
|
||||
type Impl: Sized;
|
||||
}
|
||||
|
||||
impl<T: CudaRepr> CudaRepr for *mut T {
|
||||
type Impl = *mut T::Impl;
|
||||
}
|
||||
|
||||
pub trait Decuda<To> {
|
||||
fn decuda(self: Self) -> To;
|
||||
}
|
||||
|
||||
impl<T: CudaRepr> Decuda<*mut T::Impl> for *mut T {
|
||||
fn decuda(self: Self) -> *mut T::Impl {
|
||||
self as *mut _
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<TryLockError<T>> for CUresult {
|
||||
fn from(_: TryLockError<T>) -> Self {
|
||||
CUresult::CUDA_ERROR_ILLEGAL_STATE
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ocl_core::Error> for CUresult {
|
||||
fn from(result: ocl_core::Error) -> Self {
|
||||
match result {
|
||||
_ => CUresult::CUDA_ERROR_UNKNOWN,
|
||||
let result = unsafe { self.data.assume_init_mut().drop_checked() };
|
||||
unsafe { MaybeUninit::assume_init_drop(&mut self.data) };
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(T::LIVENESS_FAIL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<hip_runtime_sys::hipError_t> for CUresult {
|
||||
fn from(result: hip_runtime_sys::hipError_t) -> Self {
|
||||
match result {
|
||||
hip_runtime_sys::hipError_t::hipErrorRuntimeMemory
|
||||
| hip_runtime_sys::hipError_t::hipErrorRuntimeOther => CUresult::CUDA_ERROR_UNKNOWN,
|
||||
hip_runtime_sys::hipError_t(e) => CUresult(e),
|
||||
}
|
||||
}
|
||||
pub fn as_ref<'a, T: ZludaObject>(
|
||||
handle: &'a T::CudaHandle,
|
||||
) -> &'a ManuallyDrop<Box<LiveCheck<T>>> {
|
||||
unsafe { mem::transmute(handle) }
|
||||
}
|
||||
|
||||
pub trait Encuda {
|
||||
type To: Sized;
|
||||
fn encuda(self: Self) -> Self::To;
|
||||
}
|
||||
|
||||
impl Encuda for CUresult {
|
||||
type To = CUresult;
|
||||
fn encuda(self: Self) -> Self::To {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Encuda for () {
|
||||
type To = CUresult;
|
||||
fn encuda(self: Self) -> Self::To {
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
||||
}
|
||||
|
||||
impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1, T2> {
|
||||
type To = CUresult;
|
||||
fn encuda(self: Self) -> Self::To {
|
||||
match self {
|
||||
Ok(e) => e.encuda(),
|
||||
Err(e) => e.encuda(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Encuda for hipError_t {
|
||||
type To = CUresult;
|
||||
fn encuda(self: Self) -> Self::To {
|
||||
self.into()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
|
||||
mem::transmute(t)
|
||||
}
|
||||
|
||||
unsafe fn transmute_lifetime_mut<'a, 'b, T: ?Sized>(t: &'a mut T) -> &'b mut T {
|
||||
mem::transmute(t)
|
||||
}
|
||||
|
||||
pub fn driver_get_version() -> c_int {
|
||||
i32::max_value()
|
||||
}
|
||||
|
||||
impl<'a> CudaRepr for CUdeviceptr {
|
||||
type Impl = *mut c_void;
|
||||
}
|
||||
|
||||
impl Decuda<*mut c_void> for CUdeviceptr {
|
||||
fn decuda(self) -> *mut c_void {
|
||||
self.0 as *mut _
|
||||
}
|
||||
pub fn drop_checked<T: ZludaObject>(handle: T::CudaHandle) -> Result<(), CUerror> {
|
||||
let mut wrapped_object: ManuallyDrop<Box<LiveCheck<T>>> =
|
||||
unsafe { mem::transmute_copy(&handle) };
|
||||
let underlying_error = LiveCheck::drop_checked(&mut wrapped_object)?;
|
||||
unsafe { ManuallyDrop::drop(&mut wrapped_object) };
|
||||
underlying_error
|
||||
}
|
||||
|
|
|
@ -1,261 +1,53 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::ops::Add;
|
||||
use std::os::raw::c_char;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::{env, fs, iter, mem, ptr, slice};
|
||||
use super::ZludaObject;
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{ffi::CStr, mem};
|
||||
|
||||
use hip_runtime_sys::{
|
||||
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t,
|
||||
hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
|
||||
};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::cuda::CUmodule;
|
||||
use crate::hip_call;
|
||||
|
||||
pub struct SpirvModule {
|
||||
pub binaries: Vec<u32>,
|
||||
pub kernel_info: HashMap<String, ptx::KernelInfo>,
|
||||
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
|
||||
pub build_options: CString,
|
||||
pub(crate) struct Module {
|
||||
base: hipModule_t,
|
||||
}
|
||||
|
||||
impl SpirvModule {
|
||||
pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
|
||||
let u8_text = unsafe { CStr::from_ptr(text) };
|
||||
let ptx_text = u8_text
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Self::new(ptx_text)
|
||||
}
|
||||
impl ZludaObject for Module {
|
||||
const COOKIE: usize = 0xe9138bd040487d4a;
|
||||
|
||||
pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
|
||||
let mut errors = Vec::new();
|
||||
let ast = ptx::ModuleParser::new()
|
||||
.parse(&mut errors, ptx_text)
|
||||
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
if errors.len() > 0 {
|
||||
return Err(hipError_t::hipErrorInvalidImage);
|
||||
}
|
||||
let spirv_module =
|
||||
ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
|
||||
Ok(SpirvModule {
|
||||
binaries: spirv_module.assemble(),
|
||||
kernel_info: spirv_module.kernel_info,
|
||||
should_link_ptx_impl: spirv_module.should_link_ptx_impl,
|
||||
build_options: spirv_module.build_options,
|
||||
})
|
||||
type CudaHandle = CUmodule;
|
||||
|
||||
fn drop_checked(&mut self) -> CUresult {
|
||||
unsafe { hipModuleUnload(self.base) }?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
|
||||
let file_name = unsafe { CStr::from_ptr(fname) }
|
||||
pub(crate) fn load_data(module: &mut CUmodule, image: *const std::ffi::c_void) -> CUresult {
|
||||
let text = unsafe { CStr::from_ptr(image.cast()) }
|
||||
.to_str()
|
||||
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
|
||||
let mut file = File::open(file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
|
||||
let mut file_buffer = Vec::new();
|
||||
file.read_to_end(&mut file_buffer)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
let result = load_data(module, file_buffer.as_ptr() as _);
|
||||
drop(file_buffer);
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn load_data(
|
||||
module: *mut CUmodule,
|
||||
image: *const std::ffi::c_void,
|
||||
) -> Result<(), hipError_t> {
|
||||
if image == ptr::null() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
}
|
||||
if unsafe { *(image as *const u32) } == 0x464c457f {
|
||||
return match unsafe { hipModuleLoadData(module as _, image) } {
|
||||
hipError_t::hipSuccess => Ok(()),
|
||||
e => Err(e),
|
||||
};
|
||||
}
|
||||
let spirv_data = SpirvModule::new_raw(image as *const _)?;
|
||||
load_data_impl(module, spirv_data)
|
||||
}
|
||||
|
||||
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
|
||||
.map_err(|_| CUerror::INVALID_VALUE)?;
|
||||
let ast = ptx_parser::parse_module_checked(text).map_err(|_| CUerror::NO_BINARY_FOR_GPU)?;
|
||||
let llvm_module = ptx::to_llvm_module(ast).map_err(|_| CUerror::UNKNOWN)?;
|
||||
let mut dev = 0;
|
||||
hip_call! { hipCtxGetDevice(&mut dev) };
|
||||
unsafe { hipCtxGetDevice(&mut dev) }?;
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
hip_call! { hipGetDeviceProperties(&mut props, dev) };
|
||||
let arch_binary = compile_amd(
|
||||
&props,
|
||||
iter::once(&spirv_data.binaries[..]),
|
||||
spirv_data.should_link_ptx_impl,
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
|
||||
let elf_module = comgr::compile_bitcode(
|
||||
unsafe { CStr::from_ptr(props.gcnArchName.as_ptr()) },
|
||||
&*llvm_module.llvm_ir,
|
||||
llvm_module.linked_bitcode(),
|
||||
)
|
||||
.map_err(|_| hipError_t::hipErrorUnknown)?;
|
||||
hip_call! { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
|
||||
.map_err(|_| CUerror::UNKNOWN)?;
|
||||
let mut hip_module = unsafe { mem::zeroed() };
|
||||
unsafe { hipModuleLoadData(&mut hip_module, elf_module.as_ptr().cast()) }?;
|
||||
*module = Module { base: hip_module }.wrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
|
||||
const AMDGPU: &'static str = "/opt/rocm/";
|
||||
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
|
||||
const AMDGPU_BITCODE: [&'static str; 8] = [
|
||||
"opencl.bc",
|
||||
"ocml.bc",
|
||||
"ockl.bc",
|
||||
"oclc_correctly_rounded_sqrt_off.bc",
|
||||
"oclc_daz_opt_on.bc",
|
||||
"oclc_finite_only_off.bc",
|
||||
"oclc_unsafe_math_off.bc",
|
||||
"oclc_wavefrontsize64_off.bc",
|
||||
];
|
||||
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
|
||||
|
||||
pub(crate) fn compile_amd<'a>(
|
||||
device_pros: &hipDeviceProp_t,
|
||||
spirv_il: impl Iterator<Item = &'a [u32]>,
|
||||
ptx_lib: Option<(&'static [u8], &'static [u8])>,
|
||||
) -> io::Result<Vec<u8>> {
|
||||
let null_terminator = device_pros
|
||||
.gcnArchName
|
||||
.iter()
|
||||
.position(|&x| x == 0)
|
||||
.unwrap();
|
||||
let gcn_arch_slice = unsafe {
|
||||
slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
|
||||
};
|
||||
let device_name =
|
||||
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
|
||||
name
|
||||
} else {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, ""));
|
||||
};
|
||||
let dir = tempfile::tempdir()?;
|
||||
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
|
||||
Ok(path) => Cow::Owned(path),
|
||||
Err(_) => Cow::Borrowed(LLVM_SPIRV),
|
||||
};
|
||||
let llvm_files = spirv_il
|
||||
.map(|spirv| {
|
||||
let mut spirv_file = NamedTempFile::new_in(&dir)?;
|
||||
let spirv_u8 = unsafe {
|
||||
slice::from_raw_parts(
|
||||
spirv.as_ptr() as *const u8,
|
||||
spirv.len() * mem::size_of::<u32>(),
|
||||
)
|
||||
};
|
||||
spirv_file.write_all(spirv_u8)?;
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(spirv_file.path())?;
|
||||
}
|
||||
let llvm = NamedTempFile::new_in(&dir)?;
|
||||
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
|
||||
//.arg("--spirv-debug")
|
||||
.arg("-r")
|
||||
.arg("-o")
|
||||
.arg(llvm.path())
|
||||
.arg(spirv_file.path())
|
||||
.status()?;
|
||||
assert!(to_llvm_cmd.success());
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(llvm.path())?;
|
||||
}
|
||||
Ok::<_, io::Error>(llvm)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let linked_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut llvm_link = PathBuf::from(AMDGPU);
|
||||
llvm_link.push("llvm");
|
||||
llvm_link.push("bin");
|
||||
llvm_link.push("llvm-link");
|
||||
let mut linker_cmd = Command::new(&llvm_link);
|
||||
linker_cmd
|
||||
.arg("-o")
|
||||
.arg(linked_binary.path())
|
||||
.args(llvm_files.iter().map(|f| f.path()))
|
||||
.args(get_bitcode_paths(device_name));
|
||||
if cfg!(debug_assertions) {
|
||||
linker_cmd.arg("-v");
|
||||
}
|
||||
let status = linker_cmd.status()?;
|
||||
assert!(status.success());
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(linked_binary.path())?;
|
||||
}
|
||||
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
|
||||
let compiled_binary = NamedTempFile::new_in(&dir)?;
|
||||
let mut clang_exe = PathBuf::from(AMDGPU);
|
||||
clang_exe.push("llvm");
|
||||
clang_exe.push("bin");
|
||||
clang_exe.push("clang");
|
||||
let mut compiler_cmd = Command::new(&clang_exe);
|
||||
compiler_cmd
|
||||
.arg(format!("-mcpu={}", device_name))
|
||||
.arg("-ffp-contract=off")
|
||||
.arg("-nogpulib")
|
||||
.arg("-mno-wavefrontsize64")
|
||||
.arg("-O3")
|
||||
.arg("-Xclang")
|
||||
.arg("-O3")
|
||||
.arg("-Xlinker")
|
||||
.arg("--no-undefined")
|
||||
.arg("-target")
|
||||
.arg(AMDGPU_TARGET)
|
||||
.arg("-o")
|
||||
.arg(compiled_binary.path())
|
||||
.arg("-x")
|
||||
.arg("ir")
|
||||
.arg(linked_binary.path());
|
||||
if let Some((_, bitcode)) = ptx_lib {
|
||||
ptx_lib_bitcode.write_all(bitcode)?;
|
||||
compiler_cmd.arg(ptx_lib_bitcode.path());
|
||||
};
|
||||
if cfg!(debug_assertions) {
|
||||
compiler_cmd.arg("-v");
|
||||
}
|
||||
let status = compiler_cmd.status()?;
|
||||
assert!(status.success());
|
||||
let mut result = Vec::new();
|
||||
let compiled_bin_path = compiled_binary.path();
|
||||
let mut compiled_binary = File::open(compiled_bin_path)?;
|
||||
compiled_binary.read_to_end(&mut result)?;
|
||||
if cfg!(debug_assertions) {
|
||||
persist_file(compiled_bin_path)?;
|
||||
}
|
||||
Ok(result)
|
||||
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
||||
super::drop_checked::<Module>(hmod)
|
||||
}
|
||||
|
||||
fn persist_file(path: &Path) -> io::Result<()> {
|
||||
let mut persistent = PathBuf::from("/tmp/zluda");
|
||||
std::fs::create_dir_all(&persistent)?;
|
||||
persistent.push(path.file_name().unwrap());
|
||||
std::fs::copy(path, persistent)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
|
||||
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
|
||||
let mut path = PathBuf::from(AMDGPU);
|
||||
path.push("amdgcn");
|
||||
path.push("bitcode");
|
||||
path.push(x);
|
||||
path
|
||||
});
|
||||
let suffix = if let Some(suffix_idx) = device_name.find(':') {
|
||||
suffix_idx
|
||||
} else {
|
||||
device_name.len()
|
||||
};
|
||||
let mut additional_path = PathBuf::from(AMDGPU);
|
||||
additional_path.push("amdgcn");
|
||||
additional_path.push("bitcode");
|
||||
additional_path.push(format!(
|
||||
"{}{}{}",
|
||||
AMDGPU_BITCODE_DEVICE_PREFIX,
|
||||
&device_name[3..suffix],
|
||||
".bc"
|
||||
));
|
||||
generic_paths.chain(std::iter::once(additional_path))
|
||||
pub(crate) fn get_function(
|
||||
hfunc: &mut hipFunction_t,
|
||||
hmod: &Module,
|
||||
name: *const ::core::ffi::c_char,
|
||||
) -> hipError_t {
|
||||
unsafe { hipModuleGetFunction(hfunc, hmod.base, name) }
|
||||
}
|
||||
|
|
|
@ -1,53 +1,40 @@
|
|||
use std::{ffi::c_void, mem, ptr};
|
||||
|
||||
use hip_runtime_sys::{hipError_t, hipMemoryType, hipPointerGetAttributes};
|
||||
|
||||
use crate::{
|
||||
cuda::{CUdeviceptr, CUmemorytype, CUpointer_attribute},
|
||||
hip_call,
|
||||
};
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{ffi::c_void, ptr};
|
||||
|
||||
pub(crate) unsafe fn get_attribute(
|
||||
data: *mut c_void,
|
||||
attribute: CUpointer_attribute,
|
||||
ptr: CUdeviceptr,
|
||||
) -> Result<(), hipError_t> {
|
||||
attribute: hipPointer_attribute,
|
||||
ptr: hipDeviceptr_t,
|
||||
) -> hipError_t {
|
||||
if data == ptr::null_mut() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
return hipError_t::ErrorInvalidValue;
|
||||
}
|
||||
let mut attribs = mem::zeroed();
|
||||
hip_call! { hipPointerGetAttributes(&mut attribs, ptr.0 as _) };
|
||||
match attribute {
|
||||
CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
|
||||
*(data as *mut _) = attribs.device;
|
||||
// TODO: implement by getting device ordinal & allocation start,
|
||||
// then go through every context for that device
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => hipError_t::ErrorNotSupported,
|
||||
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
let mut hip_result = hipMemoryType(0);
|
||||
hipPointerGetAttribute(
|
||||
(&mut hip_result as *mut hipMemoryType).cast::<c_void>(),
|
||||
attribute,
|
||||
ptr,
|
||||
)?;
|
||||
let cuda_result = memory_type(hip_result)?;
|
||||
unsafe { *(data.cast()) = cuda_result };
|
||||
Ok(())
|
||||
}
|
||||
CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
|
||||
*(data as *mut _) = memory_type(attribs.memoryType)?;
|
||||
Ok(())
|
||||
}
|
||||
CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
|
||||
*(data as *mut _) = attribs.devicePointer;
|
||||
Ok(())
|
||||
}
|
||||
CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
|
||||
*(data as *mut _) = attribs.hostPointer;
|
||||
Ok(())
|
||||
}
|
||||
CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
|
||||
*(data as *mut _) = attribs.isManaged;
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(hipError_t::hipErrorNotSupported),
|
||||
_ => unsafe { hipPointerGetAttribute(data, attribute, ptr) },
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipError_t> {
|
||||
fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
|
||||
match cu {
|
||||
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
|
||||
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
|
||||
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
|
||||
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
|
||||
_ => Err(hipError_t::hipErrorInvalidValue),
|
||||
_ => Err(hipErrorCode_t::InvalidValue),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,157 +0,0 @@
|
|||
#![allow(non_snake_case)]
|
||||
|
||||
use crate::cuda as zluda;
|
||||
use crate::cuda::CUstream;
|
||||
use crate::cuda::CUuuid;
|
||||
use crate::{
|
||||
cuda::{CUdevice, CUdeviceptr},
|
||||
r#impl::CUresult,
|
||||
};
|
||||
use ::std::{
|
||||
ffi::c_void,
|
||||
os::raw::{c_int, c_uint},
|
||||
};
|
||||
use cuda_driver_sys as cuda;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! cuda_driver_test {
|
||||
($func:ident) => {
|
||||
paste! {
|
||||
#[test]
|
||||
fn [<$func _zluda>]() {
|
||||
$func::<crate::r#impl::test::Zluda>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn [<$func _cuda>]() {
|
||||
$func::<crate::r#impl::test::Cuda>()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub trait CudaDriverFns {
|
||||
fn cuInit(flags: c_uint) -> CUresult;
|
||||
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult;
|
||||
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult;
|
||||
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult;
|
||||
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult;
|
||||
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult;
|
||||
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult;
|
||||
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult;
|
||||
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult;
|
||||
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult;
|
||||
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult;
|
||||
fn cuMemFree_v2(mem: *mut c_void) -> CUresult;
|
||||
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult;
|
||||
}
|
||||
|
||||
pub struct Zluda();
|
||||
|
||||
impl CudaDriverFns for Zluda {
|
||||
fn cuInit(_flags: c_uint) -> CUresult {
|
||||
zluda::cuInit(_flags as _)
|
||||
}
|
||||
|
||||
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
|
||||
zluda::cuCtxCreate_v2(pctx as *mut _, flags, CUdevice(dev))
|
||||
}
|
||||
|
||||
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
|
||||
zluda::cuCtxDestroy_v2(ctx as *mut _)
|
||||
}
|
||||
|
||||
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
|
||||
zluda::cuCtxPopCurrent_v2(pctx as *mut _)
|
||||
}
|
||||
|
||||
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
|
||||
zluda::cuCtxGetApiVersion(ctx as *mut _, version)
|
||||
}
|
||||
|
||||
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
|
||||
zluda::cuCtxGetCurrent(pctx as *mut _)
|
||||
}
|
||||
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
|
||||
zluda::cuMemAlloc_v2(dptr as *mut _, bytesize)
|
||||
}
|
||||
|
||||
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
|
||||
zluda::cuDeviceGetUuid(uuid, CUdevice(dev))
|
||||
}
|
||||
|
||||
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
|
||||
zluda::cuDevicePrimaryCtxGetState(CUdevice(dev), flags, active)
|
||||
}
|
||||
|
||||
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
|
||||
zluda::cuStreamGetCtx(hStream, pctx as _)
|
||||
}
|
||||
|
||||
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult {
|
||||
zluda::cuStreamCreate(stream, flags)
|
||||
}
|
||||
|
||||
fn cuMemFree_v2(dptr: *mut c_void) -> CUresult {
|
||||
zluda::cuMemFree_v2(CUdeviceptr(dptr as _))
|
||||
}
|
||||
|
||||
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult {
|
||||
zluda::cuStreamDestroy_v2(stream)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Cuda();
|
||||
|
||||
impl CudaDriverFns for Cuda {
|
||||
fn cuInit(flags: c_uint) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuInit(flags) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuCtxCreate_v2(pctx as *mut _, flags, dev) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuCtxDestroy_v2(ctx as *mut _) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuCtxPopCurrent_v2(pctx as *mut _) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuCtxGetApiVersion(ctx as *mut _, version) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuCtxGetCurrent(pctx as *mut _) as c_uint) }
|
||||
}
|
||||
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuMemAlloc_v2(dptr as *mut _, bytesize) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuDeviceGetUuid(uuid as *mut _, dev) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuStreamGetCtx(hStream as _, pctx as _) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuStreamCreate(stream as _, flags as _) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuMemFree_v2(mem: *mut c_void) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuMemFree_v2(mem as _) as c_uint) }
|
||||
}
|
||||
|
||||
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult {
|
||||
unsafe { CUresult(cuda::cuStreamDestroy_v2(stream as _) as c_uint) }
|
||||
}
|
||||
}
|
|
@ -1,13 +1,79 @@
|
|||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[cfg(test)]
|
||||
extern crate cuda_driver_sys;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate paste;
|
||||
extern crate ptx;
|
||||
|
||||
#[allow(warnings)]
|
||||
pub mod cuda;
|
||||
mod cuda_impl;
|
||||
pub(crate) mod r#impl;
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! implemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
|
||||
Ok(())
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! implemented_in_function {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
cuda_base::cuda_normalize_fn!( crate::r#impl::function::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
|
||||
Ok(())
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::cuda_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cuCtxGetLimit,
|
||||
cuCtxSetCurrent,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
cuDeviceGetCount,
|
||||
cuDeviceGetLuid,
|
||||
cuDeviceGetName,
|
||||
cuDevicePrimaryCtxRelease,
|
||||
cuDevicePrimaryCtxRetain,
|
||||
cuDeviceGetProperties,
|
||||
cuDeviceGetUuid,
|
||||
cuDeviceGetUuid_v2,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuDriverGetVersion,
|
||||
cuFuncGetAttribute,
|
||||
cuInit,
|
||||
cuMemAlloc_v2,
|
||||
cuMemFree_v2,
|
||||
cuMemcpyDtoH_v2,
|
||||
cuMemcpyHtoD_v2,
|
||||
cuModuleGetFunction,
|
||||
cuModuleLoadData,
|
||||
cuModuleUnload,
|
||||
cuPointerGetAttribute,
|
||||
cuMemGetAddressRange_v2,
|
||||
],
|
||||
implemented_in_function <= [
|
||||
cuLaunchKernel,
|
||||
]
|
||||
);
|
12
zluda_bindgen/Cargo.toml
Normal file
12
zluda_bindgen/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "zluda_bindgen"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
bindgen = "0.70"
|
||||
syn = { version = "2.0", features = ["full", "visit-mut"] }
|
||||
proc-macro2 = "1.0.89"
|
||||
quote = "1.0"
|
||||
prettyplease = "0.2.25"
|
||||
rustc-hash = "1.1.0"
|
7
zluda_bindgen/build/cuda_wrapper.h
Normal file
7
zluda_bindgen/build/cuda_wrapper.h
Normal file
|
@ -0,0 +1,7 @@
|
|||
#define __CUDA_API_VERSION_INTERNAL
|
||||
#include <cuda.h>
|
||||
#include <cudaProfiler.h>
|
||||
#include <cudaGL.h>
|
||||
#include <cudaEGL.h>
|
||||
#include <vdpau/vdpau.h>
|
||||
#include <cudaVDPAU.h>
|
703
zluda_bindgen/src/main.rs
Normal file
703
zluda_bindgen/src/main.rs
Normal file
|
@ -0,0 +1,703 @@
|
|||
use proc_macro2::Span;
|
||||
use quote::{format_ident, quote, ToTokens};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use std::{collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr};
|
||||
use syn::{
|
||||
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
|
||||
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
|
||||
PathArguments, Signature, Type, TypePath, UseTree,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let crate_root = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
|
||||
generate_hip_runtime(
|
||||
&crate_root,
|
||||
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
|
||||
);
|
||||
let cuda_header = bindgen::Builder::default()
|
||||
.use_core()
|
||||
.rust_target(bindgen::RustTarget::Stable_1_77)
|
||||
.layout_tests(false)
|
||||
.default_enum_style(bindgen::EnumVariation::NewType {
|
||||
is_bitfield: false,
|
||||
is_global: false,
|
||||
})
|
||||
.derive_hash(true)
|
||||
.derive_eq(true)
|
||||
.header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h"))
|
||||
.allowlist_type("^CU.*")
|
||||
.allowlist_function("^cu.*")
|
||||
.allowlist_var("^CU.*")
|
||||
.must_use_type("cudaError_enum")
|
||||
.constified_enum("cudaError_enum")
|
||||
.no_partialeq("CUDA_HOST_NODE_PARAMS_st")
|
||||
.new_type_alias(r"^CUdeviceptr_v\d+$")
|
||||
.new_type_alias(r"^CUcontext$")
|
||||
.new_type_alias(r"^CUstream$")
|
||||
.new_type_alias(r"^CUmodule$")
|
||||
.new_type_alias(r"^CUfunction$")
|
||||
.new_type_alias(r"^CUlibrary$")
|
||||
.clang_args(["-I/usr/local/cuda/include"])
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
|
||||
generate_functions(&crate_root, &["..", "cuda_base", "src", "cuda.rs"], &module);
|
||||
generate_types(&crate_root, &["..", "cuda_types", "src", "lib.rs"], &module);
|
||||
generate_display(
|
||||
&crate_root,
|
||||
&["..", "zluda_dump", "src", "format_generated.rs"],
|
||||
"cuda_types",
|
||||
&module,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
|
||||
let hiprt_header = bindgen::Builder::default()
|
||||
.use_core()
|
||||
.rust_target(bindgen::RustTarget::Stable_1_77)
|
||||
.layout_tests(false)
|
||||
.default_enum_style(bindgen::EnumVariation::NewType {
|
||||
is_bitfield: false,
|
||||
is_global: false,
|
||||
})
|
||||
.derive_hash(true)
|
||||
.derive_eq(true)
|
||||
.header("/opt/rocm/include/hip/hip_runtime_api.h")
|
||||
.allowlist_type("^hip.*")
|
||||
.allowlist_function("^hip.*")
|
||||
.allowlist_var("^hip.*")
|
||||
.must_use_type("hipError_t")
|
||||
.constified_enum("hipError_t")
|
||||
.new_type_alias("^hipDeviceptr_t$")
|
||||
.new_type_alias("^hipStream_t$")
|
||||
.new_type_alias("^hipModule_t$")
|
||||
.new_type_alias("^hipFunction_t$")
|
||||
.clang_args(["-I/opt/rocm/include", "-D__HIP_PLATFORM_AMD__"])
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let mut module: syn::File = syn::parse_str(&hiprt_header).unwrap();
|
||||
let mut converter = ConvertIntoRustResult {
|
||||
type_: "hipError_t",
|
||||
underlying_type: "hipError_t",
|
||||
new_error_type: "hipErrorCode_t",
|
||||
error_prefix: ("hipError", "Error"),
|
||||
success: ("hipSuccess", "Success"),
|
||||
constants: Vec::new(),
|
||||
};
|
||||
module.items = module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
|
||||
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
|
||||
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
|
||||
item => Some(item),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
converter.flush(&mut module.items);
|
||||
add_send_sync(
|
||||
&mut module.items,
|
||||
&[
|
||||
"hipDeviceptr_t",
|
||||
"hipStream_t",
|
||||
"hipModule_t",
|
||||
"hipFunction_t",
|
||||
],
|
||||
);
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
|
||||
for type_ in arg {
|
||||
let type_ = Ident::new(type_, Span::call_site());
|
||||
items.extend([
|
||||
parse_quote! {
|
||||
unsafe impl Send for #type_ {}
|
||||
},
|
||||
parse_quote! {
|
||||
unsafe impl Sync for #type_ {}
|
||||
},
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
let fns_ = module.items.iter().filter_map(|item| match item {
|
||||
Item::ForeignMod(extern_) => match &*extern_.items {
|
||||
[ForeignItem::Fn(fn_)] => Some(fn_),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => None,
|
||||
});
|
||||
let mut module: syn::File = parse_quote! {
|
||||
extern "system" {
|
||||
#(#fns_)*
|
||||
}
|
||||
};
|
||||
syn::visit_mut::visit_file_mut(&mut PrependCudaPath, &mut module);
|
||||
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
|
||||
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn generate_types(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
let mut module = module.clone();
|
||||
let mut converter = ConvertIntoRustResult {
|
||||
type_: "CUresult",
|
||||
underlying_type: "cudaError_enum",
|
||||
new_error_type: "CUerror",
|
||||
error_prefix: ("CUDA_ERROR_", "ERROR_"),
|
||||
success: ("CUDA_SUCCESS", "SUCCESS"),
|
||||
constants: Vec::new(),
|
||||
};
|
||||
module.items = module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::ForeignMod(_) => None,
|
||||
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
|
||||
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
|
||||
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
|
||||
Item::Struct(mut struct_) => {
|
||||
let ident_string = struct_.ident.to_string();
|
||||
match &*ident_string {
|
||||
"CUdeviceptr_v2" => {
|
||||
struct_.fields = Fields::Unnamed(parse_quote! {
|
||||
(pub *mut ::core::ffi::c_void)
|
||||
});
|
||||
}
|
||||
"CUuuid_st" => {
|
||||
struct_.fields = Fields::Named(parse_quote! {
|
||||
{pub bytes: [::core::ffi::c_uchar; 16usize]}
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(Item::Struct(struct_))
|
||||
}
|
||||
item => Some(item),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
converter.flush(&mut module.items);
|
||||
module.items.push(parse_quote! {
|
||||
impl From<hip_runtime_sys::hipErrorCode_t> for CUerror {
|
||||
fn from(error: hip_runtime_sys::hipErrorCode_t) -> Self {
|
||||
Self(error.0)
|
||||
}
|
||||
}
|
||||
});
|
||||
add_send_sync(
|
||||
&mut module.items,
|
||||
&[
|
||||
"CUdeviceptr",
|
||||
"CUcontext",
|
||||
"CUstream",
|
||||
"CUmodule",
|
||||
"CUfunction",
|
||||
"CUlibrary",
|
||||
],
|
||||
);
|
||||
syn::visit_mut::visit_file_mut(&mut FixAbi, &mut module);
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn write_rust_to_file(path: impl AsRef<std::path::Path>, content: &str) {
|
||||
let mut file = File::create(path).unwrap();
|
||||
file.write("// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\n".as_bytes())
|
||||
.unwrap();
|
||||
file.write(content.as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
struct ConvertIntoRustResult {
|
||||
type_: &'static str,
|
||||
underlying_type: &'static str,
|
||||
new_error_type: &'static str,
|
||||
error_prefix: (&'static str, &'static str),
|
||||
success: (&'static str, &'static str),
|
||||
constants: Vec<syn::ItemConst>,
|
||||
}
|
||||
|
||||
impl ConvertIntoRustResult {
|
||||
fn get_const(&mut self, const_: syn::ItemConst) -> Option<syn::ItemConst> {
|
||||
let name = const_.ident.to_string();
|
||||
if name.starts_with(self.underlying_type) {
|
||||
self.constants.push(const_);
|
||||
None
|
||||
} else {
|
||||
Some(const_)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_use(&mut self, use_: ItemUse) -> Option<ItemUse> {
|
||||
if let UseTree::Path(ref path) = use_.tree {
|
||||
if let UseTree::Rename(ref rename) = &*path.tree {
|
||||
if rename.rename == self.type_ {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(use_)
|
||||
}
|
||||
|
||||
fn flush(self, items: &mut Vec<Item>) {
|
||||
let type_ = format_ident!("{}", self.type_);
|
||||
let type_trait = format_ident!("{}Consts", self.type_);
|
||||
let new_error_type = format_ident!("{}", self.new_error_type);
|
||||
let success = format_ident!("{}", self.success.1);
|
||||
let mut result_variants = Vec::new();
|
||||
let mut error_variants = Vec::new();
|
||||
for const_ in self.constants.iter() {
|
||||
let ident = const_.ident.to_string();
|
||||
if ident.ends_with(self.success.0) {
|
||||
result_variants.push(quote! {
|
||||
const #success: #type_ = #type_::Ok(());
|
||||
});
|
||||
} else {
|
||||
let old_prefix_len = self.underlying_type.len() + 1 + self.error_prefix.0.len();
|
||||
let variant_ident =
|
||||
format_ident!("{}{}", self.error_prefix.1, &ident[old_prefix_len..]);
|
||||
let error_ident = format_ident!("{}", &ident[old_prefix_len..]);
|
||||
let expr = &const_.expr;
|
||||
result_variants.push(quote! {
|
||||
const #variant_ident: #type_ = #type_::Err(#new_error_type::#error_ident);
|
||||
});
|
||||
error_variants.push(quote! {
|
||||
pub const #error_ident: #new_error_type = #new_error_type(unsafe { ::core::num::NonZeroU32::new_unchecked(#expr) });
|
||||
});
|
||||
}
|
||||
}
|
||||
let extra_items: Punctuated<syn::Item, syn::parse::Nothing> = parse_quote! {
|
||||
impl #new_error_type {
|
||||
#(#error_variants)*
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug, Hash, Copy, Clone, PartialEq, Eq)]
|
||||
pub struct #new_error_type(pub ::core::num::NonZeroU32);
|
||||
|
||||
pub trait #type_trait {
|
||||
#(#result_variants)*
|
||||
}
|
||||
impl #type_trait for #type_ {}
|
||||
#[must_use]
|
||||
pub type #type_ = ::core::result::Result<(), #new_error_type>;
|
||||
const _: fn() = || {
|
||||
let _ = std::mem::transmute::<#type_, u32>;
|
||||
};
|
||||
};
|
||||
items.extend(extra_items);
|
||||
}
|
||||
|
||||
fn get_type(&self, type_: syn::ItemType) -> Option<syn::ItemType> {
|
||||
if type_.ident.to_string() == self.type_ {
|
||||
None
|
||||
} else {
|
||||
Some(type_)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct FixAbi;
|
||||
|
||||
impl VisitMut for FixAbi {
|
||||
fn visit_abi_mut(&mut self, i: &mut Abi) {
|
||||
if let Some(ref mut name) = i.name {
|
||||
*name = LitStr::new("system", Span::call_site());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct PrependCudaPath;
|
||||
|
||||
impl VisitMut for PrependCudaPath {
|
||||
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
|
||||
if type_.path.segments.len() == 1 {
|
||||
match &*type_.path.segments[0].ident.to_string() {
|
||||
"usize" | "f64" | "f32" => {}
|
||||
_ => {
|
||||
*type_ = parse_quote! { cuda_types :: #type_ };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct RemoveVisibility;
|
||||
|
||||
impl VisitMut for RemoveVisibility {
|
||||
fn visit_visibility_mut(&mut self, i: &mut syn::Visibility) {
|
||||
*i = syn::Visibility::Inherited;
|
||||
}
|
||||
}
|
||||
|
||||
struct ExplicitReturnType;
|
||||
|
||||
impl VisitMut for ExplicitReturnType {
|
||||
fn visit_return_type_mut(&mut self, i: &mut syn::ReturnType) {
|
||||
if let syn::ReturnType::Default = i {
|
||||
*i = parse_quote! { -> {} };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_display(
|
||||
output: &PathBuf,
|
||||
path: &[&str],
|
||||
types_crate: &'static str,
|
||||
module: &syn::File,
|
||||
) {
|
||||
let ignore_types = [
|
||||
"CUdevice",
|
||||
"CUdeviceptr_v1",
|
||||
"CUarrayMapInfo_st",
|
||||
"CUDA_RESOURCE_DESC_st",
|
||||
"CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st",
|
||||
"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st",
|
||||
"CUexecAffinityParam_st",
|
||||
"CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st",
|
||||
"CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st",
|
||||
"CUuuid_st",
|
||||
"HGPUNV",
|
||||
"EGLint",
|
||||
"EGLSyncKHR",
|
||||
"EGLImageKHR",
|
||||
"EGLStreamKHR",
|
||||
"CUasyncNotificationInfo_st",
|
||||
"CUgraphNodeParams_st",
|
||||
"CUeglFrame_st",
|
||||
"CUdevResource_st",
|
||||
"CUlaunchAttribute_st",
|
||||
"CUlaunchConfig_st",
|
||||
];
|
||||
let ignore_functions = [
|
||||
"cuGLGetDevices",
|
||||
"cuGLGetDevices_v2",
|
||||
"cuStreamSetAttribute",
|
||||
"cuStreamSetAttribute_ptsz",
|
||||
"cuStreamGetAttribute",
|
||||
"cuStreamGetAttribute_ptsz",
|
||||
"cuGraphKernelNodeGetAttribute",
|
||||
"cuGraphKernelNodeSetAttribute",
|
||||
];
|
||||
let count_selectors = [
|
||||
("cuCtxCreate_v3", 1, 2),
|
||||
("cuMemMapArrayAsync", 0, 1),
|
||||
("cuMemMapArrayAsync_ptsz", 0, 1),
|
||||
("cuStreamBatchMemOp", 2, 1),
|
||||
("cuStreamBatchMemOp_ptsz", 2, 1),
|
||||
("cuStreamBatchMemOp_v2", 2, 1),
|
||||
];
|
||||
let mut derive_state = DeriveDisplayState::new(
|
||||
&ignore_types,
|
||||
types_crate,
|
||||
&ignore_functions,
|
||||
&count_selectors,
|
||||
);
|
||||
let mut items = module
|
||||
.items
|
||||
.iter()
|
||||
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
|
||||
.collect::<Vec<_>>();
|
||||
items.push(curesult_display_trait(&derive_state));
|
||||
let mut output = output.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(
|
||||
output,
|
||||
&prettyplease::unparse(&syn::File {
|
||||
shebang: None,
|
||||
attrs: Vec::new(),
|
||||
items,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
struct DeriveDisplayState<'a> {
|
||||
types_crate: &'static str,
|
||||
ignore_types: FxHashSet<Ident>,
|
||||
ignore_fns: FxHashSet<Ident>,
|
||||
enums: FxHashMap<&'a Ident, Vec<&'a Ident>>,
|
||||
array_arguments: FxHashMap<(Ident, usize), usize>,
|
||||
result_variants: Vec<&'a ItemConst>,
|
||||
}
|
||||
|
||||
impl<'a> DeriveDisplayState<'a> {
|
||||
fn new(
|
||||
ignore_types: &[&'static str],
|
||||
types_crate: &'static str,
|
||||
ignore_fns: &[&'static str],
|
||||
count_selectors: &[(&'static str, usize, usize)],
|
||||
) -> Self {
|
||||
DeriveDisplayState {
|
||||
types_crate,
|
||||
ignore_types: ignore_types
|
||||
.into_iter()
|
||||
.map(|x| Ident::new(x, Span::call_site()))
|
||||
.collect(),
|
||||
ignore_fns: ignore_fns
|
||||
.into_iter()
|
||||
.map(|x| Ident::new(x, Span::call_site()))
|
||||
.collect(),
|
||||
array_arguments: count_selectors
|
||||
.into_iter()
|
||||
.map(|(name, val, count)| ((Ident::new(name, Span::call_site()), *val), *count))
|
||||
.collect(),
|
||||
enums: Default::default(),
|
||||
result_variants: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn record_enum_variant(&mut self, enum_: &'a Ident, variant: &'a Ident) {
|
||||
match self.enums.entry(enum_) {
|
||||
hash_map::Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().push(variant);
|
||||
}
|
||||
hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(vec![variant]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cuda_derive_display_trait_for_item<'a>(
|
||||
state: &mut DeriveDisplayState<'a>,
|
||||
item: &'a Item,
|
||||
) -> Option<syn::Item> {
|
||||
let path_prefix = Path::from(Ident::new(state.types_crate, Span::call_site()));
|
||||
let path_prefix_iter = iter::repeat(&path_prefix);
|
||||
match item {
|
||||
Item::Const(const_) => {
|
||||
if const_.ty.to_token_stream().to_string() == "cudaError_enum" {
|
||||
state.result_variants.push(const_);
|
||||
}
|
||||
None
|
||||
}
|
||||
Item::ForeignMod(ItemForeignMod { items, .. }) => match items.last().unwrap() {
|
||||
ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ident, inputs, .. },
|
||||
..
|
||||
}) => {
|
||||
if state.ignore_fns.contains(ident) {
|
||||
return None;
|
||||
}
|
||||
let inputs = inputs
|
||||
.iter()
|
||||
.map(|fn_arg| {
|
||||
let mut fn_arg = fn_arg.clone();
|
||||
syn::visit_mut::visit_fn_arg_mut(&mut PrependCudaPath, &mut fn_arg);
|
||||
fn_arg
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let inputs_iter = inputs.iter();
|
||||
let original_fn_name = ident.to_string();
|
||||
let mut write_argument = inputs.iter().enumerate().map(|(index, fn_arg)| {
|
||||
let name = fn_arg_name(fn_arg);
|
||||
if let Some(length_index) = state.array_arguments.get(&(ident.clone(), index)) {
|
||||
let length = fn_arg_name(&inputs[*length_index]);
|
||||
quote! {
|
||||
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
|
||||
writer.write_all(b"[")?;
|
||||
for i in 0..#length {
|
||||
if i != 0 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
crate::format::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
|
||||
}
|
||||
writer.write_all(b"]")?;
|
||||
}
|
||||
} else {
|
||||
quote! {
|
||||
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
|
||||
}
|
||||
}
|
||||
});
|
||||
let fn_name = format_ident!("write_{}", ident);
|
||||
Some(match write_argument.next() {
|
||||
Some(first_write_argument) => parse_quote! {
|
||||
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized), #(#inputs_iter,)*) -> std::io::Result<()> {
|
||||
let mut arg_idx = 0usize;
|
||||
writer.write_all(b"(")?;
|
||||
#first_write_argument
|
||||
#(
|
||||
arg_idx += 1;
|
||||
writer.write_all(b", ")?;
|
||||
#write_argument
|
||||
)*
|
||||
writer.write_all(b")")
|
||||
}
|
||||
},
|
||||
None => parse_quote! {
|
||||
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
writer.write_all(b"()")
|
||||
}
|
||||
},
|
||||
})
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Item::Impl(ref item_impl) => {
|
||||
let enum_ = match &*item_impl.self_ty {
|
||||
Type::Path(ref path) => &path.path.segments.last().unwrap().ident,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let variant_ = match item_impl.items.last().unwrap() {
|
||||
syn::ImplItem::Const(item_const) => &item_const.ident,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
state.record_enum_variant(enum_, variant_);
|
||||
None
|
||||
}
|
||||
Item::Struct(item_struct) => {
|
||||
if state.ignore_types.contains(&item_struct.ident) {
|
||||
return None;
|
||||
}
|
||||
if state.enums.contains_key(&item_struct.ident) {
|
||||
let enum_ = &item_struct.ident;
|
||||
let enum_iter = iter::repeat(&item_struct.ident);
|
||||
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #enum_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
|
||||
_ => write!(writer, "{}", self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
} else {
|
||||
let struct_ = &item_struct.ident;
|
||||
match item_struct.fields {
|
||||
Fields::Named(ref fields) => {
|
||||
let mut rest_of_fields = fields.named.iter().filter_map(|f| {
|
||||
let f_ident = f.ident.as_ref().unwrap();
|
||||
let name = f_ident.to_string();
|
||||
if name.starts_with("reserved") || name == "_unused" {
|
||||
None
|
||||
} else {
|
||||
Some(f_ident)
|
||||
}
|
||||
});
|
||||
let first_field = match rest_of_fields.next() {
|
||||
Some(f) => f,
|
||||
None => return None,
|
||||
};
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
|
||||
#(
|
||||
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
|
||||
crate::format::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
|
||||
)*
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
Fields::Unnamed(FieldsUnnamed { ref unnamed, .. }) if unnamed.len() == 1 => {
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", self.0)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
}
|
||||
Item::Type(item_type) => {
|
||||
if state.ignore_types.contains(&item_type.ident) {
|
||||
return None;
|
||||
};
|
||||
match &*item_type.ty {
|
||||
Type::Ptr(_) => {
|
||||
let type_ = &item_type.ident;
|
||||
Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", *self)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
Type::Path(type_path) => {
|
||||
if type_path.path.leading_colon.is_some() {
|
||||
let option_seg = type_path.path.segments.last().unwrap();
|
||||
if option_seg.ident == "Option" {
|
||||
match &option_seg.arguments {
|
||||
PathArguments::AngleBracketed(generic) => match generic.args[0] {
|
||||
syn::GenericArgument::Type(Type::BareFn(_)) => {
|
||||
let type_ = &item_type.ident;
|
||||
return Some(parse_quote! {
|
||||
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Item::Union(_) => None,
|
||||
Item::Use(_) => None,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn fn_arg_name(fn_arg: &FnArg) -> &Box<syn::Pat> {
|
||||
let name = if let FnArg::Typed(t) = fn_arg {
|
||||
&t.pat
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
name
|
||||
}
|
||||
|
||||
fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
|
||||
let errors = derive_state.result_variants.iter().filter_map(|const_| {
|
||||
let prefix = "cudaError_enum_";
|
||||
let text = &const_.ident.to_string()[prefix.len()..];
|
||||
if text == "CUDA_SUCCESS" {
|
||||
return None;
|
||||
}
|
||||
let expr = &const_.expr;
|
||||
Some(quote! {
|
||||
#expr => writer.write_all(#text.as_bytes()),
|
||||
})
|
||||
});
|
||||
parse_quote! {
|
||||
impl crate::format::CudaDisplay for cuda_types::CUresult {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),
|
||||
Err(err) => {
|
||||
match err.0.get() {
|
||||
#(#errors)*
|
||||
err => write!(writer, "{}", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -2,7 +2,7 @@
|
|||
name = "zluda_dump"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "zluda_dump"
|
||||
|
@ -10,6 +10,7 @@ crate-type = ["cdylib"]
|
|||
|
||||
[dependencies]
|
||||
ptx = { path = "../ptx" }
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
lz4-sys = "1.9"
|
||||
regex = "1.4"
|
||||
dynasm = "1.2"
|
||||
|
|
|
@ -28,6 +28,7 @@ impl Hash for CUuuidWrapper {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub(crate) struct OriginalExports {
|
||||
original_get_module_from_cubin: Option<
|
||||
unsafe extern "system" fn(
|
||||
|
@ -356,6 +357,7 @@ unsafe fn record_submodules_from_fatbin(
|
|||
);
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin(
|
||||
module: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
|
@ -388,6 +390,7 @@ unsafe extern "system" fn get_module_from_cubin(
|
|||
)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin_ext1(
|
||||
module: *mut CUmodule,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
|
@ -451,6 +454,7 @@ unsafe extern "system" fn get_module_from_cubin_ext1(
|
|||
)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
unsafe extern "system" fn get_module_from_cubin_ext2(
|
||||
fatbin_header: *const FatbinHeader,
|
||||
module: *mut CUmodule,
|
||||
|
@ -508,7 +512,7 @@ unsafe extern "system" fn get_module_from_cubin_ext2(
|
|||
.original_get_module_from_cubin_ext2
|
||||
.unwrap()(fatbin_header, module, ptr1, ptr2, _unknown);
|
||||
fn_logger.result = Some(result);
|
||||
if result != CUresult::CUDA_SUCCESS {
|
||||
if result.is_err() {
|
||||
return result;
|
||||
}
|
||||
record_submodules_from_fatbin(
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
use cuda_types::{CUGLDeviceList, CUdevice};
|
||||
use std::{
|
||||
ffi::{c_void, CStr},
|
||||
fmt::LowerHex,
|
||||
mem, ptr, slice,
|
||||
};
|
||||
|
||||
use cuda_base::cuda_derive_display_trait;
|
||||
|
||||
pub(crate) trait CudaDisplay {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -27,28 +26,6 @@ impl CudaDisplay for cuda_types::CUuuid {
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdevice {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(writer, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdeviceptr {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
write!(writer, "{:p}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdeviceptr_v1 {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -494,6 +471,59 @@ impl CudaDisplay
|
|||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUlaunchConfig_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUeglFrame_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdevResource_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
impl CudaDisplay for cuda_types::CUlaunchAttribute_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
impl<T: CudaDisplay> CudaDisplay for *mut T {
|
||||
fn write(
|
||||
&self,
|
||||
|
@ -544,34 +574,26 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamBatchMemOp(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
stream: cuda_types::CUstream,
|
||||
count: ::std::os::raw::c_uint,
|
||||
paramArray: *mut cuda_types::CUstreamBatchMemOpParams,
|
||||
flags: ::std::os::raw::c_uint,
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"(stream: ")?;
|
||||
CudaDisplay::write(&stream, "cuStreamBatchMemOp", 0, writer)?;
|
||||
writer.write_all(b", ")?;
|
||||
writer.write_all(b"count: ")?;
|
||||
CudaDisplay::write(&count, "cuStreamBatchMemOp", 1, writer)?;
|
||||
writer.write_all(b", paramArray: [")?;
|
||||
for i in 0..count {
|
||||
if i != 0 {
|
||||
writer.write_all(b", ")?;
|
||||
}
|
||||
CudaDisplay::write(
|
||||
&unsafe { paramArray.add(i as usize) },
|
||||
"cuStreamBatchMemOp",
|
||||
2,
|
||||
writer,
|
||||
)?;
|
||||
impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
_index: usize,
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
writer.write_all(b"], flags: ")?;
|
||||
CudaDisplay::write(&flags, "cuStreamBatchMemOp", 3, writer)?;
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
|
@ -585,27 +607,7 @@ pub fn write_cuGraphKernelNodeGetAttribute(
|
|||
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
|
||||
writer.write_all(b", attr: ")?;
|
||||
CudaDisplay::write(&attr, "cuGraphKernelNodeGetAttribute", 1, writer)?;
|
||||
match attr {
|
||||
cuda_types::CUkernelNodeAttrID::CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).accessPolicyWindow },
|
||||
"cuGraphKernelNodeGetAttribute",
|
||||
2,
|
||||
writer,
|
||||
)?;
|
||||
}
|
||||
cuda_types::CUkernelNodeAttrID::CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).cooperative },
|
||||
"cuGraphKernelNodeGetAttribute",
|
||||
2,
|
||||
writer,
|
||||
)?;
|
||||
}
|
||||
_ => return writer.write_all(b", ...) "),
|
||||
}
|
||||
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, value_out)?;
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
|
@ -630,28 +632,73 @@ pub fn write_cuStreamGetAttribute(
|
|||
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
|
||||
writer.write_all(b", attr: ")?;
|
||||
CudaDisplay::write(&attr, "cuStreamGetAttribute", 1, writer)?;
|
||||
match attr {
|
||||
cuda_types::CUstreamAttrID::CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, value_out)?;
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
fn write_launch_attribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
fn_name: &'static str,
|
||||
index: usize,
|
||||
attribute: cuda_types::CUlaunchAttributeID,
|
||||
value_out: *mut cuda_types::CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
match attribute {
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).accessPolicyWindow },
|
||||
"cuStreamGetAttribute",
|
||||
2,
|
||||
fn_name,
|
||||
index,
|
||||
writer,
|
||||
)?;
|
||||
)
|
||||
}
|
||||
cuda_types::CUstreamAttrID::CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).syncPolicy },
|
||||
"cuStreamGetAttribute",
|
||||
2,
|
||||
writer,
|
||||
)?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
|
||||
}
|
||||
_ => return writer.write_all(b", ...) "),
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).clusterSchedulingPolicyPreference }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).programmaticStreamSerializationAllowed }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).programmaticEvent }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).memSyncDomainMap }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).memSyncDomain }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).launchCompletionEvent }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).deviceUpdatableKernelNode }, fn_name, index, writer)
|
||||
}
|
||||
_ => writer.write_all(b", ... "),
|
||||
}
|
||||
writer.write_all(b") ")
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
|
@ -685,71 +732,27 @@ pub fn write_cuStreamSetAttribute_ptsz(
|
|||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuCtxCreate_v3(
|
||||
pub fn write_cuGLGetDevices(
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
_pctx: *mut cuda_types::CUcontext,
|
||||
_paramsArray: *mut cuda_types::CUexecAffinityParam,
|
||||
_numParams: ::std::os::raw::c_int,
|
||||
_flags: ::std::os::raw::c_uint,
|
||||
_dev: cuda_types::CUdevice,
|
||||
_pCudaDeviceCount: *mut ::std::os::raw::c_uint,
|
||||
_pCudaDevices: *mut CUdevice,
|
||||
_cudaDeviceCount: ::std::os::raw::c_uint,
|
||||
_deviceList: CUGLDeviceList,
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuCtxGetExecAffinity(
|
||||
pub fn write_cuGLGetDevices_v2(
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
_pExecAffinity: *mut cuda_types::CUexecAffinityParam,
|
||||
_type_: cuda_types::CUexecAffinityType,
|
||||
_pCudaDeviceCount: *mut ::std::os::raw::c_uint,
|
||||
_pCudaDevices: *mut CUdevice,
|
||||
_cudaDeviceCount: ::std::os::raw::c_uint,
|
||||
_deviceList: CUGLDeviceList,
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuMemMapArrayAsync(
|
||||
_writer: &mut (impl std::io::Write + ?Sized),
|
||||
_mapInfoList: *mut cuda_types::CUarrayMapInfo,
|
||||
_count: ::std::os::raw::c_uint,
|
||||
_hStream: cuda_types::CUstream,
|
||||
) -> std::io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuMemMapArrayAsync_ptsz(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
mapInfoList: *mut cuda_types::CUarrayMapInfo,
|
||||
count: ::std::os::raw::c_uint,
|
||||
hStream: cuda_types::CUstream,
|
||||
) -> std::io::Result<()> {
|
||||
write_cuMemMapArrayAsync(writer, mapInfoList, count, hStream)
|
||||
}
|
||||
|
||||
cuda_derive_display_trait!(
|
||||
cuda_types,
|
||||
CudaDisplay,
|
||||
[
|
||||
CUarrayMapInfo_st,
|
||||
CUDA_RESOURCE_DESC_st,
|
||||
CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st,
|
||||
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st,
|
||||
CUexecAffinityParam_st,
|
||||
CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
|
||||
CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st,
|
||||
CUuuid_st,
|
||||
HGPUNV
|
||||
],
|
||||
[
|
||||
cuCtxCreate_v3,
|
||||
cuCtxGetExecAffinity,
|
||||
cuGraphKernelNodeGetAttribute,
|
||||
cuGraphKernelNodeSetAttribute,
|
||||
cuMemMapArrayAsync,
|
||||
cuMemMapArrayAsync_ptsz,
|
||||
cuStreamBatchMemOp,
|
||||
cuStreamGetAttribute,
|
||||
cuStreamGetAttribute_ptsz,
|
||||
cuStreamSetAttribute,
|
||||
cuStreamSetAttribute_ptsz
|
||||
]
|
||||
);
|
||||
#[path = "format_generated.rs"]
|
||||
mod format_generated;
|
||||
pub(crate) use format_generated::*;
|
||||
|
|
24916
zluda_dump/src/format_generated.rs
Normal file
24916
zluda_dump/src/format_generated.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,22 +1,18 @@
|
|||
use cuda_types::{
|
||||
CUdevice, CUdevice_attribute, CUfunction, CUjit_option, CUmodule, CUresult, CUuuid,
|
||||
};
|
||||
use cuda_types::*;
|
||||
use paste::paste;
|
||||
use side_by_side::CudaDynamicFns;
|
||||
use std::io;
|
||||
use std::{
|
||||
collections::HashMap, env, error::Error, ffi::c_void, fs, path::PathBuf, ptr::NonNull, rc::Rc,
|
||||
sync::Mutex,
|
||||
};
|
||||
use std::{collections::HashMap, env, error::Error, fs, path::PathBuf, rc::Rc, sync::Mutex};
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate cuda_types;
|
||||
|
||||
macro_rules! extern_redirect {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
let original_fn = |dynamic_fns: &mut crate::side_by_side::CudaDynamicFns| {
|
||||
dynamic_fns.$fn_name($( $arg_id ),*)
|
||||
|
@ -34,10 +30,11 @@ macro_rules! extern_redirect {
|
|||
}
|
||||
|
||||
macro_rules! extern_redirect_with_post {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
let original_fn = |dynamic_fns: &mut crate::side_by_side::CudaDynamicFns| {
|
||||
dynamic_fns.$fn_name($( $arg_id ),*)
|
||||
};
|
||||
|
@ -60,10 +57,8 @@ macro_rules! extern_redirect_with_post {
|
|||
|
||||
use cuda_base::cuda_function_declarations;
|
||||
cuda_function_declarations!(
|
||||
cuda_types,
|
||||
extern_redirect,
|
||||
extern_redirect_with_post,
|
||||
[
|
||||
extern_redirect_with_post <= [
|
||||
cuModuleLoad,
|
||||
cuModuleLoadData,
|
||||
cuModuleLoadDataEx,
|
||||
|
@ -205,10 +200,10 @@ impl Settings {
|
|||
}
|
||||
};
|
||||
let libcuda_path = match env::var("ZLUDA_CUDA_LIB") {
|
||||
Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_owned(),
|
||||
Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_string(),
|
||||
Err(e) => {
|
||||
logger.log(log::LogEntry::ErrorBox(Box::new(e) as _));
|
||||
os::LIBCUDA_DEFAULT_PATH.to_owned()
|
||||
os::LIBCUDA_DEFAULT_PATH.to_string()
|
||||
}
|
||||
Ok(env_string) => env_string,
|
||||
};
|
||||
|
@ -302,7 +297,7 @@ where
|
|||
// alternatively we could return a CUDA error, but I think it's fine to
|
||||
// crash. This is a diagnostic utility, if the lock was poisoned we can't
|
||||
// extract any useful trace or logging anyway
|
||||
let mut global_state = &mut *global_state_mutex.lock().unwrap();
|
||||
let global_state = &mut *global_state_mutex.lock().unwrap();
|
||||
let (mut logger, delayed_state) = match global_state.delayed_state {
|
||||
LateInit::Success(ref mut delayed_state) => (
|
||||
global_state.log_factory.get_logger(func, arguments_writer),
|
||||
|
@ -325,7 +320,7 @@ where
|
|||
logger.log(log::LogEntry::ErrorBox(
|
||||
format!("No function {} in the underlying CUDA library", func).into(),
|
||||
));
|
||||
CUresult::CUDA_ERROR_UNKNOWN
|
||||
CUresult::ERROR_UNKNOWN
|
||||
}
|
||||
};
|
||||
logger.result = maybe_cu_result;
|
||||
|
@ -359,7 +354,7 @@ pub(crate) fn cuModuleLoad_Post(
|
|||
state: &mut trace::StateTracker,
|
||||
result: CUresult,
|
||||
) {
|
||||
if result != CUresult::CUDA_SUCCESS {
|
||||
if result.is_err() {
|
||||
return;
|
||||
}
|
||||
state.record_new_module_file(unsafe { *module }, fname, fn_logger)
|
||||
|
@ -373,7 +368,7 @@ pub(crate) fn cuModuleLoadData_Post(
|
|||
state: &mut trace::StateTracker,
|
||||
result: CUresult,
|
||||
) {
|
||||
if result != CUresult::CUDA_SUCCESS {
|
||||
if result.is_err() {
|
||||
return;
|
||||
}
|
||||
state.record_new_module(unsafe { *module }, raw_image, fn_logger)
|
||||
|
@ -401,7 +396,7 @@ pub(crate) fn cuGetExportTable_Post(
|
|||
state: &mut trace::StateTracker,
|
||||
result: CUresult,
|
||||
) {
|
||||
if result != CUresult::CUDA_SUCCESS {
|
||||
if result.is_err() {
|
||||
return;
|
||||
}
|
||||
dark_api::override_export_table(ppExportTable, pExportTableId, state)
|
||||
|
@ -451,7 +446,7 @@ pub(crate) fn cuModuleLoadFatBinary_Post(
|
|||
_state: &mut trace::StateTracker,
|
||||
result: CUresult,
|
||||
) {
|
||||
if result == CUresult::CUDA_SUCCESS {
|
||||
if result.is_ok() {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use crate::cuda::CUuuid;
|
||||
use cuda_types::CUuuid;
|
||||
use std::ffi::{c_void, CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
|
||||
pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
|
||||
|
||||
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
|
||||
let libcuda_path = CString::new(libcuda_path).unwrap();
|
||||
|
|
|
@ -56,8 +56,10 @@ impl CudaDynamicFns {
|
|||
}
|
||||
|
||||
macro_rules! emit_cuda_fn_table {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
#[derive(Default)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
struct CudaFnTable {
|
||||
$($fn_name: DynamicFn<extern $abi fn ( $($arg_id : $arg_type),* ) -> $ret_type>),*
|
||||
}
|
||||
|
@ -74,4 +76,4 @@ macro_rules! emit_cuda_fn_table {
|
|||
};
|
||||
}
|
||||
|
||||
cuda_function_declarations!(cuda_types, emit_cuda_fn_table, emit_cuda_fn_table, []);
|
||||
cuda_function_declarations!(emit_cuda_fn_table);
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
use ptx::{ast::PtxError, Token};
|
||||
use ptx::{DisplayParseError, ModuleParserExt};
|
||||
|
||||
use crate::{dark_api, log, Settings};
|
||||
use cuda_types::CUmodule;
|
||||
use std::{
|
||||
|
@ -172,7 +169,7 @@ impl StateTracker {
|
|||
submodule_index: Option<usize>,
|
||||
module_text: &str,
|
||||
) {
|
||||
let (_ast, errors) = ptx::ModuleParser::parse_unchecked(module_text);
|
||||
let errors = ptx_parser::parse_for_errors(module_text);
|
||||
if !errors.is_empty() {
|
||||
fn_logger.log(log::LogEntry::ModuleParsingError(
|
||||
DumpWriter::get_file_name(module_index, version, submodule_index, "log"),
|
||||
|
@ -232,7 +229,7 @@ impl DumpWriter {
|
|||
module_index: usize,
|
||||
version: Option<usize>,
|
||||
submodule_index: Option<usize>,
|
||||
errors: &[ptx::ParseError<usize, Token<'input>, PtxError>],
|
||||
errors: &[ptx_parser::PtxError<'input>],
|
||||
) -> io::Result<()> {
|
||||
let mut log_file = match &self.dump_dir {
|
||||
None => return Ok(()),
|
||||
|
@ -246,8 +243,7 @@ impl DumpWriter {
|
|||
));
|
||||
let mut file = File::create(log_file)?;
|
||||
for error in errors {
|
||||
let pretty_print_error = unsafe { DisplayParseError::new(error, module_text) };
|
||||
writeln!(file, "{}", pretty_print_error)?;
|
||||
writeln!(file, "{}", error)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
[package]
|
||||
name = "zluda_lib"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "nvcuda"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
zluda = { path = "../zluda" }
|
|
@ -1 +0,0 @@
|
|||
This project exist solely as a workaround, to make sure that ZLUDA-created CUDA driver does not clash with real CUDA driver when running unit tests
|
|
@ -1,11 +0,0 @@
|
|||
pub extern crate zluda;
|
||||
|
||||
pub use zluda::cuda::*;
|
||||
|
||||
// For some reason, on Linux linker strips out all our re-exports,
|
||||
// there's probably a cleaner solution, but for now just exporting
|
||||
// the function below stops it from doing so
|
||||
#[no_mangle]
|
||||
fn _zluda_very_bad_linker_hack() {
|
||||
cuInit(0);
|
||||
}
|
|
@ -2,12 +2,8 @@
|
|||
name = "zluda_ml"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "nvml"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies.ocl-core]
|
||||
version = "0.11"
|
||||
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
|
|
@ -1,23 +1,5 @@
|
|||
use std::{
|
||||
os::raw::{c_char, c_uint},
|
||||
ptr,
|
||||
};
|
||||
|
||||
use crate::nvml::nvmlReturn_t;
|
||||
|
||||
const VERSION: &'static [u8] = b"418.40.04";
|
||||
|
||||
macro_rules! stringify_nmvlreturn_t {
|
||||
($x:ident => [ $($variant:ident),+ ]) => {
|
||||
match $x {
|
||||
$(
|
||||
nvmlReturn_t::$variant => Some(concat!(stringify!($variant), "\0")),
|
||||
)+
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
unimplemented!()
|
||||
|
@ -27,119 +9,3 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
|
|||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
nvmlReturn_t::NVML_ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) fn error_string(result: nvmlReturn_t) -> *const ::std::os::raw::c_char {
|
||||
let text = stringify_nmvlreturn_t!(
|
||||
result => [
|
||||
NVML_SUCCESS,
|
||||
NVML_ERROR_UNINITIALIZED,
|
||||
NVML_ERROR_INVALID_ARGUMENT,
|
||||
NVML_ERROR_NOT_SUPPORTED,
|
||||
NVML_ERROR_NO_PERMISSION,
|
||||
NVML_ERROR_ALREADY_INITIALIZED,
|
||||
NVML_ERROR_NOT_FOUND,
|
||||
NVML_ERROR_INSUFFICIENT_SIZE,
|
||||
NVML_ERROR_INSUFFICIENT_POWER,
|
||||
NVML_ERROR_DRIVER_NOT_LOADED,
|
||||
NVML_ERROR_TIMEOUT,
|
||||
NVML_ERROR_IRQ_ISSUE,
|
||||
NVML_ERROR_LIBRARY_NOT_FOUND,
|
||||
NVML_ERROR_FUNCTION_NOT_FOUND,
|
||||
NVML_ERROR_CORRUPTED_INFOROM,
|
||||
NVML_ERROR_GPU_IS_LOST,
|
||||
NVML_ERROR_RESET_REQUIRED,
|
||||
NVML_ERROR_OPERATING_SYSTEM,
|
||||
NVML_ERROR_LIB_RM_VERSION_MISMATCH,
|
||||
NVML_ERROR_IN_USE,
|
||||
NVML_ERROR_MEMORY,
|
||||
NVML_ERROR_NO_DATA,
|
||||
NVML_ERROR_VGPU_ECC_NOT_SUPPORTED,
|
||||
NVML_ERROR_INSUFFICIENT_RESOURCES,
|
||||
NVML_ERROR_UNKNOWN
|
||||
]
|
||||
);
|
||||
match text {
|
||||
Some(text) => text.as_ptr() as *const _,
|
||||
None => ptr::null(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown() -> nvmlReturn_t {
|
||||
nvmlReturn_t::NVML_SUCCESS
|
||||
}
|
||||
|
||||
static mut DEVICE: Option<ocl_core::DeviceId> = None;
|
||||
|
||||
pub(crate) fn init() -> Result<(), nvmlReturn_t> {
|
||||
let platforms = ocl_core::get_platform_ids()?;
|
||||
let device = platforms.iter().find_map(|plat| {
|
||||
let devices = ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?;
|
||||
for dev in devices {
|
||||
let vendor = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?;
|
||||
match vendor {
|
||||
ocl_core::DeviceInfoResult::VendorId(0x8086)
|
||||
| ocl_core::DeviceInfoResult::VendorId(0x1002) => {}
|
||||
_ => continue,
|
||||
};
|
||||
let dev_type = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?;
|
||||
if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) = dev_type {
|
||||
return Some(dev);
|
||||
}
|
||||
}
|
||||
None
|
||||
});
|
||||
unsafe { DEVICE = device };
|
||||
if device.is_some() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(nvmlReturn_t::NVML_ERROR_UNKNOWN)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn init_with_flags() -> Result<(), nvmlReturn_t> {
|
||||
init()
|
||||
}
|
||||
|
||||
impl From<ocl_core::Error> for nvmlReturn_t {
|
||||
fn from(_: ocl_core::Error) -> Self {
|
||||
nvmlReturn_t::NVML_ERROR_UNKNOWN
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Result<(), nvmlReturn_t>> for nvmlReturn_t {
|
||||
fn from(result: Result<(), nvmlReturn_t>) -> Self {
|
||||
match result {
|
||||
Ok(()) => nvmlReturn_t::NVML_SUCCESS,
|
||||
Err(e) => e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct CountingWriter<T: std::io::Write> {
|
||||
pub base: T,
|
||||
pub len: usize,
|
||||
}
|
||||
|
||||
impl<T: std::io::Write> std::io::Write for CountingWriter<T> {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.len += buf.len();
|
||||
self.base.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
self.base.flush()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn system_get_driver_version(
|
||||
version_ptr: *mut c_char,
|
||||
length: c_uint,
|
||||
) -> Result<(), nvmlReturn_t> {
|
||||
if version_ptr == ptr::null_mut() || length == 0 {
|
||||
return Err(nvmlReturn_t::NVML_ERROR_INVALID_ARGUMENT);
|
||||
}
|
||||
let strlen = usize::min(VERSION.len(), (length as usize) - 1);
|
||||
std::ptr::copy_nonoverlapping(VERSION.as_ptr(), version_ptr as _, strlen);
|
||||
*version_ptr.add(strlen) = 0;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1131,27 +1131,27 @@ pub use self::nvmlPcieLinkState_enum as nvmlPcieLinkState_t;
|
|||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn nvmlInit_v2() -> nvmlReturn_t {
|
||||
crate::r#impl::init().into()
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn nvmlInit() -> nvmlReturn_t {
|
||||
crate::r#impl::init().into()
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn nvmlInitWithFlags(flags: ::std::os::raw::c_uint) -> nvmlReturn_t {
|
||||
crate::r#impl::init_with_flags().into()
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn nvmlShutdown() -> nvmlReturn_t {
|
||||
crate::r#impl::shutdown()
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn nvmlErrorString(result: nvmlReturn_t) -> *const ::std::os::raw::c_char {
|
||||
crate::r#impl::error_string(result)
|
||||
c"".as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -1159,7 +1159,7 @@ pub unsafe extern "C" fn nvmlSystemGetDriverVersion(
|
|||
version: *mut ::std::os::raw::c_char,
|
||||
length: ::std::os::raw::c_uint,
|
||||
) -> nvmlReturn_t {
|
||||
crate::r#impl::system_get_driver_version(version, length).into()
|
||||
crate::r#impl::unimplemented()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
|
Loading…
Add table
Reference in a new issue