mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 19:59:08 +00:00
Merge commit '2b9c8946ec' into demo_mode2
This commit is contained in:
commit
d35e7b29d6
26 changed files with 577 additions and 215 deletions
11
Cargo.lock
generated
11
Cargo.lock
generated
|
@ -3826,6 +3826,16 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zluda_replay"
|
||||||
|
version = "0.0.0"
|
||||||
|
dependencies = [
|
||||||
|
"cuda_macros",
|
||||||
|
"cuda_types",
|
||||||
|
"libloading",
|
||||||
|
"zluda_trace_common",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zluda_sparse"
|
name = "zluda_sparse"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
|
@ -3903,6 +3913,7 @@ dependencies = [
|
||||||
"format",
|
"format",
|
||||||
"libc",
|
"libc",
|
||||||
"libloading",
|
"libloading",
|
||||||
|
"rustc-hash 2.0.0",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tar",
|
"tar",
|
||||||
|
|
|
@ -37,6 +37,7 @@ members = [
|
||||||
"zluda_inject",
|
"zluda_inject",
|
||||||
"zluda_ld",
|
"zluda_ld",
|
||||||
"zluda_ml",
|
"zluda_ml",
|
||||||
|
"zluda_replay",
|
||||||
"zluda_redirect",
|
"zluda_redirect",
|
||||||
"zluda_sparse",
|
"zluda_sparse",
|
||||||
"compiler",
|
"compiler",
|
||||||
|
|
|
@ -29,22 +29,24 @@ fn run_method<'input>(
|
||||||
let mut remap_returns = Vec::new();
|
let mut remap_returns = Vec::new();
|
||||||
if !method.is_kernel {
|
if !method.is_kernel {
|
||||||
for arg in method.return_arguments.iter_mut() {
|
for arg in method.return_arguments.iter_mut() {
|
||||||
match arg.state_space {
|
match arg.info.state_space {
|
||||||
ptx_parser::StateSpace::Param => {
|
ptx_parser::StateSpace::Param => {
|
||||||
arg.state_space = ptx_parser::StateSpace::Reg;
|
arg.info.state_space = ptx_parser::StateSpace::Reg;
|
||||||
let old_name = arg.name;
|
let old_name = arg.name;
|
||||||
arg.name =
|
arg.name = resolver
|
||||||
resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
|
.register_unnamed(Some((arg.info.v_type.clone(), arg.info.state_space)));
|
||||||
if is_declaration {
|
if is_declaration {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
remap_returns.push((old_name, arg.name, arg.v_type.clone()));
|
remap_returns.push((old_name, arg.name, arg.info.v_type.clone()));
|
||||||
body.push(Statement::Variable(ast::Variable {
|
body.push(Statement::Variable(ast::Variable {
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
|
align: None,
|
||||||
|
v_type: arg.info.v_type.clone(),
|
||||||
|
state_space: ptx_parser::StateSpace::Param,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name: old_name,
|
name: old_name,
|
||||||
v_type: arg.v_type.clone(),
|
|
||||||
state_space: ptx_parser::StateSpace::Param,
|
|
||||||
array_init: Vec::new(),
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
ptx_parser::StateSpace::Reg => {}
|
ptx_parser::StateSpace::Reg => {}
|
||||||
|
@ -52,28 +54,30 @@ fn run_method<'input>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for arg in method.input_arguments.iter_mut() {
|
for arg in method.input_arguments.iter_mut() {
|
||||||
match arg.state_space {
|
match arg.info.state_space {
|
||||||
ptx_parser::StateSpace::Param => {
|
ptx_parser::StateSpace::Param => {
|
||||||
arg.state_space = ptx_parser::StateSpace::Reg;
|
arg.info.state_space = ptx_parser::StateSpace::Reg;
|
||||||
let old_name = arg.name;
|
let old_name = arg.name;
|
||||||
arg.name =
|
arg.name = resolver
|
||||||
resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
|
.register_unnamed(Some((arg.info.v_type.clone(), arg.info.state_space)));
|
||||||
if is_declaration {
|
if is_declaration {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
body.push(Statement::Variable(ast::Variable {
|
body.push(Statement::Variable(ast::Variable {
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
|
align: None,
|
||||||
|
v_type: arg.info.v_type.clone(),
|
||||||
|
state_space: ptx_parser::StateSpace::Param,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name: old_name,
|
name: old_name,
|
||||||
v_type: arg.v_type.clone(),
|
|
||||||
state_space: ptx_parser::StateSpace::Param,
|
|
||||||
array_init: Vec::new(),
|
|
||||||
}));
|
}));
|
||||||
body.push(Statement::Instruction(ast::Instruction::St {
|
body.push(Statement::Instruction(ast::Instruction::St {
|
||||||
data: ast::StData {
|
data: ast::StData {
|
||||||
qualifier: ast::LdStQualifier::Weak,
|
qualifier: ast::LdStQualifier::Weak,
|
||||||
state_space: ast::StateSpace::Param,
|
state_space: ast::StateSpace::Param,
|
||||||
caching: ast::StCacheOperator::Writethrough,
|
caching: ast::StCacheOperator::Writethrough,
|
||||||
typ: arg.v_type.clone(),
|
typ: arg.info.v_type.clone(),
|
||||||
},
|
},
|
||||||
arguments: ast::StArgs {
|
arguments: ast::StArgs {
|
||||||
src1: old_name,
|
src1: old_name,
|
||||||
|
|
|
@ -30,11 +30,19 @@ fn run_function<'input>(
|
||||||
statements
|
statements
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|statement| match statement {
|
.filter_map(|statement| match statement {
|
||||||
Statement::Variable(var @ ast::Variable {
|
Statement::Variable(
|
||||||
state_space:
|
var @ ast::Variable {
|
||||||
ast::StateSpace::Global | ast::StateSpace::Const | ast::StateSpace::Shared,
|
info:
|
||||||
..
|
ast::VariableInfo {
|
||||||
}) => {
|
state_space:
|
||||||
|
ast::StateSpace::Global
|
||||||
|
| ast::StateSpace::Const
|
||||||
|
| ast::StateSpace::Shared,
|
||||||
|
..
|
||||||
|
},
|
||||||
|
..
|
||||||
|
},
|
||||||
|
) => {
|
||||||
result.push(Directive2::Variable(ast::LinkingDirective::NONE, var));
|
result.push(Directive2::Variable(ast::LinkingDirective::NONE, var));
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,14 +40,14 @@ fn run_method<'a, 'input>(
|
||||||
if is_kernel {
|
if is_kernel {
|
||||||
for arg in method.input_arguments.iter_mut() {
|
for arg in method.input_arguments.iter_mut() {
|
||||||
let old_name = arg.name;
|
let old_name = arg.name;
|
||||||
let old_space = arg.state_space;
|
let old_space = arg.info.state_space;
|
||||||
let new_space = ast::StateSpace::ParamEntry;
|
let new_space = ast::StateSpace::ParamEntry;
|
||||||
let new_name = visitor
|
let new_name = visitor
|
||||||
.resolver
|
.resolver
|
||||||
.register_unnamed(Some((arg.v_type.clone(), new_space)));
|
.register_unnamed(Some((arg.info.v_type.clone(), new_space)));
|
||||||
visitor.input_argument(old_name, new_name, old_space)?;
|
visitor.input_argument(old_name, new_name, old_space)?;
|
||||||
arg.name = new_name;
|
arg.name = new_name;
|
||||||
arg.state_space = new_space;
|
arg.info.state_space = new_space;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
for arg in method.return_arguments.iter_mut() {
|
for arg in method.return_arguments.iter_mut() {
|
||||||
|
@ -83,10 +83,10 @@ fn run_statement<'a, 'input>(
|
||||||
return_arguments
|
return_arguments
|
||||||
.iter()
|
.iter()
|
||||||
.map(|arg| {
|
.map(|arg| {
|
||||||
if arg.state_space != ast::StateSpace::Local {
|
if arg.info.state_space != ast::StateSpace::Local {
|
||||||
return Err(error_unreachable());
|
return Err(error_unreachable());
|
||||||
}
|
}
|
||||||
Ok((arg.name, arg.v_type.clone()))
|
Ok((arg.name, arg.info.v_type.clone()))
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, _>>()?,
|
.collect::<Result<Vec<_>, _>>()?,
|
||||||
)
|
)
|
||||||
|
@ -332,7 +332,7 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
|
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
|
||||||
let old_space = match var.state_space {
|
let old_space = match var.info.state_space {
|
||||||
space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,
|
space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,
|
||||||
// Do nothing
|
// Do nothing
|
||||||
ptx_parser::StateSpace::Local => return Ok(()),
|
ptx_parser::StateSpace::Local => return Ok(()),
|
||||||
|
@ -350,10 +350,10 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
|
||||||
let new_space = ast::StateSpace::Local;
|
let new_space = ast::StateSpace::Local;
|
||||||
let new_name = self
|
let new_name = self
|
||||||
.resolver
|
.resolver
|
||||||
.register_unnamed(Some((var.v_type.clone(), new_space)));
|
.register_unnamed(Some((var.info.v_type.clone(), new_space)));
|
||||||
self.variable(&var.v_type, old_name, new_name, old_space)?;
|
self.variable(&var.info.v_type, old_name, new_name, old_space)?;
|
||||||
var.name = new_name;
|
var.name = new_name;
|
||||||
var.state_space = new_space;
|
var.info.state_space = new_space;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -195,7 +195,7 @@ fn compile_methods(ptx: &str) -> Vec<Function2<ast::Instruction<SpirvWord>, Spir
|
||||||
let module = ptx_parser::parse_module_checked(ptx).unwrap();
|
let module = ptx_parser::parse_module_checked(ptx).unwrap();
|
||||||
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
||||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, module.directives).unwrap();
|
let directives = normalize_identifiers::run(&mut scoped_resolver, module.directives).unwrap();
|
||||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
||||||
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
||||||
let directives = normalize_basic_blocks::run(&mut flat_resolver, directives).unwrap();
|
let directives = normalize_basic_blocks::run(&mut flat_resolver, directives).unwrap();
|
||||||
|
|
|
@ -122,11 +122,10 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||||
if fn_ == ptr::null_mut() {
|
if fn_ == ptr::null_mut() {
|
||||||
let fn_type = get_function_type(
|
let fn_type = get_function_type(
|
||||||
self.context,
|
self.context,
|
||||||
method.return_arguments.iter().map(|v| &v.v_type),
|
method.return_arguments.iter().map(|v| &v.info.v_type),
|
||||||
method
|
method.input_arguments.iter().map(|v| {
|
||||||
.input_arguments
|
get_input_argument_type(self.context, &v.info.v_type, v.info.state_space)
|
||||||
.iter()
|
}),
|
||||||
.map(|v| get_input_argument_type(self.context, &v.v_type, v.state_space)),
|
|
||||||
)?;
|
)?;
|
||||||
fn_ = unsafe { LLVMAddFunction(self.module, name.as_ptr(), fn_type) };
|
fn_ = unsafe { LLVMAddFunction(self.module, name.as_ptr(), fn_type) };
|
||||||
self.emit_fn_attribute(fn_, "amdgpu-unsafe-fp-atomics", "true");
|
self.emit_fn_attribute(fn_, "amdgpu-unsafe-fp-atomics", "true");
|
||||||
|
@ -153,7 +152,7 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||||
for (i, param) in method.input_arguments.iter().enumerate() {
|
for (i, param) in method.input_arguments.iter().enumerate() {
|
||||||
let value = unsafe { LLVMGetParam(fn_, i as u32) };
|
let value = unsafe { LLVMGetParam(fn_, i as u32) };
|
||||||
let name = self.resolver.get_or_add(param.name);
|
let name = self.resolver.get_or_add(param.name);
|
||||||
if let Some(align) = param.align {
|
if let Some(align) = param.info.align {
|
||||||
unsafe { LLVMSetParamAlignment(value, align) };
|
unsafe { LLVMSetParamAlignment(value, align) };
|
||||||
}
|
}
|
||||||
unsafe { LLVMSetValueName2(value, name.as_ptr().cast(), name.len()) };
|
unsafe { LLVMSetValueName2(value, name.as_ptr().cast(), name.len()) };
|
||||||
|
@ -166,7 +165,7 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||||
LLVMCreateTypeAttribute(
|
LLVMCreateTypeAttribute(
|
||||||
self.context,
|
self.context,
|
||||||
attr_kind,
|
attr_kind,
|
||||||
get_type(self.context, &param.v_type)?,
|
get_type(self.context, &param.info.v_type)?,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
unsafe { LLVMAddAttributeAtIndex(fn_, i as u32 + 1, attr) };
|
unsafe { LLVMAddAttributeAtIndex(fn_, i as u32 + 1, attr) };
|
||||||
|
@ -241,17 +240,17 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||||
let global = unsafe {
|
let global = unsafe {
|
||||||
LLVMAddGlobalInAddressSpace(
|
LLVMAddGlobalInAddressSpace(
|
||||||
self.module,
|
self.module,
|
||||||
get_type(self.context, &var.v_type)?,
|
get_type(self.context, &var.info.v_type)?,
|
||||||
name.as_ptr(),
|
name.as_ptr(),
|
||||||
get_state_space(var.state_space)?,
|
get_state_space(var.info.state_space)?,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
self.resolver.register(var.name, global);
|
self.resolver.register(var.name, global);
|
||||||
if let Some(align) = var.align {
|
if let Some(align) = var.info.align {
|
||||||
unsafe { LLVMSetAlignment(global, align) };
|
unsafe { LLVMSetAlignment(global, align) };
|
||||||
}
|
}
|
||||||
if !var.array_init.is_empty() {
|
if !var.info.array_init.is_empty() {
|
||||||
let initializer = self.get_array_init(&var.v_type, &*var.array_init)?;
|
let initializer = self.get_array_init(&var.info.v_type, &*var.info.array_init)?;
|
||||||
unsafe { LLVMSetInitializer(global, initializer) };
|
unsafe { LLVMSetInitializer(global, initializer) };
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -422,16 +421,16 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
let alloca = unsafe {
|
let alloca = unsafe {
|
||||||
LLVMZludaBuildAlloca(
|
LLVMZludaBuildAlloca(
|
||||||
self.variables_builder.get(),
|
self.variables_builder.get(),
|
||||||
get_type(self.context, &var.v_type)?,
|
get_type(self.context, &var.info.v_type)?,
|
||||||
get_state_space(var.state_space)?,
|
get_state_space(var.info.state_space)?,
|
||||||
self.resolver.get_or_add_raw(var.name),
|
self.resolver.get_or_add_raw(var.name),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
self.resolver.register(var.name, alloca);
|
self.resolver.register(var.name, alloca);
|
||||||
if let Some(align) = var.align {
|
if let Some(align) = var.info.align {
|
||||||
unsafe { LLVMSetAlignment(alloca, align) };
|
unsafe { LLVMSetAlignment(alloca, align) };
|
||||||
}
|
}
|
||||||
if !var.array_init.is_empty() {
|
if !var.info.array_init.is_empty() {
|
||||||
return Err(error_unreachable());
|
return Err(error_unreachable());
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -22,7 +22,7 @@ mod insert_post_saturation;
|
||||||
mod instruction_mode_to_global_mode;
|
mod instruction_mode_to_global_mode;
|
||||||
pub mod llvm;
|
pub mod llvm;
|
||||||
mod normalize_basic_blocks;
|
mod normalize_basic_blocks;
|
||||||
mod normalize_identifiers2;
|
mod normalize_identifiers;
|
||||||
mod normalize_predicates2;
|
mod normalize_predicates2;
|
||||||
mod remove_unreachable_basic_blocks;
|
mod remove_unreachable_basic_blocks;
|
||||||
mod replace_instructions_with_functions;
|
mod replace_instructions_with_functions;
|
||||||
|
@ -68,8 +68,8 @@ pub fn to_llvm_module<'input>(
|
||||||
let sreg_map = SpecialRegistersMap::new(&mut scoped_resolver)?;
|
let sreg_map = SpecialRegistersMap::new(&mut scoped_resolver)?;
|
||||||
let directives = filter_for_demo::run(ast.directives);
|
let directives = filter_for_demo::run(ast.directives);
|
||||||
on_pass_end("filter_for_demo");
|
on_pass_end("filter_for_demo");
|
||||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, directives)?;
|
let directives = normalize_identifiers::run(&mut scoped_resolver, directives)?;
|
||||||
on_pass_end("normalize_identifiers2");
|
on_pass_end("normalize_identifiers");
|
||||||
let directives = replace_known_functions::run(&mut flat_resolver, directives);
|
let directives = replace_known_functions::run(&mut flat_resolver, directives);
|
||||||
on_pass_end("replace_known_functions");
|
on_pass_end("replace_known_functions");
|
||||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives)?;
|
let directives = normalize_predicates2::run(&mut flat_resolver, directives)?;
|
||||||
|
@ -311,16 +311,18 @@ impl<T: ast::Operand<Ident = SpirvWord>> Statement<ast::Instruction<T>, T> {
|
||||||
Statement::Variable(var) => {
|
Statement::Variable(var) => {
|
||||||
let name = visitor.visit_ident(
|
let name = visitor.visit_ident(
|
||||||
var.name,
|
var.name,
|
||||||
Some((&var.v_type, var.state_space)),
|
Some((&var.info.v_type, var.info.state_space)),
|
||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
)?;
|
)?;
|
||||||
Statement::Variable(ast::Variable {
|
Statement::Variable(ast::Variable {
|
||||||
align: var.align,
|
info: ast::VariableInfo {
|
||||||
v_type: var.v_type,
|
align: var.info.align,
|
||||||
state_space: var.state_space,
|
v_type: var.info.v_type,
|
||||||
|
state_space: var.info.state_space,
|
||||||
|
array_init: var.info.array_init,
|
||||||
|
},
|
||||||
name,
|
name,
|
||||||
array_init: var.array_init,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Statement::Conditional(conditional) => {
|
Statement::Conditional(conditional) => {
|
||||||
|
@ -981,20 +983,24 @@ impl SpecialRegistersMap {
|
||||||
let return_type = sreg.get_function_return_type();
|
let return_type = sreg.get_function_return_type();
|
||||||
let input_type = sreg.get_function_input_type();
|
let input_type = sreg.get_function_input_type();
|
||||||
let return_arguments = vec![ast::Variable {
|
let return_arguments = vec![ast::Variable {
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: return_type.into(),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: return_type.into(),
|
||||||
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name: resolver.register_unnamed(Some((return_type.into(), ast::StateSpace::Reg))),
|
name: resolver.register_unnamed(Some((return_type.into(), ast::StateSpace::Reg))),
|
||||||
array_init: Vec::new(),
|
|
||||||
}];
|
}];
|
||||||
let input_arguments = input_type
|
let input_arguments = input_type
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|type_| ast::Variable {
|
.map(|type_| ast::Variable {
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: type_.into(),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: type_.into(),
|
||||||
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name: resolver.register_unnamed(Some((type_.into(), ast::StateSpace::Reg))),
|
name: resolver.register_unnamed(Some((type_.into(), ast::StateSpace::Reg))),
|
||||||
array_init: Vec::new(),
|
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
fn_(sreg, (return_arguments, name, input_arguments));
|
fn_(sreg, (return_arguments, name, input_arguments));
|
||||||
|
|
|
@ -80,19 +80,28 @@ fn run_function_decl<'input, 'b>(
|
||||||
Ok((return_arguments, input_arguments))
|
Ok((return_arguments, input_arguments))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_variable_info<'input, 'b>(
|
||||||
|
resolver: &mut ScopedResolver<'input, 'b>,
|
||||||
|
info: ast::VariableInfo<&'input str>,
|
||||||
|
) -> Result<ast::VariableInfo<SpirvWord>, TranslateError> {
|
||||||
|
Ok(ast::VariableInfo {
|
||||||
|
align: info.align,
|
||||||
|
v_type: info.v_type,
|
||||||
|
state_space: info.state_space,
|
||||||
|
array_init: run_array_init(resolver, &info.array_init)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn run_variable<'input, 'b>(
|
fn run_variable<'input, 'b>(
|
||||||
resolver: &mut ScopedResolver<'input, 'b>,
|
resolver: &mut ScopedResolver<'input, 'b>,
|
||||||
variable: ast::Variable<&'input str>,
|
variable: ast::Variable<&'input str>,
|
||||||
) -> Result<ast::Variable<SpirvWord>, TranslateError> {
|
) -> Result<ast::Variable<SpirvWord>, TranslateError> {
|
||||||
Ok(ast::Variable {
|
Ok(ast::Variable {
|
||||||
|
info: run_variable_info(resolver, variable.info.clone())?,
|
||||||
name: resolver.add(
|
name: resolver.add(
|
||||||
Cow::Borrowed(variable.name),
|
Cow::Borrowed(variable.name),
|
||||||
Some((variable.v_type.clone(), variable.state_space)),
|
Some((variable.info.v_type.clone(), variable.info.state_space)),
|
||||||
)?,
|
)?,
|
||||||
align: variable.align,
|
|
||||||
v_type: variable.v_type,
|
|
||||||
state_space: variable.state_space,
|
|
||||||
array_init: run_array_init(resolver, &variable.array_init)?,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,36 +167,26 @@ fn run_multivariable<'input, 'b>(
|
||||||
result: &mut Vec<NormalizedStatement>,
|
result: &mut Vec<NormalizedStatement>,
|
||||||
variable: ast::MultiVariable<&'input str>,
|
variable: ast::MultiVariable<&'input str>,
|
||||||
) -> Result<(), TranslateError> {
|
) -> Result<(), TranslateError> {
|
||||||
match variable.count {
|
match variable {
|
||||||
Some(count) => {
|
ptx_parser::MultiVariable::Parameterized { info, name, count } => {
|
||||||
for i in 0..count {
|
for i in 0..count {
|
||||||
let name = Cow::Owned(format!("{}{}", variable.var.name, i));
|
let name = Cow::Owned(format!("{}{}", name, i));
|
||||||
let ident = resolver.add(
|
let ident = resolver.add(name, Some((info.v_type.clone(), info.state_space)))?;
|
||||||
name,
|
|
||||||
Some((variable.var.v_type.clone(), variable.var.state_space)),
|
|
||||||
)?;
|
|
||||||
result.push(Statement::Variable(ast::Variable {
|
result.push(Statement::Variable(ast::Variable {
|
||||||
align: variable.var.align,
|
info: run_variable_info(resolver, info.clone())?,
|
||||||
v_type: variable.var.v_type.clone(),
|
|
||||||
state_space: variable.var.state_space,
|
|
||||||
name: ident,
|
name: ident,
|
||||||
array_init: run_array_init(resolver, &variable.var.array_init)?,
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
ptx_parser::MultiVariable::Names { info, names } => {
|
||||||
let name = Cow::Borrowed(variable.var.name);
|
for name in names {
|
||||||
let ident = resolver.add(
|
let name = Cow::Borrowed(name);
|
||||||
name,
|
let ident = resolver.add(name, Some((info.v_type.clone(), info.state_space)))?;
|
||||||
Some((variable.var.v_type.clone(), variable.var.state_space)),
|
result.push(Statement::Variable(ast::Variable {
|
||||||
)?;
|
info: run_variable_info(resolver, info.clone())?,
|
||||||
result.push(Statement::Variable(ast::Variable {
|
name: ident,
|
||||||
align: variable.var.align,
|
}));
|
||||||
v_type: variable.var.v_type.clone(),
|
}
|
||||||
state_space: variable.var.state_space,
|
|
||||||
name: ident,
|
|
||||||
array_init: run_array_init(resolver, &variable.var.array_init)?,
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
|
@ -580,11 +580,13 @@ fn to_variables<'input>(
|
||||||
arguments
|
arguments
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(type_, space)| ast::Variable {
|
.map(|(type_, space)| ast::Variable {
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: type_.clone(),
|
align: None,
|
||||||
state_space: *space,
|
v_type: type_.clone(),
|
||||||
|
state_space: *space,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name: resolver.register_unnamed(Some((type_.clone(), *space))),
|
name: resolver.register_unnamed(Some((type_.clone(), *space))),
|
||||||
array_init: Vec::new(),
|
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,40 +33,48 @@ pub(crate) fn run<'input>(
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::U8),
|
ast::Type::Scalar(ast::ScalarType::U8),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::U8),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::U8),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
name: imports.part1,
|
name: imports.part1,
|
||||||
|
@ -76,20 +84,24 @@ pub(crate) fn run<'input>(
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
body: None,
|
body: None,
|
||||||
|
@ -108,10 +120,12 @@ pub(crate) fn run<'input>(
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
}],
|
}],
|
||||||
name: imports.part2,
|
name: imports.part2,
|
||||||
input_arguments: vec![
|
input_arguments: vec![
|
||||||
|
@ -120,60 +134,72 @@ pub(crate) fn run<'input>(
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::F32),
|
ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::F32),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
ast::Variable {
|
ast::Variable {
|
||||||
name: resolver.register_unnamed(Some((
|
name: resolver.register_unnamed(Some((
|
||||||
ast::Type::Scalar(ast::ScalarType::U8),
|
ast::Type::Scalar(ast::ScalarType::U8),
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
))),
|
))),
|
||||||
align: None,
|
info: ast::VariableInfo {
|
||||||
v_type: ast::Type::Scalar(ast::ScalarType::U8),
|
align: None,
|
||||||
state_space: ast::StateSpace::Reg,
|
v_type: ast::Type::Scalar(ast::ScalarType::U8),
|
||||||
array_init: Vec::new(),
|
state_space: ast::StateSpace::Reg,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
body: None,
|
body: None,
|
||||||
|
|
|
@ -12,7 +12,7 @@ fn run_expand_operands(ptx: ptx_parser::Module) -> String {
|
||||||
// We run the minimal number of passes required to produce the input expected by expand_operands
|
// We run the minimal number of passes required to produce the input expected by expand_operands
|
||||||
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
||||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, ptx.directives).unwrap();
|
let directives = normalize_identifiers::run(&mut scoped_resolver, ptx.directives).unwrap();
|
||||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
||||||
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
||||||
directive2_vec_to_string(&flat_resolver, directives)
|
directive2_vec_to_string(&flat_resolver, directives)
|
||||||
|
|
|
@ -12,7 +12,7 @@ fn run_insert_implicit_conversions(ptx: ptx_parser::Module) -> String {
|
||||||
// We run the minimal number of passes required to produce the input expected by insert_implicit_conversions
|
// We run the minimal number of passes required to produce the input expected by insert_implicit_conversions
|
||||||
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
||||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, ptx.directives).unwrap();
|
let directives = normalize_identifiers::run(&mut scoped_resolver, ptx.directives).unwrap();
|
||||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
||||||
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
||||||
let directives = insert_implicit_conversions2::run(&mut flat_resolver, directives).unwrap();
|
let directives = insert_implicit_conversions2::run(&mut flat_resolver, directives).unwrap();
|
||||||
|
|
37
ptx/src/test/ll/reg_multi.ll
Normal file
37
ptx/src/test/ll/reg_multi.ll
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
define amdgpu_kernel void @reg_multi(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||||
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
|
br label %1
|
||||||
|
|
||||||
|
1: ; preds = %0
|
||||||
|
br label %"37"
|
||||||
|
|
||||||
|
"37": ; preds = %1
|
||||||
|
%"44" = load i64, ptr addrspace(4) %"38", align 8
|
||||||
|
store i64 %"44", ptr addrspace(5) %"40", align 8
|
||||||
|
%"45" = load i64, ptr addrspace(4) %"39", align 8
|
||||||
|
store i64 %"45", ptr addrspace(5) %"41", align 8
|
||||||
|
%"47" = load i64, ptr addrspace(5) %"40", align 8
|
||||||
|
%"54" = inttoptr i64 %"47" to ptr
|
||||||
|
%"46" = load i32, ptr %"54", align 4
|
||||||
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"40", align 8
|
||||||
|
%"55" = inttoptr i64 %"48" to ptr
|
||||||
|
%"34" = getelementptr inbounds i8, ptr %"55", i64 4
|
||||||
|
%"49" = load i32, ptr %"34", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"41", align 8
|
||||||
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
|
%"56" = inttoptr i64 %"50" to ptr
|
||||||
|
store i32 %"51", ptr %"56", align 4
|
||||||
|
%"52" = load i64, ptr addrspace(5) %"41", align 8
|
||||||
|
%"57" = inttoptr i64 %"52" to ptr
|
||||||
|
%"36" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||||
|
%"53" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
|
store i32 %"53", ptr %"36", align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -124,6 +124,7 @@ test_ptx!(vector4, [1u32, 2u32, 3u32, 4u32], [4u32]);
|
||||||
test_ptx!(ld_st_offset, [1u32, 2u32], [2u32, 1u32]);
|
test_ptx!(ld_st_offset, [1u32, 2u32], [2u32, 1u32]);
|
||||||
test_ptx!(ntid, [3u32], [4u32]);
|
test_ptx!(ntid, [3u32], [4u32]);
|
||||||
test_ptx!(reg_local, [12u64], [13u64]);
|
test_ptx!(reg_local, [12u64], [13u64]);
|
||||||
|
test_ptx!(reg_multi, [123u32, 456u32], [123u32, 456u32]);
|
||||||
test_ptx!(mov_address, [0xDEADu64], [0u64]);
|
test_ptx!(mov_address, [0xDEADu64], [0u64]);
|
||||||
test_ptx!(b64tof64, [111u64], [111u64]);
|
test_ptx!(b64tof64, [111u64], [111u64]);
|
||||||
// This segfaults NV compiler
|
// This segfaults NV compiler
|
||||||
|
|
22
ptx/src/test/spirv_run/reg_multi.ptx
Normal file
22
ptx/src/test/spirv_run/reg_multi.ptx
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry reg_multi(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u32 a, b;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.u32 a, [in_addr];
|
||||||
|
ld.u32 b, [in_addr+4];
|
||||||
|
st.u32 [out_addr], a;
|
||||||
|
st.u32 [out_addr+4], b;
|
||||||
|
ret;
|
||||||
|
}
|
|
@ -998,29 +998,41 @@ impl<T: Operand, Err> MapOperand<Err> for Option<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MultiVariable<ID> {
|
pub enum MultiVariable<ID> {
|
||||||
pub var: Variable<ID>,
|
Parameterized {
|
||||||
pub count: Option<u32>,
|
info: VariableInfo<ID>,
|
||||||
|
name: ID,
|
||||||
|
count: u32,
|
||||||
|
},
|
||||||
|
Names {
|
||||||
|
info: VariableInfo<ID>,
|
||||||
|
names: Vec<ID>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct VariableInfo<ID> {
|
||||||
|
pub align: Option<u32>,
|
||||||
|
pub v_type: Type,
|
||||||
|
pub state_space: StateSpace,
|
||||||
|
pub array_init: Vec<RegOrImmediate<ID>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Variable<ID> {
|
pub struct Variable<ID> {
|
||||||
pub align: Option<u32>,
|
pub info: VariableInfo<ID>,
|
||||||
pub v_type: Type,
|
|
||||||
pub state_space: StateSpace,
|
|
||||||
pub name: ID,
|
pub name: ID,
|
||||||
pub array_init: Vec<RegOrImmediate<ID>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<ID: std::fmt::Display> std::fmt::Display for Variable<ID> {
|
impl<ID: std::fmt::Display> std::fmt::Display for Variable<ID> {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f, "{}", self.state_space)?;
|
write!(f, "{}", self.info.state_space)?;
|
||||||
|
|
||||||
if let Some(align) = self.align {
|
if let Some(align) = self.info.align {
|
||||||
write!(f, " .align {}", align)?;
|
write!(f, " .align {}", align)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let (vector_size, scalar_type, array_dims) = match &self.v_type {
|
let (vector_size, scalar_type, array_dims) = match &self.info.v_type {
|
||||||
Type::Scalar(scalar_type) => (None, *scalar_type, &vec![]),
|
Type::Scalar(scalar_type) => (None, *scalar_type, &vec![]),
|
||||||
Type::Vector(size, scalar_type) => (Some(*size), *scalar_type, &vec![]),
|
Type::Vector(size, scalar_type) => (Some(*size), *scalar_type, &vec![]),
|
||||||
Type::Array(vector_size, scalar_type, array_dims) => {
|
Type::Array(vector_size, scalar_type, array_dims) => {
|
||||||
|
@ -1038,7 +1050,7 @@ impl<ID: std::fmt::Display> std::fmt::Display for Variable<ID> {
|
||||||
write!(f, "[{}]", dim)?;
|
write!(f, "[{}]", dim)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.array_init.len() > 0 {
|
if self.info.array_init.len() > 0 {
|
||||||
todo!("Need to interpret the array initializer data as the appropriate type");
|
todo!("Need to interpret the array initializer data as the appropriate type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -136,7 +136,7 @@ impl<'a, 'input> PtxParserState<'a, 'input> {
|
||||||
fn get_type_space(input_arguments: &[Variable<&str>]) -> Vec<(Type, StateSpace)> {
|
fn get_type_space(input_arguments: &[Variable<&str>]) -> Vec<(Type, StateSpace)> {
|
||||||
input_arguments
|
input_arguments
|
||||||
.iter()
|
.iter()
|
||||||
.map(|var| (var.v_type.clone(), var.state_space))
|
.map(|var| (var.info.v_type.clone(), var.info.state_space))
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -370,7 +370,7 @@ pub fn parse_for_errors_and_params<'input>(
|
||||||
.func_directive
|
.func_directive
|
||||||
.input_arguments
|
.input_arguments
|
||||||
.iter()
|
.iter()
|
||||||
.map(|arg| arg.v_type.layout())
|
.map(|arg| arg.info.v_type.layout())
|
||||||
.collect();
|
.collect();
|
||||||
Some((func.func_directive.name().to_string(), layouts))
|
Some((func.func_directive.name().to_string(), layouts))
|
||||||
} else {
|
} else {
|
||||||
|
@ -572,7 +572,13 @@ fn module_variable<'a, 'input>(
|
||||||
let var = global_space
|
let var = global_space
|
||||||
.flat_map(|space| multi_variable(linking.contains(LinkingDirective::EXTERN), space))
|
.flat_map(|space| multi_variable(linking.contains(LinkingDirective::EXTERN), space))
|
||||||
// TODO: support multi var in globals
|
// TODO: support multi var in globals
|
||||||
.map(|multi_var| multi_var.var)
|
.verify_map(|multi_var| match multi_var {
|
||||||
|
MultiVariable::Names { info, names } if names.len() == 1 => Some(ast::Variable {
|
||||||
|
info,
|
||||||
|
name: names[0],
|
||||||
|
}),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
.parse_next(stream)?;
|
.parse_next(stream)?;
|
||||||
Ok((linking, var))
|
Ok((linking, var))
|
||||||
}
|
}
|
||||||
|
@ -906,7 +912,7 @@ fn method_parameter<'a, 'input: 'a>(
|
||||||
) -> impl Parser<PtxParser<'a, 'input>, Variable<&'input str>, ContextError> {
|
) -> impl Parser<PtxParser<'a, 'input>, Variable<&'input str>, ContextError> {
|
||||||
fn nvptx_kernel_declaration<'a, 'input>(
|
fn nvptx_kernel_declaration<'a, 'input>(
|
||||||
stream: &mut PtxParser<'a, 'input>,
|
stream: &mut PtxParser<'a, 'input>,
|
||||||
) -> PResult<(Option<u32>, Option<NonZeroU8>, ScalarType, &'input str)> {
|
) -> PResult<((Option<u32>, Option<NonZeroU8>, ScalarType), &'input str)> {
|
||||||
trace(
|
trace(
|
||||||
"nvptx_kernel_declaration",
|
"nvptx_kernel_declaration",
|
||||||
(
|
(
|
||||||
|
@ -917,15 +923,15 @@ fn method_parameter<'a, 'input: 'a>(
|
||||||
ident,
|
ident,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.map(|(vector, type_, _, align, name)| (align, vector, type_, name))
|
.map(|(vector, type_, _, align, name)| ((align, vector, type_), name))
|
||||||
.parse_next(stream)
|
.parse_next(stream)
|
||||||
}
|
}
|
||||||
trace(
|
trace(
|
||||||
"method_parameter",
|
"method_parameter",
|
||||||
move |stream: &mut PtxParser<'a, 'input>| {
|
move |stream: &mut PtxParser<'a, 'input>| {
|
||||||
if kernel_decl_rules {}
|
if kernel_decl_rules {}
|
||||||
let (align, vector, type_, name) =
|
let ((align, vector, type_), name) =
|
||||||
alt((variable_declaration, nvptx_kernel_declaration)).parse_next(stream)?;
|
alt(((variable_info, ident), nvptx_kernel_declaration)).parse_next(stream)?;
|
||||||
let array_dimensions = if state_space != StateSpace::Reg {
|
let array_dimensions = if state_space != StateSpace::Reg {
|
||||||
opt(array_dimensions).parse_next(stream)?
|
opt(array_dimensions).parse_next(stream)?
|
||||||
} else {
|
} else {
|
||||||
|
@ -938,27 +944,28 @@ fn method_parameter<'a, 'input: 'a>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Variable {
|
Ok(Variable {
|
||||||
align,
|
info: VariableInfo {
|
||||||
v_type: Type::maybe_array(vector, type_, array_dimensions),
|
align,
|
||||||
state_space,
|
v_type: Type::maybe_array(vector, type_, array_dimensions),
|
||||||
|
state_space,
|
||||||
|
array_init: Vec::new(),
|
||||||
|
},
|
||||||
name,
|
name,
|
||||||
array_init: Vec::new(),
|
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: split to a separate type
|
// TODO: split to a separate type
|
||||||
fn variable_declaration<'a, 'input>(
|
fn variable_info<'a, 'input>(
|
||||||
stream: &mut PtxParser<'a, 'input>,
|
stream: &mut PtxParser<'a, 'input>,
|
||||||
) -> PResult<(Option<u32>, Option<NonZeroU8>, ScalarType, &'input str)> {
|
) -> PResult<(Option<u32>, Option<NonZeroU8>, ScalarType)> {
|
||||||
trace(
|
trace(
|
||||||
"variable_declaration",
|
"variable_info",
|
||||||
(
|
(
|
||||||
opt(align.verify(|x| x.count_ones() == 1)),
|
opt(align.verify(|x| x.count_ones() == 1)),
|
||||||
vector_prefix,
|
vector_prefix,
|
||||||
scalar_type,
|
scalar_type,
|
||||||
ident,
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.parse_next(stream)
|
.parse_next(stream)
|
||||||
|
@ -971,21 +978,27 @@ fn multi_variable<'a, 'input: 'a>(
|
||||||
trace(
|
trace(
|
||||||
"multi_variable",
|
"multi_variable",
|
||||||
move |stream: &mut PtxParser<'a, 'input>| {
|
move |stream: &mut PtxParser<'a, 'input>| {
|
||||||
let ((align, vector, type_, name), count) = (
|
let ((align, vector, type_), names, count): (_, Vec<_>, _) = (
|
||||||
variable_declaration,
|
variable_info,
|
||||||
|
separated(1.., ident, Token::Comma),
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
|
||||||
opt(delimited(Token::Lt, u32.verify(|x| *x != 0), Token::Gt)),
|
opt(delimited(Token::Lt, u32.verify(|x| *x != 0), Token::Gt)),
|
||||||
)
|
)
|
||||||
.parse_next(stream)?;
|
.parse_next(stream)?;
|
||||||
if count.is_some() {
|
if let Some(count) = count {
|
||||||
return Ok(MultiVariable {
|
if names.len() > 1 {
|
||||||
var: Variable {
|
// nvcc does not support parameterized variable names in comma-separated lists of names.
|
||||||
|
return Err(ErrMode::from_error_kind(stream, ErrorKind::Verify));
|
||||||
|
}
|
||||||
|
let name = names[0];
|
||||||
|
return Ok(MultiVariable::Parameterized {
|
||||||
|
info: VariableInfo {
|
||||||
align,
|
align,
|
||||||
v_type: Type::maybe_vector_parsed(vector, type_),
|
v_type: Type::maybe_vector_parsed(vector, type_),
|
||||||
state_space,
|
state_space,
|
||||||
name,
|
|
||||||
array_init: Vec::new(),
|
array_init: Vec::new(),
|
||||||
},
|
},
|
||||||
|
name,
|
||||||
count,
|
count,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -1008,15 +1021,14 @@ fn multi_variable<'a, 'input: 'a>(
|
||||||
return Err(ErrMode::from_error_kind(stream, ErrorKind::Verify));
|
return Err(ErrMode::from_error_kind(stream, ErrorKind::Verify));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(MultiVariable {
|
Ok(MultiVariable::Names {
|
||||||
var: Variable {
|
info: VariableInfo {
|
||||||
align,
|
align,
|
||||||
v_type: Type::maybe_array(vector, type_, array_dimensions),
|
v_type: Type::maybe_array(vector, type_, array_dimensions),
|
||||||
state_space,
|
state_space,
|
||||||
name,
|
|
||||||
array_init: initializer.unwrap_or(Vec::new()),
|
array_init: initializer.unwrap_or(Vec::new()),
|
||||||
},
|
},
|
||||||
count,
|
names,
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -188,12 +188,25 @@ pub(crate) unsafe fn push_current_v2(ctx: CUcontext) -> CUresult {
|
||||||
push_current(ctx)
|
push_current(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn pop_current(ctx: &mut CUcontext) -> CUresult {
|
pub(crate) unsafe fn pop_current(result: Option<&mut CUcontext>) -> CUresult {
|
||||||
STACK.with(|stack| {
|
let old_ctx_and_new_device = STACK.with(|stack| {
|
||||||
if let Some((_ctx, _)) = stack.borrow_mut().pop() {
|
let mut stack = stack.borrow_mut();
|
||||||
*ctx = _ctx;
|
stack
|
||||||
}
|
.pop()
|
||||||
|
.map(|(ctx, _)| (ctx, stack.last().map(|(_, dev)| *dev)))
|
||||||
});
|
});
|
||||||
|
let ctx = match old_ctx_and_new_device {
|
||||||
|
Some((old_ctx, new_device)) => {
|
||||||
|
if let Some(new_device) = new_device {
|
||||||
|
hipSetDevice(new_device)?;
|
||||||
|
}
|
||||||
|
old_ctx
|
||||||
|
}
|
||||||
|
None => return CUresult::ERROR_INVALID_CONTEXT,
|
||||||
|
};
|
||||||
|
if let Some(out) = result {
|
||||||
|
*out = ctx;
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,7 +226,7 @@ pub(crate) unsafe fn destroy_v2(ctx: CUcontext) -> CUresult {
|
||||||
zluda_common::drop_checked::<Context>(ctx)
|
zluda_common::drop_checked::<Context>(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn pop_current_v2(ctx: &mut CUcontext) -> CUresult {
|
pub(crate) unsafe fn pop_current_v2(ctx: Option<&mut CUcontext>) -> CUresult {
|
||||||
pop_current(ctx)
|
pop_current(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -241,3 +254,27 @@ pub(crate) unsafe fn get_api_version(
|
||||||
*version = 3020;
|
*version = 3020;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::tests::CudaApi;
|
||||||
|
use cuda_macros::test_cuda;
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
|
#[test_cuda]
|
||||||
|
fn empty_pop_fails(api: impl CudaApi) {
|
||||||
|
api.cuInit(0);
|
||||||
|
assert_eq!(
|
||||||
|
api.cuCtxPopCurrent_v2_unchecked(&mut unsafe { mem::zeroed() }),
|
||||||
|
CUresult::ERROR_INVALID_CONTEXT
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test_cuda]
|
||||||
|
fn pop_into_null_succeeds(api: impl CudaApi) {
|
||||||
|
api.cuInit(0);
|
||||||
|
api.cuCtxCreate_v2(&mut unsafe { mem::zeroed() }, 0, 0);
|
||||||
|
api.cuCtxPopCurrent_v2(ptr::null_mut());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -254,6 +254,10 @@ pub(crate) fn get_attribute(
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS => {
|
||||||
return get_device_prop(pi, dev_idx, |props| props.unifiedFunctionPointers)
|
return get_device_prop(pi, dev_idx, |props| props.unifiedFunctionPointers)
|
||||||
}
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED => {
|
||||||
|
*pi = 0;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
let attrib = remap_attribute! {
|
let attrib = remap_attribute! {
|
||||||
|
|
17
zluda_replay/Cargo.toml
Normal file
17
zluda_replay/Cargo.toml
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
[package]
|
||||||
|
name = "zluda_replay"
|
||||||
|
version = "0.0.0"
|
||||||
|
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "zluda_replay"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
zluda_trace_common = { path = "../zluda_trace_common" }
|
||||||
|
cuda_macros = { path = "../cuda_macros" }
|
||||||
|
cuda_types = { path = "../cuda_types" }
|
||||||
|
libloading = "0.8"
|
||||||
|
|
||||||
|
[package.metadata.zluda]
|
||||||
|
debug_only = true
|
98
zluda_replay/src/main.rs
Normal file
98
zluda_replay/src/main.rs
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
|
use cuda_types::cuda::{CUdeviceptr_v2, CUstream};
|
||||||
|
|
||||||
|
struct CudaDynamicFns {
|
||||||
|
handle: libloading::Library,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CudaDynamicFns {
|
||||||
|
unsafe fn new(path: &str) -> Result<Self, libloading::Error> {
|
||||||
|
let handle = libloading::Library::new(path)?;
|
||||||
|
Ok(Self { handle })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! emit_cuda_fn_table {
|
||||||
|
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||||
|
impl CudaDynamicFns {
|
||||||
|
$(
|
||||||
|
#[allow(dead_code)]
|
||||||
|
unsafe fn $fn_name(&self, $($arg_id : $arg_type),*) -> $ret_type {
|
||||||
|
let func = self.handle.get::<unsafe extern $abi fn ($($arg_type),*) -> $ret_type>(concat!(stringify!($fn_name), "\0").as_bytes());
|
||||||
|
(func.unwrap())($($arg_id),*)
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
cuda_macros::cuda_function_declarations!(emit_cuda_fn_table);
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
let libcuda = unsafe { CudaDynamicFns::new(&args[1]).unwrap() };
|
||||||
|
unsafe { libcuda.cuInit(0) }.unwrap();
|
||||||
|
unsafe { libcuda.cuCtxCreate_v2(&mut mem::zeroed(), 0, 0) }.unwrap();
|
||||||
|
let reader = std::fs::File::open(&args[2]).unwrap();
|
||||||
|
let (mut manifest, mut source, mut buffers) = zluda_trace_common::replay::load(reader);
|
||||||
|
let mut args = manifest
|
||||||
|
.parameters
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, param)| {
|
||||||
|
let mut buffer = buffers.remove(&format!("param_{i}.bin")).unwrap();
|
||||||
|
for param_ptr in param.pointer_offsets.iter() {
|
||||||
|
let buffer_param_slice = &mut buffer[param_ptr.offset_in_param
|
||||||
|
..param_ptr.offset_in_param + std::mem::size_of::<usize>()];
|
||||||
|
let mut dev_ptr = unsafe { mem::zeroed() };
|
||||||
|
let host_buffer = buffers
|
||||||
|
.remove(&format!(
|
||||||
|
"param_{i}_ptr_{}_pre.bin",
|
||||||
|
param_ptr.offset_in_param
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
unsafe { libcuda.cuMemAlloc_v2(&mut dev_ptr, host_buffer.len()) }.unwrap();
|
||||||
|
unsafe {
|
||||||
|
libcuda.cuMemcpyHtoD_v2(dev_ptr, host_buffer.as_ptr().cast(), host_buffer.len())
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
dev_ptr = CUdeviceptr_v2(unsafe {
|
||||||
|
dev_ptr
|
||||||
|
.0
|
||||||
|
.cast::<u8>()
|
||||||
|
.add(param_ptr.offset_in_buffer)
|
||||||
|
.cast()
|
||||||
|
});
|
||||||
|
buffer_param_slice.copy_from_slice(&(dev_ptr.0 as usize).to_ne_bytes());
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let mut module = unsafe { mem::zeroed() };
|
||||||
|
std::fs::write("/tmp/source.ptx", &source).unwrap();
|
||||||
|
source.push('\0');
|
||||||
|
unsafe { libcuda.cuModuleLoadData(&mut module, source.as_ptr().cast()) }.unwrap();
|
||||||
|
let mut function = unsafe { mem::zeroed() };
|
||||||
|
manifest.kernel_name.push('\0');
|
||||||
|
unsafe {
|
||||||
|
libcuda.cuModuleGetFunction(&mut function, module, manifest.kernel_name.as_ptr().cast())
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
unsafe {
|
||||||
|
libcuda.cuLaunchKernel(
|
||||||
|
function,
|
||||||
|
manifest.config.grid_dim.0,
|
||||||
|
manifest.config.grid_dim.1,
|
||||||
|
manifest.config.grid_dim.2,
|
||||||
|
manifest.config.block_dim.0,
|
||||||
|
manifest.config.block_dim.1,
|
||||||
|
manifest.config.block_dim.2,
|
||||||
|
manifest.config.shared_mem_bytes,
|
||||||
|
CUstream(std::ptr::null_mut()),
|
||||||
|
args.as_mut_ptr().cast(),
|
||||||
|
std::ptr::null_mut(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
todo!();
|
||||||
|
}
|
|
@ -1552,14 +1552,14 @@ fn launch_kernel_pre(
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub(crate) fn cuLaunchKernel_Post(
|
pub(crate) fn cuLaunchKernel_Post(
|
||||||
_f: cuda_types::cuda::CUfunction,
|
_f: cuda_types::cuda::CUfunction,
|
||||||
_gridDimX: ::core::ffi::c_uint,
|
gridDimX: ::core::ffi::c_uint,
|
||||||
_gridDimY: ::core::ffi::c_uint,
|
gridDimY: ::core::ffi::c_uint,
|
||||||
_gridDimZ: ::core::ffi::c_uint,
|
gridDimZ: ::core::ffi::c_uint,
|
||||||
_blockDimX: ::core::ffi::c_uint,
|
blockDimX: ::core::ffi::c_uint,
|
||||||
_blockDimY: ::core::ffi::c_uint,
|
blockDimY: ::core::ffi::c_uint,
|
||||||
_blockDimZ: ::core::ffi::c_uint,
|
blockDimZ: ::core::ffi::c_uint,
|
||||||
_sharedMemBytes: ::core::ffi::c_uint,
|
sharedMemBytes: ::core::ffi::c_uint,
|
||||||
stream: cuda_types::cuda::CUstream,
|
hStream: cuda_types::cuda::CUstream,
|
||||||
kernel_params: *mut *mut ::core::ffi::c_void,
|
kernel_params: *mut *mut ::core::ffi::c_void,
|
||||||
_extra: *mut *mut ::core::ffi::c_void,
|
_extra: *mut *mut ::core::ffi::c_void,
|
||||||
pre_state: Option<replay::LaunchPreState>,
|
pre_state: Option<replay::LaunchPreState>,
|
||||||
|
@ -1569,7 +1569,25 @@ pub(crate) fn cuLaunchKernel_Post(
|
||||||
_result: CUresult,
|
_result: CUresult,
|
||||||
) {
|
) {
|
||||||
let pre_state = unwrap_some_or!(pre_state, return);
|
let pre_state = unwrap_some_or!(pre_state, return);
|
||||||
replay::post_kernel_launch(libcuda, state, fn_logger, stream, kernel_params, pre_state);
|
replay::post_kernel_launch(
|
||||||
|
libcuda,
|
||||||
|
state,
|
||||||
|
fn_logger,
|
||||||
|
CUlaunchConfig {
|
||||||
|
gridDimX,
|
||||||
|
gridDimY,
|
||||||
|
gridDimZ,
|
||||||
|
blockDimX,
|
||||||
|
blockDimY,
|
||||||
|
blockDimZ,
|
||||||
|
sharedMemBytes,
|
||||||
|
hStream,
|
||||||
|
attrs: ptr::null_mut(),
|
||||||
|
numAttrs: 0,
|
||||||
|
},
|
||||||
|
kernel_params,
|
||||||
|
pre_state,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
|
@ -1609,7 +1627,7 @@ pub(crate) fn cuLaunchKernelEx_Post(
|
||||||
libcuda,
|
libcuda,
|
||||||
state,
|
state,
|
||||||
fn_logger,
|
fn_logger,
|
||||||
unsafe { *config }.hStream,
|
unsafe { *config },
|
||||||
kernel_params,
|
kernel_params,
|
||||||
pre_state,
|
pre_state,
|
||||||
);
|
);
|
||||||
|
|
|
@ -97,11 +97,11 @@ pub(crate) fn post_kernel_launch(
|
||||||
libcuda: &mut CudaDynamicFns,
|
libcuda: &mut CudaDynamicFns,
|
||||||
state: &trace::StateTracker,
|
state: &trace::StateTracker,
|
||||||
fn_logger: &mut FnCallLog,
|
fn_logger: &mut FnCallLog,
|
||||||
stream: CUstream,
|
config: CUlaunchConfig,
|
||||||
kernel_params: *mut *mut std::ffi::c_void,
|
kernel_params: *mut *mut std::ffi::c_void,
|
||||||
mut pre_state: LaunchPreState,
|
mut pre_state: LaunchPreState,
|
||||||
) -> Option<()> {
|
) -> Option<()> {
|
||||||
fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(stream))?;
|
fn_logger.try_cuda(|| libcuda.cuStreamSynchronize(config.hStream))?;
|
||||||
let raw_args =
|
let raw_args =
|
||||||
unsafe { std::slice::from_raw_parts(kernel_params, pre_state.kernel_params.len()) };
|
unsafe { std::slice::from_raw_parts(kernel_params, pre_state.kernel_params.len()) };
|
||||||
for (raw_arg, param) in raw_args.iter().zip(pre_state.kernel_params.iter_mut()) {
|
for (raw_arg, param) in raw_args.iter().zip(pre_state.kernel_params.iter_mut()) {
|
||||||
|
@ -128,6 +128,11 @@ pub(crate) fn post_kernel_launch(
|
||||||
zluda_trace_common::replay::save(
|
zluda_trace_common::replay::save(
|
||||||
file,
|
file,
|
||||||
pre_state.kernel_name,
|
pre_state.kernel_name,
|
||||||
|
zluda_trace_common::replay::LaunchConfig {
|
||||||
|
grid_dim: (config.gridDimX, config.gridDimY, config.gridDimZ),
|
||||||
|
block_dim: (config.blockDimX, config.blockDimY, config.blockDimZ),
|
||||||
|
shared_mem_bytes: config.sharedMemBytes,
|
||||||
|
},
|
||||||
pre_state.source,
|
pre_state.source,
|
||||||
pre_state.kernel_params,
|
pre_state.kernel_params,
|
||||||
)
|
)
|
||||||
|
|
|
@ -15,6 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0.142"
|
serde_json = "1.0.142"
|
||||||
tar = "0.4"
|
tar = "0.4"
|
||||||
zstd = "0.13"
|
zstd = "0.13"
|
||||||
|
rustc-hash = "2.0.0"
|
||||||
|
|
||||||
[target.'cfg(not(windows))'.dependencies]
|
[target.'cfg(not(windows))'.dependencies]
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
|
|
|
@ -1,21 +1,30 @@
|
||||||
use std::io::Write;
|
use rustc_hash::FxHashMap;
|
||||||
|
use std::io::{Read, Write};
|
||||||
use tar::Header;
|
use tar::Header;
|
||||||
|
|
||||||
#[derive(serde::Serialize, serde::Deserialize)]
|
#[derive(serde::Serialize, serde::Deserialize)]
|
||||||
struct Manifest {
|
pub struct Manifest {
|
||||||
kernel_name: String,
|
pub kernel_name: String,
|
||||||
parameters: Vec<Parameter>,
|
pub config: LaunchConfig,
|
||||||
|
pub parameters: Vec<Parameter>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize, serde::Deserialize)]
|
#[derive(serde::Serialize, serde::Deserialize)]
|
||||||
struct Parameter {
|
pub struct LaunchConfig {
|
||||||
pointer_offsets: Vec<ParameterPointer>,
|
pub grid_dim: (u32, u32, u32),
|
||||||
|
pub block_dim: (u32, u32, u32),
|
||||||
|
pub shared_mem_bytes: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize, serde::Deserialize)]
|
#[derive(serde::Serialize, serde::Deserialize)]
|
||||||
struct ParameterPointer {
|
pub struct Parameter {
|
||||||
offset_in_param: usize,
|
pub pointer_offsets: Vec<ParameterPointer>,
|
||||||
offset_in_buffer: usize,
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize, serde::Deserialize)]
|
||||||
|
pub struct ParameterPointer {
|
||||||
|
pub offset_in_param: usize,
|
||||||
|
pub offset_in_buffer: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Manifest {
|
impl Manifest {
|
||||||
|
@ -37,6 +46,7 @@ pub struct KernelParameter {
|
||||||
pub fn save(
|
pub fn save(
|
||||||
writer: impl Write,
|
writer: impl Write,
|
||||||
kernel_name: String,
|
kernel_name: String,
|
||||||
|
config: LaunchConfig,
|
||||||
source: String,
|
source: String,
|
||||||
kernel_params: Vec<KernelParameter>,
|
kernel_params: Vec<KernelParameter>,
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
|
@ -44,6 +54,7 @@ pub fn save(
|
||||||
let mut builder = tar::Builder::new(archive);
|
let mut builder = tar::Builder::new(archive);
|
||||||
let (mut header, manifest) = Manifest {
|
let (mut header, manifest) = Manifest {
|
||||||
kernel_name,
|
kernel_name,
|
||||||
|
config,
|
||||||
parameters: kernel_params
|
parameters: kernel_params
|
||||||
.iter()
|
.iter()
|
||||||
.map(|param| Parameter {
|
.map(|param| Parameter {
|
||||||
|
@ -85,3 +96,34 @@ pub fn save(
|
||||||
builder.into_inner()?.finish()?;
|
builder.into_inner()?.finish()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn load(reader: impl Read) -> (Manifest, String, FxHashMap<String, Vec<u8>>) {
|
||||||
|
let archive = zstd::Decoder::new(reader).unwrap();
|
||||||
|
let mut archive = tar::Archive::new(archive);
|
||||||
|
let mut manifest = None;
|
||||||
|
let mut source = None;
|
||||||
|
let mut buffers = FxHashMap::default();
|
||||||
|
for entry in archive.entries().unwrap() {
|
||||||
|
let mut entry = entry.unwrap();
|
||||||
|
let path = entry.path().unwrap().to_string_lossy().to_string();
|
||||||
|
match &*path {
|
||||||
|
Manifest::PATH => {
|
||||||
|
manifest = Some(serde_json::from_reader::<_, Manifest>(&mut entry).unwrap());
|
||||||
|
}
|
||||||
|
"source.ptx" => {
|
||||||
|
let mut string = String::new();
|
||||||
|
entry.read_to_string(&mut string).unwrap();
|
||||||
|
dbg!(string.len());
|
||||||
|
source = Some(string);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
entry.read_to_end(&mut buffer).unwrap();
|
||||||
|
buffers.insert(path, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let manifest = manifest.unwrap();
|
||||||
|
let source = source.unwrap();
|
||||||
|
(manifest, source, buffers)
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue