mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 06:40:21 +00:00
Implement .noreturn directive
This commit is contained in:
parent
4363545d0e
commit
c910a85685
6 changed files with 73 additions and 2 deletions
|
@ -1316,6 +1316,7 @@ pub enum TuningDirective {
|
||||||
MaxNtid(u32, u32, u32),
|
MaxNtid(u32, u32, u32),
|
||||||
ReqNtid(u32, u32, u32),
|
ReqNtid(u32, u32, u32),
|
||||||
MinNCtaPerSm(u32),
|
MinNCtaPerSm(u32),
|
||||||
|
Noreturn
|
||||||
}
|
}
|
||||||
|
|
||||||
#[repr(u8)]
|
#[repr(u8)]
|
||||||
|
|
|
@ -575,6 +575,17 @@ fn emit_tuning_single<'a>(
|
||||||
format!("{0},{0}", size).as_bytes(),
|
format!("{0},{0}", size).as_bytes(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
ast::TuningDirective::Noreturn => {
|
||||||
|
let noreturn = b"noreturn";
|
||||||
|
let attr_kind = unsafe {
|
||||||
|
LLVMGetEnumAttributeKindForName(noreturn.as_ptr().cast(), noreturn.len())
|
||||||
|
};
|
||||||
|
if attr_kind == 0 {
|
||||||
|
panic!();
|
||||||
|
}
|
||||||
|
let noreturn = unsafe { LLVMCreateEnumAttribute(ctx.context.get(), attr_kind, 0) };
|
||||||
|
unsafe { LLVMAddAttributeAtIndex(llvm_method, LLVMAttributeFunctionIndex, noreturn) };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -116,6 +116,7 @@ match {
|
||||||
".ne",
|
".ne",
|
||||||
".neu",
|
".neu",
|
||||||
".noftz",
|
".noftz",
|
||||||
|
".noreturn",
|
||||||
".num",
|
".num",
|
||||||
".or",
|
".or",
|
||||||
".param",
|
".param",
|
||||||
|
@ -531,6 +532,8 @@ LinkingDirective: ast::LinkingDirective = {
|
||||||
};
|
};
|
||||||
|
|
||||||
TuningDirective: ast::TuningDirective = {
|
TuningDirective: ast::TuningDirective = {
|
||||||
|
// not a performance tuning directive but fits here in the grammar
|
||||||
|
".noreturn" => ast::TuningDirective::Noreturn,
|
||||||
".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta),
|
".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta),
|
||||||
".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1),
|
".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1),
|
||||||
".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1),
|
".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1),
|
||||||
|
|
19
ptx/src/test/spirv_build/noreturn.ll
Normal file
19
ptx/src/test/spirv_build/noreturn.ll
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
|
||||||
|
target triple = "amdgcn-amd-amdhsa"
|
||||||
|
|
||||||
|
; Function Attrs: noreturn
|
||||||
|
define private void @noreturn(i64 %"6") #0 {
|
||||||
|
"9":
|
||||||
|
%"3" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"4" = alloca i1, align 1, addrspace(5)
|
||||||
|
store i1 false, ptr addrspace(5) %"4", align 1
|
||||||
|
%"5" = alloca i1, align 1, addrspace(5)
|
||||||
|
store i1 false, ptr addrspace(5) %"5", align 1
|
||||||
|
%"8" = alloca i64, align 8, addrspace(5)
|
||||||
|
store i64 %"6", ptr addrspace(5) %"3", align 8
|
||||||
|
%"7" = load i64, ptr addrspace(5) %"3", align 8
|
||||||
|
store i64 %"7", ptr addrspace(5) %"8", align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { noreturn "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
8
ptx/src/test/spirv_build/noreturn.ptx
Normal file
8
ptx/src/test/spirv_build/noreturn.ptx
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.weak .func noreturn(.param .b64 noreturn_0)
|
||||||
|
.noreturn
|
||||||
|
{
|
||||||
|
}
|
|
@ -3345,6 +3345,7 @@ fn to_llvm_module_impl2<'a, 'input>(
|
||||||
if let Some(ref mut raytracing_state) = raytracing {
|
if let Some(ref mut raytracing_state) = raytracing {
|
||||||
translation_module = raytracing::run_on_normalized(translation_module, raytracing_state)?;
|
translation_module = raytracing::run_on_normalized(translation_module, raytracing_state)?;
|
||||||
}
|
}
|
||||||
|
let translation_module = return_from_noreturn(translation_module);
|
||||||
let translation_module = extract_builtin_functions(translation_module);
|
let translation_module = extract_builtin_functions(translation_module);
|
||||||
let translation_module = resolve_instruction_types(translation_module, functions)?;
|
let translation_module = resolve_instruction_types(translation_module, functions)?;
|
||||||
let mut translation_module = restructure_function_return_types(translation_module)?;
|
let mut translation_module = restructure_function_return_types(translation_module)?;
|
||||||
|
@ -3392,6 +3393,32 @@ fn to_llvm_module_impl2<'a, 'input>(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In PTX it's legal to have a function like this:
|
||||||
|
// .func noreturn(.param .b64 noreturn_0)
|
||||||
|
// .noreturn
|
||||||
|
// {
|
||||||
|
// }
|
||||||
|
// Which trips up LLVM. We normalize this by inserting `ret;`
|
||||||
|
fn return_from_noreturn(
|
||||||
|
mut translation_module: TranslationModule<NormalizedArgParams>,
|
||||||
|
) -> TranslationModule<NormalizedArgParams> {
|
||||||
|
for directive in translation_module.directives.iter_mut() {
|
||||||
|
match directive {
|
||||||
|
TranslationDirective::Method(method) => {
|
||||||
|
if let Some(ref mut body) = method.body {
|
||||||
|
if body.is_empty() && method.tuning.contains(&ast::TuningDirective::Noreturn) {
|
||||||
|
body.push(Statement::Instruction(ast::Instruction::Ret(
|
||||||
|
ast::RetData { uniform: false },
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TranslationDirective::Variable(..) => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
translation_module
|
||||||
|
}
|
||||||
|
|
||||||
// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
|
// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
|
||||||
// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
|
// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
|
||||||
// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
|
// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
|
||||||
|
@ -3586,7 +3613,8 @@ fn create_metadata<'input>(
|
||||||
match tuning {
|
match tuning {
|
||||||
// TODO: measure
|
// TODO: measure
|
||||||
ast::TuningDirective::MaxNReg(_)
|
ast::TuningDirective::MaxNReg(_)
|
||||||
| ast::TuningDirective::MinNCtaPerSm(_) => {}
|
| ast::TuningDirective::MinNCtaPerSm(_)
|
||||||
|
| ast::TuningDirective::Noreturn => {}
|
||||||
ast::TuningDirective::MaxNtid(x, y, z) => {
|
ast::TuningDirective::MaxNtid(x, y, z) => {
|
||||||
let size = x as u64 * y as u64 * z as u64;
|
let size = x as u64 * y as u64 * z as u64;
|
||||||
kernel_metadata.push((
|
kernel_metadata.push((
|
||||||
|
@ -3632,7 +3660,8 @@ fn insert_compilation_mode_prologue<'input>(
|
||||||
for t in tuning.iter_mut() {
|
for t in tuning.iter_mut() {
|
||||||
match t {
|
match t {
|
||||||
ast::TuningDirective::MaxNReg(_)
|
ast::TuningDirective::MaxNReg(_)
|
||||||
| ast::TuningDirective::MinNCtaPerSm(_) => {}
|
| ast::TuningDirective::MinNCtaPerSm(_)
|
||||||
|
| ast::TuningDirective::Noreturn => {}
|
||||||
ast::TuningDirective::MaxNtid(_, _, z) => {
|
ast::TuningDirective::MaxNtid(_, _, z) => {
|
||||||
*z *= 2;
|
*z *= 2;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue