Implement .noreturn directive

This commit is contained in:
Andrzej Janik 2024-02-28 13:22:25 +01:00
commit c910a85685
6 changed files with 73 additions and 2 deletions

View file

@ -1316,6 +1316,7 @@ pub enum TuningDirective {
MaxNtid(u32, u32, u32),
ReqNtid(u32, u32, u32),
MinNCtaPerSm(u32),
Noreturn
}
#[repr(u8)]

View file

@ -575,6 +575,17 @@ fn emit_tuning_single<'a>(
format!("{0},{0}", size).as_bytes(),
);
}
ast::TuningDirective::Noreturn => {
let noreturn = b"noreturn";
let attr_kind = unsafe {
LLVMGetEnumAttributeKindForName(noreturn.as_ptr().cast(), noreturn.len())
};
if attr_kind == 0 {
panic!();
}
let noreturn = unsafe { LLVMCreateEnumAttribute(ctx.context.get(), attr_kind, 0) };
unsafe { LLVMAddAttributeAtIndex(llvm_method, LLVMAttributeFunctionIndex, noreturn) };
}
}
}

View file

@ -116,6 +116,7 @@ match {
".ne",
".neu",
".noftz",
".noreturn",
".num",
".or",
".param",
@ -531,6 +532,8 @@ LinkingDirective: ast::LinkingDirective = {
};
TuningDirective: ast::TuningDirective = {
// not a performance tuning directive but fits here in the grammar
".noreturn" => ast::TuningDirective::Noreturn,
".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta),
".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1),
".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1),

View file

@ -0,0 +1,19 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
; Function Attrs: noreturn
define private void @noreturn(i64 %"6") #0 {
"9":
%"3" = alloca i64, align 8, addrspace(5)
%"4" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"4", align 1
%"5" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"5", align 1
%"8" = alloca i64, align 8, addrspace(5)
store i64 %"6", ptr addrspace(5) %"3", align 8
%"7" = load i64, ptr addrspace(5) %"3", align 8
store i64 %"7", ptr addrspace(5) %"8", align 8
ret void
}
attributes #0 = { noreturn "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@ -0,0 +1,8 @@
.version 6.5
.target sm_30
.address_size 64
// Test input: a weak function taking one .b64 parameter, carrying the
// .noreturn directive, and having an empty body (no explicit `ret;`).
.weak .func noreturn(.param .b64 noreturn_0)
.noreturn
{
}

View file

@ -3345,6 +3345,7 @@ fn to_llvm_module_impl2<'a, 'input>(
if let Some(ref mut raytracing_state) = raytracing {
translation_module = raytracing::run_on_normalized(translation_module, raytracing_state)?;
}
let translation_module = return_from_noreturn(translation_module);
let translation_module = extract_builtin_functions(translation_module);
let translation_module = resolve_instruction_types(translation_module, functions)?;
let mut translation_module = restructure_function_return_types(translation_module)?;
@ -3392,6 +3393,32 @@ fn to_llvm_module_impl2<'a, 'input>(
})
}
// In PTX it's legal to have a function like this:
// .func noreturn(.param .b64 noreturn_0)
// .noreturn
// {
// }
// Which trips up LLVM. We normalize this by inserting `ret;`
fn return_from_noreturn(
mut translation_module: TranslationModule<NormalizedArgParams>,
) -> TranslationModule<NormalizedArgParams> {
for directive in translation_module.directives.iter_mut() {
match directive {
TranslationDirective::Method(method) => {
if let Some(ref mut body) = method.body {
if body.is_empty() && method.tuning.contains(&ast::TuningDirective::Noreturn) {
body.push(Statement::Instruction(ast::Instruction::Ret(
ast::RetData { uniform: false },
)));
}
}
}
TranslationDirective::Variable(..) => {}
}
}
translation_module
}
// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
@ -3586,7 +3613,8 @@ fn create_metadata<'input>(
match tuning {
// TODO: measure
ast::TuningDirective::MaxNReg(_)
| ast::TuningDirective::MinNCtaPerSm(_) => {}
| ast::TuningDirective::MinNCtaPerSm(_)
| ast::TuningDirective::Noreturn => {}
ast::TuningDirective::MaxNtid(x, y, z) => {
let size = x as u64 * y as u64 * z as u64;
kernel_metadata.push((
@ -3632,7 +3660,8 @@ fn insert_compilation_mode_prologue<'input>(
for t in tuning.iter_mut() {
match t {
ast::TuningDirective::MaxNReg(_)
| ast::TuningDirective::MinNCtaPerSm(_) => {}
| ast::TuningDirective::MinNCtaPerSm(_)
| ast::TuningDirective::Noreturn => {}
ast::TuningDirective::MaxNtid(_, _, z) => {
*z *= 2;
}