mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-26 03:09:05 +00:00
Implement isspacep
This commit is contained in:
parent
af0216b1a0
commit
4363545d0e
7 changed files with 208 additions and 19 deletions
|
@ -475,6 +475,7 @@ pub enum Instruction<P: ArgParams> {
|
||||||
MatchAny(Arg3<P>),
|
MatchAny(Arg3<P>),
|
||||||
Red(AtomDetails, Arg2St<P>),
|
Red(AtomDetails, Arg2St<P>),
|
||||||
Nanosleep(Arg1<P>),
|
Nanosleep(Arg1<P>),
|
||||||
|
Isspacep(StateSpace, Arg2<P>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
|
103
ptx/src/emit.rs
103
ptx/src/emit.rs
|
@ -1137,6 +1137,7 @@ fn emit_instruction(
|
||||||
ast::Instruction::Vshr(arg) => emit_inst_vshr(ctx, arg)?,
|
ast::Instruction::Vshr(arg) => emit_inst_vshr(ctx, arg)?,
|
||||||
ast::Instruction::Set(details, arg) => emit_inst_set(ctx, details, arg)?,
|
ast::Instruction::Set(details, arg) => emit_inst_set(ctx, details, arg)?,
|
||||||
ast::Instruction::Red(details, arg) => emit_inst_red(ctx, details, arg)?,
|
ast::Instruction::Red(details, arg) => emit_inst_red(ctx, details, arg)?,
|
||||||
|
ast::Instruction::Isspacep(space, arg) => emit_inst_isspacep(ctx, *space, arg)?,
|
||||||
// replaced by function calls or Statement variants
|
// replaced by function calls or Statement variants
|
||||||
ast::Instruction::Activemask { .. }
|
ast::Instruction::Activemask { .. }
|
||||||
| ast::Instruction::Bar(..)
|
| ast::Instruction::Bar(..)
|
||||||
|
@ -1161,6 +1162,70 @@ fn emit_instruction(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn emit_inst_isspacep(
|
||||||
|
ctx: &mut EmitContext,
|
||||||
|
space: ast::StateSpace,
|
||||||
|
arg: &ast::Arg2<ExpandedArgParams>,
|
||||||
|
) -> Result<(), TranslateError> {
|
||||||
|
match space {
|
||||||
|
ast::StateSpace::Local => {
|
||||||
|
emit_inst_isspacep_impl(ctx, Some(arg.dst), arg.src, b"llvm.amdgcn.is.private\0")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
ast::StateSpace::Shared => {
|
||||||
|
emit_inst_isspacep_impl(ctx, Some(arg.dst), arg.src, b"llvm.amdgcn.is.shared\0")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
ast::StateSpace::Global => {
|
||||||
|
let builder = ctx.builder.get();
|
||||||
|
let is_private =
|
||||||
|
emit_inst_isspacep_impl(ctx, None, arg.src, b"llvm.amdgcn.is.private\0")?;
|
||||||
|
let is_shared =
|
||||||
|
emit_inst_isspacep_impl(ctx, None, arg.src, b"llvm.amdgcn.is.shared\0")?;
|
||||||
|
let private_or_shared =
|
||||||
|
unsafe { LLVMBuildOr(builder, is_private, is_shared, LLVM_UNNAMED) };
|
||||||
|
let i1_true = unsafe {
|
||||||
|
LLVMConstInt(
|
||||||
|
get_llvm_type(ctx, &ast::Type::Scalar(ast::ScalarType::Pred))?,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
ctx.names.register_result(arg.dst, |dst| unsafe {
|
||||||
|
// I'd rathr user LLVMBuildNeg(...), but when using LLVMBuildNeg(...) in LLVM 15,
|
||||||
|
// LLVM emits this broken IR:
|
||||||
|
// %"14" = sub i1 false, %4
|
||||||
|
LLVMBuildSub(builder, i1_true, private_or_shared, dst)
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
_ => Err(TranslateError::unreachable()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_inst_isspacep_impl(
|
||||||
|
ctx: &mut EmitContext,
|
||||||
|
dst: Option<Id>,
|
||||||
|
src: Id,
|
||||||
|
intrinsic: &[u8],
|
||||||
|
) -> Result<LLVMValueRef, TranslateError> {
|
||||||
|
let src = ctx.names.value(src)?;
|
||||||
|
let b8 = get_llvm_type(ctx, &ast::Type::Scalar(ast::ScalarType::B8))?;
|
||||||
|
let b8_generic_ptr = unsafe {
|
||||||
|
LLVMPointerType(
|
||||||
|
b8,
|
||||||
|
get_llvm_address_space(&ctx.constants, ast::StateSpace::Generic)?,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
let src = unsafe { LLVMBuildIntToPtr(ctx.builder.get(), src, b8_generic_ptr, LLVM_UNNAMED) };
|
||||||
|
emit_intrinsic_arg2(
|
||||||
|
ctx,
|
||||||
|
(ast::ScalarType::Pred, dst),
|
||||||
|
(ast::ScalarType::B8, ast::StateSpace::Generic, src),
|
||||||
|
intrinsic,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
fn emit_inst_red(
|
fn emit_inst_red(
|
||||||
ctx: &mut EmitContext,
|
ctx: &mut EmitContext,
|
||||||
details: &ast::AtomDetails,
|
details: &ast::AtomDetails,
|
||||||
|
@ -1397,7 +1462,7 @@ fn emit_inst_abs(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(details.typ, Some(args.dst)),
|
(details.typ, Some(args.dst)),
|
||||||
(details.typ, args.src),
|
(details.typ, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_name.as_bytes(),
|
intrinsic_name.as_bytes(),
|
||||||
)?;
|
)?;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1565,7 +1630,7 @@ fn emit_inst_rsqrt(
|
||||||
let sqrt_result = emit_intrinsic_arg2(
|
let sqrt_result = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(details.typ, None),
|
(details.typ, None),
|
||||||
(details.typ, args.src),
|
(details.typ, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(sqrt_result, FastMathFlags::ApproxFunc) };
|
unsafe { LLVMZludaSetFastMathFlags(sqrt_result, FastMathFlags::ApproxFunc) };
|
||||||
|
@ -1623,7 +1688,7 @@ fn emit_inst_sqrt(
|
||||||
let sqrt_result = emit_intrinsic_arg2(
|
let sqrt_result = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(details.type_, Some(args.dst)),
|
(details.type_, Some(args.dst)),
|
||||||
(details.type_, args.src),
|
(details.type_, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(sqrt_result, fast_math) };
|
unsafe { LLVMZludaSetFastMathFlags(sqrt_result, fast_math) };
|
||||||
|
@ -2468,7 +2533,7 @@ fn emit_inst_cvt(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(type_, Some(args.dst)),
|
(type_, Some(args.dst)),
|
||||||
(type_, args.src),
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
@ -2482,7 +2547,7 @@ fn emit_inst_cvt(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(type_, Some(args.dst)),
|
(type_, Some(args.dst)),
|
||||||
(type_, args.src),
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
@ -2496,7 +2561,7 @@ fn emit_inst_cvt(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(type_, Some(args.dst)),
|
(type_, Some(args.dst)),
|
||||||
(type_, args.src),
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
@ -2510,7 +2575,7 @@ fn emit_inst_cvt(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(type_, Some(args.dst)),
|
(type_, Some(args.dst)),
|
||||||
(type_, args.src),
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
intrinsic_fn,
|
intrinsic_fn,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
@ -2676,7 +2741,7 @@ fn emit_inst_cos(
|
||||||
let cos_value = emit_intrinsic_arg2(
|
let cos_value = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(ast::ScalarType::F32, Some(args.dst)),
|
(ast::ScalarType::F32, Some(args.dst)),
|
||||||
(ast::ScalarType::F32, args.src),
|
(ast::ScalarType::F32, ast::StateSpace::Reg, args.src),
|
||||||
function_name,
|
function_name,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(cos_value, FastMathFlags::ApproxFunc) };
|
unsafe { LLVMZludaSetFastMathFlags(cos_value, FastMathFlags::ApproxFunc) };
|
||||||
|
@ -2691,7 +2756,7 @@ fn emit_inst_sin(
|
||||||
let cos_value = emit_intrinsic_arg2(
|
let cos_value = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(ast::ScalarType::F32, Some(args.dst)),
|
(ast::ScalarType::F32, Some(args.dst)),
|
||||||
(ast::ScalarType::F32, args.src),
|
(ast::ScalarType::F32, ast::StateSpace::Reg, args.src),
|
||||||
function_name,
|
function_name,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(cos_value, FastMathFlags::ApproxFunc) };
|
unsafe { LLVMZludaSetFastMathFlags(cos_value, FastMathFlags::ApproxFunc) };
|
||||||
|
@ -2895,7 +2960,7 @@ fn emit_inst_brev(
|
||||||
emit_intrinsic_arg2(
|
emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(type_, Some(args.dst)),
|
(type_, Some(args.dst)),
|
||||||
(type_, args.src),
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
function_name,
|
function_name,
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -2913,8 +2978,12 @@ fn emit_inst_popc(
|
||||||
_ => return Err(TranslateError::unreachable()),
|
_ => return Err(TranslateError::unreachable()),
|
||||||
};
|
};
|
||||||
let popc_dst = if shorten { None } else { Some(args.dst) };
|
let popc_dst = if shorten { None } else { Some(args.dst) };
|
||||||
let popc_result =
|
let popc_result = emit_intrinsic_arg2(
|
||||||
emit_intrinsic_arg2(ctx, (type_, popc_dst), (type_, args.src), function_name)?;
|
ctx,
|
||||||
|
(type_, popc_dst),
|
||||||
|
(type_, ast::StateSpace::Reg, args.src),
|
||||||
|
function_name,
|
||||||
|
)?;
|
||||||
if shorten {
|
if shorten {
|
||||||
let llvm_i32 = get_llvm_type(ctx, &ast::Type::Scalar(ast::ScalarType::U32))?;
|
let llvm_i32 = get_llvm_type(ctx, &ast::Type::Scalar(ast::ScalarType::U32))?;
|
||||||
ctx.names.register_result(args.dst, |dst_name| unsafe {
|
ctx.names.register_result(args.dst, |dst_name| unsafe {
|
||||||
|
@ -2932,7 +3001,7 @@ fn emit_inst_ex2(
|
||||||
let llvm_value = emit_intrinsic_arg2(
|
let llvm_value = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(ast::ScalarType::F32, Some(args.dst)),
|
(ast::ScalarType::F32, Some(args.dst)),
|
||||||
(ast::ScalarType::F32, args.src),
|
(ast::ScalarType::F32, ast::StateSpace::Reg, args.src),
|
||||||
function_name,
|
function_name,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(llvm_value, FastMathFlags::ApproxFunc) };
|
unsafe { LLVMZludaSetFastMathFlags(llvm_value, FastMathFlags::ApproxFunc) };
|
||||||
|
@ -2947,7 +3016,7 @@ fn emit_inst_lg2(
|
||||||
let llvm_value = emit_intrinsic_arg2(
|
let llvm_value = emit_intrinsic_arg2(
|
||||||
ctx,
|
ctx,
|
||||||
(ast::ScalarType::F32, Some(args.dst)),
|
(ast::ScalarType::F32, Some(args.dst)),
|
||||||
(ast::ScalarType::F32, args.src),
|
(ast::ScalarType::F32, ast::StateSpace::Reg, args.src),
|
||||||
function_name,
|
function_name,
|
||||||
)?;
|
)?;
|
||||||
unsafe { LLVMZludaSetFastMathFlags(llvm_value, FastMathFlags::ApproxFunc) };
|
unsafe { LLVMZludaSetFastMathFlags(llvm_value, FastMathFlags::ApproxFunc) };
|
||||||
|
@ -2986,16 +3055,16 @@ fn emit_intrinsic_arg0(
|
||||||
fn emit_intrinsic_arg2(
|
fn emit_intrinsic_arg2(
|
||||||
ctx: &mut EmitContext,
|
ctx: &mut EmitContext,
|
||||||
(dst_type, dst): (ast::ScalarType, Option<Id>),
|
(dst_type, dst): (ast::ScalarType, Option<Id>),
|
||||||
(src_type, src): (ast::ScalarType, Id),
|
(src_type, src_space, src): (ast::ScalarType, ast::StateSpace, impl GetLLVMValue),
|
||||||
intrinsic_name: &[u8],
|
intrinsic_name: &[u8],
|
||||||
) -> Result<LLVMValueRef, TranslateError> {
|
) -> Result<LLVMValueRef, TranslateError> {
|
||||||
let builder = ctx.builder.get();
|
let builder = ctx.builder.get();
|
||||||
let mut llvm_src = ctx.names.value(src)?;
|
let mut llvm_src = src.get_llvm_value(&mut ctx.names)?;
|
||||||
let dst_type = get_llvm_type(ctx, &ast::Type::Scalar(dst_type))?;
|
let dst_type = get_llvm_type(ctx, &ast::Type::Scalar(dst_type))?;
|
||||||
let function_type = get_llvm_function_type(
|
let function_type = get_llvm_function_type(
|
||||||
ctx,
|
ctx,
|
||||||
dst_type,
|
dst_type,
|
||||||
iter::once((&ast::Type::Scalar(src_type), ast::StateSpace::Reg)),
|
iter::once((&ast::Type::Scalar(src_type), src_space)),
|
||||||
)?;
|
)?;
|
||||||
let mut function_value =
|
let mut function_value =
|
||||||
unsafe { LLVMGetNamedFunction(ctx.module.get(), intrinsic_name.as_ptr() as _) };
|
unsafe { LLVMGetNamedFunction(ctx.module.get(), intrinsic_name.as_ptr() as _) };
|
||||||
|
|
|
@ -201,6 +201,7 @@ match {
|
||||||
"function_name",
|
"function_name",
|
||||||
"generic",
|
"generic",
|
||||||
"inlined_at",
|
"inlined_at",
|
||||||
|
"isspacep",
|
||||||
"ld",
|
"ld",
|
||||||
"ldmatrix",
|
"ldmatrix",
|
||||||
"lg2",
|
"lg2",
|
||||||
|
@ -282,6 +283,7 @@ ExtendedID : &'input str = {
|
||||||
"function_name",
|
"function_name",
|
||||||
"generic",
|
"generic",
|
||||||
"inlined_at",
|
"inlined_at",
|
||||||
|
"isspacep",
|
||||||
"ld",
|
"ld",
|
||||||
"ldmatrix",
|
"ldmatrix",
|
||||||
"lg2",
|
"lg2",
|
||||||
|
@ -839,6 +841,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
InstMatch,
|
InstMatch,
|
||||||
InstRed,
|
InstRed,
|
||||||
InstNanosleep,
|
InstNanosleep,
|
||||||
|
InstIsspacep,
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||||
|
@ -2372,6 +2375,19 @@ InstNanosleep: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-nanosleep
|
||||||
|
InstIsspacep: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
|
"isspacep" ".local" <a:Arg2> => {
|
||||||
|
ast::Instruction::Isspacep(ast::StateSpace::Local, a)
|
||||||
|
},
|
||||||
|
"isspacep" ".shared" <a:Arg2> => {
|
||||||
|
ast::Instruction::Isspacep(ast::StateSpace::Shared, a)
|
||||||
|
},
|
||||||
|
"isspacep" ".global" <a:Arg2> => {
|
||||||
|
ast::Instruction::Isspacep(ast::StateSpace::Global, a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
NegTypeFtz: ast::ScalarType = {
|
NegTypeFtz: ast::ScalarType = {
|
||||||
".f16" => ast::ScalarType::F16,
|
".f16" => ast::ScalarType::F16,
|
||||||
".f16x2" => ast::ScalarType::F16x2,
|
".f16x2" => ast::ScalarType::F16x2,
|
||||||
|
|
57
ptx/src/test/spirv_run/isspacep.ll
Normal file
57
ptx/src/test/spirv_run/isspacep.ll
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
|
||||||
|
target triple = "amdgcn-amd-amdhsa"
|
||||||
|
|
||||||
|
define protected amdgpu_kernel void @isspacep(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||||
|
"36":
|
||||||
|
%"10" = alloca i1, align 1, addrspace(5)
|
||||||
|
store i1 false, ptr addrspace(5) %"10", align 1
|
||||||
|
%"11" = alloca i1, align 1, addrspace(5)
|
||||||
|
store i1 false, ptr addrspace(5) %"11", align 1
|
||||||
|
%"4" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"5" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"6" = alloca i1, align 1, addrspace(5)
|
||||||
|
%"7" = alloca i1, align 1, addrspace(5)
|
||||||
|
%"8" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"9" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"12" = load i64, ptr addrspace(4) %"32", align 8
|
||||||
|
store i64 %"12", ptr addrspace(5) %"4", align 8
|
||||||
|
%"13" = load i64, ptr addrspace(4) %"33", align 8
|
||||||
|
store i64 %"13", ptr addrspace(5) %"5", align 8
|
||||||
|
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||||
|
%0 = inttoptr i64 %"15" to ptr
|
||||||
|
%1 = call i1 @llvm.amdgcn.is.private(ptr %0)
|
||||||
|
%2 = inttoptr i64 %"15" to ptr
|
||||||
|
%3 = call i1 @llvm.amdgcn.is.shared(ptr %2)
|
||||||
|
%4 = or i1 %1, %3
|
||||||
|
%"14" = sub i1 true, %4
|
||||||
|
store i1 %"14", ptr addrspace(5) %"6", align 1
|
||||||
|
%"17" = load i1, ptr addrspace(5) %"6", align 1
|
||||||
|
%"16" = select i1 %"17", i32 1, i32 0
|
||||||
|
store i32 %"16", ptr addrspace(5) %"8", align 4
|
||||||
|
%"19" = load i64, ptr addrspace(5) %"4", align 8
|
||||||
|
%5 = inttoptr i64 %"19" to ptr
|
||||||
|
%"18" = call i1 @llvm.amdgcn.is.shared(ptr %5)
|
||||||
|
store i1 %"18", ptr addrspace(5) %"7", align 1
|
||||||
|
%"21" = load i1, ptr addrspace(5) %"7", align 1
|
||||||
|
%"20" = select i1 %"21", i32 1, i32 0
|
||||||
|
store i32 %"20", ptr addrspace(5) %"9", align 4
|
||||||
|
%"22" = load i64, ptr addrspace(5) %"5", align 8
|
||||||
|
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||||
|
%"34" = inttoptr i64 %"22" to ptr
|
||||||
|
store i32 %"23", ptr %"34", align 4
|
||||||
|
%"24" = load i64, ptr addrspace(5) %"5", align 8
|
||||||
|
%"25" = load i32, ptr addrspace(5) %"9", align 4
|
||||||
|
%"35" = inttoptr i64 %"24" to ptr
|
||||||
|
%"38" = getelementptr inbounds i8, ptr %"35", i64 4
|
||||||
|
store i32 %"25", ptr %"38", align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readnone speculatable willreturn
|
||||||
|
declare i1 @llvm.amdgcn.is.private(ptr nocapture) #1
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readnone speculatable willreturn
|
||||||
|
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #1
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { nounwind readnone speculatable willreturn }
|
28
ptx/src/test/spirv_run/isspacep.ptx
Normal file
28
ptx/src/test/spirv_run/isspacep.ptx
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry isspacep(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .pred is_global;
|
||||||
|
.reg .pred is_shared;
|
||||||
|
|
||||||
|
.reg .u32 is_global_u32;
|
||||||
|
.reg .u32 is_shared_u32;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
isspacep.global is_global, in_addr;
|
||||||
|
selp.u32 is_global_u32, 1, 0, is_global;
|
||||||
|
isspacep.shared is_shared, in_addr;
|
||||||
|
selp.u32 is_shared_u32, 1, 0, is_shared;
|
||||||
|
st.u32 [out_addr], is_global_u32;
|
||||||
|
st.u32 [out_addr+4], is_shared_u32;
|
||||||
|
ret;
|
||||||
|
}
|
|
@ -271,7 +271,11 @@ test_ptx!(const, [0u16], [10u16, 20, 30, 40]);
|
||||||
test_ptx!(cvt_s16_s8, [0x139231C2u32], [0xFFFFFFC2u32]);
|
test_ptx!(cvt_s16_s8, [0x139231C2u32], [0xFFFFFFC2u32]);
|
||||||
test_ptx!(cvt_f64_f32, [0.125f32], [0.125f64]);
|
test_ptx!(cvt_f64_f32, [0.125f32], [0.125f64]);
|
||||||
test_ptx!(cvt_f32_f16, [0xa1u16], [0x37210000u32]);
|
test_ptx!(cvt_f32_f16, [0xa1u16], [0x37210000u32]);
|
||||||
test_ptx!(prmt, [0x70c507d6u32, 0x6fbd4b5cu32], [0x6fbdd65cu32, 0x6FFFD600]);
|
test_ptx!(
|
||||||
|
prmt,
|
||||||
|
[0x70c507d6u32, 0x6fbd4b5cu32],
|
||||||
|
[0x6fbdd65cu32, 0x6FFFD600]
|
||||||
|
);
|
||||||
test_ptx!(
|
test_ptx!(
|
||||||
prmt_non_immediate,
|
prmt_non_immediate,
|
||||||
[0x70c507d6u32, 0x6fbd4b5cu32],
|
[0x70c507d6u32, 0x6fbd4b5cu32],
|
||||||
|
@ -336,7 +340,11 @@ test_ptx!(
|
||||||
[f16::from_f32(2.0), f16::from_f32(3.0)],
|
[f16::from_f32(2.0), f16::from_f32(3.0)],
|
||||||
[f16::from_f32(2.0), f16::from_f32(5.0)]
|
[f16::from_f32(2.0), f16::from_f32(5.0)]
|
||||||
);
|
);
|
||||||
test_ptx!(set_f16x2, [0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC], [0xffffu32, 0x3C000000]);
|
test_ptx!(
|
||||||
|
set_f16x2,
|
||||||
|
[0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC],
|
||||||
|
[0xffffu32, 0x3C000000]
|
||||||
|
);
|
||||||
test_ptx!(
|
test_ptx!(
|
||||||
dp4a,
|
dp4a,
|
||||||
[0xde3032f5u32, 0x2474fe15, 0xf51d8d6c],
|
[0xde3032f5u32, 0x2474fe15, 0xf51d8d6c],
|
||||||
|
@ -350,6 +358,7 @@ test_ptx!(
|
||||||
[1923569713u64, 1923569712],
|
[1923569713u64, 1923569712],
|
||||||
[1923569713u64, 1923569712]
|
[1923569713u64, 1923569712]
|
||||||
);
|
);
|
||||||
|
test_ptx!(isspacep, [0xDEADu32], [1u32, 0]);
|
||||||
|
|
||||||
test_ptx_warp!(
|
test_ptx_warp!(
|
||||||
shfl,
|
shfl,
|
||||||
|
|
|
@ -6594,6 +6594,14 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
|
||||||
ast::StateSpace::Reg,
|
ast::StateSpace::Reg,
|
||||||
)),
|
)),
|
||||||
)?),
|
)?),
|
||||||
|
ast::Instruction::Isspacep(space, arg) => ast::Instruction::Isspacep(
|
||||||
|
space,
|
||||||
|
arg.map_different_types(
|
||||||
|
visitor,
|
||||||
|
&ast::Type::Scalar(ast::ScalarType::Pred),
|
||||||
|
&ast::Type::Scalar(ast::ScalarType::U64),
|
||||||
|
)?,
|
||||||
|
),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6915,6 +6923,7 @@ impl<T: ast::ArgParams> ast::Instruction<T> {
|
||||||
ast::Instruction::Vshr { .. } => None,
|
ast::Instruction::Vshr { .. } => None,
|
||||||
ast::Instruction::Dp4a { .. } => None,
|
ast::Instruction::Dp4a { .. } => None,
|
||||||
ast::Instruction::MatchAny { .. } => None,
|
ast::Instruction::MatchAny { .. } => None,
|
||||||
|
ast::Instruction::Isspacep { .. } => None,
|
||||||
ast::Instruction::Sub(ast::ArithDetails::Signed(_), _) => None,
|
ast::Instruction::Sub(ast::ArithDetails::Signed(_), _) => None,
|
||||||
ast::Instruction::Sub(ast::ArithDetails::Unsigned(_), _) => None,
|
ast::Instruction::Sub(ast::ArithDetails::Unsigned(_), _) => None,
|
||||||
ast::Instruction::Add(ast::ArithDetails::Signed(_), _) => None,
|
ast::Instruction::Add(ast::ArithDetails::Signed(_), _) => None,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue