mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Implement bfi instruction
This commit is contained in:
parent
d3cd2dc8b4
commit
178ec59af6
8 changed files with 317 additions and 15 deletions
|
@ -136,6 +136,14 @@ long FUNC(bfe_s64)(long base, uint pos, uint len) {
|
|||
return intel_sbfe(base, pos, len);
|
||||
}
|
||||
|
||||
uint FUNC(bfi_b32)(uint base, uint insert, uint offset, uint count) {
|
||||
return intel_bfi(base, insert, offset, count);
|
||||
}
|
||||
|
||||
ulong FUNC(bfi_b64)(ulong base, ulong insert, uint offset, uint count) {
|
||||
return intel_bfi(base, insert, offset, count);
|
||||
}
|
||||
|
||||
void FUNC(__assertfail)(
|
||||
__private ulong* message,
|
||||
__private ulong* file,
|
||||
|
|
Binary file not shown.
|
@ -609,6 +609,7 @@ pub enum Instruction<P: ArgParams> {
|
|||
Popc { typ: BitType, arg: Arg2<P> },
|
||||
Xor { typ: BooleanType, arg: Arg3<P> },
|
||||
Bfe { typ: IntType, arg: Arg4<P> },
|
||||
Bfi { typ: BitType, arg: Arg5<P> },
|
||||
Rem { typ: IntType, arg: Arg3<P> },
|
||||
}
|
||||
|
||||
|
@ -695,6 +696,14 @@ pub struct Arg4Setp<P: ArgParams> {
|
|||
pub src2: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg5<P: ArgParams> {
|
||||
pub dst: P::Operand,
|
||||
pub src1: P::Operand,
|
||||
pub src2: P::Operand,
|
||||
pub src3: P::Operand,
|
||||
pub src4: P::Operand,
|
||||
}
|
||||
|
||||
pub struct Arg5Setp<P: ArgParams> {
|
||||
pub dst1: P::Id,
|
||||
pub dst2: Option<P::Id>,
|
||||
|
|
|
@ -143,6 +143,7 @@ match {
|
|||
"bar",
|
||||
"barrier",
|
||||
"bfe",
|
||||
"bfi",
|
||||
"bra",
|
||||
"brev",
|
||||
"call",
|
||||
|
@ -196,6 +197,7 @@ ExtendedID : &'input str = {
|
|||
"bar",
|
||||
"barrier",
|
||||
"bfe",
|
||||
"bfi",
|
||||
"bra",
|
||||
"brev",
|
||||
"call",
|
||||
|
@ -727,6 +729,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
|||
InstXor,
|
||||
InstRem,
|
||||
InstBfe,
|
||||
InstBfi,
|
||||
};
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||
|
@ -1658,6 +1661,11 @@ InstBfe: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
|||
"bfe" <typ:IntType3264> <arg:Arg4> => ast::Instruction::Bfe{ <> }
|
||||
}
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi
|
||||
InstBfi: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"bfi" <typ:BitType> <arg:Arg5> => ast::Instruction::Bfi{ <> }
|
||||
}
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem
|
||||
InstRem: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"rem" <typ:IntType> <arg:Arg3> => ast::Instruction::Rem{ <> }
|
||||
|
@ -1843,6 +1851,10 @@ Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
|
|||
<dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>}
|
||||
};
|
||||
|
||||
Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
|
||||
<dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> "," <src4:Operand> => ast::Arg5{<>}
|
||||
};
|
||||
|
||||
// TODO: pass src3 negation somewhere
|
||||
Arg5Setp: ast::Arg5Setp<ast::ParsedArgParams<'input>> = {
|
||||
<dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => ast::Arg5Setp{<>}
|
||||
|
|
24
ptx/src/test/spirv_run/bfi.ptx
Normal file
24
ptx/src/test/spirv_run/bfi.ptx
Normal file
|
@ -0,0 +1,24 @@
|
|||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
|
||||
.visible .entry bfi(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .u32 temp<4>;
|
||||
|
||||
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
ld.u32 temp0, [in_addr];
|
||||
ld.u32 temp1, [in_addr+4];
|
||||
ld.u32 temp2, [in_addr+8];
|
||||
ld.u32 temp3, [in_addr+12];
|
||||
bfi.b32 temp0, temp0, temp1, temp2, temp3;
|
||||
st.u32 [out_addr], temp0;
|
||||
ret;
|
||||
}
|
82
ptx/src/test/spirv_run/bfi.spvtxt
Normal file
82
ptx/src/test/spirv_run/bfi.spvtxt
Normal file
|
@ -0,0 +1,82 @@
|
|||
OpCapability GenericPointer
|
||||
OpCapability Linkage
|
||||
OpCapability Addresses
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpCapability Int16
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Float64
|
||||
%51 = OpExtInstImport "OpenCL.std"
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "bfi"
|
||||
OpDecorate %44 LinkageAttributes "__zluda_ptx_impl__bfi_b32" Import
|
||||
%void = OpTypeVoid
|
||||
%uint = OpTypeInt 32 0
|
||||
%54 = OpTypeFunction %uint %uint %uint %uint %uint
|
||||
%ulong = OpTypeInt 64 0
|
||||
%56 = OpTypeFunction %void %ulong %ulong
|
||||
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%_ptr_Generic_uint = OpTypePointer Generic %uint
|
||||
%ulong_4 = OpConstant %ulong 4
|
||||
%ulong_8 = OpConstant %ulong 8
|
||||
%ulong_12 = OpConstant %ulong 12
|
||||
%44 = OpFunction %uint None %54
|
||||
%46 = OpFunctionParameter %uint
|
||||
%47 = OpFunctionParameter %uint
|
||||
%48 = OpFunctionParameter %uint
|
||||
%49 = OpFunctionParameter %uint
|
||||
OpFunctionEnd
|
||||
%1 = OpFunction %void None %56
|
||||
%10 = OpFunctionParameter %ulong
|
||||
%11 = OpFunctionParameter %ulong
|
||||
%43 = OpLabel
|
||||
%2 = OpVariable %_ptr_Function_ulong Function
|
||||
%3 = OpVariable %_ptr_Function_ulong Function
|
||||
%4 = OpVariable %_ptr_Function_ulong Function
|
||||
%5 = OpVariable %_ptr_Function_ulong Function
|
||||
%6 = OpVariable %_ptr_Function_uint Function
|
||||
%7 = OpVariable %_ptr_Function_uint Function
|
||||
%8 = OpVariable %_ptr_Function_uint Function
|
||||
%9 = OpVariable %_ptr_Function_uint Function
|
||||
OpStore %2 %10
|
||||
OpStore %3 %11
|
||||
%12 = OpLoad %ulong %2 Aligned 8
|
||||
OpStore %4 %12
|
||||
%13 = OpLoad %ulong %3 Aligned 8
|
||||
OpStore %5 %13
|
||||
%15 = OpLoad %ulong %4
|
||||
%35 = OpConvertUToPtr %_ptr_Generic_uint %15
|
||||
%14 = OpLoad %uint %35 Aligned 4
|
||||
OpStore %6 %14
|
||||
%17 = OpLoad %ulong %4
|
||||
%30 = OpIAdd %ulong %17 %ulong_4
|
||||
%36 = OpConvertUToPtr %_ptr_Generic_uint %30
|
||||
%16 = OpLoad %uint %36 Aligned 4
|
||||
OpStore %7 %16
|
||||
%19 = OpLoad %ulong %4
|
||||
%32 = OpIAdd %ulong %19 %ulong_8
|
||||
%37 = OpConvertUToPtr %_ptr_Generic_uint %32
|
||||
%18 = OpLoad %uint %37 Aligned 4
|
||||
OpStore %8 %18
|
||||
%21 = OpLoad %ulong %4
|
||||
%34 = OpIAdd %ulong %21 %ulong_12
|
||||
%38 = OpConvertUToPtr %_ptr_Generic_uint %34
|
||||
%20 = OpLoad %uint %38 Aligned 4
|
||||
OpStore %9 %20
|
||||
%23 = OpLoad %uint %6
|
||||
%24 = OpLoad %uint %7
|
||||
%25 = OpLoad %uint %8
|
||||
%26 = OpLoad %uint %9
|
||||
%40 = OpCopyObject %uint %23
|
||||
%41 = OpCopyObject %uint %24
|
||||
%39 = OpFunctionCall %uint %44 %41 %40 %25 %26
|
||||
%22 = OpCopyObject %uint %39
|
||||
OpStore %6 %22
|
||||
%27 = OpLoad %ulong %5
|
||||
%28 = OpLoad %uint %6
|
||||
%42 = OpConvertUToPtr %_ptr_Generic_uint %27
|
||||
OpStore %42 %28 Aligned 4
|
||||
OpReturn
|
||||
OpFunctionEnd
|
|
@ -139,6 +139,11 @@ test_ptx!(
|
|||
[0b11111000_11000001_00100010_10100000u32, 16u32, 8u32],
|
||||
[0b11000001u32]
|
||||
);
|
||||
test_ptx!(
|
||||
bfi,
|
||||
[0b10u32, 0b101u32, 0u32, 2u32],
|
||||
[0b110u32]
|
||||
);
|
||||
test_ptx!(stateful_ld_st_simple, [121u64], [121u64]);
|
||||
test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]);
|
||||
test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]);
|
||||
|
|
|
@ -1451,6 +1451,9 @@ fn extract_globals<'input, 'b>(
|
|||
Statement::Instruction(ast::Instruction::Bfe { typ, arg }) => {
|
||||
local.push(to_ptx_impl_bfe_call(id_def, ptx_impl_imports, typ, arg));
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Bfi { typ, arg }) => {
|
||||
local.push(to_ptx_impl_bfi_call(id_def, ptx_impl_imports, typ, arg));
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Atom(
|
||||
d
|
||||
@
|
||||
|
@ -1844,6 +1847,109 @@ fn to_ptx_impl_bfe_call(
|
|||
})
|
||||
}
|
||||
|
||||
fn to_ptx_impl_bfi_call(
|
||||
id_defs: &mut NumericIdResolver,
|
||||
ptx_impl_imports: &mut HashMap<String, Directive>,
|
||||
typ: ast::BitType,
|
||||
arg: ast::Arg5<ExpandedArgParams>,
|
||||
) -> ExpandedStatement {
|
||||
let prefix = "__zluda_ptx_impl__";
|
||||
let suffix = match typ {
|
||||
ast::BitType::B32 => "bfi_b32",
|
||||
ast::BitType::B64 => "bfi_b64",
|
||||
ast::BitType::B8 | ast::BitType::B16 => unreachable!(),
|
||||
};
|
||||
let fn_name = format!("{}{}", prefix, suffix);
|
||||
let fn_id = match ptx_impl_imports.entry(fn_name) {
|
||||
hash_map::Entry::Vacant(entry) => {
|
||||
let fn_id = id_defs.new_non_variable(None);
|
||||
let func_decl = ast::MethodDecl::Func::<spirv::Word>(
|
||||
vec![ast::FnArgument {
|
||||
align: None,
|
||||
v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
name: id_defs.new_non_variable(None),
|
||||
array_init: Vec::new(),
|
||||
}],
|
||||
fn_id,
|
||||
vec![
|
||||
ast::FnArgument {
|
||||
align: None,
|
||||
v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
name: id_defs.new_non_variable(None),
|
||||
array_init: Vec::new(),
|
||||
},
|
||||
ast::FnArgument {
|
||||
align: None,
|
||||
v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
name: id_defs.new_non_variable(None),
|
||||
array_init: Vec::new(),
|
||||
},
|
||||
ast::FnArgument {
|
||||
align: None,
|
||||
v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(
|
||||
ast::ScalarType::U32,
|
||||
)),
|
||||
name: id_defs.new_non_variable(None),
|
||||
array_init: Vec::new(),
|
||||
},
|
||||
ast::FnArgument {
|
||||
align: None,
|
||||
v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(
|
||||
ast::ScalarType::U32,
|
||||
)),
|
||||
name: id_defs.new_non_variable(None),
|
||||
array_init: Vec::new(),
|
||||
},
|
||||
],
|
||||
);
|
||||
let spirv_decl = SpirvMethodDecl::new(&func_decl);
|
||||
let func = Function {
|
||||
func_decl,
|
||||
globals: Vec::new(),
|
||||
body: None,
|
||||
import_as: Some(entry.key().clone()),
|
||||
spirv_decl,
|
||||
};
|
||||
entry.insert(Directive::Method(func));
|
||||
fn_id
|
||||
}
|
||||
hash_map::Entry::Occupied(entry) => match entry.get() {
|
||||
Directive::Method(Function {
|
||||
func_decl: ast::MethodDecl::Func(_, name, _),
|
||||
..
|
||||
}) => *name,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
};
|
||||
Statement::Call(ResolvedCall {
|
||||
uniform: false,
|
||||
func: fn_id,
|
||||
ret_params: vec![(
|
||||
arg.dst,
|
||||
ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
)],
|
||||
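// Note: PTX `bfi` takes its operands as (insert, base, ...), whereas the imported helper (like SPIR-V's OpBitFieldInsert) expects (base, insert, ...), so src1 and src2 are swapped below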
|
||||
param_list: vec![
|
||||
(
|
||||
arg.src2,
|
||||
ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
),
|
||||
(
|
||||
arg.src1,
|
||||
ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())),
|
||||
),
|
||||
(
|
||||
arg.src3,
|
||||
ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)),
|
||||
),
|
||||
(
|
||||
arg.src4,
|
||||
ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)),
|
||||
),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
fn to_resolved_fn_args<T>(
|
||||
params: Vec<T>,
|
||||
params_decl: &[ast::FnArgumentType],
|
||||
|
@ -3102,21 +3208,13 @@ fn emit_function_body_ops(
|
|||
let result_type = map.get_or_add_scalar(builder, (*typ).into());
|
||||
builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?;
|
||||
}
|
||||
ast::Instruction::Bfe { typ, arg } => {
|
||||
let builder_fn = if typ.is_signed() {
|
||||
dr::Builder::bit_field_s_extract
|
||||
} else {
|
||||
dr::Builder::bit_field_u_extract
|
||||
};
|
||||
let result_type = map.get_or_add_scalar(builder, (*typ).into());
|
||||
builder_fn(
|
||||
builder,
|
||||
result_type,
|
||||
Some(arg.dst),
|
||||
arg.src1,
|
||||
arg.src2,
|
||||
arg.src3,
|
||||
)?;
|
||||
ast::Instruction::Bfe { .. } => {
|
||||
// Should have been replaced with a function call earlier
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
ast::Instruction::Bfi { .. } => {
|
||||
// Should have been replaced with a function call earlier
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
ast::Instruction::Rem { typ, arg } => {
|
||||
let builder_fn = if typ.is_signed() {
|
||||
|
@ -5821,6 +5919,13 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
|
|||
arg: arg.map_bfe(visitor, &full_type)?,
|
||||
}
|
||||
}
|
||||
ast::Instruction::Bfi { typ, arg } => {
|
||||
let full_type = ast::Type::Scalar(typ.into());
|
||||
ast::Instruction::Bfi {
|
||||
typ,
|
||||
arg: arg.map_bfi(visitor, &full_type)?,
|
||||
}
|
||||
}
|
||||
ast::Instruction::Rem { typ, arg } => {
|
||||
let full_type = ast::Type::Scalar(typ.into());
|
||||
ast::Instruction::Rem {
|
||||
|
@ -6127,6 +6232,7 @@ impl ast::Instruction<ExpandedArgParams> {
|
|||
ast::Instruction::Popc { .. } => None,
|
||||
ast::Instruction::Xor { .. } => None,
|
||||
ast::Instruction::Bfe { .. } => None,
|
||||
ast::Instruction::Bfi { .. } => None,
|
||||
ast::Instruction::Rem { .. } => None,
|
||||
ast::Instruction::Sub(ast::ArithDetails::Float(float_control), _)
|
||||
| ast::Instruction::Add(ast::ArithDetails::Float(float_control), _)
|
||||
|
@ -6804,6 +6910,62 @@ impl<T: ArgParamsEx> ast::Arg4Setp<T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: ArgParamsEx> ast::Arg5<T> {
|
||||
fn map_bfi<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
|
||||
self,
|
||||
visitor: &mut V,
|
||||
base_type: &ast::Type,
|
||||
) -> Result<ast::Arg5<U>, TranslateError> {
|
||||
let dst = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.dst,
|
||||
is_dst: true,
|
||||
sema: ArgumentSemantics::Default,
|
||||
},
|
||||
base_type,
|
||||
)?;
|
||||
let src1 = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.src1,
|
||||
is_dst: false,
|
||||
sema: ArgumentSemantics::Default,
|
||||
},
|
||||
base_type,
|
||||
)?;
|
||||
let src2 = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.src2,
|
||||
is_dst: false,
|
||||
sema: ArgumentSemantics::Default,
|
||||
},
|
||||
base_type,
|
||||
)?;
|
||||
let src3 = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.src3,
|
||||
is_dst: false,
|
||||
sema: ArgumentSemantics::Default,
|
||||
},
|
||||
&ast::Type::Scalar(ast::ScalarType::U32),
|
||||
)?;
|
||||
let src4 = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.src4,
|
||||
is_dst: false,
|
||||
sema: ArgumentSemantics::Default,
|
||||
},
|
||||
&ast::Type::Scalar(ast::ScalarType::U32),
|
||||
)?;
|
||||
Ok(ast::Arg5 {
|
||||
dst,
|
||||
src1,
|
||||
src2,
|
||||
src3,
|
||||
src4,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ArgParamsEx> ast::Arg5Setp<T> {
|
||||
fn map<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
|
||||
self,
|
||||
|
|
Loading…
Add table
Reference in a new issue