mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Add shr, shl
This commit is contained in:
parent
bc1074ed67
commit
47f8314a5d
2 changed files with 52 additions and 0 deletions
|
@ -165,6 +165,30 @@ gen::generate_instruction_type!(
|
|||
},
|
||||
}
|
||||
},
|
||||
Shr {
|
||||
data: ShrData,
|
||||
type: { Type::Scalar(data.type_.clone()) },
|
||||
arguments<T>: {
|
||||
dst: T,
|
||||
src1: T,
|
||||
src2: {
|
||||
repr: T,
|
||||
type: { Type::Scalar(ScalarType::U32) },
|
||||
},
|
||||
}
|
||||
},
|
||||
Shl {
|
||||
data: ScalarType,
|
||||
type: { Type::Scalar(data.clone()) },
|
||||
arguments<T>: {
|
||||
dst: T,
|
||||
src1: T,
|
||||
src2: {
|
||||
repr: T,
|
||||
type: { Type::Scalar(ScalarType::U32) },
|
||||
},
|
||||
}
|
||||
},
|
||||
Ret {
|
||||
data: RetData
|
||||
},
|
||||
|
@ -943,3 +967,13 @@ pub struct CvtDesc {
|
|||
pub dst: ScalarType,
|
||||
pub src: ScalarType,
|
||||
}
|
||||
|
||||
pub struct ShrData {
|
||||
pub type_: ScalarType,
|
||||
pub kind: RightShiftKind,
|
||||
}
|
||||
|
||||
/// Flavour of a right shift, i.e. how the vacated high bits are filled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RightShiftKind {
    /// Arithmetic shift: vacated bits are filled with the sign bit.
    Arithmetic,
    /// Logical shift: vacated bits are filled with zeros.
    Logical,
}
|
||||
|
|
|
@ -1652,6 +1652,24 @@ derive_parser!(
|
|||
.atype: ScalarType = { .u8, .u16, .u32, .u64,
|
||||
.s8, .s16, .s32, .s64,
|
||||
.bf16, .f16, .f32, .f64 };
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
|
||||
shl.type d, a, b => {
    // The parsed `.type` modifier (`type_`) is the whole payload of `Shl`.
    ast::Instruction::Shl { data: type_, arguments: ShlArgs { dst: d, src1: a, src2: b } }
}
// Only the untyped bit-size types are accepted for `shl`.
.type: ScalarType = { .b16, .b32, .b64 };
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr
|
||||
shr.type d, a, b => {
    // A signed operand type selects an arithmetic shift; every other accepted
    // type (the `.b*` and `.u*` forms below) selects a logical shift.
    let kind = if type_.kind() == ast::ScalarKind::Signed {
        RightShiftKind::Arithmetic
    } else {
        RightShiftKind::Logical
    };
    ast::Instruction::Shr {
        data: ast::ShrData { type_, kind },
        arguments: ShrArgs { dst: d, src1: a, src2: b }
    }
}

.type: ScalarType = { .b16, .b32, .b64,
                      .u16, .u32, .u64,
                      .s16, .s32, .s64 };
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
|
||||
ret{.uni} => {
|
||||
|
|
Loading…
Add table
Reference in a new issue