mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-10-04 07:09:55 +00:00
Implement `shf` instruction (#463)
* Implement `shf` instruction
* Tests for `shf`
* cargo fmt
This commit is contained in:
parent
ce9c0aac23
commit
dd05752fc4
18 changed files with 580 additions and 2 deletions
|
@ -2,7 +2,9 @@ use super::{
|
|||
AtomSemantics, MemScope, RawRoundingMode, RawSetpCompareOp, ScalarType, SetpBoolPostOp,
|
||||
StateSpace, VectorPrefix,
|
||||
};
|
||||
use crate::{Mul24Control, PtxError, PtxParserState, Reduction, ShuffleMode};
|
||||
use crate::{
|
||||
FunnelShiftMode, Mul24Control, PtxError, PtxParserState, Reduction, ShiftDirection, ShuffleMode,
|
||||
};
|
||||
use bitflags::bitflags;
|
||||
use std::{alloc::Layout, cmp::Ordering, num::NonZeroU8};
|
||||
|
||||
|
@ -549,6 +551,16 @@ ptx_parser_macros::generate_instruction_type!(
|
|||
src_membermask: T
|
||||
}
|
||||
},
|
||||
// `shf` instruction: its direction/mode modifiers are carried in `ShfDetails`.
Shf {
|
||||
data: ShfDetails,
|
||||
// Operand type is fixed to `ScalarType::B32` (the parser rule only accepts `.b32`).
type: Type::Scalar(ScalarType::B32),
|
||||
// The four operands correspond, in order, to `d, a, b, c` in the `shf` parser rule.
arguments<T>: {
|
||||
dst: T,
|
||||
src_a: T,
|
||||
src_b: T,
|
||||
src_c: T
|
||||
}
|
||||
},
|
||||
Shl {
|
||||
data: ScalarType,
|
||||
type: { Type::Scalar(data.clone()) },
|
||||
|
@ -1103,6 +1115,11 @@ pub struct CpAsyncDetails {
|
|||
pub src_size: Option<u64>,
|
||||
}
|
||||
|
||||
/// Modifiers of a parsed `shf` instruction; stored as the `data` of `Instruction::Shf`.
pub struct ShfDetails {
|
||||
/// Shift direction, taken from the `.dir` modifier (`.l` or `.r`).
pub direction: ShiftDirection,
|
||||
/// Funnel-shift mode, taken from the `.mode` modifier (`.clamp` or `.wrap`).
pub mode: FunnelShiftMode,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum ParsedOperand<Ident> {
|
||||
Reg(Ident),
|
||||
|
|
|
@ -1739,6 +1739,12 @@ derive_parser!(
|
|||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ShuffleMode { }
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ShiftDirection { }
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum FunnelShiftMode { }
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
||||
mov{.vec}.type d, a => {
|
||||
Instruction::Mov {
|
||||
|
@ -3625,6 +3631,17 @@ derive_parser!(
|
|||
cp.async.wait_all => {
|
||||
Instruction::CpAsyncWaitAll {}
|
||||
}
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/#logic-and-shift-instructions-shf
// Parses `shf.{l,r}.{clamp,wrap}.b32 d, a, b, c` into `Instruction::Shf`.
|
||||
shf.dir.mode.b32 d, a, b, c => {
|
||||
Instruction::Shf {
|
||||
// `dir` and `mode` are bound by the modifier definitions that follow this rule.
data: ShfDetails { direction: dir, mode: mode },
|
||||
arguments: ShfArgs { dst: d, src_a: a, src_b: b, src_c: c }
|
||||
}
|
||||
}
|
||||
|
||||
// Accepted values of the `.dir` modifier.
.dir: ShiftDirection = { .l, .r };
|
||||
// Accepted values of the `.mode` modifier.
.mode: FunnelShiftMode = { .clamp, .wrap };
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue