Implement shf instruction (#463)

* Implement `shf` instruction * Tests for `shf` * cargo fmt
2025-10-04 07:09:55 +00:00 · 2025-08-04 10:15:46 -07:00 · 2025-08-04 10:15:46 -07:00 · dd05752fc4
commit dd05752fc4
parent ce9c0aac23
18 changed files with 580 additions and 2 deletions
--- a/ptx_parser/src/ast.rs
+++ b/ptx_parser/src/ast.rs
@ -2,7 +2,9 @@ use super::{
    AtomSemantics, MemScope, RawRoundingMode, RawSetpCompareOp, ScalarType, SetpBoolPostOp,
    StateSpace, VectorPrefix,
 };
-use crate::{Mul24Control, PtxError, PtxParserState, Reduction, ShuffleMode};
+use crate::{
+    FunnelShiftMode, Mul24Control, PtxError, PtxParserState, Reduction, ShiftDirection, ShuffleMode,
+};
 use bitflags::bitflags;
 use std::{alloc::Layout, cmp::Ordering, num::NonZeroU8};

@ -549,6 +551,16 @@ ptx_parser_macros::generate_instruction_type!(
                src_membermask: T
            }
        },
+        Shf {  // `shf` instruction; its `.dir`/`.mode` modifiers travel in `ShfDetails`
+            data: ShfDetails,
+            type: Type::Scalar(ScalarType::B32),  // constant type: the parser rule only matches `shf.dir.mode.b32`
+            arguments<T>: {
+                dst: T,    // `d` in the parser rule `shf.dir.mode.b32 d, a, b, c`
+                src_a: T,  // `a`
+                src_b: T,  // `b`
+                src_c: T   // `c`
+            }
+        },
        Shl {
            data: ScalarType,
            type: { Type::Scalar(data.clone()) },
@ -1103,6 +1115,11 @@ pub struct CpAsyncDetails {
    pub src_size: Option<u64>,
 }

+pub struct ShfDetails {  // modifiers attached to a parsed `shf` instruction
+    pub direction: ShiftDirection,  // filled from the `.dir` modifier (`.l` or `.r`)
+    pub mode: FunnelShiftMode,  // filled from the `.mode` modifier (`.clamp` or `.wrap`)
+}
+
 #[derive(Clone)]
 pub enum ParsedOperand<Ident> {
    Reg(Ident),
--- a/ptx_parser/src/lib.rs
+++ b/ptx_parser/src/lib.rs
@ -1739,6 +1739,12 @@ derive_parser!(
    #[derive(Copy, Clone, PartialEq, Eq, Hash)]
    pub enum ShuffleMode { }

+    #[derive(Copy, Clone, PartialEq, Eq, Hash)]
+    pub enum ShiftDirection { }  // declared empty here; NOTE(review): variants presumably generated by `derive_parser!` from the `.dir` list (`.l`, `.r`) — confirm
+
+    #[derive(Copy, Clone, PartialEq, Eq, Hash)]
+    pub enum FunnelShiftMode { }  // declared empty here; NOTE(review): variants presumably generated by `derive_parser!` from the `.mode` list (`.clamp`, `.wrap`) — confirm
+
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
    mov{.vec}.type  d, a => {
        Instruction::Mov {
@ -3625,6 +3631,17 @@ derive_parser!(
    cp.async.wait_all => {
        Instruction::CpAsyncWaitAll {}
    }
+
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/#logic-and-shift-instructions-shf
+    shf.dir.mode.b32  d, a, b, c => {  // only the `.b32` form is matched; both modifiers are mandatory in this pattern
+        Instruction::Shf {
+            data: ShfDetails { direction: dir, mode: mode },  // parsed `.dir` / `.mode` values are stored unchanged
+            arguments: ShfArgs { dst: d, src_a: a, src_b: b, src_c: c }  // operands are kept in their source order
+        }
+    }
+
+    .dir: ShiftDirection = { .l, .r };  // spellings accepted in the `dir` slot of the rule above
+    .mode: FunnelShiftMode = { .clamp, .wrap };  // spellings accepted in the `mode` slot of the rule above
 );

 #[cfg(test)]