Add abs, mad

2025-08-06 16:19:29 +00:00 · 2024-08-21 02:45:52 +02:00 · 2024-08-21 02:45:52 +02:00 · 6cd18bfdb8
commit 6cd18bfdb8
parent 588d66b236
3 changed files with 237 additions and 9 deletions
--- a/gen_impl/src/parser.rs
+++ b/gen_impl/src/parser.rs
@ -73,7 +73,7 @@ pub struct OpcodeDecl(pub Instruction, pub Arguments);

 impl OpcodeDecl {
    fn peek(input: syn::parse::ParseStream) -> bool {
-        Instruction::peek(input)
+        // An instruction-like token that is immediately followed by `=` is
+        // rejected here (presumably it starts a `Name = { ... }` definition
+        // rather than an opcode declaration; confirm against the callers).
+        Instruction::peek(input) && !input.peek2(Token![=])
    }
 }

@ -106,7 +106,7 @@ impl Parse for CodeBlock {
        } else {
            return Err(lookahead.error());
        };
-        Ok(Self{special, code})
+        Ok(Self { special, code })
    }
 }

--- a/ptx_parser/src/ast.rs
+++ b/ptx_parser/src/ast.rs
@ -200,6 +200,27 @@ gen::generate_instruction_type!(
                src: T,
            }
        },
+        Abs {
+            data: AbsDetails,
+            type: { Type::Scalar(data.type_) },
+            arguments<T>: {
+                dst: T,
+                src: T,
+            }
+        },
+        Mad {
+            type: { Type::from(data.type_()) },
+            data: MadDetails,
+            arguments<T>: {
+                dst: {
+                    repr: T,
+                    type: { Type::from(data.dst_type()) },
+                },
+                src1: T,
+                src2: T,
+                src3: T,
+            }
+        },
        Trap { }
    }
 );
@ -588,16 +609,14 @@ pub enum MulDetails {
 }

 impl MulDetails {
-    #[allow(unused)] // Used by generated code
-    fn type_(&self) -> ScalarType {
+    pub fn type_(&self) -> ScalarType {
        match self {
            MulDetails::Integer { type_, .. } => *type_,
            MulDetails::Float(arith) => arith.type_,
        }
    }

-    #[allow(unused)] // Used by generated code
-    fn dst_type(&self) -> ScalarType {
+    pub fn dst_type(&self) -> ScalarType {
        match self {
            MulDetails::Integer {
                type_,
@ -995,3 +1014,45 @@ pub enum CvtaDirection {
    GenericToExplicit,
    ExplicitToGeneric,
 }
+
+/// Modifiers attached to an `abs` instruction.
+#[derive(Copy, Clone)]
+pub struct AbsDetails {
+    /// State of the `.ftz` modifier. The `abs` parser rules set this to
+    /// `None` for the forms whose syntax has no `.ftz` slot.
+    pub flush_to_zero: Option<bool>,
+    /// Scalar type of the instruction; used as the type of the `Abs` variant.
+    pub type_: ScalarType,
+}
+
+/// Modifiers attached to a `mad` instruction.
+#[derive(Copy, Clone)]
+pub enum MadDetails {
+    /// Integer form of `mad`.
+    Integer {
+        /// The `.hi` / `.lo` / `.wide` mode of the instruction.
+        control: MulIntControl,
+        /// Whether the `.sat` modifier was present; the parser sets this to
+        /// `true` only for `mad.hi.sat.s32`.
+        saturate: bool,
+        /// Type suffix of the instruction; the destination type is derived
+        /// from it by `dst_type`.
+        type_: ScalarType,
+    },
+    /// Floating-point form of `mad`.
+    Float(ArithFloat),
+}
+
+impl MadDetails {
+    /// Returns the scalar type of the destination operand.
+    ///
+    /// For the integer `.wide` mode the destination is twice as wide as the
+    /// instruction type (`u16` -> `u32`, `s16` -> `s32`, `u32` -> `u64`,
+    /// `s32` -> `s64`). Every other form returns the instruction type itself.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `control` is `MulIntControl::Wide` and `type_` is not one of
+    /// the four 16-bit / 32-bit integer types listed above.
+    pub fn dst_type(&self) -> ScalarType {
+        match self {
+            MadDetails::Integer {
+                type_,
+                control: MulIntControl::Wide,
+                ..
+            } => match type_ {
+                ScalarType::U16 => ScalarType::U32,
+                ScalarType::S16 => ScalarType::S32,
+                ScalarType::U32 => ScalarType::U64,
+                ScalarType::S32 => ScalarType::S64,
+                _ => unreachable!(),
+            },
+            _ => self.type_(),
+        }
+    }
+
+    /// Returns the scalar type named by the instruction's type suffix.
+    ///
+    /// Public, like `dst_type` and like the matching accessors on
+    /// `MulDetails`, so both types expose the same API.
+    pub fn type_(&self) -> ScalarType {
+        match self {
+            MadDetails::Integer { type_, .. } => *type_,
+            MadDetails::Float(arith) => arith.type_,
+        }
+    }
+}
--- a/ptx_parser/src/main.rs
+++ b/ptx_parser/src/main.rs
@ -1450,6 +1450,8 @@ derive_parser!(
    ScalarType =        { .f16, .f16x2, .bf16, .bf16x2 };

    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
    mul.mode.type  d, a, b => {
        ast::Instruction::Mul {
            data: ast::MulDetails::Integer {
@ -1476,8 +1478,6 @@ derive_parser!(
                          .s16, .s32 };
    RawMulIntControl =  { .wide };

-
-    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
    mul{.rnd}{.ftz}{.sat}.f32  d, a, b => {
        ast::Instruction::Mul {
            data: ast::MulDetails::Float (
@ -1507,7 +1507,6 @@ derive_parser!(
    .rnd: RawRoundingMode = { .rn, .rz, .rm, .rp };
    ScalarType = { .f32, .f64 };

-    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
    mul{.rnd}{.ftz}{.sat}.f16   d, a, b => {
        ast::Instruction::Mul {
            data: ast::MulDetails::Float (
@ -1706,6 +1705,174 @@ derive_parser!(
    .space: StateSpace = { .const, .global, .local, .shared{::cta, ::cluster}, .param{::entry} };
    .size: ScalarType  = { .u32, .u64 };

+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-abs
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-abs
+    abs.type        d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: None,
+                type_
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs{.ftz}.f32   d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: Some(ftz),
+                type_: f32
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs.f64         d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: None,
+                type_: f64
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs{.ftz}.f16   d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: Some(ftz),
+                type_: f16
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs{.ftz}.f16x2 d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: Some(ftz),
+                type_: f16x2
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs.bf16        d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: None,
+                type_: bf16
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    abs.bf16x2      d, a => {
+        ast::Instruction::Abs {
+            data: ast::AbsDetails {
+                flush_to_zero: None,
+                type_: bf16x2
+            },
+            arguments: ast::AbsArgs {
+                dst: d, src: a
+            }
+        }
+    }
+    .type: ScalarType = { .s16, .s32, .s64 };
+    ScalarType = { .f32, .f64, .f16, .f16x2, .bf16, .bf16x2 };
+
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
+    mad.mode.type  d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Integer {
+                type_,
+                control: mode.into(),
+                saturate: false
+            },
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c }
+        }
+    }
+    .type: ScalarType =         { .u16, .u32, .u64,
+                                  .s16, .s32, .s64 };
+    .mode: RawMulIntControl =   { .hi, .lo };
+
+    // The .wide suffix is supported only for 16-bit and 32-bit integer types.
+    mad.wide.type  d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Integer {
+                type_,
+                control: wide.into(),
+                saturate: false
+            },
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c  }
+        }
+    }
+    .type: ScalarType = { .u16, .u32,
+                          .s16, .s32 };
+    RawMulIntControl =  { .wide };
+
+    mad.hi.sat.s32 d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Integer {
+                type_: s32,
+                control: hi.into(),
+                saturate: true
+            },
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c  }
+        }
+    }
+    RawMulIntControl =  { .hi };
+    ScalarType =        { .s32 };
+
+    mad{.ftz}{.sat}.f32      d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Float(
+                ArithFloat {
+                    type_: f32,
+                    rounding: None,
+                    flush_to_zero: Some(ftz),
+                    saturate: sat
+                }
+            ),
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c  }
+        }
+    }
+    mad.rnd{.ftz}{.sat}.f32  d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Float(
+                ArithFloat {
+                    type_: f32,
+                    rounding: Some(rnd.into()),
+                    flush_to_zero: Some(ftz),
+                    saturate: sat
+                }
+            ),
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c  }
+        }
+    }
+    // The f64 form has no `.ftz` / `.sat` slots in its syntax, so those
+    // fields are fixed rather than taken from the parsed modifiers.
+    mad.rnd.f64              d, a, b, c => {
+        ast::Instruction::Mad {
+            data: ast::MadDetails::Float(
+                ArithFloat {
+                    type_: f64,
+                    rounding: Some(rnd.into()),
+                    flush_to_zero: None,
+                    saturate: false
+                }
+            ),
+            arguments: MadArgs { dst: d, src1: a, src2: b, src3: c  }
+        }
+    }
+    .rnd: RawRoundingMode   = { .rn, .rz, .rm, .rp };
+    ScalarType =        { .f32, .f64 };
+
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
    ret{.uni} => {
        Instruction::Ret { data: RetData { uniform: uni } }