diff --git a/ptx_parser/src/ast.rs b/ptx_parser/src/ast.rs
index daee9da..7755c7f 100644
--- a/ptx_parser/src/ast.rs
+++ b/ptx_parser/src/ast.rs
@@ -165,6 +165,32 @@ gen::generate_instruction_type!(
                 },
             }
         },
+        // Right shift: `src2` (the shift amount) is always typed u32, whatever the operand type.
+        Shr {
+            data: ShrData,
+            type: { Type::Scalar(data.type_.clone()) },
+            arguments: {
+                dst: T,
+                src1: T,
+                src2: {
+                    repr: T,
+                    type: { Type::Scalar(ScalarType::U32) },
+                },
+            }
+        },
+        // Left shift: `src2` (the shift amount) is always typed u32, whatever the operand type.
+        Shl {
+            data: ScalarType,
+            type: { Type::Scalar(data.clone()) },
+            arguments: {
+                dst: T,
+                src1: T,
+                src2: {
+                    repr: T,
+                    type: { Type::Scalar(ScalarType::U32) },
+                },
+            }
+        },
         Ret {
             data: RetData
         },
@@ -943,3 +969,20 @@ pub struct CvtDesc {
     pub dst: ScalarType,
     pub src: ScalarType,
 }
+
+/// Details of a `shr` instruction: the operand type plus the shift flavor.
+pub struct ShrData {
+    /// Scalar type named by the instruction's `.type` suffix.
+    pub type_: ScalarType,
+    /// Flavor of right shift; the parser derives it from `type_`.
+    pub kind: RightShiftKind,
+}
+
+/// Flavor of a right shift.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum RightShiftKind {
+    /// Chosen by the parser when the operand type is signed.
+    Arithmetic,
+    /// Chosen by the parser for every operand type that is not signed.
+    Logical,
+}
diff --git a/ptx_parser/src/main.rs b/ptx_parser/src/main.rs
index 68787db..6055c1d 100644
--- a/ptx_parser/src/main.rs
+++ b/ptx_parser/src/main.rs
@@ -1652,6 +1652,29 @@ derive_parser!(
         .atype: ScalarType =                    { .u8, .u16, .u32, .u64,
                                                   .s8, .s16, .s32, .s64,
                                                   .bf16, .f16, .f32, .f64 };
+        // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
+        shl.type d, a, b => {
+            ast::Instruction::Shl { data: type_, arguments: ShlArgs { dst: d, src1: a, src2: b } }
+        }
+        .type: ScalarType = { .b16, .b32, .b64 };
+
+        // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr
+        shr.type d, a, b => {
+            // Only signed operand types shift arithmetically; every other accepted type shifts logically.
+            let kind = if type_.kind() == ast::ScalarKind::Signed {
+                ast::RightShiftKind::Arithmetic
+            } else {
+                ast::RightShiftKind::Logical
+            };
+            ast::Instruction::Shr {
+                data: ast::ShrData { type_, kind },
+                arguments: ShrArgs { dst: d, src1: a, src2: b }
+            }
+        }
+
+        .type: ScalarType = { .b16, .b32, .b64,
+                              .u16, .u32, .u64,
+                              .s16, .s32, .s64 };
 
         // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
         ret{.uni} => {