Mirror of https://github.com/vosen/ZLUDA.git (synced 2025-04-20 00:14:45 +00:00)

Commit 39faaa7214 (parent c16bae32b5): Add atom and atom.cas
2 changed files with 261 additions and 4 deletions
@@ -1,8 +1,8 @@
 use std::cmp::Ordering;
 use super::{
-    MemScope, RawRoundingMode, RawSetpCompareOp, ScalarType, SetpBoolPostOp, StateSpace,
-    VectorPrefix,
+    AtomSemantics, MemScope, RawRoundingMode, RawSetpCompareOp, ScalarType, SetpBoolPostOp,
+    StateSpace, VectorPrefix,
 };
 use crate::{PtxError, PtxParserState};
 use bitflags::bitflags;

@@ -282,6 +282,52 @@ gen::generate_instruction_type!(
             src: T,
         }
     },
+    Selp {
+        type: { Type::Scalar(data.clone()) },
+        data: ScalarType,
+        arguments<T>: {
+            dst: T,
+            src1: T,
+            src2: T,
+            src3: {
+                repr: T,
+                type: Type::Scalar(ScalarType::Pred)
+            },
+        }
+    },
+    Bar {
+        type: Type::Scalar(ScalarType::U32),
+        data: BarData,
+        arguments<T>: {
+            src1: T,
+            src2: Option<T>,
+        }
+    },
+    Atom {
+        type: &data.type_,
+        data: AtomDetails,
+        arguments<T>: {
+            dst: T,
+            src1: {
+                repr: T,
+                space: { data.space },
+            },
+            src2: T,
+        }
+    },
+    AtomCas {
+        type: Type::Scalar(data.type_),
+        data: AtomCasDetails,
+        arguments<T>: {
+            dst: T,
+            src1: {
+                repr: T,
+                space: { data.space },
+            },
+            src2: T,
+            src3: T,
+        }
+    },
     Trap { }
 }
 );
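
For orientation, the generate_instruction_type! macro turns each entry above into an enum variant plus a per-instruction arguments struct. The following is a rough, hand-written approximation of the shape it yields for the two new entries; it is not the macro's actual output, which also wires up visitor and type information from the type:, data: and space: annotations. The placeholder structs stand in for the real AtomDetails/AtomCasDetails defined later in this diff.

// Hand-written approximation only; the real code is generated by
// gen::generate_instruction_type! and carries extra visitor/type plumbing.
pub struct AtomDetails;    // placeholder; the real struct appears later in this diff
pub struct AtomCasDetails; // placeholder; the real struct appears later in this diff

pub enum Instruction<T> {
    // ... existing variants ...
    Atom {
        data: AtomDetails,
        arguments: AtomArgs<T>,
    },
    AtomCas {
        data: AtomCasDetails,
        arguments: AtomCasArgs<T>,
    },
}

pub struct AtomArgs<T> {
    pub dst: T,
    pub src1: T, // address operand; its state space is recorded in AtomDetails::space
    pub src2: T,
}

pub struct AtomCasArgs<T> {
    pub dst: T,
    pub src1: T, // address operand
    pub src2: T, // compare value
    pub src3: T, // value stored on a successful compare
}
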
@@ -408,8 +454,7 @@ pub enum Type {
 impl Type {
     pub(crate) fn maybe_vector(vector: Option<VectorPrefix>, scalar: ScalarType) -> Self {
         match vector {
-            Some(VectorPrefix::V2) => Type::Vector(scalar, 2),
-            Some(VectorPrefix::V4) => Type::Vector(scalar, 4),
+            Some(prefix) => Type::Vector(scalar, prefix.len()),
             None => Type::Scalar(scalar),
         }
     }
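
maybe_vector previously special-cased .v2 and .v4; it now defers to the new VectorPrefix::len helper (added in the parser file below), which also covers the .v8 prefix used by the f16/bf16 atom rules. A minimal standalone sketch of the refactored behaviour, with the crate's types stubbed out for illustration:

// Minimal standalone sketch (crate types stubbed out; names match this diff).
#[derive(Clone, Copy)]
enum VectorPrefix {
    V2,
    V4,
    V8,
}

impl VectorPrefix {
    fn len(self) -> u8 {
        match self {
            VectorPrefix::V2 => 2,
            VectorPrefix::V4 => 4,
            VectorPrefix::V8 => 8,
        }
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
enum ScalarType {
    F16,
}

#[derive(Debug, PartialEq)]
enum Type {
    Scalar(ScalarType),
    Vector(ScalarType, u8),
}

impl Type {
    // One arm now covers every prefix, including the new .v8.
    fn maybe_vector(vector: Option<VectorPrefix>, scalar: ScalarType) -> Self {
        match vector {
            Some(prefix) => Type::Vector(scalar, prefix.len()),
            None => Type::Scalar(scalar),
        }
    }
}

fn main() {
    // .v8.f16 could not be expressed by the old V2/V4-only match.
    assert_eq!(
        Type::maybe_vector(Some(VectorPrefix::V8), ScalarType::F16),
        Type::Vector(ScalarType::F16, 8)
    );
    assert_eq!(
        Type::maybe_vector(None, ScalarType::F16),
        Type::Scalar(ScalarType::F16)
    );
}
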
@@ -1167,3 +1212,61 @@ pub struct RsqrtData {
     pub flush_to_zero: Option<bool>,
     pub type_: ScalarType,
 }
+
+pub struct BarData {
+    pub aligned: bool,
+}
+
+pub struct AtomDetails {
+    pub type_: Type,
+    pub semantics: AtomSemantics,
+    pub scope: MemScope,
+    pub space: StateSpace,
+    pub op: AtomicOp,
+}
+
+#[derive(Copy, Clone)]
+pub enum AtomicOp {
+    And,
+    Or,
+    Xor,
+    Exchange,
+    Add,
+    IncrementWrap,
+    DecrementWrap,
+    SignedMin,
+    UnsignedMin,
+    SignedMax,
+    UnsignedMax,
+    FloatAdd,
+    FloatMin,
+    FloatMax,
+}
+
+impl AtomicOp {
+    pub(crate) fn new(op: super::RawAtomicOp, kind: ScalarKind) -> Self {
+        use super::RawAtomicOp;
+        match (op, kind) {
+            (RawAtomicOp::And, _) => Self::And,
+            (RawAtomicOp::Or, _) => Self::Or,
+            (RawAtomicOp::Xor, _) => Self::Xor,
+            (RawAtomicOp::Exch, _) => Self::Exchange,
+            (RawAtomicOp::Add, _) => Self::Add,
+            (RawAtomicOp::Inc, _) => Self::IncrementWrap,
+            (RawAtomicOp::Dec, _) => Self::DecrementWrap,
+            (RawAtomicOp::Min, ScalarKind::Signed) => Self::SignedMin,
+            (RawAtomicOp::Min, ScalarKind::Float) => Self::FloatMin,
+            (RawAtomicOp::Min, _) => Self::UnsignedMin,
+            (RawAtomicOp::Max, ScalarKind::Signed) => Self::SignedMax,
+            (RawAtomicOp::Max, ScalarKind::Float) => Self::FloatMax,
+            (RawAtomicOp::Max, _) => Self::UnsignedMax,
+        }
+    }
+}
+
+pub struct AtomCasDetails {
+    pub type_: ScalarType,
+    pub semantics: AtomSemantics,
+    pub scope: MemScope,
+    pub space: StateSpace,
+}
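
AtomicOp::new is the piece that disambiguates the raw .min/.max modifiers: the parser rules in the second file pass the operand type's kind (for example type_.kind()), and the match above then picks the signed, unsigned, or float flavour. A quick reference, written as Rust comments rather than runnable code:

// How the polymorphic raw ops resolve (see the match in AtomicOp::new above);
// the "unsigned" lines correspond to the catch-all arms:
//
//   AtomicOp::new(RawAtomicOp::Min, ScalarKind::Signed) -> AtomicOp::SignedMin
//   AtomicOp::new(RawAtomicOp::Min, ScalarKind::Float)  -> AtomicOp::FloatMin
//   AtomicOp::new(RawAtomicOp::Min, any other kind)     -> AtomicOp::UnsignedMin
//   AtomicOp::new(RawAtomicOp::Max, ScalarKind::Signed) -> AtomicOp::SignedMax
//   AtomicOp::new(RawAtomicOp::Max, ScalarKind::Float)  -> AtomicOp::FloatMax
//   AtomicOp::new(RawAtomicOp::Max, any other kind)     -> AtomicOp::UnsignedMax
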
@@ -71,6 +71,16 @@ impl From<RawRoundingMode> for ast::RoundingMode {
     }
 }

+impl VectorPrefix {
+    pub(crate) fn len(self) -> u8 {
+        match self {
+            VectorPrefix::V2 => 2,
+            VectorPrefix::V4 => 4,
+            VectorPrefix::V8 => 8,
+        }
+    }
+}
+
 struct PtxParserState<'input> {
     errors: Vec<PtxError>,
     function_declarations:
@@ -1135,6 +1145,9 @@ derive_parser!(
     #[derive(Copy, Clone, PartialEq, Eq, Hash)]
     pub enum SetpBoolPostOp { }

+    #[derive(Copy, Clone, PartialEq, Eq, Hash)]
+    pub enum AtomSemantics { }
+
     // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
     mov{.vec}.type d, a => {
         Instruction::Mov {
@@ -2345,6 +2358,147 @@ derive_parser!(
     }
     ScalarType = { .f32, .f64 };

+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp
+    selp.type d, a, b, c => {
+        ast::Instruction::Selp {
+            data: type_,
+            arguments: SelpArgs { dst: d, src1: a, src2: b, src3: c }
+        }
+    }
+    .type: ScalarType = { .b16, .b32, .b64,
+                          .u16, .u32, .u64,
+                          .s16, .s32, .s64,
+                          .f32, .f64 };
+
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
+    barrier{.cta}.sync{.aligned} a{, b} => {
+        let _ = cta;
+        ast::Instruction::Bar {
+            data: ast::BarData { aligned },
+            arguments: BarArgs { src1: a, src2: b }
+        }
+    }
+    //barrier{.cta}.arrive{.aligned} a, b;
+    //barrier{.cta}.red.popc{.aligned}.u32 d, a{, b}, {!}c;
+    //barrier{.cta}.red.op{.aligned}.pred p, a{, b}, {!}c;
+    bar{.cta}.sync a{, b} => {
+        let _ = cta;
+        ast::Instruction::Bar {
+            data: ast::BarData { aligned: true },
+            arguments: BarArgs { src1: a, src2: b }
+        }
+    }
+    //bar{.cta}.arrive a, b;
+    //bar{.cta}.red.popc.u32 d, a{, b}, {!}c;
+    //bar{.cta}.red.op.pred p, a{, b}, {!}c;
+    //.op = { .and, .or };
+
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom
+    atom{.sem}{.scope}{.space}.op{.level::cache_hint}.type d, [a], b{, cache_policy} => {
+        if level_cache_hint || cache_policy.is_some() {
+            state.errors.push(PtxError::Todo);
+        }
+        ast::Instruction::Atom {
+            data: AtomDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: space.unwrap_or(StateSpace::Generic),
+                op: ast::AtomicOp::new(op, type_.kind()),
+                type_: type_.into()
+            },
+            arguments: AtomArgs { dst: d, src1: a, src2: b }
+        }
+    }
+    atom{.sem}{.scope}{.space}.cas.cas_type d, [a], b, c => {
+        ast::Instruction::AtomCas {
+            data: AtomCasDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: space.unwrap_or(StateSpace::Generic),
+                type_: cas_type
+            },
+            arguments: AtomCasArgs { dst: d, src1: a, src2: b, src3: c }
+        }
+    }
+    atom{.sem}{.scope}{.space}.exch{.level::cache_hint}.b128 d, [a], b{, cache_policy} => {
+        if level_cache_hint || cache_policy.is_some() {
+            state.errors.push(PtxError::Todo);
+        }
+        ast::Instruction::Atom {
+            data: AtomDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: space.unwrap_or(StateSpace::Generic),
+                op: ast::AtomicOp::new(exch, b128.kind()),
+                type_: b128.into()
+            },
+            arguments: AtomArgs { dst: d, src1: a, src2: b }
+        }
+    }
+    atom{.sem}{.scope}{.global}.float_op{.level::cache_hint}.vec_32_bit.f32 d, [a], b{, cache_policy} => {
+        if level_cache_hint || cache_policy.is_some() {
+            state.errors.push(PtxError::Todo);
+        }
+        ast::Instruction::Atom {
+            data: AtomDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: global.unwrap_or(StateSpace::Generic),
+                op: ast::AtomicOp::new(float_op, f32.kind()),
+                type_: ast::Type::Vector(f32, vec_32_bit.len())
+            },
+            arguments: AtomArgs { dst: d, src1: a, src2: b }
+        }
+    }
+    atom{.sem}{.scope}{.global}.float_op.noftz{.level::cache_hint}{.vec_16_bit}.half_word_type d, [a], b{, cache_policy} => {
+        if level_cache_hint || cache_policy.is_some() {
+            state.errors.push(PtxError::Todo);
+        }
+        ast::Instruction::Atom {
+            data: AtomDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: global.unwrap_or(StateSpace::Generic),
+                op: ast::AtomicOp::new(float_op, half_word_type.kind()),
+                type_: ast::Type::maybe_vector(vec_16_bit, half_word_type)
+            },
+            arguments: AtomArgs { dst: d, src1: a, src2: b }
+        }
+    }
+    atom{.sem}{.scope}{.global}.float_op.noftz{.level::cache_hint}{.vec_32_bit}.packed_type d, [a], b{, cache_policy} => {
+        if level_cache_hint || cache_policy.is_some() {
+            state.errors.push(PtxError::Todo);
+        }
+        ast::Instruction::Atom {
+            data: AtomDetails {
+                semantics: sem.map(Into::into).unwrap_or(AtomSemantics::Relaxed),
+                scope: scope.unwrap_or(MemScope::Gpu),
+                space: global.unwrap_or(StateSpace::Generic),
+                op: ast::AtomicOp::new(float_op, packed_type.kind()),
+                type_: ast::Type::maybe_vector(vec_32_bit, packed_type)
+            },
+            arguments: AtomArgs { dst: d, src1: a, src2: b }
+        }
+    }
+    .space: StateSpace = { .global, .shared{::cta, ::cluster} };
+    .sem: AtomSemantics = { .relaxed, .acquire, .release, .acq_rel };
+    .scope: MemScope = { .cta, .cluster, .gpu, .sys };
+    .op: RawAtomicOp = { .and, .or, .xor,
+                         .exch,
+                         .add, .inc, .dec,
+                         .min, .max };
+    .level::cache_hint = { .L2::cache_hint };
+    .type: ScalarType = { .b32, .b64, .u32, .u64, .s32, .s64, .f32, .f64 };
+    .cas_type: ScalarType = { .b32, .b64, .u32, .u64, .s32, .s64, .f32, .f64, .b16, .b128 };
+    .half_word_type: ScalarType = { .f16, .bf16 };
+    .packed_type: ScalarType = { .f16x2, .bf16x2 };
+    .vec_16_bit: VectorPrefix = { .v2, .v4, .v8 };
+    .vec_32_bit: VectorPrefix = { .v2, .v4 };
+    .float_op: RawAtomicOp = { .add, .min, .max };
+    ScalarType = { .b16, .b128, .f32 };
+    StateSpace = { .global };
+    RawAtomicOp = { .exch };
+
     // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
     ret{.uni} => {
         Instruction::Ret { data: RetData { uniform: uni } }
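
To make the grammar concrete, here are a few PTX instructions of the kind the new atom rules above are written to accept, gathered into a small Rust array (register and address operand names are invented for the example):

// Illustrative PTX snippets matching the new atom grammar rules above
// (operand names are made up; they are not taken from any real kernel).
const ATOM_EXAMPLES: &[&str] = &[
    // atom{.sem}{.scope}{.space}.op.type
    "atom.global.add.u32 %r1, [%rd1], %r2;",
    "atom.acquire.gpu.shared::cta.exch.b64 %rd2, [%rd1], %rd3;",
    // atom{.sem}{.scope}{.space}.cas.cas_type
    "atom.acq_rel.sys.global.cas.b32 %r1, [%rd1], %r2, %r3;",
    // atom{.sem}{.scope}{.global}.float_op.noftz.half_word_type
    "atom.global.add.noftz.f16 %h1, [%rd1], %h2;",
];

fn main() {
    // In the real crate these strings would be fed to the PTX parser;
    // here we only print them for reference.
    for example in ATOM_EXAMPLES {
        println!("{example}");
    }
}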