Implement fma.rn.fn.bf16x2 (#496)
Some checks are pending
ZLUDA / Build (Linux) (push) Waiting to run
ZLUDA / Build (Windows) (push) Waiting to run
ZLUDA / Build AMD GPU unit tests (push) Waiting to run
ZLUDA / Run AMD GPU unit tests (push) Blocked by required conditions

* Add fma bf16x2 test

* Implement fma.rn.fn.bf16x2

* cargo fmt
This commit is contained in:
Violet 2025-09-04 17:29:20 -07:00 committed by GitHub
commit b7f3a647d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 102 additions and 5 deletions

View file

@ -2712,14 +2712,30 @@ derive_parser!(
arguments: FmaArgs { dst: d, src1: a, src2: b, src3: c }
}
}
.rnd: RawRoundingMode = { .rn };
ScalarType = { .f16 };
//fma.rnd{.ftz}{.sat}.f16x2 d, a, b, c;
//fma.rnd{.ftz}.relu.f16 d, a, b, c;
//fma.rnd{.ftz}.relu.f16x2 d, a, b, c;
//fma.rnd{.relu}.bf16 d, a, b, c;
//fma.rnd{.relu}.bf16x2 d, a, b, c;
//fma.rnd.oob.{relu}.type d, a, b, c;
// Parses the packed-bfloat16 form `fma.rnd{.relu}.bf16x2 d, a, b, c`.
// The optional `.relu` modifier is accepted by the pattern but is not
// implemented: it records a `PtxError::Todo` and parsing continues, so an
// `ast::Instruction::Fma` is still produced by the expression below.
fma.rnd{.relu}.bf16x2 d, a, b, c => {
if relu {
// `.relu` is not supported yet; report it instead of silently dropping it.
state.errors.push(PtxError::Todo);
}
ast::Instruction::Fma {
data: ast::ArithFloat {
type_: bf16x2,
// Convert the parsed `.rnd` modifier into the type of the `rounding` field.
rounding: rnd.into(),
// This syntax form has no `.ftz` modifier, so no flush-to-zero setting is recorded.
flush_to_zero: None,
// Likewise, this syntax form has no `.sat` modifier.
saturate: false,
is_fusable: false
},
arguments: FmaArgs { dst: d, src1: a, src2: b, src3: c }
}
}
.rnd: RawRoundingMode = { .rn };
ScalarType = { .f16 };
ScalarType = { .bf16x2 };
//fma.rnd.oob.{relu}.type d, a, b, c;
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub