From ebeeafc94fed374b3ed9639f3c84a6c53651ce69 Mon Sep 17 00:00:00 2001
From: Malcolm Jestadt
Date: Sun, 5 Jun 2022 23:53:07 -0400
Subject: [PATCH] SPU LLVM: Use vrangeps in clamp_smax

- This instruction can clamp a value between a range of values, something which previously needed 2 instructions.
- With the immediate byte set to 0x2 it will compute the minimum between the absolute value of the first input and the second input, and then copy the sign from the first input to the result.
---
 rpcs3/Emu/CPU/CPUTranslator.h    |  6 ++++++
 rpcs3/Emu/Cell/SPURecompiler.cpp | 16 ++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
index 94e7686165..3c43eb87a2 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -3666,6 +3666,12 @@ public:
 	{
 		return llvm_calli>{"llvm.x86.avx512.dbpsadbw.128", {std::forward(a), std::forward(b), llvm_const_int{c}}};
 	}
+
+	template , f32[4]>>> // NOTE(review): template argument lists look stripped in this copy of the patch - restore from the upstream commit before applying
+	static auto vrangeps(T&& a, U&& b, u8 c, u8 d) // Wraps the "llvm.x86.avx512.mask.range.ps.128" intrinsic; operands are passed in the order a, b, c, a, d
+	{ // NOTE(review): presumably c is the VRANGEPS imm8, the repeated a is the masked pass-through source and d is the write mask - confirm against the intrinsic definition
+		return llvm_calli, T, llvm_const_int>{"llvm.x86.avx512.mask.range.ps.128", {std::forward(a), std::forward(b), llvm_const_int{c}, std::forward(a), llvm_const_int{d}}}; // NOTE(review): a is forwarded twice in one initializer list - confirm this is safe when a is an rvalue
+	}
 };
 
 // Format llvm::SizeType
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 11089f4649..f83b48148d 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -7908,6 +7908,22 @@ public:
 
 	value_t clamp_smax(value_t v)
 	{
+		if (m_use_avx512) // AVX-512 path; when the flag is unset the two-step clamp at the bottom is used
+		{
+			if (is_input_positive(v)) // Input reported as positive: only the positive-side clamp is emitted
+			{
+				return eval(clamp_positive_smax(v));
+			}
+
+			if (auto [ok, data] = get_const_vector(v.value, m_pos); ok) // ok is set when v resolves to a constant vector; data is not used here
+			{
+				// Avoid pessimation when input is constant
+				return eval(clamp_positive_smax(clamp_negative_smax(v)));
+			}
+
+			return eval(vrangeps(v, fsplat(0x7f7fffff), 0x2, 0Xff)); // Per the commit message, imm 0x2 yields min(|v|, bound) with the sign of v. NOTE(review): 0x7f7fffff is the bit pattern of the largest finite f32; if fsplat takes a floating-point value rather than raw bits, this literal becomes ~2.1e9 instead - confirm
+		}
+
 		return eval(clamp_positive_smax(clamp_negative_smax(v)));
 	}
 