// Patch overview (text ahead of the first "diff --git" header is commentary, not patch content).
//
// NOTE(review): this patch arrived collapsed onto a single line and its template
// argument lists look stripped (e.g. "llvm_calli>", "std::forward(a)",
// "template , f32[4]>>>"). Restore it from the original commit before applying.
//
// File 1: rpcs3/Emu/CPU/CPUTranslator.h
//   Adds a static helper vrangeps(a, b, c, d) that builds a call to the intrinsic
//   "llvm.x86.avx512.mask.range.ps.128" with operands (a, b, constant c, a, constant d).
//   NOTE(review): `a` is forwarded twice in the same braced list; if T can be an
//   rvalue type the second use may observe a moved-from operand - confirm.
//
// File 2: rpcs3/Emu/Cell/SPURecompiler.cpp
//   Adds an m_use_avx512 branch at the top of clamp_smax(v):
//     - is_input_positive(v) true      -> only clamp_positive_smax(v) is applied;
//     - get_const_vector(...) reports ok -> keeps the existing
//       clamp_positive_smax(clamp_negative_smax(v)) sequence;
//     - otherwise                      -> vrangeps(v, fsplat(0x7f7fffff), 0x2, 0Xff).
//   When m_use_avx512 is false the original two-clamp sequence is still returned.
//   NOTE(review): `data` from the structured binding is never read in this hunk.
//   NOTE(review): 0x7f7fffff matches the IEEE-754 single-precision bit pattern of the
//   largest finite value; confirm fsplat treats the argument as bits, not as a number.
//   NOTE(review): 0x2 / 0Xff are presumably the VRANGEPS control immediate and an
//   all-lanes mask - verify against the instruction reference; "0Xff" casing differs
//   from the other hex literals.
diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 94e7686165..3c43eb87a2 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3666,6 +3666,12 @@ public: { return llvm_calli>{"llvm.x86.avx512.dbpsadbw.128", {std::forward(a), std::forward(b), llvm_const_int{c}}}; } + + template , f32[4]>>> + static auto vrangeps(T&& a, U&& b, u8 c, u8 d) + { + return llvm_calli, T, llvm_const_int>{"llvm.x86.avx512.mask.range.ps.128", {std::forward(a), std::forward(b), llvm_const_int{c}, std::forward(a), llvm_const_int{d}}}; + } }; // Format llvm::SizeType diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 11089f4649..f83b48148d 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7908,6 +7908,22 @@ public: value_t clamp_smax(value_t v) { + if (m_use_avx512) + { + if (is_input_positive(v)) + { + return eval(clamp_positive_smax(v)); + } + + if (auto [ok, data] = get_const_vector(v.value, m_pos); ok) + { + // Avoid pessimation when input is constant + return eval(clamp_positive_smax(clamp_negative_smax(v))); + } + + return eval(vrangeps(v, fsplat(0x7f7fffff), 0x2, 0Xff)); + } + return eval(clamp_positive_smax(clamp_negative_smax(v))); }