From f188019244e33fcdfba5c127d5e6822b781b3128 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 1 Sep 2021 13:43:34 +0300 Subject: [PATCH] LLVM DSL: reimpelement fsqrt, fabs --- rpcs3/Emu/CPU/CPUTranslator.h | 116 ++++++++++++++++++++++++++----- rpcs3/Emu/Cell/SPURecompiler.cpp | 4 +- 2 files changed, 102 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index d7147e4657..e8c3e131a5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -2591,6 +2591,94 @@ struct llvm_avg } }; +template > +struct llvm_fsqrt +{ + using type = T; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_float, "llvm_fsqrt<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_float; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + llvm::Value* v = a1.eval(ir); + + if (llvm::isa(v)) + { + if (auto c = llvm::ConstantFoldInstruction(ir->CreateUnaryIntrinsic(llvm::Intrinsic::sqrt, v), llvm::DataLayout(""))) + { + // Will fail in some cases (such as negative constant) + return c; + } + } + + return ir->CreateUnaryIntrinsic(llvm::Intrinsic::sqrt, v); + } + + llvm_match_tuple match(llvm::Value*& value) const + { + llvm::Value* v1 = {}; + + if (auto i = llvm::dyn_cast_or_null(value); i && i->getIntrinsicID() == llvm::Intrinsic::sqrt) + { + v1 = i->getOperand(0); + + if (auto r1 = a1.match(v1); v1) + { + return r1; + } + } + + value = nullptr; + return {}; + } +}; + +template > +struct llvm_fabs +{ + using type = T; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_float, "llvm_fabs<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_float; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + llvm::Value* v = a1.eval(ir); + + if (llvm::isa(v)) + { + return llvm::ConstantFoldInstruction(ir->CreateUnaryIntrinsic(llvm::Intrinsic::fabs, v), llvm::DataLayout("")); + } + + return ir->CreateUnaryIntrinsic(llvm::Intrinsic::fabs, v); + } + + llvm_match_tuple match(llvm::Value*& value) const + { + llvm::Value* v1 = {}; + + if (auto i = llvm::dyn_cast_or_null(value); i && i->getIntrinsicID() == llvm::Intrinsic::fabs) + { + v1 = i->getOperand(0); + + if (auto r1 = a1.match(v1); v1) + { + return r1; + } + } + + value = nullptr; + return {}; + } +}; + class cpu_translator { protected: @@ -2890,6 +2978,18 @@ public: return llvm_avg{std::forward(a), std::forward(b)}; } + template ::is_ok>> + static auto fsqrt(T&& a) + { + return llvm_fsqrt{std::forward(a)}; + } + + template ::is_ok>> + static auto fabs(T&& a) + { + return llvm_fabs{std::forward(a)}; + } + template llvm::Function* get_intrinsic(llvm::Intrinsic::ID id) { @@ -2897,22 +2997,6 @@ public: return llvm::Intrinsic::getDeclaration(_module, id, {get_type()...}); } - template - auto sqrt(T a) - { - value_t result; - result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::sqrt), {a.eval(m_ir)}); - return result; - } - - template - auto fabs(T a) - { - value_t result; - result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fabs), {a.eval(m_ir)}); - return result; - } - // Opportunistic hardware FMA, can be used if results are identical for all possible input values template auto fmuladd(T a, T b, T c) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 8c3554bc00..62b7be1825 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7837,9 +7837,9 @@ public: { // TODO if (g_cfg.core.spu_accurate_xfloat) - set_vr(op.rt, fsplat(1.0) / sqrt(fabs(get_vr(op.ra)))); + set_vr(op.rt, fsplat(1.0) / fsqrt(fabs(get_vr(op.ra)))); else - set_vr(op.rt, fsplat(1.0) / sqrt(fabs(get_vr(op.ra)))); + set_vr(op.rt, fsplat(1.0) / fsqrt(fabs(get_vr(op.ra)))); } void FCGT(spu_opcode_t op)