From e2439e962cf4e2875c54901df2aba9af9ff1a522 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 16 Jan 2018 14:32:57 +0300 Subject: [PATCH] SPU: use XOP instructions --- Utilities/sysinfo.cpp | 11 +++ Utilities/sysinfo.h | 2 + rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 127 +++++++++++++++++++++++++ 3 files changed, 140 insertions(+) diff --git a/Utilities/sysinfo.cpp b/Utilities/sysinfo.cpp index 421d7655c4..8ec13aaf6d 100644 --- a/Utilities/sysinfo.cpp +++ b/Utilities/sysinfo.cpp @@ -39,6 +39,12 @@ bool utils::has_512() return g_value; } +bool utils::has_xop() +{ + static const bool g_value = has_avx() && get_cpuid(0x80000001, 0)[2] & 0x800; + return g_value; +} + std::string utils::get_system_info() { std::string result; @@ -92,6 +98,11 @@ std::string utils::get_system_info() { result += '+'; } + + if (has_xop()) + { + result += 'x'; + } } if (has_rtm()) diff --git a/Utilities/sysinfo.h b/Utilities/sysinfo.h index 1ef041078c..4151acb15a 100644 --- a/Utilities/sysinfo.h +++ b/Utilities/sysinfo.h @@ -26,6 +26,8 @@ namespace utils bool has_512(); + bool has_xop(); + inline bool transaction_enter() { while (true) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 426966ae6a..5571173c06 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -615,6 +615,16 @@ void spu_recompiler::ROT(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->vprotd(vt, va, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const s32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -654,6 +664,22 @@ void spu_recompiler::ROTM(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubd(vb, XmmConst(_mm_set1_epi32(1))); + c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->pxor(vt, vt); + c->psubd(vt, vb); + c->pcmpgtd(vb, XmmConst(_mm_set1_epi32(31))); + c->vpshld(vt, va, vt); + c->vpandn(vt, vb, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -694,6 +720,21 @@ void spu_recompiler::ROTMA(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubd(vb, XmmConst(_mm_set1_epi32(1))); + c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->pxor(vt, vt); + c->pminud(vb, XmmConst(_mm_set1_epi32(31))); + c->psubd(vt, vb); + c->vpshad(vt, va, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](s32* t, const s32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -733,6 +774,19 @@ void spu_recompiler::SHL(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->pand(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->vpcmpgtd(vt, vb, XmmConst(_mm_set1_epi32(31))); + c->vpshld(vb, va, vb); + c->pandn(vt, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -777,6 +831,16 @@ void spu_recompiler::ROTH(spu_opcode_t op) //nf return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->vprotw(vt, va, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u16* t, const u16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -836,6 +900,22 @@ void spu_recompiler::ROTHM(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->pxor(vt, vt); + c->psubw(vt, vb); + c->pcmpgtw(vb, XmmConst(_mm_set1_epi16(15))); + c->vpshlw(vt, va, vt); + c->vpandn(vt, vb, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u16* t, const u16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -898,6 +978,21 @@ void spu_recompiler::ROTMAH(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->pxor(vt, vt); + c->pminuw(vb, XmmConst(_mm_set1_epi16(15))); + c->psubw(vt, vb); + c->vpshaw(vt, va, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](s16* t, const s16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -956,6 +1051,19 @@ void spu_recompiler::SHLH(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->pand(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->vpcmpgtw(vt, vb, XmmConst(_mm_set1_epi16(15))); + c->vpshlw(vb, va, vb); + c->pandn(vt, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u16* t, const u16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -994,6 +1102,14 @@ void spu_recompiler::ROTI(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + c->vprotd(va, va, s); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); + return; + } + const XmmLink& va = XmmGet(op.ra, XmmType::Int); const XmmLink& v1 = XmmAlloc(); c->movdqa(v1, va); @@ -3351,6 +3467,13 @@ void spu_recompiler::SELB(spu_opcode_t op) return; } + if (utils::has_xop()) + { + c->vpcmov(vc, vb, SPU_OFF_128(gpr, op.ra), vc); + c->movdqa(SPU_OFF_128(gpr, op.rt4), vc); + return; + } + c->pand(vb, vc); c->pandn(vc, SPU_OFF_128(gpr, op.ra)); c->por(vb, vc); @@ -3475,6 +3598,10 @@ void spu_recompiler::SHUFB(spu_opcode_t op) { c->vpternlogd(vc, va, vb, 0xca /* A?B:C */); } + else if (utils::has_xop()) + { + c->vpcmov(vc, va, vb, vc); + } else { c->pand(va, vc);