From 8ae255450585f34b6c816b9f75a1646b533c869a Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Sun, 26 Apr 2020 19:34:55 +0300
Subject: [PATCH] Implement mov_rdata_avx

---
 rpcs3/Emu/Cell/SPUThread.cpp | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 7912ae5980..d2febd646f 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -41,8 +41,42 @@ static FORCE_INLINE bool cmp_rdata(const decltype(spu_thread::rdata)& lhs, const
 	return !(r._u64[0] | r._u64[1]);
 }
 
+static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
+{
+#if defined(_MSC_VER) || defined(__AVX__)
+	_mm256_storeu_si256(dst + 0, _mm256_loadu_si256(src + 0)); // 4 x 32-byte unaligned copies
+	_mm256_storeu_si256(dst + 1, _mm256_loadu_si256(src + 1));
+	_mm256_storeu_si256(dst + 2, _mm256_loadu_si256(src + 2));
+	_mm256_storeu_si256(dst + 3, _mm256_loadu_si256(src + 3));
+#else
+	__asm__( // VEX copies emitted by hand so the TU itself can stay SSE-only
+		"vmovdqu 0*32(%[src]), %%ymm0;" // load
+		"vmovdqu %%ymm0, 0*32(%[dst]);" // store
+		"vmovdqu 1*32(%[src]), %%ymm0;"
+		"vmovdqu %%ymm0, 1*32(%[dst]);"
+		"vmovdqu 2*32(%[src]), %%ymm0;"
+		"vmovdqu %%ymm0, 2*32(%[dst]);"
+		"vmovdqu 3*32(%[src]), %%ymm0;"
+		"vmovdqu %%ymm0, 3*32(%[dst]);"
+		"vzeroupper" // avoid AVX->SSE transition penalty in the SSE-only caller
+		:
+		: [src] "r" (src)
+		, [dst] "r" (dst)
+		: "xmm0", "memory" // fix: "memory" clobber — asm reads *src and writes *dst behind the compiler's back
+	);
+#endif
+}
+
 static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const decltype(spu_thread::rdata)& src)
 {
+#ifndef __AVX__
+	if (s_tsx_avx) [[likely]]
+#endif
+	{
+		mov_rdata_avx(reinterpret_cast<__m256i*>(&dst), reinterpret_cast<const __m256i*>(&src)); // fix: restore stripped <const __m256i*> template argument
+		return;
+	}
+
 	{
 		const v128 data0 = src[0];
 		const v128 data1 = src[1];