diff --git a/Source/Core/Common/Swap.h b/Source/Core/Common/Swap.h index 965d1aabc2..1c51866d72 100644 --- a/Source/Core/Common/Swap.h +++ b/Source/Core/Common/Swap.h @@ -19,6 +19,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/Intrinsics.h" namespace Common { @@ -168,6 +169,64 @@ inline T FromBigEndian(T data) return data; } +#ifdef __AVX__ +// Byte-swap patterns for PSHUFB. +template +inline __m128i GetSwapShuffle128() +{ + if constexpr (ByteSize == 2) + return _mm_set_epi64x(0x0e0f0c0d0a0b0809, 0x0607040502030001); + else if constexpr (ByteSize == 4) + return _mm_set_epi64x(0x0c0d0e0f08090a0b, 0x0405060700010203); + else if constexpr (ByteSize == 8) + return _mm_set_epi64x(0x08090a0b0c0d0e0f, 0x0001020304050607); + else + static_assert(false); +} +#endif + +#ifdef __AVX2__ +// Byte-swap patterns for VPSHUFB. +template +inline __m256i GetSwapShuffle256() +{ + __m128i pattern = GetSwapShuffle128(); + return _mm256_set_m128i(pattern, pattern); +} +#endif + +// Templated functions for byteswapped copies. +template +inline void CopySwapped(T* dst, const T* src, size_t byte_size) +{ + constexpr size_t S = sizeof(T); + const size_t count = byte_size / S; + size_t i = 0; + +#ifdef __AVX2__ + for (; i + 32 / S <= count; i += 32 / S) + { + const auto vdst = reinterpret_cast<__m256i*>(dst + i); + const auto vsrc = reinterpret_cast(src + i); + const auto swap = GetSwapShuffle256(); + _mm256_storeu_si256(vdst, _mm256_shuffle_epi8(_mm256_loadu_si256(vsrc), swap)); + } +#endif + +#ifdef __AVX__ + for (; i + 16 / S <= count; i += 16 / S) + { + const auto vdst = reinterpret_cast<__m128i*>(dst + i); + const auto vsrc = reinterpret_cast(src + i); + const auto swap = GetSwapShuffle128(); + _mm_storeu_si128(vdst, _mm_shuffle_epi8(_mm_loadu_si128(vsrc), swap)); + } +#endif + + for (; i < count; ++i) + dst[i] = Common::FromBigEndian(src[i]); +} + template struct BigEndianValue { diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index e0708605db..a4cea3aac3 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -130,29 +130,18 @@ public: void Write_U32_Swap(u32 var, u32 address); void Write_U64_Swap(u64 var, u32 address); - // Templated functions for byteswapped copies. template - void CopyFromEmuSwapped(T* data, u32 address, size_t size) const + void CopyFromEmuSwapped(T* dst, u32 address, size_t size) { - const T* src = reinterpret_cast(GetPointerForRange(address, size)); - - if (src == nullptr) - return; - - for (size_t i = 0; i < size / sizeof(T); i++) - data[i] = Common::FromBigEndian(src[i]); + if (const T* src = reinterpret_cast(GetPointerForRange(address, size))) + Common::CopySwapped(dst, src, size); } template - void CopyToEmuSwapped(u32 address, const T* data, size_t size) + void CopyToEmuSwapped(u32 address, const T* src, size_t size) { - T* dest = reinterpret_cast(GetPointerForRange(address, size)); - - if (dest == nullptr) - return; - - for (size_t i = 0; i < size / sizeof(T); i++) - dest[i] = Common::FromBigEndian(data[i]); + if (T* dst = reinterpret_cast(GetPointerForRange(address, size))) + Common::CopySwapped(dst, src, size); } private: