diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 1fe8e3e8a3..5a2015f083 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2831,6 +2831,46 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 	}
 	default:
 	{
+		if (eal >> 28 == rsx::constants::local_mem_base >> 28)
+		{
+			if (size > s_rep_movsb_threshold)
+			{
+				__movsb(dst, src, size);
+			}
+			else
+			{
+				// Avoid unaligned stores in mov_rdata_avx
+				if (reinterpret_cast<u64>(dst) & 0x10)
+				{
+					*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+					dst += 16;
+					src += 16;
+					size -= 16;
+				}
+
+				while (size >= 128)
+				{
+					mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
+
+					dst += 128;
+					src += 128;
+					size -= 128;
+				}
+
+				while (size)
+				{
+					*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+					dst += 16;
+					src += 16;
+					size -= 16;
+				}
+			}
+
+			break;
+		}
+
 		if (((eal & 127) + size) <= 128)
 		{
 			vm::range_lock(range_lock, eal, size);
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 253af406db..c4d805554f 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "vm.h"
+#include "Emu/RSX/rsx_utils.h"
 
 class cpu_thread;
 class shared_mutex;
@@ -43,6 +44,14 @@ namespace vm
 	template <uint Size = 0>
 	FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 _size)
 	{
+		if constexpr (Size == 0)
+		{
+			if (begin >> 28 == rsx::constants::local_mem_base >> 28)
+			{
+				return;
+			}
+		}
+
 		// Optimistic locking.
 		// Note that we store the range we will be accessing, without any clamping.
 		range_lock->store(begin | (u64{_size} << 32));
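
For reference, below is a minimal standalone sketch of the copy strategy the SPUThread.cpp hunk uses on its non-movsb path: peel one 16-byte block when the destination is 16- but not 32-byte aligned (the mov_rdata_avx concern noted in the diff), copy the bulk in 128-byte chunks, then finish with 16-byte blocks. This is a sketch only, not the RPCS3 code: the v128 stores and mov_rdata are stood in for by memcpy, the helper name copy_blocks is hypothetical, and it assumes size is a multiple of 16, as the loops in the hunk do.

// Hypothetical, self-contained illustration of the aligned-copy loop above.
// The 16-byte and 128-byte vector moves (v128 stores, mov_rdata) are replaced
// with memcpy; size is assumed to be a multiple of 16, as in the hunk.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

static void copy_blocks(std::uint8_t* dst, const std::uint8_t* src, std::uint32_t size)
{
	// Peel one 16-byte block so the 128-byte loop writes to a 32-byte-aligned
	// destination (mirrors the "avoid unaligned stores in mov_rdata_avx" check).
	if (reinterpret_cast<std::uintptr_t>(dst) & 0x10)
	{
		std::memcpy(dst, src, 16);
		dst += 16; src += 16; size -= 16;
	}

	// Bulk copy in 128-byte chunks (stand-in for mov_rdata).
	while (size >= 128)
	{
		std::memcpy(dst, src, 128);
		dst += 128; src += 128; size -= 128;
	}

	// Copy any remaining 16-byte blocks.
	while (size)
	{
		std::memcpy(dst, src, 16);
		dst += 16; src += 16; size -= 16;
	}
}

int main()
{
	std::vector<std::uint8_t> src(4096), dst(4096);
	for (std::size_t i = 0; i < src.size(); i++)
		src[i] = static_cast<std::uint8_t>(i);

	copy_blocks(dst.data(), src.data(), 4096);
	std::printf("copies match: %d\n", std::memcmp(dst.data(), src.data(), 4096) == 0);
	return 0;
}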