diff --git a/Utilities/VirtualMemory.cpp b/Utilities/VirtualMemory.cpp
index d03b26e307..0d21858c2c 100644
--- a/Utilities/VirtualMemory.cpp
+++ b/Utilities/VirtualMemory.cpp
@@ -141,8 +141,9 @@ namespace utils
 #endif
 	}
 
-	shm::shm(u32 size)
+	shm::shm(u32 size, u32 flags)
 		: m_size(::align(size, 0x10000))
+		, m_flags(flags)
 	{
 #ifdef _WIN32
 		m_handle = ::CreateFileMappingW(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, 0, m_size, NULL);
diff --git a/Utilities/VirtualMemory.h b/Utilities/VirtualMemory.h
index 4e7afce3ef..5dd6be3c9f 100644
--- a/Utilities/VirtualMemory.h
+++ b/Utilities/VirtualMemory.h
@@ -48,9 +48,10 @@ namespace utils
 		int m_file;
 #endif
 		u32 m_size;
+		u32 m_flags;
 
 	public:
-		explicit shm(u32 size);
+		explicit shm(u32 size, u32 flags = 0);
 
 		shm(const shm&) = delete;
@@ -74,5 +75,11 @@ namespace utils
 		{
 			return m_size;
 		}
+
+		// Flags are unspecified, consider it userdata
+		u32 flags() const
+		{
+			return m_flags;
+		}
 	};
 }
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index c548afff5e..d3968ba20a 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -604,7 +604,7 @@ void ppu_thread::cpu_task()
 	}
 	case ppu_cmd::opd_call:
 	{
-		const ppu_func_opd_t opd = cmd_get(1).as();
+		const ppu_func_opd_t opd = cmd_get(1).as();
 		cmd_pop(1), fast_call(opd.addr, opd.rtoc);
 		break;
 	}
@@ -1089,13 +1089,13 @@ const auto ppu_stwcx_tx = build_function_asm
 	newLabel();
 	c->bind(rcheck);
 	c->mov(qw1->r32(), *addr);
-	c->mov(*qw0, imm_ptr(vm::g_reservations));
-	c->shr(qw1->r32(), 4);
+	c->mov(*qw0, imm_ptr(+vm::g_reservations));
+	c->and_(qw1->r32(), 0xff80);
+	c->shr(qw1->r32(), 1);
 	c->mov(*qw0, x86::qword_ptr(*qw0, *qw1));
-	c->and_(qw0->r64(), -128);
 	c->cmp(*qw0, SPU_OFF_64(rtime));
 	c->jne(fail);
 	c->mov(*qw0, imm_ptr(vm::g_base_addr));
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 98a08469e5..7f79952849 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -219,12 +219,12 @@ const auto spu_putllc_tx = build_function_asm([](
 #endif
 
 	// Prepare registers
-	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-	c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+	c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
 	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
 	c.mov(x86::rbp, x86::qword_ptr(x86::rax));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-	c.shr(args[0], 4);
+	c.and_(args[0].r32(), 0xff80);
+	c.shr(args[0].r32(), 1);
 	c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
 	c.xor_(x86::r12d, x86::r12d);
 	c.mov(x86::r13, args[1]);
@@ -608,12 +608,12 @@ const auto spu_getll_inexact = build_function_asm
+	if (eal >> 16 != (range_end - 1) >> 16)
+	{
+		u32 nexta = range_end & -65536;
+		u32 size0 = nexta - eal;
+		size -= size0;
+
+		// Split locking + transfer in two parts (before 64K border, and after it)
+		const auto lock = vm::range_lock(range_addr, nexta);
+#ifdef __GNUG__
+		std::memcpy(dst, src, size0);
+		dst += size0;
+		src += size0;
+#else
+		while (size0 >= 128)
+		{
+			mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
+
+			dst += 128;
+			src += 128;
+			size0 -= 128;
+		}
+
+		while (size0)
+		{
+			*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+			dst += 16;
+			src += 16;
+			size0 -= 16;
+		}
+#endif
+		lock->release(0);
+		range_addr = nexta;
+	}
+
+	const auto lock = vm::range_lock(range_addr, range_end);
 #ifdef __GNUG__
 	std::memcpy(dst, src, size);
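Note on the reservation addressing change above: `g_reservations` stops being a 4 GiB reservation mirror and becomes a flat 32 KiB table (`65536 / 128 * 64` bytes, see the vm.cpp hunk below), indexed by address bits 7..15 only, so each 128-byte granule within a 64K page owns one 8-byte stamp and every 64K page aliases the same table. The `and 0xff80` / `shr 1` pairs in `ppu_stwcx_tx` and `spu_putllc_tx` compute exactly that byte offset. A minimal standalone sketch of the same arithmetic (the helper name is hypothetical, not part of the patch):

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the patched stamp addressing:
// 128-byte reservation granule -> 64-byte stride into g_reservations,
// keyed by address bits 7..15 (every 64K page aliases one 32 KiB table).
inline std::size_t reservation_offset(std::uint32_t addr)
{
	return (addr & 0xff80) / 2; // equivalent to (addr % 65536) / 128 * 64
}
```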
diff --git a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
index 21266fff62..c6f7caa1a9 100644
--- a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
@@ -17,7 +17,7 @@ lv2_memory::lv2_memory(u32 size, u32 align, u64 flags, lv2_memory_container* ct)
 	, align(align)
 	, flags(flags)
 	, ct(ct)
-	, shm(std::make_shared<utils::shm>(size))
+	, shm(std::make_shared<utils::shm>(size, 1 /* shareable flag */))
 {
 }
 
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 4e87c09005..b328e24373 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -47,8 +47,11 @@ namespace vm
 	// Stats for debugging
 	u8* const g_stat_addr = memory_reserve_4GiB(g_exec_addr);
 
-	// Reservation stats (compressed x16)
-	u8* const g_reservations = memory_reserve_4GiB(g_stat_addr);
+	// Reservation stats
+	alignas(4096) u8 g_reservations[65536 / 128 * 64]{0};
+
+	// Shareable memory bits
+	alignas(4096) atomic_t<u8> g_shareable[65536]{0};
 
 	// Memory locations
 	std::vector<std::shared_ptr<block_t>> g_locations;
@@ -94,6 +97,34 @@
 		}
 	}
 
+	static void _lock_shareable_cache(u8 /*value*/, u32 addr /*mutable*/, u32 end /*mutable*/)
+	{
+		// Special value to block new range locks
+		g_addr_lock = 1;
+
+		// Convert to 64K-page numbers
+		addr >>= 16;
+		end >>= 16;
+
+		// Wait for range locks to clear
+		for (auto& lock : g_range_locks)
+		{
+			while (const u64 _lock = lock.load())
+			{
+				if (const u32 lock_page = static_cast<u32>(_lock) >> 16)
+				{
+					if (lock_page < addr || lock_page >= end)
+					{
+						// Ignoreable range lock
+						break;
+					}
+				}
+
+				_mm_pause();
+			}
+		}
+	}
+
 	void passive_lock(cpu_thread& cpu)
 	{
 		if (g_tls_locked && *g_tls_locked == &cpu) [[unlikely]]
@@ -118,21 +149,45 @@
 		_register_lock(&cpu);
 	}
 
-	atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
+	atomic_t<u64>* range_lock(u32 addr, u32 end)
 	{
-		static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
+		static const auto test_addr = [](u32 target, u32 addr, u32 end) -> u64
 		{
-			return addr > target || end <= target;
+			if (target == 1)
+			{
+				// Shareable info is being modified
+				return 0;
+			}
+
+			if (g_shareable[target >> 16])
+			{
+				// Target within shareable memory range
+				target &= 0xffff;
+			}
+
+			if (g_shareable[addr >> 16])
+			{
+				// Track shareable memory locks in 0x0..0xffff address range
+				addr &= 0xffff;
+				end = ((end - 1) & 0xffff) + 1;
+			}
+
+			if (addr > target || end <= target)
+			{
+				return u64{end} << 32 | addr;
+			}
+
+			return 0;
 		};
 
 		atomic_t<u64>* _ret;
 
-		if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+		if (u64 _a1 = test_addr(g_addr_lock.load(), addr, end)) [[likely]]
 		{
 			// Optimistic path (hope that address range is not locked)
-			_ret = _register_range_lock(u64{end} << 32 | addr);
+			_ret = _register_range_lock(_a1);
 
-			if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+			if (_a1 == test_addr(g_addr_lock.load(), addr, end)) [[likely]]
 			{
 				return _ret;
 			}
@@ -142,7 +197,7 @@
 		{
 			::reader_lock lock(g_mutex);
 
-			_ret = _register_range_lock(u64{end} << 32 | addr);
+			_ret = _register_range_lock(test_addr(-1, addr, end));
 		}
 
 		return _ret;
@@ -233,7 +288,7 @@
 		m_upgraded = true;
 	}
 
-	writer_lock::writer_lock(u32 addr)
+	writer_lock::writer_lock(u32 addr /*mutable*/)
 	{
 		auto cpu = get_current_cpu_thread();
@@ -244,7 +299,7 @@
 
 		g_mutex.lock();
 
-		if (addr)
+		if (addr >= 0x10000)
 		{
 			for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
 			{
@@ -256,6 +311,12 @@
 
 		g_addr_lock = addr;
 
+		if (g_shareable[addr >> 16])
+		{
+			// Reservation address in shareable memory range
+			addr = addr & 0xffff;
+		}
+
 		for (auto& lock : g_range_locks)
 		{
 			while (true)
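The rewritten `test_addr` above folds both the candidate range and the currently locked address into the 0x0..0xffff window whenever the corresponding 64K page is flagged in `g_shareable`, so a writer lock taken through one mirror of a shareable page conflicts with range locks held on any other mirror. A simplified standalone model of that conflict test (an assumption-level sketch, not the patch's code; the shareable flags are passed in rather than read from `g_shareable`):

```cpp
#include <cstdint>

// Simplified model of the folding rule used by test_addr above:
// ranges touching shareable pages are compared modulo 64K, so every
// mirror of the same physical page collides with the locked target.
inline bool ranges_conflict(std::uint32_t target, std::uint32_t addr, std::uint32_t end,
                            bool target_shareable, bool range_shareable)
{
	if (target_shareable)
	{
		target &= 0xffff;
	}

	if (range_shareable)
	{
		addr &= 0xffff;
		end = ((end - 1) & 0xffff) + 1;
	}

	// test_addr returns 0 (retry) exactly when this condition holds
	return addr <= target && target < end;
}
```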
@@ -345,6 +406,19 @@
 			}
 		}
 
+		if (shm && shm->flags() != 0)
+		{
+			_lock_shareable_cache(1, addr, addr + size);
+
+			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+			{
+				g_shareable[i] = 1;
+			}
+
+			// Unlock
+			g_addr_lock.compare_and_swap(1, 0);
+		}
+
 		// Notify rsx that range has become valid
 		// Note: This must be done *before* memory gets mapped while holding the vm lock, otherwise
 		// the RSX might try to invalidate memory that got unmapped and remapped
@@ -482,6 +556,19 @@
 			}
 		}
 
+		if (g_shareable[addr >> 16])
+		{
+			_lock_shareable_cache(0, addr, addr + size);
+
+			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+			{
+				g_shareable[i] = 0;
+			}
+
+			// Unlock
+			g_addr_lock.compare_and_swap(1, 0);
+		}
+
 		// Notify rsx to invalidate range
 		// Note: This must be done *before* memory gets unmapped while holding the vm lock, otherwise
 		// the RSX might try to call VirtualProtect on memory that is already unmapped
@@ -624,35 +711,12 @@
 		, size(size)
 		, flags(flags)
 	{
-		// Allocate compressed reservation info area (avoid SPU MMIO area)
-		if (addr != 0xe0000000)
-		{
-			// Beginning of the address space
-			if (addr == 0x10000)
-			{
-				utils::memory_commit(g_reservations, 0x1000);
-			}
-
-			utils::memory_commit(g_reservations + addr / 16, size / 16);
-		}
-		else
-		{
-			// RawSPU LS
-			for (u32 i = 0; i < 6; i++)
-			{
-				utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
-			}
-
-			// End of the address space
-			utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
-		}
-
 		if (flags & 0x100)
 		{
 			// Special path for 4k-aligned pages
 			m_common = std::make_shared<utils::shm>(size);
 			verify(HERE), m_common->map_critical(vm::base(addr), utils::protection::no) == vm::base(addr);
-			verify(HERE), m_common->map_critical(vm::get_super_ptr(addr), utils::protection::rw) == vm::get_super_ptr(addr);
+			verify(HERE), m_common->map_critical(vm::get_super_ptr(addr)) == vm::get_super_ptr(addr);
 		}
 	}
@@ -1142,7 +1206,7 @@
 			g_sudo_addr, g_sudo_addr + UINT32_MAX,
 			g_exec_addr, g_exec_addr + 0x200000000 - 1,
 			g_stat_addr, g_stat_addr + UINT32_MAX,
-			g_reservations, g_reservations + UINT32_MAX);
+			g_reservations, g_reservations + sizeof(g_reservations) - 1);
 
 		g_locations =
 		{
@@ -1154,6 +1218,9 @@
 			std::make_shared<block_t>(0xD0000000, 0x10000000, 0x111), // stack
 			std::make_shared<block_t>(0xE0000000, 0x20000000), // SPU reserved
 		};
+
+		std::memset(g_reservations, 0, sizeof(g_reservations));
+		std::memset(g_shareable, 0, sizeof(g_shareable));
 	}
 }
@@ -1164,7 +1231,6 @@
 		utils::memory_decommit(g_base_addr, 0x100000000);
 		utils::memory_decommit(g_exec_addr, 0x100000000);
 		utils::memory_decommit(g_stat_addr, 0x100000000);
-		utils::memory_decommit(g_reservations, 0x100000000);
 	}
 }
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index a9b833bae4..fbb9ab5728 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -13,7 +13,7 @@ namespace vm
 	extern u8* const g_sudo_addr;
 	extern u8* const g_exec_addr;
 	extern u8* const g_stat_addr;
-	extern u8* const g_reservations;
+	extern u8 g_reservations[];
 
 	struct writer_lock;
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 330d8865c1..9716bab16a 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -13,7 +13,7 @@ namespace vm
 	// Register reader
 	void passive_lock(cpu_thread& cpu);
 
-	atomic_t<u64>* passive_lock(const u32 begin, const u32 end);
+	atomic_t<u64>* range_lock(u32 begin, u32 end);
 
 	// Unregister reader
 	void passive_unlock(cpu_thread& cpu);
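With the header changes above, the old address-taking `passive_lock` overload is gone and callers use `vm::range_lock` the same way the SPU DMA path earlier in this patch does: register the range, perform the transfer, then release the slot with 0. A hedged usage sketch (the function name and plain `memcpy` body are illustrative only, assuming the usual rpcs3 headers for `u32`, `vm::base` and `atomic_t`):

```cpp
#include <cstring>

// Illustrative caller of the renamed API, mirroring the SPU PUT path above.
static void copy_into_guest(u32 addr, const void* src, u32 size)
{
	const auto lock = vm::range_lock(addr, addr + size); // register the reader range

	std::memcpy(vm::base(addr), src, size);              // transfer while the range is protected

	lock->release(0);                                     // free the range-lock slot
}
```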
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index 537600514b..3c15d40447 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -10,7 +10,7 @@ namespace vm
 	inline atomic_t<u64>& reservation_acquire(u32 addr, u32 size)
 	{
 		// Access reservation info: stamp and the lock bit
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
 	}
 
 	// Update reservation status
@@ -23,7 +23,7 @@
 	// Get reservation sync variable
 	inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
 	{
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);
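Since `reservation_acquire` and `reservation_notifier` now resolve a stamp through the same `(addr & 0xff80) / 2` offset as the generated assembly, any two effective addresses with equal bits 7..15 share one reservation slot, which is what lets shareable mirrors observe a single stamp by construction. A small standalone illustration of that aliasing property (not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

// Two addresses hit the same reservation stamp iff they select the same
// 128-byte granule within a 64K page (address bits 7..15 are equal).
inline bool same_reservation_slot(std::uint32_t a, std::uint32_t b)
{
	return (a & 0xff80) == (b & 0xff80);
}

int main()
{
	assert(same_reservation_slot(0x00010080, 0x30010080));  // 64K-page mirrors alias
	assert(!same_reservation_slot(0x00010080, 0x00010100)); // adjacent granules do not
}
```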