diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index b345092287..106554c2e0 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -493,6 +493,11 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
 	//create command buffer...
 	m_command_buffer_pool.create((*m_device));
 	m_command_buffer.create(m_command_buffer_pool);
+
+	//Create secondary command_buffer for parallel operations
+	m_secondary_command_buffer_pool.create((*m_device));
+	m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
+
 	open_command_buffer();
 
 	for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
@@ -620,6 +625,9 @@ VKGSRender::~VKGSRender()
 	m_command_buffer.destroy();
 	m_command_buffer_pool.destroy();
 
+	m_secondary_command_buffer.destroy();
+	m_secondary_command_buffer_pool.destroy();
+
 	//Device handles/contexts
 	m_swap_chain->destroy();
 	m_thread_context.close();
@@ -632,7 +640,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 	if (is_writing)
 		return m_texture_cache.invalidate_address(address);
 	else
-		return m_texture_cache.flush_address(address, *m_device, m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+	{
+		if (!m_texture_cache.address_is_flushable(address))
+			return false;
+
+		if (std::this_thread::get_id() != rsx_thread)
+		{
+			//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
+			m_flush_commands = true;
+			m_queued_threads++;
+
+			//This is awful!
+			while (m_flush_commands);
+
+			std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+			bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+
+			m_queued_threads--;
+			return status;
+		}
+
+		std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+		return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+	}
 
 	return false;
 }
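The spin-wait above (`while (m_flush_commands);`) burns a CPU core on every off-thread flush until the RSX thread services the request, and the code itself flags this ("This is awful!"). A condition-variable handshake is the conventional alternative; the sketch below is illustrative only, and `m_flush_mutex`, `m_flush_cv`, `wait_for_rsx_flush` and `signal_flush_done` are hypothetical names, not part of this patch:

    // Hypothetical alternative to the busy-wait (not part of this patch).
    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    std::mutex m_flush_mutex;            // would guard m_flush_commands
    std::condition_variable m_flush_cv;  // signalled once the flush completes

    // Faulting thread: request a flush, then sleep instead of spinning.
    void wait_for_rsx_flush(std::atomic<bool>& flush_commands)
    {
        std::unique_lock<std::mutex> lock(m_flush_mutex);
        m_flush_cv.wait(lock, [&] { return !flush_commands; });
    }

    // RSX thread: finishes the flush inside do_local_task(), then wakes all waiters.
    void signal_flush_done(std::atomic<bool>& flush_commands)
    {
        {
            std::lock_guard<std::mutex> lock(m_flush_mutex);
            flush_commands = false;
        }
        m_flush_cv.notify_all();
    }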
@@ -646,7 +676,9 @@ void VKGSRender::begin()
 {
 	std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
 
-	close_and_submit_command_buffer({}, m_submit_fence);
+	//??Should we wait for the queue to actually render to the GPU? or just flush the queue?
+	//Needs investigation to determine what drivers expect here, bottom_of_pipe is guaranteed to work, but will be too slow
+	close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
 	CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
 
 	vkResetDescriptorPool(*m_device, descriptor_pool, 0);
@@ -833,9 +865,9 @@ void VKGSRender::end()
 	std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
 	m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
 
-	rsx::thread::end();
-
 	copy_render_targets_to_dma_location();
+
+	rsx::thread::end();
 }
 
 void VKGSRender::set_viewport()
@@ -875,6 +907,8 @@ void VKGSRender::on_init_thread()
 	GSRender::on_init_thread();
 	m_attrib_ring_info.init(8 * RING_BUFFER_SIZE);
 	m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
+
+	rsx_thread = std::this_thread::get_id();
 }
 
 void VKGSRender::on_exit()
@@ -987,13 +1021,6 @@ void VKGSRender::clear_surface(u32 mask)
 
 void VKGSRender::sync_at_semaphore_release()
 {
-	close_and_submit_command_buffer({}, m_submit_fence);
-	CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
-
-	CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
-	CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
-	open_command_buffer();
-
 	m_flush_draw_buffers = true;
 }
 
@@ -1002,6 +1029,13 @@ void VKGSRender::copy_render_targets_to_dma_location()
 	if (!m_flush_draw_buffers)
 		return;
 
+	if (!g_cfg_rsx_write_color_buffers && !g_cfg_rsx_write_depth_buffer)
+		return;
+
+	//TODO: Make this asynchronous. Should be similar to a glFlush() but in this case it's similar to glFinish
+	//This is due to all the hard waits for fences
+	//TODO: Use a command buffer array to allow explicit draw command tracking
+
 	if (g_cfg_rsx_write_color_buffers)
 	{
 		for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
@@ -1023,7 +1057,28 @@ void VKGSRender::copy_render_targets_to_dma_location()
 		}
 	}
 
-	m_flush_draw_buffers = false;
+	close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+	CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+	CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+	CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+	open_command_buffer();
+}
+
+void VKGSRender::do_local_task()
+{
+	if (m_flush_commands)
+	{
+		close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+		CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+		CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+		CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+		open_command_buffer();
+
+		m_flush_commands = false;
+		while (m_queued_threads);
+	}
 }
 
 bool VKGSRender::do_method(u32 cmd, u32 arg)
@@ -1294,17 +1349,16 @@ void VKGSRender::write_buffers()
 {
 }
 
-void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence)
+void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
 {
 	CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
 
-	VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 	VkCommandBuffer cmd = m_command_buffer;
 
 	VkSubmitInfo infos = {};
 	infos.commandBufferCount = 1;
 	infos.pCommandBuffers = &cmd;
-	infos.pWaitDstStageMask = &pipe_stage_flags;
+	infos.pWaitDstStageMask = &pipeline_stage_flags;
 	infos.pWaitSemaphores = semaphores.data();
 	infos.waitSemaphoreCount = static_cast<u32>(semaphores.size());
 	infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
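On the `pipeline_stage_flags` parameter added to `close_and_submit_command_buffer` above: `VkSubmitInfo::pWaitDstStageMask` only names the pipeline stages at which the *semaphore waits* of that submit take effect, one entry per wait semaphore. The `close_and_submit_command_buffer({}, ...)` call sites pass an empty semaphore list, so `waitSemaphoreCount` is 0 and the mask is never read; TOP_OF_PIPE vs BOTTOM_OF_PIPE changes nothing for those submissions. It only matters for a submit that actually waits, e.g. (sketch; `acquire_semaphore`, `queue`, `cmd` and `fence` are assumed declared elsewhere):

    // Sketch: the stage mask pairs with a wait semaphore.
    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

    VkSubmitInfo info = {};
    info.sType              = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    info.waitSemaphoreCount = 1;
    info.pWaitSemaphores    = &acquire_semaphore; // e.g. from vkAcquireNextImageKHR
    info.pWaitDstStageMask  = &wait_stage;        // stages that stall until the semaphore signals
    info.commandBufferCount = 1;
    info.pCommandBuffers    = &cmd;

    CHECK_RESULT(vkQueueSubmit(queue, 1, &info, fence));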
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h
index 3ff32dd20b..de42ae1937 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -11,6 +11,7 @@
 #include "VKProgramBuffer.h"
 #include "../GCM.h"
 #include "../rsx_utils.h"
+#include <thread>
 
 #pragma comment(lib, "VKstatic.1.lib")
 
@@ -60,6 +61,9 @@ private:
 	vk::command_pool m_command_buffer_pool;
 	vk::command_buffer m_command_buffer;
 
+	std::mutex m_secondary_cb_guard;
+	vk::command_pool m_secondary_command_buffer_pool;
+	vk::command_buffer m_secondary_command_buffer;
 
 	std::array<VkRenderPass, 120> m_render_passes;
 	VkDescriptorSetLayout descriptor_layouts;
@@ -86,7 +90,13 @@ private:
 	rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
 	rsx::gcm_framebuffer_info m_depth_surface_info;
+
 	bool m_flush_draw_buffers = false;
+
+	std::atomic<bool> m_flush_commands = false;
+	std::atomic<int> m_queued_threads = 0;
+
+	std::thread::id rsx_thread;
 
 public:
 	VKGSRender();
@@ -94,7 +104,7 @@ public:
 
 private:
 	void clear_surface(u32 mask);
-	void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence);
+	void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
 	void open_command_buffer();
 	void sync_at_semaphore_release();
 	void prepare_rtts();
@@ -117,5 +127,7 @@ protected:
 	bool do_method(u32 id, u32 arg) override;
 	void flip(int buffer) override;
 
+	void do_local_task() override;
+
 	bool on_access_violation(u32 address, bool is_writing) override;
 };
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 40f0ba9bd7..3f624b64f1 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -32,7 +32,7 @@ namespace rsx
 
 namespace vk
 {
-#define CHECK_RESULT(expr) do { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh", (s32)_res); } while (0)
+#define CHECK_RESULT(expr) { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh" HERE, (s32)_res); }
 
 	VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
 	VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
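One caveat with the `CHECK_RESULT` rewrite above: dropping the `do { ... } while (0)` wrapper means the macro no longer expands to a single statement. With a bare block, the semicolon the caller writes after `CHECK_RESULT(...)` becomes an empty statement, which breaks `if`/`else` chains. A minimal illustration (`CHECK_RESULT_SAFE` is just an illustrative name, not proposed for the tree):

    // With the new bare-block form this fails to compile:
    //
    //     if (use_fence)
    //         CHECK_RESULT(vkQueueSubmit(queue, 1, &info, fence));       // expands to { ... } ;
    //     else                                                           // error: 'else' without a matching 'if'
    //         CHECK_RESULT(vkQueueSubmit(queue, 1, &info, VK_NULL_HANDLE));
    //
    // The do/while(0) idiom keeps the expansion a single statement:
    #define CHECK_RESULT_SAFE(expr) \
        do { VkResult _res = (expr); if (_res != VK_SUCCESS) \
            fmt::throw_exception("Assertion failed! Result is %Xh" HERE, (s32)_res); } while (0)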
Result is %Xh" HERE, (s32)_res); } VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index f4c25cd6e6..e85541c962 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -28,6 +28,14 @@ namespace vk cached_texture_section() {} + void reset(u32 base, u32 length) + { + if (length > cpu_address_range) + release_dma_resources(); + + rsx::buffered_section::reset(base, length); + } + void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 native_pitch = 0, bool managed=true) { width = w; @@ -38,8 +46,7 @@ namespace vk uploaded_image_view.reset(view); vram_texture = image; - if (managed) - managed_texture.reset(image); + if (managed) managed_texture.reset(image); //TODO: Properly compute these values this->native_pitch = native_pitch; @@ -105,16 +112,18 @@ namespace vk bool is_flushable() const { - if (protection == utils::protection::ro || protection == utils::protection::no) - return true; - - if (uploaded_image_view.get() == nullptr && vram_texture != nullptr) - return true; - - return false; + //This section is active and can be flushed to cpu + return (protection == utils::protection::no); } - void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) + bool is_flushed() const + { + //This memory section was flushable, but a flush has already removed protection + return (protection == utils::protection::rw && uploaded_image_view.get() == nullptr && managed_texture.get() == nullptr); + } + + void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, + bool manage_cb_lifetime = false, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { if (m_device == nullptr) { @@ -130,7 +139,21 @@ namespace vk if (dma_buffer.get() == nullptr) { - dma_buffer.reset(new vk::buffer(*m_device, native_pitch * height, heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); + dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); + } + + if (manage_cb_lifetime) + { + //cb has to be guaranteed to be in a closed state + //This function can be called asynchronously + VkCommandBufferInheritanceInfo inheritance_info = {}; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + + VkCommandBufferBeginInfo begin_infos = {}; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos)); } VkBufferImageCopy copyRegion = {}; @@ -147,52 +170,47 @@ namespace vk vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, ©Region); change_image_layout(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, layout, subresource_range); - CHECK_RESULT(vkEndCommandBuffer(cmd)); + if (manage_cb_lifetime) + { + CHECK_RESULT(vkEndCommandBuffer(cmd)); - VkPipelineStageFlags pipe_stage_flags = 
@@ -147,52 +170,47 @@ namespace vk
 			vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
 			change_image_layout(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, layout, subresource_range);
 
-			CHECK_RESULT(vkEndCommandBuffer(cmd));
+			if (manage_cb_lifetime)
+			{
+				CHECK_RESULT(vkEndCommandBuffer(cmd));
 
-			VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-			VkCommandBuffer command_buffer = cmd;
+				VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+				VkCommandBuffer command_buffer = cmd;
 
-			VkSubmitInfo infos = {};
-			infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-			infos.commandBufferCount = 1;
-			infos.pCommandBuffers = &command_buffer;
-			infos.pWaitDstStageMask = &pipe_stage_flags;
-			infos.pWaitSemaphores = nullptr;
-			infos.waitSemaphoreCount = 0;
+				VkSubmitInfo infos = {};
+				infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+				infos.commandBufferCount = 1;
+				infos.pCommandBuffers = &command_buffer;
+				infos.pWaitDstStageMask = &pipe_stage_flags;
+				infos.pWaitSemaphores = nullptr;
+				infos.waitSemaphoreCount = 0;
 
-			CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence));
+				CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence));
 
-			//Now we need to restart the command-buffer to restore it to the way it was before...
-			CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX));
-			CHECK_RESULT(vkResetCommandPool(*m_device, cmd.get_command_pool(), 0));
-			CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
-
-			VkCommandBufferInheritanceInfo inheritance_info = {};
-			inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
-
-			VkCommandBufferBeginInfo begin_infos = {};
-			begin_infos.pInheritanceInfo = &inheritance_info;
-			begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-			begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-			CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
+				//Now we need to restart the command-buffer to restore it to the way it was before...
+				CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX));
+				CHECK_RESULT(vkResetCommandPool(*m_device, cmd.get_command_pool(), 0));
+				CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
+			}
 		}
 
 		template <typename T>
 		void do_memory_transfer(void *pixels_dst, void *pixels_src)
 		{
+			//LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
 			if (pitch == native_pitch)
 			{
 				if (sizeof T == 1)
-					memcpy(pixels_dst, pixels_src, native_pitch * height);
+					memcpy(pixels_dst, pixels_src, cpu_address_range);
 				else
 				{
-					const u32 block_size = native_pitch * height / sizeof T;
+					const u32 block_size = width * height;
 					auto typed_dst = (be_t<T> *)pixels_dst;
 					auto typed_src = (T *)pixels_src;
 
-					for (u8 n = 0; n < block_size; ++n)
-						typed_dst[n] = typed_src[n];
+					for (u32 px = 0; px < block_size; ++px)
+						typed_dst[px] = typed_src[px];
 				}
 			}
 			else
@@ -203,7 +221,7 @@ namespace vk
 				u8 *typed_src = (u8 *)pixels_src;
 
 				//TODO: Scaling
-				for (int row = 0; row < height; ++row)
+				for (u16 row = 0; row < height; ++row)
 				{
 					memcpy(typed_dst, typed_src, native_pitch);
 					typed_dst += pitch;
@@ -218,9 +236,9 @@ namespace vk
 				auto typed_dst = (be_t<T> *)pixels_dst;
 				auto typed_src = (T *)pixels_src;
 
-				for (int row = 0; row < height; ++row)
+				for (u16 row = 0; row < height; ++row)
 				{
-					for (int px = 0; px < width; ++px)
+					for (u16 px = 0; px < width; ++px)
 					{
 						typed_dst[px] = typed_src[px];
 					}
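The loop-index changes above appear to be correctness fixes rather than style: the old fast path counted with a `u8` against a 32-bit `block_size`, so the counter wrapped at 255 and the loop could never terminate for a surface larger than 255 texels. Worked through with concrete (hypothetical) numbers:

    // Why 'for (u8 n = 0; n < block_size; ++n)' was broken:
    const u32 block_size = 256 * 256;       // a 256x256 surface = 65536 texels

    for (u8 n = 0; n < block_size; ++n)     // n wraps 255 -> 0 (unsigned wraparound),
    {                                       // so n < 65536 stays true: infinite loop,
        /* copy texel n */                  // and texels past index 255 are never reached
    }

    for (u32 px = 0; px < block_size; ++px) // fixed: the index spans the whole surface
    {
        /* copy texel px */
    }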
@@ -240,15 +258,13 @@ namespace vk
 			if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
 			{
 				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
-				copy_texture(cmd, heap_index, submit_queue, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-
-				verify (HERE), (dma_fence != VK_NULL_HANDLE && dma_buffer.get());
+				copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
 			}
 
 			protect(utils::protection::rw);
 
 			//TODO: Image scaling, etc
-			void* pixels_src = dma_buffer->map(0, VK_WHOLE_SIZE);
+			void* pixels_src = dma_buffer->map(0, cpu_address_range);
 			void* pixels_dst = vm::base(cpu_address_base);
 
 			//We have to do our own byte swapping since the driver doesnt do it for us
@@ -273,12 +289,7 @@ namespace vk
 			}
 
 			dma_buffer->unmap();
-
-			//Cleanup
-			//These sections are usually one-use only so we destroy system resources
-			//TODO: Recycle dma buffers
-			release_dma_resources();
-
-			vram_texture = nullptr; //Let m_rtts handle lifetime management
+			//It's highly likely that this surface will be reused, so we just leave resources in place
 		}
 	};
@@ -333,7 +344,7 @@ namespace vk
 			for (auto &tex : m_cache)
 			{
 				if (tex.is_dirty()) continue;
-				if (!tex.is_flushable()) continue;
+				if (!tex.is_flushable() && !tex.is_flushed()) continue;
 
 				if (tex.matches(address, range))
 					return &tex;
@@ -529,15 +540,16 @@ namespace vk
 		void lock_memory_region(vk::render_target* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height)
 		{
 			cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
-			region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
 
 			if (!region.is_locked())
 			{
 				region.reset(memory_address, memory_size);
-				region.protect(utils::protection::no);
 				region.set_dirty(false);
 				texture_cache_range = region.get_min_max(texture_cache_range);
 			}
+
+			region.protect(utils::protection::no);
+			region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
 		}
 
 		void flush_memory_to_cache(const u32 memory_address, const u32 memory_size, vk::command_buffer&cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
@@ -554,6 +566,20 @@ namespace vk
 			region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
 		}
 
+		bool address_is_flushable(u32 address)
+		{
+			for (auto &tex : m_cache)
+			{
+				if (tex.is_dirty()) continue;
+				if (!tex.is_flushable()) continue;
+
+				if (tex.overlaps(address))
+					return true;
+			}
+
+			return false;
+		}
+
 		bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
 		{
 			if (address < texture_cache_range.first ||
@@ -584,8 +610,6 @@ namespace vk
 					//TODO: Map basic host_visible memory without coherent constraint
 					tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue);
-					tex.set_dirty(true);
-
 					response = true;
 				}
 			}
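Taken together, the `is_flushable()`/`is_flushed()` predicates and the `is_dirty()` checks above imply a small per-section state machine. The cache stores no explicit enum; it derives the state from `protection` and the resource pointers. Summarized for reference (illustrative only, not code from this patch):

    // Illustrative summary; the cache derives these states rather than storing them.
    enum class section_state
    {
        flushable, // protection == utils::protection::no: the GPU owns the data and
                   //   any CPU access faults into on_access_violation() -> flush_address()
        flushed,   // protection == rw with image view/managed texture released:
                   //   contents already written back to guest memory, may be re-locked
        dirty      // invalidated; skipped by every lookup until the slot is recycled
    };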
@@ -607,6 +631,7 @@ namespace vk
 				auto &tex = m_cache[i];
 
 				if (tex.is_dirty()) continue;
+				if (!tex.is_locked()) continue;	//flushable sections can be 'clean' but unlocked. TODO: Handle this better
 
 				auto overlapped = tex.overlaps_page(trampled_range, address);
 				if (std::get<0>(overlapped))
diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h
index 7691fe152e..0ac24bbe12 100644
--- a/rpcs3/Emu/RSX/rsx_cache.h
+++ b/rpcs3/Emu/RSX/rsx_cache.h
@@ -136,12 +136,13 @@ namespace rsx
 			locked_address_range = align(base + length, 4096) - locked_address_base;
 
 			protection = utils::protection::rw;
-			locked = false;
 		}
 
 		void protect(utils::protection prot)
 		{
+			if (prot == protection) return;
+
 			utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
 			protection = prot;
 			locked = prot != utils::protection::rw;
@@ -149,7 +150,8 @@ namespace rsx
 
 		void unprotect()
 		{
-			return protect(utils::protection::rw);
+			protect(utils::protection::rw);
+			locked = false;
 		}
 
 		bool overlaps(std::pair<u32, u32> range)
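Finally, on the `rsx_cache.h` changes: `protect()` now early-outs when the requested protection already matches the current one, so redundant transitions no longer cost a `memory_protect()` call, and clearing `locked` moved from `reset()` into `unprotect()`. A small usage sketch of the resulting behaviour (`base_address` is a placeholder, and the standalone instance is hypothetical):

    rsx::buffered_section section;           // hypothetical instance
    section.reset(base_address, 4096);       // protection starts out as rw

    section.protect(utils::protection::no);  // memory_protect() runs, locked = true
    section.protect(utils::protection::no);  // no-op: prot == protection already

    section.unprotect();                     // back to rw, and locked = false again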