From 5e58cf60798024ad5d5b9f30746bc84f98770d5c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Oct 2017 00:12:32 +0300 Subject: [PATCH] rsx: Restructuring [WIP] - Refactor invalidate memory functions into one function - Add cached object rebuilding functionality to avoid throwing away useful memory on an invalidate - Added debug monitoring of texture unit VRAM usage --- rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 10 +- rpcs3/Emu/RSX/Common/texture_cache.h | 257 ++++++++++++---------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 63 ++---- rpcs3/Emu/RSX/GL/GLGSRender.h | 4 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 163 +++++++------- rpcs3/Emu/RSX/VK/VKTextureCache.h | 6 +- rpcs3/Emu/RSX/rsx_cache.h | 5 + 8 files changed, 263 insertions(+), 247 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index 9c3beafaac..cca89f5c86 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -46,6 +46,8 @@ struct data_heap size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget size_t m_current_allocated_size; size_t m_largest_allocated_pool; + + char* m_name; public: data_heap() = default; ~data_heap() = default; @@ -54,8 +56,10 @@ public: size_t m_get_pos; // End of free space - void init(size_t heap_size, size_t min_guard_size=0x10000) + void init(size_t heap_size, const char* buffer_name = "unnamed", size_t min_guard_size=0x10000) { + m_name = const_cast(buffer_name); + m_size = heap_size; m_put_pos = 0; m_get_pos = heap_size - 1; @@ -71,8 +75,8 @@ public: { if (!can_alloc(size)) { - fmt::throw_exception("Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE, - m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); + fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE, + m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); } size_t alloc_size = align(size, Alignement); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 3410c9659f..62686544ed 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -31,6 +31,8 @@ namespace rsx u16 real_pitch; u16 rsx_pitch; + u64 cache_tag; + rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; @@ -162,7 +164,8 @@ namespace rsx //Memory usage const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated - s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory + std::atomic m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory + std::atomic m_texture_memory_in_use = { 0 }; /* Helpers */ virtual void free_texture_section(section_storage_type&) = 0; @@ -179,10 +182,14 @@ namespace rsx inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } private: - //Internal implementation methods - bool invalidate_range_impl(u32 address, u32 range, bool unprotect) + //Internal implementation methods and helpers + + //Get intersecting set - Returns all objects intersecting a given range and their owning blocks + std::vector> 
get_intersecting_set(u32 address, u32 range, bool check_whole_size) { + std::vector> result; bool response = false; + u64 cache_tag = get_system_time(); u32 last_dirty_block = UINT32_MAX; std::pair trampled_range = std::make_pair(address, address + range); @@ -195,7 +202,7 @@ namespace rsx if (base == last_dirty_block && range_data.valid_count == 0) continue; - if (trampled_range.first < trampled_range.second) + if (trampled_range.first <= trampled_range.second) { //Only if a valid range, ignore empty sets if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) @@ -205,11 +212,10 @@ namespace rsx for (int i = 0; i < range_data.data.size(); i++) { auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; + if (tex.cache_tag == cache_tag) continue; //already processed if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - auto overlapped = tex.overlaps_page(trampled_range, address, false); + auto overlapped = tex.overlaps_page(trampled_range, address, check_whole_size); if (std::get<0>(overlapped)) { auto &new_range = std::get<1>(overlapped); @@ -222,19 +228,8 @@ namespace rsx range_reset = true; } - if (unprotect) - { - tex.set_dirty(true); - tex.unprotect(); - } - else - { - tex.discard(); - } - - m_unreleased_texture_objects++; - range_data.remove_one(); - response = true; + tex.cache_tag = cache_tag; + result.push_back({&tex, &range_data}); } } @@ -245,87 +240,92 @@ namespace rsx } } - return response; + return result; + } + + //Invalidate range base implementation + //Returns a pair: + //1. A boolean - true if the memory range was truly locked and has been dealt with, false otherwise + //2. A vector of all sections that should be flushed if the caller did not set the allow_flush method. That way the caller can make preparations on how to deal with sections that require flushing + // Note that the sections will be unlocked regardless of the allow_flush flag + template + std::pair> invalidate_range_impl_base(u32 address, u32 range, bool discard_only, bool rebuild_cache, bool allow_flush, Args&... 
extras) + { + auto trampled_set = get_intersecting_set(address, range, allow_flush); + + if (trampled_set.size() > 0) + { + // Rebuild the cache by only destroying ranges that need to be destroyed to unlock this page + const auto to_reprotect = std::remove_if(trampled_set.begin(), trampled_set.end(), + [&](const std::pair& obj) + { + if (!rebuild_cache && !obj.first->is_flushable()) + return false; + + const std::pair null_check = std::make_pair(UINT32_MAX, 0); + return !std::get<0>(obj.first->overlaps_page(null_check, address, true)); + }); + + std::vector sections_to_flush; + for (auto It = trampled_set.begin(); It != to_reprotect; ++It) + { + auto obj = *It; + + if (discard_only) + obj.first->discard(); + else + obj.first->unprotect(); + + if (obj.first->is_flushable() && allow_flush) + { + sections_to_flush.push_back(obj.first); + } + else + { + obj.first->set_dirty(true); + m_unreleased_texture_objects++; + } + + obj.second->remove_one(); + } + + for (auto It = to_reprotect; It != trampled_set.end(); It++) + { + auto obj = *It; + + auto old_prot = obj.first->get_protection(); + obj.first->discard(); + obj.first->protect(old_prot); + obj.first->set_dirty(false); + } + + trampled_set.erase(to_reprotect, trampled_set.end()); + + if (allow_flush) + { + for (const auto &tex : sections_to_flush) + { + if (!tex->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*tex); + } + } + + return{ true, {} }; + } + + return std::make_pair(true, sections_to_flush); + } + + return{ false, {} }; } template - bool flush_address_impl(u32 address, Args&&... extras) + std::pair> invalidate_range_impl(u32 address, u32 range, bool discard, bool allow_flush, Args&... 
extras) { - bool response = false; - u32 last_dirty_block = UINT32_MAX; - std::pair trampled_range = std::make_pair(0xffffffff, 0x0); - std::vector sections_to_flush; - - for (auto It = m_cache.begin(); It != m_cache.end(); It++) - { - auto &range_data = It->second; - const u32 base = It->first; - bool range_reset = false; - - if (base == last_dirty_block && range_data.valid_count == 0) - continue; - - if (trampled_range.first < trampled_range.second) - { - //Only if a valid range, ignore empty sets - if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) - continue; - } - - for (int i = 0; i < range_data.data.size(); i++) - { - auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; - if (!tex.is_locked()) continue; - - auto overlapped = tex.overlaps_page(trampled_range, address, true); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - i = 0; - trampled_range = new_range; - range_reset = true; - } - - if (tex.is_flushable()) - { - sections_to_flush.push_back(&tex); - } - else - { - m_unreleased_texture_objects++; - tex.set_dirty(true); - } - - tex.unprotect(); - range_data.remove_one(); - - response = true; - } - } - - if (range_reset) - { - It = m_cache.begin(); - } - } - - for (auto tex : sections_to_flush) - { - if (!tex->flush(std::forward(extras)...)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(*tex); - } - } - - return response; + return invalidate_range_impl_base(address, range, discard, true, allow_flush, std::forward(extras)...); } bool is_hw_blit_engine_compatible(const u32 format) const @@ -427,6 +427,7 @@ namespace rsx { m_unreleased_texture_objects--; free_texture_section(tex); + m_texture_memory_in_use -= tex.get_section_size(); } range_data.notify(rsx_address, rsx_size); @@ -562,22 +563,19 @@ namespace rsx } template - bool flush_address(u32 address, Args&&... extras) + std::pair> invalidate_address(u32 address, bool allow_flush, Args&... extras) { - if (address < no_access_range.first || - address > no_access_range.second) - return false; - - writer_lock lock(m_cache_mutex); - return flush_address_impl(address, std::forward(extras)...); + return invalidate_range(address, 4096 - (address & 4095), false, allow_flush, std::forward(extras)...); } - bool invalidate_address(u32 address) + template + std::pair> flush_address(u32 address, Args&... extras) { - return invalidate_range(address, 4096 - (address & 4095)); + return invalidate_range(address, 4096 - (address & 4095), false, true, std::forward(extras)...); } - bool invalidate_range(u32 address, u32 range, bool unprotect = true) + template + std::pair> invalidate_range(u32 address, u32 range, bool discard, bool allow_flush, Args&... extras) { std::pair trampled_range = std::make_pair(address, address + range); @@ -587,11 +585,31 @@ namespace rsx //Doesnt fall in the read_only textures range; check render targets if (trampled_range.second < no_access_range.first || trampled_range.first > no_access_range.second) - return false; + return{ false, {} }; } writer_lock lock(m_cache_mutex); - return invalidate_range_impl(address, range, unprotect); + return invalidate_range_impl(address, range, discard, allow_flush, std::forward(extras)...); + } + + template + bool flush_all(std::vector& sections_to_flush, Args&... 
extras) + { + reader_lock lock(m_cache_mutex); + for (const auto &tex: sections_to_flush) + { + if (tex->is_flushed()) + continue; + + if (!tex->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*tex); + } + } + + return true; } void record_cache_miss(section_storage_type &tex) @@ -670,6 +688,7 @@ namespace rsx continue; free_texture_section(tex); + m_texture_memory_in_use -= tex.get_section_size(); } } @@ -882,6 +901,7 @@ namespace rsx auto subresources_layout = get_subresources_layout(tex); auto remap_vector = tex.decoded_remap(); + m_texture_memory_in_use += (tex_pitch * tex_height); return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(); } @@ -972,8 +992,8 @@ namespace rsx const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; lock.upgrade(); - flush_address_impl(src_address, std::forward(extras)...); - invalidate_range_impl(dst_address, memcpy_bytes_length, true); + invalidate_range_impl(src_address, memcpy_bytes_length, false, true, std::forward(extras)...); + invalidate_range_impl(dst_address, memcpy_bytes_length, false, true, std::forward(extras)...); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; } @@ -1075,7 +1095,7 @@ namespace rsx { lock.upgrade(); - flush_address_impl(src_address, std::forward(extras)...); + invalidate_range_impl(src_address, src.pitch * src.slice_h, false, true, std::forward(extras)...); const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1; std::vector subresource_layout; @@ -1090,6 +1110,8 @@ namespace rsx const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src, subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture(); + + m_texture_memory_in_use += src.pitch * src.slice_h; } } else @@ -1145,7 +1167,7 @@ namespace rsx if (format_mismatch) { lock.upgrade(); - invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), true); + invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), false, true, std::forward(extras)...); dest_texture = 0; cached_dest = nullptr; @@ -1153,7 +1175,7 @@ namespace rsx else if (invalidate_dst_range) { lock.upgrade(); - invalidate_range_impl(dst_address, dst.pitch * dst.height, true); + invalidate_range_impl(dst_address, dst.pitch * dst.height, false, true, std::forward(extras)...); } //Validate clipping region @@ -1187,6 +1209,8 @@ namespace rsx gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled? 
rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order, default_remap_vector)->get_raw_texture(); + + m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } const f32 scale = rsx::get_resolution_scale(); @@ -1204,5 +1228,10 @@ namespace rsx { return m_unreleased_texture_objects; } + + const u32 get_texture_memory_in_use() const + { + return m_texture_memory_in_use; + } }; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 6fc4110961..004b9d5c92 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1167,7 +1167,9 @@ void GLGSRender::flip(int buffer) m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); + auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); + m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); } m_frame->flip(m_context); @@ -1202,37 +1204,33 @@ u64 GLGSRender::timestamp() const bool GLGSRender::on_access_violation(u32 address, bool is_writing) { - if (is_writing) - return m_gl_texture_cache.invalidate_address(address); - else + bool can_flush = (std::this_thread::get_id() != m_thread_id); + auto result = m_gl_texture_cache.invalidate_address(address, can_flush); + + if (!result.first) + return false; + + if (result.second.size() > 0) { - if (std::this_thread::get_id() != m_thread_id) + work_item &task = post_flush_request(address, result.second); + + vm::temporary_unlock(); { - bool flushable; - gl::cached_texture_section* section_to_post; - - std::tie(flushable, section_to_post) = m_gl_texture_cache.address_is_flushable(address); - if (!flushable) return false; - - work_item &task = post_flush_request(address, section_to_post); - - vm::temporary_unlock(); - { - std::unique_lock lock(task.guard_mutex); - task.cv.wait(lock, [&task] { return task.processed; }); - } - - task.received = true; - return task.result; + std::unique_lock lock(task.guard_mutex); + task.cv.wait(lock, [&task] { return task.processed; }); } - - return m_gl_texture_cache.flush_address(address); + + task.received = true; + return true; } + + return false; } void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { - if (m_gl_texture_cache.invalidate_range(address_base, size, false)) + //Discard all memory in that range without bothering with writeback (Force it for strict?) 
+ if (std::get<0>(m_gl_texture_cache.invalidate_range(address_base, size, true, false))) m_gl_texture_cache.purge_dirty(); } @@ -1249,20 +1247,7 @@ void GLGSRender::do_local_task() if (q.processed) continue; std::unique_lock lock(q.guard_mutex); - - //Check if the suggested section is valid - if (!q.section_to_flush->is_flushed()) - { - m_gl_texture_cache.flush_address(q.address_to_flush); - q.result = true; - } - else - { - //Another thread has unlocked this memory region already - //Return success - q.result = true; - } - + q.result = m_gl_texture_cache.flush_all(q.sections_to_flush); q.processed = true; //Notify thread waiting on this @@ -1271,14 +1256,14 @@ void GLGSRender::do_local_task() } } -work_item& GLGSRender::post_flush_request(u32 address, gl::cached_texture_section *section) +work_item& GLGSRender::post_flush_request(u32 address, std::vector& sections) { std::lock_guard lock(queue_guard); work_queue.emplace_back(); work_item &result = work_queue.back(); result.address_to_flush = address; - result.section_to_flush = section; + result.sections_to_flush = std::move(sections); return result; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ea332ec08d..a23291c363 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -28,7 +28,7 @@ struct work_item std::mutex guard_mutex; u32 address_to_flush = 0; - gl::cached_texture_section *section_to_flush = nullptr; + std::vector sections_to_flush; volatile bool processed = false; volatile bool result = false; @@ -428,7 +428,7 @@ public: void set_viewport(); void synchronize_buffers(); - work_item& post_flush_request(u32 address, gl::cached_texture_section *section); + work_item& post_flush_request(u32 address, std::vector& sections); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 442e15fb7a..0d2bb334ac 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -403,7 +403,7 @@ void GLGSRender::read_buffers() } else { - m_gl_texture_cache.invalidate_range(texaddr, range); + m_gl_texture_cache.invalidate_range(texaddr, range, false, true); std::unique_ptr buffer(new u8[pitch * height]); color_buffer.read(buffer.get(), width, height, pitch); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 81260bad79..6324c51efd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -583,13 +583,13 @@ VKGSRender::VKGSRender() : GSRender() semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; //VRAM allocation - m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, 0x400000); + m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); - m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000); + m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); - m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000); + m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); - m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000); + m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 0x400000); m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); for (auto &ctx : frame_context_storage) @@ -739,105 +739,90 @@ VKGSRender::~VKGSRender() bool VKGSRender::on_access_violation(u32 address, bool is_writing) { - if (is_writing) - return m_texture_cache.invalidate_address(address); - else + std::lock_guard lock(m_secondary_cb_guard); + auto result = m_texture_cache.invalidate_address(address, false, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + + if (!result.first) + return false; + + if (result.second.size() > 0) { - if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer) + const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; + bool has_queue_ref = false; + + u64 sync_timestamp = 0ull; + for (const auto& tex : result.second) + sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp()); + + if (!is_rsxthr) { - bool flushable; - vk::cached_texture_section* section; + vm::temporary_unlock(); + } - std::tie(flushable, section) = m_texture_cache.address_is_flushable(address); - - if (!flushable) - return false; - - const u64 sync_timestamp = section->get_sync_timestamp(); - const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; - - if (section->is_synchronized()) + if (sync_timestamp > 0) + { + //Wait for any cb submitted after the sync timestamp to finish + while (true) { - //Wait for any cb submitted after the sync timestamp to finish - while (true) - { - u32 pending = 0; + u32 pending = 0; - if (m_last_flushable_cb < 0) + if (m_last_flushable_cb < 0) + break; + + for (auto &cb : m_primary_cb_list) + { + if (!cb.pending && cb.last_sync >= sync_timestamp) + { + pending = 0; break; - - for (auto &cb : m_primary_cb_list) - { - if (!cb.pending && cb.last_sync >= sync_timestamp) - { - pending = 0; - break; - } - - if (cb.pending) - { - pending++; - - if (is_rsxthr) - cb.poke(); - } } - if (!pending) - break; + if (cb.pending) + { + pending++; - std::this_thread::yield(); + if (is_rsxthr) + cb.poke(); + } } - if (is_rsxthr) - m_last_flushable_cb = -1; + if (!pending) + break; + + std::this_thread::yield(); } - else - { - //This region is buffered, but no previous sync point has been put in place to start sync efforts - //Just stall and get what we have at this point - if (!is_rsxthr) - { - { - std::lock_guard lock(m_flush_queue_mutex); - m_flush_commands = true; - m_queued_threads++; - } - - //Wait for the RSX thread to process - while (m_flush_commands) - { - _mm_lfence(); - _mm_pause(); - } - - std::lock_guard lock(m_secondary_cb_guard); - bool status = m_texture_cache.flush_address(address, *m_device, 
m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); - - m_queued_threads--; - _mm_sfence(); - - return status; - } - else - { - //NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU - //We should never interrupt our own cb recording since some operations are not interruptible - if (!vk::is_uninterruptible()) - //TODO: Investigate driver behaviour to determine if we need a hard sync or a soft flush - flush_command_queue(); - } - } + if (is_rsxthr) + m_last_flushable_cb = -1; } else { - //If we aren't managing buffer sync, dont bother checking the cache - return false; + if (!is_rsxthr) + { + { + std::lock_guard lock(m_flush_queue_mutex); + + m_flush_commands = true; + m_queued_threads++; + } + + //Wait for the RSX thread to process + while (m_flush_commands) + { + _mm_lfence(); + _mm_pause(); + } + + has_queue_ref = true; + } } - std::lock_guard lock(m_secondary_cb_guard); - return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + m_texture_cache.flush_all(result.second, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + + if (has_queue_ref) + { + m_queued_threads--; + } } return false; @@ -845,8 +830,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { - if (m_texture_cache.invalidate_range(address_base, size, false)) + std::lock_guard lock(m_secondary_cb_guard); + if (std::get<0>(m_texture_cache.invalidate_range(address_base, size, false, false, + *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()))) + { m_texture_cache.purge_dirty(); + } } void VKGSRender::begin() @@ -2651,7 +2640,9 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); + auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres); m_framebuffers_to_clean.push_back(std::move(direct_fbo)); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 36a203dd0c..bad61220b2 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -347,6 +347,7 @@ namespace vk m_discardable_storage.clear(); m_unreleased_texture_objects = 0; + m_texture_memory_in_use = 0; } protected: @@ -707,12 +708,13 @@ namespace vk } helper(&cmd); - return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, m_submit_queue); + const VkQueue& queue = m_submit_queue; + return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, queue); } const u32 get_unreleased_textures_count() const override { - 
return std::max(m_unreleased_texture_objects, 0) + (u32)m_discardable_storage.size(); + return m_unreleased_texture_objects + (u32)m_discardable_storage.size(); } }; } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 4c04573f71..2f48faada0 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -238,6 +238,11 @@ namespace rsx return std::make_pair(min, max); } + + utils::protection get_protection() + { + return protection; + } }; template
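
Usage sketch (not part of the patch): the hunks above split the old invalidate/flush entry points into invalidate_address()/invalidate_range(), which now return a pair of {handled, sections_to_flush}, plus a separate flush_all() that writes back any sections the caller was not allowed to flush inline. The sketch below shows how a backend is expected to drive that API; renderer_cache_t, handle_access_violation() and the defer_flush callback are illustrative placeholders rather than symbols from this diff, and u32 stands in for the RPCS3 integer alias. It mirrors the GLGSRender::on_access_violation / do_local_task flow, where deferred sections are handed to the renderer thread via a work_item.

#include <cstdint>
#include <utility>
#include <vector>

using u32 = std::uint32_t; // stand-in for the RPCS3-style alias

// cache_t is any texture cache exposing the reworked interface from this patch:
//   std::pair<bool, std::vector<section_t*>> invalidate_address(u32 addr, bool allow_flush, ...);
//   bool flush_all(std::vector<section_t*>& sections, ...);
// defer_fn receives the still-dirty sections when this thread may not flush them itself.
template <typename cache_t, typename section_t, typename defer_fn>
bool handle_access_violation(cache_t& cache, u32 address, bool can_flush_inline, defer_fn&& defer_flush)
{
	// Unprotect every section spanning the faulting page. When can_flush_inline is true
	// the cache flushes flushable sections immediately and returns an empty vector;
	// otherwise it hands them back so the owner of the graphics context can flush them.
	std::pair<bool, std::vector<section_t*>> result = cache.invalidate_address(address, can_flush_inline);

	if (!result.first)
		return false; // no locked section covered this page; let the fault propagate

	if (!result.second.empty())
	{
		// Deferred path: queue the sections for the renderer thread, which is expected to
		// call cache.flush_all(sections, ...) and signal completion (the work_item pattern
		// used by GLGSRender::post_flush_request / do_local_task in this patch).
		defer_flush(address, std::move(result.second));
	}

	return true;
}

Which thread counts as "allowed to flush inline" is backend policy: the Vulkan path in this patch always defers (allow_flush = false) and then synchronizes command buffers before calling flush_all(), while the GL path bases the decision on whether the fault occurred on the renderer thread.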