diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index f24f62ba2e..92f35bf206 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -5,6 +5,7 @@
 #include "Emu/System.h"
 #include "../Common/TextureUtils.h"
 #include "../rsx_utils.h"
+#include "Utilities/mutex.h"
 
 namespace vk
 {
@@ -292,9 +293,34 @@ namespace vk
     class texture_cache
     {
+        struct ranged_storage
+        {
+            std::vector<cached_texture_section> data; //Stored data
+            std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks
+            u32 max_range = 0; //Largest stored block
+
+            void notify(u32 data_size)
+            {
+                max_range = std::max(data_size, max_range);
+                valid_count++;
+            }
+
+            void add(cached_texture_section& section, u32 data_size)
+            {
+                max_range = std::max(data_size, max_range);
+                valid_count++;
+
+                data.push_back(std::move(section));
+            }
+        };
+
     private:
-        std::vector<cached_texture_section> m_cache;
-        std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
+        shared_mutex m_cache_mutex;
+        std::unordered_map<u32, ranged_storage> m_cache;
+
+        std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
+        std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);
+
         std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
         std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
 
@@ -310,51 +336,71 @@ namespace vk
         cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
         {
-            for (auto &tex : m_cache)
             {
-                if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
-                {
-                    if (!confirm_dimensions) return tex;
+                reader_lock lock(m_cache_mutex);
 
-                    if (tex.matches(rsx_address, width, height, mipmaps))
-                        return tex;
-                    else
+                auto found = m_cache.find(rsx_address);
+                if (found != m_cache.end())
+                {
+                    auto &range_data = found->second;
+
+                    for (auto &tex : range_data.data)
                     {
-                        LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
-                        LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
+                        if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
+                        {
+                            if (!confirm_dimensions) return tex;
+
+                            if (tex.matches(rsx_address, width, height, mipmaps))
+                                return tex;
+                            else
+                            {
+                                LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
+                                LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
+                            }
+                        }
+                    }
+
+                    for (auto &tex : range_data.data)
+                    {
+                        if (tex.is_dirty())
+                        {
+                            if (tex.exists())
+                            {
+                                m_dirty_textures.push_back(std::move(tex.get_texture()));
+                                m_temporary_image_view.push_back(std::move(tex.get_view()));
+                            }
+
+                            tex.release_dma_resources();
+                            range_data.notify(rsx_size);
+                            return tex;
+                        }
+                    }
                 }
             }
 
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty())
-                {
-                    if (tex.exists())
-                    {
-                        m_dirty_textures.push_back(std::move(tex.get_texture()));
-                        m_temporary_image_view.push_back(std::move(tex.get_view()));
+            writer_lock lock(m_cache_mutex);
 
-                    tex.release_dma_resources();
-                    return tex;
-                }
-            }
-
-            m_cache.push_back(cached_texture_section());
-
-            return m_cache[m_cache.size() - 1];
+            cached_texture_section tmp;
+            m_cache[rsx_address].add(tmp, rsx_size);
+            return m_cache[rsx_address].data.back();
         }
 
         cached_texture_section* find_flushable_section(const u32 address, const u32 range)
         {
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable() && !tex.is_flushed()) continue;
+            reader_lock lock(m_cache_mutex);
 
-                if (tex.matches(address, range))
-                    return &tex;
+            auto found = m_cache.find(address);
+            if (found != m_cache.end())
+            {
+                auto &range_data = found->second;
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable() && !tex.is_flushed()) continue;
+
+                    if (tex.matches(address, range))
+                        return &tex;
+                }
             }
 
             return nullptr;
@@ -362,24 +408,28 @@ namespace vk
         void purge_cache()
        {
-            for (auto &tex : m_cache)
+            for (auto &address_range : m_cache)
             {
-                if (tex.exists())
+                auto &range_data = address_range.second;
+                for (auto &tex : range_data.data)
                 {
-                    m_dirty_textures.push_back(std::move(tex.get_texture()));
-                    m_temporary_image_view.push_back(std::move(tex.get_view()));
+                    if (tex.exists())
+                    {
+                        m_dirty_textures.push_back(std::move(tex.get_texture()));
+                        m_temporary_image_view.push_back(std::move(tex.get_view()));
+                    }
+
+                    if (tex.is_locked())
+                        tex.unprotect();
+
+                    tex.release_dma_resources();
                 }
 
-                if (tex.is_locked())
-                    tex.unprotect();
-
-                tex.release_dma_resources();
+                range_data.data.resize(0);
             }
 
             m_temporary_image_view.clear();
             m_dirty_textures.clear();
-
-            m_cache.resize(0);
         }
 
         //Helpers
@@ -611,13 +661,14 @@
             change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
             vk::leave_uninterruptible();
 
+            writer_lock lock(m_cache_mutex);
             region.reset(texaddr, range);
             region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image);
             region.protect(utils::protection::ro);
             region.set_dirty(false);
 
-            texture_cache_range = region.get_min_max(texture_cache_range);
+            read_only_range = region.get_min_max(read_only_range);
 
             return view;
         }
@@ -625,11 +676,13 @@
         {
             cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
 
+            writer_lock lock(m_cache_mutex);
+
             if (!region.is_locked())
             {
                 region.reset(memory_address, memory_size);
                 region.set_dirty(false);
-                texture_cache_range = region.get_min_max(texture_cache_range);
+                no_access_range = region.get_min_max(no_access_range);
             }
 
             region.protect(utils::protection::no);
@@ -656,17 +709,48 @@
         std::tuple<bool, bool> address_is_flushable(u32 address)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
+            if (address < no_access_range.first ||
+                address > no_access_range.second)
                 return std::make_tuple(false, false);
 
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable()) continue;
+            reader_lock lock(m_cache_mutex);
 
-                if (tex.overlaps(address))
-                    return std::make_tuple(true, tex.is_synchronized());
+            auto found = m_cache.find(address);
+            if (found != m_cache.end())
+            {
+                auto &range_data = found->second;
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    if (tex.overlaps(address))
+                        return std::make_tuple(true, tex.is_synchronized());
+                }
+            }
+
+            for (auto &address_range : m_cache)
+            {
+                if (address_range.first == address)
+                    continue;
+
+                auto &range_data = address_range.second;
+
+                //Quickly discard range
+                const u32 lock_base = address_range.first & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + address_range.first, 4096);
+
+                if (address < lock_base || address >= lock_limit)
+                    continue;
+
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    if (tex.overlaps(address))
+                        return std::make_tuple(true, tex.is_synchronized());
+                }
             }
 
             return std::make_tuple(false, false);
@@ -674,42 +758,75 @@
         bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
+            if (address < no_access_range.first ||
+                address > no_access_range.second)
                 return false;
 
             bool response = false;
             std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+            std::unordered_map<u32, bool> processed_ranges;
 
-            for (int i = 0; i < m_cache.size(); ++i)
+            reader_lock lock(m_cache_mutex);
+
+            for (auto It = m_cache.begin(); It != m_cache.end(); It++)
             {
-                auto &tex = m_cache[i];
+                auto &range_data = It->second;
+                const u32 base = It->first;
+                bool range_reset = false;
 
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable()) continue;
+                if (processed_ranges[base] || range_data.valid_count == 0)
+                    continue;
 
-                auto overlapped = tex.overlaps_page(trampled_range, address);
-                if (std::get<0>(overlapped))
+                //Quickly discard range
+                const u32 lock_base = base & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + base, 4096);
+
+                if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
+                    (lock_base > address || lock_limit <= address))
                 {
-                    auto &new_range = std::get<1>(overlapped);
-
-                    if (new_range.first != trampled_range.first ||
-                        new_range.second != trampled_range.second)
-                    {
-                        trampled_range = new_range;
-                        i = 0;
-                    }
-
-                    //TODO: Map basic host_visible memory without coherent constraint
-                    if (!tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue))
-                    {
-                        //Missed address, note this
-                        //TODO: Lower severity when successful to keep the cache from overworking
-                        record_cache_miss(tex);
-                    }
-
-                    response = true;
+                    processed_ranges[base] = true;
+                    continue;
                 }
+
+                for (int i = 0; i < range_data.data.size(); i++)
+                {
+                    auto &tex = range_data.data[i];
+
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    auto overlapped = tex.overlaps_page(trampled_range, address);
+                    if (std::get<0>(overlapped))
+                    {
+                        auto &new_range = std::get<1>(overlapped);
+
+                        if (new_range.first != trampled_range.first ||
+                            new_range.second != trampled_range.second)
+                        {
+                            i = 0;
+                            trampled_range = new_range;
+                            range_reset = true;
+                        }
+
+                        //TODO: Map basic host_visible memory without coherent constraint
+                        if (!tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue))
+                        {
+                            //Missed address, note this
+                            //TODO: Lower severity when successful to keep the cache from overworking
+                            record_cache_miss(tex);
+                        }
+
+                        response = true;
+                    }
+                }
+
+                if (range_reset)
+                {
+                    processed_ranges.clear();
+                    It = m_cache.begin();
+                }
+
+                processed_ranges[base] = true;
             }
 
             return response;
@@ -717,37 +834,79 @@
         bool invalidate_address(u32 address)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
-                return false;
+            if (address < read_only_range.first ||
+                address > read_only_range.second)
+            {
+                //Doesn't fall in the read_only textures range; check render targets
+                if (address < no_access_range.first ||
+                    address > no_access_range.second)
+                    return false;
+            }
 
             bool response = false;
             std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+            std::unordered_map<u32, bool> processed_ranges;
 
-            for (int i = 0; i < m_cache.size(); ++i)
+            reader_lock lock(m_cache_mutex);
+
+            for (auto It = m_cache.begin(); It != m_cache.end(); It++)
             {
-                auto &tex = m_cache[i];
+                auto &range_data = It->second;
+                const u32 base = It->first;
+                bool range_reset = false;
 
-                if (tex.is_dirty()) continue;
-                if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
+                if (processed_ranges[base] || range_data.valid_count == 0)
+                    continue;
 
-                auto overlapped = tex.overlaps_page(trampled_range, address);
-                if (std::get<0>(overlapped))
+                //Quickly discard range
+                const u32 lock_base = base & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + base, 4096);
+
+                if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
+                    (lock_base > address || lock_limit <= address))
                 {
-                    auto &new_range = std::get<1>(overlapped);
-
-                    if (new_range.first != trampled_range.first ||
-                        new_range.second != trampled_range.second)
-                    {
-                        trampled_range = new_range;
-                        i = 0;
-                    }
-
-                    tex.set_dirty(true);
-                    tex.unprotect();
-
-                    response = true;
+                    processed_ranges[base] = true;
+                    continue;
                 }
+
+                for (int i = 0; i < range_data.data.size(); i++)
+                {
+                    auto &tex = range_data.data[i];
+
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
+
+                    auto overlapped = tex.overlaps_page(trampled_range, address);
+                    if (std::get<0>(overlapped))
+                    {
+                        auto &new_range = std::get<1>(overlapped);
+
+                        if (new_range.first != trampled_range.first ||
+                            new_range.second != trampled_range.second)
+                        {
+                            i = 0;
+                            trampled_range = new_range;
+                            range_reset = true;
+                        }
+
+                        // Upgrade to writer lock
+                        lock.upgrade();
+
+                        tex.set_dirty(true);
+                        tex.unprotect();
+
+                        range_data.valid_count--;
+                        response = true;
+                    }
+                }
+
+                if (range_reset)
+                {
+                    processed_ranges.clear();
+                    It = m_cache.begin();
+                }
+
+                processed_ranges[base] = true;
             }
 
             return response;
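The hunks above replace the flat vector of sections with a map keyed by each section's base address: exact-address lookups become a single hash probe, and the flush/invalidate walkers can reject a whole range from its page-aligned bounds (base & ~0xfff up to align(base + max_range, 4096)) before touching any of its sections. Below is a minimal standalone C++ sketch of that per-range quick-reject test; the type and function names are simplified stand-ins for illustration, not RPCS3's actual classes.

    #include <cstdint>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using u32 = std::uint32_t;

    // Simplified stand-in for the patch's ranged_storage (illustration only).
    struct ranged_storage_sketch
    {
        std::vector<std::pair<u32, u32>> sections; // (start, length) of each cached section
        u32 max_range = 0;                         // largest stored block, as in the patch
    };

    // Mirrors the per-range rejection used by flush_address/invalidate_address:
    // an address can only hit a range whose page-aligned window
    // [base & ~0xfff, align(base + max_range, 4096)) contains it.
    static bool range_may_contain(u32 base, const ranged_storage_sketch& range, u32 address)
    {
        const u32 lock_base = base & ~0xfffu;
        const u32 lock_limit = (base + range.max_range + 0xfffu) & ~0xfffu; // align up to 4096
        return address >= lock_base && address < lock_limit;
    }

    static bool address_hits_cache(const std::unordered_map<u32, ranged_storage_sketch>& cache, u32 address)
    {
        for (const auto& entry : cache)
        {
            if (!range_may_contain(entry.first, entry.second, address))
                continue; // whole bucket skipped without touching its sections

            for (const auto& section : entry.second.sections)
                if (address >= section.first && address < section.first + section.second)
                    return true;
        }
        return false;
    }

The real patch additionally tracks processed_ranges and restarts the walk whenever a flush widens the trampled range; the sketch covers only the per-range rejection step.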