From 5e58cf60798024ad5d5b9f30746bc84f98770d5c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Oct 2017 00:12:32 +0300 Subject: [PATCH] rsx: Restructuring [WIP] - Refactor invalidate memory functions into one function - Add cached object rebuilding functionality to avoid throwing away useful memory on an invalidate - Added debug monitoring of texture unit VRAM usage --- rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 10 +- rpcs3/Emu/RSX/Common/texture_cache.h | 257 ++++++++++++---------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 63 ++---- rpcs3/Emu/RSX/GL/GLGSRender.h | 4 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 163 +++++++------- rpcs3/Emu/RSX/VK/VKTextureCache.h | 6 +- rpcs3/Emu/RSX/rsx_cache.h | 5 + 8 files changed, 263 insertions(+), 247 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index 9c3beafaac..cca89f5c86 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -46,6 +46,8 @@ struct data_heap size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget size_t m_current_allocated_size; size_t m_largest_allocated_pool; + + char* m_name; public: data_heap() = default; ~data_heap() = default; @@ -54,8 +56,10 @@ public: size_t m_get_pos; // End of free space - void init(size_t heap_size, size_t min_guard_size=0x10000) + void init(size_t heap_size, const char* buffer_name = "unnamed", size_t min_guard_size=0x10000) { + m_name = const_cast(buffer_name); + m_size = heap_size; m_put_pos = 0; m_get_pos = heap_size - 1; @@ -71,8 +75,8 @@ public: { if (!can_alloc(size)) { - fmt::throw_exception("Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE, - m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); + fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE, + m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); } size_t alloc_size = align(size, Alignement); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 3410c9659f..62686544ed 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -31,6 +31,8 @@ namespace rsx u16 real_pitch; u16 rsx_pitch; + u64 cache_tag; + rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; @@ -162,7 +164,8 @@ namespace rsx //Memory usage const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated - s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory + std::atomic m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory + std::atomic m_texture_memory_in_use = { 0 }; /* Helpers */ virtual void free_texture_section(section_storage_type&) = 0; @@ -179,10 +182,14 @@ namespace rsx inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } private: - //Internal implementation methods - bool invalidate_range_impl(u32 address, u32 range, bool unprotect) + //Internal implementation methods and helpers + + //Get intersecting set - Returns all objects intersecting a given range and their owning blocks + std::vector> 
get_intersecting_set(u32 address, u32 range, bool check_whole_size) { + std::vector> result; bool response = false; + u64 cache_tag = get_system_time(); u32 last_dirty_block = UINT32_MAX; std::pair trampled_range = std::make_pair(address, address + range); @@ -195,7 +202,7 @@ namespace rsx if (base == last_dirty_block && range_data.valid_count == 0) continue; - if (trampled_range.first < trampled_range.second) + if (trampled_range.first <= trampled_range.second) { //Only if a valid range, ignore empty sets if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) @@ -205,11 +212,10 @@ namespace rsx for (int i = 0; i < range_data.data.size(); i++) { auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; + if (tex.cache_tag == cache_tag) continue; //already processed if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - auto overlapped = tex.overlaps_page(trampled_range, address, false); + auto overlapped = tex.overlaps_page(trampled_range, address, check_whole_size); if (std::get<0>(overlapped)) { auto &new_range = std::get<1>(overlapped); @@ -222,19 +228,8 @@ namespace rsx range_reset = true; } - if (unprotect) - { - tex.set_dirty(true); - tex.unprotect(); - } - else - { - tex.discard(); - } - - m_unreleased_texture_objects++; - range_data.remove_one(); - response = true; + tex.cache_tag = cache_tag; + result.push_back({&tex, &range_data}); } } @@ -245,87 +240,92 @@ namespace rsx } } - return response; + return result; + } + + //Invalidate range base implementation + //Returns a pair: + //1. A boolean - true if the memory range was truly locked and has been dealt with, false otherwise + //2. A vector of all sections that should be flushed if the caller did not set the allow_flush method. That way the caller can make preparations on how to deal with sections that require flushing + // Note that the sections will be unlocked regardless of the allow_flush flag + template + std::pair> invalidate_range_impl_base(u32 address, u32 range, bool discard_only, bool rebuild_cache, bool allow_flush, Args&... 
extras) + { + auto trampled_set = get_intersecting_set(address, range, allow_flush); + + if (trampled_set.size() > 0) + { + // Rebuild the cache by only destroying ranges that need to be destroyed to unlock this page + const auto to_reprotect = std::remove_if(trampled_set.begin(), trampled_set.end(), + [&](const std::pair& obj) + { + if (!rebuild_cache && !obj.first->is_flushable()) + return false; + + const std::pair null_check = std::make_pair(UINT32_MAX, 0); + return !std::get<0>(obj.first->overlaps_page(null_check, address, true)); + }); + + std::vector sections_to_flush; + for (auto It = trampled_set.begin(); It != to_reprotect; ++It) + { + auto obj = *It; + + if (discard_only) + obj.first->discard(); + else + obj.first->unprotect(); + + if (obj.first->is_flushable() && allow_flush) + { + sections_to_flush.push_back(obj.first); + } + else + { + obj.first->set_dirty(true); + m_unreleased_texture_objects++; + } + + obj.second->remove_one(); + } + + for (auto It = to_reprotect; It != trampled_set.end(); It++) + { + auto obj = *It; + + auto old_prot = obj.first->get_protection(); + obj.first->discard(); + obj.first->protect(old_prot); + obj.first->set_dirty(false); + } + + trampled_set.erase(to_reprotect, trampled_set.end()); + + if (allow_flush) + { + for (const auto &tex : sections_to_flush) + { + if (!tex->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*tex); + } + } + + return{ true, {} }; + } + + return std::make_pair(true, sections_to_flush); + } + + return{ false, {} }; } template - bool flush_address_impl(u32 address, Args&&... extras) + std::pair> invalidate_range_impl(u32 address, u32 range, bool discard, bool allow_flush, Args&... 
extras) { - bool response = false; - u32 last_dirty_block = UINT32_MAX; - std::pair trampled_range = std::make_pair(0xffffffff, 0x0); - std::vector sections_to_flush; - - for (auto It = m_cache.begin(); It != m_cache.end(); It++) - { - auto &range_data = It->second; - const u32 base = It->first; - bool range_reset = false; - - if (base == last_dirty_block && range_data.valid_count == 0) - continue; - - if (trampled_range.first < trampled_range.second) - { - //Only if a valid range, ignore empty sets - if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) - continue; - } - - for (int i = 0; i < range_data.data.size(); i++) - { - auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; - if (!tex.is_locked()) continue; - - auto overlapped = tex.overlaps_page(trampled_range, address, true); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - i = 0; - trampled_range = new_range; - range_reset = true; - } - - if (tex.is_flushable()) - { - sections_to_flush.push_back(&tex); - } - else - { - m_unreleased_texture_objects++; - tex.set_dirty(true); - } - - tex.unprotect(); - range_data.remove_one(); - - response = true; - } - } - - if (range_reset) - { - It = m_cache.begin(); - } - } - - for (auto tex : sections_to_flush) - { - if (!tex->flush(std::forward(extras)...)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(*tex); - } - } - - return response; + return invalidate_range_impl_base(address, range, discard, true, allow_flush, std::forward(extras)...); } bool is_hw_blit_engine_compatible(const u32 format) const @@ -427,6 +427,7 @@ namespace rsx { m_unreleased_texture_objects--; free_texture_section(tex); + m_texture_memory_in_use -= tex.get_section_size(); } range_data.notify(rsx_address, rsx_size); @@ -562,22 +563,19 @@ namespace rsx } template - bool flush_address(u32 address, Args&&... extras) + std::pair> invalidate_address(u32 address, bool allow_flush, Args&... extras) { - if (address < no_access_range.first || - address > no_access_range.second) - return false; - - writer_lock lock(m_cache_mutex); - return flush_address_impl(address, std::forward(extras)...); + return invalidate_range(address, 4096 - (address & 4095), false, allow_flush, std::forward(extras)...); } - bool invalidate_address(u32 address) + template + std::pair> flush_address(u32 address, Args&... extras) { - return invalidate_range(address, 4096 - (address & 4095)); + return invalidate_range(address, 4096 - (address & 4095), false, true, std::forward(extras)...); } - bool invalidate_range(u32 address, u32 range, bool unprotect = true) + template + std::pair> invalidate_range(u32 address, u32 range, bool discard, bool allow_flush, Args&... extras) { std::pair trampled_range = std::make_pair(address, address + range); @@ -587,11 +585,31 @@ namespace rsx //Doesnt fall in the read_only textures range; check render targets if (trampled_range.second < no_access_range.first || trampled_range.first > no_access_range.second) - return false; + return{ false, {} }; } writer_lock lock(m_cache_mutex); - return invalidate_range_impl(address, range, unprotect); + return invalidate_range_impl(address, range, discard, allow_flush, std::forward(extras)...); + } + + template + bool flush_all(std::vector& sections_to_flush, Args&... 
extras) + { + reader_lock lock(m_cache_mutex); + for (const auto &tex: sections_to_flush) + { + if (tex->is_flushed()) + continue; + + if (!tex->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*tex); + } + } + + return true; } void record_cache_miss(section_storage_type &tex) @@ -670,6 +688,7 @@ namespace rsx continue; free_texture_section(tex); + m_texture_memory_in_use -= tex.get_section_size(); } } @@ -882,6 +901,7 @@ namespace rsx auto subresources_layout = get_subresources_layout(tex); auto remap_vector = tex.decoded_remap(); + m_texture_memory_in_use += (tex_pitch * tex_height); return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(); } @@ -972,8 +992,8 @@ namespace rsx const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; lock.upgrade(); - flush_address_impl(src_address, std::forward(extras)...); - invalidate_range_impl(dst_address, memcpy_bytes_length, true); + invalidate_range_impl(src_address, memcpy_bytes_length, false, true, std::forward(extras)...); + invalidate_range_impl(dst_address, memcpy_bytes_length, false, true, std::forward(extras)...); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; } @@ -1075,7 +1095,7 @@ namespace rsx { lock.upgrade(); - flush_address_impl(src_address, std::forward(extras)...); + invalidate_range_impl(src_address, src.pitch * src.slice_h, false, true, std::forward(extras)...); const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1; std::vector subresource_layout; @@ -1090,6 +1110,8 @@ namespace rsx const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src, subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture(); + + m_texture_memory_in_use += src.pitch * src.slice_h; } } else @@ -1145,7 +1167,7 @@ namespace rsx if (format_mismatch) { lock.upgrade(); - invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), true); + invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), false, true, std::forward(extras)...); dest_texture = 0; cached_dest = nullptr; @@ -1153,7 +1175,7 @@ namespace rsx else if (invalidate_dst_range) { lock.upgrade(); - invalidate_range_impl(dst_address, dst.pitch * dst.height, true); + invalidate_range_impl(dst_address, dst.pitch * dst.height, false, true, std::forward(extras)...); } //Validate clipping region @@ -1187,6 +1209,8 @@ namespace rsx gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled? 
rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order, default_remap_vector)->get_raw_texture(); + + m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } const f32 scale = rsx::get_resolution_scale(); @@ -1204,5 +1228,10 @@ namespace rsx { return m_unreleased_texture_objects; } + + const u32 get_texture_memory_in_use() const + { + return m_texture_memory_in_use; + } }; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 6fc4110961..004b9d5c92 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1167,7 +1167,9 @@ void GLGSRender::flip(int buffer) m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); + auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); + m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); } m_frame->flip(m_context); @@ -1202,37 +1204,33 @@ u64 GLGSRender::timestamp() const bool GLGSRender::on_access_violation(u32 address, bool is_writing) { - if (is_writing) - return m_gl_texture_cache.invalidate_address(address); - else + bool can_flush = (std::this_thread::get_id() != m_thread_id); + auto result = m_gl_texture_cache.invalidate_address(address, can_flush); + + if (!result.first) + return false; + + if (result.second.size() > 0) { - if (std::this_thread::get_id() != m_thread_id) + work_item &task = post_flush_request(address, result.second); + + vm::temporary_unlock(); { - bool flushable; - gl::cached_texture_section* section_to_post; - - std::tie(flushable, section_to_post) = m_gl_texture_cache.address_is_flushable(address); - if (!flushable) return false; - - work_item &task = post_flush_request(address, section_to_post); - - vm::temporary_unlock(); - { - std::unique_lock lock(task.guard_mutex); - task.cv.wait(lock, [&task] { return task.processed; }); - } - - task.received = true; - return task.result; + std::unique_lock lock(task.guard_mutex); + task.cv.wait(lock, [&task] { return task.processed; }); } - - return m_gl_texture_cache.flush_address(address); + + task.received = true; + return true; } + + return false; } void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { - if (m_gl_texture_cache.invalidate_range(address_base, size, false)) + //Discard all memory in that range without bothering with writeback (Force it for strict?) 
+ if (std::get<0>(m_gl_texture_cache.invalidate_range(address_base, size, true, false))) m_gl_texture_cache.purge_dirty(); } @@ -1249,20 +1247,7 @@ void GLGSRender::do_local_task() if (q.processed) continue; std::unique_lock lock(q.guard_mutex); - - //Check if the suggested section is valid - if (!q.section_to_flush->is_flushed()) - { - m_gl_texture_cache.flush_address(q.address_to_flush); - q.result = true; - } - else - { - //Another thread has unlocked this memory region already - //Return success - q.result = true; - } - + q.result = m_gl_texture_cache.flush_all(q.sections_to_flush); q.processed = true; //Notify thread waiting on this @@ -1271,14 +1256,14 @@ void GLGSRender::do_local_task() } } -work_item& GLGSRender::post_flush_request(u32 address, gl::cached_texture_section *section) +work_item& GLGSRender::post_flush_request(u32 address, std::vector& sections) { std::lock_guard lock(queue_guard); work_queue.emplace_back(); work_item &result = work_queue.back(); result.address_to_flush = address; - result.section_to_flush = section; + result.sections_to_flush = std::move(sections); return result; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ea332ec08d..a23291c363 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -28,7 +28,7 @@ struct work_item std::mutex guard_mutex; u32 address_to_flush = 0; - gl::cached_texture_section *section_to_flush = nullptr; + std::vector sections_to_flush; volatile bool processed = false; volatile bool result = false; @@ -428,7 +428,7 @@ public: void set_viewport(); void synchronize_buffers(); - work_item& post_flush_request(u32 address, gl::cached_texture_section *section); + work_item& post_flush_request(u32 address, std::vector& sections); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 442e15fb7a..0d2bb334ac 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -403,7 +403,7 @@ void GLGSRender::read_buffers() } else { - m_gl_texture_cache.invalidate_range(texaddr, range); + m_gl_texture_cache.invalidate_range(texaddr, range, false, true); std::unique_ptr buffer(new u8[pitch * height]); color_buffer.read(buffer.get(), width, height, pitch); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 81260bad79..6324c51efd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -583,13 +583,13 @@ VKGSRender::VKGSRender() : GSRender() semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; //VRAM allocation - m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, 0x400000); + m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); - m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000); + m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); - m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000); + m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); - m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000); + m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 0x400000); m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); for (auto &ctx : frame_context_storage) @@ -739,105 +739,90 @@ VKGSRender::~VKGSRender() bool VKGSRender::on_access_violation(u32 address, bool is_writing) { - if (is_writing) - return m_texture_cache.invalidate_address(address); - else + std::lock_guard lock(m_secondary_cb_guard); + auto result = m_texture_cache.invalidate_address(address, false, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + + if (!result.first) + return false; + + if (result.second.size() > 0) { - if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer) + const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; + bool has_queue_ref = false; + + u64 sync_timestamp = 0ull; + for (const auto& tex : result.second) + sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp()); + + if (!is_rsxthr) { - bool flushable; - vk::cached_texture_section* section; + vm::temporary_unlock(); + } - std::tie(flushable, section) = m_texture_cache.address_is_flushable(address); - - if (!flushable) - return false; - - const u64 sync_timestamp = section->get_sync_timestamp(); - const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; - - if (section->is_synchronized()) + if (sync_timestamp > 0) + { + //Wait for any cb submitted after the sync timestamp to finish + while (true) { - //Wait for any cb submitted after the sync timestamp to finish - while (true) - { - u32 pending = 0; + u32 pending = 0; - if (m_last_flushable_cb < 0) + if (m_last_flushable_cb < 0) + break; + + for (auto &cb : m_primary_cb_list) + { + if (!cb.pending && cb.last_sync >= sync_timestamp) + { + pending = 0; break; - - for (auto &cb : m_primary_cb_list) - { - if (!cb.pending && cb.last_sync >= sync_timestamp) - { - pending = 0; - break; - } - - if (cb.pending) - { - pending++; - - if (is_rsxthr) - cb.poke(); - } } - if (!pending) - break; + if (cb.pending) + { + pending++; - std::this_thread::yield(); + if (is_rsxthr) + cb.poke(); + } } - if (is_rsxthr) - m_last_flushable_cb = -1; + if (!pending) + break; + + std::this_thread::yield(); } - else - { - //This region is buffered, but no previous sync point has been put in place to start sync efforts - //Just stall and get what we have at this point - if (!is_rsxthr) - { - { - std::lock_guard lock(m_flush_queue_mutex); - m_flush_commands = true; - m_queued_threads++; - } - - //Wait for the RSX thread to process - while (m_flush_commands) - { - _mm_lfence(); - _mm_pause(); - } - - std::lock_guard lock(m_secondary_cb_guard); - bool status = m_texture_cache.flush_address(address, *m_device, 
m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); - - m_queued_threads--; - _mm_sfence(); - - return status; - } - else - { - //NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU - //We should never interrupt our own cb recording since some operations are not interruptible - if (!vk::is_uninterruptible()) - //TODO: Investigate driver behaviour to determine if we need a hard sync or a soft flush - flush_command_queue(); - } - } + if (is_rsxthr) + m_last_flushable_cb = -1; } else { - //If we aren't managing buffer sync, dont bother checking the cache - return false; + if (!is_rsxthr) + { + { + std::lock_guard lock(m_flush_queue_mutex); + + m_flush_commands = true; + m_queued_threads++; + } + + //Wait for the RSX thread to process + while (m_flush_commands) + { + _mm_lfence(); + _mm_pause(); + } + + has_queue_ref = true; + } } - std::lock_guard lock(m_secondary_cb_guard); - return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + m_texture_cache.flush_all(result.second, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); + + if (has_queue_ref) + { + m_queued_threads--; + } } return false; @@ -845,8 +830,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { - if (m_texture_cache.invalidate_range(address_base, size, false)) + std::lock_guard lock(m_secondary_cb_guard); + if (std::get<0>(m_texture_cache.invalidate_range(address_base, size, false, false, + *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()))) + { m_texture_cache.purge_dirty(); + } } void VKGSRender::begin() @@ -2651,7 +2640,9 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); + auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres); m_framebuffers_to_clean.push_back(std::move(direct_fbo)); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 36a203dd0c..bad61220b2 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -347,6 +347,7 @@ namespace vk m_discardable_storage.clear(); m_unreleased_texture_objects = 0; + m_texture_memory_in_use = 0; } protected: @@ -707,12 +708,13 @@ namespace vk } helper(&cmd); - return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, m_submit_queue); + const VkQueue& queue = m_submit_queue; + return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, queue); } const u32 get_unreleased_textures_count() const override { - 
return std::max(m_unreleased_texture_objects, 0) + (u32)m_discardable_storage.size(); + return m_unreleased_texture_objects + (u32)m_discardable_storage.size(); } }; } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 4c04573f71..2f48faada0 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -238,6 +238,11 @@ namespace rsx return std::make_pair(min, max); } + + utils::protection get_protection() + { + return protection; + } }; template
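
Usage sketch (not part of the patch): the hunks above split the old invalidate/flush entry points into invalidate_address()/invalidate_range(), which now return a pair of {handled, sections_to_flush}, plus a separate flush_all() that writes back any sections the caller was not allowed to flush inline. The sketch below shows how a backend is expected to drive that API; renderer_cache_t, handle_access_violation() and the defer_flush callback are illustrative placeholders rather than symbols from this diff, and u32 stands in for the RPCS3 integer alias. It mirrors the GLGSRender::on_access_violation / do_local_task flow, where deferred sections are handed to the renderer thread via a work_item.

#include <cstdint>
#include <utility>
#include <vector>

using u32 = std::uint32_t; // stand-in for the RPCS3-style alias

// cache_t is any texture cache exposing the reworked interface from this patch:
//   std::pair<bool, std::vector<section_t*>> invalidate_address(u32 addr, bool allow_flush, ...);
//   bool flush_all(std::vector<section_t*>& sections, ...);
// defer_fn receives the still-dirty sections when this thread may not flush them itself.
template <typename cache_t, typename section_t, typename defer_fn>
bool handle_access_violation(cache_t& cache, u32 address, bool can_flush_inline, defer_fn&& defer_flush)
{
	// Unprotect every section spanning the faulting page. When can_flush_inline is true
	// the cache flushes flushable sections immediately and returns an empty vector;
	// otherwise it hands them back so the owner of the graphics context can flush them.
	std::pair<bool, std::vector<section_t*>> result = cache.invalidate_address(address, can_flush_inline);

	if (!result.first)
		return false; // no locked section covered this page; let the fault propagate

	if (!result.second.empty())
	{
		// Deferred path: queue the sections for the renderer thread, which is expected to
		// call cache.flush_all(sections, ...) and signal completion (the work_item pattern
		// used by GLGSRender::post_flush_request / do_local_task in this patch).
		defer_flush(address, std::move(result.second));
	}

	return true;
}

Which thread counts as "allowed to flush inline" is backend policy: the Vulkan path in this patch always defers (allow_flush = false) and then synchronizes command buffers before calling flush_all(), while the GL path bases the decision on whether the fault occurred on the renderer thread.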