From 24f4c927590893cfab7ce9edf015d9d75925314e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 24 Jun 2018 13:23:30 +0300 Subject: [PATCH] rsx: Improve texture cache read speculation --- rpcs3/Emu/RSX/Common/TextureUtils.h | 8 +-- rpcs3/Emu/RSX/Common/texture_cache.h | 75 +++++++++++++++------------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 3 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 3 +- 4 files changed, 48 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 0051d6b464..b5bd608eec 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -9,10 +9,10 @@ namespace rsx { enum texture_upload_context { - shader_read = 0, - blit_engine_src = 1, - blit_engine_dst = 2, - framebuffer_storage = 3 + shader_read = 1, + blit_engine_src = 2, + blit_engine_dst = 4, + framebuffer_storage = 8 }; enum texture_colorspace diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index b6da62b6cb..5462856be3 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -204,52 +204,47 @@ namespace rsx bool writes_likely_completed() const { // TODO: Move this to the miss statistics block - if (context == rsx::texture_upload_context::blit_engine_dst) + const auto num_records = read_history.size(); + + if (num_records == 0) { - const auto num_records = read_history.size(); + return false; + } + else if (num_records == 1) + { + return num_writes >= read_history.front(); + } + else + { + const u32 last = read_history.front(); + const u32 prev_last = read_history[1]; - if (num_records == 0) + if (last == prev_last && num_records <= 3) { - return false; + return num_writes >= last; } - else if (num_records == 1) - { - return num_writes >= read_history.front(); - } - else - { - const u32 last = read_history.front(); - const u32 prev_last = read_history[1]; - if (last == prev_last && num_records <= 3) + u32 compare = UINT32_MAX; + for (u32 n = 1; n < num_records; n++) + { + if (read_history[n] == last) { - return num_writes >= last; - } + // Uncertain, but possible + compare = read_history[n - 1]; - u32 compare = UINT32_MAX; - for (u32 n = 1; n < num_records; n++) - { - if (read_history[n] == last) + if (num_records > (n + 1)) { - // Uncertain, but possible - compare = read_history[n - 1]; - - if (num_records > (n + 1)) + if (read_history[n + 1] == prev_last) { - if (read_history[n + 1] == prev_last) - { - // Confirmed with 2 values - break; - } + // Confirmed with 2 values + break; } } } - - return num_writes >= compare; } - } - return true; + return num_writes >= compare; + } } void reprotect(utils::protection prot, const std::pair& range) @@ -473,8 +468,10 @@ namespace rsx std::atomic m_texture_memory_in_use = { 0 }; //Other statistics + const u32 m_cache_miss_threshold = 8; // How many times an address can miss speculative writing before it is considered high priority std::atomic m_num_flush_requests = { 0 }; std::atomic m_num_cache_misses = { 0 }; + std::atomic m_num_cache_speculative_writes = { 0 }; std::atomic m_num_cache_mispredictions = { 0 }; /* Helpers */ @@ -729,7 +726,7 @@ namespace rsx { if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always) { - // This region is set to always read from itself (unavoi + // This region is set to always read from itself (unavoidable hard sync) const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; if (obj.first->is_synchronized() && ROP_timestamp > obj.first->get_sync_timestamp()) { @@ -1072,6 +1069,7 @@ namespace rsx region.set_sampler_status(rsx::texture_sampler_status::status_uninitialized); region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); region.set_memory_read_flags(memory_read_flags::flush_always); + region.touch(); m_flush_always_cache[memory_address] = memory_size; @@ -1151,6 +1149,7 @@ namespace rsx return true; region->copy_texture(false, std::forward(extra)...); + m_num_cache_speculative_writes++; return true; } @@ -1256,7 +1255,7 @@ namespace rsx { if (tex->get_memory_read_flags() == rsx::memory_read_flags::flush_always) { - // This region is set to always read from itself (unavoi + // This region is set to always read from itself (unavoidable hard sync) const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; if (tex->is_synchronized() && ROP_timestamp > tex->get_sync_timestamp()) { @@ -1351,7 +1350,7 @@ namespace rsx u32 flush_mask = rsx::texture_upload_context::blit_engine_dst; // Auto flush if this address keeps missing (not properly synchronized) - if (value.misses >= 4) + if (value.misses >= m_cache_miss_threshold) { // Disable prediction if memory is flagged as flush_always if (m_flush_always_cache.find(memory_address) == m_flush_always_cache.end()) @@ -2485,6 +2484,7 @@ namespace rsx m_num_flush_requests.store(0u); m_num_cache_misses.store(0u); m_num_cache_mispredictions.store(0u); + m_num_cache_speculative_writes.store(0u); } virtual const u32 get_unreleased_textures_count() const @@ -2507,6 +2507,11 @@ namespace rsx return m_num_cache_mispredictions; } + virtual u32 get_num_cache_speculative_writes() const + { + return m_num_cache_speculative_writes; + } + virtual f32 get_cache_miss_ratio() const { const auto num_flushes = m_num_flush_requests.load(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index f1851774cc..b0b03282f0 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1518,10 +1518,11 @@ void GLGSRender::flip(int buffer) const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); const auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); + const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes(); const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100); m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); - m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispredictions)", num_flushes, cache_miss_ratio, num_mispredict)); + m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); } m_frame->flip(m_context); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 44b56d4d4b..c46d565883 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3241,11 +3241,12 @@ void VKGSRender::flip(int buffer) const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); const auto num_flushes = m_texture_cache.get_num_flush_requests(); const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); + const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes(); const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispredictions)", num_flushes, cache_miss_ratio, num_mispredict)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); } vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);