From 20d4c09a1c7aeaf8fcdbe3d763ce8c7705676c28 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 23 Feb 2018 22:49:59 +0300 Subject: [PATCH] rsx/vk/gl: Enforce format matching for render target resources. Fall back to raw data copy if match fails - Forces Bitcast of texture data if input format cannot possibly be the same as the existing texture format - rsx: Other minor improvements to texture cache :- - remove obsolete blit engine incompatibility warning. The texture will be re-uploaded if it is indeed incompatible - Implement warn_once and err_once to avoid spamming the log with systemic errors - Track mispredicted flushes - Reswizzle bitcasted texture data to native layout TODO: Also needs reshuffle according to input remap vector --- rpcs3/Emu/RSX/Common/texture_cache.h | 60 +++++++++++++++++++++++----- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 11 ++--- rpcs3/Emu/RSX/GL/GLTextureCache.h | 52 ++++++++++++++++++++---- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 13 +++--- rpcs3/Emu/RSX/VK/VKTextureCache.h | 36 ++++++++++++++++- 5 files changed, 141 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index b436311673..9c9e220efa 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -353,8 +353,8 @@ namespace rsx std::unordered_map m_cache_miss_statistics_table; - //Set when a hw blit engine incompatibility is detected - bool blit_engine_incompatibility_warning_raised = false; + //Map of messages to only emit once + std::unordered_map<std::string, bool> m_once_only_messages_map; //Set when a shader read-only texture data suddenly becomes contested, usually by fbo memory bool read_only_tex_invalidate = false; @@ -371,6 +371,7 @@ namespace rsx //Other statistics std::atomic m_num_flush_requests = { 0 }; std::atomic m_num_cache_misses = { 0 }; + std::atomic<u32> m_num_cache_mispredictions = { 0 }; /* Helpers */ virtual void free_texture_section(section_storage_type&) = 0; @@ -386,6 +387,7 @@ namespace 
rsx virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array& sources) = 0; virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy) = 0; virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0; + virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0; constexpr u32 get_block_size() const { return 0x1000000; } inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } @@ -395,6 +397,33 @@ namespace rsx m_cache_update_tag++; } + template <typename... Args> + void emit_once(bool error, const char* fmt, Args&&... params) + { + const std::string message = fmt::format(fmt, std::forward<Args>(params)...); + if (m_once_only_messages_map.find(message) != m_once_only_messages_map.end()) + return; + + if (error) + logs::RSX.error(message.c_str()); + else + logs::RSX.warning(message.c_str()); + + m_once_only_messages_map[message] = true; + } + + template <typename... Args> + void err_once(const char* fmt, Args&&... params) + { + emit_once(true, fmt, std::forward<Args>(params)...); + } + + template <typename... Args> + void warn_once(const char* fmt, Args&&... 
params) + { + emit_once(false, fmt, std::forward<Args>(params)...); + } + private: //Internal implementation methods and helpers @@ -1431,6 +1460,12 @@ namespace rsx } } + if (!requires_processing) + { + //Check if we need to do anything about the formats + requires_processing = !render_target_format_is_compatible(texptr, format); + } + if (requires_processing) { const auto w = rsx::apply_resolution_scale(internal_width, true); @@ -1610,7 +1645,7 @@ namespace rsx } } - if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format)) + if (is_hw_blit_engine_compatible(format)) { //Find based on range instead auto overlapping_surfaces = find_texture_from_range(texaddr, tex_size); @@ -1641,14 +1676,6 @@ namespace rsx break; } - if (!blit_engine_incompatibility_warning_raised && !is_hw_blit_engine_compatible(format)) - { - LOG_ERROR(RSX, "Format 0x%X is not compatible with the hardware blit acceleration." - " Consider turning off GPU texture scaling in the options to partially handle textures on your CPU.", format); - blit_engine_incompatibility_warning_raised = true; - break; - } - if (surface->get_sampler_status() != rsx::texture_sampler_status::status_ready) set_up_remap_vector(*surface, tex.decoded_remap()); @@ -2041,6 +2068,11 @@ namespace rsx cached_dest->reprotect(utils::protection::no); m_cache[get_block_address(cached_dest->get_section_base())].notify(); } + else if (cached_dest->is_synchronized()) + { + //Prematurely read back + m_num_cache_mispredictions++; + } cached_dest->touch(); } @@ -2100,6 +2132,7 @@ namespace rsx { m_num_flush_requests.store(0u); m_num_cache_misses.store(0u); + m_num_cache_mispredictions.store(0u); } virtual const u32 get_unreleased_textures_count() const @@ -2117,6 +2150,11 @@ namespace rsx return m_num_flush_requests; } + virtual u32 get_num_cache_mispredictions() const + { + return m_num_cache_mispredictions; + } + virtual f32 get_cache_miss_ratio() const { const auto 
num_flushes = m_num_flush_requests.load(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 2c27924e02..0bad51a6e2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1376,13 +1376,14 @@ void GLGSRender::flip(int buffer) m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us"); m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); - auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); - auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); - auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); - auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100); + const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); + const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); + const auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); + const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); + const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100); m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); - m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)"); + m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d 
mispredictions)", num_flushes, cache_miss_ratio, num_mispredict)); } m_frame->flip(m_context); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index f88ceb90cb..33cf63b34f 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -654,10 +654,13 @@ namespace gl m_temporary_surfaces.resize(0); } - u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u16 x, u16 y, u16 width, u16 height, bool copy = true) + u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, bool copy = true) { u32 dst_id = 0; + if (sized_internal_fmt == GL_NONE) + sized_internal_fmt = gl::get_sized_internal_format(gcm_format); + GLenum ifmt; glBindTexture(GL_TEXTURE_2D, src_id); glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt); @@ -702,6 +705,13 @@ namespace gl } } + if (ifmt != sized_internal_fmt) + { + err_once("GL format mismatch (data cast?). 
Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, ifmt); + //Apply base component map onto the new texture if a data cast has been done + apply_component_mapping_flags(dst_type, gcm_format, rsx::texture_create_flags::default_component_order); + } + return dst_id; } @@ -764,20 +774,18 @@ namespace gl u32 create_temporary_subresource_view(void*&, u32* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override { - const GLenum ifmt = gl::get_sized_internal_format(gcm_format); - return create_temporary_subresource_impl(*src, ifmt, GL_TEXTURE_2D, x, y, w, h); + return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h); } u32 create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override { if (auto as_rtt = dynamic_cast(src)) { - return create_temporary_subresource_impl(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), GL_TEXTURE_2D, x, y, w, h); + return create_temporary_subresource_impl(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), GL_TEXTURE_2D, gcm_format, x, y, w, h); } else { - const GLenum ifmt = gl::get_sized_internal_format(gcm_format); - return create_temporary_subresource_impl(src->id(), ifmt, GL_TEXTURE_2D, x, y, w, h); + return create_temporary_subresource_impl(src->id(), GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h); } } @@ -820,8 +828,7 @@ namespace gl u32 generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy) override { - const GLenum ifmt = gl::get_sized_internal_format(gcm_format); - auto result = create_temporary_subresource_impl(sections_to_copy.front().src, ifmt, GL_TEXTURE_2D, 0, 0, width, height, false); + auto result = create_temporary_subresource_impl(sections_to_copy.front().src, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, false); for (const auto &region : sections_to_copy) { @@ -970,6 +977,35 @@ namespace gl glTextureBarrierNV(); } + bool 
render_target_format_is_compatible(gl::texture* tex, u32 gcm_format) override + { + if (auto as_rtt = dynamic_cast(tex)) + { + auto ifmt = as_rtt->get_compatible_internal_format(); + switch (gcm_format) + { + default: + //TODO + err_once("Format incompatibility detected, reporting failure to force data copy (GL_INTERNAL_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)ifmt, gcm_format); + return false; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return (ifmt == gl::texture::internal_format::rgba16f); + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return (ifmt == gl::texture::internal_format::rgba32f); + case CELL_GCM_TEXTURE_X32_FLOAT: + return (ifmt == gl::texture::internal_format::r32f); + case CELL_GCM_TEXTURE_R5G6B5: + return (ifmt == gl::texture::internal_format::r5g6b5); + case CELL_GCM_TEXTURE_DEPTH24_D8: + return (ifmt == gl::texture::internal_format::depth24_stencil8 || ifmt == gl::texture::internal_format::depth32f_stencil8); + case CELL_GCM_TEXTURE_A8R8G8B8: + return (ifmt == gl::texture::internal_format::rgba8 || ifmt == gl::texture::internal_format::depth24_stencil8 || ifmt == gl::texture::internal_format::depth32f_stencil8); + } + } + + fmt::throw_exception("Format comparison for non-rendertargets is not implemented" HERE); + } + public: texture_cache() {} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 9483bc2c5e..d36faaf7c3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3113,15 +3113,16 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); - auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); - auto texture_memory_size = 
m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); - auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); - auto num_flushes = m_texture_cache.get_num_flush_requests(); - auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100); + const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); + const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); + const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); + const auto num_flushes = m_texture_cache.get_num_flush_requests(); + const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); + const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)"); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispredictions)", num_flushes, cache_miss_ratio, num_mispredict)); } vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres); diff --git 
a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 8425a797f1..5d6e000ebe 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -597,8 +597,18 @@ namespace vk w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, source->info.flags)); + VkComponentMapping view_swizzle = source->native_component_map; + if (dst_format != source->info.format) + { + //This is a data cast operation + //Use native mapping for the new type + //TODO: Also reapply the view swizzle + const auto remap = get_component_mapping(gcm_format); + view_swizzle = { remap[1], remap[2], remap[3], remap[0] }; + } + VkImageSubresourceRange view_range = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; - view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, source->native_component_map, view_range)); + view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, view_swizzle, view_range)); if (copy) { @@ -983,6 +993,30 @@ namespace vk vk::insert_texture_barrier(cmd, tex); } + bool render_target_format_is_compatible(vk::image* tex, u32 gcm_format) override + { + auto vk_format = tex->info.format; + switch (gcm_format) + { + default: + //TODO + err_once("Format incompatibility detected, reporting failure to force data copy (VK_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)vk_format, gcm_format); + return false; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return (vk_format == VK_FORMAT_R16G16B16A16_SFLOAT); + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return (vk_format == VK_FORMAT_R32G32B32A32_SFLOAT); + case CELL_GCM_TEXTURE_X32_FLOAT: + return (vk_format == VK_FORMAT_R32_SFLOAT); + case CELL_GCM_TEXTURE_R5G6B5: + return (vk_format == VK_FORMAT_R5G6B5_UNORM_PACK16); + case CELL_GCM_TEXTURE_DEPTH24_D8: + return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == 
VK_FORMAT_D32_SFLOAT_S8_UINT); + case CELL_GCM_TEXTURE_A8R8G8B8: + return (vk_format == VK_FORMAT_B8G8R8A8_UNORM || vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); + } + } + public: struct vk_blit_op_result : public blit_op_result