diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index f997002a78..57bd417e6f 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -1,6 +1,7 @@ -#pragma once +#pragma once #include "Utilities/GSL.h" +#include "Emu/Memory/vm.h" #include "../GCM.h" #include @@ -91,8 +92,10 @@ namespace rsx struct render_target_descriptor { u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to + u32 tag_address = 0; bool dirty = false; + bool needs_tagging = false; image_storage_type old_contents = nullptr; rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample; @@ -116,6 +119,33 @@ namespace rsx write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample; } + void tag() + { + auto ptr = vm::get_super_ptr>(tag_address); + *ptr = tag_address; + + needs_tagging = false; + } + + bool test() + { + if (needs_tagging && dirty) + { + // TODO + LOG_ERROR(RSX, "Resource used before memory initialization"); + return false; + } + + auto ptr = vm::get_super_ptr>(tag_address); + return (*ptr == tag_address); + } + + void queue_tag(u32 address) + { + tag_address = address; + needs_tagging = true; + } + void on_write(u64 write_tag = 0) { if (write_tag) @@ -124,6 +154,11 @@ namespace rsx last_use_tag = write_tag; } + if (needs_tagging) + { + tag(); + } + read_aa_mode = write_aa_mode; dirty = false; old_contents = nullptr; @@ -353,7 +388,7 @@ namespace rsx invalidated_resources.erase(It); new_surface = Traits::get(new_surface_storage); - Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy); + Traits::invalidate_surface_contents(address, command_list, new_surface, contents_to_copy); Traits::prepare_rtt_for_drawing(command_list, new_surface); break; } @@ -438,7 +473,7 @@ namespace rsx new_surface = Traits::get(new_surface_storage); Traits::prepare_ds_for_drawing(command_list, new_surface); - Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy); + Traits::invalidate_surface_contents(address, command_list, new_surface, contents_to_copy); break; } } @@ -1080,6 +1115,7 @@ namespace rsx for (auto &entry : e.overlapping_set) { + // GPU-side contents changed entry._ref->dirty = true; } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 2ba0e4c3b7..772c18dd40 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -372,7 +372,17 @@ namespace rsx return true; } + void tag_framebuffer(u32 texaddr) + { + auto ptr = vm::get_super_ptr>(texaddr); + *ptr = texaddr; + } + bool test_framebuffer(u32 texaddr) + { + auto ptr = vm::get_super_ptr>(texaddr); + return *ptr == texaddr; + } /** * Section invalidation @@ -1712,7 +1722,7 @@ namespace rsx //TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) { - if (test_framebuffer(texaddr)) + if (texptr->test()) { return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, depth, tex_pitch, extended_dimension, false, tex.remap(), @@ -1727,7 +1737,7 @@ namespace rsx if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) { - if (test_framebuffer(texaddr)) + if (texptr->test()) { return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, depth, tex_pitch, extended_dimension, true, tex.remap(), @@ -1755,7 +1765,7 @@ namespace rsx const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex_width, tex_height, tex_pitch); if (rsc.surface) { - if (!test_framebuffer(rsc.base_address)) + if (!rsc.surface->test()) { m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface); invalidate_address(rsc.base_address, invalidation_cause::read, std::forward(extras)...); @@ -1936,14 +1946,14 @@ namespace rsx src_is_render_target = false; } - if (src_is_render_target && !test_framebuffer(src_subres.base_address)) + if (src_is_render_target && !src_subres.surface->test()) { m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); invalidate_address(src_subres.base_address, invalidation_cause::read, std::forward(extras)...); src_is_render_target = false; } - if (dst_is_render_target && !test_framebuffer(dst_subres.base_address)) + if (dst_is_render_target && !dst_subres.surface->test()) { m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); invalidate_address(dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); @@ -2451,18 +2461,6 @@ namespace rsx return read_only_tex_invalidate; } - void tag_framebuffer(u32 texaddr) - { - auto ptr = vm::get_super_ptr>(texaddr); - *ptr = texaddr; - } - - bool test_framebuffer(u32 texaddr) - { - auto ptr = vm::get_super_ptr>(texaddr); - return *ptr == texaddr; - } - /** * Per-frame statistics diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 5a3b2421c8..49259758f6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include @@ -130,6 +130,7 @@ struct render_target_traits static void invalidate_surface_contents( + u32, ID3D12GraphicsCommandList*, ID3D12Resource*, ID3D12Resource*) {} diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 671fff80cb..ffb8f63ac2 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -173,14 +173,10 @@ namespace void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) { - if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context)) - { - // Clear commands affect contested memory - m_rtts_dirty = true; - } - - if (m_draw_fbo && !m_rtts_dirty) + if (m_current_framebuffer_context == context && !m_rtts_dirty && m_draw_fbo) { + // Fast path + // Framebuffer usage has not changed, framebuffer exists and config regs have not changed set_scissor(); return; } @@ -257,7 +253,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->write_aa_mode = layout.aa_mode; m_gl_texture_cache.notify_surface_changed(m_surface_info[i].address); - m_gl_texture_cache.tag_framebuffer(m_surface_info[i].address); } else { @@ -286,7 +281,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk ds->write_aa_mode = layout.aa_mode; m_gl_texture_cache.notify_surface_changed(layout.zeta_address); - m_gl_texture_cache.tag_framebuffer(layout.zeta_address); } else { diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index cd1d124b07..f238745606 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -147,7 +147,7 @@ struct gl_render_target_traits static std::unique_ptr create_new_surface( - u32 /*address*/, + u32 address, rsx::surface_color_format surface_color_format, size_t width, size_t height, @@ -165,6 +165,7 @@ struct gl_render_target_traits result->set_native_component_layout(native_layout); result->old_contents = old_surface; + result->queue_tag(address); result->set_cleared(false); result->update_surface(); return result; @@ -172,7 +173,7 @@ struct gl_render_target_traits static std::unique_ptr create_new_surface( - u32 /*address*/, + u32 address, rsx::surface_depth_format surface_depth_format, size_t width, size_t height, @@ -192,6 +193,7 @@ struct gl_render_target_traits result->set_native_component_layout(native_layout); result->old_contents = old_surface; + result->queue_tag(address); result->set_cleared(false); result->update_surface(); return result; @@ -214,11 +216,12 @@ struct gl_render_target_traits static void prepare_ds_for_sampling(void *, gl::render_target*) {} static - void invalidate_surface_contents(void *, gl::render_target *surface, gl::render_target* old_surface) + void invalidate_surface_contents(u32 address, void *, gl::render_target *surface, gl::render_target* old_surface) { - surface->set_cleared(false); surface->old_contents = old_surface; surface->reset_aa_mode(); + surface->queue_tag(address); + surface->set_cleared(false); } static diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 58b25737a3..e91ea67b05 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1124,7 +1124,7 @@ namespace rsx framebuffer_status_valid = false; m_framebuffer_state_contested = false; - m_framebuffer_contest_type = context; + m_current_framebuffer_context = context; if (layout.width == 0 || layout.height == 0) { @@ -1159,106 +1159,146 @@ namespace rsx // Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries const auto required_zeta_pitch = std::max((u32)(layout.depth_format == rsx::surface_depth_format::z16 ? layout.width * 2 : layout.width * 4) * aa_factor_u, 64u); const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(layout.color_format, layout.width) * aa_factor_u, 64u); - const bool color_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_color) ? true : rsx::method_registers.color_write_enabled(); - const bool depth_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_depth) ? true : rsx::method_registers.depth_write_enabled(); - const auto lg2w = rsx::method_registers.surface_log2_width(); - const auto lg2h = rsx::method_registers.surface_log2_height(); - const auto clipw_log2 = (u32)floor(log2(layout.width)); - const auto cliph_log2 = (u32)floor(log2(layout.height)); - + const bool color_write_enabled = rsx::method_registers.color_write_enabled(); + const bool depth_write_enabled = rsx::method_registers.depth_write_enabled(); const bool stencil_test_enabled = layout.depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); const bool depth_test_enabled = rsx::method_registers.depth_test_enabled(); - const bool ignore_depth = (context == rsx::framebuffer_creation_context::context_clear_color); - const bool ignore_color = (context == rsx::framebuffer_creation_context::context_clear_depth); - if (layout.zeta_address) + bool depth_buffer_unused = false, color_buffer_unused = false; + + switch (context) { - if (!depth_test_enabled && - !stencil_test_enabled && - layout.target != rsx::surface_target::none) + case rsx::framebuffer_creation_context::context_clear_all: + break; + case rsx::framebuffer_creation_context::context_clear_depth: + color_buffer_unused = true; + break; + case rsx::framebuffer_creation_context::context_clear_color: + depth_buffer_unused = true; + break; + case rsx::framebuffer_creation_context::context_draw: + // NOTE: As with all other hw, depth/stencil writes involve the corresponding depth/stencil test, i.e No test = No write + color_buffer_unused = !color_write_enabled || layout.target == rsx::surface_target::none; + depth_buffer_unused = !depth_test_enabled && !stencil_test_enabled; + m_framebuffer_state_contested = color_buffer_unused || depth_buffer_unused; + break; + default: + fmt::throw_exception("Unknown framebuffer context 0x%x" HERE, (u32)context); + } + + auto check_swizzled_render = [&]() + { + // Packed rasterization with optimal memory layout + // Pitch has to be packed for all active render targets, i.e 64 + // Formats also seemingly need matching depth and color pitch if both are active + + if (color_buffer_unused) { - // Disable depth buffer if depth testing is not enabled, unless a clear command is targeting the depth buffer - const bool is_depth_clear = !!(context & rsx::framebuffer_creation_context::context_clear_depth); - if (!is_depth_clear) - { - layout.zeta_address = 0; - m_framebuffer_state_contested = true; - } + // Check only depth + return (layout.zeta_pitch == 64); } - - if (layout.zeta_address && layout.zeta_pitch < required_zeta_pitch) + else if (depth_buffer_unused) { - if (lg2w < clipw_log2 || lg2h < cliph_log2) + // Check only color + for (const auto& index : rsx::utility::get_rtt_indexes(layout.target)) { - // Cannot fit - layout.zeta_address = 0; - - if (lg2w > 0 || lg2h > 0) + if (layout.color_pitch[index] != 64) { - // Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); + return false; } } - else - { - LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); - } + + return true; } - if (layout.zeta_address) + if (required_color_pitch != required_zeta_pitch) { - // Still exists? Unlikely to get discarded - layout.actual_zeta_pitch = std::max(layout.zeta_pitch, required_zeta_pitch); + // Both depth and color exist, but pixel size differs + return false; } + else + { + // Qualifies, but only if all the pitch values are disabled (64) + // Both depth and color are assumed to exist in this case, unless proven otherwise + if (layout.zeta_pitch != 64) + { + return false; + } + + for (const auto& index : rsx::utility::get_rtt_indexes(layout.target)) + { + if (layout.color_pitch[index] != 64) + { + return false; + } + } + + return true; + } + }; + + // Swizzled render does tight packing of bytes + const bool packed_render = check_swizzled_render(); + + if (depth_buffer_unused) + { + layout.zeta_address = 0; + } + else if (layout.zeta_pitch < required_zeta_pitch && !packed_render) + { + layout.zeta_address = 0; + } + else + { + // Still exists? Unlikely to get discarded + layout.actual_zeta_pitch = std::max(layout.zeta_pitch, required_zeta_pitch); } for (const auto &index : rsx::utility::get_rtt_indexes(layout.target)) { - if (layout.color_pitch[index] < required_color_pitch) + if (color_buffer_unused) { - if (lg2w < clipw_log2 || lg2h < cliph_log2) - { - layout.color_addresses[index] = 0; - - if (lg2w > 0 || lg2h > 0) - { - // Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); - } - } - else - { - LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); - } + layout.color_addresses[index] = 0; + continue; } - if (layout.zeta_address && (layout.color_addresses[index] == layout.zeta_address)) + if (layout.color_pitch[index] < required_color_pitch && !packed_render) { - LOG_TRACE(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d", + // Unlike the depth buffer, when given a color target we know it is intended to be rendered to + LOG_ERROR(RSX, "Framebuffer setup error: Color target failed pitch check, Pitch=[%d, %d, %d, %d] + %d, target=%d, context=%d", + layout.color_pitch[0], layout.color_pitch[1], layout.color_pitch[2], layout.color_pitch[3], + layout.zeta_pitch, (u32)layout.target, (u32)context); + + // Do not remove this buffer for now as it implies something went horribly wrong anyway + break; + } + + if (layout.color_addresses[index] == layout.zeta_address) + { + LOG_WARNING(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d", layout.zeta_address, index, layout.zeta_pitch, layout.color_pitch[index], (u32)context); + m_framebuffer_state_contested = true; + // TODO: Research clearing both depth AND color // TODO: If context is creation_draw, deal with possibility of a lost buffer clear - if (!ignore_depth && - (ignore_color || depth_test_enabled || stencil_test_enabled || - (!color_write_enabled && depth_write_enabled))) + if (depth_test_enabled || stencil_test_enabled || (!color_write_enabled && depth_write_enabled)) { // Use address for depth data layout.color_addresses[index] = 0; + continue; } else { // Use address for color data layout.zeta_address = 0; - m_framebuffer_state_contested = true; } } - if (layout.color_addresses[index]) - { - layout.actual_color_pitch[index] = std::max(layout.color_pitch[index], required_color_pitch); - framebuffer_status_valid = true; - } + verify(HERE), layout.color_addresses[index]; + + layout.actual_color_pitch[index] = std::max(layout.color_pitch[index], required_color_pitch); + framebuffer_status_valid = true; } if (!framebuffer_status_valid && !layout.zeta_address) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index ab68cebaf9..7fec5a58bd 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -461,7 +461,7 @@ namespace rsx bool m_textures_dirty[16]; bool m_vertex_textures_dirty[4]; bool m_framebuffer_state_contested = false; - rsx::framebuffer_creation_context m_framebuffer_contest_type = rsx::framebuffer_creation_context::context_draw; + rsx::framebuffer_creation_context m_current_framebuffer_context = rsx::framebuffer_creation_context::context_draw; u32 m_graphics_state = 0; u64 ROP_sync_timestamp = 0; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 13380b0f93..fec90ddcd1 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2831,14 +2831,10 @@ void VKGSRender::open_command_buffer() void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { - if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context)) - { - // Clear commands affect contested memory - m_rtts_dirty = true; - } - - if (m_draw_fbo && !m_rtts_dirty) + if (m_current_framebuffer_context == context && !m_rtts_dirty && m_draw_fbo) { + // Fast path + // Framebuffer usage has not changed, framebuffer exists and config regs have not changed set_scissor(); return; } @@ -2939,7 +2935,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) surface->write_aa_mode = layout.aa_mode; m_texture_cache.notify_surface_changed(layout.color_addresses[index]); - m_texture_cache.tag_framebuffer(layout.color_addresses[index]); m_draw_buffers.push_back(index); } } @@ -2955,7 +2950,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) ds->write_aa_mode = layout.aa_mode; m_texture_cache.notify_surface_changed(layout.zeta_address); - m_texture_cache.tag_framebuffer(layout.zeta_address); } if (g_cfg.video.write_color_buffers) diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index c203483cfa..75bce80931 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -79,7 +79,7 @@ namespace rsx using download_buffer_object = void*; static std::unique_ptr create_new_surface( - u32 /*address*/, + u32 address, surface_color_format format, size_t width, size_t height, vk::render_target* old_surface, @@ -106,6 +106,7 @@ namespace rsx rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format); rtt->surface_width = (u16)width; rtt->surface_height = (u16)height; + rtt->queue_tag(address); rtt->dirty = true; if (old_surface != nullptr && old_surface->info.format == requested_format) @@ -115,7 +116,7 @@ namespace rsx } static std::unique_ptr create_new_surface( - u32 /* address */, + u32 address, surface_depth_format format, size_t width, size_t height, vk::render_target* old_surface, @@ -151,6 +152,7 @@ namespace rsx ds->attachment_aspect_flag = range.aspectMask; ds->surface_width = (u16)width; ds->surface_height = (u16)height; + ds->queue_tag(address); ds->dirty = true; if (old_surface != nullptr && old_surface->info.format == requested_format) @@ -202,11 +204,12 @@ namespace rsx } static - void invalidate_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface) + void invalidate_surface_contents(u32 address, vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface) { surface->old_contents = old_surface; - surface->dirty = true; surface->reset_aa_mode(); + surface->queue_tag(address); + surface->dirty = true; } static diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index c8b1681d40..3c88cb16e9 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -2255,7 +2255,7 @@ struct registers_decoder if ((s32)val < 0) { - return 1. / (((val & ~(1<<31)) / 1048576.f) - 2048.f); + return 1.f / (((val & ~(1<<31)) / 1048576.f) - 2048.f); } return 1048576.f / val; @@ -2293,7 +2293,7 @@ struct registers_decoder if ((s32)val < 0) { - return 1. / (((val & ~(1<<31)) / 1048576.f) - 2048.f); + return 1.f / (((val & ~(1<<31)) / 1048576.f) - 2048.f); } return 1048576.f / val;