diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index a1dc06c1de..ca8accb87f 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -59,11 +59,13 @@ namespace rsx rsx::address_range m_depth_stencil_memory_range; bool m_invalidate_on_write = false; + bool m_skip_write_updates = false; public: std::pair m_bound_render_targets_config = {}; std::array, 4> m_bound_render_targets = {}; std::pair m_bound_depth_stencil = {}; + u8 m_bound_buffers_count = 0; // List of sections derived from a section that has been split and invalidated std::vector orphaned_surfaces; @@ -602,6 +604,7 @@ namespace rsx cache_tag = rsx::get_shared_tag(); m_invalidate_on_write = (antialias != rsx::surface_antialiasing::center_1_sample); + m_bound_buffers_count = 0; // Make previous RTTs sampleable for (int i = m_bound_render_targets_config.first, count = 0; @@ -628,7 +631,8 @@ namespace rsx bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); - m_bound_render_targets_config.second++; + ++m_bound_render_targets_config.second; + ++m_bound_buffers_count; } } else @@ -647,6 +651,8 @@ namespace rsx m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); + + ++m_bound_buffers_count; } else { @@ -654,6 +660,11 @@ namespace rsx } } + u8 get_color_surface_count() const + { + return m_bound_render_targets_config.second; + } + surface_type get_surface_at(u32 address) { auto It = m_render_targets_storage.find(address); @@ -866,74 +877,61 @@ namespace rsx return result; } - void on_write(bool color, bool z, u32 address = 0) + void on_write(const bool* color, bool z) { - if (!address) + if (write_tag == cache_tag && m_skip_write_updates) { - if (write_tag == cache_tag) - { - if (m_invalidate_on_write) - { - if (color) - { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) - { - m_bound_render_targets[i].second->on_invalidate_children(); - } - } + // Nothing to do + return; + } - if (z && m_bound_depth_stencil.first) - { - m_bound_depth_stencil.second->on_invalidate_children(); - } - } + write_tag = cache_tag; + m_skip_write_updates = false; + int tagged = 0; - return; - } - else + // Tag surfaces + if (color) + { + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - write_tag = cache_tag; - } + if (!color[i]) + continue; - // Tag all available surfaces - if (color) - { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + auto& surface = m_bound_render_targets[i].second; + if (surface->last_use_tag != write_tag) { m_bound_render_targets[i].second->on_write(write_tag); } - } + else if (m_invalidate_on_write) + { + m_bound_render_targets[i].second->on_invalidate_children(); + } - if (z && m_bound_depth_stencil.first) - { - m_bound_depth_stencil.second->on_write(write_tag); + ++tagged; } } - else + + if (z && m_bound_depth_stencil.first) { - if (color) - { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) - { - if (m_bound_render_targets[i].first != address) - { - continue; - } - - m_bound_render_targets[i].second->on_write(write_tag); - } - } - - if (z && m_bound_depth_stencil.first == address) + auto& surface = m_bound_depth_stencil.second; + if (surface->last_use_tag != write_tag) { m_bound_depth_stencil.second->on_write(write_tag); } + else if (m_invalidate_on_write) + { + m_bound_depth_stencil.second->on_invalidate_children(); + } + + ++tagged; + } + + if (!m_invalidate_on_write && tagged == m_bound_buffers_count) + { + // Skip any further updates as all active surfaces have been updated + m_skip_write_updates = true; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 668c26bedb..cbe2bc02b5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -287,10 +287,10 @@ void D3D12GSRender::load_program() prop.Rasterization.FrontCounterClockwise = get_front_face_ccw(rsx::method_registers.front_face_mode()); UINT8 mask = 0; - mask |= rsx::method_registers.color_mask_r() ? D3D12_COLOR_WRITE_ENABLE_RED : 0; - mask |= rsx::method_registers.color_mask_g() ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0; - mask |= rsx::method_registers.color_mask_b() ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0; - mask |= rsx::method_registers.color_mask_a() ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0; + mask |= rsx::method_registers.color_mask_r(0) ? D3D12_COLOR_WRITE_ENABLE_RED : 0; + mask |= rsx::method_registers.color_mask_g(0) ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0; + mask |= rsx::method_registers.color_mask_b(0) ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0; + mask |= rsx::method_registers.color_mask_a(0) ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0; for (unsigned i = 0; i < prop.numMRT; i++) prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 554f1f8568..47e6a00844 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -177,7 +177,8 @@ void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlis rsx::method_registers.clear_color_a() / 255.f, }; - const auto layout = get_framebuffer_layout(rsx::framebuffer_creation_context::context_draw); + rsx::framebuffer_layout layout; + get_framebuffer_layout(rsx::framebuffer_creation_context::context_draw, layout); if (!framebuffer_status_valid) return; diff --git a/rpcs3/Emu/RSX/GL/GLExecutionState.h b/rpcs3/Emu/RSX/GL/GLExecutionState.h index 25040492b6..483fded64e 100644 --- a/rpcs3/Emu/RSX/GL/GLExecutionState.h +++ b/rpcs3/Emu/RSX/GL/GLExecutionState.h @@ -68,6 +68,15 @@ namespace gl return (found->second == test); } + inline bool test_propertyi(GLenum property, u32 test, GLint index) const + { + auto found = indexed_properties.find(property); + if (found == indexed_properties.end()) + return false; + + return found->second[index] == test; + } + void depth_func(GLenum func) { if (!test_property(GL_DEPTH_FUNC, func)) @@ -115,16 +124,16 @@ namespace gl } } - void color_mask(u32 mask) + void color_maski(GLint index, u32 mask) { - if (!test_property(GL_COLOR_WRITEMASK, mask)) + if (!test_propertyi(GL_COLOR_WRITEMASK, mask, index)) { - glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0)); - properties[GL_COLOR_WRITEMASK] = mask; + glColorMaski(index, ((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0)); + indexed_properties[GL_COLOR_WRITEMASK][index] = mask; } } - void color_mask(bool r, bool g, bool b, bool a) + void color_maski(GLint index, bool r, bool g, bool b, bool a) { u32 mask = 0; if (r) mask |= 0x10; @@ -132,7 +141,7 @@ namespace gl if (b) mask |= 0x40; if (a) mask |= 0x80; - color_mask(mask); + color_maski(index, mask); } void clear_color(u8 r, u8 g, u8 b, u8 a) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index eb301340d6..0082f71cf7 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -625,7 +625,7 @@ void GLGSRender::end() } } while (rsx::method_registers.current_draw_clause.next()); - m_rtts.on_write(rsx::method_registers.color_write_enabled(), rsx::method_registers.depth_write_enabled()); + m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); @@ -1098,6 +1098,7 @@ void GLGSRender::clear_surface(u32 arg) rsx::method_registers.scissor_width() < rsx::method_registers.surface_clip_width() || rsx::method_registers.scissor_height() < rsx::method_registers.surface_clip_height(); + bool update_color = false, update_z = false; rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt(); if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); arg & 0x3) @@ -1150,7 +1151,7 @@ void GLGSRender::clear_surface(u32 arg) if (require_mem_load) ds->write_barrier(cmd); // Memory has been initialized - m_rtts.on_write(false, true); + update_z = true; } } @@ -1177,25 +1178,29 @@ void GLGSRender::clear_surface(u32 arg) u8 clear_g = rsx::method_registers.clear_color_g(); u8 clear_b = rsx::method_registers.clear_color_b(); - gl_state.color_mask(colormask); gl_state.clear_color(clear_r, clear_g, clear_b, clear_a); - mask |= GLenum(gl::buffers::color); - for (auto &rtt : m_rtts.m_bound_render_targets) + for (u8 index = m_rtts.m_bound_render_targets_config.first, count = 0; + count < m_rtts.m_bound_render_targets_config.second; + ++count, ++index) { - if (const auto address = rtt.first) - { - if (require_mem_load) rtt.second->write_barrier(cmd); - m_rtts.on_write(true, false, address); - } + if (require_mem_load) m_rtts.m_bound_render_targets[index].second->write_barrier(cmd); + gl_state.color_maski(count, colormask); } + update_color = true; break; } } } + if (update_color || update_z) + { + const bool write_all_mask[] = { true, true, true, true }; + m_rtts.on_write(update_color ? write_all_mask : nullptr, update_z); + } + glClear(mask); } @@ -1416,18 +1421,22 @@ void GLGSRender::update_draw_state() { m_profiler.start(); - bool color_mask_b = rsx::method_registers.color_mask_b(); - bool color_mask_g = rsx::method_registers.color_mask_g(); - bool color_mask_r = rsx::method_registers.color_mask_r(); - bool color_mask_a = rsx::method_registers.color_mask_a(); - - if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) + for (int index = 0; index < m_rtts.get_color_surface_count(); ++index) { - //Map GB components onto RG - rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + bool color_mask_b = rsx::method_registers.color_mask_b(index); + bool color_mask_g = rsx::method_registers.color_mask_g(index); + bool color_mask_r = rsx::method_registers.color_mask_r(index); + bool color_mask_a = rsx::method_registers.color_mask_a(index); + + if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) + { + //Map GB components onto RG + rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + } + + gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a); } - gl_state.color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); gl_state.stencil_mask(rsx::method_registers.stencil_mask()); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp index c7ae7c255a..e27c0207c1 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp +++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp @@ -468,7 +468,7 @@ namespace gl blit_dst.check(); cmd.drv->clear_color(color); - cmd.drv->color_mask(true, true, true, true); + cmd.drv->color_maski(0, true, true, true, true); glClear(GL_COLOR_BUFFER_BIT); } diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 788c698c89..928131879e 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -1,4 +1,4 @@ -OPENGL_PROC(PFNGLGENBUFFERSPROC, GenBuffers); +OPENGL_PROC(PFNGLGENBUFFERSPROC, GenBuffers); OPENGL_PROC(PFNGLDELETEBUFFERSPROC, DeleteBuffers); OPENGL_PROC(PFNGLBINDBUFFERPROC, BindBuffer); OPENGL_PROC(PFNGLISBUFFERPROC, IsBuffer); @@ -160,6 +160,8 @@ OPENGL_PROC(PFNGLENABLEIPROC, Enablei); OPENGL_PROC(PFNGLDISABLEIPROC, Disablei); OPENGL_PROC(PFNGLISENABLEDIPROC, IsEnabledi); +OPENGL_PROC(PFNGLCOLORMASKIPROC, ColorMaski); + OPENGL_PROC(PFNGLPRIMITIVERESTARTINDEXPROC, PrimitiveRestartIndex); OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 8c568db1eb..a6d0255187 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -142,13 +142,13 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk framebuffer_status_valid = false; m_framebuffer_state_contested = false; - const auto layout = get_framebuffer_layout(context); + get_framebuffer_layout(context, m_framebuffer_layout); if (!framebuffer_status_valid) { return; } - if (m_draw_fbo && layout.ignore_change) + if (m_draw_fbo && m_framebuffer_layout.ignore_change) { // Nothing has changed, we're still using the same framebuffer // Update flags to match current @@ -161,17 +161,17 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk gl::command_context cmd{ gl_state }; m_rtts.prepare_render_target(cmd, - layout.color_format, layout.depth_format, - layout.width, layout.height, - layout.target, layout.aa_mode, - layout.color_addresses, layout.zeta_address, - layout.actual_color_pitch, layout.actual_zeta_pitch); + m_framebuffer_layout.color_format, m_framebuffer_layout.depth_format, + m_framebuffer_layout.width, m_framebuffer_layout.height, + m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, + m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address, + m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch); std::array color_targets; GLuint depth_stencil_target; - const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format); - const auto samples = get_format_sample_count(layout.aa_mode); + const u8 color_bpp = get_format_block_size_in_bytes(m_framebuffer_layout.color_format); + const auto samples = get_format_sample_count(m_framebuffer_layout.aa_mode); for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { @@ -187,15 +187,15 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk auto rtt = std::get<1>(m_rtts.m_bound_render_targets[i]); color_targets[i] = rtt->id(); - verify("Pitch mismatch!" HERE), rtt->get_rsx_pitch() == layout.actual_color_pitch[i]; - m_surface_info[i].address = layout.color_addresses[i]; - m_surface_info[i].pitch = layout.actual_color_pitch[i]; - m_surface_info[i].width = layout.width; - m_surface_info[i].height = layout.height; - m_surface_info[i].color_format = layout.color_format; + verify("Pitch mismatch!" HERE), rtt->get_rsx_pitch() == m_framebuffer_layout.actual_color_pitch[i]; + m_surface_info[i].address = m_framebuffer_layout.color_addresses[i]; + m_surface_info[i].pitch = m_framebuffer_layout.actual_color_pitch[i]; + m_surface_info[i].width = m_framebuffer_layout.width; + m_surface_info[i].height = m_framebuffer_layout.height; + m_surface_info[i].color_format = m_framebuffer_layout.color_format; m_surface_info[i].bpp = color_bpp; m_surface_info[i].samples = samples; - m_gl_texture_cache.notify_surface_changed(m_surface_info[i].get_memory_range(layout.aa_factors)); + m_gl_texture_cache.notify_surface_changed(m_surface_info[i].get_memory_range(m_framebuffer_layout.aa_factors)); } else { @@ -215,20 +215,20 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); depth_stencil_target = ds->id(); - ds->set_depth_render_mode(!layout.depth_float); + ds->set_depth_render_mode(!m_framebuffer_layout.depth_float); - verify("Pitch mismatch!" HERE), std::get<1>(m_rtts.m_bound_depth_stencil)->get_rsx_pitch() == layout.actual_zeta_pitch; + verify("Pitch mismatch!" HERE), std::get<1>(m_rtts.m_bound_depth_stencil)->get_rsx_pitch() == m_framebuffer_layout.actual_zeta_pitch; - m_depth_surface_info.address = layout.zeta_address; - m_depth_surface_info.pitch = layout.actual_zeta_pitch; - m_depth_surface_info.width = layout.width; - m_depth_surface_info.height = layout.height; - m_depth_surface_info.depth_format = layout.depth_format; - m_depth_surface_info.depth_buffer_float = layout.depth_float; - m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); + m_depth_surface_info.address = m_framebuffer_layout.zeta_address; + m_depth_surface_info.pitch = m_framebuffer_layout.actual_zeta_pitch; + m_depth_surface_info.width = m_framebuffer_layout.width; + m_depth_surface_info.height = m_framebuffer_layout.height; + m_depth_surface_info.depth_format = m_framebuffer_layout.depth_format; + m_depth_surface_info.depth_buffer_float = m_framebuffer_layout.depth_float; + m_depth_surface_info.bpp = (m_framebuffer_layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); m_depth_surface_info.samples = samples; - m_gl_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors)); + m_gl_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(m_framebuffer_layout.aa_factors)); } else { @@ -252,7 +252,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_draw_fbo = &fbo; m_draw_fbo->bind(); - m_draw_fbo->set_extents({ (int)layout.width, (int)layout.height }); + m_draw_fbo->set_extents({ (int)m_framebuffer_layout.width, (int)m_framebuffer_layout.height }); framebuffer_status_valid = true; break; @@ -267,7 +267,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_draw_fbo = &m_framebuffer_cache.back(); m_draw_fbo->create(); m_draw_fbo->bind(); - m_draw_fbo->set_extents({ (int)layout.width, (int)layout.height }); + m_draw_fbo->set_extents({ (int)m_framebuffer_layout.width, (int)m_framebuffer_layout.height }); for (int i = 0; i < 4; ++i) { @@ -279,7 +279,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (depth_stencil_target) { - if (layout.depth_format == rsx::surface_depth_format::z24s8) + if (m_framebuffer_layout.depth_format == rsx::surface_depth_format::z24s8) { m_draw_fbo->depth_stencil = depth_stencil_target; } @@ -329,7 +329,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_gl_texture_cache.clear_ro_tex_invalidate_intr(); - const auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format); + const auto color_format = rsx::internals::surface_color_format_to_gl(m_framebuffer_layout.color_format); for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; @@ -354,7 +354,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const auto surface_range = m_depth_surface_info.get_memory_range(); if (g_cfg.video.write_depth_buffer) { - const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); + const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(m_framebuffer_layout.depth_format); m_gl_texture_cache.lock_memory_region( cmd, m_rtts.m_bound_depth_stencil.second, surface_range, true, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fe9c161a54..1bb662cd21 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1017,9 +1017,8 @@ namespace rsx return rsx::get_address(offset_zeta, m_context_dma_z); } - framebuffer_layout thread::get_framebuffer_layout(rsx::framebuffer_creation_context context) + void thread::get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout) { - framebuffer_layout layout; memset(&layout, 0, sizeof(layout)); layout.ignore_change = true; @@ -1033,7 +1032,7 @@ namespace rsx if (layout.width == 0 || layout.height == 0) { LOG_TRACE(RSX, "Invalid framebuffer setup, w=%d, h=%d", layout.width, layout.height); - return layout; + return; } const u16 clip_x = rsx::method_registers.surface_clip_origin_x(); @@ -1050,11 +1049,21 @@ namespace rsx rsx::method_registers.surface_d_pitch(), }; + layout.zeta_write_enabled = rsx::method_registers.depth_write_enabled(); + layout.color_write_enabled = + { + method_registers.color_write_enabled(0), + method_registers.color_write_enabled(1), + method_registers.color_write_enabled(2), + method_registers.color_write_enabled(3) + }; + layout.color_format = rsx::method_registers.surface_color(); layout.depth_format = rsx::method_registers.surface_depth_fmt(); layout.depth_float = rsx::method_registers.depth_buffer_float_enabled(); layout.target = rsx::method_registers.surface_color_target(); + const auto mrt_buffers = rsx::utility::get_rtt_indexes(layout.target); const auto aa_mode = rsx::method_registers.surface_antialias(); const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; @@ -1062,11 +1071,19 @@ namespace rsx const auto depth_texel_size = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4) * aa_factor_u; const auto color_texel_size = get_format_block_size_in_bytes(layout.color_format) * aa_factor_u; - const bool color_write_enabled = rsx::method_registers.color_write_enabled(); - const bool depth_write_enabled = rsx::method_registers.depth_write_enabled(); const bool stencil_test_enabled = layout.depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); const bool depth_test_enabled = rsx::method_registers.depth_test_enabled(); + bool color_write_enabled = false; + for (const auto &index : mrt_buffers) + { + if (layout.color_write_enabled[index]) + { + color_write_enabled = true; + break; + } + } + bool depth_buffer_unused = false, color_buffer_unused = false; switch (context) @@ -1167,7 +1184,7 @@ namespace rsx // TODO: Research clearing both depth AND color // TODO: If context is creation_draw, deal with possibility of a lost buffer clear - if (depth_test_enabled || stencil_test_enabled || (!color_write_enabled && depth_write_enabled)) + if (depth_test_enabled || stencil_test_enabled || (!layout.color_write_enabled[index] && layout.zeta_write_enabled)) { // Use address for depth data layout.color_addresses[index] = 0; @@ -1203,7 +1220,7 @@ namespace rsx if (!framebuffer_status_valid && !layout.zeta_address) { LOG_WARNING(RSX, "Framebuffer setup failed. Draw calls may have been lost"); - return layout; + return; } // At least one attachment exists @@ -1280,12 +1297,11 @@ namespace rsx sample_count == m_depth_surface_info.samples) { // Same target is reused - return layout; + return; } } layout.ignore_change = false; - return layout; } bool thread::get_scissor(areau& region, bool clip_viewport) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f50d946208..c630ea7a60 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -308,9 +308,11 @@ namespace rsx std::array color_addresses; std::array color_pitch; std::array actual_color_pitch; + std::array color_write_enabled; u32 zeta_address; u32 zeta_pitch; u32 actual_zeta_pitch; + bool zeta_write_enabled; rsx::surface_target target; rsx::surface_color_format color_format; rsx::surface_depth_format depth_format; @@ -443,6 +445,7 @@ namespace rsx // Framebuffer setup rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count]; rsx::gcm_framebuffer_info m_depth_surface_info; + framebuffer_layout m_framebuffer_layout; bool framebuffer_status_valid = false; // Overlays @@ -527,7 +530,7 @@ namespace rsx std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; - framebuffer_layout get_framebuffer_layout(rsx::framebuffer_creation_context context); + void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout); bool get_scissor(areau& region, bool clip_viewport); /** diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 5e0b5fbbbd..bea1094042 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1761,7 +1761,7 @@ void VKGSRender::end() close_render_pass(); vk::leave_uninterruptible(); - m_rtts.on_write(rsx::method_registers.color_write_enabled(), rsx::method_registers.depth_write_enabled()); + m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); rsx::thread::end(); } @@ -1919,6 +1919,7 @@ void VKGSRender::clear_surface(u32 mask) VkClearRect region = { { { scissor_x, scissor_y }, { scissor_w, scissor_h } }, 0, 1 }; const bool require_mem_load = (scissor_w * scissor_h) < (fb_width * fb_height); + bool update_color = false, update_z = false; auto surface_depth_format = rsx::method_registers.surface_depth_fmt(); if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); mask & 0x3) @@ -2057,14 +2058,15 @@ void VKGSRender::clear_surface(u32 mask) } } - for (auto &rtt : m_rtts.m_bound_render_targets) + for (u8 index = m_rtts.m_bound_render_targets_config.first, count = 0; + count < m_rtts.m_bound_render_targets_config.second; + ++count, ++index) { - if (const auto address = rtt.first) - { - if (require_mem_load) rtt.second->write_barrier(*m_current_command_buffer); - m_rtts.on_write(true, false, address); - } + if (require_mem_load) + m_rtts.m_bound_render_targets[index].second->write_barrier(*m_current_command_buffer); } + + update_color = true; } } } @@ -2074,11 +2076,18 @@ void VKGSRender::clear_surface(u32 mask) if (m_rtts.m_bound_depth_stencil.first) { if (require_mem_load) m_rtts.m_bound_depth_stencil.second->write_barrier(*m_current_command_buffer); - m_rtts.on_write(false, true); + clear_descriptors.push_back({ (VkImageAspectFlags)depth_stencil_mask, 0, depth_stencil_clear_values }); + update_z = true; } } + if (update_color || update_z) + { + const bool write_all_mask[] = { true, true, true, true }; + m_rtts.on_write(update_color ? write_all_mask : nullptr, update_z); + } + if (!clear_descriptors.empty()) { begin_render_pass(); @@ -2478,15 +2487,18 @@ bool VKGSRender::load_program() if (rsx::method_registers.cull_face_enabled()) properties.state.enable_cull_face(vk::get_cull_face(rsx::method_registers.cull_face_mode())); - bool color_mask_b = rsx::method_registers.color_mask_b(); - bool color_mask_g = rsx::method_registers.color_mask_g(); - bool color_mask_r = rsx::method_registers.color_mask_r(); - bool color_mask_a = rsx::method_registers.color_mask_a(); + for (int index = 0; index < m_draw_buffers.size(); ++index) + { + bool color_mask_b = rsx::method_registers.color_mask_b(index); + bool color_mask_g = rsx::method_registers.color_mask_g(index); + bool color_mask_r = rsx::method_registers.color_mask_r(index); + bool color_mask_a = rsx::method_registers.color_mask_a(index); - if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) - rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) + rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); - properties.state.set_color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + properties.state.set_color_mask(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a); + } bool mrt_blend_enabled[] = { @@ -2831,13 +2843,13 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) framebuffer_status_valid = false; m_framebuffer_state_contested = false; - const auto layout = get_framebuffer_layout(context); + get_framebuffer_layout(context, m_framebuffer_layout); if (!framebuffer_status_valid) { return; } - if (m_draw_fbo && layout.ignore_change) + if (m_draw_fbo && m_framebuffer_layout.ignore_change) { // Nothing has changed, we're still using the same framebuffer // Update flags to match current @@ -2846,16 +2858,16 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } m_rtts.prepare_render_target(*m_current_command_buffer, - layout.color_format, layout.depth_format, - layout.width, layout.height, - layout.target, layout.aa_mode, - layout.color_addresses, layout.zeta_address, - layout.actual_color_pitch, layout.actual_zeta_pitch, + m_framebuffer_layout.color_format, m_framebuffer_layout.depth_format, + m_framebuffer_layout.width, m_framebuffer_layout.height, + m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, + m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address, + m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch, (*m_device), *m_current_command_buffer); // Reset framebuffer information - const auto color_bpp = get_format_block_size_in_bytes(layout.color_format); - const auto samples = get_format_sample_count(layout.aa_mode); + const auto color_bpp = get_format_block_size_in_bytes(m_framebuffer_layout.color_format); + const auto samples = get_format_sample_count(m_framebuffer_layout.aa_mode); for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { @@ -2868,9 +2880,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } m_surface_info[i].address = m_surface_info[i].pitch = 0; - m_surface_info[i].width = layout.width; - m_surface_info[i].height = layout.height; - m_surface_info[i].color_format = layout.color_format; + m_surface_info[i].width = m_framebuffer_layout.width; + m_surface_info[i].height = m_framebuffer_layout.height; + m_surface_info[i].color_format = m_framebuffer_layout.color_format; m_surface_info[i].bpp = color_bpp; m_surface_info[i].samples = samples; } @@ -2885,16 +2897,16 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; - m_depth_surface_info.width = layout.width; - m_depth_surface_info.height = layout.height; - m_depth_surface_info.depth_format = layout.depth_format; - m_depth_surface_info.depth_buffer_float = layout.depth_float; - m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); + m_depth_surface_info.width = m_framebuffer_layout.width; + m_depth_surface_info.height = m_framebuffer_layout.height; + m_depth_surface_info.depth_format = m_framebuffer_layout.depth_format; + m_depth_surface_info.depth_buffer_float = m_framebuffer_layout.depth_float; + m_depth_surface_info.bpp = (m_framebuffer_layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); m_depth_surface_info.samples = samples; } //Bind created rtts as current fbo... - const auto draw_buffers = rsx::utility::get_rtt_indexes(layout.target); + const auto draw_buffers = rsx::utility::get_rtt_indexes(m_framebuffer_layout.target); m_draw_buffers.clear(); m_fbo_images.clear(); @@ -2904,11 +2916,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { m_fbo_images.push_back(surface); - m_surface_info[index].address = layout.color_addresses[index]; - m_surface_info[index].pitch = layout.actual_color_pitch[index]; - verify("Pitch mismatch!" HERE), surface->rsx_pitch == layout.actual_color_pitch[index]; + m_surface_info[index].address = m_framebuffer_layout.color_addresses[index]; + m_surface_info[index].pitch = m_framebuffer_layout.actual_color_pitch[index]; + verify("Pitch mismatch!" HERE), surface->rsx_pitch == m_framebuffer_layout.actual_color_pitch[index]; - m_texture_cache.notify_surface_changed(m_surface_info[index].get_memory_range(layout.aa_factors)); + m_texture_cache.notify_surface_changed(m_surface_info[index].get_memory_range(m_framebuffer_layout.aa_factors)); m_draw_buffers.push_back(index); } } @@ -2916,14 +2928,14 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0) { auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - ds->set_depth_render_mode(!layout.depth_float); + ds->set_depth_render_mode(!m_framebuffer_layout.depth_float); m_fbo_images.push_back(ds); - m_depth_surface_info.address = layout.zeta_address; - m_depth_surface_info.pitch = layout.actual_zeta_pitch; - verify("Pitch mismatch!" HERE), ds->rsx_pitch == layout.actual_zeta_pitch; + m_depth_surface_info.address = m_framebuffer_layout.zeta_address; + m_depth_surface_info.pitch = m_framebuffer_layout.actual_zeta_pitch; + verify("Pitch mismatch!" HERE), ds->rsx_pitch == m_framebuffer_layout.actual_zeta_pitch; - m_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors)); + m_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(m_framebuffer_layout.aa_factors)); } // Before messing with memory properties, flush command queue if there are dma transfers queued up @@ -2932,7 +2944,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) flush_command_queue(); } - const auto color_fmt_info = get_compatible_gcm_format(layout.color_format); + const auto color_fmt_info = get_compatible_gcm_format(m_framebuffer_layout.color_format); for (u8 index : m_draw_buffers) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; @@ -2942,7 +2954,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { m_texture_cache.lock_memory_region( *m_current_command_buffer, m_rtts.m_bound_render_targets[index].second, surface_range, true, - m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], + m_surface_info[index].width, m_surface_info[index].height, m_framebuffer_layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second); } else @@ -2959,7 +2971,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; m_texture_cache.lock_memory_region( *m_current_command_buffer, m_rtts.m_bound_depth_stencil.second, surface_range, true, - m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false); + m_depth_surface_info.width, m_depth_surface_info.height, m_framebuffer_layout.actual_zeta_pitch, gcm_format, false); } else { @@ -3008,8 +3020,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key); // Search old framebuffers for this same configuration - const auto fbo_width = rsx::apply_resolution_scale(layout.width, true); - const auto fbo_height = rsx::apply_resolution_scale(layout.height, true); + const auto fbo_width = rsx::apply_resolution_scale(m_framebuffer_layout.width, true); + const auto fbo_height = rsx::apply_resolution_scale(m_framebuffer_layout.height, true); if (m_draw_fbo) { diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 9d577770e7..e10dc1ca79 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -617,12 +617,6 @@ namespace vk VkPipelineStageFlags src_stage; if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!rsx::method_registers.color_write_enabled() && current_layout == new_layout) - { - // Nothing to do - return; - } - src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index caeaaec3c7..db0622f4ca 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -2946,7 +2946,7 @@ public: ia.primitiveRestartEnable = enable? VK_TRUE : VK_FALSE; } - void set_color_mask(bool r, bool g, bool b, bool a) + void set_color_mask(int index, bool r, bool g, bool b, bool a) { VkColorComponentFlags mask = 0; if (a) mask |= VK_COLOR_COMPONENT_A_BIT; @@ -2954,10 +2954,7 @@ public: if (g) mask |= VK_COLOR_COMPONENT_G_BIT; if (r) mask |= VK_COLOR_COMPONENT_R_BIT; - att_state[0].colorWriteMask = mask; - att_state[1].colorWriteMask = mask; - att_state[2].colorWriteMask = mask; - att_state[3].colorWriteMask = mask; + att_state[index].colorWriteMask = mask; } void set_depth_mask(bool enable) diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 3dbd058b4e..95aa0c0934 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -572,7 +572,7 @@ namespace vk "}\n"; renderpass_config.set_attachment_count(1); - renderpass_config.set_color_mask(true, true, true, true); + renderpass_config.set_color_mask(0, true, true, true, true); renderpass_config.set_depth_mask(false); renderpass_config.enable_blend(0, VK_BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_SRC_ALPHA, @@ -848,7 +848,7 @@ namespace vk "}\n"; renderpass_config.set_depth_mask(false); - renderpass_config.set_color_mask(true, true, true, true); + renderpass_config.set_color_mask(0, true, true, true, true); renderpass_config.set_attachment_count(1); } diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index 984c54c78b..a6ca58890a 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -3029,6 +3029,60 @@ struct registers_decoder } }; +template<> +struct registers_decoder +{ + struct decoded_type + { + private: + u32 value; + + public: + decoded_type(u32 value) : value(value) {} + + bool color_b(int index) const + { + return bf_decoder<3, 1, bool>(value >> (index * 4)); + } + + bool color_g(int index) const + { + return bf_decoder<2, 1, bool>(value >> (index * 4)); + } + + bool color_r(int index) const + { + return bf_decoder<1, 1, bool>(value >> (index * 4)); + } + + bool color_a(int index) const + { + return bf_decoder<0, 1, bool>(value >> (index * 4)); + } + + bool color_write_enabled(int index) const + { + return ((value >> (index * 4)) & 0xF) != 0; + } + }; + + static std::string dump(decoded_type &&decoded_values) + { + std::string result; + for (int index = 1; index < 4; ++index) + { + result += fmt::format("Surface[%d]: A:%d R:%d G:%d B:%d\n", + index, + decoded_values.color_a(index), + decoded_values.color_r(index), + decoded_values.color_g(index), + decoded_values.color_b(index)); + } + + return "Color Mask MRT:\n" + result; + } +}; + template<> struct registers_decoder { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index c859ab437b..9b6d9936cc 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -2874,6 +2874,7 @@ namespace rsx bind(); bind(); bind(); + bind(); bind(); bind(); bind(); diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 640efd8a75..c78d113841 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -705,29 +705,64 @@ namespace rsx return decode().type(); } - bool color_mask_b() const + bool color_mask_b(int index) const { - return decode().color_b(); + if (index == 0) + { + return decode().color_b(); + } + else + { + return decode().color_b(index); + } } - bool color_mask_g() const + bool color_mask_g(int index) const { - return decode().color_g(); + if (index == 0) + { + return decode().color_g(); + } + else + { + return decode().color_g(index); + } } - bool color_mask_r() const + bool color_mask_r(int index) const { - return decode().color_r(); + if (index == 0) + { + return decode().color_r(); + } + else + { + return decode().color_r(index); + } } - bool color_mask_a() const + bool color_mask_a(int index) const { - return decode().color_a(); + if (index == 0) + { + return decode().color_a(); + } + else + { + return decode().color_a(index); + } } - bool color_write_enabled() const + bool color_write_enabled(int index) const { - return decode().color_write_enabled(); + if (index == 0) + { + return decode().color_write_enabled(); + } + else + { + return decode().color_write_enabled(index); + } } u8 clear_color_b() const