diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index fb69385ecb..09273fda31 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -153,12 +153,14 @@ namespace rsx surface_storage_type new_surface_storage; surface_type old_surface = nullptr; surface_type new_surface = nullptr; + surface_type convert_surface = nullptr; // Remove any depth surfaces occupying this memory address (TODO: Discard all overlapping range) auto aliased_depth_surface = m_depth_stencil_storage.find(address); if (aliased_depth_surface != m_depth_stencil_storage.end()) { Traits::notify_surface_invalidated(aliased_depth_surface->second); + convert_surface = Traits::get(aliased_depth_surface->second); invalidated_resources.push_back(std::move(aliased_depth_surface->second)); m_depth_stencil_storage.erase(aliased_depth_surface); } @@ -178,7 +180,10 @@ namespace rsx m_render_targets_storage.erase(address); } - //Search invalidated resources for a suitable surface + // Select source of original data if any + auto contents_to_copy = old_surface == nullptr ? 
convert_surface : old_surface; + + // Search invalidated resources for a suitable surface for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &rtt = *It; @@ -197,7 +202,7 @@ namespace rsx invalidated_resources.erase(It); new_surface = Traits::get(new_surface_storage); - Traits::invalidate_rtt_surface_contents(command_list, new_surface, old_surface, true); + Traits::invalidate_rtt_surface_contents(command_list, new_surface, contents_to_copy, true); Traits::prepare_rtt_for_drawing(command_list, new_surface); break; } @@ -217,7 +222,7 @@ namespace rsx return new_surface; } - m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, old_surface, std::forward(extra_params)...); + m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, contents_to_copy, std::forward(extra_params)...); return Traits::get(m_render_targets_storage[address]); } @@ -232,12 +237,14 @@ namespace rsx surface_storage_type new_surface_storage; surface_type old_surface = nullptr; surface_type new_surface = nullptr; + surface_type convert_surface = nullptr; // Remove any color surfaces occupying this memory range (TODO: Discard all overlapping surfaces) auto aliased_rtt_surface = m_render_targets_storage.find(address); if (aliased_rtt_surface != m_render_targets_storage.end()) { Traits::notify_surface_invalidated(aliased_rtt_surface->second); + convert_surface = Traits::get(aliased_rtt_surface->second); invalidated_resources.push_back(std::move(aliased_rtt_surface->second)); m_render_targets_storage.erase(aliased_rtt_surface); } @@ -257,6 +264,9 @@ namespace rsx m_depth_stencil_storage.erase(address); } + // Select source of original data if any + auto contents_to_copy = old_surface == nullptr ? 
convert_surface : old_surface; + //Search invalidated resources for a suitable surface for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { @@ -276,7 +286,7 @@ namespace rsx new_surface = Traits::get(new_surface_storage); Traits::prepare_ds_for_drawing(command_list, new_surface); - Traits::invalidate_depth_surface_contents(command_list, new_surface, old_surface, true); + Traits::invalidate_depth_surface_contents(command_list, new_surface, contents_to_copy, true); break; } } @@ -295,7 +305,7 @@ namespace rsx return new_surface; } - m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, old_surface, std::forward(extra_params)...); + m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, contents_to_copy, std::forward(extra_params)...); return Traits::get(m_depth_stencil_storage[address]); } public: diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c03cc13112..45183984d1 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -464,6 +464,13 @@ void GLGSRender::end() ds->set_cleared(); } + if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() && + ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8) + { + m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id()); + ds->old_contents = nullptr; + } + if (g_cfg.video.strict_rendering_mode) { if (ds->old_contents != nullptr) @@ -479,6 +486,11 @@ void GLGSRender::end() } } } + else + { + // Old contents are one use only. 
Keep the depth conversion check from firing over and over + if (ds) ds->old_contents = nullptr; + } glEnable(GL_SCISSOR_TEST); @@ -758,6 +770,8 @@ void GLGSRender::on_init_thread() glEnable(GL_CLIP_DISTANCE0 + 4); glEnable(GL_CLIP_DISTANCE0 + 5); + m_depth_converter.create(); + m_gl_texture_cache.initialize(); m_thread_id = std::this_thread::get_id(); @@ -826,6 +840,7 @@ void GLGSRender::on_exit() m_text_printer.close(); m_gl_texture_cache.destroy(); + m_depth_converter.destroy(); for (u32 i = 0; i < occlusion_query_count; ++i) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 0c07ab8236..9851a9c245 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -8,6 +8,7 @@ #include "define_new_memleakdetect.h" #include "GLProgramBuffer.h" #include "GLTextOut.h" +#include "GLOverlays.h" #include "../rsx_utils.h" #include "../rsx_cache.h" @@ -348,6 +349,7 @@ private: bool manually_flush_ring_buffers = false; gl::text_writer m_text_printer; + gl::depth_convert_pass m_depth_converter; std::mutex queue_guard; std::list work_queue; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 7779ab3309..d687a1be3d 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -2260,22 +2260,16 @@ namespace gl return m_location; } - void operator = (int rhs) const { m_program.use(); glUniform1i(location(), rhs); } - void operator = (float rhs) const { m_program.use(); glUniform1f(location(), rhs); } - //void operator = (double rhs) const { m_program.use(); glUniform1d(location(), rhs); } - - void operator = (const color1i& rhs) const { m_program.use(); glUniform1i(location(), rhs.r); } - void operator = (const color1f& rhs) const { m_program.use(); glUniform1f(location(), rhs.r); } - //void operator = (const color1d& rhs) const { m_program.use(); glUniform1d(location(), rhs.r); } - void operator = (const color2i& rhs) const { m_program.use(); glUniform2i(location(), rhs.r, rhs.g); } - 
void operator = (const color2f& rhs) const { m_program.use(); glUniform2f(location(), rhs.r, rhs.g); } - //void operator = (const color2d& rhs) const { m_program.use(); glUniform2d(location(), rhs.r, rhs.g); } - void operator = (const color3i& rhs) const { m_program.use(); glUniform3i(location(), rhs.r, rhs.g, rhs.b); } - void operator = (const color3f& rhs) const { m_program.use(); glUniform3f(location(), rhs.r, rhs.g, rhs.b); } - //void operator = (const color3d& rhs) const { m_program.use(); glUniform3d(location(), rhs.r, rhs.g, rhs.b); } - void operator = (const color4i& rhs) const { m_program.use(); glUniform4i(location(), rhs.r, rhs.g, rhs.b, rhs.a); } - void operator = (const color4f& rhs) const { m_program.use(); glUniform4f(location(), rhs.r, rhs.g, rhs.b, rhs.a); } - //void operator = (const color4d& rhs) const { m_program.use(); glUniform4d(location(), rhs.r, rhs.g, rhs.b, rhs.a); } + void operator = (int rhs) const { glProgramUniform1i(m_program.id(), location(), rhs); } + void operator = (float rhs) const { glProgramUniform1f(m_program.id(), location(), rhs); } + void operator = (const color1i& rhs) const { glProgramUniform1i(m_program.id(), location(), rhs.r); } + void operator = (const color1f& rhs) const { glProgramUniform1f(m_program.id(), location(), rhs.r); } + void operator = (const color2i& rhs) const { glProgramUniform2i(m_program.id(), location(), rhs.r, rhs.g); } + void operator = (const color2f& rhs) const { glProgramUniform2f(m_program.id(), location(), rhs.r, rhs.g); } + void operator = (const color3i& rhs) const { glProgramUniform3i(m_program.id(), location(), rhs.r, rhs.g, rhs.b); } + void operator = (const color3f& rhs) const { glProgramUniform3f(m_program.id(), location(), rhs.r, rhs.g, rhs.b); } + void operator = (const color4i& rhs) const { glProgramUniform4i(m_program.id(), location(), rhs.r, rhs.g, rhs.b, rhs.a); } + void operator = (const color4f& rhs) const { glProgramUniform4f(m_program.id(), location(), rhs.r, rhs.g, rhs.b, 
rhs.a); } }; class attrib_t
diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h
new file mode 100644
index 0000000000..0fdbc60412
--- /dev/null
+++ b/rpcs3/Emu/RSX/GL/GLOverlays.h
@@ -0,0 +1,231 @@
+#pragma once
+
+#include "stdafx.h"
+#include "GLHelpers.h"
+
+namespace gl
+{
+	// Base for helper passes that render into a texture through a private FBO.
+	// A derived pass fills in fs_src/vs_src before create() is called and may
+	// override the hook methods to bind inputs or emit different geometry.
+	struct overlay_pass
+	{
+		std::string fs_src;
+		std::string vs_src;
+
+		gl::glsl::program program_handle;
+		gl::glsl::shader vs;
+		gl::glsl::shader fs;
+
+		gl::fbo fbo;
+
+		bool compiled = false;
+
+		// Polymorphic type: give it a virtual destructor. GL objects are not
+		// released here; destroy() has to be called explicitly.
+		virtual ~overlay_pass() = default;
+
+		// Compiles both shaders, links the program and creates the FBO.
+		// Does nothing if the pass was already created.
+		void create()
+		{
+			if (compiled)
+				return;
+
+			fs.create(gl::glsl::shader::type::fragment);
+			fs.source(fs_src);
+			fs.compile();
+
+			vs.create(gl::glsl::shader::type::vertex);
+			vs.source(vs_src);
+			vs.compile();
+
+			program_handle.create();
+			program_handle.attach(vs);
+			program_handle.attach(fs);
+			program_handle.make();
+
+			fbo.create();
+
+			compiled = true;
+		}
+
+		// Releases everything create() allocated. Does nothing if not created.
+		void destroy()
+		{
+			if (!compiled)
+				return;
+
+			program_handle.remove();
+			vs.remove();
+			fs.remove();
+
+			fbo.remove();
+
+			compiled = false;
+		}
+
+		// Customisation hooks. run() calls on_load(), bind_resources() and
+		// emit_geometry(), in that order, after activating the program.
+		// NOTE(review): on_unload() and cleanup_resources() are not called by
+		// anything in this file - confirm whether run()/destroy() should.
+		virtual void on_load() {}
+		virtual void on_unload() {}
+
+		virtual void bind_resources() {}
+		virtual void cleanup_resources() {}
+
+		// Default geometry: four vertices drawn as a triangle strip.
+		virtual void emit_geometry()
+		{
+			glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		}
+
+		// Draws the pass into target_texture with a w x h viewport.
+		// depth_target selects whether the texture is attached as the depth
+		// attachment (color draws disabled) or as color attachment 0.
+		// The framebuffer binding, program, viewport, write masks, depth func
+		// and the touched enable bits are saved and put back before returning.
+		virtual void run(u16 w, u16 h, GLuint target_texture, bool depth_target)
+		{
+			if (!compiled)
+			{
+				LOG_ERROR(RSX, "You must initialize overlay passes with create() before calling run()");
+				return;
+			}
+
+			GLint old_fbo;
+			glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
+			glBindFramebuffer(GL_FRAMEBUFFER, fbo.id());
+
+			// Attach and detach through the same attachment point
+			const GLenum attachment = depth_target ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
+			glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, target_texture, 0);
+
+			if (depth_target)
+			{
+				glDrawBuffer(GL_NONE);
+			}
+			else
+			{
+				GLenum buffer = GL_COLOR_ATTACHMENT0;
+				glDrawBuffers(1, &buffer);
+			}
+
+			if (glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE)
+			{
+				// Push rasterizer state
+				GLint program;
+				GLint depth_func;
+				GLint viewport[4];
+				GLboolean color_writes[4];
+				GLboolean depth_write;
+
+				glGetIntegerv(GL_VIEWPORT, viewport);
+				glGetBooleanv(GL_COLOR_WRITEMASK, color_writes);
+				glGetBooleanv(GL_DEPTH_WRITEMASK, &depth_write);
+				glGetIntegerv(GL_CURRENT_PROGRAM, &program);
+				glGetIntegerv(GL_DEPTH_FUNC, &depth_func);
+
+				const GLboolean scissor_enabled = glIsEnabled(GL_SCISSOR_TEST);
+				const GLboolean depth_test_enabled = glIsEnabled(GL_DEPTH_TEST);
+				const GLboolean cull_face_enabled = glIsEnabled(GL_CULL_FACE);
+				const GLboolean blend_enabled = glIsEnabled(GL_BLEND);
+				const GLboolean stencil_test_enabled = glIsEnabled(GL_STENCIL_TEST);
+
+				// Set initial state
+				glViewport(0, 0, w, h);
+				glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+				glDepthMask(depth_target ? GL_TRUE : GL_FALSE);
+
+				// AMD driver bug, disabling depth test doesn't work when doing depth replace
+				// (gl_FragDepth writes still go through the depth test), so keep the test
+				// enabled and make it always pass instead.
+				glDepthFunc(GL_ALWAYS);
+				glEnable(GL_DEPTH_TEST);
+
+				if (scissor_enabled) glDisable(GL_SCISSOR_TEST);
+				if (cull_face_enabled) glDisable(GL_CULL_FACE);
+				if (blend_enabled) glDisable(GL_BLEND);
+				if (stencil_test_enabled) glDisable(GL_STENCIL_TEST);
+
+				// Render
+				program_handle.use();
+				on_load();
+				bind_resources();
+				emit_geometry();
+
+				// Pop rasterizer state
+				glUseProgram(static_cast<GLuint>(program));
+
+				glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
+				glColorMask(color_writes[0], color_writes[1], color_writes[2], color_writes[3]);
+				glDepthMask(depth_write);
+				glDepthFunc(depth_func);
+
+				if (!depth_test_enabled) glDisable(GL_DEPTH_TEST);
+				if (scissor_enabled) glEnable(GL_SCISSOR_TEST);
+				if (cull_face_enabled) glEnable(GL_CULL_FACE);
+				if (blend_enabled) glEnable(GL_BLEND);
+				if (stencil_test_enabled) glEnable(GL_STENCIL_TEST);
+			}
+			else
+			{
+				LOG_ERROR(RSX, "Overlay pass failed because framebuffer was not complete. Run with debug output enabled to diagnose the problem");
+			}
+
+			// Clean up on both paths so a failed pass cannot leave the private FBO
+			// bound or keep the target texture attached to it.
+			glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0);
+			glBindFramebuffer(GL_FRAMEBUFFER, static_cast<GLuint>(old_fbo));
+		}
+	};
+
+	// Pass that samples an RGBA texture and writes a depth value rebuilt from
+	// its r, g and b channels into a depth texture.
+	struct depth_convert_pass : public overlay_pass
+	{
+		depth_convert_pass()
+		{
+			vs_src =
+			{
+				"#version 420\n\n"
+				"out vec2 tc0;\n"
+				"\n"
+				"void main()\n"
+				"{\n"
+				" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
+				" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
+				" gl_Position = vec4(positions[gl_VertexID % 4], 0., 1.);\n"
+				" tc0 = coords[gl_VertexID % 4];\n"
+				"}\n"
+			};
+
+			fs_src =
+			{
+				"#version 420\n\n"
+				"in vec2 tc0;\n"
+				"layout(binding=31) uniform sampler2D fs0;\n"
+				"\n"
+				"void main()\n"
+				"{\n"
+				" vec4 rgba_in = texture(fs0, tc0);\n"
+				" gl_FragDepth = rgba_in.r * 0.99609 + rgba_in.g * 0.00389 + rgba_in.b * 0.00002;\n"
+				"}\n"
+			};
+		}
+
+		// Converts the color texture `source` into the depth texture `target`.
+		// The source is bound to texture unit 31, the unit the fragment shader's
+		// sampler is declared on. This overload hides overlay_pass::run.
+		// NOTE(review): the active texture unit is left at GL_TEXTURE31 - confirm
+		// that callers do not rely on the previous unit.
+		void run(u16 w, u16 h, GLuint target, GLuint source)
+		{
+			glActiveTexture(GL_TEXTURE31);
+			glBindTexture(GL_TEXTURE_2D, source);
+
+			overlay_pass::run(w, h, target, true);
+		}
+	};
+}
diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index b555016326..9c3b7ec7cd 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -185,7 +185,7 @@ void GLGSRender::init_buffers(bool skip_reading) const auto depth_format = rsx::method_registers.surface_depth_fmt(); const auto required_color_pitch = rsx::utility::get_packed_pitch(surface_format, clip_horizontal); - const auto required_z_pitch = depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4; + const u32 required_z_pitch = depth_format == rsx::surface_depth_format::z16 ? 
clip_horizontal * 2 : clip_horizontal * 4; if (depth_address && zeta_pitch < required_z_pitch) depth_address = 0; @@ -207,8 +207,9 @@ void GLGSRender::init_buffers(bool skip_reading) if (surface_addresses[index] == depth_address && zeta_pitch >= required_z_pitch) { - LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded."); - framebuffer_status_valid = false; + //LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded."); + //framebuffer_status_valid = false; + depth_address = 0; break; } } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 250fc11293..b844fe2cc6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -184,8 +184,7 @@ struct gl_render_target_traits __glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1); __glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1); - if (old_surface != nullptr && old_surface->get_compatible_internal_format() == internal_fmt) - result->old_contents = old_surface; + result->old_contents = old_surface; result->set_cleared(); result->update_surface(); @@ -227,8 +226,7 @@ struct gl_render_target_traits result->set_native_pitch(native_pitch); result->set_compatible_format(format.internal_format); - if (old_surface != nullptr && old_surface->get_compatible_internal_format() == format.internal_format) - result->old_contents = old_surface; + result->old_contents = old_surface; result->update_surface(); return result; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index c978746894..78d8256679 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2354,8 +2354,9 @@ void VKGSRender::prepare_rtts() if (surface_addresses[index] == 
zeta_address && zeta_pitch >= required_z_pitch) { - LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded."); - framebuffer_status_valid = false; + //LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded."); + //framebuffer_status_valid = false; + zeta_address = 0; break; } } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h new file mode 100644 index 0000000000..8cc2a4b890 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -0,0 +1,12 @@ +#pragma once +#include "VKHelpers.h" +#include "VKVertexProgram.h" +#include "VKFragmentProgram.h" + +namespace vk +{ + struct overlay_pass + { + //TODO + }; +} diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index ebe9e3d385..c0922ab3fa 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -63,6 +63,7 @@ + @@ -70,6 +71,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 932ef00516..742287d10a 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -24,5 +24,6 @@ + \ No newline at end of file diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 939431dfa5..9eb0d7dcd4 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -28,6 +28,7 @@ + @@ -101,4 +102,4 @@ - + \ No newline at end of file diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index a25f4d1abd..6cdae63153 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -40,6 +40,9 @@ Source Files + + Source Files +