diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index a1ebf91088..3538cb47bf 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -214,12 +214,20 @@ void GLGSRender::end() gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); // Handle special memory barrier for ARGB8->D24S8 in an active DSV - if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == static_cast(ds->old_contents)->get_rsx_pitch() && - ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8) + if (ds && ds->old_contents != nullptr && + ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8 && + ds->get_rsx_pitch() == static_cast(ds->old_contents)->get_rsx_pitch()) { - // TODO: Partial memory transfer gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); - m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id()); + + // TODO: Stencil transfer + gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF); + + const auto region = rsx::get_transferable_region(ds); + m_depth_converter.run({0, 0, std::get<0>(region), std::get<1>(region)}, + {0, 0, std::get<2>(region), std::get<3>(region)}, + ds->old_contents, ds); + ds->on_write(); } diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 1b73795461..c604e09f96 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "stdafx.h" #include "GLHelpers.h" @@ -250,6 +250,7 @@ namespace gl vs_src = { "#version 420\n\n" + "uniform vec2 tex_scale;\n" "out vec2 tc0;\n" "\n" "void main()\n" @@ -257,7 +258,7 @@ namespace gl " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" " vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n" " gl_Position = vec4(positions[gl_VertexID % 4], 0., 1.);\n" - " tc0 = coords[gl_VertexID % 4];\n" + " tc0 = coords[gl_VertexID % 4] * 
tex_scale;\n" "}\n" }; @@ -275,12 +276,17 @@ namespace gl }; } - void run(u16 w, u16 h, GLuint target, GLuint source) + void run(const areai& src_area, const areai& dst_area, gl::texture* source, gl::texture* target) { - glActiveTexture(GL_TEXTURE31); - glBindTexture(GL_TEXTURE_2D, source); + const auto src_ratio_x = f32(src_area.x2) / source->width(); + const auto src_ratio_y = f32(src_area.y2) / source->height(); - overlay_pass::run(w, h, target, true); + program_handle.uniforms["tex_scale"] = color2f(src_ratio_x, src_ratio_y); + + glActiveTexture(GL_TEXTURE31); + glBindTexture(GL_TEXTURE_2D, source->id()); + + overlay_pass::run(dst_area.x2, dst_area.y2, target->id(), true); } }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ff1c73fcc4..4a577bc485 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1334,21 +1334,32 @@ void VKGSRender::end() // Check for data casts auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && ds->old_contents) + if (ds && ds->old_contents && + ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM && + ds->get_rsx_pitch() == static_cast(ds->old_contents)->get_rsx_pitch()) { - if (UNLIKELY(ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)) - { - // TODO: Partial memory transfer - auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); - auto render_pass = m_render_passes[rp]; - verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE; + auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); + auto render_pass = m_render_passes[rp]; + verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE; - m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, - static_cast(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), - render_pass, m_framebuffers_to_clean); + VkClearDepthStencilValue clear = { 1.f, 0xFF }; + 
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 }; - ds->on_write(); - } + // Clear explicitly before starting the inheritance transfer + vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_GENERAL, range); + vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range); + vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); + + // TODO: Stencil transfer + const auto region = rsx::get_transferable_region(ds); + m_depth_converter->run(*m_current_command_buffer, + { 0, 0, std::get<0>(region), std::get<1>(region) }, + { 0, 0, std::get<2>(region), std::get<3>(region) }, + static_cast(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), + ds, render_pass, m_framebuffers_to_clean); + + // TODO: Flush management to avoid pass running out of ubo space (very unlikely) + ds->on_write(); } //Load textures diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 5a1c844510..399a5e21a7 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -396,12 +396,16 @@ namespace vk struct depth_convert_pass : public overlay_pass { + f32 src_scale_x; + f32 src_scale_y; + depth_convert_pass() { vs_src = { "#version 450\n" "#extension GL_ARB_separate_shader_objects : enable\n" + "layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n" "layout(location=0) out vec2 tc0;\n" "\n" "void main()\n" @@ -409,7 +413,7 @@ namespace vk " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" " vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n" " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" - " tc0 = coords[gl_VertexIndex % 4];\n" + " tc0 = coords[gl_VertexIndex % 4] * regs[0].xy;\n" "}\n" }; @@ -430,7 +434,28 @@ namespace vk renderpass_config.set_depth_mask(true); 
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); - renderpass_config.enable_stencil_test(VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_COMPARE_OP_ALWAYS, 0xFF, 0xFF); + } + + void update_uniforms(vk::glsl::program* /*program*/) override + { + m_ubo_offset = (u32)m_ubo.alloc<256>(128); + auto dst = (f32*)m_ubo.map(m_ubo_offset, 128); + dst[0] = src_scale_x; + dst[1] = src_scale_y; + dst[2] = 0.f; + dst[3] = 0.f; + m_ubo.unmap(); + } + + void run(vk::command_buffer& cmd, const areai& src_area, const areai& dst_area, vk::image_view* src, vk::image* dst, VkRenderPass render_pass, std::list>& framebuffer_resources) + { + auto real_src = src->image(); + verify(HERE), real_src; + + src_scale_x = f32(src_area.x2) / real_src->width(); + src_scale_y = f32(src_area.y2) / real_src->height(); + + overlay_pass::run(cmd, dst_area.x2, dst_area.y2, dst, src, render_pass, framebuffer_resources); } }; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 08959483ee..40e12ccdf5 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -448,6 +448,7 @@ namespace rsx /** * Calculates the regions used for memory transfer between rendertargets on succession events + * Returns a 4-element tuple; the callers in this change use elements 0-1 as the extent of the source area and elements 2-3 as the extent of the destination area */ template std::tuple get_transferable_region(SurfaceType* surface)