diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index a134153f11..093055d657 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1593,8 +1593,8 @@ namespace rsx if (requires_processing) { - const auto w = rsx::apply_resolution_scale(internal_width, true); - const auto h = rsx::apply_resolution_scale(internal_height, true); + const auto w = rsx::apply_resolution_scale(std::min(internal_width, surface_width), true); + const auto h = rsx::apply_resolution_scale(std::min(internal_height, surface_height), true); auto command = update_subresource_cache ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; return { texptr->get_surface(), command, texaddr, format, 0, 0, w, h, 1, diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7f17b26b8a..0afbf517e6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -207,19 +207,13 @@ void GLGSRender::end() //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared - auto copy_rtt_contents = [](gl::render_target *surface) + auto copy_rtt_contents = [](gl::render_target *surface, bool is_depth) { if (surface->get_internal_format() == surface->old_contents->get_internal_format()) { //Copy data from old contents onto this one - //1. Clip a rectangular region defining the data - //2. 
Perform a GPU blit - u16 parent_w = surface->old_contents->width(); - u16 parent_h = surface->old_contents->height(); - u16 copy_w, copy_h; - - std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); - glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); + const auto region = rsx::get_transferable_region(surface); + gl::g_hw_blitter->scale_image(surface->old_contents, surface, { 0, 0, std::get<0>(region), std::get<1>(region) }, { 0, 0, std::get<2>(region) , std::get<3>(region) }, !is_depth, is_depth, {}); surface->set_cleared(); } //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible @@ -293,14 +287,14 @@ void GLGSRender::end() if (g_cfg.video.strict_rendering_mode) { if (ds && ds->old_contents != nullptr) - copy_rtt_contents(ds); + copy_rtt_contents(ds, true); for (auto &rtt : m_rtts.m_bound_render_targets) { if (auto surface = std::get<1>(rtt)) { if (surface->old_contents != nullptr) - copy_rtt_contents(surface); + copy_rtt_contents(surface, false); } } } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 42bd5396e4..bac2ce7bbc 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -366,7 +366,24 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (depth_address == m_depth_surface_info.address) { - //Nothing has changed, we're still using the same framebuffer + // Nothing has changed, we're still using the same framebuffer + // Update flags to match current + + const auto aa_mode = rsx::method_registers.surface_antialias(); + + for (u32 index = 0; index < 4; index++) + { + if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) + { + surface->aa_mode = aa_mode; + } + } + + if (auto ds = 
std::get<1>(m_rtts.m_bound_depth_stencil)) + { + ds->aa_mode = aa_mode; + } + return; } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 6ff52f8dc8..a3506f845f 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1161,33 +1161,34 @@ void VKGSRender::end() if (g_cfg.video.strict_rendering_mode) { - auto copy_rtt_contents = [&](vk::render_target* surface) + auto copy_rtt_contents = [&](vk::render_target* surface, bool is_depth) { if (surface->info.format == surface->old_contents->info.format) { - const VkImageAspectFlags aspect = surface->attachment_aspect_flag; + const auto region = rsx::get_transferable_region(surface); + const auto src_w = std::get<0>(region); + const auto src_h = std::get<1>(region); + const auto dst_w = std::get<2>(region); + const auto dst_h = std::get<3>(region); - const u16 parent_w = surface->old_contents->width(); - const u16 parent_h = surface->old_contents->height(); - u16 copy_w, copy_h; + if (!is_depth || (src_w == dst_w && src_h == dst_h)) + { + const VkImageAspectFlags aspect = surface->attachment_aspect_flag; - std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); + vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value, + surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h, + 0, 0, dst_w, dst_h, 1, aspect, true); + } + else + { + auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, surface->info.format, 0); + auto render_pass = m_render_passes[rp]; - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - VkImageLayout old_layout = surface->current_layout; + vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 
subresource_range); - vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { 0, 0, 0 }; - copy_rgn.dstOffset = { 0, 0, 0 }; - copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; - copy_rgn.extent = { copy_w, copy_h, 1 }; - - vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn); - vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range); + m_depth_scaler->run(*m_current_command_buffer, { 0, 0, (f32)src_w, (f32)src_h }, { 0, 0, (f32)dst_w, (f32)dst_h }, surface, + surface->old_contents, surface->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean); + } surface->dirty = false; } @@ -1202,13 +1203,13 @@ void VKGSRender::end() if (auto surface = std::get<1>(rtt)) { if (surface->old_contents != nullptr) - copy_rtt_contents(surface); + copy_rtt_contents(surface, false); } } if (ds && ds->old_contents) { - copy_rtt_contents(ds); + copy_rtt_contents(ds, true); } } @@ -2734,7 +2735,24 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (zeta_address == m_depth_surface_info.address) { - //Nothing has changed, we're still using the same framebuffer + // Nothing has changed, we're still using the same framebuffer + // Update flags to match current + + const auto aa_mode = rsx::method_registers.surface_antialias(); + + for (u32 index = 0; index < 4; index++) + { + if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) + { + surface->aa_mode = aa_mode; + } + } + + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) + { + ds->aa_mode = aa_mode; + } + return; } } @@ -3324,8 +3342,11 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst 
vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - m_depth_scaler->run(*m_current_command_buffer, result.dst_image->width(), result.dst_image->height(), result.dst_image, - result.src_view, render_pass, m_framebuffers_to_clean); + // TODO: Insets + const areaf src_area = { 0, 0, (f32)result.src_image->width(), (f32)result.src_image->height() }; + const areaf dst_area = { 0, 0, (f32)result.dst_image->width(), (f32)result.dst_image->height() }; + m_depth_scaler->run(*m_current_command_buffer, src_area, dst_area, result.dst_image, result.src_image, + result.src_view, render_pass, m_framebuffers_to_clean); vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout); vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index bf284271c6..6bb7395ef9 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -110,7 +110,7 @@ namespace vk void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout); void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); - void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect, bool compatible_formats); + void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, 
VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 3f0773d4ae..d576b40ebd 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -126,7 +126,7 @@ namespace vk check_heap(); const auto size = count * sizeof(f32); - m_vao_offset = m_vao.alloc<16>(size); + m_vao_offset = (u32)m_vao.alloc<16>(size); auto dst = m_vao.map(m_vao_offset, size); std::memcpy(dst, data, size); m_vao.unmap(); @@ -631,7 +631,7 @@ namespace vk void update_uniforms(vk::glsl::program* /*program*/) override { - m_ubo_offset = m_ubo.alloc<256>(128); + m_ubo_offset = (u32)m_ubo.alloc<256>(128); auto dst = (f32*)m_ubo.map(m_ubo_offset, 128); dst[0] = m_scale_offset.r; dst[1] = m_scale_offset.g; @@ -713,12 +713,18 @@ namespace vk struct depth_scaling_pass : public overlay_pass { + areaf src_area; + areaf dst_area; + u16 src_width; + u16 src_height; + depth_scaling_pass() { vs_src = { "#version 450\n" "#extension GL_ARB_separate_shader_objects : enable\n" + "layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n" "layout(location=0) out vec2 tc0;\n" "\n" "void main()\n" @@ -726,7 +732,7 @@ namespace vk " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" " vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n" " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" - " tc0 = coords[gl_VertexIndex % 4];\n" + " tc0 = coords[gl_VertexIndex % 4] * regs[0].xy + regs[0].zw;\n" "}\n" }; @@ -749,6 +755,42 @@ namespace vk m_vertex_shader.id = 100006; 
m_fragment_shader.id = 100007; } + + void update_uniforms(vk::glsl::program* /*program*/) override + { + m_ubo_offset = (u32)m_ubo.alloc<256>(128); + auto dst = (f32*)m_ubo.map(m_ubo_offset, 128); + dst[0] = f32(src_area.x2 - src_area.x1) / src_width; + dst[1] = f32(src_area.y2 - src_area.y1) / src_height; + dst[2] = src_area.x1 / f32(src_area.x2 - src_area.x1); + dst[3] = src_area.y1 / f32(src_area.y2 - src_area.y1); + m_ubo.unmap(); + } + + void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override + { + VkRect2D region = { { s32(dst_area.x1), s32(dst_area.y1) },{ u32(dst_area.x2 - dst_area.x1), u32(dst_area.y2 - dst_area.y1) } }; + vkCmdSetScissor(cmd, 0, 1, &region); + + VkViewport vp{}; + vp.x = dst_area.x1; + vp.y = dst_area.y1; + vp.width = f32(region.extent.width); + vp.height = f32(region.extent.height); + vp.minDepth = 0.f; + vp.maxDepth = 1.f; + vkCmdSetViewport(cmd, 0, 1, &vp); + } + + void run(vk::command_buffer &cmd, const areaf& src_rect, const areaf& dst_rect, vk::image* target, vk::image* src, vk::image_view* src_view, VkRenderPass render_pass, std::list>& framebuffer_resources) + { + src_area = src_rect; + dst_area = dst_rect; + src_width = src->width(); + src_height = src->height(); + + overlay_pass::run(cmd, target->width(), target->height(), target, src_view, render_pass, framebuffer_resources); + } }; struct attachment_clear_pass : public overlay_pass @@ -806,7 +848,7 @@ namespace vk void update_uniforms(vk::glsl::program* /*program*/) override { - m_ubo_offset = m_ubo.alloc<256>(128); + m_ubo_offset = (u32)m_ubo.alloc<256>(128); auto dst = (f32*)m_ubo.map(m_ubo_offset, 128); dst[0] = clear_color.r; dst[1] = clear_color.g; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index a695ed96e4..bc8895a9f6 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -100,7 +100,7 @@ namespace vk VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 
src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, - u32 mipmaps, VkImageAspectFlagBits aspect, bool compatible_formats) + u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats) { VkImageSubresourceLayers a_src = {}, a_dst = {}; a_src.aspectMask = aspect; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index dc8b102eca..4d3b7401e2 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -586,6 +586,52 @@ namespace rsx return result; } + /** + * Calculates the regions used for memory transfer between rendertargets on succession events + */ + template <typename SurfaceType> + std::tuple<u16, u16, u16, u16> get_transferable_region(SurfaceType* surface) + { + const u16 src_w = surface->old_contents->width(); + const u16 src_h = surface->old_contents->height(); + u16 dst_w = src_w; + u16 dst_h = src_h; + + switch (surface->old_contents->aa_mode) + { + case rsx::surface_antialiasing::center_1_sample: + break; + case rsx::surface_antialiasing::diagonal_centered_2_samples: + dst_w *= 2; + break; + case rsx::surface_antialiasing::square_centered_4_samples: + case rsx::surface_antialiasing::square_rotated_4_samples: + dst_w *= 2; + dst_h *= 2; + break; + } + + switch (surface->aa_mode) + { + case rsx::surface_antialiasing::center_1_sample: + break; + case rsx::surface_antialiasing::diagonal_centered_2_samples: + dst_w /= 2; + break; + case rsx::surface_antialiasing::square_centered_4_samples: + case rsx::surface_antialiasing::square_rotated_4_samples: + dst_w /= 2; + dst_h /= 2; + break; + } + + const f32 scale_x = (f32)dst_w / src_w; + const f32 scale_y = (f32)dst_h / src_h; + + std::tie(std::ignore, std::ignore, dst_w, dst_h) = clip_region(dst_w, dst_h, 0, 0, surface->width(), surface->height(), true); + return std::make_tuple(u16(dst_w / scale_x), u16(dst_h / scale_y), dst_w, dst_h); + } + /** * Remove restart index and emulate using degenerate triangles * Can be used as a workaround when restart_index doesnt work too well