diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 86506870cd..eb04bf7b45 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -248,6 +248,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const auto color_locations = get_locations(); const auto aa_mode = rsx::method_registers.surface_antialias(); const auto bpp = get_format_block_size_in_bytes(surface_format); + const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { @@ -354,7 +355,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor; m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, color_format.format, color_format.type, color_format.swap_bytes); @@ -370,7 +370,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk u32 pitch = m_depth_surface_info.width * 2; if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; - const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 
1 : 2; const u32 range = pitch * m_depth_surface_info.height * aa_factor; m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch, depth_format_gl.format, depth_format_gl.type, true); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 6685091fc1..d85e4b2aa0 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -442,12 +442,10 @@ namespace gl } else { - //TODO: Use compression hint from the gcm tile information - //TODO: Fall back to bilinear filtering if samples > 2 - const u8 pixel_size = get_pixel_size(format, type); - const u8 samples = rsx_pitch / real_pitch; - rsx::scale_image_nearest(dst, const_cast(data), width, height, rsx_pitch, real_pitch, pixel_size, samples); + const u8 samples_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; + const u8 samples_v = (aa_mode == rsx::surface_antialiasing::square_centered_4_samples || aa_mode == rsx::surface_antialiasing::square_rotated_4_samples) ? 2 : 1; + rsx::scale_image_nearest(dst, const_cast(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v); } /* switch (gcm_format) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8fbc25df7f..1033b0c458 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2481,6 +2481,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) const auto fbo_height = rsx::apply_resolution_scale(clip_height, true); const auto aa_mode = rsx::method_registers.surface_antialias(); const auto bpp = get_format_block_size_in_bytes(color_fmt); + const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 
1 : 2; if (m_draw_fbo) { @@ -2588,8 +2589,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) for (u8 index : draw_buffers) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const u32 range = m_surface_info[index].pitch * m_surface_info[index].height; + const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second); } @@ -2608,7 +2609,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) pitch *= 2; } - const u32 range = pitch * m_depth_surface_info.height; + const u32 range = pitch * m_depth_surface_info.height * aa_factor; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 6e1d41ebb6..59848d8651 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -308,9 +308,20 @@ namespace vk { //Scale image to fit //usually we can just get away with nearest filtering - const u8 samples = rsx_pitch / real_pitch; + u8 samples_u = 1, samples_v = 1; + switch (static_cast(vram_texture)->aa_mode) + { + case rsx::surface_antialiasing::diagonal_centered_2_samples: + samples_u = 2; + break; + case rsx::surface_antialiasing::square_centered_4_samples: + case rsx::surface_antialiasing::square_rotated_4_samples: + samples_u = 2; + samples_v = 2; + break; + } - rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes); + rsx::scale_image_nearest(pixels_dst, pixels_src, 
width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes); } dma_buffer->unmap(); diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index 393e31fffc..f992d4dba4 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -140,18 +140,19 @@ namespace rsx * N - Sample count */ template - void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) + void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) { u32 dst_offset = 0; u32 src_offset = 0; - u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof(T); + u32 padding = (dst_pitch - (src_pitch * samples_u)) / sizeof(T); for (u16 h = 0; h < src_height; ++h) { + const auto row_start = dst_offset; for (u16 w = 0; w < src_width; ++w) { - for (u8 n = 0; n < samples; ++n) + for (u8 n = 0; n < samples_u; ++n) { dst[dst_offset++] = src[src_offset]; } @@ -160,51 +161,57 @@ namespace rsx } dst_offset += padding; + + for (int n = 1; n < samples_v; ++n) + { + memcpy(&dst[dst_offset], &dst[row_start], dst_pitch); + dst_offset += dst_pitch / sizeof(T); /* dst_offset indexes T elements while dst_pitch is a byte count (it is the memcpy size above), so convert before advancing to the next row */ + } } } - void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) + void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) { switch (pixel_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 2: - scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + 
scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 4: - scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 8: - scale_image_fallback_impl((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 16: - scale_image_fallback_impl((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; default: fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); } } - void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) + void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) { switch (pixel_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 2: - scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 4: - 
scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 8: - scale_image_fallback_impl>((u64*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl>((u64*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; case 16: - scale_image_fallback_impl>((u128*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + scale_image_fallback_impl>((u128*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); break; default: fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); @@ -279,61 +286,79 @@ namespace rsx } } - void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes) + void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes) { //Scale this image by repeating pixel data n times //n = expected_pitch / real_pitch //Use of fixed argument templates for performance reasons const u16 dst_width = dst_pitch / pixel_size; - const u16 padding = dst_width - (src_width * samples); + const u16 padding = dst_width - (src_width * samples_u); if (!swap_bytes) { - switch (samples) + if (samples_v == 1) { - case 2: - scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 3: - scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 4: - scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 8: - scale_image_fast<8>(dst, src, pixel_size, 
src_width, src_height, padding); - break; - case 16: - scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding); - break; - default: - LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch); - scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); + switch (samples_u) + { + case 1: + scale_image_fast<1>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 2: + scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 3: + scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 4: + scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 8: + scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 16: + scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding); + break; + default: + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1); + } + } + else + { + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); } } else { - switch (samples) + if (samples_v == 1) { - case 2: - scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 3: - scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 4: - scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 8: - scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding); - break; - case 16: - scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding); - break; - default: - LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch); - scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, 
dst_pitch, src_pitch, pixel_size, samples); + switch (samples_u) + { + case 1: + scale_image_fast_with_byte_swap<1>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 2: + scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 3: + scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 4: + scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 8: + scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding); + break; + case 16: + scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding); + break; + default: + scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1); + } + } + else + { + scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); } } } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index ccaeb5fa86..408a803801 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -143,7 +143,7 @@ namespace rsx } } - void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false); + void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes = false); void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);