From 1a0d98f98401c8791385d543ee563aeb1b3ab738 Mon Sep 17 00:00:00 2001
From: lui <lui@vub63vv26q6v27xzv2dtcd25xumubshogm67yrpaz2rculqxs7jlfqad.onion>
Date: Thu, 3 Oct 2024 13:25:58 +0000
Subject: [PATCH] renderer: add area sampling scaling method (#57)

Adds Area Sampling to the list of scaling options. It works well for achieving a high-quality, smooth super-sampling effect. Dolphin has had this for a while and Ryujinx recently added it too, so I decided to port it.

I'm not sure whether adding the extra uniform to the OpenGL WindowAdaptPass, or using the push constants under Vulkan, was a good idea, but I couldn't find a better way to get the window size into the shader, and the other scaling methods still work fine. The implementation seems to work fine under both Vulkan and OpenGL, but the shader might still need some minor tweaks. This should definitely get some testing before merging; so far I have tested on an Nvidia RTX 3080 under Windows.

Adapted from these two PRs:
https://github.com/Ryujinx/Ryujinx/pull/7304
https://github.com/dolphin-emu/dolphin/pull/11999

Reviewed-on: http://vub63vv26q6v27xzv2dtcd25xumubshogm67yrpaz2rculqxs7jlfqad.onion/torzu-emu/torzu/pulls/57
Co-authored-by: lui <lui@vub63vv26q6v27xzv2dtcd25xumubshogm67yrpaz2rculqxs7jlfqad.onion>
Co-committed-by: lui <lui@vub63vv26q6v27xzv2dtcd25xumubshogm67yrpaz2rculqxs7jlfqad.onion>
---
 src/common/settings_enums.h                   |   2 +-
 src/video_core/host_shaders/CMakeLists.txt    |   1 +
 src/video_core/host_shaders/present_area.frag | 107 ++++++++++++++++++
 .../renderer_opengl/gl_blit_screen.cpp        |   3 +
 .../renderer_opengl/present/filters.cpp       |   6 +
 .../renderer_opengl/present/filters.h         |   1 +
 .../present/present_uniforms.h                |   1 +
 .../present/window_adapt_pass.cpp             |   3 +
 .../renderer_vulkan/present/filters.cpp       |   6 +
 .../renderer_vulkan/present/filters.h         |   1 +
 .../renderer_vulkan/vk_blit_screen.cpp        |   3 +
 src/yuzu/configuration/shared_translation.cpp |   1 +
 src/yuzu/configuration/shared_translation.h   |   1 +
 13 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 src/video_core/host_shaders/present_area.frag

diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index a636154413..71675fbcc4 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -145,7 +145,7 @@ ENUM(NvdecEmulation, Off, Cpu, Gpu);
 ENUM(ResolutionSetup, Res1_2X, Res3_4X, Res1X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X,
      Res8X);
 
-ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, ScaleForce, Fsr, MaxEnum);
+ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, ScaleForce, Fsr, Area, MaxEnum);
 
 ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
 
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 969f21d509..ee6cf5f4dc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -41,6 +41,7 @@ set(SHADER_FILES
     opengl_present_scaleforce.frag
     opengl_smaa.glsl
     pitch_unswizzle.comp
+    present_area.frag
     present_bicubic.frag
     present_gaussian.frag
     queries_prefix_scan_sum.comp
diff --git a/src/video_core/host_shaders/present_area.frag b/src/video_core/host_shaders/present_area.frag
new file mode 100644
index 0000000000..41f5bc52fc
--- /dev/null
+++ b/src/video_core/host_shaders/present_area.frag
@@ -0,0 +1,107 @@
+#version 460 core
+
+layout(location = 0) in vec2 frag_tex_coord;
+layout(location = 0) out vec4 color;
+layout(binding = 0) uniform sampler2D color_texture;
+
+#ifdef VULKAN
+
+struct ScreenRectVertex {
+    vec2 position;
+    vec2 tex_coord;
+};
+layout (push_constant) uniform PushConstants {
+    mat4 modelview_matrix;
+    ScreenRectVertex vertices[4];
+};
+
+#else // OpenGL
+
+layout(location = 1) uniform uvec2 screen_size;
+
+#endif
+
+/***** Area Sampling *****/
+
+// By Sam Belliveau and Filippo Tarpini. Public Domain license.
+// Effectively a more accurate sharp bilinear filter when upscaling,
+// that also works as a mathematically perfect downscale filter.
+// https://entropymine.com/imageworsener/pixelmixing/
+// https://github.com/obsproject/obs-studio/pull/1715
+// https://legacy.imagemagick.org/Usage/filter/
+vec4 AreaSampling(sampler2D textureSampler, vec2 texCoords, vec2 source_size, vec2 target_size) {
+    // Area-weighted average of the source texels under one target pixel; first invert target_size.
+    vec2 inverted_target_size = vec2(1.0) / target_size;
+
+    // Footprint of one target pixel in source texel units, centred on the sampled coordinate.
+    vec2 range = source_size * inverted_target_size;
+    vec2 beg = (texCoords.xy * source_size) - (range * 0.5);
+    vec2 end = beg + range;
+
+    // Integer texel coordinates of the top-left and bottom-right corners of the pixel box.
+    ivec2 f_beg = ivec2(floor(beg));
+    ivec2 f_end = ivec2(floor(end));
+
+    // Fraction of the first (west/north) and last (east/south) texel column/row that is covered.
+    float area_w = 1.0 - fract(beg.x);
+    float area_n = 1.0 - fract(beg.y);
+    float area_e = fract(end.x);
+    float area_s = fract(end.y);
+
+    // Weights of the four corner texels: product of their horizontal and vertical coverage.
+    float area_nw = area_n * area_w;
+    float area_ne = area_n * area_e;
+    float area_sw = area_s * area_w;
+    float area_se = area_s * area_e;
+
+    // Initialize the color accumulator.
+    vec4 avg_color = vec4(0.0, 0.0, 0.0, 0.0);
+
+    // Accumulate corner pixels. NOTE(review): fetch coordinates are not clamped to the texture - confirm.
+    avg_color += area_nw * texelFetch(textureSampler, ivec2(f_beg.x, f_beg.y), 0);
+    avg_color += area_ne * texelFetch(textureSampler, ivec2(f_end.x, f_beg.y), 0);
+    avg_color += area_sw * texelFetch(textureSampler, ivec2(f_beg.x, f_end.y), 0);
+    avg_color += area_se * texelFetch(textureSampler, ivec2(f_end.x, f_end.y), 0);
+
+    // Count the whole texels strictly between the first and last column/row (0 when there are none).
+    int x_range = int(f_end.x - f_beg.x - 0.5);
+    int y_range = int(f_end.y - f_beg.y - 0.5);
+
+    // Accumulate top and bottom edge pixels, weighted by their vertical coverage.
+    for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) {
+        avg_color += area_n * texelFetch(textureSampler, ivec2(x, f_beg.y), 0);
+        avg_color += area_s * texelFetch(textureSampler, ivec2(x, f_end.y), 0);
+    }
+
+    // Accumulate left and right edge pixels and all the (fully covered) pixels in between.
+    for (int y = f_beg.y + 1; y <= f_beg.y + y_range; ++y) {
+        avg_color += area_w * texelFetch(textureSampler, ivec2(f_beg.x, y), 0);
+        avg_color += area_e * texelFetch(textureSampler, ivec2(f_end.x, y), 0);
+
+        for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) {
+            avg_color += texelFetch(textureSampler, ivec2(x, y), 0);
+        }
+    }
+
+    // Sum every weight applied above: corners, edges, and interior texels (weight 1 each).
+    float area_corners = area_nw + area_ne + area_sw + area_se;
+    float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e);
+    float area_center = float(x_range) * float(y_range);
+
+    // Return the accumulated color divided by the total weight.
+    return avg_color / (area_corners + area_edges + area_center);
+}
+
+void main() { // Writes the area-sampled value of color_texture at frag_tex_coord to `color`.
+    vec2 source_image_size = textureSize(color_texture, 0); // size of mip level 0, in texels
+    vec2 window_size;
+
+    #ifdef VULKAN
+    window_size.x = vertices[1].position.x - vertices[0].position.x; // NOTE(review): assumes vertices 0/1 span the quad width in pixels - confirm
+    window_size.y = vertices[2].position.y - vertices[0].position.y; // NOTE(review): assumes vertices 0/2 span the quad height in pixels - confirm
+    #else // OpenGL
+    window_size = screen_size; // uvec2 uniform at location 1, converted to vec2
+    #endif
+
+    color = AreaSampling(color_texture, frag_tex_coord, source_image_size, window_size);
+}
diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp
index 9260a4dc43..16e94bed41 100644
--- a/src/video_core/renderer_opengl/gl_blit_screen.cpp
+++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp
@@ -86,6 +86,9 @@ void BlitScreen::CreateWindowAdapt() {
     case Settings::ScalingFilter::ScaleForce:
         window_adapt = MakeScaleForce(device);
         break;
+    case Settings::ScalingFilter::Area:
+        window_adapt = MakeArea(device);
+        break;
     case Settings::ScalingFilter::Fsr:
     case Settings::ScalingFilter::Bilinear:
     default:
diff --git a/src/video_core/renderer_opengl/present/filters.cpp b/src/video_core/renderer_opengl/present/filters.cpp
index 819e5d77f4..231045d3b1 100644
--- a/src/video_core/renderer_opengl/present/filters.cpp
+++ b/src/video_core/renderer_opengl/present/filters.cpp
@@ -3,6 +3,7 @@
 
 #include "video_core/host_shaders/opengl_present_frag.h"
 #include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
+#include "video_core/host_shaders/present_area_frag.h"
 #include "video_core/host_shaders/present_bicubic_frag.h"
 #include "video_core/host_shaders/present_gaussian_frag.h"
 #include "video_core/renderer_opengl/present/filters.h"
@@ -36,4 +37,9 @@ std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device) {
         fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG));
 }
 
+std::unique_ptr<WindowAdaptPass> MakeArea(const Device& device) {
+    return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
+                                             HostShaders::PRESENT_AREA_FRAG);
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/present/filters.h b/src/video_core/renderer_opengl/present/filters.h
index 122ab74365..6e7bfaf350 100644
--- a/src/video_core/renderer_opengl/present/filters.h
+++ b/src/video_core/renderer_opengl/present/filters.h
@@ -13,5 +13,6 @@ std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device);
 std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device);
 std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device);
 std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device);
+std::unique_ptr<WindowAdaptPass> MakeArea(const Device& device);
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/present/present_uniforms.h b/src/video_core/renderer_opengl/present/present_uniforms.h
index 3a19f05c72..d6ff0b60aa 100644
--- a/src/video_core/renderer_opengl/present/present_uniforms.h
+++ b/src/video_core/renderer_opengl/present/present_uniforms.h
@@ -10,6 +10,7 @@ namespace OpenGL {
 constexpr GLint PositionLocation = 0;
 constexpr GLint TexCoordLocation = 1;
 constexpr GLint ModelViewMatrixLocation = 0;
+constexpr GLint ScreenSizeLocation = 1;
 
 struct ScreenRectVertex {
     constexpr ScreenRectVertex() = default;
diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
index d8b6a11cb2..186eedf7ca 100644
--- a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
+++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
@@ -110,6 +110,9 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
         glBindTextureUnit(0, textures[i]);
         glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                     matrices[i].data());
+        glProgramUniform2ui(frag.handle, ScreenSizeLocation,
+                            static_cast<GLuint>(layout.screen.GetWidth()),
+                            static_cast<GLuint>(layout.screen.GetHeight()));
         glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices[i]), std::data(vertices[i]));
         glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
     }
diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp
index b5e08938e7..afa981b0dd 100644
--- a/src/video_core/renderer_vulkan/present/filters.cpp
+++ b/src/video_core/renderer_vulkan/present/filters.cpp
@@ -3,6 +3,7 @@
 
 #include "common/common_types.h"
 
+#include "video_core/host_shaders/present_area_frag_spv.h"
 #include "video_core/host_shaders/present_bicubic_frag_spv.h"
 #include "video_core/host_shaders/present_gaussian_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_frag_spv.h"
@@ -53,4 +54,9 @@ std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat f
                                              SelectScaleForceShader(device));
 }
 
+std::unique_ptr<WindowAdaptPass> MakeArea(const Device& device, VkFormat frame_format) {
+    return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device),
+                                             BuildShader(device, PRESENT_AREA_FRAG_SPV));
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h
index 6c83726dd4..7ff577acff 100644
--- a/src/video_core/renderer_vulkan/present/filters.h
+++ b/src/video_core/renderer_vulkan/present/filters.h
@@ -14,5 +14,6 @@ std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat fra
 std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format);
 std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format);
 std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format);
+std::unique_ptr<WindowAdaptPass> MakeArea(const Device& device, VkFormat frame_format);
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index b7797f8337..314236db36 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -43,6 +43,9 @@ void BlitScreen::SetWindowAdaptPass() {
     case Settings::ScalingFilter::ScaleForce:
         window_adapt = MakeScaleForce(device, swapchain_view_format);
         break;
+    case Settings::ScalingFilter::Area:
+        window_adapt = MakeArea(device, swapchain_view_format);
+        break;
     case Settings::ScalingFilter::Fsr:
     case Settings::ScalingFilter::Bilinear:
     default:
diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp
index 6c08c5a57d..9315e8a678 100644
--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -411,6 +411,7 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
                               PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
                               PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
                               PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™️ Super Resolution")),
+                              PAIR(ScalingFilter, Area, tr("Area")),
                           }});
     translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
                           {
diff --git a/src/yuzu/configuration/shared_translation.h b/src/yuzu/configuration/shared_translation.h
index d5fc3b8def..31b9d42557 100644
--- a/src/yuzu/configuration/shared_translation.h
+++ b/src/yuzu/configuration/shared_translation.h
@@ -40,6 +40,7 @@ static const std::map<Settings::ScalingFilter, QString> scaling_filter_texts_map
     {Settings::ScalingFilter::ScaleForce,
      QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "ScaleForce"))},
     {Settings::ScalingFilter::Fsr, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "FSR"))},
+    {Settings::ScalingFilter::Area, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Area"))},
 };
 
 static const std::map<Settings::ConsoleMode, QString> use_docked_mode_texts_map = {