From 0b2f9f0f17641dcfe552e0d865a50d6e0cef3697 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 14 Oct 2019 01:24:04 +0300 Subject: [PATCH] rsx: Add support for delayed shader discard. - Noticed a glitch on AMD hw and windows drivers where discard seems to affect entire 4x4 cells. - Dead fragments (outside the primitive boundary) could have their discards trigger as they do not have proper access to variables. - This introduces dead fragments along triangle edges, causing a diagonal line pattern across the screen that is very annoying. --- .../RSX/Common/FragmentProgramDecompiler.cpp | 2 +- rpcs3/Emu/RSX/Common/GLSLCommon.h | 52 ++++++++++++++----- rpcs3/Emu/RSX/Common/GLSLTypes.h | 2 + rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 31 +++++------ rpcs3/Emu/RSX/GL/GLFragmentProgram.h | 10 +++- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 31 +++++------ rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 5 +- 7 files changed, 84 insertions(+), 49 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index a49f515762..1e8777e267 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -1184,7 +1184,7 @@ std::string FragmentProgramDecompiler::Decompile() case RSX_FP_OPCODE_NOP: break; case RSX_FP_OPCODE_KIL: properties.has_discard_op = true; - AddFlowOp("discard"); + AddFlowOp("_kill()"); break; default: diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 0e50f097d3..5aeb2e0370 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -429,26 +429,39 @@ namespace glsl "}\n\n"; } - static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support, bool emulate_coverage_tests) + static void insert_rop(std::ostream& OS, const shader_properties& props) { - const std::string reg0 = _32_bit_exports ? "r0" : "h0"; - const std::string reg1 = _32_bit_exports ? "r2" : "h4"; - const std::string reg2 = _32_bit_exports ? "r3" : "h6"; - const std::string reg3 = _32_bit_exports ? "r4" : "h8"; + const std::string reg0 = props.fp32_outputs ? "r0" : "h0"; + const std::string reg1 = props.fp32_outputs ? "r2" : "h4"; + const std::string reg2 = props.fp32_outputs ? "r3" : "h6"; + const std::string reg3 = props.fp32_outputs ? "r4" : "h8"; //TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here + if (props.disable_early_discard) + { + OS << + " if (_fragment_discard)\n" + " {\n" + " discard;\n" + " }\n" + " else if ((rop_control & 0xFF) != 0)\n"; + } + else + { + OS << " if ((rop_control & 0xFF) != 0)\n"; + } + OS << - " if ((rop_control & 0xFF) != 0)\n" " {\n" " bool alpha_test = (rop_control & 0x1) > 0;\n" " uint alpha_func = ((rop_control >> 16) & 0x7);\n"; - if (!_32_bit_exports) + if (!props.fp32_outputs) { OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n"; } - if (emulate_coverage_tests) + if (props.emulate_coverage_tests) { OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n"; } @@ -459,7 +472,7 @@ namespace glsl " discard;\n" " }\n"; - if (emulate_coverage_tests) + if (props.emulate_coverage_tests) { OS << " else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n" @@ -468,10 +481,10 @@ namespace glsl " }\n"; } - if (!_32_bit_exports) + if (!props.fp32_outputs) { // Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags - if (native_half_support) + if (props.supports_native_fp16) { OS << " else if (srgb_convert)\n" @@ -510,6 +523,21 @@ namespace glsl OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; + if (props.domain == glsl::program_domain::glsl_fragment_program) + { + OS << "// Workaround for broken early discard in some drivers\n"; + + if (props.disable_early_discard) + { + OS << "bool _fragment_discard = false;\n"; + OS << "#define _kill() _fragment_discard = true\n\n"; + } + else + { + OS << "#define _kill() discard\n\n"; + } + } + if (props.require_lit_emulation) { OS << @@ -684,7 +712,7 @@ namespace glsl " // Alphakill\n" " if (rgba.a < 0.000001)\n" " {\n" - " discard;\n" + " _kill();\n" " return rgba;\n" " }\n" " }\n" diff --git a/rpcs3/Emu/RSX/Common/GLSLTypes.h b/rpcs3/Emu/RSX/Common/GLSLTypes.h index 61e8ba69d3..227331e6da 100644 --- a/rpcs3/Emu/RSX/Common/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Common/GLSLTypes.h @@ -30,5 +30,7 @@ namespace glsl bool emulate_coverage_tests; bool emulate_shadow_compare; bool low_precision_tests; + bool disable_early_discard; + bool supports_native_fp16; }; }; diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index f1eafcb795..620ae9b00b 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -197,19 +197,20 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::shader_properties properties2; - properties2.domain = glsl::glsl_fragment_program; - properties2.require_lit_emulation = properties.has_lit_op; - properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); - properties2.require_depth_conversion = m_prog.redirected_textures != 0; - properties2.require_wpos = !!(properties.in_register_mask & in_wpos); - properties2.require_texture_ops = properties.has_tex_op; - properties2.require_shadow_ops = m_prog.shadow_textures != 0; - properties2.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none; - properties2.emulate_shadow_compare = device_props.emulate_depth_compare; - properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; + m_shader_props.domain = glsl::glsl_fragment_program; + m_shader_props.require_lit_emulation = properties.has_lit_op; + m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); + m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0; + m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos); + m_shader_props.require_texture_ops = properties.has_tex_op; + m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0; + m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none; + m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; + m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; + m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA; + m_shader_props.supports_native_fp16 = device_props.has_native_half_support; - glsl::insert_glsl_legacy_function(OS, properties2); + glsl::insert_glsl_legacy_function(OS, m_shader_props); } void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -307,11 +308,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "\n" << " fs_main();\n\n"; - glsl::insert_rop( - OS, - !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), - device_props.has_native_half_support, - g_cfg.video.antialiasing_level == msaa_level::none); + glsl::insert_rop(OS, m_shader_props); if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h index 19b980e8b4..5e9f2acb2f 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h @@ -1,11 +1,19 @@ -#pragma once +#pragma once #include "../Common/FragmentProgramDecompiler.h" +#include "../Common/GLSLTypes.h" #include "Emu/RSX/RSXFragmentProgram.h" +namespace glsl +{ + struct shader_properties; +} + struct GLFragmentDecompilerThread : public FragmentProgramDecompiler { std::string& m_shader; ParamArray& m_parrDummy; + glsl::shader_properties m_shader_props{}; + public: GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size) : FragmentProgramDecompiler(prog, size) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index a3413ce61f..6fb41f9815 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -228,19 +228,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::shader_properties properties2; - properties2.domain = glsl::glsl_fragment_program; - properties2.require_lit_emulation = properties.has_lit_op; - properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); - properties2.require_depth_conversion = m_prog.redirected_textures != 0; - properties2.require_wpos = !!(properties.in_register_mask & in_wpos); - properties2.require_texture_ops = properties.has_tex_op; - properties2.require_shadow_ops = m_prog.shadow_textures != 0; - properties2.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none; - properties2.emulate_shadow_compare = device_props.emulate_depth_compare; - properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; + m_shader_props.domain = glsl::glsl_fragment_program; + m_shader_props.require_lit_emulation = properties.has_lit_op; + m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); + m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0; + m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos); + m_shader_props.require_texture_ops = properties.has_tex_op; + m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0; + m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none; + m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; + m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; + m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA; + m_shader_props.supports_native_fp16 = device_props.has_native_half_support; - glsl::insert_glsl_legacy_function(OS, properties2); + glsl::insert_glsl_legacy_function(OS, m_shader_props); } void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -338,11 +339,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "\n" << " fs_main();\n\n"; - glsl::insert_rop( - OS, - !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), - device_props.has_native_half_support, - g_cfg.video.antialiasing_level == msaa_level::none); + glsl::insert_rop(OS, m_shader_props); if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index a84c479e0d..d658f4acd9 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -1,5 +1,6 @@ -#pragma once +#pragma once #include "../Common/FragmentProgramDecompiler.h" +#include "../Common/GLSLTypes.h" #include "Emu/RSX/RSXFragmentProgram.h" #include "VulkanAPI.h" #include "VKHelpers.h" @@ -10,6 +11,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler ParamArray& m_parrDummy; std::vector inputs; class VKFragmentProgram *vk_prog; + glsl::shader_properties m_shader_props{}; + public: VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) : FragmentProgramDecompiler(prog, size)