rsx: Add support for delayed shader discard.

- Noticed a glitch on AMD hardware and Windows drivers where discard seems to affect entire 4x4 cells.
- Dead fragments (those outside the primitive boundary) can have their discards triggered because they do not have proper access to variables.
- This introduces dead fragments along triangle edges, producing a very annoying diagonal line pattern across the screen.
This commit is contained in:
kd-11 2019-10-14 01:24:04 +03:00 committed by kd-11
parent 901942f24a
commit 0b2f9f0f17
7 changed files with 84 additions and 49 deletions

View file

@ -1184,7 +1184,7 @@ std::string FragmentProgramDecompiler::Decompile()
case RSX_FP_OPCODE_NOP: break;
case RSX_FP_OPCODE_KIL:
properties.has_discard_op = true;
AddFlowOp("discard");
AddFlowOp("_kill()");
break;
default:

View file

@ -429,26 +429,39 @@ namespace glsl
"}\n\n";
}
static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support, bool emulate_coverage_tests)
static void insert_rop(std::ostream& OS, const shader_properties& props)
{
const std::string reg0 = _32_bit_exports ? "r0" : "h0";
const std::string reg1 = _32_bit_exports ? "r2" : "h4";
const std::string reg2 = _32_bit_exports ? "r3" : "h6";
const std::string reg3 = _32_bit_exports ? "r4" : "h8";
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
if (props.disable_early_discard)
{
OS <<
" if (_fragment_discard)\n"
" {\n"
" discard;\n"
" }\n"
" else if ((rop_control & 0xFF) != 0)\n";
}
else
{
OS << " if ((rop_control & 0xFF) != 0)\n";
}
OS <<
" if ((rop_control & 0xFF) != 0)\n"
" {\n"
" bool alpha_test = (rop_control & 0x1) > 0;\n"
" uint alpha_func = ((rop_control >> 16) & 0x7);\n";
if (!_32_bit_exports)
if (!props.fp32_outputs)
{
OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n";
}
if (emulate_coverage_tests)
if (props.emulate_coverage_tests)
{
OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n";
}
@ -459,7 +472,7 @@ namespace glsl
" discard;\n"
" }\n";
if (emulate_coverage_tests)
if (props.emulate_coverage_tests)
{
OS <<
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
@ -468,10 +481,10 @@ namespace glsl
" }\n";
}
if (!_32_bit_exports)
if (!props.fp32_outputs)
{
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
if (native_half_support)
if (props.supports_native_fp16)
{
OS <<
" else if (srgb_convert)\n"
@ -510,6 +523,21 @@ namespace glsl
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
if (props.domain == glsl::program_domain::glsl_fragment_program)
{
OS << "// Workaround for broken early discard in some drivers\n";
if (props.disable_early_discard)
{
OS << "bool _fragment_discard = false;\n";
OS << "#define _kill() _fragment_discard = true\n\n";
}
else
{
OS << "#define _kill() discard\n\n";
}
}
if (props.require_lit_emulation)
{
OS <<
@ -684,7 +712,7 @@ namespace glsl
" // Alphakill\n"
" if (rgba.a < 0.000001)\n"
" {\n"
" discard;\n"
" _kill();\n"
" return rgba;\n"
" }\n"
" }\n"

View file

@ -30,5 +30,7 @@ namespace glsl
bool emulate_coverage_tests;
bool emulate_shadow_compare;
bool low_precision_tests;
bool disable_early_discard;
bool supports_native_fp16;
};
};

View file

@ -197,19 +197,20 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
// Fills in the GLSL shader properties for this fragment program and passes them to
// glsl::insert_glsl_legacy_function so the shared helper code is written to OS.
// NOTE(review): this listing appears to show both the pre-change lines (local `properties2`)
// and the post-change lines (member `m_shader_props`) of the commit together - confirm
// against the real file before relying on it.
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
// Properties held in a function-local object (presumably the lines removed by the commit).
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
properties2.require_texture_ops = properties.has_tex_op;
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
properties2.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
// Same properties stored in the m_shader_props member; insertMainEnd later passes
// that member to glsl::insert_rop.
m_shader_props.domain = glsl::glsl_fragment_program;
m_shader_props.require_lit_emulation = properties.has_lit_op;
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
// The delayed-discard workaround is switched on for every driver that is not flagged as NVIDIA.
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
// Native half-float support is copied from the device properties.
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -307,11 +308,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "\n" << " fs_main();\n\n";
glsl::insert_rop(
OS,
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
device_props.has_native_half_support,
g_cfg.video.antialiasing_level == msaa_level::none);
glsl::insert_rop(OS, m_shader_props);
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{

View file

@ -1,11 +1,19 @@
#pragma once
#pragma once
#include "../Common/FragmentProgramDecompiler.h"
#include "../Common/GLSLTypes.h"
#include "Emu/RSX/RSXFragmentProgram.h"
namespace glsl
{
struct shader_properties;
}
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
{
std::string& m_shader;
ParamArray& m_parrDummy;
glsl::shader_properties m_shader_props{};
public:
GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size)
: FragmentProgramDecompiler(prog, size)

View file

@ -228,19 +228,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
// Fills in the GLSL shader properties for this fragment program and passes them to
// glsl::insert_glsl_legacy_function so the shared helper code is written to OS.
// NOTE(review): this listing appears to show both the pre-change lines (local `properties2`)
// and the post-change lines (member `m_shader_props`) of the commit together - confirm
// against the real file before relying on it.
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
// Properties held in a function-local object (presumably the lines removed by the commit).
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
properties2.require_texture_ops = properties.has_tex_op;
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
properties2.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
// Same properties stored in the m_shader_props member; insertMainEnd later passes
// that member to glsl::insert_rop.
m_shader_props.domain = glsl::glsl_fragment_program;
m_shader_props.require_lit_emulation = properties.has_lit_op;
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
// Coverage-test emulation is requested only when the configured MSAA level is none.
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
// The delayed-discard workaround is switched on for every driver vendor other than NVIDIA.
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
// Native half-float support is copied from the device properties.
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -338,11 +339,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "\n" << " fs_main();\n\n";
glsl::insert_rop(
OS,
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
device_props.has_native_half_support,
g_cfg.video.antialiasing_level == msaa_level::none);
glsl::insert_rop(OS, m_shader_props);
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{

View file

@ -1,5 +1,6 @@
#pragma once
#pragma once
#include "../Common/FragmentProgramDecompiler.h"
#include "../Common/GLSLTypes.h"
#include "Emu/RSX/RSXFragmentProgram.h"
#include "VulkanAPI.h"
#include "VKHelpers.h"
@ -10,6 +11,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler
ParamArray& m_parrDummy;
std::vector<vk::glsl::program_input> inputs;
class VKFragmentProgram *vk_prog;
glsl::shader_properties m_shader_props{};
public:
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)
: FragmentProgramDecompiler(prog, size)