mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
rsx: Add support for delayed shader discard.
- Noticed a glitch on AMD hardware with the Windows drivers where a discard seems to affect entire 4x4 cells.
- Dead fragments (those outside the primitive boundary) can have their discards triggered, because they do not have proper access to variables.
- This introduces dead fragments along triangle edges, causing a very annoying diagonal line pattern across the screen.
This commit is contained in:
parent
901942f24a
commit
0b2f9f0f17
7 changed files with 84 additions and 49 deletions
|
@ -1184,7 +1184,7 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||
case RSX_FP_OPCODE_NOP: break;
|
||||
case RSX_FP_OPCODE_KIL:
|
||||
properties.has_discard_op = true;
|
||||
AddFlowOp("discard");
|
||||
AddFlowOp("_kill()");
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -429,26 +429,39 @@ namespace glsl
|
|||
"}\n\n";
|
||||
}
|
||||
|
||||
static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support, bool emulate_coverage_tests)
|
||||
static void insert_rop(std::ostream& OS, const shader_properties& props)
|
||||
{
|
||||
const std::string reg0 = _32_bit_exports ? "r0" : "h0";
|
||||
const std::string reg1 = _32_bit_exports ? "r2" : "h4";
|
||||
const std::string reg2 = _32_bit_exports ? "r3" : "h6";
|
||||
const std::string reg3 = _32_bit_exports ? "r4" : "h8";
|
||||
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
|
||||
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
|
||||
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
|
||||
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
|
||||
|
||||
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
|
||||
if (props.disable_early_discard)
|
||||
{
|
||||
OS <<
|
||||
" if (_fragment_discard)\n"
|
||||
" {\n"
|
||||
" discard;\n"
|
||||
" }\n"
|
||||
" else if ((rop_control & 0xFF) != 0)\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS << " if ((rop_control & 0xFF) != 0)\n";
|
||||
}
|
||||
|
||||
OS <<
|
||||
" if ((rop_control & 0xFF) != 0)\n"
|
||||
" {\n"
|
||||
" bool alpha_test = (rop_control & 0x1) > 0;\n"
|
||||
" uint alpha_func = ((rop_control >> 16) & 0x7);\n";
|
||||
|
||||
if (!_32_bit_exports)
|
||||
if (!props.fp32_outputs)
|
||||
{
|
||||
OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n";
|
||||
}
|
||||
|
||||
if (emulate_coverage_tests)
|
||||
if (props.emulate_coverage_tests)
|
||||
{
|
||||
OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n";
|
||||
}
|
||||
|
@ -459,7 +472,7 @@ namespace glsl
|
|||
" discard;\n"
|
||||
" }\n";
|
||||
|
||||
if (emulate_coverage_tests)
|
||||
if (props.emulate_coverage_tests)
|
||||
{
|
||||
OS <<
|
||||
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
|
||||
|
@ -468,10 +481,10 @@ namespace glsl
|
|||
" }\n";
|
||||
}
|
||||
|
||||
if (!_32_bit_exports)
|
||||
if (!props.fp32_outputs)
|
||||
{
|
||||
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
|
||||
if (native_half_support)
|
||||
if (props.supports_native_fp16)
|
||||
{
|
||||
OS <<
|
||||
" else if (srgb_convert)\n"
|
||||
|
@ -510,6 +523,21 @@ namespace glsl
|
|||
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
|
||||
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
|
||||
|
||||
if (props.domain == glsl::program_domain::glsl_fragment_program)
|
||||
{
|
||||
OS << "// Workaround for broken early discard in some drivers\n";
|
||||
|
||||
if (props.disable_early_discard)
|
||||
{
|
||||
OS << "bool _fragment_discard = false;\n";
|
||||
OS << "#define _kill() _fragment_discard = true\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS << "#define _kill() discard\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (props.require_lit_emulation)
|
||||
{
|
||||
OS <<
|
||||
|
@ -684,7 +712,7 @@ namespace glsl
|
|||
" // Alphakill\n"
|
||||
" if (rgba.a < 0.000001)\n"
|
||||
" {\n"
|
||||
" discard;\n"
|
||||
" _kill();\n"
|
||||
" return rgba;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
|
|
|
@ -30,5 +30,7 @@ namespace glsl
|
|||
bool emulate_coverage_tests;
|
||||
bool emulate_shadow_compare;
|
||||
bool low_precision_tests;
|
||||
bool disable_early_discard;
|
||||
bool supports_native_fp16;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -197,19 +197,20 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
|||
|
||||
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_fragment_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||
properties2.require_texture_ops = properties.has_tex_op;
|
||||
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||
properties2.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
|
||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||
m_shader_props.domain = glsl::glsl_fragment_program;
|
||||
m_shader_props.require_lit_emulation = properties.has_lit_op;
|
||||
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||
m_shader_props.require_texture_ops = properties.has_tex_op;
|
||||
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||
m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
|
||||
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
|
||||
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||
}
|
||||
|
||||
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
@ -307,11 +308,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
|
|||
|
||||
OS << "\n" << " fs_main();\n\n";
|
||||
|
||||
glsl::insert_rop(
|
||||
OS,
|
||||
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
|
||||
device_props.has_native_half_support,
|
||||
g_cfg.video.antialiasing_level == msaa_level::none);
|
||||
glsl::insert_rop(OS, m_shader_props);
|
||||
|
||||
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
||||
{
|
||||
|
|
|
@ -1,11 +1,19 @@
|
|||
#pragma once
|
||||
#pragma once
|
||||
#include "../Common/FragmentProgramDecompiler.h"
|
||||
#include "../Common/GLSLTypes.h"
|
||||
#include "Emu/RSX/RSXFragmentProgram.h"
|
||||
|
||||
namespace glsl
|
||||
{
|
||||
struct shader_properties;
|
||||
}
|
||||
|
||||
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
|
||||
{
|
||||
std::string& m_shader;
|
||||
ParamArray& m_parrDummy;
|
||||
glsl::shader_properties m_shader_props{};
|
||||
|
||||
public:
|
||||
GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size)
|
||||
: FragmentProgramDecompiler(prog, size)
|
||||
|
|
|
@ -228,19 +228,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
|||
|
||||
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_fragment_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||
properties2.require_texture_ops = properties.has_tex_op;
|
||||
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||
properties2.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
|
||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
||||
m_shader_props.domain = glsl::glsl_fragment_program;
|
||||
m_shader_props.require_lit_emulation = properties.has_lit_op;
|
||||
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||
m_shader_props.require_texture_ops = properties.has_tex_op;
|
||||
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
|
||||
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
||||
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
|
||||
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||
}
|
||||
|
||||
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
@ -338,11 +339,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
|
|||
|
||||
OS << "\n" << " fs_main();\n\n";
|
||||
|
||||
glsl::insert_rop(
|
||||
OS,
|
||||
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
|
||||
device_props.has_native_half_support,
|
||||
g_cfg.video.antialiasing_level == msaa_level::none);
|
||||
glsl::insert_rop(OS, m_shader_props);
|
||||
|
||||
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
||||
{
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#pragma once
|
||||
#include "../Common/FragmentProgramDecompiler.h"
|
||||
#include "../Common/GLSLTypes.h"
|
||||
#include "Emu/RSX/RSXFragmentProgram.h"
|
||||
#include "VulkanAPI.h"
|
||||
#include "VKHelpers.h"
|
||||
|
@ -10,6 +11,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler
|
|||
ParamArray& m_parrDummy;
|
||||
std::vector<vk::glsl::program_input> inputs;
|
||||
class VKFragmentProgram *vk_prog;
|
||||
glsl::shader_properties m_shader_props{};
|
||||
|
||||
public:
|
||||
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)
|
||||
: FragmentProgramDecompiler(prog, size)
|
||||
|
|
Loading…
Add table
Reference in a new issue