From e04855a0da053a458ffe5f1499a8fc0efbf0b59b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 16 Nov 2022 17:37:49 +0300 Subject: [PATCH] rsx: Improve ROP output handling - Perform 8-bit quantization/rounding before emulated operations like ALPHA_TEST --- rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 106 +++++++++++++++++---------- rpcs3/Emu/RSX/Program/GLSLCommon.h | 40 ++++++++++ rpcs3/Emu/RSX/RSXThread.cpp | 46 +++++++----- rpcs3/Emu/RSX/RSXThread.h | 11 --- 4 files changed, 134 insertions(+), 69 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 5fedf97364..7218d66bb7 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -404,7 +404,7 @@ namespace glsl void insert_rop_init(std::ostream& OS) { OS << - " if (_test_bit(rop_control, 9))\n" + " if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n" " {\n" " // Convert x,y to linear address\n" " const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n" @@ -435,30 +435,31 @@ namespace glsl " {\n" " discard;\n" " }\n" - " else if (_get_bits(rop_control, 0, 8) != 0)\n"; + " else if ((rop_control & ROP_CMD_MASK) != 0)\n"; } else { - OS << " if (_get_bits(rop_control, 0, 8) != 0)\n"; + OS << " if ((rop_control & ROP_CMD_MASK) != 0)\n"; } OS << " {\n" - " const bool alpha_test = _test_bit(rop_control, 0);\n" - " const uint alpha_func = _get_bits(rop_control, 16, 3);\n"; + " const bool alpha_test = _test_bit(rop_control, ALPHA_TEST_ENABLE_BIT);\n" + " const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"; if (!props.fp32_outputs) { - OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n"; + OS << " const bool srgb_convert = _test_bit(rop_control, SRGB_FRAMEBUFFER_BIT);\n\n"; } if (props.emulate_coverage_tests) { - OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n"; + OS << " const bool a2c_enabled = _test_bit(rop_control, 
ALPHA_TO_COVERAGE_ENABLE_BIT);\n"; + OS << " const bool msaa_write_enabled = _test_bit(rop_control, MSAA_WRITE_ENABLE_BIT);\n"; } OS << - " if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n" + " if (alpha_test && !comparison_passes(ROP_quantize(" << reg0 << ").a, alpha_ref, alpha_func))\n" " {\n" " discard;\n" " }\n"; @@ -466,7 +467,7 @@ namespace glsl if (props.emulate_coverage_tests) { OS << - " else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n" + " else if (a2c_enabled && (!msaa_write_enabled || !coverage_test_passes(" << reg0 << ")))\n" " {\n" " discard;\n" " }\n"; @@ -480,10 +481,10 @@ namespace glsl OS << " else if (srgb_convert)\n" " {\n" - " " << reg0 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" - " " << reg1 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" - " " << reg2 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" - " " << reg3 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n" + " " << reg0 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" + " " << reg1 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" + " " << reg2 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" + " " << reg3 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n" " }\n"; } else @@ -491,10 +492,10 @@ namespace glsl OS << " else if (srgb_convert)\n" " {\n" - " " << reg0 << " = round_to_8bit(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" - " " << reg1 << " = round_to_8bit(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" - " " << reg2 << " = round_to_8bit(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" - " " << reg3 << " = round_to_8bit(vec4(linear_to_srgb(" << reg3 << ").rgb, " << 
reg3 << ".a));\n" + " " << reg0 << " = round_srgb8(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" + " " << reg1 << " = round_srgb8(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" + " " << reg2 << " = round_srgb8(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" + " " << reg3 << " = round_srgb8(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n" " }\n"; } } @@ -528,8 +529,37 @@ namespace glsl if (props.domain == glsl::program_domain::glsl_fragment_program) { - OS << "// Workaround for broken early discard in some drivers\n"; + OS << "// ROP control\n"; + OS << "#define ALPHA_TEST_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT << "\n"; + OS << "#define SRGB_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT << "\n"; + OS << "#define ALPHA_TO_COVERAGE_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT << "\n"; + OS << "#define MSAA_WRITE_ENABLE_BIT " << rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT << "\n"; + OS << "#define INT_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT << "\n"; + OS << "#define POLYGON_STIPPLE_ENABLE_BIT " << rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT << "\n"; + OS << "#define ALPHA_TEST_FUNC_OFFSET " << rsx::ROP_control_bits::ALPHA_FUNC_OFFSET << "\n"; + OS << "#define ALPHA_TEST_FUNC_LENGTH " << rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS << "\n"; + OS << "#define MSAA_SAMPLE_CTRL_OFFSET " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET << "\n"; + OS << "#define MSAA_SAMPLE_CTRL_LENGTH " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS << "\n"; + OS << "#define ROP_CMD_MASK " << rsx::ROP_control_bits::ROP_CMD_MASK << "\n\n"; + // 8-bit rounding/quantization + { + const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)"; + const auto _1_over_2 = (props.supports_native_fp16) ? 
"f16vec4(0.5)" : "vec4(0.5)"; + OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n"; + } + + if (!props.fp32_outputs && props.srgb_output_rounding) + { + OS << "#define round_srgb8 round_to_8bit\n\n"; + } + else + { + // We can get the 8-bit rounding for free on non-NVIDIA hardware + OS << "#define round_srgb8(v4) (v4)\n\n"; + } + + OS << "// Workaround for broken early discard in some drivers\n"; if (props.disable_early_discard) { OS << "bool _fragment_discard = false;\n"; @@ -540,21 +570,6 @@ namespace glsl OS << "#define _kill() discard\n\n"; } - if (!props.fp32_outputs) - { - OS << "// Workaround broken output rounding behavior\n"; - if (props.srgb_output_rounding) - { - const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)"; - const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)"; - OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n\n"; - } - else - { - OS << "#define round_to_8bit(v4) (v4)\n\n"; - } - } - if (props.require_texture_ops) { // Declare special texture control flags @@ -567,17 +582,32 @@ namespace glsl OS << "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n"; OS << "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n"; - OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n"; - OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n"; + OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n"; + OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n"; OS << "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n"; OS << "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n"; OS << "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n"; OS << "#define FILTERED_MIN_BIT " << 
rsx::texture_control_bits::FILTERED_MIN << "\n"; - OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n"; + OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n"; OS << "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n"; OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n"; OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n"; } + + OS << fmt::replace_all( + "$Ty ROP_quantize(const in $Ty v)\n" + "{\n" + " if (!_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n" + " {\n" + " return v;\n" + " }\n" + "\n" + " return round_to_8bit(v);\n" + "}\n", + { + { "$Ty"sv, (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4"} + } + ); } if (props.require_lit_emulation) @@ -667,10 +697,8 @@ namespace glsl { // Purely stochastic OS << - "bool coverage_test_passes(const in vec4 _sample, const in uint control)\n" + "bool coverage_test_passes(const in vec4 _sample)\n" "{\n" - " if (!_test_bit(control, 0)) return false;\n" - "\n" " float random = _rand(gl_FragCoord);\n" " return (_sample.a > random);\n" "}\n\n"; diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.h b/rpcs3/Emu/RSX/Program/GLSLCommon.h index 13813ea4ac..7c6ff28e5a 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.h @@ -31,6 +31,46 @@ namespace rsx EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A), EXPAND_OFFSET = EXPAND_A }; + + enum ROP_control_bits : u32 + { + // Commands. These trigger explicit action. 
+ ALPHA_TEST_ENABLE_BIT = 0, + SRGB_FRAMEBUFFER_BIT = 1, + ALPHA_TO_COVERAGE_ENABLE_BIT = 2, + POLYGON_STIPPLE_ENABLE_BIT = 3, + + // Auxiliary config + INT_FRAMEBUFFER_BIT = 16, + MSAA_WRITE_ENABLE_BIT = 17, + + // Data + ALPHA_FUNC_OFFSET = 18, + MSAA_SAMPLE_CTRL_OFFSET = 21, + + // Data lengths + ALPHA_FUNC_NUM_BITS = 3, + MSAA_SAMPLE_CTRL_NUM_BITS = 2, + + // Meta + ROP_CMD_MASK = 0xF // Commands are encoded in the lower 16 bits (only bits 0-3 are currently assigned, hence 0xF) + }; + + struct ROP_control_t + { + u32 value = 0; + + void enable_alpha_test() { value |= (1u << ROP_control_bits::ALPHA_TEST_ENABLE_BIT); } + void enable_framebuffer_sRGB() { value |= (1u << ROP_control_bits::SRGB_FRAMEBUFFER_BIT); } + void enable_alpha_to_coverage() { value |= (1u << ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT); } + void enable_polygon_stipple() { value |= (1u << ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT); } + + void enable_framebuffer_INT() { value |= (1u << ROP_control_bits::INT_FRAMEBUFFER_BIT); } + void enable_MSAA_writes() { value |= (1u << ROP_control_bits::MSAA_WRITE_ENABLE_BIT); } + + void set_alpha_test_func(uint func) { value |= (func << ROP_control_bits::ALPHA_FUNC_OFFSET); } + void set_msaa_control(uint ctrl) { value |= (ctrl << ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET); } + }; } namespace program_common diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ee17dcb429..b19c4cbddf 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1050,18 +1050,18 @@ namespace rsx void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) { - u32 rop_control = 0u; + ROP_control_t rop_control{}; if (rsx::method_registers.alpha_test_enabled()) { const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func()); - rop_control |= (alpha_func << 16); - rop_control |= ROP_control::alpha_test_enable; + rop_control.set_alpha_test_func(alpha_func); + rop_control.enable_alpha_test(); } if
(rsx::method_registers.polygon_stipple_enabled()) { - rop_control |= ROP_control::polygon_stipple_enable; + rop_control.enable_polygon_stipple(); } if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c) @@ -1070,8 +1070,11 @@ namespace rsx // Alpha values generate a coverage mask for order independent blending // Requires hardware AA to work properly (or just fragment sample stage in fragment shaders) // Simulated using combined alpha blend and alpha test - if (rsx::method_registers.msaa_sample_mask()) rop_control |= ROP_control::msaa_mask_enable; - rop_control |= ROP_control::csaa_enable; + rop_control.enable_alpha_to_coverage(); + if (rsx::method_registers.msaa_sample_mask()) + { + rop_control.enable_MSAA_writes(); + } // Sample configuration bits switch (rsx::method_registers.surface_antialias()) @@ -1079,10 +1082,10 @@ namespace rsx case rsx::surface_antialiasing::center_1_sample: break; case rsx::surface_antialiasing::diagonal_centered_2_samples: - rop_control |= 1u << 6; + rop_control.set_msaa_control(1u); break; default: - rop_control |= 3u << 6; + rop_control.set_msaa_control(3u); break; } } @@ -1091,19 +1094,24 @@ namespace rsx const f32 fog1 = rsx::method_registers.fog_params_1(); const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation()); - if (rsx::method_registers.framebuffer_srgb_enabled()) + // Check if framebuffer is actually an XRGB format and not a WZYX format + switch (rsx::method_registers.surface_color()) { - // Check if framebuffer is actually an XRGB format and not a WZYX format - switch (rsx::method_registers.surface_color()) + case rsx::surface_color_format::w16z16y16x16: + case rsx::surface_color_format::w32z32y32x32: + case rsx::surface_color_format::x32: + // These behave very differently from "normal" formats. + break; + default: + // Integer framebuffer formats. + rop_control.enable_framebuffer_INT(); + + // Check if we want sRGB conversion.
+ if (rsx::method_registers.framebuffer_srgb_enabled()) { - case rsx::surface_color_format::w16z16y16x16: - case rsx::surface_color_format::w32z32y32x32: - case rsx::surface_color_format::x32: - break; - default: - rop_control |= ROP_control::framebuffer_srgb_enable; - break; + rop_control.enable_framebuffer_sRGB(); } + break; } // Generate wpos coefficients @@ -1120,7 +1128,7 @@ namespace rsx const f32 alpha_ref = rsx::method_registers.alpha_ref(); u32 *dst = static_cast<u32*>(buffer); - utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref)); + utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref)); utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias)); } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 7091e955a7..879738525a 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -198,17 +198,6 @@ namespace rsx result_zcull_intr = 2 }; - enum ROP_control : u32 - { - alpha_test_enable = (1u << 0), - framebuffer_srgb_enable = (1u << 1), - csaa_enable = (1u << 4), - msaa_mask_enable = (1u << 5), - msaa_config_mask = (3u << 6), - polygon_stipple_enable = (1u << 9), - alpha_func_mask = (7u << 16) - }; - u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size); u32 get_address(u32 offset, u32 location, u32 size_to_check = 0,