rsx: More pipeline refactoring

2025-04-20 11:36:13 +00:00 · 2024-01-20 02:09:51 +03:00 · 2024-01-20 02:09:51 +03:00 · 0971636c07
commit 0971636c07
parent 50fcec63e0
7 changed files with 307 additions and 91 deletions
--- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp
+++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp
@@ -14,7 +14,14 @@ namespace program_common
 	{
 		for (const auto& e : enums)
 		{
-			OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
+			if constexpr (std::is_enum_v<T> || std::is_integral_v<T>)
+			{
+				OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
+			}
+			else
+			{
+				OS << "#define " << e.first << " " << e.second << "\n";
+			}
 		}

 		OS << "\n";
@@ -137,99 +144,25 @@ namespace glsl
 		;
 	}

+	void insert_blend_prologue(std::ostream& OS) // Streams the programmable-blend GLSL helpers (blend factor/equation functions and do_blend) into OS
+	{
+		OS << // The snippet file is a single raw string literal, so the #include below expands to the streamed operand
+			#include "GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl"
+			;
+	}
+
 	void insert_rop_init(std::ostream& OS)
 	{
 		OS <<
-		"	if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
-		"	{\n"
-		"		// Convert x,y to linear address\n"
-		"		const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
-		"		const uint address = stipple_coord.y * 32u + stipple_coord.x;\n"
-		"		const uint bit_offset = (address & 31u);\n"
-		"		const uint word_index = _get_bits(address, 7, 3);\n"
-		"		const uint sub_index = _get_bits(address, 5, 2);\n\n"
-
-		"		if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))\n"
-		"		{\n"
-		"			_kill();\n"
-		"		}\n"
-		"	}\n\n";
+			#include "GLSLSnippets/RSXProg/RSXROPPrologue.glsl" // Polygon stipple test (_ENABLE_POLYGON_STIPPLE) plus the mrt_color loads (_ENABLE_PROGRAMMABLE_BLENDING)
+			;
 	}

 	void insert_rop(std::ostream& OS, const shader_properties& props)
 	{
-		const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
-		const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
-		const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
-		const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
-
-		if (props.disable_early_discard)
-		{
-			OS <<
-			"	if (_fragment_discard)\n"
-			"	{\n"
-			"		discard;\n"
-			"	}\n\n";
-		}
-
-		// Pre-output stages
-		if (!props.fp32_outputs)
-		{
-			// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
-			const auto vtype = (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4";
-			OS <<
-			"	if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))\n"
-			"	{\n"
-			"		" << reg0 << " = " << vtype << "(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a);\n"
-			"		" << reg1 << " = " << vtype << "(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a);\n"
-			"		" << reg2 << " = " << vtype << "(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a);\n"
-			"		" << reg3 << " = " << vtype << "(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a);\n"
-			"	}\n\n";
-		}
-
-		// Output conversion
-		if (props.ROP_output_rounding)
-		{
-			OS <<
-			"	if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
-			"	{\n"
-			"		" << reg0 << " = round_to_8bit(" << reg0 << ");\n"
-			"		" << reg1 << " = round_to_8bit(" << reg1 << ");\n"
-			"		" << reg2 << " = round_to_8bit(" << reg2 << ");\n"
-			"		" << reg3 << " = round_to_8bit(" << reg3 << ");\n"
-			"	}\n\n";
-		}
-
-		// Post-output stages
-		// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
 		OS <<
-		// Alpha Testing
-		"	if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))\n"
-		"	{\n"
-		"		const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"
-		"		if (!comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func)) discard;\n"
-		"	}\n\n";
-
-		// ALPHA_TO_COVERAGE
-		if (props.emulate_coverage_tests)
-		{
-			OS <<
-			"	if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))\n"
-			"	{\n"
-			"		if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) ||\n"
-			"			!coverage_test_passes(" << reg0 << "))\n"
-			"		{\n"
-			"			discard;\n"
-			"		}\n"
-			"	}\n\n";
-		}
-
-		// Commit
-		OS <<
-		"	ocol0 = " << reg0 << ";\n"
-		"	ocol1 = " << reg1 << ";\n"
-		"	ocol2 = " << reg2 << ";\n"
-		"	ocol3 = " << reg3 << ";\n\n";
+			#include "GLSLSnippets/RSXProg/RSXROPEpilogue.glsl" // Per-stage toggles (deferred discard, sRGB, rounding, coverage, blending) are #ifdef-driven inside the snippet
+			;
 	}

 	void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
@@ -271,15 +204,35 @@ namespace glsl
 				{ "ROP_CMD_MASK                ", rsx::ROP_control_bits::ROP_CMD_MASK }
 			});

+			program_common::define_glsl_constants<const char*>(OS,
+			{
+				{ "col0", props.fp32_outputs ? "r0" : "h0" },
+				{ "col1", props.fp32_outputs ? "r2" : "h4" },
+				{ "col2", props.fp32_outputs ? "r3" : "h6" },
+				{ "col3", props.fp32_outputs ? "r4" : "h8" }
+			});
+
 			if (props.fp32_outputs || !props.supports_native_fp16)
 			{
 				enabled_options.push_back("_32_BIT_OUTPUT");
 			}

+			if (!props.fp32_outputs)
+			{
+				enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
+			}
+
 			if (props.disable_early_discard)
 			{
 				enabled_options.push_back("_DISABLE_EARLY_DISCARD");
 			}
+
+			if (props.ROP_output_rounding)
+			{
+				enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
+			}
+
+			enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
 		}

 		// Import common header
--- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl
+++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl
@@ -1,10 +1,19 @@
 R"(
-#ifdef _32_BIT_OUTPUT
+
+#if defined(_ENABLE_ROP_OUTPUT_ROUNDING) || defined(_ENABLE_PROGRAMMABLE_BLENDING)
 // Default. Used when we're not utilizing native fp16
-#define round_to_8bit(v4) (floor(fma(v4, vec4(255.), vec4(0.5))) / vec4(255.))
-#else
-// FP16 version
-#define round_to_8bit(v4) (floor(fma(v4, f16vec4(255.), f16vec4(0.5))) / f16vec4(255.))
+vec4 round_to_8bit(const in vec4 v4) // Quantizes each channel to a multiple of 1/255 (fp32 overload)
+{
+	uvec4 raw = uvec4(floor(fma(v4, vec4(255.), vec4(0.5)))); // floor(v * 255 + 0.5) as unsigned integers; NOTE(review): float->uint of out-of-range values is implementation dependent - confirm callers' input range
+	return vec4(raw) / vec4(255.);
+}
+#if !defined(_32_BIT_OUTPUT)
+f16vec4 round_to_8bit(const in f16vec4 v4) // Half-precision overload; only compiled when _32_BIT_OUTPUT is not defined
+{
+	uvec4 raw = uvec4(floor(fma(v4, f16vec4(255.), f16vec4(0.5)))); // floor(v * 255 + 0.5) evaluated in fp16, then converted to unsigned integers
+	return f16vec4(raw) / f16vec4(255.);
+}
+#endif
 #endif

 #ifdef _DISABLE_EARLY_DISCARD
--- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl
+++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl
@@ -0,0 +1,152 @@
+R"(
+
+/**
+ * Required register definitions from ROP config
+ struct {
+ 	vec4 blend_constants;    // fp32x4
+	uint blend_func;         // rgb16, a16
+	uint blend_factors_a;    // src16, dst16
+	uint blend_factors_rgb;  // src16, dst16
+ }
+*/
+
+#define BLEND_FACTOR_ZERO 0 // Blend factor selectors matched by the get_blend_factor_* switches. NOTE(review): values look like the OpenGL blend-factor enums - confirm
+#define BLEND_FACTOR_ONE  1
+#define BLEND_FACTOR_SRC_COLOR 0x0300
+#define BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x0301
+#define BLEND_FACTOR_SRC_ALPHA 0x0302
+#define BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x0303
+#define BLEND_FACTOR_DST_ALPHA 0x0304
+#define BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x0305
+#define BLEND_FACTOR_DST_COLOR 0x0306
+#define BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x0307
+#define BLEND_FACTOR_SRC_ALPHA_SATURATE 0x0308
+#define BLEND_FACTOR_CONSTANT_COLOR 0x8001
+#define BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x8002
+#define BLEND_FACTOR_CONSTANT_ALPHA 0x8003
+#define BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x8004
+
+#define BLEND_FUNC_ADD 0x8006 // Blend equation selectors matched by the apply_blend_func_* switches
+#define BLEND_MIN 0x8007
+#define BLEND_MAX 0x8008
+#define BLEND_FUNC_SUBTRACT 0x800A
+#define BLEND_FUNC_REVERSE_SUBTRACT 0x800B
+#define BLEND_FUNC_REVERSE_SUBTRACT_SIGNED 0x0000F005
+#define BLEND_FUNC_ADD_SIGNED 0x0000F006
+#define BLEND_FUNC_REVERSE_ADD_SIGNED 0x0000F007
+
+float get_blend_factor_a(const in uint op, const in vec4 src, const in vec4 dst) // Scalar weight for an alpha channel, selected by blend factor 'op'
+{
+	switch (op)
+	{
+	case BLEND_FACTOR_ZERO: return 0.;
+	case BLEND_FACTOR_ONE: return 1.;
+	case BLEND_FACTOR_SRC_COLOR: // *_COLOR selectors share the *_ALPHA result: only the .a component is used for alpha
+	case BLEND_FACTOR_SRC_ALPHA: return src.a;
+	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.a;
+	case BLEND_FACTOR_DST_ALPHA:
+	case BLEND_FACTOR_DST_COLOR: return dst.a;
+	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.a;
+	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return 1.;
+	case BLEND_FACTOR_CONSTANT_COLOR:
+	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.a; // blend_constants is the register named in the header comment and used by the RGB variant
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.a;
+	}
+	return 0.; // Unrecognized selector
+}
+
+vec3 get_blend_factor_rgb(const in uint op, const in vec4 src, const in vec4 dst) // Per-channel RGB weight selected by blend factor 'op'
+{
+	switch (op)
+	{
+	case BLEND_FACTOR_ZERO: return vec3(0.);
+	case BLEND_FACTOR_ONE: return vec3(1.);
+	case BLEND_FACTOR_SRC_COLOR: return src.rgb;
+	case BLEND_FACTOR_SRC_ALPHA: return src.aaa;
+	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return 1. - src.rgb;
+	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.aaa;
+	case BLEND_FACTOR_DST_COLOR: return dst.rgb;
+	case BLEND_FACTOR_DST_ALPHA: return dst.aaa; // Swizzle to vec3: a bare float cannot be returned from a vec3 function
+	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.rgb;
+	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return 1. - dst.aaa;
+	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return src.rgb; // NOTE(review): OpenGL defines this factor as vec3(min(src.a, 1 - dst.a)) - confirm src.rgb is intended
+	case BLEND_FACTOR_CONSTANT_COLOR: return blend_constants.rgb;
+	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.aaa;
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return 1. - blend_constants.rgb;
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.aaa;
+	}
+	return vec3(0.); // Unrecognized selector
+}
+
+float apply_blend_func_a(const in vec4 src, const in vec4 dst) // Blends the alpha channel of src over dst using the configured factors and equation
+{
+	uint blend_factor_a_s = _get_bits(blend_factors_a, 0, 16); // Source factor selector
+	uint blend_factor_a_d = _get_bits(blend_factors_a, 16, 16); // Destination factor selector
+	uint func = _get_bits(blend_func, 16, 16); // Alpha equation; the RGB variant reads the other half of blend_func
+
+	const float src_factor_a = get_blend_factor_a(blend_factor_a_s, src, dst);
+	const float dst_factor_a = get_blend_factor_a(blend_factor_a_d, src, dst);
+
+	// NOTE: Destination data is already saturated due to encoding.
+	const float s = src.a * src_factor_a;
+	const float d = dst.a * dst_factor_a;
+
+	switch (func)
+	{
+	case BLEND_FUNC_ADD: return _saturate(s) + d;
+	case BLEND_MIN: return min(_saturate(s), d);
+	case BLEND_MAX: return max(_saturate(s), d);
+	case BLEND_FUNC_SUBTRACT: return _saturate(s) - d;
+	case BLEND_FUNC_REVERSE_SUBTRACT: return d - _saturate(s);
+	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return d - s;
+	case BLEND_FUNC_ADD_SIGNED: return s + d;
+	case BLEND_FUNC_REVERSE_ADD_SIGNED: return s + d; // NOTE(review): identical to ADD_SIGNED - confirm this is intended
+	}
+
+	return 0.; // Unrecognized equation; must be a scalar to match the float return type
+}
+
+vec3 apply_blend_func_rgb(const in vec4 src, const in vec4 dst) // Blends the RGB channels of src over dst using the configured factors and equation
+{
+	// Unpack the 16-bit selectors: RGB equation, then the source (low half) and destination (high half) factors
+	const uint equation = _get_bits(blend_func, 0, 16);
+	const uint src_selector = _get_bits(blend_factors_rgb, 0, 16);
+	const uint dst_selector = _get_bits(blend_factors_rgb, 16, 16);
+
+	// NOTE: Destination data is already saturated due to encoding, so only the source term is clamped below.
+	const vec3 weighted_src = src.rgb * get_blend_factor_rgb(src_selector, src, dst);
+	const vec3 weighted_dst = dst.rgb * get_blend_factor_rgb(dst_selector, src, dst);
+
+	switch (equation)
+	{
+	case BLEND_FUNC_ADD: return _saturate(weighted_src) + weighted_dst;
+	case BLEND_MIN: return min(_saturate(weighted_src), weighted_dst);
+	case BLEND_MAX: return max(_saturate(weighted_src), weighted_dst);
+	case BLEND_FUNC_SUBTRACT: return _saturate(weighted_src) - weighted_dst;
+	case BLEND_FUNC_REVERSE_SUBTRACT: return weighted_dst - _saturate(weighted_src);
+	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return weighted_dst - weighted_src;
+	case BLEND_FUNC_ADD_SIGNED: return weighted_src + weighted_dst;
+	case BLEND_FUNC_REVERSE_ADD_SIGNED: return weighted_src + weighted_dst;
+	default: break;
+	}
+
+	// Unrecognized equation
+	return vec3(0.);
+}
+
+vec4 do_blend(const in vec4 src, const in vec4 dst) // Blends shader output 'src' with framebuffer value 'dst'
+{
+	// Colour and alpha are blended separately, each with its own factors and equation from the ROP config
+	const vec3 blended_rgb = apply_blend_func_rgb(src, dst);
+	const float blended_a = apply_blend_func_a(src, dst);
+	const vec4 blended = vec4(blended_rgb, blended_a);
+
+	// Accurate int conversion with wrapping:
+	// the blended value goes through round_to_8bit before being returned
+	return round_to_8bit(blended);
+}
+
+)"
--- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl
+++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl
@@ -0,0 +1,63 @@
+R"(
+// ROP epilogue: statement-level snippet streamed by insert_rop(). col0..col3 are macros naming the shader's colour output registers.
+#ifdef _DISABLE_EARLY_DISCARD
+	if (_fragment_discard) // NOTE(review): presumably set by _kill() when early discard is disabled - confirm
+	{
+		discard;
+	}
+#endif
+
+#ifdef _ENABLE_FRAMEBUFFER_SRGB
+	if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT)) // Runtime switch; only the rgb channels are converted, alpha is left untouched
+	{
+		col0.rgb = linear_to_srgb(col0).rgb;
+		col1.rgb = linear_to_srgb(col1).rgb;
+		col2.rgb = linear_to_srgb(col2).rgb;
+		col3.rgb = linear_to_srgb(col3).rgb;
+	}
+#endif
+
+#ifdef _ENABLE_ROP_OUTPUT_ROUNDING
+	if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT)) // Quantize all four outputs when the INT_FRAMEBUFFER_BIT flag is set
+	{
+		col0 = round_to_8bit(col0);
+		col1  = round_to_8bit(col1);
+		col2 = round_to_8bit(col2);
+		col3 = round_to_8bit(col3);
+	}
+#endif
+
+	// Post-output stages
+	// Alpha Testing
+	if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))
+	{
+		const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
+		if (!comparison_passes(col0.a, alpha_ref, alpha_func)) // Only the first colour output's alpha is tested
+		{
+			discard;
+		}
+	}
+
+#ifdef _EMULATE_COVERAGE_TEST
+	if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))
+	{
+		if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0))
+		{
+			discard;
+		}
+	}
+#endif
+
+#ifdef _ENABLE_PROGRAMMABLE_BLENDING
+	col0 = do_blend(col0, mrt_color[0]); // mrt_color[] is filled by the ROP prologue snippet under the same option
+	if (framebufferCount > 1) col1 = do_blend(col1, mrt_color[1]);
+	if (framebufferCount > 2) col2 = do_blend(col2, mrt_color[2]);
+	if (framebufferCount > 3) col3 = do_blend(col3, mrt_color[3]);
+#endif
+
+	// Commit
+	ocol0 = col0;
+	ocol1 = col1;
+	ocol2 = col2;
+	ocol3 = col3;
+)"
--- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl
+++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl
@@ -0,0 +1,27 @@
+R"(
+// ROP prologue: statement-level snippet streamed by insert_rop_init().
+#ifdef _ENABLE_POLYGON_STIPPLE
+	if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))
+	{
+		// Convert x,y to linear address
+		const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32); // Position inside a repeating 32x32 tile
+		const uint address = stipple_coord.y * 32u + stipple_coord.x;
+		const uint bit_offset = (address & 31u); // Bit within the selected 32-bit word
+		const uint word_index = _get_bits(address, 7, 3); // NOTE(review): presumably (offset, length) - confirm against _get_bits
+		const uint sub_index = _get_bits(address, 5, 2);
+
+		if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset))) // Pattern bit clear => fragment is killed
+		{
+			_kill();
+		}
+	}
+#endif
+
+#ifdef _ENABLE_PROGRAMMABLE_BLENDING
+	vec4 mrt_color[4]; // Destination colours later consumed by do_blend() in the ROP epilogue
+	for (int n = 0; n < framebufferCount; ++n) // NOTE(review): assumes framebufferCount <= 4 (the array size) - confirm
+	{
+		mrt_color[n] = subPassLoad(mrtAttachments[n]);
+	}
+#endif
+)"
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -928,6 +928,9 @@
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentTextureMSAAOpsInternal.glsl" />
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentTextureOps.glsl" />
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgramCommon.glsl" />
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgrammableBlendPrologue.glsl" />
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPEpilogue.glsl" />
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPPrologue.glsl" />
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexFetch.glsl" />
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexPrologue.glsl" />
    <None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -2472,5 +2472,14 @@
    <None Include="Emu\RSX\Program\GLSLSnippets\RSXMemoryTiling.glsl">
      <Filter>Emu\GPU\RSX\Program\Snippets</Filter>
    </None>
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgrammableBlendPrologue.glsl">
+      <Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
+    </None>
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPEpilogue.glsl">
+      <Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
+    </None>
+    <None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPPrologue.glsl">
+      <Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
+    </None>
  </ItemGroup>
 </Project>