rsx: More pipeine refactoring

This commit is contained in:
kd-11 2024-01-20 02:09:51 +03:00 committed by kd-11
parent 50fcec63e0
commit 0971636c07
7 changed files with 307 additions and 91 deletions

View file

@ -14,7 +14,14 @@ namespace program_common
{
for (const auto& e : enums)
{
OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
if constexpr (std::is_enum_v<T> || std::is_integral_v<T>)
{
OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
}
else
{
OS << "#define " << e.first << " " << e.second << "\n";
}
}
OS << "\n";
@ -137,99 +144,25 @@ namespace glsl
;
}
void insert_blend_prologue(std::ostream& OS)
{
OS <<
#include "GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl"
;
}
void insert_rop_init(std::ostream& OS)
{
OS <<
" if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
" {\n"
" // Convert x,y to linear address\n"
" const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
" const uint address = stipple_coord.y * 32u + stipple_coord.x;\n"
" const uint bit_offset = (address & 31u);\n"
" const uint word_index = _get_bits(address, 7, 3);\n"
" const uint sub_index = _get_bits(address, 5, 2);\n\n"
" if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))\n"
" {\n"
" _kill();\n"
" }\n"
" }\n\n";
#include "GLSLSnippets/RSXProg/RSXROPPrologue.glsl"
;
}
void insert_rop(std::ostream& OS, const shader_properties& props)
{
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
if (props.disable_early_discard)
{
OS <<
" if (_fragment_discard)\n"
" {\n"
" discard;\n"
" }\n\n";
}
// Pre-output stages
if (!props.fp32_outputs)
{
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
const auto vtype = (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4";
OS <<
" if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))\n"
" {\n"
" " << reg0 << " = " << vtype << "(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a);\n"
" " << reg1 << " = " << vtype << "(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a);\n"
" " << reg2 << " = " << vtype << "(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a);\n"
" " << reg3 << " = " << vtype << "(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a);\n"
" }\n\n";
}
// Output conversion
if (props.ROP_output_rounding)
{
OS <<
" if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
" {\n"
" " << reg0 << " = round_to_8bit(" << reg0 << ");\n"
" " << reg1 << " = round_to_8bit(" << reg1 << ");\n"
" " << reg2 << " = round_to_8bit(" << reg2 << ");\n"
" " << reg3 << " = round_to_8bit(" << reg3 << ");\n"
" }\n\n";
}
// Post-output stages
// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
OS <<
// Alpha Testing
" if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))\n"
" {\n"
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"
" if (!comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func)) discard;\n"
" }\n\n";
// ALPHA_TO_COVERAGE
if (props.emulate_coverage_tests)
{
OS <<
" if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))\n"
" {\n"
" if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) ||\n"
" !coverage_test_passes(" << reg0 << "))\n"
" {\n"
" discard;\n"
" }\n"
" }\n\n";
}
// Commit
OS <<
" ocol0 = " << reg0 << ";\n"
" ocol1 = " << reg1 << ";\n"
" ocol2 = " << reg2 << ";\n"
" ocol3 = " << reg3 << ";\n\n";
#include "GLSLSnippets//RSXProg/RSXROPEpilogue.glsl"
;
}
void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
@ -271,15 +204,35 @@ namespace glsl
{ "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK }
});
program_common::define_glsl_constants<const char*>(OS,
{
{ "col0", props.fp32_outputs ? "r0" : "h0" },
{ "col1", props.fp32_outputs ? "r2" : "h4" },
{ "col2", props.fp32_outputs ? "r3" : "h6" },
{ "col3", props.fp32_outputs ? "r4" : "h8" }
});
if (props.fp32_outputs || !props.supports_native_fp16)
{
enabled_options.push_back("_32_BIT_OUTPUT");
}
if (!props.fp32_outputs)
{
enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
}
if (props.disable_early_discard)
{
enabled_options.push_back("_DISABLE_EARLY_DISCARD");
}
if (props.ROP_output_rounding)
{
enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
}
enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
}
// Import common header

View file

@ -1,10 +1,19 @@
R"(
#ifdef _32_BIT_OUTPUT
#if defined(_ENABLE_ROP_OUTPUT_ROUNDING) || defined(_ENABLE_PROGRAMMABLE_BLENDING)
// Default. Used when we're not utilizing native fp16
#define round_to_8bit(v4) (floor(fma(v4, vec4(255.), vec4(0.5))) / vec4(255.))
#else
// FP16 version
#define round_to_8bit(v4) (floor(fma(v4, f16vec4(255.), f16vec4(0.5))) / f16vec4(255.))
vec4 round_to_8bit(const in vec4 v4)
{
uvec4 raw = uvec4(floor(fma(v4, vec4(255.), vec4(0.5))));
return vec4(raw) / vec4(255.);
}
#if !defined(_32_BIT_OUTPUT)
f16vec4 round_to_8bit(const in f16vec4 v4)
{
uvec4 raw = uvec4(floor(fma(v4, f16vec4(255.), f16vec4(0.5))));
return f16vec4(raw) / f16vec4(255.);
}
#endif
#endif
#ifdef _DISABLE_EARLY_DISCARD

View file

@ -0,0 +1,152 @@
R"(
/**
* Required register definitions from ROP config
struct {
vec4 blend_constants; // fp32x4
uint blend_func; // rgb16, a16
uint blend_factors_a; // src16, dst16
uint blend_factors_rgb; // src16, dst16
}
*/
#define BLEND_FACTOR_ZERO 0
#define BLEND_FACTOR_ONE 1
#define BLEND_FACTOR_SRC_COLOR 0x0300
#define BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x0301
#define BLEND_FACTOR_SRC_ALPHA 0x0302
#define BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x0303
#define BLEND_FACTOR_DST_ALPHA 0x0304
#define BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x0305
#define BLEND_FACTOR_DST_COLOR 0x0306
#define BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x0307
#define BLEND_FACTOR_SRC_ALPHA_SATURATE 0x0308
#define BLEND_FACTOR_CONSTANT_COLOR 0x8001
#define BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x8002
#define BLEND_FACTOR_CONSTANT_ALPHA 0x8003
#define BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x8004
#define BLEND_FUNC_ADD 0x8006
#define BLEND_MIN 0x8007
#define BLEND_MAX 0x8008
#define BLEND_FUNC_SUBTRACT 0x800A
#define BLEND_FUNC_REVERSE_SUBTRACT 0x800B
#define BLEND_FUNC_REVERSE_SUBTRACT_SIGNED 0x0000F005
#define BLEND_FUNC_ADD_SIGNED 0x0000F006
#define BLEND_FUNC_REVERSE_ADD_SIGNED 0x0000F007
float get_blend_factor_a(const in uint op, const in vec4 src, const in vec4 dst)
{
switch (op)
{
case BLEND_FACTOR_ZERO: return 0.;
case BLEND_FACTOR_ONE: return 1.;
case BLEND_FACTOR_SRC_COLOR:
case BLEND_FACTOR_SRC_ALPHA: return src.a;
case BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.a;
case BLEND_FACTOR_DST_ALPHA:
case BLEND_FACTOR_DST_COLOR: return dst.a;
case BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.a;
case BLEND_FACTOR_SRC_ALPHA_SATURATE: return 1;
case BLEND_FACTOR_CONSTANT_COLOR:
case BLEND_FACTOR_CONSTANT_ALPHA: return constants.a;
case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - constants.a;
}
return 0.;
}
vec3 get_blend_factor_rgb(const in uint op, const in vec4 src, const in vec4 dst)
{
switch (op)
{
case BLEND_FACTOR_ZERO: return vec3(0.);
case BLEND_FACTOR_ONE: return vec3(1.);
case BLEND_FACTOR_SRC_COLOR: return src.rgb;
case BLEND_FACTOR_SRC_ALPHA: return src.aaa;
case BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return 1. - src.rgb;
case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.aaa;
case BLEND_FACTOR_DST_COLOR: return dst.rgb;
case BLEND_FACTOR_DST_ALPHA: return dst.a;
case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.rgb;
case BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return 1. - dst.a;
case BLEND_FACTOR_SRC_ALPHA_SATURATE: return src.rgb;
case BLEND_FACTOR_CONSTANT_COLOR: return blend_constants.rgb;
case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.aaa;
case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return 1. - blend_constants.rgb;
case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.aaa;
}
return vec3(0.);
}
float apply_blend_func_a(const in vec4 src, const in vec4 dst)
{
uint blend_factor_a_s = _get_bits(blend_factors_a, 0, 16);
uint blend_factor_a_d = _get_bits(blend_factors_a, 16, 16);
uint func = _get_bits(blend_func, 16, 16);
const float src_factor_a = get_blend_factor_a(blend_factor_a_s, src, dst);
const float dst_factor_a = get_blend_factor_a(blend_factor_a_d, src, dst);
// NOTE: Destination data is already saturated due to encoding.
const float s = src.a * src_factor_a;
const float d = dst.a * dst_factor_a;
switch (func)
{
case BLEND_FUNC_ADD: return _saturate(s) + d;
case BLEND_MIN: return min(_saturate(s), d);
case BLEND_MAX: return max(_saturate(s), d);
case BLEND_FUNC_SUBTRACT: return _saturate(s) - d;
case BLEND_FUNC_REVERSE_SUBTRACT: return d - _saturate(s);
case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return d - s;
case BLEND_FUNC_ADD_SIGNED: return s + d;
case BLEND_FUNC_REVERSE_ADD_SIGNED: return s + d;
}
return vec3(0.);
}
vec3 apply_blend_func_rgb(const in vec4 src, const in vec4 dst)
{
uint blend_factor_rgb_s = _get_bits(blend_factors_rgb, 0, 16);
uint blend_factor_rgb_d = _get_bits(blend_factors_rgb, 16, 16);
uint func = _get_bits(blend_func, 0, 16);
const vec3 src_factor_rgb = get_blend_factor_rgb(blend_factor_rgb_s, src, dst);
const vec3 dst_factor_rgb = get_blend_factor_rgb(blend_factor_rgb_d, src, dst);
// NOTE: Destination data is already saturated due to encoding.
const vec3 s = src.rgb * src_factor_rgb;
const vec3 d = dst.rgb * dst_factor_rgb;
switch (func)
{
case BLEND_FUNC_ADD: return _saturate(s) + d;
case BLEND_MIN: return min(_saturate(s), d);
case BLEND_MAX: return max(_saturate(s), d);
case BLEND_FUNC_SUBTRACT: return _saturate(s) - d;
case BLEND_FUNC_REVERSE_SUBTRACT: return d - _saturate(s);
case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return d - s;
case BLEND_FUNC_ADD_SIGNED: return s + d;
case BLEND_FUNC_REVERSE_ADD_SIGNED: return s + d;
}
return vec3(0.);
}
vec4 do_blend(const in vec4 src, const in vec4 dst)
{
// Read blend_constants from config and apply blend op
const vec4 result = vec4(
apply_blend_func_rgb(src, dst),
apply_blend_func_a(src, dst)
);
// Accurate int conversion with wrapping
return round_to_8bit(result);
}
)"

View file

@ -0,0 +1,63 @@
R"(
#ifdef _DISABLE_EARLY_DISCARD
if (_fragment_discard)
{
discard;
}
#endif
#ifdef _ENABLE_FRAMEBUFFER_SRGB
if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))
{
col0.rgb = linear_to_srgb(col0).rgb;
col1.rgb = linear_to_srgb(col1).rgb;
col2.rgb = linear_to_srgb(col2).rgb;
col3.rgb = linear_to_srgb(col3).rgb;
}
#endif
#ifdef _ENABLE_ROP_OUTPUT_ROUNDING
if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))
{
col0 = round_to_8bit(col0);
col1 = round_to_8bit(col1);
col2 = round_to_8bit(col2);
col3 = round_to_8bit(col3);
}
#endif
// Post-output stages
// Alpha Testing
if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))
{
const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
if (!comparison_passes(col0.a, alpha_ref, alpha_func))
{
discard;
}
}
#ifdef _EMULATE_COVERAGE_TEST
if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))
{
if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0))
{
discard;
}
}
#endif
#ifdef _ENABLE_PROGRAMMABLE_BLENDING
col0 = do_blend(col0, mrt_color[0]);
if (framebufferCount > 1) col1 = do_blend(col1, mrt_color[1]);
if (framebufferCount > 2) col2 = do_blend(col2, mrt_color[2]);
if (framebufferCount > 3) col3 = do_blend(col3, mrt_color[3]);
#endif
// Commit
ocol0 = col0;
ocol1 = col1;
ocol2 = col2;
ocol3 = col3;
)"

View file

@ -0,0 +1,27 @@
R"(
#ifdef _ENABLE_POLYGON_STIPPLE
if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))
{
// Convert x,y to linear address
const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);
const uint address = stipple_coord.y * 32u + stipple_coord.x;
const uint bit_offset = (address & 31u);
const uint word_index = _get_bits(address, 7, 3);
const uint sub_index = _get_bits(address, 5, 2);
if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))
{
_kill();
}
}
#endif
#ifdef _ENABLE_PROGRAMMABLE_BLENDING
vec4 mrt_color[4];
for (int n = 0; n < framebufferCount; ++n)
{
mrt_color[n] = subPassLoad(mrtAttachments[n]);
}
#endif
)"

View file

@ -928,6 +928,9 @@
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentTextureMSAAOpsInternal.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentTextureOps.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgramCommon.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgrammableBlendPrologue.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPEpilogue.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPPrologue.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexFetch.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexPrologue.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />

View file

@ -2472,5 +2472,14 @@
<None Include="Emu\RSX\Program\GLSLSnippets\RSXMemoryTiling.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXProgrammableBlendPrologue.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPEpilogue.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXROPPrologue.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
</None>
</ItemGroup>
</Project>