diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 3d48784647..e739c6ae06 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -556,10 +556,10 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) { - PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_D3D); + PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); } @@ -588,7 +588,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) } // Need to compile a new shader - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode)) diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 5c4cf17962..5fe90bd410 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -163,7 +163,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g SetCurrentPrimitiveTopology(gs_primitive_type); GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); - PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); + PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode); VertexShaderUid vs_uid = GetVertexShaderUid(); bool gs_changed = gs_uid != s_last_geometry_shader_uid; @@ -263,7 +263,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); } @@ -275,7 +275,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ } else { - ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); ID3DBlob* ps_bytecode = nullptr; if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 9cdc57c892..7cd3e2863e 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -81,12 +81,12 @@ protected: PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) override { - return GetPixelShaderUid(dst_alpha_mode, api_type); + return GetPixelShaderUid(dst_alpha_mode); } ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, - PixelShaderUid) override + PixelShaderUid uid) override { - return GeneratePixelShaderCode(dst_alpha_mode, api_type); + return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData()); } }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 5d9da051bb..2a8accd5b9 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -210,7 +210,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ newentry.in_cache = 0; ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) @@ -397,13 +397,13 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) { - uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); + uid->puid = GetPixelShaderUid(dstAlphaMode); uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 006fa7584a..a94cd02c15 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -151,28 +151,14 @@ static const char* tevRasTable[] = { static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"}; static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"}; -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType); -template -static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift); -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, - bool stereo, API_TYPE ApiType); -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, - DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data); - -template -static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) +// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support) +// leak +// into this UID; This is really unhelpful if these UIDs ever move from one machine to +// another. +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode) { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - pixel_shader_uid_data dummy_data; - pixel_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; + PixelShaderUid out; + pixel_shader_uid_data* uid_data = out.GetUidData(); memset(uid_data, 0, sizeof(*uid_data)); uid_data->dstAlphaMode = dstAlphaMode; @@ -185,6 +171,186 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) u32 numStages = uid_data->genMode_numtevstages + 1; + const bool forced_early_z = + g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && + (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) + // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. + // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. + && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); + const bool per_pixel_depth = + (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || + (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || + (bpmem.zmode.testenable && bpmem.genMode.zfreeze); + + uid_data->per_pixel_depth = per_pixel_depth; + uid_data->forced_early_z = forced_early_z; + uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->stereo = g_ActiveConfig.iStereoMode > 0; + + if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() && + (!uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + { + static bool warn_once = true; + if (warn_once) + WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current " + "configuration. Make sure to enable fast depth calculations. If this message " + "still shows up your hardware isn't able to emulate the feature properly (a " + "GPU with D3D 11.0 / OGL 4.2 support is required)."); + warn_once = false; + } + + if (uid_data->per_pixel_lighting) + { + // The lighting shader only needs the two color bits of the 23bit component bit array. + uid_data->components = + (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; + ; + GetLightingShaderUid(uid_data->lighting); + } + + if (uid_data->genMode_numtexgens > 0) + { + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) + { + // optional perspective divides + uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; + } + } + + // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (uid_data->genMode_numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + + uid_data->nIndirectStagesUsed = nIndirectStagesUsed; + for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) + { + if (uid_data->nIndirectStagesUsed & (1 << i)) + uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); + } + + for (unsigned int n = 0; n < numStages; n++) + { + int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); + bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; + // HACK to handle cases where the tex gen is not enabled + if (!bHasTexCoord) + texcoord = bpmem.genMode.numtexgens; + + uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + uid_data->stagehash[n].tevorders_texcoord = texcoord; + if (uid_data->stagehash[n].hasindstage) + uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; + + TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC; + TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC; + uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF; + uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later + + if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || + cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || + cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || + ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) + { + const int i = bpmem.combiners[n].alphaC.rswap; + uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); + } + + uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); + if (uid_data->stagehash[n].tevorders_enable) + { + const int i = bpmem.combiners[n].alphaC.tswap; + uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + } + + if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || + cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || + ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) + { + uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); + uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); + } + } + +#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) + uid_data->num_values = (uid_data->per_pixel_lighting) ? + sizeof(*uid_data) : + MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + + AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); + uid_data->Pretest = Pretest; + uid_data->late_ztest = bpmem.UseLateDepthTest(); + + // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled + // (in this case we need to write a depth value if depth test passes regardless of the alpha + // testing result) + if (uid_data->Pretest == AlphaTest::UNDETERMINED || + (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) + { + uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; + uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; + uid_data->alpha_test_logic = bpmem.alpha_test.logic; + + // ZCOMPLOC HACK: + // The only way to emulate alpha test + early-z is to force early-z in the shader. + // As this isn't available on all drivers and as we can't emulate this feature otherwise, + // we are only able to choose which one we want to respect more. + // Tests seem to have proven that writing depth even when the alpha test fails is more + // important that a reliable alpha test, so we just force the alpha test to always succeed. + // At least this seems to be less buggy. + uid_data->alpha_test_use_zcomploc_hack = + bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && + !g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze; + } + + uid_data->zfreeze = bpmem.genMode.zfreeze; + uid_data->ztex_op = bpmem.ztex2.op; + uid_data->early_ztest = bpmem.UseEarlyDepthTest(); + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + + if (dstAlphaMode != DSTALPHA_ALPHA_PASS) + { + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; + uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; + } + + return out; +} + +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType); +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift); +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, + bool stereo, API_TYPE ApiType); +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, + DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); + +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data) +{ + ShaderCode out; + + u32 numStages = uid_data->genMode_numtevstages + 1; + out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens, uid_data->genMode_numindstages); @@ -281,27 +447,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, - uid_data->per_pixel_lighting, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, + ""); out.Write("};\n"); - { - const bool forced_early_z = - g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && - (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) - // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. - // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. - && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); - const bool per_pixel_depth = - (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || - (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || - (bpmem.zmode.testenable && bpmem.genMode.zfreeze); - - uid_data->per_pixel_depth = per_pixel_depth; - uid_data->forced_early_z = forced_early_z; - uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; - } - if (uid_data->forced_early_z) { // Zcomploc (aka early_ztest) is a way to control whether depth test is done before @@ -360,9 +509,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) warn_once = false; } - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; - uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; - uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); @@ -375,7 +521,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("in VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); @@ -478,20 +624,13 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat dist, dist2, attn;\n"); - // The lighting shader only needs the two color bits of the 23bit component bit array. - uid_data->components = - (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; - ; - // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - - // FIXME: Disabled until pixelshadergen is split - // GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, - // "colors_", "col"); + GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled @@ -506,7 +645,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) { out.Write("\tint2 fixpoint_uv%d = itrunc(", i); // optional perspective divides - uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); @@ -520,24 +658,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } } - // indirect texture map lookup - int nIndirectStagesUsed = 0; - if (uid_data->genMode_numindstages > 0) - { - for (unsigned int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - - uid_data->nIndirectStagesUsed = nIndirectStagesUsed; for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) { if (uid_data->nIndirectStagesUsed & (1 << i)) { - uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); - unsigned int texcoord = uid_data->GetTevindirefCoord(i); unsigned int texmap = uid_data->GetTevindirefMap(i); @@ -551,17 +675,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\tint3 iindtex%d = ", i); - SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); } } for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType); // build the equation for this stage - -#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - uid_data->num_values = (uid_data->per_pixel_lighting) ? - sizeof(*uid_data) : - MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + WriteStage(out, uid_data, i, ApiType); // build the equation for this stage { // The results of the last texenv stage are put onto the screen, @@ -581,18 +700,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tprev = prev & 255;\n"); - AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); - uid_data->Pretest = Pretest; - uid_data->late_ztest = bpmem.UseLateDepthTest(); - // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha // testing result) if (uid_data->Pretest == AlphaTest::UNDETERMINED || (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) - WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); - - uid_data->zfreeze = bpmem.genMode.zfreeze; + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); if (uid_data->zfreeze) { @@ -629,10 +742,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); - uid_data->ztex_op = bpmem.ztex2.op; - uid_data->early_ztest = bpmem.UseEarlyDepthTest(); - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; - // depth texture can safely be ignored if the result won't be written to the depth buffer // (early_ztest) and isn't used for fog either const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel; @@ -674,7 +783,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } else { - WriteFog(out, uid_data); + WriteFog(out, uid_data); out.Write("\tocol0 = float4(prev) / 255.0;\n"); } @@ -704,25 +813,20 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) return out; } -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType) +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType) { - int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); - bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + auto& stage = uid_data->stagehash[n]; + out.Write("\n\t// TEV stage %d\n", n); + // HACK to handle cases where the tex gen is not enabled + u32 texcoord = stage.tevorders_texcoord; + bool bHasTexCoord = texcoord < uid_data->genMode_numtexgens; if (!bHasTexCoord) texcoord = 0; - out.Write("\n\t// TEV stage %d\n", n); - - auto& stage = uid_data->stagehash[n]; - - uid_data->stagehash[n].hasindstage = bHasIndStage; - uid_data->stagehash[n].tevorders_texcoord = texcoord; if (stage.hasindstage) { - uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; TevStageIndirect tevind; tevind.hex = stage.tevind; @@ -785,7 +889,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -798,7 +902,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -823,20 +927,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE // wrap S if (tevind.sw == ITW_OFF) - out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord); else if (tevind.sw == ITW_0) out.Write("\twrappedcoord.x = 0;\n"); else - out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.sw]); // wrap T if (tevind.tw == ITW_OFF) - out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); else if (tevind.tw == ITW_0) out.Write("\twrappedcoord.y = 0;\n"); else - out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.tw]); if (tevind.fb_addprev) // add previous tevcoord @@ -848,10 +952,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } - uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF; - uid_data->stagehash[n].ac = - bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later - TevStageCombiner::ColorCombiner cc; TevStageCombiner::AlphaCombiner ac; cc.hex = stage.cc; @@ -862,35 +962,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { - const int i = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; - ac.rswap = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); - + // Generate swizzle string to represent the Ras color channel swapping char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a], "rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'}; out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap); } - uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); if (stage.tevorders_enable) { - int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); - const int i = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; - ac.tswap = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + // Generate swizzle string to represent the texture color channel swapping + char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], + "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - if (!bHasIndStage) + if (!stage.hasindstage) { // calc tevcord if (bHasTexCoord) @@ -898,12 +983,9 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE else out.Write("\ttevcoord.xy = int2(0, 0);\n"); } - - char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], - "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - out.Write("\ttextemp = "); - SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, uid_data->stereo, + ApiType); } else { @@ -914,8 +996,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) { - uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); - uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc], tevKSelTableA[stage.tevksel_ka]); @@ -1014,8 +1094,8 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write(";\n"); } -template -static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift) +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift) { const char* tevScaleTableLeft[] = { "", // SCALE_1 @@ -1061,8 +1141,7 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in out.Write(")%s", tevScaleTableRight[shift]); } -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, bool stereo, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); @@ -1094,8 +1173,7 @@ static const char* tevAlphaFunclogicTable[] = { " == " // xnor }; -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"}; @@ -1107,10 +1185,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api else out.Write("\tif(!( "); - uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; - uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; - uid_data->alpha_test_logic = bpmem.alpha_test.logic; - // Lookup the first component from the alpha function table int compindex = uid_data->alpha_test_comp0; out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); @@ -1133,16 +1207,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0"); // ZCOMPLOC HACK: - // The only way to emulate alpha test + early-z is to force early-z in the shader. - // As this isn't available on all drivers and as we can't emulate this feature otherwise, - // we are only able to choose which one we want to respect more. - // Tests seem to have proven that writing depth even when the alpha test fails is more - // important that a reliable alpha test, so we just force the alpha test to always succeed. - // At least this seems to be less buggy. - uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && - !g_ActiveConfig.backend_info.bSupportsEarlyZ && - !bpmem.genMode.zfreeze; - if (!uid_data->alpha_test_use_zcomploc_hack) { out.Write("\t\tdiscard;\n"); @@ -1164,15 +1228,11 @@ static const char* tevFogFuncsTable[] = { "\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2 }; -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data) +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) { - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; if (uid_data->fog_fsel == 0) return; // no Fog - uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; - out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGI, C_FOGI); out.SetConstantsUsed(C_FOGF, C_FOGF + 1); @@ -1198,7 +1258,6 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) // ze *= x_adjust // TODO Instead of this theoretical calculation, we should use the // coefficient table given in the fog range BP registers! - uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); @@ -1223,13 +1282,3 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } - -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} - -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index ed4299e9de..5b02171129 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -87,7 +87,7 @@ struct pixel_shader_uid_data } } - inline u32 GetTevindirefCoord(int index) + inline u32 GetTevindirefCoord(int index) const { if (index == 0) { @@ -108,7 +108,7 @@ struct pixel_shader_uid_data return 0; } - inline u32 GetTevindirefMap(int index) + inline u32 GetTevindirefMap(int index) const { if (index == 0) { @@ -165,5 +165,6 @@ struct pixel_shader_uid_data typedef ShaderUid PixelShaderUid; -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data); +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode);