diff --git a/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp b/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp index 29d801cc52..0c9cf836ca 100644 --- a/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp +++ b/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp @@ -346,8 +346,8 @@ std::unique_ptr CustomShaderCache::CompilePixelShader(const PixelShaderUid& uid, const CustomShaderInstance& custom_shaders) const { - const ShaderCode source_code = GeneratePixelShaderCode( - m_api_type, m_host_config, uid.GetUidData(), custom_shaders.pixel_contents); + const ShaderCode source_code = + PixelShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", ""); return g_gfx->CreateShaderFromSource(ShaderStage::Pixel, source_code.GetBuffer(), "Custom Pixel Shader"); } diff --git a/Source/Core/VideoCommon/LightingShaderGen.cpp b/Source/Core/VideoCommon/LightingShaderGen.cpp index 4fb2c98ebd..c486f79af7 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/LightingShaderGen.cpp @@ -81,29 +81,34 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, - std::string_view in_color_name, std::string_view dest) +void GenerateLightingShaderHeader(ShaderCode& object, const LightingUidData& uid_data) { for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++) { + object.Write( + "vec4 dolphin_calculate_lighting_chn{}(vec4 vertex_color, vec4 pos, vec3 _normal)\n", j); object.Write("{{\n"); + object.Write("\tint4 lacc;\n" + "\tfloat3 ldir, h, cosAttn, distAttn;\n" + "\tfloat dist, dist2, attn;\n"); + const bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex - 
object.Write("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j); + object.Write("\tint4 mat = int4(round(vertex_color * 255.0));\n"); else // from color - object.Write("int4 mat = {}[{}];\n", I_MATERIALS, j + 2); + object.Write("\tint4 mat = {}[{}];\n", I_MATERIALS, j + 2); if ((uid_data.enablelighting & (1 << j)) != 0) { if ((uid_data.ambsource & (1 << j)) != 0) // from vertex - object.Write("lacc = int4(round({}{} * 255.0));\n", in_color_name, j); + object.Write("\tlacc = int4(round(vertex_color * 255.0));\n"); else // from color - object.Write("lacc = {}[{}];\n", I_MATERIALS, j); + object.Write("\tlacc = {}[{}];\n", I_MATERIALS, j); } else { - object.Write("lacc = int4(255, 255, 255, 255);\n"); + object.Write("\tlacc = int4(255, 255, 255, 255);\n"); } // check if alpha is different @@ -111,21 +116,21 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d if (alphamatsource != colormatsource) { if (alphamatsource) // from vertex - object.Write("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j); + object.Write("\tmat.w = int(round(vertex_color.w * 255.0));\n"); else // from color - object.Write("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2); + object.Write("\tmat.w = {}[{}].w;\n", I_MATERIALS, j + 2); } if ((uid_data.enablelighting & (1 << (j + 2))) != 0) { if ((uid_data.ambsource & (1 << (j + 2))) != 0) // from vertex - object.Write("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j); + object.Write("\tlacc.w = int(round(vertex_color.w * 255.0));\n"); else // from color - object.Write("lacc.w = {}[{}].w;\n", I_MATERIALS, j); + object.Write("\tlacc.w = {}[{}].w;\n", I_MATERIALS, j); } else { - object.Write("lacc.w = 255;\n"); + object.Write("\tlacc.w = 255;\n"); } if ((uid_data.enablelighting & (1 << j)) != 0) // Color lights @@ -144,9 +149,9 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d GenerateLightShader(object, uid_data, i, j + 2, true); } } - object.Write("lacc = 
clamp(lacc, 0, 255);\n"); - object.Write("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); - object.Write("}}\n"); + object.Write("\tlacc = clamp(lacc, 0, 255);\n"); + object.Write("\treturn vec4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"); + object.Write("}}\n\n"); } } @@ -176,47 +181,9 @@ void GetLightingShaderUid(LightingUidData& uid_data) } } -void GenerateCustomLightingHeaderDetails(ShaderCode* out, u32 enablelighting, u32 light_mask) -{ - u32 light_count = 0; - for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++) - { - if ((enablelighting & (1 << j)) != 0) // Color lights - { - for (int i = 0; i < 8; ++i) - { - if ((light_mask & (1 << (i + 8 * j))) != 0) - { - light_count++; - } - } - } - if ((enablelighting & (1 << (j + 2))) != 0) // Alpha lights - { - for (int i = 0; i < 8; ++i) - { - if ((light_mask & (1 << (i + 8 * (j + 2)))) != 0) - { - light_count++; - } - } - } - } - if (light_count > 0) - { - out->Write("\tCustomShaderLightData[{}] light;\n", light_count); - } - else - { - // Cheat so shaders compile - out->Write("\tCustomShaderLightData[1] light;\n", light_count); - } - out->Write("\tint light_count;\n"); -} - -static void GenerateLighting(ShaderCode* out, const LightingUidData& uid_data, int index, - int litchan_index, u32 channel_index, u32 custom_light_index, - bool alpha) +static void GenerateLightingImpl(ShaderCode* out, const LightingUidData& uid_data, int index, + int litchan_index, u32 channel_index, u32 custom_light_index, + bool alpha) { const auto attnfunc = static_cast((uid_data.attnfunc >> (2 * litchan_index)) & 0x3); @@ -225,60 +192,59 @@ static void GenerateLighting(ShaderCode* out, const LightingUidData& uid_data, i const std::string name = fmt::format("lights_chan{}_{}", channel_index, light_type); out->Write("\t{{\n"); - out->Write("\t\tcustom_data.{}[{}].direction = " LIGHT_DIR ".xyz;\n", name, custom_light_index, + out->Write("\t\tfrag_input.{}[{}].direction = " LIGHT_DIR ".xyz;\n", name, custom_light_index, 
LIGHT_DIR_PARAMS(index)); - out->Write("\t\tcustom_data.{}[{}].position = " LIGHT_POS ".xyz;\n", name, custom_light_index, + out->Write("\t\tfrag_input.{}[{}].position = " LIGHT_POS ".xyz;\n", name, custom_light_index, LIGHT_POS_PARAMS(index)); - out->Write("\t\tcustom_data.{}[{}].cosatt = " LIGHT_COSATT ";\n", name, custom_light_index, + out->Write("\t\tfrag_input.{}[{}].cosatt = " LIGHT_COSATT ";\n", name, custom_light_index, LIGHT_COSATT_PARAMS(index)); - out->Write("\t\tcustom_data.{}[{}].distatt = " LIGHT_DISTATT ";\n", name, custom_light_index, + out->Write("\t\tfrag_input.{}[{}].distatt = " LIGHT_DISTATT ";\n", name, custom_light_index, LIGHT_DISTATT_PARAMS(index)); - out->Write("\t\tcustom_data.{}[{}].attenuation_type = {};\n", name, custom_light_index, + out->Write("\t\tfrag_input.{}[{}].attenuation_type = {};\n", name, custom_light_index, static_cast(attnfunc)); if (alpha) { - out->Write("\t\tcustom_data.{}[{}].color = float3(" LIGHT_COL + out->Write("\t\tfrag_input.{}[{}].color = float3(" LIGHT_COL ") / float3(255.0, 255.0, 255.0);\n", name, custom_light_index, LIGHT_COL_PARAMS(index, alpha ? "a" : "rgb")); } else { - out->Write("\t\tcustom_data.{}[{}].color = " LIGHT_COL " / float3(255.0, 255.0, 255.0);\n", - name, custom_light_index, LIGHT_COL_PARAMS(index, alpha ? "a" : "rgb")); + out->Write("\t\tfrag_input.{}[{}].color = " LIGHT_COL " / float3(255.0, 255.0, 255.0);\n", name, + custom_light_index, LIGHT_COL_PARAMS(index, alpha ? 
"a" : "rgb")); } out->Write("\t}}\n"); } -void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData& uid_data, - std::string_view in_color_name) +void GenerateCustomLighting(ShaderCode* out, const LightingUidData& uid_data) { for (u32 i = 0; i < 8; i++) { for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++) { - out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", channel_index, + out->Write("\tfrag_input.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", channel_index, i); + out->Write("\tfrag_input.lights_chan{}_color[{}].attenuation_type = 0;\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", channel_index, + 
out->Write("\tfrag_input.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", + out->Write("\tfrag_input.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", channel_index, i); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", channel_index, i); + out->Write("\tfrag_input.lights_chan{}_alpha[{}].attenuation_type = 0;\n", channel_index, i); } } @@ -286,20 +252,20 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData { const bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex - out->Write("custom_data.base_material[{}] = {}{};\n", j, in_color_name, j); + out->Write("frag_input.base_material[{}] = frag_input.color_{};\n", j, j); else // from color - out->Write("custom_data.base_material[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j + 2); + out->Write("frag_input.base_material[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j + 2); if ((uid_data.enablelighting & (1 << j)) != 0) { if ((uid_data.ambsource & (1 << j)) != 0) // from vertex - out->Write("custom_data.ambient_lighting[{}] = {}{};\n", j, in_color_name, j); + out->Write("frag_input.ambient_lighting[{}] = frag_input.color_{};\n", j, j); else // from color - out->Write("custom_data.ambient_lighting[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j); + out->Write("frag_input.ambient_lighting[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j); } else { - out->Write("custom_data.ambient_lighting[{}] = float4(1, 1, 1, 1);\n", j); + out->Write("frag_input.ambient_lighting[{}] = float4(1, 1, 1, 1);\n", j); } // check if alpha is different @@ -307,21 +273,21 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData if 
(alphamatsource != colormatsource) { if (alphamatsource) // from vertex - out->Write("custom_data.base_material[{}].w = {}{}.w;\n", j, in_color_name, j); + out->Write("frag_input.base_material[{}].w = frag_input.color_{}.w;\n", j, j); else // from color - out->Write("custom_data.base_material[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j + 2); + out->Write("frag_input.base_material[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j + 2); } if ((uid_data.enablelighting & (1 << (j + 2))) != 0) { if ((uid_data.ambsource & (1 << (j + 2))) != 0) // from vertex - out->Write("custom_data.ambient_lighting[{}].w = {}{}.w;\n", j, in_color_name, j); + out->Write("frag_input.ambient_lighting[{}].w = frag_input.color_{}.w;\n", j, j); else // from color - out->Write("custom_data.ambient_lighting[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j); + out->Write("frag_input.ambient_lighting[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j); } else { - out->Write("custom_data.ambient_lighting[{}].w = 1;\n", j); + out->Write("frag_input.ambient_lighting[{}].w = 1;\n", j); } u32 light_count = 0; @@ -331,12 +297,12 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData { if ((uid_data.light_mask & (1 << (i + 8 * j))) != 0) { - GenerateLighting(out, uid_data, i, j, j, light_count, false); + GenerateLightingImpl(out, uid_data, i, j, j, light_count, false); light_count++; } } } - out->Write("\tcustom_data.light_chan{}_color_count = {};\n", j, light_count); + out->Write("\tfrag_input.light_chan{}_color_count = {};\n", j, light_count); light_count = 0; if ((uid_data.enablelighting & (1 << (j + 2))) != 0) // Alpha lights @@ -345,11 +311,11 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData { if ((uid_data.light_mask & (1 << (i + 8 * (j + 2)))) != 0) { - GenerateLighting(out, uid_data, i, j + 2, j, light_count, true); + GenerateLightingImpl(out, uid_data, i, j + 2, j, light_count, true); light_count++; } } } - 
out->Write("\tcustom_data.light_chan{}_alpha_count = {};\n", j, light_count); + out->Write("\tfrag_input.light_chan{}_alpha_count = {};\n", j, light_count); } } diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index b06ec40c4a..e7d6f1ed13 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -44,10 +44,6 @@ constexpr char s_lighting_struct[] = "struct Light {\n" "\tfloat4 dir;\n" "};\n"; -void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, - std::string_view in_color_name, std::string_view dest); +void GenerateLightingShaderHeader(ShaderCode& object, const LightingUidData& uid_data); void GetLightingShaderUid(LightingUidData& uid_data); - -void GenerateCustomLightingHeaderDetails(ShaderCode* out, u32 enablelighting, u32 light_mask); -void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData& uid_data, - std::string_view in_color_name); +void GenerateCustomLighting(ShaderCode* out, const LightingUidData& uid_data); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index a76681634f..22a980f4e6 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -427,14 +427,6 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, out.Write("}};\n"); } - if (!custom_details.shaders.empty() && - !custom_details.shaders.back().material_uniform_block.empty()) - { - out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n"); - out.Write("{}", custom_details.shaders.back().material_uniform_block); - out.Write("}} custom_uniforms;\n"); - } - if (bounding_box) { out.Write("SSBO_BINDING(0) coherent buffer BBox {{\n" @@ -761,599 +753,13 @@ uint WrapCoord(int coord, uint wrap, int size) {{ } } -void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_stages, bool per_pixel_lighting, - const 
pixel_shader_uid_data* uid_data) -{ - out->Write("\tCustomShaderData custom_data;\n"); - - if (per_pixel_lighting) - { - out->Write("\tcustom_data.position = WorldPos;\n"); - out->Write("\tcustom_data.normal = Normal;\n"); - } - else - { - out->Write("\tcustom_data.position = float3(0, 0, 0);\n"); - out->Write("\tcustom_data.normal = float3(0, 0, 0);\n"); - } - - if (uid_data->genMode_numtexgens == 0) [[unlikely]] - { - out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n"); - } - else - { - for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) - { - out->Write("\tif (tex{0}.z == 0.0)\n", i); - out->Write("\t{{\n"); - out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i); - out->Write("\t}}\n"); - out->Write("\telse {{\n"); - out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i); - out->Write("\t}}\n"); - } - } - - for (u32 i = 0; i < 8; i++) - { - // Shader compilation complains if every index isn't initialized - out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = 0;\n", i); - } - - for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) - { - if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0) - { - u32 texcoord = uid_data->GetTevindirefCoord(i); - const u32 texmap = uid_data->GetTevindirefMap(i); - - // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does - // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). - // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set - // the number of tex gens to 2 (bug 11462). 
- if (texcoord >= uid_data->genMode_numtexgens) - texcoord = 0; - - out->Write("\tcustom_data.texmap_to_texcoord_index[{}] = {};\n", texmap, texcoord); - } - } - out->Write("\tcustom_data.texcoord_count = {};\n", uid_data->genMode_numtexgens); - - // Try and do a best guess on what the texcoord index is - // Note: one issue with this would be textures that are used - // multiple times in the same draw but with different texture coordinates. - // In that scenario, only the last texture coordinate would be defined. - // This issue can be seen in how Rogue Squadron 2 does bump mapping - for (u32 i = 0; i < num_stages; i++) - { - auto& tevstage = uid_data->stagehash[i]; - // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does - // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). - u32 texcoord = tevstage.tevorders_texcoord; - const bool has_tex_coord = texcoord < uid_data->genMode_numtexgens; - if (!has_tex_coord) - texcoord = 0; - - out->Write("\tcustom_data.texmap_to_texcoord_index[{}] = {};\n", tevstage.tevorders_texmap, - texcoord); - } - - if (per_pixel_lighting) - GenerateCustomLightingImplementation(out, uid_data->lighting, "colors_"); - - for (u32 i = 0; i < 16; i++) - { - // Shader compilation complains if every struct isn't initialized - - // Color Input - for (u32 j = 0; j < 4; j++) - { - out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = " - "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", - i, j); - out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = " - "float3(0, 0, 0);\n", - i, j); - } - - // Alpha Input - for (u32 j = 0; j < 4; j++) - { - out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = " - "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", - i, j); - out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = " - "float(0);\n", - i, j); - } - - // Texmap - out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i); - 
- // Output - out->Write("\tcustom_data.tev_stages[{}].output_color = " - "float4(0, 0, 0, 0);\n", - i); - } - - // Actual data will be filled out in the tev stage code, just set the - // stage count for now - out->Write("\tcustom_data.tev_stage_count = {};\n", num_stages); - - // Time - out->Write("\tcustom_data.time_ms = time_ms;\n"); -} - static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, APIType api_type, bool stereo, bool has_custom_shaders); static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, bool clamp, TevScale scale); static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type, bool per_pixel_depth, bool use_dual_source); -static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); -static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data); static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data); -static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, - bool use_dual_source); -static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data); - -ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config, - const pixel_shader_uid_data* uid_data, - const CustomPixelShaderContents& custom_details) -{ - ShaderCode out; - - const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - const bool msaa = host_config.msaa; - const bool ssaa = host_config.ssaa; - const bool stereo = host_config.stereo; - const u32 numStages = uid_data->genMode_numtevstages + 1; - - out.Write("// Pixel Shader for TEV stages\n"); - out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages, - uid_data->genMode_numtexgens, uid_data->genMode_numindstages); - - // Stuff that is shared between ubershaders and pixelgen. 
- WriteBitfieldExtractHeader(out, api_type, host_config); - - WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box, custom_details); - - // Custom shader details - WriteCustomShaderStructDef(&out, uid_data->genMode_numtexgens); - for (std::size_t i = 0; i < custom_details.shaders.size(); i++) - { - const auto& shader_details = custom_details.shaders[i]; - out.Write(fmt::runtime(shader_details.custom_shader), i); - } - - out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " - "sampleTexture(texmap, samp[texmap], uv, layer)\n"); - - if (uid_data->ztest == EmulatedZ::ForcedEarly) - { - // Zcomploc (aka early_ztest) is a way to control whether depth test is done before - // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate - // this feature properly until 2012: Depth tests were always done after alpha testing. - // Most importantly, it was not possible to write to the depth buffer without also writing - // a color value (unless color writing was disabled altogether). - - // OpenGL 4.2 actually provides two extensions which can force an early z test: - // * ARB_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z - // and stencil tests early. - // * ARB_conservative_depth has 'layout(depth_unchanged) which signals to the driver that it - // can make optimisations - // which assume the pixel shader won't update the depth buffer. - - // early_fragment_tests is the best option, as it requires the driver to do early-z and defines - // early-z exactly as - // we expect, with discard causing the shader to exit with only the depth buffer updated. - - // Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation - // can be made and - // doesn't define what will happen if we discard the fragment. But the way modern graphics - // hardware is implemented - // means it is not unreasonable to expect the same behaviour as early_fragment_tests. 
- // We can also assume that if a driver has gone out of its way to support conservative depth and - // not image_load_store - // as required by OpenGL 4.2 that it will be doing the optimisation. - // If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth - // will only be written - // if the alpha test passes. - - // We support Conservative as a fallback, because many drivers based on Mesa haven't implemented - // all of the - // ARB_image_load_store extension yet. - - // This is a #define which signals whatever early-z method the driver supports. - out.Write("FORCE_EARLY_Z; \n"); - } - - const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable || - uid_data->ztest == EmulatedZ::EarlyWithFBFetch; - -#ifdef __APPLE__ - // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) - // if we want to use it. - if (api_type == APIType::Vulkan || api_type == APIType::Metal) - { - if (!uid_data->no_dual_src) - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - else - { - // Metal doesn't support a single unified variable for both input and output, - // so when using framebuffer fetch, we declare the input separately below. - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - - if (use_framebuffer_fetch) - { - // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. - out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); - } - } - else -#endif - { - if (use_framebuffer_fetch) - { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); - } - else - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out {} ocol0;\n", - uid_data->uint_output ? 
"uvec4" : "vec4"); - } - - if (!uid_data->no_dual_src) - { - out.Write("{} out {} ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)", - uid_data->uint_output ? "uvec4" : "vec4"); - } - } - - if (uid_data->per_pixel_depth) - out.Write("#define depth gl_FragDepth\n"); - - if (host_config.backend_geometry_shaders) - { - out.Write("VARYING_LOCATION(0) in VertexData {{\n"); - GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config, - GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel); - - out.Write("}};\n"); - if (stereo && !host_config.backend_gl_layer_in_fs) - out.Write("flat in int layer;"); - } - else - { - // Let's set up attributes - u32 counter = 0; - out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) - { - out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); - } - if (!host_config.fast_depth_calc) - { - out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - if (per_pixel_lighting) - { - out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - } - - out.Write("void main()\n{{\n"); - out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); - - bool has_custom_shaders = false; - if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(), - [](const std::optional& ps) { return ps.has_value(); })) - { - WriteCustomShaderStructImpl(&out, numStages, per_pixel_lighting, uid_data); - has_custom_shaders = true; - } - - if (use_framebuffer_fetch) - { - // Store off a copy of the initial framebuffer 
value. - // - // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the - // framebuffer), we read from real_ocol0. - out.Write("#ifdef FB_FETCH_VALUE\n" - "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" - "#else\n" - "\tfloat4 initial_ocol0 = real_ocol0;\n" - "#endif\n"); - - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so we pull out the "real" output - // value above and use a temporary for calculations, then set the output value once at the - // end of the shader. - out.Write("\tfloat4 ocol0;\n"); - } - - if (uid_data->blend_enable) - { - out.Write("\tfloat4 ocol1;\n"); - } - - if (host_config.backend_geometry_shaders && stereo) - { - if (host_config.backend_gl_layer_in_fs) - out.Write("\tint layer = gl_Layer;\n"); - } - else - { - out.Write("\tint layer = 0;\n"); - } - - out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS - "[3], prev = " I_COLORS "[0];\n" - "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, " - "0, 0);\n" - "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" - "\tint alphabump=0;\n" - "\tint3 tevcoord=int3(0, 0, 0);\n" - "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n" - "\tint4 " - "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0," - "0);\n\n"); // tev combiner inputs - - // On GLSL, input variables must not be assigned to. - // This is why we declare these variables locally instead. - out.Write("\tfloat4 col0 = colors_0;\n" - "\tfloat4 col1 = colors_1;\n"); - - if (per_pixel_lighting) - { - out.Write("\tfloat3 _normal = normalize(Normal.xyz);\n\n" - "\tfloat3 pos = WorldPos;\n"); - - out.Write("\tint4 lacc;\n" - "\tfloat3 ldir, h, cosAttn, distAttn;\n" - "\tfloat dist, dist2, attn;\n"); - - // TODO: Our current constant usage code isn't able to handle more than one buffer. 
- // So we can't mark the VS constant as used here. But keep them here as reference. - // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further - // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further - // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - GenerateLightingShaderCode(out, uid_data->lighting, "colors_", "col"); - // The number of colors available to TEV is determined by numColorChans. - // Normally this is performed in the vertex shader after lighting, but with per-pixel lighting, - // we need to perform it here. (It needs to be done after lighting, as what was originally - // black might become a different color after lighting). - if (uid_data->numColorChans == 0) - out.Write("col0 = float4(0.0, 0.0, 0.0, 0.0);\n"); - if (uid_data->numColorChans <= 1) - out.Write("col1 = float4(0.0, 0.0, 0.0, 0.0);\n"); - } - - if (uid_data->genMode_numtexgens == 0) - { - // TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex - // coord indices to 0. Ideally, it shouldn't exist at all, but the exact behavior hasn't been - // tested. - out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); - } - else - { - out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1); - for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) - { - out.Write("\tint2 fixpoint_uv{} = int2(", i); - out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); - out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i); - // TODO: S24 overflows here? - } - } - - for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) - { - if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0) - { - u32 texcoord = uid_data->GetTevindirefCoord(i); - const u32 texmap = uid_data->GetTevindirefMap(i); - - // Quirk: when the tex coord is not less than the number of tex gens (i.e. 
the tex coord does - // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). - // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set - // the number of tex gens to 2 (bug 11462). - if (texcoord >= uid_data->genMode_numtexgens) - texcoord = 0; - - out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); - out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, - (i & 1) ? "zw" : "xy"); - - out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i, - texmap); - } - } - - for (u32 i = 0; i < numStages; i++) - { - // Build the equation for this stage - WriteStage(out, uid_data, i, api_type, stereo, has_custom_shaders); - } - - { - // The results of the last texenv stage are put onto the screen, - // regardless of the used destination register - TevStageCombiner::ColorCombiner last_cc; - TevStageCombiner::AlphaCombiner last_ac; - last_cc.hex = uid_data->stagehash[uid_data->genMode_numtevstages].cc; - last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; - if (last_cc.dest != TevOutput::Prev) - { - out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); - } - if (last_ac.dest != TevOutput::Prev) - { - out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); - } - } - out.Write("\tprev = prev & 255;\n"); - - // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled - // (in this case we need to write a depth value if depth test passes regardless of the alpha - // testing result) - if (uid_data->Pretest == AlphaTestResult::Undetermined || - (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late)) - { - WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth, - !uid_data->no_dual_src || uid_data->blend_enable); - } - - // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the - // 
alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow - // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over - // the center of the screen several times, but those rectangles shouldn't be visible). - // Blending seems to result in no changes to the output with an alpha of 1, even if the input - // color is white. - // TODO: Investigate this further: we might be handling blending incorrectly in general (though - // there might not be any good way of changing blending behavior) - out.Write("\t// Hardware testing indicates that an alpha of 1 can pass an alpha test,\n" - "\t// but doesn't do anything in blending\n" - "\tif (prev.a == 1) prev.a = 0;\n"); - - if (uid_data->zfreeze) - { - out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE); - out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE); - - out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); - - // Opengl has reversed vertical screenspace coordinates - if (api_type == APIType::OpenGL) - out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT); - - out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE - ".y * screenpos.y);\n"); - } - else if (!host_config.fast_depth_calc) - { - // FastDepth means to trust the depth generated in perspective division. - // It should be correct, but it seems not to be as accurate as required. TODO: Find out why! - // For disabled FastDepth we just calculate the depth value again. - // The performance impact of this additional calculation doesn't matter, but it prevents - // the host GPU driver from performing any early depth test optimizations. 
- out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1); - // the screen space depth value = far z + (clip z / clip w) * z range - out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS - "[1].y));\n"); - } - else - { - if (!host_config.backend_reversed_depth_range) - out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); - else - out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); - } - out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); - - // depth texture can safely be ignored if the result won't be written to the depth buffer - // (early_ztest) and isn't used for fog either - const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off; - - // Note: z-textures are not written to depth buffer if early depth test is used - const bool early_ztest = uid_data->ztest == EmulatedZ::Early || - uid_data->ztest == EmulatedZ::EarlyWithFBFetch || - uid_data->ztest == EmulatedZ::EarlyWithZComplocHack; - if (uid_data->per_pixel_depth && early_ztest) - { - if (!host_config.backend_reversed_depth_range) - out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); - else - out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); - } - - // Note: depth texture output is only written to depth buffer if late depth test is used - // theoretical final depth value is used for fog calculation, though, so we have to emulate - // ztextures anyway - if (uid_data->ztex_op != ZTexOp::Disabled && !skip_ztexture) - { - // use the texture input of the last texture stage (textemp), hopefully this has been read and - // is in correct format... - out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1); - out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n", - (uid_data->ztex_op == ZTexOp::Add) ? 
"+ zCoord" : ""); - out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); - } - - if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late) - { - if (!host_config.backend_reversed_depth_range) - out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); - else - out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); - } - - // No dithering for RGB8 mode - if (uid_data->dither) - { - // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering - // Here the matrix is encoded into the two factor constants - out.Write("\tint2 dither = int2(rawpos.xy) & 1;\n"); - out.Write("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n"); - } - - WriteFog(out, uid_data); - - for (std::size_t i = 0; i < custom_details.shaders.size(); i++) - { - const auto& shader_details = custom_details.shaders[i]; - - if (!shader_details.custom_shader.empty()) - { - out.Write("\t{{\n"); - out.Write("\t\tcustom_data.final_color = float4(prev.r / 255.0, prev.g / 255.0, prev.b " - "/ 255.0, prev.a / 255.0);\n"); - out.Write("\t\tCustomShaderOutput custom_output = {}_{}(custom_data);\n", - CUSTOM_PIXELSHADER_COLOR_FUNC, i); - out.Write("\t\tprev = int4(custom_output.main_rt.r * 255, custom_output.main_rt.g * 255, " - "custom_output.main_rt.b * 255, custom_output.main_rt.a * 255);\n"); - out.Write("\t}}\n\n"); - } - } - - if (uid_data->logic_op_enable) - WriteLogicOp(out, uid_data); - else if (uid_data->emulate_logic_op_with_blend) - WriteLogicOpBlend(out, uid_data); - - // Write the color and alpha values to the framebuffer - // If using shader blend, we still use the separate alpha - const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable; - WriteColor(out, api_type, uid_data, use_dual_source); - - if (uid_data->blend_enable) - WriteBlend(out, uid_data); - else if (use_framebuffer_fetch) - out.Write("\treal_ocol0 = ocol0;\n"); - - if (uid_data->bounding_box) - out.Write("\tUpdateBoundingBox(rawpos.xy);\n"); - - out.Write("}}\n"); - - return 
out; -} static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, APIType api_type, bool stereo, bool has_custom_shaders) @@ -1750,58 +1156,6 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write(", -1024, 1023)"); out.Write(";\n"); - - if (has_custom_shaders) - { - // Color input - out.Write( - "\tcustom_data.tev_stages[{}].input_color[0].value = {} / float3(255.0, 255.0, 255.0);\n", - n, tev_c_input_table[cc.a]); - out.Write("\tcustom_data.tev_stages[{}].input_color[0].input_type = {};\n", n, - tev_c_input_type[cc.a]); - out.Write( - "\tcustom_data.tev_stages[{}].input_color[1].value = {} / float3(255.0, 255.0, 255.0);\n", - n, tev_c_input_table[cc.b]); - out.Write("\tcustom_data.tev_stages[{}].input_color[1].input_type = {};\n", n, - tev_c_input_type[cc.b]); - out.Write( - "\tcustom_data.tev_stages[{}].input_color[2].value = {} / float3(255.0, 255.0, 255.0);\n", - n, tev_c_input_table[cc.c]); - out.Write("\tcustom_data.tev_stages[{}].input_color[2].input_type = {};\n", n, - tev_c_input_type[cc.c]); - out.Write( - "\tcustom_data.tev_stages[{}].input_color[3].value = {} / float3(255.0, 255.0, 255.0);\n", - n, tev_c_input_table[cc.d]); - out.Write("\tcustom_data.tev_stages[{}].input_color[3].input_type = {};\n", n, - tev_c_input_type[cc.d]); - - // Alpha input - out.Write("\tcustom_data.tev_stages[{}].input_alpha[0].value = {} / float(255.0);\n", n, - tev_a_input_table[ac.a]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[0].input_type = {};\n", n, - tev_a_input_type[ac.a]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[1].value = {} / float(255.0);\n", n, - tev_a_input_table[ac.b]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[1].input_type = {};\n", n, - tev_a_input_type[ac.b]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[2].value = {} / float(255.0);\n", n, - tev_a_input_table[ac.c]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[2].input_type = {};\n", 
n, - tev_a_input_type[ac.c]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[3].value = {} / float(255.0);\n", n, - tev_a_input_table[ac.d]); - out.Write("\tcustom_data.tev_stages[{}].input_alpha[3].input_type = {};\n", n, - tev_a_input_type[ac.d]); - - // Texmap - out.Write("\tcustom_data.tev_stages[{}].texmap = {}u;\n", n, stage.tevorders_texmap); - - // Output - out.Write("\tcustom_data.tev_stages[{}].output_color.rgb = {} / float3(255.0, 255.0, 255.0);\n", - n, tev_c_output_table[cc.dest]); - out.Write("\tcustom_data.tev_stages[{}].output_color.a = {} / float(255.0);\n", n, - tev_a_output_table[ac.dest]); - } } static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, @@ -1961,10 +1315,57 @@ constexpr Common::EnumMap tev_fog_funcs_ta "\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2 }; -static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) +static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) { + switch (static_cast(uid_data->logic_op_mode)) + { + case LogicOp::Clear: + case LogicOp::NoOp: + out.Write("\tprev = int4(0, 0, 0, 0);\n"); + break; + case LogicOp::Copy: + // Do nothing! 
+ break; + case LogicOp::CopyInverted: + out.Write("\tprev ^= 255;\n"); + break; + case LogicOp::Set: + case LogicOp::Invert: // In cooperation with blend + out.Write("\tprev = int4(255, 255, 255, 255);\n"); + break; + default: + break; + } +} + +namespace PixelShader +{ +void WriteDitherHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) +{ + if (uid_data->dither) + { + out.Write("ivec3 dolphin_calculate_dither(ivec4 prev, ivec4 pos)\n"); + out.Write("{{\n"); + // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering + // Here the matrix is encoded into the two factor constants + out.Write("\tint2 dither = int2(pos.xy) & 1;\n"); + out.Write("\treturn (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n"); + out.Write("}}\n\n"); + } +} + +void WriteFogHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) +{ + out.Write("ivec3 dolphin_calculate_fog(ivec4 color, vec4 pos, int zCoord)\n"); + out.Write("{{\n"); if (uid_data->fog_fsel == FogType::Off) + { + out.Write("\treturn color.rgb;\n"); + out.Write("}}\n\n"); return; // no Fog + } out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGI, C_FOGI); @@ -1992,7 +1393,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); - out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" + out.Write("\tfloat offset = (2.0 * (pos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" "\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n" "\tuint indexlower = uint(floatindex);\n" "\tuint indexupper = indexlower + 1u;\n" @@ -2016,119 +1417,198 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) } out.Write("\tint ifog = iround(fog * 256.0);\n"); - out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " 
I_FOGCOLOR ".rgb * ifog) >> 8;\n"); + out.Write("\treturn (color.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); + out.Write("}}\n\n"); } -static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data) +void WriteDepthHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) +{ + out.Write("int dolphin_calculate_zcoord(vec4 rawpos, vec4 clipPos, ivec4 last_stage_texmap)\n"); + out.Write("{{\n"); + + if (uid_data->zfreeze) + { + out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE); + out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE); + + out.Write("\tvec2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); + + // Opengl has reversed vertical screenspace coordinates + if (api_type == APIType::OpenGL) + out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT); + + out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE + ".y * screenpos.y);\n"); + } + else if (!host_config.fast_depth_calc) + { + // FastDepth means to trust the depth generated in perspective division. + // It should be correct, but it seems not to be as accurate as required. TODO: Find out why! + // For disabled FastDepth we just calculate the depth value again. + // The performance impact of this additional calculation doesn't matter, but it prevents + // the host GPU driver from performing any early depth test optimizations. 
+ out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1); + // the screen space depth value = far z + (clip z / clip w) * z range + out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS + "[1].y));\n"); + } + else + { + if (!host_config.backend_reversed_depth_range) + out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); + else + out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); + } + out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); + + // depth texture can safely be ignored if the result won't be written to the depth buffer + // (early_ztest) and isn't used for fog either + const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off; + + // Note: depth texture output is only written to depth buffer if late depth test is used + // theoretical final depth value is used for fog calculation, though, so we have to emulate + // ztextures anyway + if (uid_data->ztex_op != ZTexOp::Disabled && !skip_ztexture) + { + // use the texture input of the last texture stage, hopefully this has been read and + // is in correct format... + out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1); + out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, last_stage_texmap.xyzw) + " I_ZBIAS + "[1].w {};\n", + (uid_data->ztex_op == ZTexOp::Add) ? 
"+ zCoord" : ""); + out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); + } + + out.Write("\treturn zCoord;\n"); + + out.Write("}}\n\n"); +} + +void WriteLogicOpHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) { static constexpr std::array logic_op_mode{ - "int4(0, 0, 0, 0)", // CLEAR - "prev & fb_value", // AND - "prev & ~fb_value", // AND_REVERSE - "prev", // COPY - "~prev & fb_value", // AND_INVERTED - "fb_value", // NOOP - "prev ^ fb_value", // XOR - "prev | fb_value", // OR - "~(prev | fb_value)", // NOR - "~(prev ^ fb_value)", // EQUIV - "~fb_value", // INVERT - "prev | ~fb_value", // OR_REVERSE - "~prev", // COPY_INVERTED - "~prev | fb_value", // OR_INVERTED - "~(prev & fb_value)", // NAND - "int4(255, 255, 255, 255)", // SET + "ivec4(0, 0, 0, 0)", // CLEAR + "prev & fb_value", // AND + "prev & ~fb_value", // AND_REVERSE + "prev", // COPY + "~prev & fb_value", // AND_INVERTED + "fb_value", // NOOP + "prev ^ fb_value", // XOR + "prev | fb_value", // OR + "~(prev | fb_value)", // NOR + "~(prev ^ fb_value)", // EQUIV + "~fb_value", // INVERT + "prev | ~fb_value", // OR_REVERSE + "~prev", // COPY_INVERTED + "~prev | fb_value", // OR_INVERTED + "~(prev & fb_value)", // NAND + "ivec4(255, 255, 255, 255)", // SET }; - out.Write("\tint4 fb_value = iround(initial_ocol0 * 255.0);\n"); - out.Write("\tprev = ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]); + out.Write("ivec4 dolphin_calculate_logicop(vec4 color, vec4 prev)\n"); + out.Write("{{\n"); + + out.Write("\tivec4 fb_value = iround(color * 255.0);\n"); + out.Write("\treturn ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]); + out.Write("}}\n\n"); } -static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) +void WriteColorHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) { - switch (static_cast(uid_data->logic_op_mode)) + if 
(uid_data->uint_output) { - case LogicOp::Clear: - case LogicOp::NoOp: - out.Write("\tprev = int4(0, 0, 0, 0);\n"); - break; - case LogicOp::Copy: - // Do nothing! - break; - case LogicOp::CopyInverted: - out.Write("\tprev ^= 255;\n"); - break; - case LogicOp::Set: - case LogicOp::Invert: // In cooperation with blend - out.Write("\tprev = int4(255, 255, 255, 255);\n"); - break; - default: - break; + out.Write("uvec4 dolphin_calculate_final_color0(ivec4 prev)\n"); } -} + else + { + out.Write("vec4 dolphin_calculate_final_color0(ivec4 prev)\n"); + } + out.Write("{{\n"); -static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, - bool use_dual_source) -{ // Some backends require the shader outputs be uint when writing to a uint render target for logic // op. if (uid_data->uint_output) { if (uid_data->rgba6_format) - out.Write("\tocol0 = uint4(prev & 0xFC);\n"); + out.Write("\treturn uint4(prev & 0xFC);\n"); else - out.Write("\tocol0 = uint4(prev);\n"); - return; - } - - if (uid_data->rgba6_format) - out.Write("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n"); - else - out.Write("\tocol0.rgb = float3(prev.rgb) / 255.0;\n"); - - // Colors will be blended against the 8-bit alpha from ocol1 and - // the 6-bit alpha from ocol0 will be written to the framebuffer - if (uid_data->useDstAlpha) - { - out.SetConstantsUsed(C_ALPHA, C_ALPHA); - out.Write("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n"); - - // Use dual-source color blending to perform dst alpha in a single pass - if (use_dual_source) - out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + out.Write("\treturn uint4(prev);\n"); } else { - out.Write("\tocol0.a = float(prev.a >> 2) / 63.0;\n"); - if (use_dual_source) - out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + out.Write("\tvec4 result;\n"); + if (uid_data->rgba6_format) + out.Write("\tresult.rgb = float3(prev.rgb >> 2) / 63.0;\n"); + else + out.Write("\tresult.rgb = 
float3(prev.rgb) / 255.0;\n"); + + // Colors will be blended against the 8-bit alpha from ocol1 and + // the 6-bit alpha from ocol0 will be written to the framebuffer + if (uid_data->useDstAlpha) + { + out.SetConstantsUsed(C_ALPHA, C_ALPHA); + out.Write("\tresult.a = float(" I_ALPHA ".a >> 2) / 63.0;\n"); + } + else + { + out.Write("\tresult.a = float(prev.a >> 2) / 63.0;\n"); + } + out.Write("\treturn result;\n"); } + + out.Write("}}\n\n"); + + const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable; + if (!uid_data->uint_output && use_dual_source) + { + out.Write("vec4 dolphin_calculate_final_color1(ivec4 prev)\n"); + out.Write("{{\n"); + + // Colors will be blended against the 8-bit alpha from ocol1 and + // the 6-bit alpha from ocol0 will be written to the framebuffer + if (uid_data->useDstAlpha) + { + // Use dual-source color blending to perform dst alpha in a single pass + out.Write("\treturn vec4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + } + else + { + out.Write("\treturn vec4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + } + } + + out.Write("}}\n\n"); } -static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) +void WriteBlendHeader(ShaderCode& out, const pixel_shader_uid_data* uid_data) { if (uid_data->blend_enable) { + out.Write("vec4 dolphin_calculate_blend(vec4 initial_color, vec4 src_color)\n"); + out.Write("{{\n"); using Common::EnumMap; static constexpr EnumMap blend_src_factor{ "float3(0,0,0);", // ZERO "float3(1,1,1);", // ONE - "initial_ocol0.rgb;", // DSTCLR - "float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR + "initial_color.rgb;", // DSTCLR + "float3(1,1,1) - initial_color.rgb;", // INVDSTCLR "src_color.aaa;", // SRCALPHA "float3(1,1,1) - src_color.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + "initial_color.aaa;", // DSTALPHA + "float3(1,1,1) - initial_color.aaa;", // INVDSTALPHA }; static constexpr EnumMap 
blend_src_factor_alpha{ "0.0;", // ZERO "1.0;", // ONE - "initial_ocol0.a;", // DSTCLR - "1.0 - initial_ocol0.a;", // INVDSTCLR + "initial_color.a;", // DSTCLR + "1.0 - initial_color.a;", // INVDSTCLR "src_color.a;", // SRCALPHA "1.0 - src_color.a;", // INVSRCALPHA - "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA + "initial_color.a;", // DSTALPHA + "1.0 - initial_color.a;", // INVDSTALPHA }; static constexpr EnumMap blend_dst_factor{ "float3(0,0,0);", // ZERO @@ -2137,8 +1617,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "float3(1,1,1) - ocol0.rgb;", // INVSRCCLR "src_color.aaa;", // SRCALHA "float3(1,1,1) - src_color.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + "initial_color.aaa;", // DSTALPHA + "float3(1,1,1) - initial_color.aaa;", // INVDSTALPHA }; static constexpr EnumMap blend_dst_factor_alpha{ "0.0;", // ZERO @@ -2148,11 +1628,9 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "src_color.a;", // SRCALPHA "1.0 - src_color.a;", // INVSRCALPHA "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA + "1.0 - initial_color.a;", // INVDSTALPHA }; - out.Write("\tfloat4 src_color = {};\n" - "\tfloat4 blend_src;", - uid_data->useDstAlpha ? 
"ocol1" : "ocol0"); + out.Write("\tfloat4 blend_src;"); out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]); out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]); out.Write("\tfloat4 blend_dst;\n"); @@ -2162,24 +1640,562 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) out.Write("\tfloat4 blend_result;\n"); if (uid_data->blend_subtract) { - out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * " + out.Write("\tblend_result.rgb = initial_color.rgb * blend_dst.rgb - ocol0.rgb * " "blend_src.rgb;\n"); } else { out.Write( - "\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n"); + "\tblend_result.rgb = initial_color.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n"); } if (uid_data->blend_subtract_alpha) - out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"); + out.Write("\tblend_result.a = initial_color.a * blend_dst.a - ocol0.a * blend_src.a;\n"); else - out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + out.Write("\tblend_result.a = initial_color.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + + out.Write("\treturn blend_result;\n"); + out.Write("}}\n\n"); + } +} + +void WriteEmulatedFragmentBodyHeader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) +{ + constexpr std::string_view emulated_fragment_definition = + "void dolphin_emulated_fragment(in DolphinFragmentInput frag_input, out " + "DolphinFragmentOutput frag_output)"; + out.Write("{}\n", emulated_fragment_definition); + out.Write("{{\n"); + + WriteFragmentBody(api_type, host_config, uid_data, out); + + out.Write("}}\n"); +} + +void WriteFragmentDefinitions(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out, + bool as_comment) +{ + out.Write("struct 
DolphinLightData\n"); + out.Write("{{\n"); + out.Write("\tfloat3 position;\n"); + out.Write("\tfloat3 direction;\n"); + out.Write("\tfloat3 color;\n"); + out.Write("\tuint attenuation_type;\n"); + out.Write("\tfloat4 cosatt;\n"); + out.Write("\tfloat4 distatt;\n"); + out.Write("}};\n\n"); + + out.Write("struct DolphinFragmentInput\n"); + out.Write("{{\n"); + out.Write("\tvec4 color_0;\n"); + out.Write("\tvec4 color_1;\n"); + out.Write("\tint layer;\n"); + out.Write("\tvec3 normal;\n"); + out.Write("\tvec3 position;\n"); + for (u32 i = 0; i < uid_data->genMode_numtexgens; i++) + { + out.Write("\tvec3 tex{};\n", i); + } + for (u32 i = uid_data->genMode_numtexgens; i < 8; i++) + { + out.Write("\tvec3 tex{};\n", i); + } + out.Write("\n"); + + out.Write("\tDolphinLightData[8] lights_chan0_color;\n"); + out.Write("\tDolphinLightData[8] lights_chan0_alpha;\n"); + out.Write("\tDolphinLightData[8] lights_chan1_color;\n"); + out.Write("\tDolphinLightData[8] lights_chan1_alpha;\n"); + out.Write("\tfloat4[2] ambient_lighting;\n"); + out.Write("\tfloat4[2] base_material;\n"); + out.Write("\tuint light_chan0_color_count;\n"); + out.Write("\tuint light_chan0_alpha_count;\n"); + out.Write("\tuint light_chan1_color_count;\n"); + out.Write("\tuint light_chan1_alpha_count;\n"); + + out.Write("}};\n\n"); + + out.Write("struct DolphinFragmentOutput\n"); + out.Write("{{\n"); + out.Write("\tivec4 main;\n"); + out.Write("\tivec4 last_texture;\n"); + out.Write("}};\n\n"); + + // CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE "enum" values + out.Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_NONE = {}u;\n", + static_cast(AttenuationFunc::None)); + out.Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_POINT = {}u;\n", + static_cast(AttenuationFunc::Spec)); + out.Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_DIR = {}u;\n", + static_cast(AttenuationFunc::Dir)); + out.Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_SPOT = {}u;\n", + 
static_cast(AttenuationFunc::Spot)); +} + +void WriteFragmentBody(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out) +{ + const bool per_pixel_lighting = host_config.per_pixel_lighting; + const bool stereo = host_config.stereo; + const u32 numStages = uid_data->genMode_numtevstages + 1; + + out.Write("\tvec4 col0 = frag_input.color_0;\n"); + out.Write("\tvec4 col1 = frag_input.color_1;\n"); + out.Write("\tint layer = frag_input.layer;\n"); + + out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS + "[3], prev = " I_COLORS "[0];\n" + "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, " + "0, 0);\n" + "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" + "\tint alphabump=0;\n" + "\tint3 tevcoord=int3(0, 0, 0);\n" + "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n" + "\tint4 " + "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0," + "0);\n\n"); // tev combiner inputs + + if (per_pixel_lighting) + { + if (uid_data->numColorChans > 0) + { + out.Write("\tcol0 = dolphin_calculate_lighting_chn0(col0, vec4(frag_input.position, 1), " + "frag_input.normal);\n"); + } + else + { + // The number of colors available to TEV is determined by numColorChans. + // We have to provide the fields to match the interface, so set to zero if it's not enabled. + out.Write("\tcol0 = vec4(0.0, 0.0, 0.0, 0.0);\n"); + } + + if (uid_data->numColorChans == 2) + { + out.Write("\tcol1 = dolphin_calculate_lighting_chn1(col1, vec4(frag_input.position, 1), " + "frag_input.normal);\n"); + } + else + { + // The number of colors available to TEV is determined by numColorChans. + // We have to provide the fields to match the interface, so set to zero if it's not enabled. 
+ out.Write("\tcol1 = vec4(0.0, 0.0, 0.0, 0.0);\n"); + } + } + + if (uid_data->genMode_numtexgens == 0) + { + // TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex + // coord indices to 0. Ideally, it shouldn't exist at all, but the exact behavior hasn't been + // tested. + out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); } else { - out.Write("\tfloat4 blend_result = ocol0;\n"); + out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1); + for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) + { + out.Write("\tint2 fixpoint_uv{} = int2(", i); + out.Write("(frag_input.tex{}.z == 0.0 ? frag_input.tex{}.xy : frag_input.tex{}.xy / " + "frag_input.tex{}.z)", + i, i, i, i); + out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i); + // TODO: S24 overflows here? + } } - out.Write("\treal_ocol0 = blend_result;\n"); + for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) + { + if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0) + { + u32 texcoord = uid_data->GetTevindirefCoord(i); + const u32 texmap = uid_data->GetTevindirefMap(i); + + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord + // does not exist), then tex coord 0 is used (though sometimes glitchy effects happen on + // console). This affects the Mario portrait in Luigi's Mansion, where the developers forgot + // to set the number of tex gens to 2 (bug 11462). + if (texcoord >= uid_data->genMode_numtexgens) + texcoord = 0; + + out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); + out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, + (i & 1) ? 
"zw" : "xy"); + + out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i, + texmap); + } + } + + for (u32 i = 0; i < numStages; i++) + { + // Build the equation for this stage + WriteStage(out, uid_data, i, api_type, stereo, false); + } + + { + // The results of the last texenv stage are put onto the screen, + // regardless of the used destination register + TevStageCombiner::ColorCombiner last_cc; + TevStageCombiner::AlphaCombiner last_ac; + last_cc.hex = uid_data->stagehash[uid_data->genMode_numtevstages].cc; + last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; + if (last_cc.dest != TevOutput::Prev) + { + out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); + } + if (last_ac.dest != TevOutput::Prev) + { + out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); + } + } + + out.Write("\tfrag_output.last_texture = textemp;\n"); + out.Write("\tfrag_output.main = prev;\n"); } + +ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, std::string_view custom_pixel, + std::string_view custom_uniforms) +{ + ShaderCode out; + + const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + const bool msaa = host_config.msaa; + const bool ssaa = host_config.ssaa; + const bool stereo = host_config.stereo; + const u32 numStages = uid_data->genMode_numtevstages + 1; + + out.Write("// Pixel Shader for TEV stages\n"); + out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages, + uid_data->genMode_numtexgens, uid_data->genMode_numindstages); + + // Stuff that is shared between ubershaders and pixelgen. 
+ WriteBitfieldExtractHeader(out, api_type, host_config); + + WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box, {}); + + if (per_pixel_lighting) + { + GenerateLightingShaderHeader(out, uid_data->lighting); + } + + WriteDitherHeader(api_type, host_config, uid_data, out); + + WriteFogHeader(api_type, host_config, uid_data, out); + + WriteDepthHeader(api_type, host_config, uid_data, out); + + WriteLogicOpHeader(api_type, host_config, uid_data, out); + + WriteColorHeader(api_type, host_config, uid_data, out); + + WriteBlendHeader(out, uid_data); + + out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " + "sampleTexture(texmap, samp[texmap], uv, layer)\n"); + + if (uid_data->ztest == EmulatedZ::ForcedEarly) + { + // Zcomploc (aka early_ztest) is a way to control whether depth test is done before + // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate + // this feature properly until 2012: Depth tests were always done after alpha testing. + // Most importantly, it was not possible to write to the depth buffer without also writing + // a color value (unless color writing was disabled altogether). + + // OpenGL 4.2 actually provides two extensions which can force an early z test: + // * ARB_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z + // and stencil tests early. + // * ARB_conservative_depth has 'layout(depth_unchanged) which signals to the driver that it + // can make optimisations + // which assume the pixel shader won't update the depth buffer. + + // early_fragment_tests is the best option, as it requires the driver to do early-z and defines + // early-z exactly as + // we expect, with discard causing the shader to exit with only the depth buffer updated. + + // Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation + // can be made and + // doesn't define what will happen if we discard the fragment. 
But the way modern graphics + // hardware is implemented + // means it is not unreasonable to expect the same behaviour as early_fragment_tests. + // We can also assume that if a driver has gone out of its way to support conservative depth and + // not image_load_store + // as required by OpenGL 4.2 that it will be doing the optimisation. + // If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth + // will only be written + // if the alpha test passes. + + // We support Conservative as a fallback, because many drivers based on Mesa haven't implemented + // all of the + // ARB_image_load_store extension yet. + + // This is a #define which signals whatever early-z method the driver supports. + out.Write("FORCE_EARLY_Z; \n"); + } + + const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable || + uid_data->ztest == EmulatedZ::EarlyWithFBFetch; + +#ifdef __APPLE__ + // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) + // if we want to use it. + if (api_type == APIType::Vulkan || api_type == APIType::Metal) + { + if (!uid_data->no_dual_src) + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); + } + else + { + // Metal doesn't support a single unified variable for both input and output, + // so when using framebuffer fetch, we declare the input separately below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); + } + + if (use_framebuffer_fetch) + { + // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. 
+ out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); + } + } + else +#endif + { + if (use_framebuffer_fetch) + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out {} ocol0;\n", + uid_data->uint_output ? "uvec4" : "vec4"); + } + + if (!uid_data->no_dual_src) + { + out.Write("{} out {} ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)", + uid_data->uint_output ? "uvec4" : "vec4"); + } + } + + if (uid_data->per_pixel_depth) + out.Write("#define depth gl_FragDepth\n"); + + if (host_config.backend_geometry_shaders) + { + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config, + GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel); + + out.Write("}};\n"); + if (stereo && !host_config.backend_gl_layer_in_fs) + out.Write("flat in int layer;"); + } + else + { + // Let's set up attributes + u32 counter = 0; + out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) + { + out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); + } + if (!host_config.fast_depth_calc) + { + out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } + if (per_pixel_lighting) + { + out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } + } + + WriteFragmentDefinitions(api_type, host_config, uid_data, out, false); + + if (!custom_uniforms.empty()) + { + 
out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n"); + out.Write("{}", custom_uniforms); + out.Write("}} custom_uniforms;\n"); + } + + WriteEmulatedFragmentBodyHeader(api_type, host_config, uid_data, out); + + if (custom_pixel.empty()) + { + out.Write("{}\n", fragment_definition); + out.Write("{{\n"); + + out.Write("\tdolphin_emulated_fragment(frag_input, frag_output);\n"); + + out.Write("}}\n"); + } + else + { + out.Write("{}\n", custom_pixel); + } + + out.Write("void main()\n{{\n"); + out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); + + if (use_framebuffer_fetch) + { + // Store off a copy of the initial framebuffer value. + // + // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the + // framebuffer), we read from real_ocol0. + out.Write("#ifdef FB_FETCH_VALUE\n" + "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" + "#else\n" + "\tfloat4 initial_ocol0 = real_ocol0;\n" + "#endif\n"); + + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader. 
+ out.Write("\tfloat4 ocol0;\n"); + } + + if (uid_data->blend_enable) + { + out.Write("\tfloat4 ocol1;\n"); + } + + if (host_config.backend_geometry_shaders && stereo) + { + if (host_config.backend_gl_layer_in_fs) + out.Write("\tint layer = gl_Layer;\n"); + } + else + { + out.Write("\tint layer = 0;\n"); + } + + out.Write("\tDolphinFragmentInput frag_input;\n"); + out.Write("\tfrag_input.color_0 = colors_0;\n"); + out.Write("\tfrag_input.color_1 = colors_1;\n"); + out.Write("\tfrag_input.layer = layer;\n"); + if (per_pixel_lighting) + { + out.Write("\tfrag_input.normal = normalize(Normal);\n"); + out.Write("\tfrag_input.position = WorldPos;\n"); + } + else + { + out.Write("\tfrag_input.normal = vec3(0, 0, 0);\n"); + out.Write("\tfrag_input.position = vec3(0, 0, 0);\n"); + } + for (u32 i = 0; i < uid_data->genMode_numtexgens; i++) + { + out.Write("\tfrag_input.tex{0} = tex{0};\n", i); + } + + if (!custom_pixel.empty()) + GenerateCustomLighting(&out, uid_data->lighting); + + out.Write("\tDolphinFragmentOutput frag_output;\n"); + out.Write("\tfragment(frag_input, frag_output);\n"); + out.Write("\tivec4 prev = frag_output.main & 255;\n"); + + // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled + // (in this case we need to write a depth value if depth test passes regardless of the alpha + // testing result) + if (uid_data->Pretest == AlphaTestResult::Undetermined || + (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late)) + { + WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth, + !uid_data->no_dual_src || uid_data->blend_enable); + } + + // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the + // alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow + // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over + // the center of the screen several times, but 
those rectangles shouldn't be visible). + // Blending seems to result in no changes to the output with an alpha of 1, even if the input + // color is white. + // TODO: Investigate this further: we might be handling blending incorrectly in general (though + // there might not be any good way of changing blending behavior) + out.Write("\t// Hardware testing indicates that an alpha of 1 can pass an alpha test,\n" + "\t// but doesn't do anything in blending\n" + "\tif (prev.a == 1) prev.a = 0;\n"); + + const bool write_depth = + uid_data->ztest == EmulatedZ::Early || uid_data->ztest == EmulatedZ::EarlyWithFBFetch || + uid_data->ztest == EmulatedZ::EarlyWithZComplocHack || uid_data->ztest == EmulatedZ::Late; + const bool needs_depth = uid_data->per_pixel_depth && write_depth; + const bool needs_zcoord = needs_depth || uid_data->fog_fsel != FogType::Off; + if (needs_zcoord) + { + if (!host_config.fast_depth_calc) + { + out.Write( + "\tint zCoord = dolphin_calculate_zcoord(rawpos, clipPos, frag_output.last_texture);\n"); + } + else + { + out.Write("\tint zCoord = dolphin_calculate_zcoord(rawpos, vec4(0, 0, 0, 0), " + "frag_output.last_texture);\n"); + } + } + + if (needs_depth) + { + if (!host_config.backend_reversed_depth_range) + out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); + else + out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); + } + + // No dithering for RGB8 mode + if (uid_data->dither) + out.Write("\tprev.rgb = dolphin_calculate_dither(rawpos, prev);\n"); + + if (uid_data->fog_fsel != FogType::Off) + out.Write("\tprev.rgb = dolphin_calculate_fog(prev, rawpos, zCoord);\n"); + + if (uid_data->logic_op_enable) + out.Write("\tprev = dolphin_calculate_logicop(initial_ocol0, prev);\n"); + else if (uid_data->emulate_logic_op_with_blend) + WriteLogicOpBlend(out, uid_data); + + // Write the color and alpha values to the framebuffer + // If using shader blend, we still use the separate alpha + out.Write("\tocol0 = 
dolphin_calculate_final_color0(prev);\n"); + + const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable; + if (use_dual_source) + out.Write("\tocol1 = dolphin_calculate_final_color1(prev);\n"); + + if (uid_data->blend_enable) + { + if (uid_data->useDstAlpha) + out.Write("\tocol0 = dolphin_calculate_blend(initial_ocol0, ocol1);\n"); + else + out.Write("\tocol0 = dolphin_calculate_blend(initial_ocol0, ocol0);\n"); + } + + if (use_framebuffer_fetch) + out.Write("\treal_ocol0 = ocol0;\n"); + + if (uid_data->bounding_box) + out.Write("\tUpdateBoundingBox(rawpos.xy);\n"); + + out.Write("}}\n"); + + return out; +} +} // namespace PixelShader diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index e5dd43d754..ce41a7e841 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -158,15 +158,24 @@ struct pixel_shader_uid_data using PixelShaderUid = ShaderUid; -void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_stages, bool per_pixel_lighting, - const pixel_shader_uid_data* uid_data); - -ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config, - const pixel_shader_uid_data* uid_data, - const CustomPixelShaderContents& custom_details); void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, const ShaderHostConfig& host_config, bool bounding_box, const CustomPixelShaderContents& custom_details); void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config, PixelShaderUid* uid); PixelShaderUid GetPixelShaderUid(); + +namespace PixelShader +{ +constexpr std::string_view fragment_definition = + "void fragment(in DolphinFragmentInput frag_input, out DolphinFragmentOutput frag_output)"; + +void WriteFragmentDefinitions(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out, + bool as_comment); +void WriteFragmentBody(APIType api_type, 
const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, ShaderCode& out); +ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config, + const pixel_shader_uid_data* uid_data, std::string_view custom_pixel, + std::string_view custom_uniforms); +} // namespace PixelShader diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 587ee3cc7f..3438d2fdf3 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -433,7 +433,7 @@ void ShaderCache::CompileMissingPipelines() std::unique_ptr ShaderCache::CompileVertexShader(const VertexShaderUid& uid) const { const ShaderCode source_code = - GenerateVertexShaderCode(m_api_type, m_host_config, uid.GetUidData()); + VertexShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", ""); return g_gfx->CreateShaderFromSource(ShaderStage::Vertex, source_code.GetBuffer()); } @@ -449,7 +449,7 @@ ShaderCache::CompileVertexUberShader(const UberShader::VertexShaderUid& uid) con std::unique_ptr ShaderCache::CompilePixelShader(const PixelShaderUid& uid) const { const ShaderCode source_code = - GeneratePixelShaderCode(m_api_type, m_host_config, uid.GetUidData(), {}); + PixelShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", ""); return g_gfx->CreateShaderFromSource(ShaderStage::Pixel, source_code.GetBuffer()); } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index d132847f14..83bc4e6201 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -363,95 +363,3 @@ const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interfa return "sample"; } } - -void WriteCustomShaderStructDef(ShaderCode* out, u32 numtexgens) -{ - // Bump this when there are breaking changes to the API - out->Write("#define CUSTOM_SHADER_API_VERSION 1;\n"); - - // 
CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE "enum" values - out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_NONE = {}u;\n", - static_cast(AttenuationFunc::None)); - out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_POINT = {}u;\n", - static_cast(AttenuationFunc::Spec)); - out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_DIR = {}u;\n", - static_cast(AttenuationFunc::Dir)); - out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_SPOT = {}u;\n", - static_cast(AttenuationFunc::Spot)); - - out->Write("struct CustomShaderOutput\n"); - out->Write("{{\n"); - out->Write("\tfloat4 main_rt;\n"); - out->Write("}};\n\n"); - - out->Write("struct CustomShaderLightData\n"); - out->Write("{{\n"); - out->Write("\tfloat3 position;\n"); - out->Write("\tfloat3 direction;\n"); - out->Write("\tfloat3 color;\n"); - out->Write("\tuint attenuation_type;\n"); - out->Write("\tfloat4 cosatt;\n"); - out->Write("\tfloat4 distatt;\n"); - out->Write("}};\n\n"); - - // CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE "enum" values - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV = 0u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR = 1u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX = 2u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS = 3u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST = 4u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC = 5u;\n"); - out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED = 6u;\n"); - - out->Write("struct CustomShaderTevStageInputColor\n"); - out->Write("{{\n"); - out->Write("\tuint input_type;\n"); - out->Write("\tfloat3 value;\n"); - out->Write("}};\n\n"); - - out->Write("struct CustomShaderTevStageInputAlpha\n"); - out->Write("{{\n"); - out->Write("\tuint input_type;\n"); - out->Write("\tfloat value;\n"); - out->Write("}};\n\n"); - - out->Write("struct CustomShaderTevStage\n"); - 
out->Write("{{\n"); - out->Write("\tCustomShaderTevStageInputColor[4] input_color;\n"); - out->Write("\tCustomShaderTevStageInputAlpha[4] input_alpha;\n"); - out->Write("\tuint texmap;\n"); - out->Write("\tfloat4 output_color;\n"); - out->Write("}};\n\n"); - - // Custom structure for data we pass to custom shader hooks - out->Write("struct CustomShaderData\n"); - out->Write("{{\n"); - out->Write("\tfloat3 position;\n"); - out->Write("\tfloat3 normal;\n"); - if (numtexgens == 0) - { - // Cheat so shaders compile - out->Write("\tfloat3[1] texcoord;\n"); - } - else - { - out->Write("\tfloat3[{}] texcoord;\n", numtexgens); - } - out->Write("\tuint texcoord_count;\n"); - out->Write("\tuint[8] texmap_to_texcoord_index;\n"); - out->Write("\tCustomShaderLightData[8] lights_chan0_color;\n"); - out->Write("\tCustomShaderLightData[8] lights_chan0_alpha;\n"); - out->Write("\tCustomShaderLightData[8] lights_chan1_color;\n"); - out->Write("\tCustomShaderLightData[8] lights_chan1_alpha;\n"); - out->Write("\tfloat4[2] ambient_lighting;\n"); - out->Write("\tfloat4[2] base_material;\n"); - out->Write("\tuint light_chan0_color_count;\n"); - out->Write("\tuint light_chan0_alpha_count;\n"); - out->Write("\tuint light_chan1_color_count;\n"); - out->Write("\tuint light_chan1_alpha_count;\n"); - out->Write("\tCustomShaderTevStage[16] tev_stages;\n"); - out->Write("\tuint tev_stage_count;\n"); - out->Write("\tfloat4 final_color;\n"); - out->Write("\tuint time_ms;\n"); - out->Write("}};\n\n"); -} diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 4723cbfc79..7d2c80f086 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -345,5 +345,3 @@ struct CustomPixelShaderContents bool operator==(const CustomPixelShaderContents& other) const = default; }; - -void WriteCustomShaderStructDef(ShaderCode* out, u32 numtexgens); diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp 
b/Source/Core/VideoCommon/UberShaderPixel.cpp index ca8f42ec8f..dd03c883ae 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -17,263 +17,6 @@ namespace UberShader { -namespace -{ -void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_texgen, bool per_pixel_lighting) -{ - out->Write("\tCustomShaderData custom_data;\n"); - if (per_pixel_lighting) - { - out->Write("\tcustom_data.position = WorldPos;\n"); - out->Write("\tcustom_data.normal = Normal;\n"); - } - else - { - out->Write("\tcustom_data.position = float3(0, 0, 0);\n"); - out->Write("\tcustom_data.normal = float3(0, 0, 0);\n"); - } - - if (num_texgen == 0) [[unlikely]] - { - out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n"); - } - else - { - for (u32 i = 0; i < num_texgen; ++i) - { - out->Write("\tif (tex{0}.z == 0.0)\n", i); - out->Write("\t{{\n"); - out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i); - out->Write("\t}}\n"); - out->Write("\telse {{\n"); - out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i); - out->Write("\t}}\n"); - } - } - - out->Write("\tcustom_data.texcoord_count = {};\n", num_texgen); - - for (u32 i = 0; i < 8; i++) - { - // Shader compilation complains if every index isn't initialized - out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = {0};\n", i); - } - - for (u32 i = 0; i < NUM_XF_COLOR_CHANNELS; i++) - { - out->Write("\tcustom_data.base_material[{}] = vec4(0, 0, 0, 1);\n", i); - out->Write("\tcustom_data.ambient_lighting[{}] = vec4(0, 0, 0, 1);\n", i); - - // Shader compilation errors can throw if not everything is initialized - for (u32 light_count_index = 0; light_count_index < 8; light_count_index++) - { - // Color - out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", i, - light_count_index); - 
out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", i, - light_count_index); - - // Alpha - out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", i, - light_count_index); - out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", i, - light_count_index); - } - - out->Write("\tcustom_data.light_chan{}_color_count = 0;\n", i); - out->Write("\tcustom_data.light_chan{}_alpha_count = 0;\n", i); - } - - if (num_texgen > 0) [[likely]] - { - out->Write("\n"); - out->Write("\tfor(uint stage = 0u; stage <= num_stages; stage++)\n"); - out->Write("\t{{\n"); - out->Write("\t\tStageState ss;\n"); - out->Write("\t\tss.order = bpmem_tevorder(stage>>1);\n"); - out->Write("\t\tif ((stage & 1u) == 1u)\n"); - out->Write("\t\t\tss.order = ss.order >> {};\n\n", - int(TwoTevStageOrders().enable_tex_odd.StartBit() - - TwoTevStageOrders().enable_tex_even.StartBit())); - out->Write("\t\tuint texmap = {};\n", - BitfieldExtract<&TwoTevStageOrders::texcoord_even>("ss.order")); - // Shader compilation is weird, shader arrays can't use indexing by variable - // to set values unless the variable is an index in a for loop. 
- // So instead we have to do this if check nonsense - for (u32 i = 0; i < 8; i++) - { - out->Write("\t\tif (texmap == {})\n", i); - out->Write("\t\t{{\n"); - out->Write("\t\t\tcustom_data.texmap_to_texcoord_index[{}] = selectTexCoordIndex(texmap);\n", - i); - out->Write("\t\t}}\n"); - } - out->Write("\t}}\n"); - } - - if (per_pixel_lighting) - { - out->Write("\tuint light_count = 0;\n"); - out->Write("\tfor (uint chan = 0u; chan < {}u; chan++)\n", NUM_XF_COLOR_CHANNELS); - out->Write("\t{{\n"); - out->Write("\t\tuint colorreg = xfmem_color(chan);\n"); - out->Write("\t\tuint alphareg = xfmem_alpha(chan);\n"); - for (const auto& color_type : std::array{"colorreg", "alphareg"}) - { - if (color_type == "colorreg") - { - out->Write("\t\tcustom_data.base_material[0] = " I_MATERIALS "[2u] / 255.0; \n"); - out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); - out->Write("\t\t\tcustom_data.base_material[0] = colors_0; \n"); - } - else - { - out->Write("custom_data.base_material[1].w = " I_MATERIALS "[3u].w / 255.0; \n"); - out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); - out->Write("\t\t\tcustom_data.base_material[1].w = colors_1.w; \n"); - } - out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); - out->Write("\t\t{{\n"); - out->Write("\t\t\tuint light_mask = {} | ({} << 4u);\n", - BitfieldExtract<&LitChannel::lightMask0_3>(color_type), - BitfieldExtract<&LitChannel::lightMask4_7>(color_type)); - out->Write("\t\t\tuint attnfunc = {};\n", BitfieldExtract<&LitChannel::attnfunc>(color_type)); - out->Write("\t\t\tfor (uint light_index = 0u; light_index < 8u; light_index++)\n"); - out->Write("\t\t\t{{\n"); - out->Write("\t\t\t\tif ((light_mask & (1u << light_index)) != 0u)\n"); - out->Write("\t\t\t\t{{\n"); - // Shader compilation is weird, shader arrays can't use indexing by variable - // to set values unless the variable is an index in a for loop. 
- // So instead we have to do this if check nonsense - for (u32 light_count_index = 0; light_count_index < 8; light_count_index++) - { - out->Write("\t\t\t\t\tif (light_index == {})\n", light_count_index); - out->Write("\t\t\t\t\t{{\n"); - if (color_type == "colorreg") - { - for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++) - { - out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index); - out->Write("\t\t\t\t\t\t{{\n"); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].direction = " I_LIGHTS - "[light_index].dir.xyz;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].position = " I_LIGHTS - "[light_index].pos.xyz;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].cosatt = " I_LIGHTS - "[light_index].cosatt;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].distatt = " I_LIGHTS - "[light_index].distatt;\n", - channel_index, light_count_index); - out->Write( - "\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].attenuation_type = attnfunc;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].color = " I_LIGHTS - "[light_index].color.rgb / float3(255.0, 255.0, 255.0);\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_color_count += 1;\n", channel_index); - out->Write("\t\t\t\t\t\t}}\n"); - } - } - else - { - for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++) - { - out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index); - out->Write("\t\t\t\t\t\t{{\n"); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].direction = " I_LIGHTS - "[light_index].dir.xyz;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].position = " I_LIGHTS - "[light_index].pos.xyz;\n", - 
channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].cosatt = " I_LIGHTS - "[light_index].cosatt;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].distatt = " I_LIGHTS - "[light_index].distatt;\n", - channel_index, light_count_index); - out->Write( - "\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = attnfunc;\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].color = float3(" I_LIGHTS - "[light_index].color.a) / float3(255.0, 255.0, 255.0);\n", - channel_index, light_count_index); - out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_alpha_count += 1;\n", channel_index); - out->Write("\t\t\t\t\t\t}}\n"); - } - } - - out->Write("\t\t\t\t\t}}\n"); - } - out->Write("\t\t\t\t}}\n"); - out->Write("\t\t\t}}\n"); - out->Write("\t\t}}\n"); - } - out->Write("\t}}\n"); - } - - for (u32 i = 0; i < 16; i++) - { - // Shader compilation complains if every struct isn't initialized - - // Color Input - for (u32 j = 0; j < 4; j++) - { - out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = " - "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", - i, j); - out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = " - "float3(0, 0, 0);\n", - i, j); - } - - // Alpha Input - for (u32 j = 0; j < 4; j++) - { - out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = " - "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", - i, j); - out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = " - "float(0);\n", - i, j); - } - - // Texmap - out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i); - - // Output - out->Write("\tcustom_data.tev_stages[{}].output_color = " - "float4(0, 0, 0, 0);\n", - i); - } - - // Actual data will be filled out in the tev stage code, just set the - // stage count for now - out->Write("\tcustom_data.tev_stage_count = num_stages;\n"); - - // Time - 
out->Write("\tcustom_data.time_ms = time_ms;\n"); -} -} // namespace PixelShaderUid GetPixelShaderUid() { PixelShaderUid out; @@ -334,12 +77,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("// {}\n", *uid_data); WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box, custom_details); - WriteCustomShaderStructDef(&out, numTexgen); - for (std::size_t i = 0; i < custom_details.shaders.size(); i++) - { - const auto& shader_details = custom_details.shaders[i]; - out.Write(fmt::runtime(shader_details.custom_shader), i); - } if (per_pixel_lighting) WriteLightingFunction(out); @@ -768,25 +505,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "return int3(0, 0, 0);", // ZERO }; - static constexpr Common::EnumMap tev_c_input_type{ - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", - }; - static constexpr Common::EnumMap tev_a_input_table{ "return s.Reg[0].a;", // APREV, "return s.Reg[1].a;", // A0, @@ -798,17 +516,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "return 0;", // ZERO }; - 
static constexpr Common::EnumMap tev_a_input_type{ - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;", - "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", - }; - static constexpr Common::EnumMap tev_regs_lookup_table{ "return s.Reg[0];", "return s.Reg[1];", @@ -850,16 +557,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("}}\n" "\n"); - out.Write("// Helper function for Custom Shader Input Type\n" - "uint getColorInputType(uint index) {{\n"); - WriteSwitch(out, api_type, "index", tev_c_input_type, 2, false); - out.Write("}}\n" - "\n" - "uint getAlphaInputType(uint index) {{\n"); - WriteSwitch(out, api_type, "index", tev_a_input_type, 2, false); - out.Write("}}\n" - "\n"); - // Since the fixed-point texture coodinate variables aren't global, we need to pass // them to the select function. This applies to all backends. if (numTexgen > 0) @@ -879,14 +576,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write(" uint num_stages = {};\n\n", BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode")); - bool has_custom_shader_details = false; - if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(), - [](const std::optional& ps) { return ps.has_value(); })) - { - WriteCustomShaderStructImpl(&out, numTexgen, per_pixel_lighting); - has_custom_shader_details = true; - } - if (use_framebuffer_fetch) { // Store off a copy of the initial framebuffer value. 
@@ -1237,78 +926,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "\n" " // Write result to the correct input register of the next stage\n"); WriteSwitch(out, api_type, "alpha_dest", tev_a_set_table, 6, true); - if (has_custom_shader_details) - { - for (u32 stage_index = 0; stage_index < 16; stage_index++) - { - out.Write("\tif (stage == {}u) {{\n", stage_index); - // Color input - out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].value = color_A / float3(255.0, " - "255.0, 255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].input_type = " - "getColorInputType(color_a);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].value = color_B / float3(255.0, " - "255.0, 255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].input_type = " - "getColorInputType(color_b);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].value = color_C / float3(255.0, " - "255.0, 255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].input_type = " - "getColorInputType(color_c);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].value = color_D / float3(255.0, " - "255.0, 255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].input_type = " - "getColorInputType(color_c);\n", - stage_index); - - // Alpha input - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].value = alpha_A / float(255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].input_type = " - "getAlphaInputType(alpha_a);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].value = alpha_B / float(255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].input_type = " - "getAlphaInputType(alpha_b);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].value 
= alpha_C / float(255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].input_type = " - "getAlphaInputType(alpha_c);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].value = alpha_D / float(255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].input_type = " - "getAlphaInputType(alpha_d);\n", - stage_index); - - if (numTexgen != 0) - { - // Texmap - out.Write("\t\tif (texture_enabled) {{\n"); - out.Write("\t\t\tuint sampler_num = {};\n", - BitfieldExtract<&TwoTevStageOrders::texmap_even>("ss.order")); - out.Write("\t\tcustom_data.tev_stages[{}].texmap = sampler_num;\n", stage_index); - out.Write("\t\t}}\n"); - } - - // Output - out.Write("\t\tcustom_data.tev_stages[{}].output_color.rgb = color / float3(255.0, 255.0, " - "255.0);\n", - stage_index); - out.Write("\t\tcustom_data.tev_stages[{}].output_color.a = alpha / float(255.0);\n", - stage_index); - out.Write("\t}}\n"); - } - } out.Write(" }}\n"); out.Write(" }} // Main TEV loop\n"); out.Write("\n"); @@ -1506,24 +1123,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " }}\n" "\n"); - for (std::size_t i = 0; i < custom_details.shaders.size(); i++) - { - const auto& shader_details = custom_details.shaders[i]; - - if (!shader_details.custom_shader.empty()) - { - out.Write("\t{{\n"); - out.Write("\t\tcustom_data.final_color = float4(TevResult.r / 255.0, TevResult.g / 255.0, " - "TevResult.b / 255.0, TevResult.a / 255.0);\n"); - out.Write("\t\tCustomShaderOutput custom_output = {}_{}(custom_data);\n", - CUSTOM_PIXELSHADER_COLOR_FUNC, i); - out.Write( - "\t\tTevResult = int4(custom_output.main_rt.r * 255, custom_output.main_rt.g * 255, " - "custom_output.main_rt.b * 255, custom_output.main_rt.a * 255);\n"); - out.Write("\t}}\n\n"); - } - } - if (use_framebuffer_fetch) { static constexpr std::array logic_op_mode{ diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp 
b/Source/Core/VideoCommon/VertexShaderGen.cpp index 4a46834c14..71dfee2960 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -74,18 +74,223 @@ VertexShaderUid GetVertexShaderUid() return out; } -ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& host_config, - const vertex_shader_uid_data* uid_data) +namespace VertexShader { - ShaderCode out; +void WriteTransforms(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out) +{ + out.Write("vec4 dolphin_transform_position(vec4 rawpos)\n"); + out.Write("{{\n"); + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + { + // Vertex format has a per-vertex matrix + out.Write("\tint posidx = int(posmtx.r);\n" + "\tvec4 P0 = " I_TRANSFORMMATRICES "[posidx];\n" + "\tvec4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n" + "\tvec4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"); + } + else + { + // One shared matrix + out.Write("\tvec4 P0 = " I_POSNORMALMATRIX "[0];\n" + "\tvec4 P1 = " I_POSNORMALMATRIX "[1];\n" + "\tvec4 P2 = " I_POSNORMALMATRIX "[2];\n"); + } + out.Write("\t// Multiply the position vector by the position matrix\n" + "\treturn vec4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"); + out.Write("}}\n\n"); - const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - const bool msaa = host_config.msaa; - const bool ssaa = host_config.ssaa; - const bool vertex_rounding = host_config.vertex_rounding; + out.Write("vec4 dolphin_project_position(vec4 pos)\n"); + out.Write("{{\n"); + out.Write("\treturn vec4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION + "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); + out.Write("}}\n\n"); - ShaderCode input_extract; + out.Write("vec3 dolphin_transform_normal(vec3 norm)\n"); + out.Write("{{\n"); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + { + if ((uid_data->components & 
VB_HAS_POSMTXIDX) != 0) + { + // Vertex format has a per-vertex matrix + out.Write("\tint posidx = int(posmtx.r);\n"); + out.Write("\tint normidx = posidx & 31;\n" + "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" + "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + } + else + { + // One shared matrix + out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); + } + // The scale of the transform matrix is used to control the size of the emboss map effect, by + // changing the scale of the transformed binormals (which only get used by emboss map texgens). + // By normalising the first transformed normal (which is used by lighting calculations and needs + // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, + // and not scaling for lighting. + out.Write("\treturn normalize(vec3(dot(N0, norm), dot(N1, norm), dot(N2, " + "norm)));\n"); + } + else + { + out.Write("\treturn norm;\n"); + } + + out.Write("}}\n\n"); + + out.Write("vec3 dolphin_transform_binormal(vec3 binormal)\n"); + out.Write("{{\n"); + + if ((uid_data->components & VB_HAS_NORMAL) != 0) + { + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + { + // Vertex format has a per-vertex matrix + out.Write("\tint posidx = int(posmtx.r);\n"); + out.Write("\tint normidx = posidx & 31;\n" + "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" + "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + } + else + { + // One shared matrix + out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); + } + + // The scale of the transform matrix is used to control the size of the emboss map effect, by + // changing the scale of the transformed binormals (which only 
get used by emboss map texgens). + // By normalising the first transformed normal (which is used by lighting calculations and needs + // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, + // and not scaling for lighting. + out.Write("\treturn vec3(dot(N0, binormal), dot(N1, binormal), dot(N2, " + "binormal));\n"); + } + else + { + out.Write("\treturn vec3(0, 0, 0);\n"); + } + + out.Write("}}\n\n"); + + out.Write("vec3 dolphin_transform_tangent(vec3 tangent)\n"); + out.Write("{{\n"); + + if ((uid_data->components & VB_HAS_NORMAL) != 0) + { + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + { + // Vertex format has a per-vertex matrix + out.Write("\tint posidx = int(posmtx.r);\n"); + out.Write("\tint normidx = posidx & 31;\n" + "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" + "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + } + else + { + // One shared matrix + out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); + } + + // The scale of the transform matrix is used to control the size of the emboss map effect, by + // changing the scale of the transformed binormals (which only get used by emboss map texgens). + // By normalising the first transformed normal (which is used by lighting calculations and needs + // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, + // and not scaling for lighting. 
+ out.Write("\treturn vec3(dot(N0, tangent), dot(N1, tangent), dot(N2, " + "tangent));\n"); + } + else + { + out.Write("\treturn vec3(0, 0, 0);\n"); + } + + out.Write("}}\n\n"); + + for (u32 i = 0; i < uid_data->numTexGens; ++i) + { + auto& texinfo = uid_data->texMtxInfo[i]; + out.Write("vec3 dolphin_transform_texcoord{}(vec4 coord)\n", i); + out.Write("{{\n"); + if (texinfo.texgentype != TexGenType::Regular) + { + out.Write("\treturn vec3(coord.xyz);\n"); + } + else + { + out.Write("\tvec3 result;\n"); + if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0) + { + out.Write("\tint tmp = int(rawtex{}.z);\n", i); + if (static_cast((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ) + { + out.Write("\tresult = vec3(dot(coord, " I_TRANSFORMMATRICES + "[tmp]), dot(coord, " I_TRANSFORMMATRICES + "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n"); + } + else + { + out.Write("\tresult = vec3(dot(coord, " I_TRANSFORMMATRICES + "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n"); + } + } + else + { + if (static_cast((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ) + { + out.Write("\tresult = vec3(dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES + "[{}]), dot(coord, " I_TEXMATRICES "[{}]));\n", + 3 * i, 3 * i + 1, 3 * i + 2); + } + else + { + out.Write("\tresult = vec3(dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES + "[{}]), 1);\n", + 3 * i, 3 * i + 1); + } + } + // CHECKME: does this only work for regular tex gen types? 
+ if (uid_data->dualTexTrans_enabled) + { + auto& postInfo = uid_data->postMtxInfo[i]; + + out.Write("\tvec4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n" + "\tvec4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n" + "\tvec4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n", + postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); + + if (postInfo.normalize) + out.Write("\tresult = normalize(result);\n"); + + // multiply by postmatrix + out.Write("\tresult = vec3(dot(P0.xyz, result) + P0.w, dot(P1.xyz, result) + " + "P1.w, dot(P2.xyz, result) + P2.w);\n"); + } + + // When q is 0, the GameCube appears to have a special case + // This can be seen in devkitPro's neheGX Lesson08 example for Wii + // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling) + // TODO: check if this only affects XF_TEXGEN_REGULAR + out.Write("\tif(result.z == 0.0f)\n" + "\t\tresult.xy = clamp(result.xy / 2.0f, vec2(-1.0f,-1.0f), vec2(1.0f,1.0f));\n"); + out.Write("\treturn result;\n"); + } + out.Write("}}\n\n"); + } +} + +void WriteHeader(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out, ShaderCode& input_extract) +{ out.Write("{}", s_lighting_struct); // uniforms @@ -116,6 +321,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho WriteIsNanHeader(out, api_type); + GenerateLightingShaderHeader(out, uid_data->lighting); + if (uid_data->vs_expand == VSExpand::None) { out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawpos;\n", ShaderAttrib::Position); @@ -225,6 +432,10 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho "}};\n\n"); } + const bool msaa = host_config.msaa; + const bool ssaa = host_config.ssaa; + const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + if (host_config.backend_geometry_shaders) { out.Write("VARYING_LOCATION(0) out VertexData {{\n"); @@ -260,28 +471,376 @@ ShaderCode 
GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho } } + // Write the transforms after so they can use the position matrix if it's available + // or should the functions take the posmtx too? + WriteTransforms(api_type, host_config, uid_data, out); + + // TODO: move... + out.Write("vec4 dolphin_pixel_correction(vec4 pos)\n"); + out.Write("{{\n"); + + // Write the true depth value. If the game uses depth textures, then the pixel shader will + // override it with the correct values if not then early z culling will improve speed. + // There are two different ways to do this, when the depth range is oversized, we process + // the depth range in the vertex shader, if not we let the host driver handle it. + // + // Adjust z for the depth range. We're using an equation which incorperates a depth inversion, + // so we can map the console -1..0 range to the 0..1 range used in the depth buffer. + // We have to handle the depth range in the vertex shader instead of after the perspective + // divide, because some games will use a depth range larger than what is allowed by the + // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these + // games effectively add a depth bias to the values written to the depth buffer. + out.Write("\tpos.z = pos.w * " I_PIXELCENTERCORRECTION ".w - " + "\tpos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + + if (!host_config.backend_clip_control) + { + // If the graphics API doesn't support a depth range of 0..1, then we need to map z to + // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy + // floating-point operation that can introduce a round-trip error. + out.Write("\tpos.z = pos.z * 2.0 - pos.w;\n"); + } + + // Correct for negative viewports by mirroring all vertices. We need to negate the height here, + // since the viewport height is already negated by the render backend. 
+ out.Write("\tpos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); + + // The console GPU places the pixel center at 7/12 in screen space unless + // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results + // in some primitives being placed one pixel too far to the bottom-right, + // which in turn can be critical if it happens for clear quads. + // Hence, we compensate for this pixel center difference so that primitives + // get rasterized correctly. + out.Write("\tpos.xy = pos.xy - pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); + + const bool vertex_rounding = host_config.vertex_rounding; + if (vertex_rounding) + { + // By now our position is in clip space + // however, higher resolutions than the Wii outputs + // cause an additional pixel offset + // due to a higher pixel density + // we need to correct this by converting our + // clip-space position into the Wii's screen-space + // acquire the right pixel and then convert it back + out.Write("\tif (pos.w == 1.0f)\n" + "\t{{\n" + + "\t\tfloat ss_pixel_x = ((pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" + "\t\tfloat ss_pixel_y = ((pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n" + + "\t\tss_pixel_x = round(ss_pixel_x);\n" + "\t\tss_pixel_y = round(ss_pixel_y);\n" + + "\t\tpos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" + "\t\tpos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" + "\t}}\n"); + } + + out.Write("\treturn pos;\n"); + + out.Write("}}\n"); +} + +void WriteEmulatedVertexBodyHeader(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out) +{ + constexpr std::string_view emulated_vertex_definition = + "void dolphin_emulated_vertex(in DolphinVertexInput vertex_input, out DolphinVertexOutput " + "vertex_output)"; + out.Write("{}\n", emulated_vertex_definition); + out.Write("{{\n"); + + WriteVertexBody(api_type, host_config, uid_data, out); + + out.Write("}}\n"); +} + 
+// Emits the GLSL definitions of the DolphinVertexInput and DolphinVertexOutput structs.
+// Both structs always declare eight texture_coord_N members (the two loops below together
+// cover indices 0..7); they are vec4 on the input side and vec3 on the output side.
+// api_type and host_config are not read by this function.
+void WriteVertexStructs(APIType api_type, const ShaderHostConfig& host_config,
+                        const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("struct DolphinVertexInput\n");
+  out.Write("{{\n");
+  out.Write("\tvec4 color_0;\n");
+  out.Write("\tvec4 color_1;\n");
+  out.Write("\tvec4 position;\n");
+  out.Write("\tvec3 normal;\n");
+  out.Write("\tvec3 binormal;\n");
+  out.Write("\tvec3 tangent;\n");
+  for (u32 i = 0; i < uid_data->numTexGens; i++)
+  {
+    out.Write("\tvec4 texture_coord_{};\n", i);
+  }
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+  {
+    out.Write("\tvec4 texture_coord_{};\n", i);
+  }
+  out.Write("}};\n\n");
+
+  out.Write("struct DolphinVertexOutput\n");
+  out.Write("{{\n");
+  out.Write("\tvec4 color_0;\n");
+  out.Write("\tvec4 color_1;\n");
+  out.Write("\tvec4 position;\n");
+  out.Write("\tvec3 normal;\n");
+  for (u32 i = 0; i < uid_data->numTexGens; i++)
+  {
+    out.Write("\tvec3 texture_coord_{};\n", i);
+  }
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+  {
+    out.Write("\tvec3 texture_coord_{};\n", i);
+  }
+  out.Write("}};\n\n");
+}
+
+// Emits HAS_* preprocessor defines describing which vertex components are present in
+// uid_data->components.
+// Note the asymmetry for texture coordinates: for i < numTexGens a define (value 1) is only
+// written when the matching UV component bit is set, otherwise nothing is written; for
+// i >= numTexGens the define is always written with value 0.
+void WriteVertexDefines(APIType, const ShaderHostConfig&, const vertex_shader_uid_data* uid_data,
+                        ShaderCode& out)
+{
+  if ((uid_data->components & VB_HAS_COL0) != 0)
+  {
+    out.Write("#define HAS_COLOR_0 1\n");
+  }
+
+  if ((uid_data->components & VB_HAS_COL1) != 0)
+  {
+    out.Write("#define HAS_COLOR_1 1\n");
+  }
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    out.Write("#define HAS_NORMAL 1\n");
+  }
+
+  if ((uid_data->components & VB_HAS_BINORMAL) != 0)
+  {
+    out.Write("#define HAS_BINORMAL 1\n");
+  }
+
+  if ((uid_data->components & VB_HAS_TANGENT) != 0)
+  {
+    out.Write("#define HAS_TANGENT 1\n");
+  }
+
+  for (u32 i = 0; i < uid_data->numTexGens; i++)
+  {
+    if ((uid_data->components & (VB_HAS_UV0 << i)) != 0)
+    {
+      out.Write("#define HAS_TEXTURE_COORD_{} 1\n", i);
+    }
+  }
+
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+  {
+    out.Write("#define HAS_TEXTURE_COORD_{} 0\n", i);
+  }
+}
+
+// Emits the statements that fill `vertex_output` from `vertex_input` inside the generated
+// shader: position/normal transform, per-channel lighting (or colour pass-through when
+// per-pixel lighting is enabled), and one texture coordinate per texgen. Texture coordinate
+// outputs with index >= numTexGens are zero-filled so every member is written.
+//
+// The emitted code only refers to the identifiers `vertex_input` and `vertex_output` plus
+// the dolphin_* helper functions and uniforms; it must be placed in a function that has
+// those two parameters.
+//
+// NOTE(review): the lighting helpers are declared with parameters named `pos` and `_normal`,
+// and the code this refactor removes computed those from the matrix-transformed position and
+// the normalised transformed normal. Here the raw vertex_input.position / vertex_input.normal
+// are passed instead (the emboss light direction likewise uses vertex_input.position).
+// Presumably vertex_output.position / vertex_output.normal were intended - confirm.
+void WriteVertexBody(APIType api_type, const ShaderHostConfig& host_config,
+                     const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("\tvertex_output.position = dolphin_transform_position(vertex_input.position);\n");
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    out.Write("\tvertex_output.normal = dolphin_transform_normal(vertex_input.normal);\n");
+  }
+  else
+  {
+    out.Write("\tvertex_output.normal = vec3(0, 0, 0);\n");
+  }
+
+  const bool has_color0_texture_coordinate =
+      std::ranges::any_of(uid_data->texMtxInfo, [](const auto& texinfo) {
+        return texinfo.texgentype == TexGenType::Color0;
+      });
+
+  const bool has_color1_texture_coordinate =
+      std::ranges::any_of(uid_data->texMtxInfo, [](const auto& texinfo) {
+        return texinfo.texgentype == TexGenType::Color1;
+      });
+
+  const bool per_pixel_lighting = host_config.per_pixel_lighting;
+  if (per_pixel_lighting)
+  {
+    // When per-pixel lighting is enabled, the vertex colors are passed through
+    // unmodified so we can evaluate the lighting in the pixel shader.
+    out.Write("\tvertex_output.color_0 = vertex_input.color_0;\n");
+    out.Write("\tvertex_output.color_1 = vertex_input.color_1;\n");
+    // Note that the numColorChans logic is performed in the pixel shader.
+
+    // We may still need to calculate the lighting per vertex if the vertex
+    // shader generates texture coordinates with this information
+    if (has_color0_texture_coordinate)
+    {
+      out.Write("\tvec4 vertex_lighting_0 = dolphin_calculate_lighting_chn0(vertex_input.color_0, "
+                "vertex_input.position, "
+                "vertex_input.normal);\n");
+    }
+    if (has_color1_texture_coordinate)
+    {
+      out.Write("\tvec4 vertex_lighting_1 = dolphin_calculate_lighting_chn1(vertex_input.color_1, "
+                "vertex_input.position, "
+                "vertex_input.normal);\n");
+    }
+  }
+  else
+  {
+    if (uid_data->numColorChans > 0)
+    {
+      out.Write("\tvec4 vertex_lighting_0 = dolphin_calculate_lighting_chn0(vertex_input.color_0, "
+                "vertex_input.position, "
+                "vertex_input.normal);\n");
+      out.Write("\tvertex_output.color_0 = vertex_lighting_0;\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tvertex_output.color_0 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+      if (has_color0_texture_coordinate)
+      {
+        out.Write("\tvec4 vertex_lighting_0 = "
+                  "dolphin_calculate_lighting_chn0(vertex_input.color_0, vertex_input.position,"
+                  "vertex_input.normal);\n");
+      }
+    }
+
+    if (uid_data->numColorChans == 2)
+    {
+      out.Write("\tvec4 vertex_lighting_1 = dolphin_calculate_lighting_chn1(vertex_input.color_1, "
+                "vertex_input.position, "
+                "vertex_input.normal);\n");
+      out.Write("\tvertex_output.color_1 = vertex_lighting_1;\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tvertex_output.color_1 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+      if (has_color1_texture_coordinate)
+      {
+        // The normal must be read through vertex_input like every other lighting call;
+        // a bare `normal` is not declared in the generated function.
+        out.Write("\tvec4 vertex_lighting_1 = "
+                  "dolphin_calculate_lighting_chn1(vertex_input.color_1, vertex_input.position,"
+                  "vertex_input.normal);\n");
+      }
+    }
+  }
+
+  for (u32 i = 0; i < uid_data->numTexGens; ++i)
+  {
+    auto& texinfo = uid_data->texMtxInfo[i];
+
+    switch (texinfo.texgentype)
+    {
+    case TexGenType::EmbossMap:  // calculate tex coords into bump map
+      // Open a nested scope in the generated shader so that the temporaries declared below
+      // (ldir, rawtangent, rawbinormal) do not collide when more than one texgen is an
+      // emboss map.
+      out.Write("\t{{\n");
+
+      // transform the light dir into tangent space
+      out.Write("\tvec3 ldir = normalize(" LIGHT_POS ".xyz - vertex_input.position.xyz);\n",
+                LIGHT_POS_PARAMS(texinfo.embosslightshift));
+
+      if ((uid_data->components & VB_HAS_TANGENT) == 0)
+        out.Write("\tvec3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
+      else
+        out.Write("\tvec3 rawtangent = vertex_input.tangent;\n");
+
+      if ((uid_data->components & VB_HAS_BINORMAL) == 0)
+        out.Write("\tvec3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
+      else
+        out.Write("\tvec3 rawbinormal = vertex_input.binormal;\n");
+
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vertex_output.texture_coord_{}.xyz + "
+                "vec3(dot(ldir, "
+                "dolphin_transform_tangent(rawtangent)), "
+                "dot(ldir, dolphin_transform_binormal(rawbinormal)), 0.0);\n",
+                i, texinfo.embosssourceshift);
+
+      out.Write("\t}}\n");
+      break;
+    case TexGenType::Color0:
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vec3(vertex_lighting_0.x, "
+                "vertex_lighting_0.y, 1);\n",
+                i);
+      break;
+    case TexGenType::Color1:
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vec3(vertex_lighting_1.x, "
+                "vertex_lighting_1.y, 1);\n",
+                i);
+      break;
+    case TexGenType::Regular:
+      out.Write("\tvertex_output.texture_coord_{0} = "
+                "dolphin_transform_texcoord{0}(vertex_input.texture_coord_{0});\n",
+                i);
+      break;
+    }
+  }
+
+  // Fill out output that is unused
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+  {
+    out.Write("\tvertex_output.texture_coord_{0} = vec3(0, 0, 0);\n", i);
+  }
+}
+
+ShaderCode WriteFullShader(APIType api_type, const
ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, std::string_view custom_vertex, + std::string_view custom_uniforms) +{ + ShaderCode out; + + const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + + ShaderCode input_extract; + + WriteHeader(api_type, host_config, uid_data, out, input_extract); + + WriteVertexStructs(api_type, host_config, uid_data, out); + WriteVertexDefines(api_type, host_config, uid_data, out); + + if (!custom_uniforms.empty()) + { + out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n"); + out.Write("{}", custom_uniforms); + out.Write("}} custom_uniforms;\n"); + } + + WriteEmulatedVertexBodyHeader(api_type, host_config, uid_data, out); + + if (custom_vertex.empty()) + { + out.Write("{}\n", vertex_definition); + out.Write("{{\n"); + + out.Write("\tdolphin_emulated_vertex(vertex_input, vertex_output);\n"); + + out.Write("}}\n"); + } + else + { + out.Write("{}\n", custom_vertex); + } + out.Write("void main()\n{{\n"); if (uid_data->vs_expand != VSExpand::None) { - out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n" - "bool is_right = (gl_VertexID & 1) != 0;\n"); + out.Write("\tbool is_bottom = (gl_VertexID & 2) != 0;\n" + "\tbool is_right = (gl_VertexID & 1) != 0;\n"); // D3D doesn't include the base vertex in SV_VertexID // See comment in UberShaderVertex for details if (api_type == APIType::D3D) - out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n"); + out.Write("\tuint vertex_id = (gl_VertexID >> 2) + base_vertex;\n"); else - out.Write("uint vertex_id = uint(gl_VertexID) >> 2u;\n"); - out.Write("InputData i = input_buffer[vertex_id];\n" + out.Write("\tuint vertex_id = uint(gl_VertexID) >> 2u;\n"); + out.Write("\tInputData i = input_buffer[vertex_id];\n" "{}", input_extract.GetBuffer()); } - out.Write("VS_OUTPUT o;\n"); + out.Write("\tVS_OUTPUT o;\n"); // xfmem.numColorChans controls the number of color channels available to TEV, but we still need // to generate all channels here, 
as it can be used in texgen. Cel-damage is an example of this. - out.Write("float4 vertex_color_0, vertex_color_1;\n"); + out.Write("\tvec4 vertex_color_0, vertex_color_1;\n"); // To use color 1, the vertex descriptor must have color 0 and 1. // If color 1 is present but not color 0, it is used for lighting channel 0. @@ -292,90 +851,66 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho if ((color == 0 || use_color_1) && (uid_data->components & (VB_HAS_COL0 << color)) != 0) { // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are present. - out.Write("vertex_color_{0} = rawcolor{0};\n", color); + out.Write("\tvertex_color_{0} = rawcolor{0};\n", color); } else if (color == 0 && (uid_data->components & VB_HAS_COL1) != 0) { // Use color1 for channel 0 if color0 is not present. - out.Write("vertex_color_{} = rawcolor1;\n", color); + out.Write("\tvertex_color_{} = rawcolor1;\n", color); } else { - out.Write("vertex_color_{0} = missing_color_value;\n", color); + out.Write("\tvertex_color_{0} = missing_color_value;\n", color); } } - // transforms - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + out.Write("\tDolphinVertexInput vertex_input;\n"); + out.Write("\tvertex_input.color_0 = vertex_color_0;\n"); + out.Write("\tvertex_input.color_1 = vertex_color_1;\n"); + out.Write("\tvertex_input.position = rawpos;\n"); + + if ((uid_data->components & VB_HAS_NORMAL) != 0) { - // Vertex format has a per-vertex matrix - out.Write("int posidx = int(posmtx.r);\n" - "float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n" - "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n" - "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n" - "int normidx = posidx & 31;\n" - "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" - "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" - "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + out.Write("\tvertex_input.normal = rawnormal;\n"); } else { - // One shared matrix - 
out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n" - "float4 P1 = " I_POSNORMALMATRIX "[1];\n" - "float4 P2 = " I_POSNORMALMATRIX "[2];\n" - "float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" - "float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" - "float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); + out.Write("\tvertex_input.normal = vec3(0, 0, 0);\n"); } - out.Write("// Multiply the position vector by the position matrix\n" - "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"); - if ((uid_data->components & VB_HAS_NORMAL) == 0) - out.Write("float3 rawnormal = " I_CACHED_NORMAL ".xyz;\n"); - if ((uid_data->components & VB_HAS_TANGENT) == 0) - out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n"); - if ((uid_data->components & VB_HAS_BINORMAL) == 0) - out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n"); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + { + out.Write("\tvertex_input.binormal = rawbinormal;\n"); + } + else + { + out.Write("\tvertex_input.binormal = vec3(0, 0, 0);\n"); + } - // The scale of the transform matrix is used to control the size of the emboss map effect, by - // changing the scale of the transformed binormals (which only get used by emboss map texgens). - // By normalising the first transformed normal (which is used by lighting calculations and needs - // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, - // and not scaling for lighting. 
- out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, " - "rawnormal)));\n" - "float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, " - "rawtangent));\n" - "float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, " - "rawbinormal));\n"); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + { + out.Write("\tvertex_input.tangent = rawtangent;\n"); + } + else + { + out.Write("\tvertex_input.tangent = vec3(0, 0, 0);\n"); + } - out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION - "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); - - out.Write("int4 lacc;\n" - "float3 ldir, h, cosAttn, distAttn;\n" - "float dist, dist2, attn;\n"); - - GenerateLightingShaderCode(out, uid_data->lighting, "vertex_color_", "o.colors_"); - - // transform texcoords - out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); for (u32 i = 0; i < uid_data->numTexGens; ++i) { auto& texinfo = uid_data->texMtxInfo[i]; - out.Write("{{\n"); - out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); + out.Write("\t{{\n"); + out.Write("\t\tvec4 coord = vec4(0.0, 0.0, 1.0, 1.0);\n"); switch (texinfo.sourcerow) { case SourceRow::Geom: - out.Write("coord.xyz = rawpos.xyz;\n"); + out.Write("\t\tcoord.xyz = rawpos.xyz;\n"); break; case SourceRow::Normal: if ((uid_data->components & VB_HAS_NORMAL) != 0) { - out.Write("coord.xyz = rawnormal.xyz;\n"); + out.Write("\t\tcoord.xyz = rawnormal.xyz;\n"); } break; case SourceRow::Colors: @@ -384,13 +919,13 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho case SourceRow::BinormalT: if ((uid_data->components & VB_HAS_TANGENT) != 0) { - out.Write("coord.xyz = rawtangent.xyz;\n"); + out.Write("\t\tcoord.xyz = rawtangent.xyz;\n"); } break; case SourceRow::BinormalB: if ((uid_data->components & VB_HAS_BINORMAL) != 0) { - out.Write("coord.xyz = rawbinormal.xyz;\n"); + out.Write("\t\tcoord.xyz = 
rawbinormal.xyz;\n"); } break; default: @@ -398,112 +933,49 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho u32 texnum = static_cast(texinfo.sourcerow) - static_cast(SourceRow::Tex0); if ((uid_data->components & (VB_HAS_UV0 << (texnum))) != 0) { - out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum); + out.Write("\t\tcoord = vec4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum); } break; } // Input form of AB11 sets z element to 1.0 if (texinfo.inputform == TexInputForm::AB11) - out.Write("coord.z = 1.0;\n"); + out.Write("\t\tcoord.z = 1.0;\n"); // Convert NaNs to 1 - needed to fix eyelids in Shadow the Hedgehog during cutscenes // See https://bugs.dolphin-emu.org/issues/11458 - out.Write("// Convert NaN to 1\n"); - out.Write("if (dolphin_isnan(coord.x)) coord.x = 1.0;\n"); - out.Write("if (dolphin_isnan(coord.y)) coord.y = 1.0;\n"); - out.Write("if (dolphin_isnan(coord.z)) coord.z = 1.0;\n"); + out.Write("\t\t// Convert NaN to 1\n"); + out.Write("\t\tif (dolphin_isnan(coord.x)) coord.x = 1.0;\n"); + out.Write("\t\tif (dolphin_isnan(coord.y)) coord.y = 1.0;\n"); + out.Write("\t\tif (dolphin_isnan(coord.z)) coord.z = 1.0;\n"); - // first transformation - switch (texinfo.texgentype) - { - case TexGenType::EmbossMap: // calculate tex coords into bump map + out.Write("\t\tvertex_input.texture_coord_{0} = coord;\n", i); + out.Write("\t}}\n"); + } - // transform the light dir into tangent space - out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", - LIGHT_POS_PARAMS(texinfo.embosslightshift)); - out.Write( - "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n", - i, texinfo.embosssourceshift); + // Initialize other texture coordinates that are unused + for (u32 i = uid_data->numTexGens; i < 8; i++) + { + out.Write("\tvertex_input.texture_coord_{0} = vec4(0, 0, 0, 0);\n", i); + } - break; - case TexGenType::Color0: - out.Write("o.tex{}.xyz = 
float3(o.colors_0.x, o.colors_0.y, 1);\n", i); - break; - case TexGenType::Color1: - out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); - break; - case TexGenType::Regular: - default: - if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0) - { - out.Write("int tmp = int(rawtex{}.z);\n", i); - if (static_cast((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ) - { - out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES - "[tmp]), dot(coord, " I_TRANSFORMMATRICES - "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", - i); - } - else - { - out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES - "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n", - i); - } - } - else - { - if (static_cast((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ) - { - out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES - "[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES - "[{}]));\n", - i, 3 * i, 3 * i + 1, 3 * i + 2); - } - else - { - out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES - "[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n", - i, 3 * i, 3 * i + 1); - } - } - break; - } + out.Write("\tDolphinVertexOutput vertex_output;\n"); + out.Write("\tvertex(vertex_input, vertex_output);\n"); - // CHECKME: does this only work for regular tex gen types? 
- if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular) - { - auto& postInfo = uid_data->postMtxInfo[i]; + out.Write("\to.pos = dolphin_project_position(vertex_output.position);\n"); + for (u32 i = 0; i < uid_data->numTexGens; ++i) + { + out.Write("\to.tex{0} = vertex_output.texture_coord_{0};\n", i); + } - out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n" - "float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n" - "float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n", - postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); + out.Write("\to.colors_0 = vertex_output.color_0;\n"); + out.Write("\to.colors_1 = vertex_output.color_1;\n"); + if (per_pixel_lighting) + { + out.Write("\to.Normal = vertex_output.normal;\n"); - if (postInfo.normalize) - out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i); - - // multiply by postmatrix - out.Write( - "o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + " - "P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n", - i); - } - - // When q is 0, the GameCube appears to have a special case - // This can be seen in devkitPro's neheGX Lesson08 example for Wii - // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling) - // TODO: check if this only affects XF_TEXGEN_REGULAR - if (texinfo.texgentype == TexGenType::Regular) - { - out.Write( - "if(o.tex{0}.z == 0.0f)\n" - "\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n", - i); - } - - out.Write("}}\n"); + // TODO: Rename, this is actually in Viewspace... 
+ out.Write("\to.WorldPos = vertex_output.position.xyz;\n"); } if (uid_data->vs_expand == VSExpand::Line) @@ -542,120 +1014,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho GenerateVSPointExpansion(out, "", uid_data->numTexGens); } - if (per_pixel_lighting) - { - // When per-pixel lighting is enabled, the vertex colors are passed through - // unmodified so we can evaluate the lighting in the pixel shader. - - // Lighting is also still computed in the vertex shader since it can be used to - // generate texture coordinates. We generated them above, so now the colors can - // be reverted to their previous stage. - out.Write("o.colors_0 = vertex_color_0;\n"); - out.Write("o.colors_1 = vertex_color_1;\n"); - // Note that the numColorChans logic is performed in the pixel shader. - } - else - { - // The number of colors available to TEV is determined by numColorChans. - // We have to provide the fields to match the interface, so set to zero if it's not enabled. - if (uid_data->numColorChans == 0) - out.Write("o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);\n"); - if (uid_data->numColorChans <= 1) - out.Write("o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n"); - } - // clipPos/w needs to be done in pixel shader, not here if (!host_config.fast_depth_calc) - out.Write("o.clipPos = o.pos;\n"); - - if (per_pixel_lighting) - { - out.Write("o.Normal = _normal;\n" - "o.WorldPos = pos.xyz;\n"); - } + out.Write("\to.clipPos = o.pos;\n"); // If we can disable the incorrect depth clipping planes using depth clamping, then we can do // our own depth clipping and calculate the depth range before the perspective divide if // necessary. if (host_config.backend_depth_clamp) { - // Since we're adjusting z for the depth range before the perspective divide, we have to do our - // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. 
- // We adjust our depth value for clipping purposes to match the perspective projection in the - // software backend, which is a hack to fix Sonic Adventure and Unleashed games. - out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" - "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w - "float clipDist1 = -clipDepth;\n"); // Far: z > 0 + // Since we're adjusting z for the depth range before the perspective divide, we have to do + // our own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 + // range. We adjust our depth value for clipping purposes to match the perspective projection + // in the software backend, which is a hack to fix Sonic Adventure and Unleashed games. + out.Write("\tfloat clipDepth = o.pos.z * (1.0 - 1e-7);\n" + "\tfloat clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w + "\tfloat clipDist1 = -clipDepth;\n"); // Far: z > 0 if (host_config.backend_geometry_shaders) { - out.Write("o.clipDist0 = clipDist0;\n" - "o.clipDist1 = clipDist1;\n"); + out.Write("\to.clipDist0 = clipDist0;\n" + "\to.clipDist1 = clipDist1;\n"); } } else { // Same depth adjustment for Sonic. Without depth clamping, it unfortunately // affects non-clipping uses of depth too. - out.Write("o.pos.z = o.pos.z * (1.0 - 1e-7);\n"); + out.Write("\to.pos.z = o.pos.z * (1.0 - 1e-7);\n"); } - // Write the true depth value. If the game uses depth textures, then the pixel shader will - // override it with the correct values if not then early z culling will improve speed. - // There are two different ways to do this, when the depth range is oversized, we process - // the depth range in the vertex shader, if not we let the host driver handle it. - // - // Adjust z for the depth range. We're using an equation which incorperates a depth inversion, - // so we can map the console -1..0 range to the 0..1 range used in the depth buffer. 
- // We have to handle the depth range in the vertex shader instead of after the perspective - // divide, because some games will use a depth range larger than what is allowed by the - // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these - // games effectively add a depth bias to the values written to the depth buffer. - out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " - "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); - - if (!host_config.backend_clip_control) - { - // If the graphics API doesn't support a depth range of 0..1, then we need to map z to - // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point - // operation that can introduce a round-trip error. - out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); - } - - // Correct for negative viewports by mirroring all vertices. We need to negate the height here, - // since the viewport height is already negated by the render backend. - out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); - - // The console GPU places the pixel center at 7/12 in screen space unless - // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results - // in some primitives being placed one pixel too far to the bottom-right, - // which in turn can be critical if it happens for clear quads. - // Hence, we compensate for this pixel center difference so that primitives - // get rasterized correctly. 
- out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); - - if (vertex_rounding) - { - // By now our position is in clip space - // however, higher resolutions than the Wii outputs - // cause an additional pixel offset - // due to a higher pixel density - // we need to correct this by converting our - // clip-space position into the Wii's screen-space - // acquire the right pixel and then convert it back - out.Write("if (o.pos.w == 1.0f)\n" - "{{\n" - - "\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" - "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n" - - "\tss_pixel_x = round(ss_pixel_x);\n" - "\tss_pixel_y = round(ss_pixel_y);\n" - - "\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" - "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" - "}}\n"); - } + out.Write("\to.pos = dolphin_pixel_correction(o.pos);\n"); if (host_config.backend_geometry_shaders) { @@ -666,30 +1055,31 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. 
for (u32 i = 0; i < uid_data->numTexGens; ++i) - out.Write("tex{}.xyz = o.tex{};\n", i, i); + out.Write("\ttex{}.xyz = o.tex{};\n", i, i); if (!host_config.fast_depth_calc) - out.Write("clipPos = o.clipPos;\n"); + out.Write("\tclipPos = o.clipPos;\n"); if (per_pixel_lighting) { - out.Write("Normal = o.Normal;\n" - "WorldPos = o.WorldPos;\n"); + out.Write("\tNormal = o.Normal;\n" + "\tWorldPos = o.WorldPos;\n"); } - out.Write("colors_0 = o.colors_0;\n" - "colors_1 = o.colors_1;\n"); + out.Write("\tcolors_0 = o.colors_0;\n" + "\tcolors_1 = o.colors_1;\n"); } if (host_config.backend_depth_clamp) { - out.Write("gl_ClipDistance[0] = clipDist0;\n" - "gl_ClipDistance[1] = clipDist1;\n"); + out.Write("\tgl_ClipDistance[0] = clipDist0;\n" + "\tgl_ClipDistance[1] = clipDist1;\n"); } // Vulkan NDC space has Y pointing down (right-handed NDC space). if (api_type == APIType::Vulkan) - out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + out.Write("\tgl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); else - out.Write("gl_Position = o.pos;\n"); + out.Write("\tgl_Position = o.pos;\n"); out.Write("}}\n"); return out; } +} // namespace VertexShader diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 94f2a170c9..74976950bf 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -91,3 +91,19 @@ using VertexShaderUid = ShaderUid; VertexShaderUid GetVertexShaderUid(); ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& host_config, const vertex_shader_uid_data* uid_data); + +namespace VertexShader +{ +constexpr std::string_view vertex_definition = + "void vertex(in DolphinVertexInput vertex_input, out DolphinVertexOutput vertex_output)"; + +void WriteVertexStructs(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out); +void WriteVertexDefines(APIType api_type, 
const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out); +void WriteVertexBody(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, ShaderCode& out); +ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config, + const vertex_shader_uid_data* uid_data, std::string_view custom_vertex, + std::string_view custom_uniforms); +} // namespace VertexShader