diff --git a/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp b/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp
index 29d801cc52..0c9cf836ca 100644
--- a/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp
+++ b/Source/Core/VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.cpp
@@ -346,8 +346,10 @@ std::unique_ptr<AbstractShader>
 CustomShaderCache::CompilePixelShader(const PixelShaderUid& uid,
                                       const CustomShaderInstance& custom_shaders) const
 {
-  const ShaderCode source_code = GeneratePixelShaderCode(
-      m_api_type, m_host_config, uid.GetUidData(), custom_shaders.pixel_contents);
+  // NOTE(review): custom_shaders is no longer read in this function; WriteFullShader is given
+  // two empty strings as its trailing arguments. Confirm this is intentional.
+  const ShaderCode source_code =
+      PixelShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", "");
   return g_gfx->CreateShaderFromSource(ShaderStage::Pixel, source_code.GetBuffer(),
                                        "Custom Pixel Shader");
 }
diff --git a/Source/Core/VideoCommon/LightingShaderGen.cpp b/Source/Core/VideoCommon/LightingShaderGen.cpp
index 4fb2c98ebd..c486f79af7 100644
--- a/Source/Core/VideoCommon/LightingShaderGen.cpp
+++ b/Source/Core/VideoCommon/LightingShaderGen.cpp
@@ -81,29 +81,34 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d
 // materials name is I_MATERIALS in vs and I_PMATERIALS in ps
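-// inColorName is color in vs and colors_ in ps
-// dest is o.colors_ in vs and colors_ in ps
+// the input color is passed to each generated function as its vertex_color argument
+// each dolphin_calculate_lighting_chn{j} returns the lit color instead of writing to a dest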
-void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data,
-                                std::string_view in_color_name, std::string_view dest)
+void GenerateLightingShaderHeader(ShaderCode& object, const LightingUidData& uid_data)
 {
   for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
   {
+    object.Write(
+        "vec4 dolphin_calculate_lighting_chn{}(vec4 vertex_color, vec4 pos, vec3 _normal)\n", j);
     object.Write("{{\n");
 
+    object.Write("\tint4 lacc;\n"
+                 "\tfloat3 ldir, h, cosAttn, distAttn;\n"
+                 "\tfloat dist, dist2, attn;\n");
+
     const bool colormatsource = !!(uid_data.matsource & (1 << j));
     if (colormatsource)  // from vertex
-      object.Write("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j);
+      object.Write("\tint4 mat = int4(round(vertex_color * 255.0));\n");
     else  // from color
-      object.Write("int4 mat = {}[{}];\n", I_MATERIALS, j + 2);
+      object.Write("\tint4 mat = {}[{}];\n", I_MATERIALS, j + 2);
 
     if ((uid_data.enablelighting & (1 << j)) != 0)
     {
       if ((uid_data.ambsource & (1 << j)) != 0)  // from vertex
-        object.Write("lacc = int4(round({}{} * 255.0));\n", in_color_name, j);
+        object.Write("\tlacc = int4(round(vertex_color * 255.0));\n");
       else  // from color
-        object.Write("lacc = {}[{}];\n", I_MATERIALS, j);
+        object.Write("\tlacc = {}[{}];\n", I_MATERIALS, j);
     }
     else
     {
-      object.Write("lacc = int4(255, 255, 255, 255);\n");
+      object.Write("\tlacc = int4(255, 255, 255, 255);\n");
     }
 
     // check if alpha is different
@@ -111,21 +116,21 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
     if (alphamatsource != colormatsource)
     {
       if (alphamatsource)  // from vertex
-        object.Write("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
+        object.Write("\tmat.w = int(round(vertex_color.w * 255.0));\n");
       else  // from color
-        object.Write("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
+        object.Write("\tmat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
     }
 
     if ((uid_data.enablelighting & (1 << (j + 2))) != 0)
     {
       if ((uid_data.ambsource & (1 << (j + 2))) != 0)  // from vertex
-        object.Write("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
+        object.Write("\tlacc.w = int(round(vertex_color.w * 255.0));\n");
       else  // from color
-        object.Write("lacc.w = {}[{}].w;\n", I_MATERIALS, j);
+        object.Write("\tlacc.w = {}[{}].w;\n", I_MATERIALS, j);
     }
     else
     {
-      object.Write("lacc.w = 255;\n");
+      object.Write("\tlacc.w = 255;\n");
     }
 
     if ((uid_data.enablelighting & (1 << j)) != 0)  // Color lights
@@ -144,9 +149,9 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
           GenerateLightShader(object, uid_data, i, j + 2, true);
       }
     }
-    object.Write("lacc = clamp(lacc, 0, 255);\n");
-    object.Write("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
-    object.Write("}}\n");
+    object.Write("\tlacc = clamp(lacc, 0, 255);\n");
+    object.Write("\treturn vec4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n");
+    object.Write("}}\n\n");
   }
 }
 
@@ -176,47 +181,9 @@ void GetLightingShaderUid(LightingUidData& uid_data)
   }
 }
 
-void GenerateCustomLightingHeaderDetails(ShaderCode* out, u32 enablelighting, u32 light_mask)
-{
-  u32 light_count = 0;
-  for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
-  {
-    if ((enablelighting & (1 << j)) != 0)  // Color lights
-    {
-      for (int i = 0; i < 8; ++i)
-      {
-        if ((light_mask & (1 << (i + 8 * j))) != 0)
-        {
-          light_count++;
-        }
-      }
-    }
-    if ((enablelighting & (1 << (j + 2))) != 0)  // Alpha lights
-    {
-      for (int i = 0; i < 8; ++i)
-      {
-        if ((light_mask & (1 << (i + 8 * (j + 2)))) != 0)
-        {
-          light_count++;
-        }
-      }
-    }
-  }
-  if (light_count > 0)
-  {
-    out->Write("\tCustomShaderLightData[{}] light;\n", light_count);
-  }
-  else
-  {
-    // Cheat so shaders compile
-    out->Write("\tCustomShaderLightData[1] light;\n", light_count);
-  }
-  out->Write("\tint light_count;\n");
-}
-
-static void GenerateLighting(ShaderCode* out, const LightingUidData& uid_data, int index,
-                             int litchan_index, u32 channel_index, u32 custom_light_index,
-                             bool alpha)
+static void GenerateLightingImpl(ShaderCode* out, const LightingUidData& uid_data, int index,
+                                 int litchan_index, u32 channel_index, u32 custom_light_index,
+                                 bool alpha)
 {
   const auto attnfunc =
       static_cast<AttenuationFunc>((uid_data.attnfunc >> (2 * litchan_index)) & 0x3);
@@ -225,60 +192,59 @@ static void GenerateLighting(ShaderCode* out, const LightingUidData& uid_data, i
   const std::string name = fmt::format("lights_chan{}_{}", channel_index, light_type);
 
   out->Write("\t{{\n");
-  out->Write("\t\tcustom_data.{}[{}].direction = " LIGHT_DIR ".xyz;\n", name, custom_light_index,
+  out->Write("\t\tfrag_input.{}[{}].direction = " LIGHT_DIR ".xyz;\n", name, custom_light_index,
              LIGHT_DIR_PARAMS(index));
-  out->Write("\t\tcustom_data.{}[{}].position = " LIGHT_POS ".xyz;\n", name, custom_light_index,
+  out->Write("\t\tfrag_input.{}[{}].position = " LIGHT_POS ".xyz;\n", name, custom_light_index,
              LIGHT_POS_PARAMS(index));
-  out->Write("\t\tcustom_data.{}[{}].cosatt = " LIGHT_COSATT ";\n", name, custom_light_index,
+  out->Write("\t\tfrag_input.{}[{}].cosatt = " LIGHT_COSATT ";\n", name, custom_light_index,
              LIGHT_COSATT_PARAMS(index));
-  out->Write("\t\tcustom_data.{}[{}].distatt = " LIGHT_DISTATT ";\n", name, custom_light_index,
+  out->Write("\t\tfrag_input.{}[{}].distatt = " LIGHT_DISTATT ";\n", name, custom_light_index,
              LIGHT_DISTATT_PARAMS(index));
-  out->Write("\t\tcustom_data.{}[{}].attenuation_type = {};\n", name, custom_light_index,
+  out->Write("\t\tfrag_input.{}[{}].attenuation_type = {};\n", name, custom_light_index,
              static_cast<u32>(attnfunc));
   if (alpha)
   {
-    out->Write("\t\tcustom_data.{}[{}].color = float3(" LIGHT_COL
+    out->Write("\t\tfrag_input.{}[{}].color = float3(" LIGHT_COL
                ") / float3(255.0, 255.0, 255.0);\n",
-               name, custom_light_index, LIGHT_COL_PARAMS(index, alpha ? "a" : "rgb"));
+               name, custom_light_index, LIGHT_COL_PARAMS(index, "a"));
   }
   else
   {
-    out->Write("\t\tcustom_data.{}[{}].color = " LIGHT_COL " / float3(255.0, 255.0, 255.0);\n",
-               name, custom_light_index, LIGHT_COL_PARAMS(index, alpha ? "a" : "rgb"));
+    out->Write("\t\tfrag_input.{}[{}].color = " LIGHT_COL " / float3(255.0, 255.0, 255.0);\n", name,
+               custom_light_index, LIGHT_COL_PARAMS(index, "rgb"));
   }
   out->Write("\t}}\n");
 }
 
-void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData& uid_data,
-                                          std::string_view in_color_name)
+void GenerateCustomLighting(ShaderCode* out, const LightingUidData& uid_data)
 {
   for (u32 i = 0; i < 8; i++)
   {
     for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++)
     {
-      out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_color[{}].position = float3(0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", channel_index,
+      out->Write("\tfrag_input.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", channel_index,
                  i);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", channel_index, i);
+      out->Write("\tfrag_input.lights_chan{}_color[{}].attenuation_type = 0;\n", channel_index, i);
 
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", channel_index,
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", channel_index,
                  i);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n",
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n",
                  channel_index, i);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", channel_index, i);
+      out->Write("\tfrag_input.lights_chan{}_alpha[{}].attenuation_type = 0;\n", channel_index, i);
     }
   }
 
@@ -286,20 +252,20 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData
   {
     const bool colormatsource = !!(uid_data.matsource & (1 << j));
     if (colormatsource)  // from vertex
-      out->Write("custom_data.base_material[{}] = {}{};\n", j, in_color_name, j);
+      out->Write("frag_input.base_material[{}] = frag_input.color_{};\n", j, j);
     else  // from color
-      out->Write("custom_data.base_material[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j + 2);
+      out->Write("frag_input.base_material[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j + 2);
 
     if ((uid_data.enablelighting & (1 << j)) != 0)
     {
       if ((uid_data.ambsource & (1 << j)) != 0)  // from vertex
-        out->Write("custom_data.ambient_lighting[{}] = {}{};\n", j, in_color_name, j);
+        out->Write("frag_input.ambient_lighting[{}] = frag_input.color_{};\n", j, j);
       else  // from color
-        out->Write("custom_data.ambient_lighting[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j);
+        out->Write("frag_input.ambient_lighting[{}] = {}[{}] / 255.0;\n", j, I_MATERIALS, j);
     }
     else
     {
-      out->Write("custom_data.ambient_lighting[{}] = float4(1, 1, 1, 1);\n", j);
+      out->Write("frag_input.ambient_lighting[{}] = float4(1, 1, 1, 1);\n", j);
     }
 
     // check if alpha is different
@@ -307,21 +273,21 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData
     if (alphamatsource != colormatsource)
     {
       if (alphamatsource)  // from vertex
-        out->Write("custom_data.base_material[{}].w = {}{}.w;\n", j, in_color_name, j);
+        out->Write("frag_input.base_material[{}].w = frag_input.color_{}.w;\n", j, j);
       else  // from color
-        out->Write("custom_data.base_material[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j + 2);
+        out->Write("frag_input.base_material[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j + 2);
     }
 
     if ((uid_data.enablelighting & (1 << (j + 2))) != 0)
     {
       if ((uid_data.ambsource & (1 << (j + 2))) != 0)  // from vertex
-        out->Write("custom_data.ambient_lighting[{}].w = {}{}.w;\n", j, in_color_name, j);
+        out->Write("frag_input.ambient_lighting[{}].w = frag_input.color_{}.w;\n", j, j);
       else  // from color
-        out->Write("custom_data.ambient_lighting[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j);
+        out->Write("frag_input.ambient_lighting[{}].w = {}[{}].w / 255.0;\n", j, I_MATERIALS, j);
     }
     else
     {
-      out->Write("custom_data.ambient_lighting[{}].w = 1;\n", j);
+      out->Write("frag_input.ambient_lighting[{}].w = 1;\n", j);
     }
 
     u32 light_count = 0;
@@ -331,12 +297,12 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData
       {
         if ((uid_data.light_mask & (1 << (i + 8 * j))) != 0)
         {
-          GenerateLighting(out, uid_data, i, j, j, light_count, false);
+          GenerateLightingImpl(out, uid_data, i, j, j, light_count, false);
           light_count++;
         }
       }
     }
-    out->Write("\tcustom_data.light_chan{}_color_count = {};\n", j, light_count);
+    out->Write("\tfrag_input.light_chan{}_color_count = {};\n", j, light_count);
 
     light_count = 0;
     if ((uid_data.enablelighting & (1 << (j + 2))) != 0)  // Alpha lights
@@ -345,11 +311,11 @@ void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData
       {
         if ((uid_data.light_mask & (1 << (i + 8 * (j + 2)))) != 0)
         {
-          GenerateLighting(out, uid_data, i, j + 2, j, light_count, true);
+          GenerateLightingImpl(out, uid_data, i, j + 2, j, light_count, true);
           light_count++;
         }
       }
     }
-    out->Write("\tcustom_data.light_chan{}_alpha_count = {};\n", j, light_count);
+    out->Write("\tfrag_input.light_chan{}_alpha_count = {};\n", j, light_count);
   }
 }
diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h
index b06ec40c4a..e7d6f1ed13 100644
--- a/Source/Core/VideoCommon/LightingShaderGen.h
+++ b/Source/Core/VideoCommon/LightingShaderGen.h
@@ -44,10 +44,11 @@ constexpr char s_lighting_struct[] = "struct Light {\n"
                                      "\tfloat4 dir;\n"
                                      "};\n";
 
-void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data,
-                                std::string_view in_color_name, std::string_view dest);
+// Writes one shader function per XF color channel, dolphin_calculate_lighting_chn<j>, which
+// takes (vertex_color, pos, _normal) and returns that channel's lit color.
+void GenerateLightingShaderHeader(ShaderCode& object, const LightingUidData& uid_data);
 void GetLightingShaderUid(LightingUidData& uid_data);
 
-void GenerateCustomLightingHeaderDetails(ShaderCode* out, u32 enablelighting, u32 light_mask);
-void GenerateCustomLightingImplementation(ShaderCode* out, const LightingUidData& uid_data,
-                                          std::string_view in_color_name);
+// Writes assignments that fill the lighting fields of `frag_input`: the per-channel
+// lights_chan<j>_{color,alpha} arrays and counts, base_material and ambient_lighting.
+void GenerateCustomLighting(ShaderCode* out, const LightingUidData& uid_data);
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index a76681634f..22a980f4e6 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -427,14 +427,6 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
     out.Write("}};\n");
   }
 
-  if (!custom_details.shaders.empty() &&
-      !custom_details.shaders.back().material_uniform_block.empty())
-  {
-    out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n");
-    out.Write("{}", custom_details.shaders.back().material_uniform_block);
-    out.Write("}} custom_uniforms;\n");
-  }
-
   if (bounding_box)
   {
     out.Write("SSBO_BINDING(0) coherent buffer BBox {{\n"
@@ -761,599 +753,13 @@ uint WrapCoord(int coord, uint wrap, int size) {{
   }
 }
 
-void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_stages, bool per_pixel_lighting,
-                                 const pixel_shader_uid_data* uid_data)
-{
-  out->Write("\tCustomShaderData custom_data;\n");
-
-  if (per_pixel_lighting)
-  {
-    out->Write("\tcustom_data.position = WorldPos;\n");
-    out->Write("\tcustom_data.normal = Normal;\n");
-  }
-  else
-  {
-    out->Write("\tcustom_data.position = float3(0, 0, 0);\n");
-    out->Write("\tcustom_data.normal = float3(0, 0, 0);\n");
-  }
-
-  if (uid_data->genMode_numtexgens == 0) [[unlikely]]
-  {
-    out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n");
-  }
-  else
-  {
-    for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
-    {
-      out->Write("\tif (tex{0}.z == 0.0)\n", i);
-      out->Write("\t{{\n");
-      out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i);
-      out->Write("\t}}\n");
-      out->Write("\telse {{\n");
-      out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i);
-      out->Write("\t}}\n");
-    }
-  }
-
-  for (u32 i = 0; i < 8; i++)
-  {
-    // Shader compilation complains if every index isn't initialized
-    out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = 0;\n", i);
-  }
-
-  for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
-  {
-    if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
-    {
-      u32 texcoord = uid_data->GetTevindirefCoord(i);
-      const u32 texmap = uid_data->GetTevindirefMap(i);
-
-      // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
-      // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
-      // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
-      // the number of tex gens to 2 (bug 11462).
-      if (texcoord >= uid_data->genMode_numtexgens)
-        texcoord = 0;
-
-      out->Write("\tcustom_data.texmap_to_texcoord_index[{}] = {};\n", texmap, texcoord);
-    }
-  }
-  out->Write("\tcustom_data.texcoord_count = {};\n", uid_data->genMode_numtexgens);
-
-  // Try and do a best guess on what the texcoord index is
-  // Note: one issue with this would be textures that are used
-  // multiple times in the same draw but with different texture coordinates.
-  // In that scenario, only the last texture coordinate would be defined.
-  // This issue can be seen in how Rogue Squadron 2 does bump mapping
-  for (u32 i = 0; i < num_stages; i++)
-  {
-    auto& tevstage = uid_data->stagehash[i];
-    // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
-    // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
-    u32 texcoord = tevstage.tevorders_texcoord;
-    const bool has_tex_coord = texcoord < uid_data->genMode_numtexgens;
-    if (!has_tex_coord)
-      texcoord = 0;
-
-    out->Write("\tcustom_data.texmap_to_texcoord_index[{}] = {};\n", tevstage.tevorders_texmap,
-               texcoord);
-  }
-
-  if (per_pixel_lighting)
-    GenerateCustomLightingImplementation(out, uid_data->lighting, "colors_");
-
-  for (u32 i = 0; i < 16; i++)
-  {
-    // Shader compilation complains if every struct isn't initialized
-
-    // Color Input
-    for (u32 j = 0; j < 4; j++)
-    {
-      out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = "
-                 "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
-                 i, j);
-      out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = "
-                 "float3(0, 0, 0);\n",
-                 i, j);
-    }
-
-    // Alpha Input
-    for (u32 j = 0; j < 4; j++)
-    {
-      out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = "
-                 "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
-                 i, j);
-      out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = "
-                 "float(0);\n",
-                 i, j);
-    }
-
-    // Texmap
-    out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i);
-
-    // Output
-    out->Write("\tcustom_data.tev_stages[{}].output_color = "
-               "float4(0, 0, 0, 0);\n",
-               i);
-  }
-
-  // Actual data will be filled out in the tev stage code, just set the
-  // stage count for now
-  out->Write("\tcustom_data.tev_stage_count = {};\n", num_stages);
-
-  // Time
-  out->Write("\tcustom_data.time_ms = time_ms;\n");
-}
-
 static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
                        APIType api_type, bool stereo, bool has_custom_shaders);
 static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
                             bool clamp, TevScale scale);
 static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type,
                            bool per_pixel_depth, bool use_dual_source);
-static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
-static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data);
 static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
-static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
-                       bool use_dual_source);
-static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
-
-ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config,
-                                   const pixel_shader_uid_data* uid_data,
-                                   const CustomPixelShaderContents& custom_details)
-{
-  ShaderCode out;
-
-  const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
-  const bool msaa = host_config.msaa;
-  const bool ssaa = host_config.ssaa;
-  const bool stereo = host_config.stereo;
-  const u32 numStages = uid_data->genMode_numtevstages + 1;
-
-  out.Write("// Pixel Shader for TEV stages\n");
-  out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages,
-            uid_data->genMode_numtexgens, uid_data->genMode_numindstages);
-
-  // Stuff that is shared between ubershaders and pixelgen.
-  WriteBitfieldExtractHeader(out, api_type, host_config);
-
-  WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box, custom_details);
-
-  // Custom shader details
-  WriteCustomShaderStructDef(&out, uid_data->genMode_numtexgens);
-  for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
-  {
-    const auto& shader_details = custom_details.shaders[i];
-    out.Write(fmt::runtime(shader_details.custom_shader), i);
-  }
-
-  out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
-            "sampleTexture(texmap, samp[texmap], uv, layer)\n");
-
-  if (uid_data->ztest == EmulatedZ::ForcedEarly)
-  {
-    // Zcomploc (aka early_ztest) is a way to control whether depth test is done before
-    // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
-    // this feature properly until 2012: Depth tests were always done after alpha testing.
-    // Most importantly, it was not possible to write to the depth buffer without also writing
-    // a color value (unless color writing was disabled altogether).
-
-    // OpenGL 4.2 actually provides two extensions which can force an early z test:
-    //  * ARB_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z
-    //  and stencil tests early.
-    //  * ARB_conservative_depth has 'layout(depth_unchanged) which signals to the driver that it
-    //  can make optimisations
-    //    which assume the pixel shader won't update the depth buffer.
-
-    // early_fragment_tests is the best option, as it requires the driver to do early-z and defines
-    // early-z exactly as
-    // we expect, with discard causing the shader to exit with only the depth buffer updated.
-
-    // Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation
-    // can be made and
-    // doesn't define what will happen if we discard the fragment. But the way modern graphics
-    // hardware is implemented
-    // means it is not unreasonable to expect the same behaviour as early_fragment_tests.
-    // We can also assume that if a driver has gone out of its way to support conservative depth and
-    // not image_load_store
-    // as required by OpenGL 4.2 that it will be doing the optimisation.
-    // If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth
-    // will only be written
-    // if the alpha test passes.
-
-    // We support Conservative as a fallback, because many drivers based on Mesa haven't implemented
-    // all of the
-    // ARB_image_load_store extension yet.
-
-    // This is a #define which signals whatever early-z method the driver supports.
-    out.Write("FORCE_EARLY_Z; \n");
-  }
-
-  const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable ||
-                                     uid_data->ztest == EmulatedZ::EarlyWithFBFetch;
-
-#ifdef __APPLE__
-  // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
-  // if we want to use it.
-  if (api_type == APIType::Vulkan || api_type == APIType::Metal)
-  {
-    if (!uid_data->no_dual_src)
-    {
-      out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
-                "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
-                use_framebuffer_fetch ? "real_ocol0" : "ocol0");
-    }
-    else
-    {
-      // Metal doesn't support a single unified variable for both input and output,
-      // so when using framebuffer fetch, we declare the input separately below.
-      out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
-                use_framebuffer_fetch ? "real_ocol0" : "ocol0");
-    }
-
-    if (use_framebuffer_fetch)
-    {
-      // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
-      out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
-    }
-  }
-  else
-#endif
-  {
-    if (use_framebuffer_fetch)
-    {
-      out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
-    }
-    else
-    {
-      out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out {} ocol0;\n",
-                uid_data->uint_output ? "uvec4" : "vec4");
-    }
-
-    if (!uid_data->no_dual_src)
-    {
-      out.Write("{} out {} ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)",
-                uid_data->uint_output ? "uvec4" : "vec4");
-    }
-  }
-
-  if (uid_data->per_pixel_depth)
-    out.Write("#define depth gl_FragDepth\n");
-
-  if (host_config.backend_geometry_shaders)
-  {
-    out.Write("VARYING_LOCATION(0) in VertexData {{\n");
-    GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config,
-                            GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel);
-
-    out.Write("}};\n");
-    if (stereo && !host_config.backend_gl_layer_in_fs)
-      out.Write("flat in int layer;");
-  }
-  else
-  {
-    // Let's set up attributes
-    u32 counter = 0;
-    out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
-              GetInterpolationQualifier(msaa, ssaa));
-    out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
-              GetInterpolationQualifier(msaa, ssaa));
-    for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
-    {
-      out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
-                GetInterpolationQualifier(msaa, ssaa), i);
-    }
-    if (!host_config.fast_depth_calc)
-    {
-      out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
-                GetInterpolationQualifier(msaa, ssaa));
-    }
-    if (per_pixel_lighting)
-    {
-      out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
-                GetInterpolationQualifier(msaa, ssaa));
-      out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
-                GetInterpolationQualifier(msaa, ssaa));
-    }
-  }
-
-  out.Write("void main()\n{{\n");
-  out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
-
-  bool has_custom_shaders = false;
-  if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(),
-                  [](const std::optional<CustomPixelShader>& ps) { return ps.has_value(); }))
-  {
-    WriteCustomShaderStructImpl(&out, numStages, per_pixel_lighting, uid_data);
-    has_custom_shaders = true;
-  }
-
-  if (use_framebuffer_fetch)
-  {
-    // Store off a copy of the initial framebuffer value.
-    //
-    // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
-    // framebuffer), we read from real_ocol0.
-    out.Write("#ifdef FB_FETCH_VALUE\n"
-              "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
-              "#else\n"
-              "\tfloat4 initial_ocol0 = real_ocol0;\n"
-              "#endif\n");
-
-    // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
-    // intermediate value with multiple reads & modifications, so we pull out the "real" output
-    // value above and use a temporary for calculations, then set the output value once at the
-    // end of the shader.
-    out.Write("\tfloat4 ocol0;\n");
-  }
-
-  if (uid_data->blend_enable)
-  {
-    out.Write("\tfloat4 ocol1;\n");
-  }
-
-  if (host_config.backend_geometry_shaders && stereo)
-  {
-    if (host_config.backend_gl_layer_in_fs)
-      out.Write("\tint layer = gl_Layer;\n");
-  }
-  else
-  {
-    out.Write("\tint layer = 0;\n");
-  }
-
-  out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS
-            "[3], prev = " I_COLORS "[0];\n"
-            "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, "
-            "0, 0);\n"
-            "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
-            "\tint alphabump=0;\n"
-            "\tint3 tevcoord=int3(0, 0, 0);\n"
-            "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n"
-            "\tint4 "
-            "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,"
-            "0);\n\n");  // tev combiner inputs
-
-  // On GLSL, input variables must not be assigned to.
-  // This is why we declare these variables locally instead.
-  out.Write("\tfloat4 col0 = colors_0;\n"
-            "\tfloat4 col1 = colors_1;\n");
-
-  if (per_pixel_lighting)
-  {
-    out.Write("\tfloat3 _normal = normalize(Normal.xyz);\n\n"
-              "\tfloat3 pos = WorldPos;\n");
-
-    out.Write("\tint4 lacc;\n"
-              "\tfloat3 ldir, h, cosAttn, distAttn;\n"
-              "\tfloat dist, dist2, attn;\n");
-
-    // TODO: Our current constant usage code isn't able to handle more than one buffer.
-    //       So we can't mark the VS constant as used here. But keep them here as reference.
-    // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
-    // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
-    // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
-    GenerateLightingShaderCode(out, uid_data->lighting, "colors_", "col");
-    // The number of colors available to TEV is determined by numColorChans.
-    // Normally this is performed in the vertex shader after lighting, but with per-pixel lighting,
-    // we need to perform it here.  (It needs to be done after lighting, as what was originally
-    // black might become a different color after lighting).
-    if (uid_data->numColorChans == 0)
-      out.Write("col0 = float4(0.0, 0.0, 0.0, 0.0);\n");
-    if (uid_data->numColorChans <= 1)
-      out.Write("col1 = float4(0.0, 0.0, 0.0, 0.0);\n");
-  }
-
-  if (uid_data->genMode_numtexgens == 0)
-  {
-    // TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex
-    // coord indices to 0.  Ideally, it shouldn't exist at all, but the exact behavior hasn't been
-    // tested.
-    out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
-  }
-  else
-  {
-    out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1);
-    for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
-    {
-      out.Write("\tint2 fixpoint_uv{} = int2(", i);
-      out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
-      out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
-      // TODO: S24 overflows here?
-    }
-  }
-
-  for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
-  {
-    if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
-    {
-      u32 texcoord = uid_data->GetTevindirefCoord(i);
-      const u32 texmap = uid_data->GetTevindirefMap(i);
-
-      // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
-      // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
-      // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
-      // the number of tex gens to 2 (bug 11462).
-      if (texcoord >= uid_data->genMode_numtexgens)
-        texcoord = 0;
-
-      out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
-      out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
-                (i & 1) ? "zw" : "xy");
-
-      out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i,
-                texmap);
-    }
-  }
-
-  for (u32 i = 0; i < numStages; i++)
-  {
-    // Build the equation for this stage
-    WriteStage(out, uid_data, i, api_type, stereo, has_custom_shaders);
-  }
-
-  {
-    // The results of the last texenv stage are put onto the screen,
-    // regardless of the used destination register
-    TevStageCombiner::ColorCombiner last_cc;
-    TevStageCombiner::AlphaCombiner last_ac;
-    last_cc.hex = uid_data->stagehash[uid_data->genMode_numtevstages].cc;
-    last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac;
-    if (last_cc.dest != TevOutput::Prev)
-    {
-      out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
-    }
-    if (last_ac.dest != TevOutput::Prev)
-    {
-      out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
-    }
-  }
-  out.Write("\tprev = prev & 255;\n");
-
-  // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
-  // (in this case we need to write a depth value if depth test passes regardless of the alpha
-  // testing result)
-  if (uid_data->Pretest == AlphaTestResult::Undetermined ||
-      (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
-  {
-    WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
-                   !uid_data->no_dual_src || uid_data->blend_enable);
-  }
-
-  // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
-  // alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow
-  // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over
-  // the center of the screen several times, but those rectangles shouldn't be visible).
-  // Blending seems to result in no changes to the output with an alpha of 1, even if the input
-  // color is white.
-  // TODO: Investigate this further: we might be handling blending incorrectly in general (though
-  // there might not be any good way of changing blending behavior)
-  out.Write("\t// Hardware testing indicates that an alpha of 1 can pass an alpha test,\n"
-            "\t// but doesn't do anything in blending\n"
-            "\tif (prev.a == 1) prev.a = 0;\n");
-
-  if (uid_data->zfreeze)
-  {
-    out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
-    out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);
-
-    out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
-
-    // Opengl has reversed vertical screenspace coordinates
-    if (api_type == APIType::OpenGL)
-      out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT);
-
-    out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
-              ".y * screenpos.y);\n");
-  }
-  else if (!host_config.fast_depth_calc)
-  {
-    // FastDepth means to trust the depth generated in perspective division.
-    // It should be correct, but it seems not to be as accurate as required. TODO: Find out why!
-    // For disabled FastDepth we just calculate the depth value again.
-    // The performance impact of this additional calculation doesn't matter, but it prevents
-    // the host GPU driver from performing any early depth test optimizations.
-    out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1);
-    // the screen space depth value = far z + (clip z / clip w) * z range
-    out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
-              "[1].y));\n");
-  }
-  else
-  {
-    if (!host_config.backend_reversed_depth_range)
-      out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
-    else
-      out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
-  }
-  out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
-
-  // depth texture can safely be ignored if the result won't be written to the depth buffer
-  // (early_ztest) and isn't used for fog either
-  const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
-
-  // Note: z-textures are not written to depth buffer if early depth test is used
-  const bool early_ztest = uid_data->ztest == EmulatedZ::Early ||
-                           uid_data->ztest == EmulatedZ::EarlyWithFBFetch ||
-                           uid_data->ztest == EmulatedZ::EarlyWithZComplocHack;
-  if (uid_data->per_pixel_depth && early_ztest)
-  {
-    if (!host_config.backend_reversed_depth_range)
-      out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
-    else
-      out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
-  }
-
-  // Note: depth texture output is only written to depth buffer if late depth test is used
-  // theoretical final depth value is used for fog calculation, though, so we have to emulate
-  // ztextures anyway
-  if (uid_data->ztex_op != ZTexOp::Disabled && !skip_ztexture)
-  {
-    // use the texture input of the last texture stage (textemp), hopefully this has been read and
-    // is in correct format...
-    out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1);
-    out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n",
-              (uid_data->ztex_op == ZTexOp::Add) ? "+ zCoord" : "");
-    out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
-  }
-
-  if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late)
-  {
-    if (!host_config.backend_reversed_depth_range)
-      out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
-    else
-      out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
-  }
-
-  // No dithering for RGB8 mode
-  if (uid_data->dither)
-  {
-    // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
-    // Here the matrix is encoded into the two factor constants
-    out.Write("\tint2 dither = int2(rawpos.xy) & 1;\n");
-    out.Write("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n");
-  }
-
-  WriteFog(out, uid_data);
-
-  for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
-  {
-    const auto& shader_details = custom_details.shaders[i];
-
-    if (!shader_details.custom_shader.empty())
-    {
-      out.Write("\t{{\n");
-      out.Write("\t\tcustom_data.final_color = float4(prev.r / 255.0, prev.g / 255.0, prev.b "
-                "/ 255.0, prev.a / 255.0);\n");
-      out.Write("\t\tCustomShaderOutput custom_output = {}_{}(custom_data);\n",
-                CUSTOM_PIXELSHADER_COLOR_FUNC, i);
-      out.Write("\t\tprev = int4(custom_output.main_rt.r * 255, custom_output.main_rt.g * 255, "
-                "custom_output.main_rt.b * 255, custom_output.main_rt.a * 255);\n");
-      out.Write("\t}}\n\n");
-    }
-  }
-
-  if (uid_data->logic_op_enable)
-    WriteLogicOp(out, uid_data);
-  else if (uid_data->emulate_logic_op_with_blend)
-    WriteLogicOpBlend(out, uid_data);
-
-  // Write the color and alpha values to the framebuffer
-  // If using shader blend, we still use the separate alpha
-  const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable;
-  WriteColor(out, api_type, uid_data, use_dual_source);
-
-  if (uid_data->blend_enable)
-    WriteBlend(out, uid_data);
-  else if (use_framebuffer_fetch)
-    out.Write("\treal_ocol0 = ocol0;\n");
-
-  if (uid_data->bounding_box)
-    out.Write("\tUpdateBoundingBox(rawpos.xy);\n");
-
-  out.Write("}}\n");
-
-  return out;
-}
 
 static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
                        APIType api_type, bool stereo, bool has_custom_shaders)
@@ -1750,58 +1156,6 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
     out.Write(", -1024, 1023)");
 
   out.Write(";\n");
-
-  if (has_custom_shaders)
-  {
-    // Color input
-    out.Write(
-        "\tcustom_data.tev_stages[{}].input_color[0].value = {} / float3(255.0, 255.0, 255.0);\n",
-        n, tev_c_input_table[cc.a]);
-    out.Write("\tcustom_data.tev_stages[{}].input_color[0].input_type = {};\n", n,
-              tev_c_input_type[cc.a]);
-    out.Write(
-        "\tcustom_data.tev_stages[{}].input_color[1].value = {} / float3(255.0, 255.0, 255.0);\n",
-        n, tev_c_input_table[cc.b]);
-    out.Write("\tcustom_data.tev_stages[{}].input_color[1].input_type = {};\n", n,
-              tev_c_input_type[cc.b]);
-    out.Write(
-        "\tcustom_data.tev_stages[{}].input_color[2].value = {} / float3(255.0, 255.0, 255.0);\n",
-        n, tev_c_input_table[cc.c]);
-    out.Write("\tcustom_data.tev_stages[{}].input_color[2].input_type = {};\n", n,
-              tev_c_input_type[cc.c]);
-    out.Write(
-        "\tcustom_data.tev_stages[{}].input_color[3].value = {} / float3(255.0, 255.0, 255.0);\n",
-        n, tev_c_input_table[cc.d]);
-    out.Write("\tcustom_data.tev_stages[{}].input_color[3].input_type = {};\n", n,
-              tev_c_input_type[cc.d]);
-
-    // Alpha input
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[0].value = {} / float(255.0);\n", n,
-              tev_a_input_table[ac.a]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[0].input_type = {};\n", n,
-              tev_a_input_type[ac.a]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[1].value = {} / float(255.0);\n", n,
-              tev_a_input_table[ac.b]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[1].input_type = {};\n", n,
-              tev_a_input_type[ac.b]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[2].value = {} / float(255.0);\n", n,
-              tev_a_input_table[ac.c]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[2].input_type = {};\n", n,
-              tev_a_input_type[ac.c]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[3].value = {} / float(255.0);\n", n,
-              tev_a_input_table[ac.d]);
-    out.Write("\tcustom_data.tev_stages[{}].input_alpha[3].input_type = {};\n", n,
-              tev_a_input_type[ac.d]);
-
-    // Texmap
-    out.Write("\tcustom_data.tev_stages[{}].texmap = {}u;\n", n, stage.tevorders_texmap);
-
-    // Output
-    out.Write("\tcustom_data.tev_stages[{}].output_color.rgb = {} / float3(255.0, 255.0, 255.0);\n",
-              n, tev_c_output_table[cc.dest]);
-    out.Write("\tcustom_data.tev_stages[{}].output_color.a = {} / float(255.0);\n", n,
-              tev_a_output_table[ac.dest]);
-  }
 }
 
 static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
@@ -1961,10 +1315,57 @@ constexpr Common::EnumMap<const char*, FogType::BackwardsExpSq> tev_fog_funcs_ta
     "\tfog = 1.0 - fog;\n   fog = exp2(-8.0 * fog * fog);\n"  // backward exp2
 };
 
-static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
 {
+  // Emits the shader-side half of a logic op that is emulated with blending: depending on
+  // the mode, prev is forced to a constant, inverted, or left untouched (no code emitted).
+  // Modes not listed below fall through without writing anything.
+  const auto mode = static_cast<LogicOp>(uid_data->logic_op_mode);
+
+  if (mode == LogicOp::Clear || mode == LogicOp::NoOp)
+  {
+    out.Write("\tprev = int4(0, 0, 0, 0);\n");
+  }
+  else if (mode == LogicOp::CopyInverted)
+  {
+    out.Write("\tprev ^= 255;\n");
+  }
+  else if (mode == LogicOp::Set || mode == LogicOp::Invert)
+  {
+    // Invert shares the all-255 constant with Set: in cooperation with blend
+    out.Write("\tprev = int4(255, 255, 255, 255);\n");
+  }
+  // LogicOp::Copy: do nothing, prev is already the value to output.
+}
+
+namespace PixelShader
+{
+void WriteDitherHeader(APIType api_type, const ShaderHostConfig& host_config,
+                       const pixel_shader_uid_data* uid_data, ShaderCode& out)
+{
+  // The dolphin_calculate_dither() helper is only generated when dithering is enabled.
+  if (!uid_data->dither)
+    return;
+  // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
+  // Here the matrix is encoded into the two factor constants
+  out.Write("ivec3 dolphin_calculate_dither(ivec4 prev, ivec4 pos)\n"
+            "{{\n"
+            "\tint2 dither = int2(pos.xy) & 1;\n"
+            "\treturn (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n"
+            "}}\n\n");
+}
+
+void WriteFogHeader(APIType api_type, const ShaderHostConfig& host_config,
+                    const pixel_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("ivec3 dolphin_calculate_fog(ivec4 color, vec4 pos, int zCoord)\n");
+  out.Write("{{\n");
   if (uid_data->fog_fsel == FogType::Off)
+  {
+    out.Write("\treturn color.rgb;\n");
+    out.Write("}}\n\n");
     return;  // no Fog
+  }
 
   out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR);
   out.SetConstantsUsed(C_FOGI, C_FOGI);
@@ -1992,7 +1393,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
   if (uid_data->fog_RangeBaseEnabled)
   {
     out.SetConstantsUsed(C_FOGF, C_FOGF);
-    out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
+    out.Write("\tfloat offset = (2.0 * (pos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
               "\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
               "\tuint indexlower = uint(floatindex);\n"
               "\tuint indexupper = indexlower + 1u;\n"
@@ -2016,119 +1417,198 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
   }
 
   out.Write("\tint ifog = iround(fog * 256.0);\n");
-  out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
+  out.Write("\treturn (color.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
+  out.Write("}}\n\n");
 }
 
-static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+void WriteDepthHeader(APIType api_type, const ShaderHostConfig& host_config,
+                      const pixel_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("int dolphin_calculate_zcoord(vec4 rawpos, vec4 clipPos, ivec4 last_stage_texmap)\n");
+  out.Write("{{\n");
+  // Base zCoord comes from the zfreeze slope, a recomputed clip-space depth, or rawpos.z.
+  if (uid_data->zfreeze)
+  {
+    out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
+    out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);
+
+    out.Write("\tvec2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
+
+    // OpenGL has reversed vertical screenspace coordinates, so y is mirrored around EFB_HEIGHT
+    if (api_type == APIType::OpenGL)
+      out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT);
+
+    out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
+              ".y * screenpos.y);\n");
+  }
+  else if (!host_config.fast_depth_calc)
+  {
+    // FastDepth means to trust the depth generated in perspective division.
+    // It should be correct, but it seems not to be as accurate as required. TODO: Find out why!
+    // For disabled FastDepth we just calculate the depth value again.
+    // The performance impact of this additional calculation doesn't matter, but it prevents
+    // the host GPU driver from performing any early depth test optimizations.
+    out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1);
+    // the screen space depth value = far z + (clip z / clip w) * z range
+    out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
+              "[1].y));\n");
+  }
+  else
+  {
+    if (!host_config.backend_reversed_depth_range)
+      out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
+    else
+      out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
+  }
+  out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
+
+  // depth texture can safely be ignored if the result won't be written to the depth buffer
+  // (early_ztest) and isn't used for fog either
+  const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
+
+  // Note: depth texture output is only written to depth buffer if late depth test is used
+  // theoretical final depth value is used for fog calculation, though, so we have to emulate
+  // ztextures anyway
+  if (uid_data->ztex_op != ZTexOp::Disabled && !skip_ztexture)
+  {
+    // use the texture input of the last texture stage, hopefully this has been read and
+    // is in correct format...
+    out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1);
+    out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, last_stage_texmap.xyzw) + " I_ZBIAS
+              "[1].w {};\n",
+              (uid_data->ztex_op == ZTexOp::Add) ? "+ zCoord" : "");
+    out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
+  }
+  // The generated helper hands the depth back as its return value.
+  out.Write("\treturn zCoord;\n");
+
+  out.Write("}}\n\n");
+}
+
+void WriteLogicOpHeader(APIType api_type, const ShaderHostConfig& host_config,
+                        const pixel_shader_uid_data* uid_data, ShaderCode& out)
 {
   static constexpr std::array<const char*, 16> logic_op_mode{
-      "int4(0, 0, 0, 0)",          // CLEAR
-      "prev & fb_value",           // AND
-      "prev & ~fb_value",          // AND_REVERSE
-      "prev",                      // COPY
-      "~prev & fb_value",          // AND_INVERTED
-      "fb_value",                  // NOOP
-      "prev ^ fb_value",           // XOR
-      "prev | fb_value",           // OR
-      "~(prev | fb_value)",        // NOR
-      "~(prev ^ fb_value)",        // EQUIV
-      "~fb_value",                 // INVERT
-      "prev | ~fb_value",          // OR_REVERSE
-      "~prev",                     // COPY_INVERTED
-      "~prev | fb_value",          // OR_INVERTED
-      "~(prev & fb_value)",        // NAND
-      "int4(255, 255, 255, 255)",  // SET
+      "ivec4(0, 0, 0, 0)",          // CLEAR
+      "prev & fb_value",            // AND
+      "prev & ~fb_value",           // AND_REVERSE
+      "prev",                       // COPY
+      "~prev & fb_value",           // AND_INVERTED
+      "fb_value",                   // NOOP
+      "prev ^ fb_value",            // XOR
+      "prev | fb_value",            // OR
+      "~(prev | fb_value)",         // NOR
+      "~(prev ^ fb_value)",         // EQUIV
+      "~fb_value",                  // INVERT
+      "prev | ~fb_value",           // OR_REVERSE
+      "~prev",                      // COPY_INVERTED
+      "~prev | fb_value",           // OR_INVERTED
+      "~(prev & fb_value)",         // NAND
+      "ivec4(255, 255, 255, 255)",  // SET
   };
 
-  out.Write("\tint4 fb_value = iround(initial_ocol0 * 255.0);\n");
-  out.Write("\tprev = ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]);
+  out.Write("ivec4 dolphin_calculate_logicop(vec4 color, ivec4 prev)\n");
+  out.Write("{{\n");
+  // prev must be an integer vector: GLSL bitwise operators reject floating-point operands.
+  out.Write("\tivec4 fb_value = iround(color * 255.0);\n");
+  out.Write("\treturn ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]);
+  out.Write("}}\n\n");
 }
 
-static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+void WriteColorHeader(APIType api_type, const ShaderHostConfig& host_config,
+                      const pixel_shader_uid_data* uid_data, ShaderCode& out)
 {
-  switch (static_cast<LogicOp>(uid_data->logic_op_mode))
+  if (uid_data->uint_output)
   {
-  case LogicOp::Clear:
-  case LogicOp::NoOp:
-    out.Write("\tprev = int4(0, 0, 0, 0);\n");
-    break;
-  case LogicOp::Copy:
-    // Do nothing!
-    break;
-  case LogicOp::CopyInverted:
-    out.Write("\tprev ^= 255;\n");
-    break;
-  case LogicOp::Set:
-  case LogicOp::Invert:  // In cooperation with blend
-    out.Write("\tprev = int4(255, 255, 255, 255);\n");
-    break;
-  default:
-    break;
+    out.Write("uvec4 dolphin_calculate_final_color0(ivec4 prev)\n");
   }
-}
+  else
+  {
+    out.Write("vec4 dolphin_calculate_final_color0(ivec4 prev)\n");
+  }
+  out.Write("{{\n");
 
-static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
-                       bool use_dual_source)
-{
   // Some backends require the shader outputs be uint when writing to a uint render target for logic
   // op.
   if (uid_data->uint_output)
   {
     if (uid_data->rgba6_format)
-      out.Write("\tocol0 = uint4(prev & 0xFC);\n");
+      out.Write("\treturn uint4(prev & 0xFC);\n");
     else
-      out.Write("\tocol0 = uint4(prev);\n");
-    return;
-  }
-
-  if (uid_data->rgba6_format)
-    out.Write("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n");
-  else
-    out.Write("\tocol0.rgb = float3(prev.rgb) / 255.0;\n");
-
-  // Colors will be blended against the 8-bit alpha from ocol1 and
-  // the 6-bit alpha from ocol0 will be written to the framebuffer
-  if (uid_data->useDstAlpha)
-  {
-    out.SetConstantsUsed(C_ALPHA, C_ALPHA);
-    out.Write("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n");
-
-    // Use dual-source color blending to perform dst alpha in a single pass
-    if (use_dual_source)
-      out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
+      out.Write("\treturn uint4(prev);\n");
   }
   else
   {
-    out.Write("\tocol0.a = float(prev.a >> 2) / 63.0;\n");
-    if (use_dual_source)
-      out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
+    out.Write("\tvec4 result;\n");
+    if (uid_data->rgba6_format)
+      out.Write("\tresult.rgb = float3(prev.rgb >> 2) / 63.0;\n");
+    else
+      out.Write("\tresult.rgb = float3(prev.rgb) / 255.0;\n");
+
+    // Colors will be blended against the 8-bit alpha from ocol1 and
+    // the 6-bit alpha from ocol0 will be written to the framebuffer
+    if (uid_data->useDstAlpha)
+    {
+      out.SetConstantsUsed(C_ALPHA, C_ALPHA);
+      out.Write("\tresult.a = float(" I_ALPHA ".a >> 2) / 63.0;\n");
+    }
+    else
+    {
+      out.Write("\tresult.a = float(prev.a >> 2) / 63.0;\n");
+    }
+    out.Write("\treturn result;\n");
   }
+
+  out.Write("}}\n\n");
+
+  const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable;
+  if (!uid_data->uint_output && use_dual_source)
+  {
+    out.Write("vec4 dolphin_calculate_final_color1(ivec4 prev)\n");
+    out.Write("{{\n");
+
+    // Colors will be blended against the 8-bit alpha from ocol1 and
+    // the 6-bit alpha from ocol0 will be written to the framebuffer
+    if (uid_data->useDstAlpha)
+    {
+      // Use dual-source color blending to perform dst alpha in a single pass
+      out.Write("\treturn vec4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
+    }
+    else
+    {
+      out.Write("\treturn vec4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
+    }
+    // Close dolphin_calculate_final_color1 only when it was actually opened above.
+    out.Write("}}\n\n");
+  }
 }
 
-static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+void WriteBlendHeader(ShaderCode& out, const pixel_shader_uid_data* uid_data)
 {
   if (uid_data->blend_enable)
   {
+    out.Write("vec4 dolphin_calculate_blend(vec4 initial_color, vec4 src_color)\n");
+    out.Write("{{\n");
     using Common::EnumMap;
     static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor{
         "float3(0,0,0);",                      // ZERO
         "float3(1,1,1);",                      // ONE
-        "initial_ocol0.rgb;",                  // DSTCLR
-        "float3(1,1,1) - initial_ocol0.rgb;",  // INVDSTCLR
+        "initial_color.rgb;",                  // DSTCLR
+        "float3(1,1,1) - initial_color.rgb;",  // INVDSTCLR
         "src_color.aaa;",                      // SRCALPHA
         "float3(1,1,1) - src_color.aaa;",      // INVSRCALPHA
-        "initial_ocol0.aaa;",                  // DSTALPHA
-        "float3(1,1,1) - initial_ocol0.aaa;",  // INVDSTALPHA
+        "initial_color.aaa;",                  // DSTALPHA
+        "float3(1,1,1) - initial_color.aaa;",  // INVDSTALPHA
     };
     static constexpr EnumMap<const char*, SrcBlendFactor::InvDstAlpha> blend_src_factor_alpha{
         "0.0;",                    // ZERO
         "1.0;",                    // ONE
-        "initial_ocol0.a;",        // DSTCLR
-        "1.0 - initial_ocol0.a;",  // INVDSTCLR
+        "initial_color.a;",        // DSTCLR
+        "1.0 - initial_color.a;",  // INVDSTCLR
         "src_color.a;",            // SRCALPHA
         "1.0 - src_color.a;",      // INVSRCALPHA
-        "initial_ocol0.a;",        // DSTALPHA
-        "1.0 - initial_ocol0.a;",  // INVDSTALPHA
+        "initial_color.a;",        // DSTALPHA
+        "1.0 - initial_color.a;",  // INVDSTALPHA
     };
     static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor{
         "float3(0,0,0);",                      // ZERO
@@ -2137,8 +1617,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
         "float3(1,1,1) - ocol0.rgb;",          // INVSRCCLR
         "src_color.aaa;",                      // SRCALHA
         "float3(1,1,1) - src_color.aaa;",      // INVSRCALPHA
-        "initial_ocol0.aaa;",                  // DSTALPHA
-        "float3(1,1,1) - initial_ocol0.aaa;",  // INVDSTALPHA
+        "initial_color.aaa;",                  // DSTALPHA
+        "float3(1,1,1) - initial_color.aaa;",  // INVDSTALPHA
     };
     static constexpr EnumMap<const char*, DstBlendFactor::InvDstAlpha> blend_dst_factor_alpha{
         "0.0;",                    // ZERO
@@ -2148,11 +1628,9 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
         "src_color.a;",            // SRCALPHA
         "1.0 - src_color.a;",      // INVSRCALPHA
-        "initial_ocol0.a;",        // DSTALPHA
-        "1.0 - initial_ocol0.a;",  // INVDSTALPHA
+        "initial_color.a;",        // DSTALPHA
+        "1.0 - initial_color.a;",  // INVDSTALPHA
     };
-    out.Write("\tfloat4 src_color = {};\n"
-              "\tfloat4 blend_src;",
-              uid_data->useDstAlpha ? "ocol1" : "ocol0");
+    out.Write("\tfloat4 blend_src;");
     out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]);
     out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]);
     out.Write("\tfloat4 blend_dst;\n");
@@ -2162,24 +1640,562 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
     out.Write("\tfloat4 blend_result;\n");
     if (uid_data->blend_subtract)
     {
-      out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * "
+      out.Write("\tblend_result.rgb = initial_color.rgb * blend_dst.rgb - ocol0.rgb * "
                 "blend_src.rgb;\n");
     }
     else
     {
       out.Write(
-          "\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n");
+          "\tblend_result.rgb = initial_color.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n");
     }
 
     if (uid_data->blend_subtract_alpha)
-      out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n");
+      out.Write("\tblend_result.a = initial_color.a * blend_dst.a - ocol0.a * blend_src.a;\n");
     else
-      out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
+      out.Write("\tblend_result.a = initial_color.a * blend_dst.a + ocol0.a * blend_src.a;\n");
+
+    out.Write("\treturn blend_result;\n");
+    out.Write("}}\n\n");
+  }
+}
+
+void WriteEmulatedFragmentBodyHeader(APIType api_type, const ShaderHostConfig& host_config,
+                                     const pixel_shader_uid_data* uid_data, ShaderCode& out)
+{
+  // Emits the GLSL function dolphin_emulated_fragment(): signature and opening brace first,
+  // then the statements produced by WriteFragmentBody(), then the closing brace.
+  out.Write("void dolphin_emulated_fragment(in DolphinFragmentInput frag_input, out "
+            "DolphinFragmentOutput frag_output)\n"
+            "{{\n");
+
+  WriteFragmentBody(api_type, host_config, uid_data, out);
+
+  out.Write("}}\n");
+}
+
+void WriteFragmentDefinitions(APIType api_type, const ShaderHostConfig& host_config,
+                              const pixel_shader_uid_data* uid_data, ShaderCode& out,
+                              bool as_comment)
+{
+  // Emits the GLSL struct definitions and attenuation-type constants for the fragment functions.
+  // Only uid_data->genMode_numtexgens influences the text; api_type, host_config and
+  // as_comment are not consulted here.
+  out.Write("struct DolphinLightData\n"
+            "{{\n"
+            "\tfloat3 position;\n"
+            "\tfloat3 direction;\n"
+            "\tfloat3 color;\n"
+            "\tuint attenuation_type;\n"
+            "\tfloat4 cosatt;\n"
+            "\tfloat4 distatt;\n"
+            "}};\n\n");
+
+  // Input of fragment() / dolphin_emulated_fragment(); the tex members are appended below.
+  out.Write("struct DolphinFragmentInput\n"
+            "{{\n"
+            "\tvec4 color_0;\n"
+            "\tvec4 color_1;\n"
+            "\tint layer;\n"
+            "\tvec3 normal;\n"
+            "\tvec3 position;\n");
+
+  // One member per active texgen, then the remaining slots up to tex7.
+  const u32 active_texgens = uid_data->genMode_numtexgens;
+  for (u32 slot = 0; slot < active_texgens; ++slot)
+    out.Write("\tvec3 tex{};\n", slot);
+  for (u32 slot = active_texgens; slot < 8; ++slot)
+    out.Write("\tvec3 tex{};\n", slot);
+
+  out.Write("\n"
+            "\tDolphinLightData[8] lights_chan0_color;\n"
+            "\tDolphinLightData[8] lights_chan0_alpha;\n"
+            "\tDolphinLightData[8] lights_chan1_color;\n"
+            "\tDolphinLightData[8] lights_chan1_alpha;\n"
+            "\tfloat4[2] ambient_lighting;\n"
+            "\tfloat4[2] base_material;\n"
+            "\tuint light_chan0_color_count;\n"
+            "\tuint light_chan0_alpha_count;\n"
+            "\tuint light_chan1_color_count;\n"
+            "\tuint light_chan1_alpha_count;\n"
+            "}};\n\n");
+
+  out.Write("struct DolphinFragmentOutput\n"
+            "{{\n"
+            "\tivec4 main;\n"
+            "\tivec4 last_texture;\n"
+            "}};\n\n");
+
+  // CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE "enum" values
+  out.Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_NONE = {}u;\n"
+            "const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_POINT = {}u;\n"
+            "const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_DIR = {}u;\n"
+            "const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_SPOT = {}u;\n",
+            static_cast<u32>(AttenuationFunc::None), static_cast<u32>(AttenuationFunc::Spec),
+            static_cast<u32>(AttenuationFunc::Dir), static_cast<u32>(AttenuationFunc::Spot));
+}
+
+void WriteFragmentBody(APIType api_type, const ShaderHostConfig& host_config,
+                       const pixel_shader_uid_data* uid_data, ShaderCode& out)
+{
+  const bool per_pixel_lighting = host_config.per_pixel_lighting;
+  const bool stereo = host_config.stereo;
+  const u32 numStages = uid_data->genMode_numtevstages + 1;  // field is the last stage's index
+  // Emits the emulated fragment function's statements: frag_input -> TEV stages -> frag_output.
+  out.Write("\tvec4 col0 = frag_input.color_0;\n");
+  out.Write("\tvec4 col1 = frag_input.color_1;\n");
+  out.Write("\tint layer = frag_input.layer;\n");
+
+  out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS
+            "[3], prev = " I_COLORS "[0];\n"
+            "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, "
+            "0, 0);\n"
+            "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
+            "\tint alphabump=0;\n"
+            "\tint3 tevcoord=int3(0, 0, 0);\n"
+            "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n"
+            "\tint4 "
+            "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,"
+            "0);\n\n");  // tev combiner inputs
+
+  if (per_pixel_lighting)  // relight col0/col1 per fragment instead of using the inputs as-is
+  {
+    if (uid_data->numColorChans > 0)
+    {
+      out.Write("\tcol0 = dolphin_calculate_lighting_chn0(col0, vec4(frag_input.position, 1), "
+                "frag_input.normal);\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tcol0 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+    }
+
+    if (uid_data->numColorChans == 2)
+    {
+      out.Write("\tcol1 = dolphin_calculate_lighting_chn1(col1, vec4(frag_input.position, 1), "
+                "frag_input.normal);\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tcol1 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+    }
+  }
+
+  if (uid_data->genMode_numtexgens == 0)
+  {
+    // TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex
+    // coord indices to 0.  Ideally, it shouldn't exist at all, but the exact behavior hasn't been
+    // tested.
+    out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
   }
   else
   {
-    out.Write("\tfloat4 blend_result = ocol0;\n");
+    out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1);
+    for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
+    {
+      out.Write("\tint2 fixpoint_uv{} = int2(", i);
+      out.Write("(frag_input.tex{}.z == 0.0 ? frag_input.tex{}.xy : frag_input.tex{}.xy / "
+                "frag_input.tex{}.z)",
+                i, i, i, i);
+      out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
+      // TODO: S24 overflows here?
+    }
   }
 
-  out.Write("\treal_ocol0 = blend_result;\n");
+  for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)  // indirect texture lookups
+  {
+    if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
+    {
+      u32 texcoord = uid_data->GetTevindirefCoord(i);
+      const u32 texmap = uid_data->GetTevindirefMap(i);
+
+      // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord
+      // does not exist), then tex coord 0 is used (though sometimes glitchy effects happen on
+      // console). This affects the Mario portrait in Luigi's Mansion, where the developers forgot
+      // to set the number of tex gens to 2 (bug 11462).
+      if (texcoord >= uid_data->genMode_numtexgens)
+        texcoord = 0;
+
+      out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
+      out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
+                (i & 1) ? "zw" : "xy");
+
+      out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i,
+                texmap);
+    }
+  }
+
+  for (u32 i = 0; i < numStages; i++)
+  {
+    // Build the equation for this stage
+    WriteStage(out, uid_data, i, api_type, stereo, false);
+  }
+
+  {
+    // The results of the last texenv stage are put onto the screen,
+    // regardless of the used destination register
+    TevStageCombiner::ColorCombiner last_cc;
+    TevStageCombiner::AlphaCombiner last_ac;
+    last_cc.hex = uid_data->stagehash[uid_data->genMode_numtevstages].cc;
+    last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac;
+    if (last_cc.dest != TevOutput::Prev)
+    {
+      out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
+    }
+    if (last_ac.dest != TevOutput::Prev)
+    {
+      out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
+    }
+  }
+
+  out.Write("\tfrag_output.last_texture = textemp;\n");
+  out.Write("\tfrag_output.main = prev;\n");  // final TEV result
 }
+
+// Builds the complete GLSL pixel shader for |uid_data|. When |custom_pixel| is empty a default
+// fragment() that forwards to dolphin_emulated_fragment() is emitted; otherwise |custom_pixel| is
+// written verbatim. A non-empty |custom_uniforms| is wrapped in the CustomShaderBlock UBO.
+ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config,
+                           const pixel_shader_uid_data* uid_data, std::string_view custom_pixel,
+                           std::string_view custom_uniforms)
+{
+  ShaderCode out;
+
+  // Take per-pixel lighting from the host config snapshot, exactly as WriteFragmentBody() and the
+  // other settings below do, so the helpers emitted here always match the calls emitted there.
+  const bool per_pixel_lighting = host_config.per_pixel_lighting;
+  const bool msaa = host_config.msaa;
+  const bool ssaa = host_config.ssaa;
+  const bool stereo = host_config.stereo;
+  const u32 numStages = uid_data->genMode_numtevstages + 1;
+
+  out.Write("// Pixel Shader for TEV stages\n");
+  out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages,
+            uid_data->genMode_numtexgens, uid_data->genMode_numindstages);
+
+  // Stuff that is shared between ubershaders and pixelgen.
+  WriteBitfieldExtractHeader(out, api_type, host_config);
+
+  WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box, {});
+
+  if (per_pixel_lighting)
+    GenerateLightingShaderHeader(out, uid_data->lighting);
+
+  WriteDitherHeader(api_type, host_config, uid_data, out);
+
+  WriteFogHeader(api_type, host_config, uid_data, out);
+
+  WriteDepthHeader(api_type, host_config, uid_data, out);
+
+  WriteLogicOpHeader(api_type, host_config, uid_data, out);
+
+  WriteColorHeader(api_type, host_config, uid_data, out);
+
+  WriteBlendHeader(out, uid_data);
+
+  out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
+            "sampleTexture(texmap, samp[texmap], uv, layer)\n");
+
+  if (uid_data->ztest == EmulatedZ::ForcedEarly)
+  {
+    // Zcomploc (aka early_ztest) is a way to control whether depth test is done before
+    // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
+    // this feature properly until 2012: Depth tests were always done after alpha testing.
+    // Most importantly, it was not possible to write to the depth buffer without also writing
+    // a color value (unless color writing was disabled altogether).
+
+    // OpenGL 4.2 actually provides two extensions which can force an early z test:
+    //  * ARB_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z
+    //  and stencil tests early.
+    //  * ARB_conservative_depth has 'layout(depth_unchanged) which signals to the driver that it
+    //  can make optimisations
+    //    which assume the pixel shader won't update the depth buffer.
+
+    // early_fragment_tests is the best option, as it requires the driver to do early-z and defines
+    // early-z exactly as
+    // we expect, with discard causing the shader to exit with only the depth buffer updated.
+
+    // Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation
+    // can be made and
+    // doesn't define what will happen if we discard the fragment. But the way modern graphics
+    // hardware is implemented
+    // means it is not unreasonable to expect the same behaviour as early_fragment_tests.
+    // We can also assume that if a driver has gone out of its way to support conservative depth and
+    // not image_load_store
+    // as required by OpenGL 4.2 that it will be doing the optimisation.
+    // If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth
+    // will only be written
+    // if the alpha test passes.
+
+    // We support Conservative as a fallback, because many drivers based on Mesa haven't implemented
+    // all of the
+    // ARB_image_load_store extension yet.
+
+    // This is a #define which signals whatever early-z method the driver supports.
+    out.Write("FORCE_EARLY_Z; \n");
+  }
+
+  const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable ||
+                                     uid_data->ztest == EmulatedZ::EarlyWithFBFetch;
+
+#ifdef __APPLE__
+  // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
+  // if we want to use it.
+  if (api_type == APIType::Vulkan || api_type == APIType::Metal)
+  {
+    if (!uid_data->no_dual_src)
+    {
+      out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
+                "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
+                use_framebuffer_fetch ? "real_ocol0" : "ocol0");
+    }
+    else
+    {
+      // Metal doesn't support a single unified variable for both input and output,
+      // so when using framebuffer fetch, we declare the input separately below.
+      out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
+                use_framebuffer_fetch ? "real_ocol0" : "ocol0");
+    }
+
+    if (use_framebuffer_fetch)
+    {
+      // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
+      out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
+    }
+  }
+  else
+#endif
+  {
+    if (use_framebuffer_fetch)
+    {
+      out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
+    }
+    else
+    {
+      out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out {} ocol0;\n",
+                uid_data->uint_output ? "uvec4" : "vec4");
+    }
+
+    if (!uid_data->no_dual_src)
+    {
+      out.Write("{} out {} ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)",
+                uid_data->uint_output ? "uvec4" : "vec4");
+    }
+  }
+
+  if (uid_data->per_pixel_depth)
+    out.Write("#define depth gl_FragDepth\n");
+
+  if (host_config.backend_geometry_shaders)
+  {
+    out.Write("VARYING_LOCATION(0) in VertexData {{\n");
+    GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config,
+                            GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel);
+
+    out.Write("}};\n");
+    if (stereo && !host_config.backend_gl_layer_in_fs)
+      out.Write("flat in int layer;");
+  }
+  else
+  {
+    // Let's set up attributes
+    u32 counter = 0;
+    out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
+              GetInterpolationQualifier(msaa, ssaa));
+    out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
+              GetInterpolationQualifier(msaa, ssaa));
+    for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
+    {
+      out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
+                GetInterpolationQualifier(msaa, ssaa), i);
+    }
+    if (!host_config.fast_depth_calc)
+    {
+      out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
+                GetInterpolationQualifier(msaa, ssaa));
+    }
+    if (per_pixel_lighting)
+    {
+      out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
+                GetInterpolationQualifier(msaa, ssaa));
+      out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
+                GetInterpolationQualifier(msaa, ssaa));
+    }
+  }
+
+  WriteFragmentDefinitions(api_type, host_config, uid_data, out, false);
+
+  if (!custom_uniforms.empty())
+  {
+    out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n");
+    out.Write("{}", custom_uniforms);
+    out.Write("}} custom_uniforms;\n");
+  }
+
+  WriteEmulatedFragmentBodyHeader(api_type, host_config, uid_data, out);
+
+  if (custom_pixel.empty())
+  {
+    out.Write("{}\n"
+              "{{\n"
+              "\tdolphin_emulated_fragment(frag_input, frag_output);\n"
+              "}}\n",
+              fragment_definition);
+  }
+  else
+  {
+    out.Write("{}\n", custom_pixel);
+  }
+
+  out.Write("void main()\n{{\n");
+  out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
+
+  if (use_framebuffer_fetch)
+  {
+    // Store off a copy of the initial framebuffer value.
+    //
+    // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
+    // framebuffer), we read from real_ocol0.
+    out.Write("#ifdef FB_FETCH_VALUE\n"
+              "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
+              "#else\n"
+              "\tfloat4 initial_ocol0 = real_ocol0;\n"
+              "#endif\n");
+
+    // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
+    // intermediate value with multiple reads & modifications, so we pull out the "real" output
+    // value above and use a temporary for calculations, then set the output value once at the
+    // end of the shader.
+    out.Write("\tfloat4 ocol0;\n");
+  }
+
+  if (uid_data->blend_enable)
+    out.Write("\tfloat4 ocol1;\n");
+
+  if (host_config.backend_geometry_shaders && stereo)
+  {
+    if (host_config.backend_gl_layer_in_fs)
+      out.Write("\tint layer = gl_Layer;\n");
+  }
+  else
+  {
+    out.Write("\tint layer = 0;\n");
+  }
+
+  out.Write("\tDolphinFragmentInput frag_input;\n");
+  out.Write("\tfrag_input.color_0 = colors_0;\n");
+  out.Write("\tfrag_input.color_1 = colors_1;\n");
+  out.Write("\tfrag_input.layer = layer;\n");
+  if (per_pixel_lighting)
+  {
+    out.Write("\tfrag_input.normal = normalize(Normal);\n");
+    out.Write("\tfrag_input.position = WorldPos;\n");
+  }
+  else
+  {
+    out.Write("\tfrag_input.normal = vec3(0, 0, 0);\n");
+    out.Write("\tfrag_input.position = vec3(0, 0, 0);\n");
+  }
+  for (u32 i = 0; i < uid_data->genMode_numtexgens; i++)
+  {
+    out.Write("\tfrag_input.tex{0} = tex{0};\n", i);
+  }
+
+  if (!custom_pixel.empty())
+    GenerateCustomLighting(&out, uid_data->lighting);
+
+  out.Write("\tDolphinFragmentOutput frag_output;\n");
+  out.Write("\tfragment(frag_input, frag_output);\n");
+  out.Write("\tivec4 prev = frag_output.main & 255;\n");
+
+  // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
+  // (in this case we need to write a depth value if depth test passes regardless of the alpha
+  // testing result)
+  if (uid_data->Pretest == AlphaTestResult::Undetermined ||
+      (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
+  {
+    WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
+                   !uid_data->no_dual_src || uid_data->blend_enable);
+  }
+
+  // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
+  // alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow
+  // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over
+  // the center of the screen several times, but those rectangles shouldn't be visible).
+  // Blending seems to result in no changes to the output with an alpha of 1, even if the input
+  // color is white.
+  // TODO: Investigate this further: we might be handling blending incorrectly in general (though
+  // there might not be any good way of changing blending behavior)
+  out.Write("\t// Hardware testing indicates that an alpha of 1 can pass an alpha test,\n"
+            "\t// but doesn't do anything in blending\n"
+            "\tif (prev.a == 1) prev.a = 0;\n");
+
+  const bool write_depth =
+      uid_data->ztest == EmulatedZ::Early || uid_data->ztest == EmulatedZ::EarlyWithFBFetch ||
+      uid_data->ztest == EmulatedZ::EarlyWithZComplocHack || uid_data->ztest == EmulatedZ::Late;
+  const bool needs_depth = uid_data->per_pixel_depth && write_depth;
+  const bool needs_zcoord = needs_depth || uid_data->fog_fsel != FogType::Off;
+  if (needs_zcoord)
+  {
+    if (!host_config.fast_depth_calc)
+    {
+      out.Write(
+          "\tint zCoord = dolphin_calculate_zcoord(rawpos, clipPos, frag_output.last_texture);\n");
+    }
+    else
+    {
+      out.Write("\tint zCoord = dolphin_calculate_zcoord(rawpos, vec4(0, 0, 0, 0), "
+                "frag_output.last_texture);\n");
+    }
+  }
+
+  if (needs_depth)
+  {
+    if (!host_config.backend_reversed_depth_range)
+      out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
+    else
+      out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
+  }
+
+  // No dithering for RGB8 mode
+  if (uid_data->dither)
+    out.Write("\tprev.rgb = dolphin_calculate_dither(rawpos, prev);\n");
+
+  if (uid_data->fog_fsel != FogType::Off)
+    out.Write("\tprev.rgb = dolphin_calculate_fog(prev, rawpos, zCoord);\n");
+
+  if (uid_data->logic_op_enable)
+    out.Write("\tprev = dolphin_calculate_logicop(initial_ocol0, prev);\n");
+  else if (uid_data->emulate_logic_op_with_blend)
+    WriteLogicOpBlend(out, uid_data);
+
+  // Write the color and alpha values to the framebuffer
+  // If using shader blend, we still use the separate alpha
+  out.Write("\tocol0 = dolphin_calculate_final_color0(prev);\n");
+
+  const bool use_dual_source = !uid_data->no_dual_src || uid_data->blend_enable;
+  if (use_dual_source)
+    out.Write("\tocol1 = dolphin_calculate_final_color1(prev);\n");
+
+  if (uid_data->blend_enable)
+  {
+    if (uid_data->useDstAlpha)
+      out.Write("\tocol0 = dolphin_calculate_blend(initial_ocol0, ocol1);\n");
+    else
+      out.Write("\tocol0 = dolphin_calculate_blend(initial_ocol0, ocol0);\n");
+  }
+
+  if (use_framebuffer_fetch)
+    out.Write("\treal_ocol0 = ocol0;\n");
+
+  if (uid_data->bounding_box)
+    out.Write("\tUpdateBoundingBox(rawpos.xy);\n");
+
+  out.Write("}}\n");
+
+  return out;
+}
+}  // namespace PixelShader
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index e5dd43d754..ce41a7e841 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@@ -158,15 +158,24 @@ struct pixel_shader_uid_data
 
 using PixelShaderUid = ShaderUid<pixel_shader_uid_data>;
 
-void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_stages, bool per_pixel_lighting,
-                                 const pixel_shader_uid_data* uid_data);
-
-ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config,
-                                   const pixel_shader_uid_data* uid_data,
-                                   const CustomPixelShaderContents& custom_details);
 void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
                                   const ShaderHostConfig& host_config, bool bounding_box,
                                   const CustomPixelShaderContents& custom_details);
 void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config,
                                    PixelShaderUid* uid);
 PixelShaderUid GetPixelShaderUid();
+
+namespace PixelShader
+{
+// GLSL signature of the fragment() hook that the main() generated by WriteFullShader() calls.
+inline constexpr std::string_view fragment_definition =
+    "void fragment(in DolphinFragmentInput frag_input, out DolphinFragmentOutput frag_output)";
+void WriteFragmentDefinitions(APIType api_type, const ShaderHostConfig& host_config,
+                              const pixel_shader_uid_data* uid_data, ShaderCode& out,
+                              bool as_comment);
+void WriteFragmentBody(APIType api_type, const ShaderHostConfig& host_config,
+                       const pixel_shader_uid_data* uid_data, ShaderCode& out);
+ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config,
+                           const pixel_shader_uid_data* uid_data, std::string_view custom_pixel,
+                           std::string_view custom_uniforms);
+}  // namespace PixelShader
diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp
index 587ee3cc7f..3438d2fdf3 100644
--- a/Source/Core/VideoCommon/ShaderCache.cpp
+++ b/Source/Core/VideoCommon/ShaderCache.cpp
@@ -433,7 +433,7 @@ void ShaderCache::CompileMissingPipelines()
 std::unique_ptr<AbstractShader> ShaderCache::CompileVertexShader(const VertexShaderUid& uid) const
 {
   const ShaderCode source_code =
-      GenerateVertexShaderCode(m_api_type, m_host_config, uid.GetUidData());
+      VertexShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", "");
   return g_gfx->CreateShaderFromSource(ShaderStage::Vertex, source_code.GetBuffer());
 }
 
@@ -449,7 +449,7 @@ ShaderCache::CompileVertexUberShader(const UberShader::VertexShaderUid& uid) con
 std::unique_ptr<AbstractShader> ShaderCache::CompilePixelShader(const PixelShaderUid& uid) const
 {
   const ShaderCode source_code =
-      GeneratePixelShaderCode(m_api_type, m_host_config, uid.GetUidData(), {});
+      PixelShader::WriteFullShader(m_api_type, m_host_config, uid.GetUidData(), "", "");
   return g_gfx->CreateShaderFromSource(ShaderStage::Pixel, source_code.GetBuffer());
 }
 
diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp
index d132847f14..83bc4e6201 100644
--- a/Source/Core/VideoCommon/ShaderGenCommon.cpp
+++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp
@@ -363,95 +363,3 @@ const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interfa
       return "sample";
   }
 }
-
-void WriteCustomShaderStructDef(ShaderCode* out, u32 numtexgens)
-{
-  // Bump this when there are breaking changes to the API
-  out->Write("#define CUSTOM_SHADER_API_VERSION 1;\n");
-
-  // CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE "enum" values
-  out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_NONE = {}u;\n",
-             static_cast<u32>(AttenuationFunc::None));
-  out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_POINT = {}u;\n",
-             static_cast<u32>(AttenuationFunc::Spec));
-  out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_DIR = {}u;\n",
-             static_cast<u32>(AttenuationFunc::Dir));
-  out->Write("const uint CUSTOM_SHADER_LIGHTING_ATTENUATION_TYPE_SPOT = {}u;\n",
-             static_cast<u32>(AttenuationFunc::Spot));
-
-  out->Write("struct CustomShaderOutput\n");
-  out->Write("{{\n");
-  out->Write("\tfloat4 main_rt;\n");
-  out->Write("}};\n\n");
-
-  out->Write("struct CustomShaderLightData\n");
-  out->Write("{{\n");
-  out->Write("\tfloat3 position;\n");
-  out->Write("\tfloat3 direction;\n");
-  out->Write("\tfloat3 color;\n");
-  out->Write("\tuint attenuation_type;\n");
-  out->Write("\tfloat4 cosatt;\n");
-  out->Write("\tfloat4 distatt;\n");
-  out->Write("}};\n\n");
-
-  // CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE "enum" values
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV = 0u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR = 1u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX = 2u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS = 3u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST = 4u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC = 5u;\n");
-  out->Write("const uint CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED = 6u;\n");
-
-  out->Write("struct CustomShaderTevStageInputColor\n");
-  out->Write("{{\n");
-  out->Write("\tuint input_type;\n");
-  out->Write("\tfloat3 value;\n");
-  out->Write("}};\n\n");
-
-  out->Write("struct CustomShaderTevStageInputAlpha\n");
-  out->Write("{{\n");
-  out->Write("\tuint input_type;\n");
-  out->Write("\tfloat value;\n");
-  out->Write("}};\n\n");
-
-  out->Write("struct CustomShaderTevStage\n");
-  out->Write("{{\n");
-  out->Write("\tCustomShaderTevStageInputColor[4] input_color;\n");
-  out->Write("\tCustomShaderTevStageInputAlpha[4] input_alpha;\n");
-  out->Write("\tuint texmap;\n");
-  out->Write("\tfloat4 output_color;\n");
-  out->Write("}};\n\n");
-
-  // Custom structure for data we pass to custom shader hooks
-  out->Write("struct CustomShaderData\n");
-  out->Write("{{\n");
-  out->Write("\tfloat3 position;\n");
-  out->Write("\tfloat3 normal;\n");
-  if (numtexgens == 0)
-  {
-    // Cheat so shaders compile
-    out->Write("\tfloat3[1] texcoord;\n");
-  }
-  else
-  {
-    out->Write("\tfloat3[{}] texcoord;\n", numtexgens);
-  }
-  out->Write("\tuint texcoord_count;\n");
-  out->Write("\tuint[8] texmap_to_texcoord_index;\n");
-  out->Write("\tCustomShaderLightData[8] lights_chan0_color;\n");
-  out->Write("\tCustomShaderLightData[8] lights_chan0_alpha;\n");
-  out->Write("\tCustomShaderLightData[8] lights_chan1_color;\n");
-  out->Write("\tCustomShaderLightData[8] lights_chan1_alpha;\n");
-  out->Write("\tfloat4[2] ambient_lighting;\n");
-  out->Write("\tfloat4[2] base_material;\n");
-  out->Write("\tuint light_chan0_color_count;\n");
-  out->Write("\tuint light_chan0_alpha_count;\n");
-  out->Write("\tuint light_chan1_color_count;\n");
-  out->Write("\tuint light_chan1_alpha_count;\n");
-  out->Write("\tCustomShaderTevStage[16] tev_stages;\n");
-  out->Write("\tuint tev_stage_count;\n");
-  out->Write("\tfloat4 final_color;\n");
-  out->Write("\tuint time_ms;\n");
-  out->Write("}};\n\n");
-}
diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h
index 4723cbfc79..7d2c80f086 100644
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@@ -345,5 +345,3 @@ struct CustomPixelShaderContents
 
   bool operator==(const CustomPixelShaderContents& other) const = default;
 };
-
-void WriteCustomShaderStructDef(ShaderCode* out, u32 numtexgens);
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index ca8f42ec8f..dd03c883ae 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -17,263 +17,6 @@
 
 namespace UberShader
 {
-namespace
-{
-void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_texgen, bool per_pixel_lighting)
-{
-  out->Write("\tCustomShaderData custom_data;\n");
-  if (per_pixel_lighting)
-  {
-    out->Write("\tcustom_data.position = WorldPos;\n");
-    out->Write("\tcustom_data.normal = Normal;\n");
-  }
-  else
-  {
-    out->Write("\tcustom_data.position = float3(0, 0, 0);\n");
-    out->Write("\tcustom_data.normal = float3(0, 0, 0);\n");
-  }
-
-  if (num_texgen == 0) [[unlikely]]
-  {
-    out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n");
-  }
-  else
-  {
-    for (u32 i = 0; i < num_texgen; ++i)
-    {
-      out->Write("\tif (tex{0}.z == 0.0)\n", i);
-      out->Write("\t{{\n");
-      out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i);
-      out->Write("\t}}\n");
-      out->Write("\telse {{\n");
-      out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i);
-      out->Write("\t}}\n");
-    }
-  }
-
-  out->Write("\tcustom_data.texcoord_count = {};\n", num_texgen);
-
-  for (u32 i = 0; i < 8; i++)
-  {
-    // Shader compilation complains if every index isn't initialized
-    out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = {0};\n", i);
-  }
-
-  for (u32 i = 0; i < NUM_XF_COLOR_CHANNELS; i++)
-  {
-    out->Write("\tcustom_data.base_material[{}] = vec4(0, 0, 0, 1);\n", i);
-    out->Write("\tcustom_data.ambient_lighting[{}] = vec4(0, 0, 0, 1);\n", i);
-
-    // Shader compilation errors can throw if not everything is initialized
-    for (u32 light_count_index = 0; light_count_index < 8; light_count_index++)
-    {
-      // Color
-      out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", i,
-                 light_count_index);
-
-      // Alpha
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", i,
-                 light_count_index);
-      out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", i,
-                 light_count_index);
-    }
-
-    out->Write("\tcustom_data.light_chan{}_color_count = 0;\n", i);
-    out->Write("\tcustom_data.light_chan{}_alpha_count = 0;\n", i);
-  }
-
-  if (num_texgen > 0) [[likely]]
-  {
-    out->Write("\n");
-    out->Write("\tfor(uint stage = 0u; stage <= num_stages; stage++)\n");
-    out->Write("\t{{\n");
-    out->Write("\t\tStageState ss;\n");
-    out->Write("\t\tss.order = bpmem_tevorder(stage>>1);\n");
-    out->Write("\t\tif ((stage & 1u) == 1u)\n");
-    out->Write("\t\t\tss.order = ss.order >> {};\n\n",
-               int(TwoTevStageOrders().enable_tex_odd.StartBit() -
-                   TwoTevStageOrders().enable_tex_even.StartBit()));
-    out->Write("\t\tuint texmap = {};\n",
-               BitfieldExtract<&TwoTevStageOrders::texcoord_even>("ss.order"));
-    // Shader compilation is weird, shader arrays can't use indexing by variable
-    //  to set values unless the variable is an index in a for loop.
-    // So instead we have to do this if check nonsense
-    for (u32 i = 0; i < 8; i++)
-    {
-      out->Write("\t\tif (texmap == {})\n", i);
-      out->Write("\t\t{{\n");
-      out->Write("\t\t\tcustom_data.texmap_to_texcoord_index[{}] = selectTexCoordIndex(texmap);\n",
-                 i);
-      out->Write("\t\t}}\n");
-    }
-    out->Write("\t}}\n");
-  }
-
-  if (per_pixel_lighting)
-  {
-    out->Write("\tuint light_count = 0;\n");
-    out->Write("\tfor (uint chan = 0u; chan < {}u; chan++)\n", NUM_XF_COLOR_CHANNELS);
-    out->Write("\t{{\n");
-    out->Write("\t\tuint colorreg = xfmem_color(chan);\n");
-    out->Write("\t\tuint alphareg = xfmem_alpha(chan);\n");
-    for (const auto& color_type : std::array<std::string_view, 2>{"colorreg", "alphareg"})
-    {
-      if (color_type == "colorreg")
-      {
-        out->Write("\t\tcustom_data.base_material[0] = " I_MATERIALS "[2u] / 255.0; \n");
-        out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
-        out->Write("\t\t\tcustom_data.base_material[0] = colors_0; \n");
-      }
-      else
-      {
-        out->Write("custom_data.base_material[1].w = " I_MATERIALS "[3u].w / 255.0; \n");
-        out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
-        out->Write("\t\t\tcustom_data.base_material[1].w = colors_1.w; \n");
-      }
-      out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
-      out->Write("\t\t{{\n");
-      out->Write("\t\t\tuint light_mask = {} | ({} << 4u);\n",
-                 BitfieldExtract<&LitChannel::lightMask0_3>(color_type),
-                 BitfieldExtract<&LitChannel::lightMask4_7>(color_type));
-      out->Write("\t\t\tuint attnfunc = {};\n", BitfieldExtract<&LitChannel::attnfunc>(color_type));
-      out->Write("\t\t\tfor (uint light_index = 0u; light_index < 8u; light_index++)\n");
-      out->Write("\t\t\t{{\n");
-      out->Write("\t\t\t\tif ((light_mask & (1u << light_index)) != 0u)\n");
-      out->Write("\t\t\t\t{{\n");
-      // Shader compilation is weird, shader arrays can't use indexing by variable
-      //  to set values unless the variable is an index in a for loop.
-      // So instead we have to do this if check nonsense
-      for (u32 light_count_index = 0; light_count_index < 8; light_count_index++)
-      {
-        out->Write("\t\t\t\t\tif (light_index == {})\n", light_count_index);
-        out->Write("\t\t\t\t\t{{\n");
-        if (color_type == "colorreg")
-        {
-          for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++)
-          {
-            out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index);
-            out->Write("\t\t\t\t\t\t{{\n");
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].direction = " I_LIGHTS
-                       "[light_index].dir.xyz;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].position = " I_LIGHTS
-                       "[light_index].pos.xyz;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].cosatt = " I_LIGHTS
-                       "[light_index].cosatt;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].distatt = " I_LIGHTS
-                       "[light_index].distatt;\n",
-                       channel_index, light_count_index);
-            out->Write(
-                "\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].attenuation_type = attnfunc;\n",
-                channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].color = " I_LIGHTS
-                       "[light_index].color.rgb / float3(255.0, 255.0, 255.0);\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_color_count += 1;\n", channel_index);
-            out->Write("\t\t\t\t\t\t}}\n");
-          }
-        }
-        else
-        {
-          for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++)
-          {
-            out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index);
-            out->Write("\t\t\t\t\t\t{{\n");
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].direction = " I_LIGHTS
-                       "[light_index].dir.xyz;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].position = " I_LIGHTS
-                       "[light_index].pos.xyz;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].cosatt = " I_LIGHTS
-                       "[light_index].cosatt;\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].distatt = " I_LIGHTS
-                       "[light_index].distatt;\n",
-                       channel_index, light_count_index);
-            out->Write(
-                "\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = attnfunc;\n",
-                channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].color = float3(" I_LIGHTS
-                       "[light_index].color.a) / float3(255.0, 255.0, 255.0);\n",
-                       channel_index, light_count_index);
-            out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_alpha_count += 1;\n", channel_index);
-            out->Write("\t\t\t\t\t\t}}\n");
-          }
-        }
-
-        out->Write("\t\t\t\t\t}}\n");
-      }
-      out->Write("\t\t\t\t}}\n");
-      out->Write("\t\t\t}}\n");
-      out->Write("\t\t}}\n");
-    }
-    out->Write("\t}}\n");
-  }
-
-  for (u32 i = 0; i < 16; i++)
-  {
-    // Shader compilation complains if every struct isn't initialized
-
-    // Color Input
-    for (u32 j = 0; j < 4; j++)
-    {
-      out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = "
-                 "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
-                 i, j);
-      out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = "
-                 "float3(0, 0, 0);\n",
-                 i, j);
-    }
-
-    // Alpha Input
-    for (u32 j = 0; j < 4; j++)
-    {
-      out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = "
-                 "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
-                 i, j);
-      out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = "
-                 "float(0);\n",
-                 i, j);
-    }
-
-    // Texmap
-    out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i);
-
-    // Output
-    out->Write("\tcustom_data.tev_stages[{}].output_color = "
-               "float4(0, 0, 0, 0);\n",
-               i);
-  }
-
-  // Actual data will be filled out in the tev stage code, just set the
-  // stage count for now
-  out->Write("\tcustom_data.tev_stage_count = num_stages;\n");
-
-  // Time
-  out->Write("\tcustom_data.time_ms = time_ms;\n");
-}
-}  // namespace
 PixelShaderUid GetPixelShaderUid()
 {
   PixelShaderUid out;
@@ -334,12 +77,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
   out.Write("// {}\n", *uid_data);
   WriteBitfieldExtractHeader(out, api_type, host_config);
   WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box, custom_details);
-  WriteCustomShaderStructDef(&out, numTexgen);
-  for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
-  {
-    const auto& shader_details = custom_details.shaders[i];
-    out.Write(fmt::runtime(shader_details.custom_shader), i);
-  }
   if (per_pixel_lighting)
     WriteLightingFunction(out);
 
@@ -768,25 +505,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
       "return int3(0, 0, 0);",                               // ZERO
   };
 
-  static constexpr Common::EnumMap<std::string_view, TevColorArg::Zero> tev_c_input_type{
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",
-  };
-
   static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_table{
       "return s.Reg[0].a;",                                // APREV,
       "return s.Reg[1].a;",                                // A0,
@@ -798,17 +516,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
       "return 0;",                                         // ZERO
   };
 
-  static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_type{
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;",
-      "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",
-  };
-
   static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_regs_lookup_table{
       "return s.Reg[0];",
       "return s.Reg[1];",
@@ -850,16 +557,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
   out.Write("}}\n"
             "\n");
 
-  out.Write("// Helper function for Custom Shader Input Type\n"
-            "uint getColorInputType(uint index) {{\n");
-  WriteSwitch(out, api_type, "index", tev_c_input_type, 2, false);
-  out.Write("}}\n"
-            "\n"
-            "uint getAlphaInputType(uint index) {{\n");
-  WriteSwitch(out, api_type, "index", tev_a_input_type, 2, false);
-  out.Write("}}\n"
-            "\n");
-
   // Since the fixed-point texture coodinate variables aren't global, we need to pass
   // them to the select function.  This applies to all backends.
   if (numTexgen > 0)
@@ -879,14 +576,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
   out.Write("  uint num_stages = {};\n\n",
             BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode"));
 
-  bool has_custom_shader_details = false;
-  if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(),
-                  [](const std::optional<CustomPixelShader>& ps) { return ps.has_value(); }))
-  {
-    WriteCustomShaderStructImpl(&out, numTexgen, per_pixel_lighting);
-    has_custom_shader_details = true;
-  }
-
   if (use_framebuffer_fetch)
   {
     // Store off a copy of the initial framebuffer value.
@@ -1237,78 +926,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             "\n"
             "      // Write result to the correct input register of the next stage\n");
   WriteSwitch(out, api_type, "alpha_dest", tev_a_set_table, 6, true);
-  if (has_custom_shader_details)
-  {
-    for (u32 stage_index = 0; stage_index < 16; stage_index++)
-    {
-      out.Write("\tif (stage == {}u) {{\n", stage_index);
-      // Color input
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].value = color_A / float3(255.0, "
-                "255.0, 255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].input_type = "
-                "getColorInputType(color_a);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].value = color_B / float3(255.0, "
-                "255.0, 255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].input_type = "
-                "getColorInputType(color_b);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].value = color_C / float3(255.0, "
-                "255.0, 255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].input_type = "
-                "getColorInputType(color_c);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].value = color_D / float3(255.0, "
-                "255.0, 255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].input_type = "
-                "getColorInputType(color_c);\n",
-                stage_index);
-
-      // Alpha input
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].value = alpha_A / float(255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].input_type = "
-                "getAlphaInputType(alpha_a);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].value = alpha_B / float(255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].input_type = "
-                "getAlphaInputType(alpha_b);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].value = alpha_C / float(255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].input_type = "
-                "getAlphaInputType(alpha_c);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].value = alpha_D / float(255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].input_type = "
-                "getAlphaInputType(alpha_d);\n",
-                stage_index);
-
-      if (numTexgen != 0)
-      {
-        // Texmap
-        out.Write("\t\tif (texture_enabled) {{\n");
-        out.Write("\t\t\tuint sampler_num = {};\n",
-                  BitfieldExtract<&TwoTevStageOrders::texmap_even>("ss.order"));
-        out.Write("\t\tcustom_data.tev_stages[{}].texmap = sampler_num;\n", stage_index);
-        out.Write("\t\t}}\n");
-      }
-
-      // Output
-      out.Write("\t\tcustom_data.tev_stages[{}].output_color.rgb = color / float3(255.0, 255.0, "
-                "255.0);\n",
-                stage_index);
-      out.Write("\t\tcustom_data.tev_stages[{}].output_color.a = alpha / float(255.0);\n",
-                stage_index);
-      out.Write("\t}}\n");
-    }
-  }
   out.Write("    }}\n");
   out.Write("    }} // Main TEV loop\n");
   out.Write("\n");
@@ -1506,24 +1123,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             "  }}\n"
             "\n");
 
-  for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
-  {
-    const auto& shader_details = custom_details.shaders[i];
-
-    if (!shader_details.custom_shader.empty())
-    {
-      out.Write("\t{{\n");
-      out.Write("\t\tcustom_data.final_color = float4(TevResult.r / 255.0, TevResult.g / 255.0, "
-                "TevResult.b / 255.0, TevResult.a / 255.0);\n");
-      out.Write("\t\tCustomShaderOutput custom_output = {}_{}(custom_data);\n",
-                CUSTOM_PIXELSHADER_COLOR_FUNC, i);
-      out.Write(
-          "\t\tTevResult = int4(custom_output.main_rt.r * 255, custom_output.main_rt.g * 255, "
-          "custom_output.main_rt.b * 255, custom_output.main_rt.a * 255);\n");
-      out.Write("\t}}\n\n");
-    }
-  }
-
   if (use_framebuffer_fetch)
   {
     static constexpr std::array<const char*, 16> logic_op_mode{
diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp
index 4a46834c14..71dfee2960 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@@ -74,18 +74,223 @@ VertexShaderUid GetVertexShaderUid()
   return out;
 }
 
-ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& host_config,
-                                    const vertex_shader_uid_data* uid_data)
+namespace VertexShader
 {
-  ShaderCode out;
+void WriteTransforms(APIType api_type, const ShaderHostConfig& host_config,
+                     const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("vec4 dolphin_transform_position(vec4 rawpos)\n");
+  out.Write("{{\n");
+  if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
+  {
+    // Vertex format has a per-vertex matrix
+    out.Write("\tint posidx = int(posmtx.r);\n"
+              "\tvec4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
+              "\tvec4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
+              "\tvec4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n");
+  }
+  else
+  {
+    // One shared matrix
+    out.Write("\tvec4 P0 = " I_POSNORMALMATRIX "[0];\n"
+              "\tvec4 P1 = " I_POSNORMALMATRIX "[1];\n"
+              "\tvec4 P2 = " I_POSNORMALMATRIX "[2];\n");
+  }
+  out.Write("\t// Multiply the position vector by the position matrix\n"
+            "\treturn vec4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
+  out.Write("}}\n\n");
 
-  const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
-  const bool msaa = host_config.msaa;
-  const bool ssaa = host_config.ssaa;
-  const bool vertex_rounding = host_config.vertex_rounding;
+  out.Write("vec4 dolphin_project_position(vec4 pos)\n");
+  out.Write("{{\n");
+  out.Write("\treturn vec4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
+            "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
+  out.Write("}}\n\n");
 
-  ShaderCode input_extract;
+  out.Write("vec3 dolphin_transform_normal(vec3 norm)\n");
+  out.Write("{{\n");
 
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
+    {
+      // Vertex format has a per-vertex matrix
+      out.Write("\tint posidx = int(posmtx.r);\n");
+      out.Write("\tint normidx = posidx & 31;\n"
+                "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
+                "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
+                "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
+    }
+    else
+    {
+      // One shared matrix
+      out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
+                "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
+                "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
+    }
+    // The scale of the transform matrix is used to control the size of the emboss map effect, by
+    // changing the scale of the transformed binormals (which only get used by emboss map texgens).
+    // By normalising the first transformed normal (which is used by lighting calculations and needs
+    // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
+    // and not scaling for lighting.
+    out.Write("\treturn normalize(vec3(dot(N0, norm), dot(N1, norm), dot(N2, "
+              "norm)));\n");
+  }
+  else
+  {
+    out.Write("\treturn norm;\n");
+  }
+
+  out.Write("}}\n\n");
+
+  out.Write("vec3 dolphin_transform_binormal(vec3 binormal)\n");
+  out.Write("{{\n");
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
+    {
+      // Vertex format has a per-vertex matrix
+      out.Write("\tint posidx = int(posmtx.r);\n");
+      out.Write("\tint normidx = posidx & 31;\n"
+                "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
+                "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
+                "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
+    }
+    else
+    {
+      // One shared matrix
+      out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
+                "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
+                "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
+    }
+
+    // The scale of the transform matrix is used to control the size of the emboss map effect, by
+    // changing the scale of the transformed binormals (which only get used by emboss map texgens).
+    // The binormal is deliberately left unnormalised here; only the transformed normal (which
+    // lighting calculations need to be unit length) is normalised, so the same transform matrix
+    // can do double duty: scaling for emboss mapping, and not scaling for lighting.
+    out.Write("\treturn vec3(dot(N0, binormal), dot(N1, binormal), dot(N2, "
+              "binormal));\n");
+  }
+  else
+  {
+    out.Write("\treturn vec3(0, 0, 0);\n");
+  }
+
+  out.Write("}}\n\n");
+
+  out.Write("vec3 dolphin_transform_tangent(vec3 tangent)\n");
+  out.Write("{{\n");
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
+    {
+      // Vertex format has a per-vertex matrix
+      out.Write("\tint posidx = int(posmtx.r);\n");
+      out.Write("\tint normidx = posidx & 31;\n"
+                "\tvec3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
+                "\tvec3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
+                "\tvec3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
+    }
+    else
+    {
+      // One shared matrix
+      out.Write("\tvec3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
+                "\tvec3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
+                "\tvec3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
+    }
+
+    // The scale of the transform matrix is used to control the size of the emboss map effect, by
+    // changing the scale of the transformed binormals (which only get used by emboss map texgens).
+    // Like the binormal, the tangent is left unnormalised here; only the transformed normal (which
+    // lighting calculations need to be unit length) is normalised, so the same transform matrix
+    // can do double duty: scaling for emboss mapping, and not scaling for lighting.
+    out.Write("\treturn vec3(dot(N0, tangent), dot(N1, tangent), dot(N2, "
+              "tangent));\n");
+  }
+  else
+  {
+    out.Write("\treturn vec3(0, 0, 0);\n");
+  }
+
+  out.Write("}}\n\n");
+
+  for (u32 i = 0; i < uid_data->numTexGens; ++i)
+  {
+    auto& texinfo = uid_data->texMtxInfo[i];
+    out.Write("vec3 dolphin_transform_texcoord{}(vec4 coord)\n", i);
+    out.Write("{{\n");
+    if (texinfo.texgentype != TexGenType::Regular)
+    {
+      out.Write("\treturn vec3(coord.xyz);\n");
+    }
+    else
+    {
+      out.Write("\tvec3 result;\n");
+      if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
+      {
+        out.Write("\tint tmp = int(rawtex{}.z);\n", i);
+        if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
+        {
+          out.Write("\tresult = vec3(dot(coord, " I_TRANSFORMMATRICES
+                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES
+                    "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n");
+        }
+        else
+        {
+          out.Write("\tresult = vec3(dot(coord, " I_TRANSFORMMATRICES
+                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n");
+        }
+      }
+      else
+      {
+        if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
+        {
+          out.Write("\tresult = vec3(dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
+                    "[{}]), dot(coord, " I_TEXMATRICES "[{}]));\n",
+                    3 * i, 3 * i + 1, 3 * i + 2);
+        }
+        else
+        {
+          out.Write("\tresult = vec3(dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
+                    "[{}]), 1);\n",
+                    3 * i, 3 * i + 1);
+        }
+      }
+      // CHECKME: does this only work for regular tex gen types?
+      if (uid_data->dualTexTrans_enabled)
+      {
+        auto& postInfo = uid_data->postMtxInfo[i];
+
+        out.Write("\tvec4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
+                  "\tvec4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
+                  "\tvec4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
+                  postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
+
+        if (postInfo.normalize)
+          out.Write("\tresult = normalize(result);\n");
+
+        // multiply by postmatrix
+        out.Write("\tresult = vec3(dot(P0.xyz, result) + P0.w, dot(P1.xyz, result) + "
+                  "P1.w, dot(P2.xyz, result) + P2.w);\n");
+      }
+
+      // When q is 0, the GameCube appears to have a special case
+      // This can be seen in devkitPro's neheGX Lesson08 example for Wii
+      // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
+      // TODO: check if this only affects XF_TEXGEN_REGULAR
+      out.Write("\tif(result.z == 0.0f)\n"
+                "\t\tresult.xy = clamp(result.xy / 2.0f, vec2(-1.0f,-1.0f), vec2(1.0f,1.0f));\n");
+      out.Write("\treturn result;\n");
+    }
+    out.Write("}}\n\n");
+  }
+}
+
+void WriteHeader(APIType api_type, const ShaderHostConfig& host_config,
+                 const vertex_shader_uid_data* uid_data, ShaderCode& out, ShaderCode& input_extract)
+{
   out.Write("{}", s_lighting_struct);
 
   // uniforms
@@ -116,6 +321,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
 
   WriteIsNanHeader(out, api_type);
 
+  GenerateLightingShaderHeader(out, uid_data->lighting);
+
   if (uid_data->vs_expand == VSExpand::None)
   {
     out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawpos;\n", ShaderAttrib::Position);
@@ -225,6 +432,10 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
               "}};\n\n");
   }
 
+  const bool msaa = host_config.msaa;
+  const bool ssaa = host_config.ssaa;
+  const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
+
   if (host_config.backend_geometry_shaders)
   {
     out.Write("VARYING_LOCATION(0) out VertexData {{\n");
@@ -260,28 +471,376 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     }
   }
 
+  // Write the transforms after so they can use the position matrix if it's available
+  // or should the functions take the posmtx too?
+  WriteTransforms(api_type, host_config, uid_data, out);
+
+  // TODO: move...
+  out.Write("vec4 dolphin_pixel_correction(vec4 pos)\n");
+  out.Write("{{\n");
+
+  // Write the true depth value. If the game uses depth textures, then the pixel shader will
+  // override it with the correct values if not then early z culling will improve speed.
+  // There are two different ways to do this, when the depth range is oversized, we process
+  // the depth range in the vertex shader, if not we let the host driver handle it.
+  //
+  // Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
+  // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
+  // We have to handle the depth range in the vertex shader instead of after the perspective
+  // divide, because some games will use a depth range larger than what is allowed by the
+  // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
+  // games effectively add a depth bias to the values written to the depth buffer.
+  out.Write("\tpos.z = pos.w * " I_PIXELCENTERCORRECTION ".w - "
+            "\tpos.z * " I_PIXELCENTERCORRECTION ".z;\n");
+
+  if (!host_config.backend_clip_control)
+  {
+    // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
+    // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy
+    // floating-point operation that can introduce a round-trip error.
+    out.Write("\tpos.z = pos.z * 2.0 - pos.w;\n");
+  }
+
+  // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
+  // since the viewport height is already negated by the render backend.
+  out.Write("\tpos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
+
+  // The console GPU places the pixel center at 7/12 in screen space unless
+  // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
+  // in some primitives being placed one pixel too far to the bottom-right,
+  // which in turn can be critical if it happens for clear quads.
+  // Hence, we compensate for this pixel center difference so that primitives
+  // get rasterized correctly.
+  out.Write("\tpos.xy = pos.xy - pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
+
+  const bool vertex_rounding = host_config.vertex_rounding;
+  if (vertex_rounding)
+  {
+    // By now our position is in clip space
+    // however, higher resolutions than the Wii outputs
+    // cause an additional pixel offset
+    // due to a higher pixel density
+    // we need to correct this by converting our
+    // clip-space position into the Wii's screen-space
+    // acquire the right pixel and then convert it back
+    out.Write("\tif (pos.w == 1.0f)\n"
+              "\t{{\n"
+
+              "\t\tfloat ss_pixel_x = ((pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
+              "\t\tfloat ss_pixel_y = ((pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"
+
+              "\t\tss_pixel_x = round(ss_pixel_x);\n"
+              "\t\tss_pixel_y = round(ss_pixel_y);\n"
+
+              "\t\tpos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
+              "\t\tpos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
+              "\t}}\n");
+  }
+
+  out.Write("\treturn pos;\n");
+
+  out.Write("}}\n");
+}
+
+// Emits dolphin_emulated_vertex(): the GLSL signature, then the body from WriteVertexBody.
+void WriteEmulatedVertexBodyHeader(APIType api_type, const ShaderHostConfig& host_config,
+                                   const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  // Signature and opening brace of the emulated vertex entry point.
+  out.Write("void dolphin_emulated_vertex(in DolphinVertexInput vertex_input, out "
+            "DolphinVertexOutput vertex_output)\n"
+            "{{\n");
+
+  WriteVertexBody(api_type, host_config, uid_data, out);
+
+  out.Write("}}\n");
+}
+
+// Declares the two GLSL structs used by the vertex function: DolphinVertexInput holds what is
+// passed in and DolphinVertexOutput holds what it writes back.
+void WriteVertexStructs(APIType api_type, const ShaderHostConfig& host_config,
+                        const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  // Slots 0..7 are always declared, whether or not a texgen is active for them, so both
+  // structs expose texture_coord_0 through texture_coord_7.
+  const u32 texcoord_count = uid_data->numTexGens > 8 ? uid_data->numTexGens : 8;
+
+  // Input side: texture coordinates are vec4 and binormal/tangent are included.
+  out.Write("struct DolphinVertexInput\n"
+            "{{\n"
+            "\tvec4 color_0;\n"
+            "\tvec4 color_1;\n"
+            "\tvec4 position;\n"
+            "\tvec3 normal;\n"
+            "\tvec3 binormal;\n"
+            "\tvec3 tangent;\n");
+  for (u32 slot = 0; slot < texcoord_count; ++slot)
+  {
+    out.Write("\tvec4 texture_coord_{};\n", slot);
+  }
+  out.Write("}};\n\n");
+
+  // Output side: texture coordinates are vec3 and there is no binormal/tangent.
+  out.Write("struct DolphinVertexOutput\n"
+            "{{\n"
+            "\tvec4 color_0;\n"
+            "\tvec4 color_1;\n"
+            "\tvec4 position;\n"
+            "\tvec3 normal;\n");
+  for (u32 slot = 0; slot < texcoord_count; ++slot)
+  {
+    out.Write("\tvec3 texture_coord_{};\n", slot);
+  }
+  out.Write("}};\n\n");
+}
+
+// Emits "#define HAS_* 1" for each vertex component present in uid_data->components, and
+// "#define HAS_TEXTURE_COORD_n 0" for every texture coordinate slot at or past numTexGens.
+void WriteVertexDefines(APIType, const ShaderHostConfig&, const vertex_shader_uid_data* uid_data,
+                        ShaderCode& out)
+{
+  struct ComponentDefine
+  {
+    u32 mask;
+    const char* macro;
+  };
+  // Listed in the order in which the defines are emitted.
+  static constexpr ComponentDefine component_defines[] = {
+      {VB_HAS_COL0, "HAS_COLOR_0"},
+      {VB_HAS_COL1, "HAS_COLOR_1"},
+      {VB_HAS_NORMAL, "HAS_NORMAL"},
+      {VB_HAS_BINORMAL, "HAS_BINORMAL"},
+      {VB_HAS_TANGENT, "HAS_TANGENT"},
+  };
+
+  const u32 components = uid_data->components;
+  for (const ComponentDefine& entry : component_defines)
+  {
+    if ((components & entry.mask) == 0)
+      continue;
+    out.Write("#define {} 1\n", entry.macro);
+  }
+
+  // NOTE(review): an active texgen whose UV component is absent gets no define at all, while
+  // slots beyond numTexGens are defined as 0 - confirm this asymmetry is intended.
+  const u32 active_texgens = uid_data->numTexGens;
+  for (u32 slot = 0; slot < active_texgens; ++slot)
+  {
+    if ((components & (VB_HAS_UV0 << slot)) != 0)
+      out.Write("#define HAS_TEXTURE_COORD_{} 1\n", slot);
+  }
+
+  for (u32 slot = active_texgens; slot < 8; ++slot)
+  {
+    out.Write("#define HAS_TEXTURE_COORD_{} 0\n", slot);
+  }
+}
+
+// Emits the body of dolphin_emulated_vertex(): transforms position and normal, resolves the
+// two color outputs and fills every texture coordinate slot of vertex_output.
+void WriteVertexBody(APIType api_type, const ShaderHostConfig& host_config,
+                     const vertex_shader_uid_data* uid_data, ShaderCode& out)
+{
+  out.Write("\tvertex_output.position = dolphin_transform_position(vertex_input.position);\n");
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
+  {
+    out.Write("\tvertex_output.normal = dolphin_transform_normal(vertex_input.normal);\n");
+  }
+  else
+  {
+    out.Write("\tvertex_output.normal = vec3(0, 0, 0);\n");
+  }
+
+  const bool has_color0_texture_coordinate =
+      std::ranges::any_of(uid_data->texMtxInfo, [](const auto& texinfo) {
+        return texinfo.texgentype == TexGenType::Color0;
+      });
+
+  const bool has_color1_texture_coordinate =
+      std::ranges::any_of(uid_data->texMtxInfo, [](const auto& texinfo) {
+        return texinfo.texgentype == TexGenType::Color1;
+      });
+
+  // NOTE(review): the lighting helpers and the emboss light direction below are fed the raw
+  // vertex_input position/normal rather than the transformed outputs - confirm that is intended.
+  const bool per_pixel_lighting = host_config.per_pixel_lighting;
+  if (per_pixel_lighting)
+  {
+    // When per-pixel lighting is enabled, the vertex colors are passed through
+    // unmodified so we can evaluate the lighting in the pixel shader.
+    out.Write("\tvertex_output.color_0 = vertex_input.color_0;\n");
+    out.Write("\tvertex_output.color_1 = vertex_input.color_1;\n");
+    // Note that the numColorChans logic is performed in the pixel shader.
+
+    // We may still need to calculate the lighting per vertex if the vertex
+    // shader generates texture coordinates with this information
+    if (has_color0_texture_coordinate)
+    {
+      out.Write("\tvec4 vertex_lighting_0 = dolphin_calculate_lighting_chn0(vertex_input.color_0, "
+                "vertex_input.position, vertex_input.normal);\n");
+    }
+    if (has_color1_texture_coordinate)
+    {
+      out.Write("\tvec4 vertex_lighting_1 = dolphin_calculate_lighting_chn1(vertex_input.color_1, "
+                "vertex_input.position, vertex_input.normal);\n");
+    }
+  }
+  else
+  {
+    if (uid_data->numColorChans > 0)
+    {
+      out.Write("\tvec4 vertex_lighting_0 = dolphin_calculate_lighting_chn0(vertex_input.color_0, "
+                "vertex_input.position, vertex_input.normal);\n");
+      out.Write("\tvertex_output.color_0 = vertex_lighting_0;\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tvertex_output.color_0 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+      if (has_color0_texture_coordinate)
+      {
+        out.Write("\tvec4 vertex_lighting_0 = "
+                  "dolphin_calculate_lighting_chn0(vertex_input.color_0, vertex_input.position,"
+                  "vertex_input.normal);\n");
+      }
+    }
+
+    if (uid_data->numColorChans == 2)
+    {
+      out.Write("\tvec4 vertex_lighting_1 = dolphin_calculate_lighting_chn1(vertex_input.color_1, "
+                "vertex_input.position, vertex_input.normal);\n");
+      out.Write("\tvertex_output.color_1 = vertex_lighting_1;\n");
+    }
+    else
+    {
+      // The number of colors available to TEV is determined by numColorChans.
+      // We have to provide the fields to match the interface, so set to zero if it's not enabled.
+      out.Write("\tvertex_output.color_1 = vec4(0.0, 0.0, 0.0, 0.0);\n");
+      if (has_color1_texture_coordinate)
+      {
+        // Pass vertex_input.normal, matching every other lighting call in this function.
+        out.Write("\tvec4 vertex_lighting_1 = "
+                  "dolphin_calculate_lighting_chn1(vertex_input.color_1, vertex_input.position,"
+                  "vertex_input.normal);\n");
+      }
+    }
+  }
+
+  for (u32 i = 0; i < uid_data->numTexGens; ++i)
+  {
+    auto& texinfo = uid_data->texMtxInfo[i];
+
+    switch (texinfo.texgentype)
+    {
+    case TexGenType::EmbossMap:  // calculate tex coords into bump map
+      // The emitted locals are wrapped in their own GLSL block so that several emboss texgens
+      // do not redeclare ldir/rawtangent/rawbinormal in the same scope.
+      // transform the light dir into tangent space
+      out.Write("\t{{\n\tvec3 ldir = normalize(" LIGHT_POS ".xyz - vertex_input.position.xyz);\n",
+                LIGHT_POS_PARAMS(texinfo.embosslightshift));
+
+      if ((uid_data->components & VB_HAS_TANGENT) == 0)
+        out.Write("\tvec3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
+      else
+        out.Write("\tvec3 rawtangent = vertex_input.tangent;\n");
+
+      if ((uid_data->components & VB_HAS_BINORMAL) == 0)
+        out.Write("\tvec3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
+      else
+        out.Write("\tvec3 rawbinormal = vertex_input.binormal;\n");
+
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vertex_output.texture_coord_{}.xyz + "
+                "vec3(dot(ldir, dolphin_transform_tangent(rawtangent)), "
+                "dot(ldir, dolphin_transform_binormal(rawbinormal)), 0.0);\n"
+                "\t}}\n",
+                i, texinfo.embosssourceshift);
+
+      break;
+    case TexGenType::Color0:
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vec3(vertex_lighting_0.x, "
+                "vertex_lighting_0.y, 1);\n",
+                i);
+      break;
+    case TexGenType::Color1:
+      out.Write("\tvertex_output.texture_coord_{}.xyz = vec3(vertex_lighting_1.x, "
+                "vertex_lighting_1.y, 1);\n",
+                i);
+      break;
+    case TexGenType::Regular:
+      out.Write("\tvertex_output.texture_coord_{0} = "
+                "dolphin_transform_texcoord{0}(vertex_input.texture_coord_{0});\n",
+                i);
+      break;
+    };
+  }
+
+  // Fill out output that is unused
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+    out.Write("\tvertex_output.texture_coord_{0} = vec3(0, 0, 0);\n", i);
+}
+
+ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config,
+                           const vertex_shader_uid_data* uid_data, std::string_view custom_vertex,
+                           std::string_view custom_uniforms)
+{
+  ShaderCode out;
+
+  const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
+
+  ShaderCode input_extract;
+
+  WriteHeader(api_type, host_config, uid_data, out, input_extract);
+
+  WriteVertexStructs(api_type, host_config, uid_data, out);
+  WriteVertexDefines(api_type, host_config, uid_data, out);
+
+  if (!custom_uniforms.empty())
+  {
+    out.Write("UBO_BINDING(std140, 3) uniform CustomShaderBlock {{\n");
+    out.Write("{}", custom_uniforms);
+    out.Write("}} custom_uniforms;\n");
+  }
+
+  WriteEmulatedVertexBodyHeader(api_type, host_config, uid_data, out);
+
+  if (custom_vertex.empty())
+  {
+    out.Write("{}\n", vertex_definition);
+    out.Write("{{\n");
+
+    out.Write("\tdolphin_emulated_vertex(vertex_input, vertex_output);\n");
+
+    out.Write("}}\n");
+  }
+  else
+  {
+    out.Write("{}\n", custom_vertex);
+  }
+
   out.Write("void main()\n{{\n");
 
   if (uid_data->vs_expand != VSExpand::None)
   {
-    out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n"
-              "bool is_right = (gl_VertexID & 1) != 0;\n");
+    out.Write("\tbool is_bottom = (gl_VertexID & 2) != 0;\n"
+              "\tbool is_right = (gl_VertexID & 1) != 0;\n");
     // D3D doesn't include the base vertex in SV_VertexID
     // See comment in UberShaderVertex for details
     if (api_type == APIType::D3D)
-      out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n");
+      out.Write("\tuint vertex_id = (gl_VertexID >> 2) + base_vertex;\n");
     else
-      out.Write("uint vertex_id = uint(gl_VertexID) >> 2u;\n");
-    out.Write("InputData i = input_buffer[vertex_id];\n"
+      out.Write("\tuint vertex_id = uint(gl_VertexID) >> 2u;\n");
+    out.Write("\tInputData i = input_buffer[vertex_id];\n"
               "{}",
               input_extract.GetBuffer());
   }
 
-  out.Write("VS_OUTPUT o;\n");
+  out.Write("\tVS_OUTPUT o;\n");
 
   // xfmem.numColorChans controls the number of color channels available to TEV, but we still need
   // to generate all channels here, as it can be used in texgen. Cel-damage is an example of this.
-  out.Write("float4 vertex_color_0, vertex_color_1;\n");
+  out.Write("\tvec4 vertex_color_0, vertex_color_1;\n");
 
   // To use color 1, the vertex descriptor must have color 0 and 1.
   // If color 1 is present but not color 0, it is used for lighting channel 0.
@@ -292,90 +851,66 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     if ((color == 0 || use_color_1) && (uid_data->components & (VB_HAS_COL0 << color)) != 0)
     {
       // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are present.
-      out.Write("vertex_color_{0} = rawcolor{0};\n", color);
+      out.Write("\tvertex_color_{0} = rawcolor{0};\n", color);
     }
     else if (color == 0 && (uid_data->components & VB_HAS_COL1) != 0)
     {
       // Use color1 for channel 0 if color0 is not present.
-      out.Write("vertex_color_{} = rawcolor1;\n", color);
+      out.Write("\tvertex_color_{} = rawcolor1;\n", color);
     }
     else
     {
-      out.Write("vertex_color_{0} = missing_color_value;\n", color);
+      out.Write("\tvertex_color_{0} = missing_color_value;\n", color);
     }
   }
 
-  // transforms
-  if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
+  out.Write("\tDolphinVertexInput vertex_input;\n");
+  out.Write("\tvertex_input.color_0 = vertex_color_0;\n");
+  out.Write("\tvertex_input.color_1 = vertex_color_1;\n");
+  out.Write("\tvertex_input.position = rawpos;\n");
+
+  if ((uid_data->components & VB_HAS_NORMAL) != 0)
   {
-    // Vertex format has a per-vertex matrix
-    out.Write("int posidx = int(posmtx.r);\n"
-              "float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
-              "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
-              "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"
-              "int normidx = posidx & 31;\n"
-              "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
-              "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
-              "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
+    out.Write("\tvertex_input.normal = rawnormal;\n");
   }
   else
   {
-    // One shared matrix
-    out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n"
-              "float4 P1 = " I_POSNORMALMATRIX "[1];\n"
-              "float4 P2 = " I_POSNORMALMATRIX "[2];\n"
-              "float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
-              "float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
-              "float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
+    out.Write("\tvertex_input.normal = vec3(0, 0, 0);\n");
   }
 
-  out.Write("// Multiply the position vector by the position matrix\n"
-            "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
-  if ((uid_data->components & VB_HAS_NORMAL) == 0)
-    out.Write("float3 rawnormal = " I_CACHED_NORMAL ".xyz;\n");
-  if ((uid_data->components & VB_HAS_TANGENT) == 0)
-    out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
-  if ((uid_data->components & VB_HAS_BINORMAL) == 0)
-    out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
+  if ((uid_data->components & VB_HAS_BINORMAL) != 0)
+  {
+    out.Write("\tvertex_input.binormal = rawbinormal;\n");
+  }
+  else
+  {
+    out.Write("\tvertex_input.binormal = vec3(0, 0, 0);\n");
+  }
 
-  // The scale of the transform matrix is used to control the size of the emboss map effect, by
-  // changing the scale of the transformed binormals (which only get used by emboss map texgens).
-  // By normalising the first transformed normal (which is used by lighting calculations and needs
-  // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
-  // and not scaling for lighting.
-  out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
-            "rawnormal)));\n"
-            "float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
-            "rawtangent));\n"
-            "float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
-            "rawbinormal));\n");
+  if ((uid_data->components & VB_HAS_TANGENT) != 0)
+  {
+    out.Write("\tvertex_input.tangent = rawtangent;\n");
+  }
+  else
+  {
+    out.Write("\tvertex_input.tangent = vec3(0, 0, 0);\n");
+  }
 
-  out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
-            "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
-
-  out.Write("int4 lacc;\n"
-            "float3 ldir, h, cosAttn, distAttn;\n"
-            "float dist, dist2, attn;\n");
-
-  GenerateLightingShaderCode(out, uid_data->lighting, "vertex_color_", "o.colors_");
-
-  // transform texcoords
-  out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
   for (u32 i = 0; i < uid_data->numTexGens; ++i)
   {
     auto& texinfo = uid_data->texMtxInfo[i];
 
-    out.Write("{{\n");
-    out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
+    out.Write("\t{{\n");
+    out.Write("\t\tvec4 coord = vec4(0.0, 0.0, 1.0, 1.0);\n");
     switch (texinfo.sourcerow)
     {
     case SourceRow::Geom:
-      out.Write("coord.xyz = rawpos.xyz;\n");
+      out.Write("\t\tcoord.xyz = rawpos.xyz;\n");
       break;
     case SourceRow::Normal:
       if ((uid_data->components & VB_HAS_NORMAL) != 0)
       {
-        out.Write("coord.xyz = rawnormal.xyz;\n");
+        out.Write("\t\tcoord.xyz = rawnormal.xyz;\n");
       }
       break;
     case SourceRow::Colors:
@@ -384,13 +919,13 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     case SourceRow::BinormalT:
       if ((uid_data->components & VB_HAS_TANGENT) != 0)
       {
-        out.Write("coord.xyz = rawtangent.xyz;\n");
+        out.Write("\t\tcoord.xyz = rawtangent.xyz;\n");
       }
       break;
     case SourceRow::BinormalB:
       if ((uid_data->components & VB_HAS_BINORMAL) != 0)
       {
-        out.Write("coord.xyz = rawbinormal.xyz;\n");
+        out.Write("\t\tcoord.xyz = rawbinormal.xyz;\n");
       }
       break;
     default:
@@ -398,112 +933,49 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
       u32 texnum = static_cast<u32>(texinfo.sourcerow) - static_cast<u32>(SourceRow::Tex0);
       if ((uid_data->components & (VB_HAS_UV0 << (texnum))) != 0)
       {
-        out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum);
+        out.Write("\t\tcoord = vec4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum);
       }
       break;
     }
     // Input form of AB11 sets z element to 1.0
 
     if (texinfo.inputform == TexInputForm::AB11)
-      out.Write("coord.z = 1.0;\n");
+      out.Write("\t\tcoord.z = 1.0;\n");
 
     // Convert NaNs to 1 - needed to fix eyelids in Shadow the Hedgehog during cutscenes
     // See https://bugs.dolphin-emu.org/issues/11458
-    out.Write("// Convert NaN to 1\n");
-    out.Write("if (dolphin_isnan(coord.x)) coord.x = 1.0;\n");
-    out.Write("if (dolphin_isnan(coord.y)) coord.y = 1.0;\n");
-    out.Write("if (dolphin_isnan(coord.z)) coord.z = 1.0;\n");
+    out.Write("\t\t// Convert NaN to 1\n");
+    out.Write("\t\tif (dolphin_isnan(coord.x)) coord.x = 1.0;\n");
+    out.Write("\t\tif (dolphin_isnan(coord.y)) coord.y = 1.0;\n");
+    out.Write("\t\tif (dolphin_isnan(coord.z)) coord.z = 1.0;\n");
 
-    // first transformation
-    switch (texinfo.texgentype)
-    {
-    case TexGenType::EmbossMap:  // calculate tex coords into bump map
+    out.Write("\t\tvertex_input.texture_coord_{0} = coord;\n", i);
+    out.Write("\t}}\n");
+  }
 
-      // transform the light dir into tangent space
-      out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
-                LIGHT_POS_PARAMS(texinfo.embosslightshift));
-      out.Write(
-          "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
-          i, texinfo.embosssourceshift);
+  // Initialize other texture coordinates that are unused
+  for (u32 i = uid_data->numTexGens; i < 8; i++)
+  {
+    out.Write("\tvertex_input.texture_coord_{0} = vec4(0, 0, 0, 0);\n", i);
+  }
 
-      break;
-    case TexGenType::Color0:
-      out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
-      break;
-    case TexGenType::Color1:
-      out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
-      break;
-    case TexGenType::Regular:
-    default:
-      if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
-      {
-        out.Write("int tmp = int(rawtex{}.z);\n", i);
-        if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
-        {
-          out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
-                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES
-                    "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
-                    i);
-        }
-        else
-        {
-          out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
-                    "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
-                    i);
-        }
-      }
-      else
-      {
-        if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
-        {
-          out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
-                    "[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
-                    "[{}]));\n",
-                    i, 3 * i, 3 * i + 1, 3 * i + 2);
-        }
-        else
-        {
-          out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
-                    "[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n",
-                    i, 3 * i, 3 * i + 1);
-        }
-      }
-      break;
-    }
+  out.Write("\tDolphinVertexOutput vertex_output;\n");
+  out.Write("\tvertex(vertex_input, vertex_output);\n");
 
-    // CHECKME: does this only work for regular tex gen types?
-    if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular)
-    {
-      auto& postInfo = uid_data->postMtxInfo[i];
+  out.Write("\to.pos = dolphin_project_position(vertex_output.position);\n");
+  for (u32 i = 0; i < uid_data->numTexGens; ++i)
+  {
+    out.Write("\to.tex{0} = vertex_output.texture_coord_{0};\n", i);
+  }
 
-      out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
-                "float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
-                "float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
-                postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
+  out.Write("\to.colors_0 = vertex_output.color_0;\n");
+  out.Write("\to.colors_1 = vertex_output.color_1;\n");
+  if (per_pixel_lighting)
+  {
+    out.Write("\to.Normal = vertex_output.normal;\n");
 
-      if (postInfo.normalize)
-        out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
-
-      // multiply by postmatrix
-      out.Write(
-          "o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + "
-          "P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n",
-          i);
-    }
-
-    // When q is 0, the GameCube appears to have a special case
-    // This can be seen in devkitPro's neheGX Lesson08 example for Wii
-    // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
-    // TODO: check if this only affects XF_TEXGEN_REGULAR
-    if (texinfo.texgentype == TexGenType::Regular)
-    {
-      out.Write(
-          "if(o.tex{0}.z == 0.0f)\n"
-          "\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n",
-          i);
-    }
-
-    out.Write("}}\n");
+    // TODO: Rename, this is actually in Viewspace...
+    out.Write("\to.WorldPos = vertex_output.position.xyz;\n");
   }
 
   if (uid_data->vs_expand == VSExpand::Line)
@@ -542,120 +1014,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     GenerateVSPointExpansion(out, "", uid_data->numTexGens);
   }
 
-  if (per_pixel_lighting)
-  {
-    // When per-pixel lighting is enabled, the vertex colors are passed through
-    // unmodified so we can evaluate the lighting in the pixel shader.
-
-    // Lighting is also still computed in the vertex shader since it can be used to
-    // generate texture coordinates. We generated them above, so now the colors can
-    // be reverted to their previous stage.
-    out.Write("o.colors_0 = vertex_color_0;\n");
-    out.Write("o.colors_1 = vertex_color_1;\n");
-    // Note that the numColorChans logic is performed in the pixel shader.
-  }
-  else
-  {
-    // The number of colors available to TEV is determined by numColorChans.
-    // We have to provide the fields to match the interface, so set to zero if it's not enabled.
-    if (uid_data->numColorChans == 0)
-      out.Write("o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);\n");
-    if (uid_data->numColorChans <= 1)
-      out.Write("o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n");
-  }
-
   // clipPos/w needs to be done in pixel shader, not here
   if (!host_config.fast_depth_calc)
-    out.Write("o.clipPos = o.pos;\n");
-
-  if (per_pixel_lighting)
-  {
-    out.Write("o.Normal = _normal;\n"
-              "o.WorldPos = pos.xyz;\n");
-  }
+    out.Write("\to.clipPos = o.pos;\n");
 
   // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
   // our own depth clipping and calculate the depth range before the perspective divide if
   // necessary.
   if (host_config.backend_depth_clamp)
   {
-    // Since we're adjusting z for the depth range before the perspective divide, we have to do our
-    // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
-    // We adjust our depth value for clipping purposes to match the perspective projection in the
-    // software backend, which is a hack to fix Sonic Adventure and Unleashed games.
-    out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
-              "float clipDist0 = clipDepth + o.pos.w;\n"  // Near: z < -w
-              "float clipDist1 = -clipDepth;\n");         // Far: z > 0
+    // Since we're adjusting z for the depth range before the perspective divide, we have to do
+    // our own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0
+    // range. We adjust our depth value for clipping purposes to match the perspective projection
+    // in the software backend, which is a hack to fix Sonic Adventure and Unleashed games.
+    out.Write("\tfloat clipDepth = o.pos.z * (1.0 - 1e-7);\n"
+              "\tfloat clipDist0 = clipDepth + o.pos.w;\n"  // Near: z < -w
+              "\tfloat clipDist1 = -clipDepth;\n");         // Far: z > 0
 
     if (host_config.backend_geometry_shaders)
     {
-      out.Write("o.clipDist0 = clipDist0;\n"
-                "o.clipDist1 = clipDist1;\n");
+      out.Write("\to.clipDist0 = clipDist0;\n"
+                "\to.clipDist1 = clipDist1;\n");
     }
   }
   else
   {
     // Same depth adjustment for Sonic. Without depth clamping, it unfortunately
     // affects non-clipping uses of depth too.
-    out.Write("o.pos.z = o.pos.z * (1.0 - 1e-7);\n");
+    out.Write("\to.pos.z = o.pos.z * (1.0 - 1e-7);\n");
   }
 
-  // Write the true depth value. If the game uses depth textures, then the pixel shader will
-  // override it with the correct values if not then early z culling will improve speed.
-  // There are two different ways to do this, when the depth range is oversized, we process
-  // the depth range in the vertex shader, if not we let the host driver handle it.
-  //
-  // Adjust z for the depth range. We're using an equation which incorperates a depth inversion,
-  // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
-  // We have to handle the depth range in the vertex shader instead of after the perspective
-  // divide, because some games will use a depth range larger than what is allowed by the
-  // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
-  // games effectively add a depth bias to the values written to the depth buffer.
-  out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
-            "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
-
-  if (!host_config.backend_clip_control)
-  {
-    // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
-    // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point
-    // operation that can introduce a round-trip error.
-    out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
-  }
-
-  // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
-  // since the viewport height is already negated by the render backend.
-  out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
-
-  // The console GPU places the pixel center at 7/12 in screen space unless
-  // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
-  // in some primitives being placed one pixel too far to the bottom-right,
-  // which in turn can be critical if it happens for clear quads.
-  // Hence, we compensate for this pixel center difference so that primitives
-  // get rasterized correctly.
-  out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
-
-  if (vertex_rounding)
-  {
-    // By now our position is in clip space
-    // however, higher resolutions than the Wii outputs
-    // cause an additional pixel offset
-    // due to a higher pixel density
-    // we need to correct this by converting our
-    // clip-space position into the Wii's screen-space
-    // acquire the right pixel and then convert it back
-    out.Write("if (o.pos.w == 1.0f)\n"
-              "{{\n"
-
-              "\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
-              "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"
-
-              "\tss_pixel_x = round(ss_pixel_x);\n"
-              "\tss_pixel_y = round(ss_pixel_y);\n"
-
-              "\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
-              "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
-              "}}\n");
-  }
+  out.Write("\to.pos = dolphin_pixel_correction(o.pos);\n");
 
   if (host_config.backend_geometry_shaders)
   {
@@ -666,30 +1055,31 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     // TODO: Pass interface blocks between shader stages even if geometry shaders
     // are not supported, however that will require at least OpenGL 3.2 support.
     for (u32 i = 0; i < uid_data->numTexGens; ++i)
-      out.Write("tex{}.xyz = o.tex{};\n", i, i);
+      out.Write("\ttex{}.xyz = o.tex{};\n", i, i);
     if (!host_config.fast_depth_calc)
-      out.Write("clipPos = o.clipPos;\n");
+      out.Write("\tclipPos = o.clipPos;\n");
     if (per_pixel_lighting)
     {
-      out.Write("Normal = o.Normal;\n"
-                "WorldPos = o.WorldPos;\n");
+      out.Write("\tNormal = o.Normal;\n"
+                "\tWorldPos = o.WorldPos;\n");
     }
-    out.Write("colors_0 = o.colors_0;\n"
-              "colors_1 = o.colors_1;\n");
+    out.Write("\tcolors_0 = o.colors_0;\n"
+              "\tcolors_1 = o.colors_1;\n");
   }
 
   if (host_config.backend_depth_clamp)
   {
-    out.Write("gl_ClipDistance[0] = clipDist0;\n"
-              "gl_ClipDistance[1] = clipDist1;\n");
+    out.Write("\tgl_ClipDistance[0] = clipDist0;\n"
+              "\tgl_ClipDistance[1] = clipDist1;\n");
   }
 
   // Vulkan NDC space has Y pointing down (right-handed NDC space).
   if (api_type == APIType::Vulkan)
-    out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
+    out.Write("\tgl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
   else
-    out.Write("gl_Position = o.pos;\n");
+    out.Write("\tgl_Position = o.pos;\n");
   out.Write("}}\n");
 
   return out;
 }
+}  // namespace VertexShader
diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h
index 94f2a170c9..74976950bf 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.h
+++ b/Source/Core/VideoCommon/VertexShaderGen.h
@@ -91,3 +91,19 @@ using VertexShaderUid = ShaderUid<vertex_shader_uid_data>;
 VertexShaderUid GetVertexShaderUid();
 ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& host_config,
                                     const vertex_shader_uid_data* uid_data);
+
+namespace VertexShader  // Vertex shader code-writing entry points (declarations only).
+{
+constexpr std::string_view vertex_definition =  // Signature text for a function named `vertex`.
+    "void vertex(in DolphinVertexInput vertex_input, out DolphinVertexOutput vertex_output)";
+// NOTE(review): the Write* helpers return void and take a mutable `out`; presumably they append to it.
+void WriteVertexStructs(APIType api_type, const ShaderHostConfig& host_config,
+                        const vertex_shader_uid_data* uid_data, ShaderCode& out);
+void WriteVertexDefines(APIType api_type, const ShaderHostConfig& host_config,
+                        const vertex_shader_uid_data* uid_data, ShaderCode& out);
+void WriteVertexBody(APIType api_type, const ShaderHostConfig& host_config,
+                     const vertex_shader_uid_data* uid_data, ShaderCode& out);
+ShaderCode WriteFullShader(APIType api_type, const ShaderHostConfig& host_config,
+                           const vertex_shader_uid_data* uid_data, std::string_view custom_vertex,
+                           std::string_view custom_uniforms);  // Returns the ShaderCode by value.
+}  // namespace VertexShader