diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d409d1bacc..b7b51b5e4c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -385,10 +385,6 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool // bool bRenderZToCol0 = ; // output z and alpha to color0 assert( !bRenderZToCol0 || bRenderZ ); - int ztexcoord = -1; - if (bInputZ) - ztexcoord = numTexgen == 0 ? 0 : numTexgen-1; - int nIndirectStagesUsed = 0; if (bpmem.genMode.numindstages > 0) { for (int i = 0; i < numStages; ++i) { @@ -442,23 +438,16 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool if (bOutputZ ) WRITE(p, " out float depth : DEPTH,\n"); - // if zcoord might come from vertex shader in texcoord - if (bInputZ) { - if (numTexgen) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, " in float%d uv%d : TEXCOORD%d, \n", i==ztexcoord?4:3, i,i); - } - else - WRITE(p, " in float4 uv0 : TEXCOORD0,"); //HACK - } - else { - if (numTexgen) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, " in float3 uv%d : TEXCOORD%d,\n",i,i); - } - else - WRITE(p, " in float3 uv0 : TEXCOORD0,\n"); //HACK - } + // compute window position if needed because binding semantic WPOS is not widely supported + if (numTexgen < 7) { + for (int i = 0; i < numTexgen; ++i) + WRITE(p, " in float3 uv%d : TEXCOORD%d, \n", i, i); + WRITE(p, " in float4 clipPos : TEXCOORD%d, \n", numTexgen); + } else { + // wpos is in w of first 4 texcoords + for (int i = 0; i < numTexgen; ++i) + WRITE(p, " in float%d uv%d : TEXCOORD%d, \n", i<4?4:3, i, i); + } WRITE(p, " in float4 colors[2] : COLOR0){\n"); @@ -499,14 +488,23 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool for (int i = 0; i < numStages; i++) WriteStage(p, i, texture_mask); //build the equation for this stage + if (numTexgen >= 7) { + WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n"); + } + + if (bInputZ) { + // the screen space depth value = far z + (clip z / clip w) * z range + WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n"); + } + if (bOutputZ) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... if (bpmem.ztex2.op == ZTEXTURE_ADD) { - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + uv%d.w);\n", ztexcoord); + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n"); } else { _assert_(bpmem.ztex2.op == ZTEXTURE_REPLACE); - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyz, textemp.xyz) + "I_ZBIAS"[0].w);\n"); + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n"); } } @@ -539,14 +537,14 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool if (bOutputZ ) WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * depth);\n"); else - WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * uv%d.w);\n", ztexcoord); + WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * zCoord);\n"); WRITE(p, "ocol0.w = prev.w;\n"); } else { if (bOutputZ) WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * depth);\n"); else - WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * uv%d.w);\n", ztexcoord); + WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * zCoord);\n"); } } WRITE(p, "}\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index ba2c528e11..18c85a9dcf 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -30,6 +30,9 @@ static int s_nIndTexMtxChanged = 0; static bool s_bAlphaChanged; static bool s_bZBiasChanged; static bool s_bIndTexScaleChanged; +static bool s_bZTextureTypeChanged; +static bool s_bDepthRangeChanged; +static float lastDepthRange[2] = {0}; // 0 = far z, 1 = far - near static float lastRGBAfull[2][4][4]; static u8 s_nTexDimsChanged; static u32 lastAlpha = 0; @@ -49,7 +52,7 @@ void PixelShaderManager::Init() s_nColorsChanged[0] = s_nColorsChanged[1] = 0; s_nTexDimsChanged = 0; s_nIndTexMtxChanged = 15; - s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; + s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = s_bZTextureTypeChanged = s_bDepthRangeChanged = true; for (int i = 0; i < 8; ++i) maptocoord[i] = -1; maptocoord_mask = 0; @@ -109,29 +112,36 @@ void PixelShaderManager::SetConstants() if (s_bAlphaChanged) { SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); + s_bAlphaChanged = false; } - if (s_bZBiasChanged) { - u32 bits; - float ffrac = 255.0f/256.0f; + if (s_bZTextureTypeChanged) { + static float ffrac = 255.0f/256.0f; float ftemp[4]; switch (bpmem.ztex2.type) { case 0: - bits = 8; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + // 8 bits + // this breaks the menu in SSBM when it is set correctly to + //ftemp[0] = ffrac/(65536.0f); ftemp[1] = 0; ftemp[2] = 0; ftemp[3] = 0; + ftemp[0] = ffrac/65536.0f; ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; break; case 1: - bits = 16; - ftemp[0] = 0; ftemp[1] = ffrac/(256.0f*256.0f); ftemp[2] = ffrac/256.0f; ftemp[3] = ffrac; + // 16 bits + ftemp[0] = ffrac/65536.0f; ftemp[1] = 0; ftemp[2] = 0; ftemp[3] = ffrac/256.0f; break; case 2: - bits = 24; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + // 24 bits + ftemp[0] = ffrac; ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac/65536.0f; ftemp[3] = 0; break; } - //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); - SetPSConstant4fv(C_ZBIAS, ftemp); - SetPSConstant4f(C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + SetPSConstant4fv(C_ZBIAS, ftemp); + s_bZTextureTypeChanged = false; + } + + if (s_bZBiasChanged || s_bDepthRangeChanged) { + //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); + SetPSConstant4f(C_ZBIAS+1, lastDepthRange[0] / 16777215.0f, lastDepthRange[1] / 16777215.0f, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + s_bZBiasChanged = s_bDepthRangeChanged = false; } // indirect incoming texture scales, update all! @@ -276,6 +286,24 @@ void PixelShaderManager::SetZTextureBias(u32 bias) } } +void PixelShaderManager::SetViewport(float* viewport) +{ + // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) + // [0] = width/2 + // [1] = height/2 + // [2] = 16777215 * (farz - nearz) + // [3] = xorig + width/2 + 342 + // [4] = yorig + height/2 + 342 + // [5] = 16777215 * farz + + if(lastDepthRange[0] != viewport[5] || lastDepthRange[1] != viewport[2]) { + lastDepthRange[0] = viewport[5]; + lastDepthRange[1] = viewport[2]; + + s_bDepthRangeChanged = true; + } +} + void PixelShaderManager::SetIndTexScaleChanged() { s_bIndTexScaleChanged = true; @@ -308,7 +336,11 @@ void PixelShaderManager::SetTevIndirectChanged(int id) void PixelShaderManager::SetZTextureOpChanged() { - s_bZBiasChanged = true; +} + +void PixelShaderManager::SetZTextureTypeChanged() +{ + s_bZTextureTypeChanged = true; } void PixelShaderManager::SetTexturesUsed(u32 nonpow2tex) @@ -318,7 +350,7 @@ void PixelShaderManager::SetTexturesUsed(u32 nonpow2tex) if (nonpow2tex & (0x10101 << i)) { // this check was previously implicit, but should it be here? if (s_nTexDimsChanged ) - s_nTexDimsChanged |= 1 << i; + s_nTexDimsChanged |= 1 << i; } } s_texturemask = nonpow2tex; diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.h b/Source/Core/VideoCommon/Src/PixelShaderManager.h index 4a8a597e03..afe694cd4e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.h +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.h @@ -40,6 +40,7 @@ public: static void SetDestAlpha(const ConstantAlpha& alpha); static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt); static void SetZTextureBias(u32 bias); + static void SetViewport(float* viewport); static void SetIndTexScaleChanged(); static void SetIndMatrixChanged(int matrixidx); @@ -49,6 +50,7 @@ public: static void SetTevOrderChanged(int id); static void SetTevIndirectChanged(int id); static void SetZTextureOpChanged(); + static void SetZTextureTypeChanged(); static void SetTexturesUsed(u32 nonpow2tex); static void SetTexDimsChanged(int texmapid); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index a3fdfc13f0..93cc52b3f8 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -91,9 +91,6 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) if (xfregs.nNumChans > 1) lightMask |= xfregs.colChans[1].color.GetFullLightMask() | xfregs.colChans[1].alpha.GetFullLightMask(); - bool bOutputZ = bpmem.ztex2.op==ZTEXTURE_ADD || has_zbuffer_target; - int ztexcoord = -1; - char *p = text; WRITE(p, "//Vertex Shader: comp:%x, \n", components); WRITE(p, "typedef struct {\n" @@ -139,18 +136,15 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) WRITE(p, " float4 pos : POSITION;\n"); WRITE(p, " float4 colors[2] : COLOR0;\n"); - // if outputting Z, embed the Z coordinate in the w component of a texture coordinate - // if number of tex gens occupies all the texture coordinates, use the last tex coord - // otherwise use the next available tex coord - for (int i = 0; i < xfregs.numTexGens; ++i) { - WRITE(p, " float%d tex%d : TEXCOORD%d;\n", (i==(xfregs.numTexGens-1)&&bOutputZ)?4:3, i, i); - } - if (bOutputZ && xfregs.numTexGens == 0) { - ztexcoord = 0; - WRITE(p, " float4 tex%d : TEXCOORD%d;\n", ztexcoord, ztexcoord); - } - else if (bOutputZ) - ztexcoord = xfregs.numTexGens - 1; + if (xfregs.numTexGens < 7) { + for (int i = 0; i < xfregs.numTexGens; ++i) + WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i); + WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGens); + } else { + // clip position is in w of first 4 texcoords + for (int i = 0; i < xfregs.numTexGens; ++i) + WRITE(p, " float%d tex%d : TEXCOORD%d;\n", i<4?4:3, i, i); + } WRITE(p, "};\n"); WRITE(p, "\n"); @@ -429,8 +423,15 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) WRITE(p, "}\n"); } - if (ztexcoord >= 0 ) - WRITE(p, "o.tex%d.w = o.pos.z/o.pos.w;\n", ztexcoord); + // clipPos/w needs to be done in pixel shader, not here + if (xfregs.numTexGens < 7) { + WRITE(p, "o.clipPos = o.pos;\n"); + } else { + WRITE(p, "o.tex0.w = o.pos.x;\n"); + WRITE(p, "o.tex1.w = o.pos.y;\n"); + WRITE(p, "o.tex2.w = o.pos.z;\n"); + WRITE(p, "o.tex3.w = o.pos.w;\n"); + } // if (bpmem.fog.c_proj_fsel.fsel != 0) { // switch (bpmem.fog.c_proj_fsel.fsel) { @@ -449,6 +450,9 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) // WRITE(p, "o.fog = o.pos.z/o.pos.w;\n"); // } + // scale to gl clip space + WRITE(p, "o.pos.z = (o.pos.z * 2.0f) + o.pos.w;\n"); + WRITE(p, "return o;\n}\n"); if (text[sizeof(text) - 1] != 0x7C) diff --git a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp index 51c373f93c..9ee50ccb5e 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp @@ -252,7 +252,7 @@ void VertexShaderManager::SetConstants(bool proj_hax_1, bool proj_hax_2) //---------Projection[11]--------- // No hacks if ((!proj_hax_1 && !proj_hax_2) || (proj_hax_1 && proj_hax_2)) - g_fProjectionMatrix[11] = -(-1.0f - xfregs.rawProjection[5]); + g_fProjectionMatrix[11] = -(-0.5f - xfregs.rawProjection[5]); // Before R945 Hack if (proj_hax_1 && !proj_hax_2) @@ -260,7 +260,7 @@ void VertexShaderManager::SetConstants(bool proj_hax_1, bool proj_hax_2) // R844 Hack if (!proj_hax_1 && proj_hax_2) - g_fProjectionMatrix[11] = -xfregs.rawProjection[5]; + g_fProjectionMatrix[11] = xfregs.rawProjection[5]; //-------------------------------- @@ -431,4 +431,4 @@ void VertexShaderManager::SetMaterialColor(int index, u32 data) s_fMaterials[15] = ((data)&0xFF)/255.0f; break; } -} \ No newline at end of file +} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index c7b29e6f12..d41de5d10e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -341,6 +341,9 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; + if (changes & 3) { + PixelShaderManager::SetZTextureTypeChanged(); + } #if defined(_DEBUG) || defined(DEBUGFAST) const char* pzop[] = {"DISABLE", "ADD", "REPLACE", "?"}; const char* pztype[] = {"Z8", "Z16", "Z24", "?"}; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp index 31d2ee5d7f..63c7c51eca 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp @@ -20,6 +20,7 @@ #include "XFMemory.h" #include "VertexManager.h" #include "VertexShaderManager.h" +#include "PixelShaderManager.h" // LoadXFReg 0x10 void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) @@ -153,6 +154,7 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) case 0x101a: VertexManager::Flush(); VertexShaderManager::SetViewport((float*)&pData[i]); + PixelShaderManager::SetViewport((float*)&pData[i]); i += 6; break;