diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 4077c4041f..cb3cd07e8a 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -26,7 +26,7 @@ // Mash together all the inputs that contribute to the code of a generated pixel shader into // a unique identifier, basically containing all the bits. Yup, it's a lot .... -void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0, u32 dstAlphaEnable) +void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 dstAlphaEnable) { u32 projtexcoords = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) { @@ -42,9 +42,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 zbufrender, u3 ((u32)dstAlphaEnable << 11) | ((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 12) | (projtexcoords << 20) | - ((u32)bpmem.ztex2.op << 28) | - (zbufrender << 30) | - (zBufRenderToCol0 << 31); + ((u32)bpmem.ztex2.op << 28); uid.values[0] = (uid.values[0] & ~0x0ff00000) | (projtexcoords << 20); // swap table @@ -134,7 +132,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask); static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask); static void WriteAlphaCompare(char *&p, int num, int comp); static bool WriteAlphaTest(char *&p, bool HLSL); -static void WriteFog(char *&p, bool bOutputZ); +static void WriteFog(char *&p); const float epsilon8bit = 1.0f / 255.0f; @@ -369,7 +367,7 @@ static void BuildSwapModeTable() } } -const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0, bool dstAlphaEnable, bool HLSL) +const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL) { text[sizeof(text) - 1] = 0x7C; // canary DVSTARTPROFILE(); @@ -383,13 +381,6 @@ const char *GeneratePixelShader(u32 texture_mask, bool 
has_zbuffer_target, bool WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n", numStages, numTexgen, bpmem.genMode.numindstages); - bool bRenderZ = has_zbuffer_target && bpmem.zmode.updateenable; - bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE; - bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ || bpmem.fog.c_proj_fsel.fsel != 0; - - // bool bRenderZToCol0 = ; // output z and alpha to color0 - assert( !bRenderZToCol0 || bRenderZ ); - int nIndirectStagesUsed = 0; if (bpmem.genMode.numindstages > 0) { for (int i = 0; i < numStages; ++i) { @@ -437,12 +428,8 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool WRITE(p, "void main(\n"); - WRITE(p, "out half4 ocol0 : COLOR0,\n"); - if (bRenderZ && !bRenderZToCol0 ) - WRITE(p, "out half4 ocol1 : COLOR1,\n"); - - if (bOutputZ ) - WRITE(p, " out float depth : DEPTH,\n"); + WRITE(p, " out half4 ocol0 : COLOR0,\n"); + WRITE(p, " out float depth : DEPTH,\n"); // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { @@ -504,20 +491,18 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n"); } - if (bInputZ) { - // the screen space depth value = far z + (clip z / clip w) * z range - WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n"); - } + // the screen space depth value = far z + (clip z / clip w) * z range + WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n"); - if (bOutputZ) { - // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... 
- if (bpmem.ztex2.op == ZTEXTURE_ADD) { - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n"); - } - else { - _assert_(bpmem.ztex2.op == ZTEXTURE_REPLACE); - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n"); - } + // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... + if (bpmem.ztex2.op == ZTEXTURE_ADD) { + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n"); + } + else if (bpmem.ztex2.op == ZTEXTURE_REPLACE) { + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n"); + } + else { + WRITE(p, "depth = zCoord;\n"); } //if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n"); @@ -529,34 +514,14 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool WRITE(p, "ocol0 = 0;\n"); } else { - if (!bRenderZToCol0) { - if (dstAlphaEnable) { - WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n"); - } else { - WriteFog(p, bOutputZ); - WRITE(p, " ocol0 = prev;\n"); - } - } else { - WRITE(p, " ocol0 = prev;\n"); - } - } - - if (bRenderZ) { - // write depth as color - if (bRenderZToCol0) { - if (bOutputZ ) - WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * depth);\n"); - else - WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * zCoord);\n"); - WRITE(p, "ocol0.w = prev.w;\n"); - } - else { - if (bOutputZ) - WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * depth);\n"); - else - WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * zCoord);\n"); + if (dstAlphaEnable) { + WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n"); + } else { + WriteFog(p); + WRITE(p, " ocol0 = prev;\n"); } } + WRITE(p, "}\n"); if (text[sizeof(text) - 1] != 0x7C) PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); @@ -888,7 +853,7 @@ static bool 
WriteAlphaTest(char *&p, bool HLSL) return true; } -static void WriteFog(char *&p, bool bOutputZ) +static void WriteFog(char *&p) { bool enabled = bpmem.fog.c_proj_fsel.fsel == 0 ? false : true; @@ -896,11 +861,11 @@ static void WriteFog(char *&p, bool bOutputZ) if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - Zs) - WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - %s);\n", bOutputZ ? "depth" : "zCoord"); + WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n"); } else { // orthographic // ze = a*Zs - WRITE (p, " float ze = "I_FOG"[1].x * %s;\n", bOutputZ ? "depth" : "zCoord"); + WRITE (p, " float ze = "I_FOG"[1].x * depth;\n"); } WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 665c1ab27f..ded4887850 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -92,7 +92,7 @@ public: } }; -const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0, bool dstAlphaEnable, bool HLSL = false); -void GetPixelShaderId(PIXELSHADERUID &, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0, u32 dstAlphaEnable); +const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL = false); +void GetPixelShaderId(PIXELSHADERUID &, u32 s_texturemask, u32 dstAlphaEnable); #endif diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 93cc52b3f8..2ee3857077 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -25,13 +25,12 @@ // Mash together all the inputs that contribute to the code of a generated vertex shader into // a unique identifier, basically containing all the bits. Yup, it's a lot .... 
-void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender) +void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components) { vid.values[0] = components | (xfregs.numTexGens << 23) | (xfregs.nNumChans << 27) | - ((u32)xfregs.bEnableDualTexTransform << 29) | - (zbufrender << 30); + ((u32)xfregs.bEnableDualTexTransform << 29); for (int i = 0; i < 2; ++i) { vid.values[1+i] = xfregs.colChans[i].color.enablelighting ? @@ -77,7 +76,7 @@ static char text[16384]; char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha); -const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) +const char *GenerateVertexShader(u32 components) { text[sizeof(text) - 1] = 0x7C; // canary DVSTARTPROFILE(); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 8518f299dd..22a65ed0fa 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -94,7 +94,7 @@ public: } }; -const char *GenerateVertexShader(u32 components, bool has_zbuffer_target); -void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender); +const char *GenerateVertexShader(u32 components); +void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components); #endif diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index de316d2d8f..8093ed401f 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -67,7 +67,7 @@ void PixelShaderCache::SetShader() DVSTARTPROFILE(); PIXELSHADERUID uid; - GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), false, false, false); + GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), false); PSCache::iterator iter; iter = PixelShaders.find(uid); @@ -85,7 +85,7 @@ void PixelShaderCache::SetShader() } bool HLSL = false; - const char 
*code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), false, false, false, HLSL); + const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), false, HLSL); LPDIRECT3DPIXELSHADER9 shader = HLSL ? D3D::CompilePixelShader(code, (int)strlen(code), false) : CompileCgShader(code); if (shader) { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index fc784633b3..fb1c0ee303 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -69,7 +69,7 @@ void VertexShaderCache::SetShader(u32 components) DVSTARTPROFILE(); VERTEXSHADERUID uid; - GetVertexShaderId(uid, components, false); + GetVertexShaderId(uid, components); VSCache::iterator iter; iter = vshaders.find(uid); @@ -86,7 +86,7 @@ void VertexShaderCache::SetShader(u32 components) } bool HLSL = false; - const char *code = GenerateVertexShader(components, false); + const char *code = GenerateVertexShader(components); LPDIRECT3DVERTEXSHADER9 shader = HLSL ? 
D3D::CompileVertexShader(code, (int)strlen(code), false) : CompileCgShader(code); if (shader) { diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPFunctions.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPFunctions.cpp index b3b05635fd..92ac70602e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPFunctions.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPFunctions.cpp @@ -88,9 +88,6 @@ void SetDepthMode(const Bypass &bp) glDisable(GL_DEPTH_TEST); glDepthMask(GL_FALSE); } - - if (!bpmem.zmode.updateenable) - Renderer::SetRenderMode(Renderer::RM_Normal); } void SetBlendMode(const Bypass &bp) { @@ -160,68 +157,38 @@ void RenderToXFB(const Bypass &bp, const TRectangle &multirc, const float &yScal } void ClearScreen(const Bypass &bp, const TRectangle &multirc) { - - // Clear color - Renderer::SetRenderMode(Renderer::RM_Normal); - // Clear Z-Buffer target - bool bRestoreZBufferTarget = Renderer::UseFakeZTarget(); - - // Update the view port for clearing the picture - glViewport(0, 0, Renderer::GetTargetWidth(), Renderer::GetTargetHeight()); + // Update the view port for clearing the picture + glViewport(0, 0, Renderer::GetTargetWidth(), Renderer::GetTargetHeight()); - // Always set the scissor in case it was set by the game and has not been reset - glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom), - (multirc.right - multirc.left), (multirc.bottom - multirc.top)); - // --------------------------- + // Always set the scissor in case it was set by the game and has not been reset + glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom), + (multirc.right - multirc.left), (multirc.bottom - multirc.top)); + // --------------------------- - VertexShaderManager::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); - // Since clear operations use the source rectangle, we have to do - // regular renders (glClear clears the entire buffer) - if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate || bpmem.zmode.updateenable) - { - 
GLbitfield bits = 0; - if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate) - { - u32 clearColor = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB; - glClearColor(((clearColor>>16) & 0xff)*(1/255.0f), - ((clearColor>>8 ) & 0xff)*(1/255.0f), - ((clearColor>>0 ) & 0xff)*(1/255.0f), - ((clearColor>>24) & 0xff)*(1/255.0f)); - bits |= GL_COLOR_BUFFER_BIT; - } - if (bpmem.zmode.updateenable) - { - glClearDepth((float)(bpmem.clearZValue & 0xFFFFFF) / float(0xFFFFFF)); - bits |= GL_DEPTH_BUFFER_BIT; - } - if (bRestoreZBufferTarget) - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); // don't clear ztarget here - glClear(bits); - } - - // Have to clear the target zbuffer - if (bpmem.zmode.updateenable && bRestoreZBufferTarget) + // Since clear operations use the source rectangle, we have to do + // regular renders (glClear clears the entire buffer) + if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate || bpmem.zmode.updateenable) + { + GLbitfield bits = 0; + if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate) { - glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); - GL_REPORT_ERRORD(); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - - // red should probably be the LSB - glClearColor(((bpmem.clearZValue>>0)&0xff)*(1/255.0f), - ((bpmem.clearZValue>>8)&0xff)*(1/255.0f), - ((bpmem.clearZValue>>16)&0xff)*(1/255.0f), 0); - glClear(GL_COLOR_BUFFER_BIT); - Renderer::SetColorMask(); - GL_REPORT_ERRORD(); + u32 clearColor = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB; + glClearColor(((clearColor>>16) & 0xff)*(1/255.0f), + ((clearColor>>8 ) & 0xff)*(1/255.0f), + ((clearColor>>0 ) & 0xff)*(1/255.0f), + ((clearColor>>24) & 0xff)*(1/255.0f)); + bits |= GL_COLOR_BUFFER_BIT; } - - if (bRestoreZBufferTarget) + if (bpmem.zmode.updateenable) { - // restore target - GLenum s_drawbuffers[2] = {GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT}; - glDrawBuffers(2, s_drawbuffers); + glClearDepth((float)(bpmem.clearZValue & 0xFFFFFF) / float(0xFFFFFF)); + bits |= 
GL_DEPTH_BUFFER_BIT; } + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glClear(bits); + } } void RestoreRenderState(const Bypass &bp) @@ -256,4 +223,4 @@ void SetInterlacingMode(const Bypass &bp) { // TODO } -}; \ No newline at end of file +}; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index a6927acb86..7da020485c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -36,6 +36,7 @@ static int s_nMaxPixelInstructions; static GLuint s_ColorMatrixProgram = 0; +static GLuint s_DepthMatrixProgram = 0; PixelShaderCache::PSCache PixelShaderCache::pshaders; PIXELSHADERUID PixelShaderCache::s_curuid; bool PixelShaderCache::s_displayCompileAlert; @@ -86,12 +87,42 @@ void PixelShaderCache::Init() glDeleteProgramsARB(1, &s_ColorMatrixProgram); s_ColorMatrixProgram = 0; } + + sprintf(pmatrixprog, "!!ARBfp1.0" + "TEMP R0;\n" + "TEMP R1;\n" + "TEMP R2;\n" + "PARAM K0 = { 65536.0, 256.0 };\n" + "TEX R2, fragment.texcoord[0], texture[0], RECT;\n" + "MUL R0.x, R2.x, K0.x;\n" + "FRC R0.x, R0.x;\n" + "MUL R0.y, R2.x, K0.y;\n" + "FRC R0.y, R0.y;\n" + "MOV R0.z, R2.x;\n" + "DP4 R1.x, R0, program.env[%d];\n" + "DP4 R1.y, R0, program.env[%d];\n" + "DP4 R1.z, R0, program.env[%d];\n" + "DP4 R1.w, R0, program.env[%d];\n" + "ADD result.color, R1, program.env[%d];\n" + "END\n", C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4); + glGenProgramsARB(1, &s_DepthMatrixProgram); + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_DepthMatrixProgram); + glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); + + err = GL_REPORT_ERROR(); + if (err != GL_NO_ERROR) { + ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); + glDeleteProgramsARB(1, &s_DepthMatrixProgram); + s_DepthMatrixProgram = 0; + } } void PixelShaderCache::Shutdown() { 
glDeleteProgramsARB(1, &s_ColorMatrixProgram); s_ColorMatrixProgram = 0; + glDeleteProgramsARB(1, &s_DepthMatrixProgram); + s_DepthMatrixProgram = 0; PSCache::iterator iter = pshaders.begin(); for (; iter != pshaders.end(); iter++) iter->second.Destroy(); @@ -103,15 +134,18 @@ GLuint PixelShaderCache::GetColorMatrixProgram() return s_ColorMatrixProgram; } +GLuint PixelShaderCache::GetDepthMatrixProgram() +{ + return s_DepthMatrixProgram; +} + FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable) { DVSTARTPROFILE(); PIXELSHADERUID uid; - u32 zbufrender = (Renderer::UseFakeZTarget() && bpmem.zmode.updateenable) ? 1 : 0; - u32 zBufRenderToCol0 = Renderer::GetRenderMode() != Renderer::RM_Normal; u32 dstAlpha = dstAlphaEnable ? 1 : 0; - GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), zbufrender, zBufRenderToCol0, dstAlpha); + GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), dstAlpha); PSCache::iterator iter = pshaders.find(uid); @@ -127,8 +161,6 @@ FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable) PSCacheEntry& newentry = pshaders[uid]; const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), - Renderer::UseFakeZTarget(), - Renderer::GetRenderMode() != Renderer::RM_Normal, dstAlphaEnable); #if defined(_DEBUG) || defined(DEBUGFAST) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h index 9b19f5f454..33b92db313 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h @@ -65,6 +65,8 @@ public: static bool CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram); static GLuint GetColorMatrixProgram(); + + static GLuint GetDepthMatrixProgram(); }; #endif // _PIXELSHADERCACHE_H_ diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 482e275e5c..82327c6086 100644 --- 
a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -81,27 +81,27 @@ static bool s_bAVIDumping = false; static FILE* f_pFrameDump; #endif -static int nZBufferRender = 0; // if > 0, then use zbuffer render, and count down. - // 1 for no MSAA. Use s_MSAASamples > 1 to check for MSAA. static int s_MSAASamples = 1; static int s_MSAACoverageSamples = 0; // Normal Mode +// +// By default the depth target is used +// if there is an error creating and attaching it a depth buffer will be used instead +// // s_RenderTarget is a texture_rect -// s_DepthTarget is a Z renderbuffer -// s_FakeZTarget is a texture_rect +// s_DepthTarget is a texture_rect +// s_DepthBuffer is a Z renderbuffer // MSAA mode // s_uFramebuffer is a FBO // s_RenderTarget is a MSAA renderbuffer -// s_FakeZBufferTarget is a MSAA renderbuffer -// s_DepthTarget is a real MSAA z/stencilbuffer +// s_DepthTarget is a MSAA renderbuffer // // s_ResolvedFramebuffer is a FBO -// s_ResolvedColorTarget is a texture -// s_ResolvedFakeZTarget is a texture -// s_ResolvedDepthTarget is a Z renderbuffer +// s_ResolvedRenderTarget is a texture +// s_ResolvedDepthTarget is a texture // A framebuffer is a set of render targets: a color and a z buffer. They can be either RenderBuffers or Textures. static GLuint s_uFramebuffer = 0; @@ -110,11 +110,10 @@ static GLuint s_uResolvedFramebuffer = 0; // The size of these should be a (not necessarily even) multiple of the EFB size, 640x528, but isn't. // These are all texture IDs. Bind them as rect arb textures. 
static GLuint s_RenderTarget = 0; -static GLuint s_FakeZTarget = 0; static GLuint s_DepthTarget = 0; +static GLuint s_DepthBuffer = 0; static GLuint s_ResolvedRenderTarget = 0; -static GLuint s_ResolvedFakeZTarget = 0; static GLuint s_ResolvedDepthTarget = 0; static bool s_bATIDrawBuffers = false; @@ -128,8 +127,6 @@ static volatile bool s_bScreenshot = false; static Common::CriticalSection s_criticalScreenshot; static std::string s_sScreenshotName; -static Renderer::RenderMode s_RenderMode = Renderer::RM_Normal; - int frameCount; static int s_fps = 0; @@ -328,42 +325,35 @@ bool Renderer::Init() glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); SetDefaultRectTexParams(); - GLint nMaxMRT = 0; - glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS_EXT, &nMaxMRT); - if (nMaxMRT > 1) - { - // There's MRT support. Create a color texture image to use as secondary render target. - // We use MRT to render Z into this one, for various purposes (mostly copy Z to texture). - glGenTextures(1, (GLuint *)&s_FakeZTarget); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - SetDefaultRectTexParams(); - } - - // Create the real depth/stencil buffer. It's a renderbuffer, not a texture. - glGenRenderbuffersEXT(1, &s_DepthTarget); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthTarget); - glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight); + // Create the depth target texture + glGenTextures(1, &s_DepthTarget); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_DepthTarget); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); + SetDefaultRectTexParams(); - // Our framebuffer object is still bound here. 
Attach the two render targets, color and Z/stencil, to the framebuffer object. + // Our framebuffer object is still bound here. Attach the two render targets, color and depth, to the framebuffer object. glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, s_RenderTarget, 0); - glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthTarget); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, s_DepthTarget, 0); GL_REPORT_FBO_ERROR(); - if (s_FakeZTarget != 0) { - // We do a simple test to make sure that MRT works. I don't really know why - this is probably a workaround for - // some terribly buggy ancient driver. - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget, 0); - bool bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT; - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0); - if (bFailed) { - glDeleteTextures(1, (GLuint *)&s_FakeZTarget); - s_FakeZTarget = 0; - } - } + bool bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT; - if (s_FakeZTarget == 0) - ERROR_LOG(VIDEO, "Disabling ztarget MRT feature (max MRT = %d)", nMaxMRT); + // Check that the FBO is attached. If there is an error revert to a depth buffer. 
+ if (bFailed) { + ERROR_LOG(VIDEO, "Disabling ztarget feature"); + + // detach and delete depth texture + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0); + glDeleteTextures(1, (GLuint *)&s_DepthTarget); + s_DepthTarget = 0; + + // create and attach depth buffer + glGenRenderbuffersEXT(1, (GLuint *)&s_DepthBuffer); + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthBuffer); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight); + glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthBuffer); + GL_REPORT_FBO_ERROR(); + } } else { @@ -376,25 +366,17 @@ bool Renderer::Init() } else { glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight); } - glGenRenderbuffersEXT(1, &s_FakeZTarget); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_FakeZTarget); - if (s_MSAACoverageSamples) { - glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight); - } else { - glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight); - } glGenRenderbuffersEXT(1, &s_DepthTarget); glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthTarget); if (s_MSAACoverageSamples) { - glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight); + glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight); } else { - glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight); + glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight); } 
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0); // Attach them to our multisampled FBO. The multisampled FBO is still bound here. glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, s_RenderTarget); - glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_RENDERBUFFER_EXT, s_FakeZTarget); glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthTarget); GL_REPORT_FBO_ERROR(); @@ -411,25 +393,23 @@ bool Renderer::Init() glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); SetDefaultRectTexParams(); // Generate the resolve targets. - glGenTextures(1, (GLuint *)&s_ResolvedFakeZTarget); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_ResolvedFakeZTarget); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glGenTextures(1, (GLuint *)&s_ResolvedDepthTarget); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_ResolvedDepthTarget); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT, s_targetwidth, s_targetheight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL); SetDefaultRectTexParams(); - // Create the real depth/stencil buffer. It's a renderbuffer, not a texture. - glGenRenderbuffersEXT(1, &s_ResolvedDepthTarget); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_ResolvedDepthTarget); - glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0); // Attach our resolve targets to our resolved FBO. 
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedRenderTarget, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedFakeZTarget, 0); - glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_ResolvedDepthTarget); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedDepthTarget, 0); GL_REPORT_FBO_ERROR(); bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT; if (bFailed) PanicAlert("Incomplete rt2"); + + if (bFailed) { + ERROR_LOG(VIDEO, "AA rendering init failed."); + } } if (GL_REPORT_ERROR() != GL_NO_ERROR) @@ -438,8 +418,6 @@ bool Renderer::Init() // glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer); glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - nZBufferRender = 0; // Initialize the Z render shutoff countdown. We only render Z if it's desired, to save GPU power. - if (GL_REPORT_ERROR() != GL_NO_ERROR) bSuccess = false; @@ -477,7 +455,6 @@ bool Renderer::Init() cgGLSetDebugMode(GL_FALSE); #endif - s_RenderMode = Renderer::RM_Normal; if (!InitializeGL()) return false; @@ -592,12 +569,6 @@ void Renderer::SetRenderTarget(GLuint targ) targ != 0 ? targ : s_RenderTarget, 0); } -void Renderer::SetDepthTarget(GLuint targ) -{ - glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, - targ != 0 ? targ : s_DepthTarget); -} - void Renderer::SetFramebuffer(GLuint fb) { glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb != 0 ? fb : s_uFramebuffer); @@ -628,38 +599,39 @@ GLuint Renderer::ResolveAndGetRenderTarget(const TRectangle &source_rect) } } -GLuint Renderer::ResolveAndGetFakeZTarget(const TRectangle &source_rect) +GLuint Renderer::ResolveAndGetDepthTarget(const TRectangle &source_rect) { // This logic should be moved elsewhere. 
if (s_MSAASamples > 1) { // Flip the rectangle TRectangle flipped_rect; - source_rect.FlipYPosition(GetTargetHeight(), &flipped_rect); + //source_rect.FlipYPosition(GetTargetHeight(), &flipped_rect); + + // donkopunchstania - some bug causes the offsets to be ignored. driver bug? + flipped_rect.top = 0; + flipped_rect.bottom = GetTargetHeight(); + + flipped_rect.left = 0; + flipped_rect.right = GetTargetWidth(); flipped_rect.Clamp(0, 0, GetTargetWidth(), GetTargetHeight()); - // Do the resolve. We resolve both color channels, not very necessary. + // Do the resolve. glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, s_uFramebuffer); glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, s_uResolvedFramebuffer); glBlitFramebufferEXT(flipped_rect.left, flipped_rect.top, flipped_rect.right, flipped_rect.bottom, flipped_rect.left, flipped_rect.top, flipped_rect.right, flipped_rect.bottom, - GL_COLOR_BUFFER_BIT, GL_NEAREST); + GL_DEPTH_BUFFER_BIT, GL_NEAREST); // Return the resolved target. - return s_ResolvedFakeZTarget; + return s_ResolvedDepthTarget; } else { - return s_FakeZTarget; + return s_DepthTarget; } } -bool Renderer::UseFakeZTarget() -{ - // This logic should be moved elsewhere. - return nZBufferRender > 0; -} - void Renderer::ResetGLState() { // Gets us to a reasonably sane state where it's possible to do things like @@ -801,151 +773,6 @@ bool Renderer::IsUsingATIDrawBuffers() return s_bATIDrawBuffers; } -bool Renderer::HaveStencilBuffer() -{ - return s_bHaveStencilBuffer; -} - -void Renderer::SetZBufferRender() -{ - nZBufferRender = 10; // The game asked for Z. Give it 10 frames, then turn it off for speed. 
- GLenum s_drawbuffers[2] = { - GL_COLOR_ATTACHMENT0_EXT, - GL_COLOR_ATTACHMENT1_EXT - }; - glDrawBuffers(2, s_drawbuffers); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget, 0); - _assert_(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) == GL_FRAMEBUFFER_COMPLETE_EXT); -} - - -// Does this function even work correctly??? -void Renderer::FlushZBufferAlphaToTarget() -{ - ResetGLState(); - - SetRenderTarget(0); - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); - - // disable all other stages - for (int i = 1; i < 8; ++i) - TextureMngr::DisableStage(i); - // texture map s_RenderTargets[s_curtarget] onto the main buffer - glActiveTexture(GL_TEXTURE0); - glEnable(GL_TEXTURE_RECTANGLE_ARB); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget); - GL_REPORT_ERRORD(); - - // setup the stencil to only accept pixels that have been written - glStencilFunc(GL_EQUAL, 1, 0xff); - glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); - - // TODO: This code should not have to bother with stretchtofit checking - - // all necessary scale initialization should be done elsewhere. - if (s_bNativeResolution) - { - //TODO: Do Correctly in a bit - float FactorW = 640.f / (float)OpenGL_GetBackbufferWidth(); - float FactorH = 480.f / (float)OpenGL_GetBackbufferHeight(); - - float Max = (FactorW < FactorH) ? 
FactorH : FactorW; - float Temp = 1.0f / Max; - FactorW *= Temp; - FactorH *= Temp; - - glBegin(GL_QUADS); - glTexCoord2f(0, 0); glVertex2f(-FactorW,-FactorH); - glTexCoord2f(0, (float)GetTargetHeight()); glVertex2f(-FactorW,FactorH); - glTexCoord2f((float)GetTargetWidth(), (float)GetTargetHeight()); glVertex2f(FactorW,FactorH); - glTexCoord2f((float)GetTargetWidth(), 0); glVertex2f(FactorW,-FactorH); - glEnd(); - } - else - { - glBegin(GL_QUADS); - glTexCoord2f(0, 0); glVertex2f(-1,-1); - glTexCoord2f(0, (float)(GetTargetHeight())); glVertex2f(-1,1); - glTexCoord2f((float)(GetTargetWidth()), (float)(GetTargetHeight())); glVertex2f(1,1); - glTexCoord2f((float)(GetTargetWidth()), 0); glVertex2f(1,-1); - glEnd(); - } - - GL_REPORT_ERRORD(); - - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); - RestoreGLState(); -} - -void Renderer::SetRenderMode(RenderMode mode) -{ - if (!s_bHaveStencilBuffer && mode == RM_ZBufferAlpha) - mode = RM_ZBufferOnly; - - if (s_RenderMode == mode) - return; - - if (mode == RM_Normal) { - // flush buffers - if (s_RenderMode == RM_ZBufferAlpha) { - FlushZBufferAlphaToTarget(); - glDisable(GL_STENCIL_TEST); - } - SetColorMask(); - SetRenderTarget(0); - SetZBufferRender(); - GL_REPORT_ERRORD(); - } - else if (s_RenderMode == RM_Normal) { - // setup buffers - _assert_(UseFakeZTarget() && bpmem.zmode.updateenable); - if (mode == RM_ZBufferAlpha) { - glEnable(GL_STENCIL_TEST); - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - glStencilFunc(GL_ALWAYS, 1, 0xff); - glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - } - - glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - GL_REPORT_ERRORD(); - } - else { - _assert_(UseFakeZTarget()); - _assert_(s_bHaveStencilBuffer); - - if (mode == RM_ZBufferOnly) { - // flush and remove stencil - _assert_(s_RenderMode == RM_ZBufferAlpha); - FlushZBufferAlphaToTarget(); - glDisable(GL_STENCIL_TEST); - - SetRenderTarget(s_FakeZTarget); - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - 
GL_REPORT_ERRORD(); - } - else { - _assert_(mode == RM_ZBufferAlpha && s_RenderMode == RM_ZBufferOnly); - - // setup stencil - glEnable(GL_STENCIL_TEST); - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - glStencilFunc(GL_ALWAYS, 1, 0xff); - glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - } - } - - s_RenderMode = mode; -} - -Renderer::RenderMode Renderer::GetRenderMode() -{ - return s_RenderMode; -} - void ComputeBackbufferRectangle(TRectangle *rc) { float FloatGLWidth = (float)OpenGL_GetBackbufferWidth(); @@ -1021,8 +848,6 @@ void Renderer::Swap(const TRectangle& rc) OpenGL_Update(); // just updates the render window position and the backbuffer size DVSTARTPROFILE(); - Renderer::SetRenderMode(Renderer::RM_Normal); - ResetGLState(); TRectangle back_rc; @@ -1412,17 +1237,6 @@ void Renderer::SwapBuffers() // Render to the framebuffer. glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer); - if (nZBufferRender > 0) - { - if (--nZBufferRender == 0) - { - // turn off - nZBufferRender = 0; - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0); - Renderer::SetRenderMode(RM_Normal); // turn off any zwrites - } - } GL_REPORT_ERRORD(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.h b/Source/Plugins/Plugin_VideoOGL/Src/Render.h index 298a08f872..60a6b6de83 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.h @@ -41,19 +41,9 @@ extern int frameCount; class Renderer { -private: - static void FlushZBufferAlphaToTarget(); public: - enum RenderMode - { - RM_Normal=0, // normal target as color0, ztarget as color1 - RM_ZBufferOnly, // zbuffer as color0 - RM_ZBufferAlpha, // zbuffer as color0, also will dump alpha info to regular target once mode is switched - // use stencil buffer to indicate what pixels were written - }; - - static bool Init(); + static bool Init(); static void Shutdown(); // initialize opengl standard 
values (like viewport) @@ -65,15 +55,11 @@ public: static void SwapBuffers(); static bool IsUsingATIDrawBuffers(); - static bool HaveStencilBuffer(); static void SetColorMask(); static void SetBlendMode(bool forceUpdate); static bool SetScissorRect(); - static void SetRenderMode(RenderMode mode); - static RenderMode GetRenderMode(); - // Render target management static int GetTargetWidth(); static int GetTargetHeight(); @@ -83,9 +69,7 @@ public: static float GetTargetScaleY(); static void SetFramebuffer(GLuint fb); - static void SetZBufferRender(); // sets rendering of the zbuffer using MRTs static void SetRenderTarget(GLuint targ); // if targ is 0, sets to original render target - static void SetDepthTarget(GLuint targ); // If in MSAA mode, this will perform a resolve of the specified rectangle, and return the resolve target as a texture ID. // Thus, this call may be expensive. Don't repeat it unnecessarily. @@ -93,10 +77,9 @@ public: // After calling this, before you render anything else, you MUST bind the framebuffer you want to draw to. static GLuint ResolveAndGetRenderTarget(const TRectangle &rect); - // Same as above but for the FakeZ Target. + // Same as above but for the depth Target. // After calling this, before you render anything else, you MUST bind the framebuffer you want to draw to. - static GLuint ResolveAndGetFakeZTarget(const TRectangle &rect); - static bool UseFakeZTarget(); // This is used by some functions to check for Z target existence. 
+ static GLuint ResolveAndGetDepthTarget(const TRectangle &rect); // Random utilities static void RenderText(const char* pstr, int left, int top, u32 color); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConversionShader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConversionShader.cpp index be690ec116..12e6e7e913 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConversionShader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConversionShader.cpp @@ -165,6 +165,51 @@ void WriteSwizzler(char*& p, u32 format) " sampleUv.y = sampleUv.y + textureDims.w;\n"); } +// block dimensions : widthStride, heightStride +// texture dims : width, height, x offset, y offset +void Write32BitSwizzler(char*& p, u32 format) +{ + WRITE(p, "uniform float4 blkDims : register(c%d);\n", C_COLORMATRIX); + WRITE(p, "uniform float4 textureDims : register(c%d);\n", C_COLORMATRIX + 1); + + float blkW = (float)GetBlockWidthInTexels(format); + float blkH = (float)GetBlockHeightInTexels(format); + float samples = (float)GetEncodedSampleCount(format); + + // 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments + + WRITE(p, + "uniform samplerRECT samp0 : register(s0);\n" + "void main(\n" + " out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0)\n" + "{\n" + " float2 sampleUv;\n" + " float2 uv1 = floor(uv0);\n"); + + WRITE(p, " float yl = floor(uv1.y / %f);\n", blkH); + WRITE(p, " float yb = yl * %f;\n", blkH); + WRITE(p, " float yoff = uv1.y - yb;\n"); + WRITE(p, " float xp = uv1.x + (yoff * textureDims.x);\n"); + WRITE(p, " float xel = floor(xp / 2);\n"); + WRITE(p, " float xb = floor(xel / %f);\n", blkH); + WRITE(p, " float xoff = xel - (xb * %f);\n", blkH); + + WRITE(p, " float x2 = uv1.x * 2;\n"); + WRITE(p, " float xl = floor(x2 / %f);\n", blkW); + WRITE(p, " float xib = x2 - (xl * %f);\n", blkW); + WRITE(p, " float halfxb = floor(xb / 2);\n"); + + + WRITE(p, " sampleUv.x = xib + (halfxb * %f);\n", blkW); + WRITE(p, " sampleUv.y = yb + xoff;\n"); + 
WRITE(p, " sampleUv = sampleUv * blkDims.xy;\n"); + WRITE(p, " sampleUv.y = textureDims.y - sampleUv.y;\n"); + + WRITE(p, " sampleUv.x = sampleUv.x + textureDims.z;\n"); + WRITE(p, " sampleUv.y = sampleUv.y + textureDims.w;\n"); +} + void WriteSampleColor(char*& p, const char* colorComp, const char* dest) { WRITE(p, " %s = texRECT(samp0, sampleUv).%s;\n", dest, colorComp); @@ -432,48 +477,9 @@ void WriteRGBA4443Encoder(char* p) WRITE(p, "}\n"); } -// block dimensions : widthStride, heightStride -// texture dims : width, height, x offset, y offset -void WriteRGBA8Encoder(char* p, bool fromDepth) +void WriteRGBA8Encoder(char* p) { - WRITE(p, "uniform float4 blkDims : register(c%d);\n", C_COLORMATRIX); - WRITE(p, "uniform float4 textureDims : register(c%d);\n", C_COLORMATRIX + 1); - - float blkW = (float)GetBlockWidthInTexels(GX_TF_RGBA8); - float blkH = (float)GetBlockHeightInTexels(GX_TF_RGBA8); - float samples = (float)GetEncodedSampleCount(GX_TF_RGBA8); - - // Swizzling for RGBA8 format - WRITE(p, - "uniform samplerRECT samp0 : register(s0);\n" - "void main(\n" - " out float4 ocol0 : COLOR0,\n" - " in float2 uv0 : TEXCOORD0)\n" - "{\n" - " float2 sampleUv;\n" - " float2 uv1 = floor(uv0);\n"); - - WRITE(p, " float yl = floor(uv1.y / %f);\n", blkH); - WRITE(p, " float yb = yl * %f;\n", blkH); - WRITE(p, " float yoff = uv1.y - yb;\n"); - WRITE(p, " float xp = uv1.x + (yoff * textureDims.x);\n"); - WRITE(p, " float xel = floor(xp / 2);\n"); - WRITE(p, " float xb = floor(xel / %f);\n", blkH); - WRITE(p, " float xoff = xel - (xb * %f);\n", blkH); - - WRITE(p, " float x2 = uv1.x * 2;\n"); - WRITE(p, " float xl = floor(x2 / %f);\n", blkW); - WRITE(p, " float xib = x2 - (xl * %f);\n", blkW); - WRITE(p, " float halfxb = floor(xb / 2);\n"); - - - WRITE(p, " sampleUv.x = xib + (halfxb * %f);\n", blkW); - WRITE(p, " sampleUv.y = yb + xoff;\n"); - WRITE(p, " sampleUv = sampleUv * blkDims.xy;\n"); - WRITE(p, " sampleUv.y = textureDims.y - sampleUv.y;\n"); - - WRITE(p, " 
sampleUv.x = sampleUv.x + textureDims.z;\n"); - WRITE(p, " sampleUv.y = sampleUv.y + textureDims.w;\n"); + Write32BitSwizzler(p, GX_TF_RGBA8); WRITE(p, " float cl1 = xb - (halfxb * 2);\n"); WRITE(p, " float cl0 = 1.0f - cl1;\n"); @@ -483,10 +489,7 @@ void WriteRGBA8Encoder(char* p, bool fromDepth) WRITE(p, " float4 color1;\n"); WriteSampleColor(p, "rgba", "texSample"); - if(fromDepth) - WRITE(p, " color0.b = 1.0f;\n"); - else - WRITE(p, " color0.b = texSample.a;\n"); + WRITE(p, " color0.b = texSample.a;\n"); WRITE(p, " color0.g = texSample.r;\n"); WRITE(p, " color1.b = texSample.g;\n"); WRITE(p, " color1.g = texSample.b;\n"); @@ -494,10 +497,7 @@ void WriteRGBA8Encoder(char* p, bool fromDepth) WriteIncrementSampleX(p); WriteSampleColor(p, "rgba", "texSample"); - if(fromDepth) - WRITE(p, " color0.r = 1.0f;\n"); - else - WRITE(p, " color0.r = texSample.a;\n"); + WRITE(p, " color0.r = texSample.a;\n"); WRITE(p, " color0.a = texSample.r;\n"); WRITE(p, " color1.r = texSample.g;\n"); WRITE(p, " color1.a = texSample.b;\n"); @@ -606,6 +606,101 @@ void WriteCC8Encoder(char* p, const char* comp) WRITE(p, "}\n"); } +void WriteZ8Encoder(char* p, const char* multiplier) +{ + WriteSwizzler(p, GX_CTF_Z8M); + + WRITE(p, " float depth;\n"); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier); + WriteIncrementSampleX(p); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier); + WriteIncrementSampleX(p); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier); + WriteIncrementSampleX(p); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier); + + WRITE(p, "}\n"); +} + +void WriteZ16Encoder(char* p) +{ + WriteSwizzler(p, GX_TF_Z16); + + WRITE(p, " float depth;\n"); + + // byte order is reversed + + WriteSampleColor(p, "b", "depth"); + WRITE(p, " ocol0.b = frac(depth * 256.0f);\n"); + WRITE(p, " ocol0.g = depth;\n"); + + 
WriteIncrementSampleX(p); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, " ocol0.r = frac(depth * 256.0f);\n"); + WRITE(p, " ocol0.a = depth;\n"); + + WRITE(p, "}\n"); +} + +void WriteZ16LEncoder(char* p) +{ + WriteSwizzler(p, GX_CTF_Z16L); + + WRITE(p, " float depth;\n"); + + // byte order is reversed + + WriteSampleColor(p, "b", "depth"); + WRITE(p, " ocol0.b = frac(depth * 65536.0f);\n"); + WRITE(p, " ocol0.g = frac(depth * 256.0f);\n"); + + WriteIncrementSampleX(p); + + WriteSampleColor(p, "b", "depth"); + WRITE(p, " ocol0.r = frac(depth * 65536.0f);\n"); + WRITE(p, " ocol0.a = frac(depth * 256.0f);\n"); + + WRITE(p, "}\n"); +} + +void WriteZ24Encoder(char* p) +{ + Write32BitSwizzler(p, GX_TF_Z24X8); + + WRITE(p, " float cl = xb - (halfxb * 2);\n"); + + WRITE(p, " float depth0;\n"); + WRITE(p, " float depth1;\n"); + + WriteSampleColor(p, "b", "depth0"); + WriteIncrementSampleX(p); + WriteSampleColor(p, "b", "depth1"); + + WRITE(p, " if(cl > 0.5f) {\n"); + // upper 16 + WRITE(p, " ocol0.b = frac(depth0 * 256.0f);\n"); + WRITE(p, " ocol0.g = depth0;\n"); + WRITE(p, " ocol0.r = frac(depth1 * 256.0f);\n"); + WRITE(p, " ocol0.a = depth1;\n"); + WRITE(p, " } else {\n"); + // lower 8 + WRITE(p, " ocol0.b = 1.0f;\n"); + WRITE(p, " ocol0.g = frac(depth0 * 65536.0f);\n"); + WRITE(p, " ocol0.r = 1.0f;\n"); + WRITE(p, " ocol0.a = frac(depth0 * 65536.0f);\n"); + WRITE(p, " }\n" + "}\n"); +} + const char *GenerateEncodingShader(u32 format) { text[sizeof(text) - 1] = 0x7C; // canary @@ -633,7 +728,7 @@ const char *GenerateEncodingShader(u32 format) WriteRGB5A3Encoder(p); break; case GX_TF_RGBA8: - WriteRGBA8Encoder(p, false); + WriteRGBA8Encoder(p); break; case GX_CTF_R4: WriteC4Encoder(p, "r"); @@ -666,24 +761,22 @@ const char *GenerateEncodingShader(u32 format) WriteC8Encoder(p, "b"); break; case GX_TF_Z16: - // byte order is reversed - WriteCC8Encoder(p, "gb"); + WriteZ16Encoder(p); break; case GX_TF_Z24X8: - WriteRGBA8Encoder(p, true); + WriteZ24Encoder(p); break; case
GX_CTF_Z4: WriteC4Encoder(p, "b"); break; case GX_CTF_Z8M: - WriteC8Encoder(p, "g"); + WriteZ8Encoder(p, "256.0f"); break; case GX_CTF_Z8L: - WriteC8Encoder(p, "r"); + WriteZ8Encoder(p, "65536.0f" ); break; case GX_CTF_Z16L: - // byte order is reversed - WriteCC8Encoder(p, "rg"); + WriteZ16LEncoder(p); break; default: PanicAlert("Unknown texture copy format: 0x%x\n", format); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index 5a70a0088a..b6b51b2321 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -160,8 +160,6 @@ void Shutdown() void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const TRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight, bool linearFilter) { - Renderer::SetRenderMode(Renderer::RM_Normal); - Renderer::ResetGLState(); // switch to texture converter frame buffer @@ -243,7 +241,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf u8 *dest_ptr = Memory_GetPtr(address); - u32 source_texture = bFromZBuffer ? Renderer::ResolveAndGetFakeZTarget(source) : Renderer::ResolveAndGetRenderTarget(source); + u32 source_texture = bFromZBuffer ? 
Renderer::ResolveAndGetDepthTarget(source) : Renderer::ResolveAndGetRenderTarget(source); int width = source.right - source.left; int height = source.bottom - source.top; @@ -288,9 +286,6 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf scaledSource.right = expandedWidth / samples; EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, bScaleByHalf); - - if (bFromZBuffer) - Renderer::SetZBufferRender(); // notify for future settings } void EncodeToRamYUYV(GLuint srcTexture, const TRectangle& sourceRc, @@ -303,7 +298,6 @@ void EncodeToRamYUYV(GLuint srcTexture, const TRectangle& sourceRc, // Should be scale free. void DecodeToTexture(u8* srcAddr, int srcWidth, int srcHeight, GLuint destTexture) { - Renderer::SetRenderMode(Renderer::RM_Normal); Renderer::ResetGLState(); float srcFormatFactor = 0.5f; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index db81ff5ff4..c8ae106990 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -55,7 +55,6 @@ u8 *TextureMngr::temp = NULL; TextureMngr::TexCache TextureMngr::textures; -std::map TextureMngr::mapDepthTargets; extern int frameCount; static u32 s_TempFramebuffer = 0; @@ -176,13 +175,6 @@ void TextureMngr::Shutdown() { Invalidate(true); - std::map::iterator itdepth = mapDepthTargets.begin(); - for (itdepth = mapDepthTargets.begin(); itdepth != mapDepthTargets.end(); ++itdepth) - { - glDeleteRenderbuffersEXT(1, &itdepth->second.targ); - } - mapDepthTargets.clear(); - if (s_TempFramebuffer) { glDeleteFramebuffersEXT(1, (GLuint *)&s_TempFramebuffer); s_TempFramebuffer = 0; @@ -217,14 +209,6 @@ void TextureMngr::ProgressiveCleanup() else iter++; } - - std::map::iterator itdepth = mapDepthTargets.begin(); - while (itdepth != mapDepthTargets.end()) - { - if (frameCount > 20 + 
itdepth->second.framecount) - ERASE_THROUGH_ITERATOR(mapDepthTargets, itdepth); - else ++itdepth; - } } void TextureMngr::InvalidateRange(u32 start_address, u32 size) { @@ -588,8 +572,9 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool break; case 3: // Z16 //? - case 11: // Z16 colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; + case 11: // Z16 (reverse order) + colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1; break; case 6: // Z24X8 colmat[0] = 1; @@ -702,9 +687,8 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool // Make sure to resolve anything we need to read from. // TODO - it seems that it sometimes doesn't resolve the entire area we are interested in. See shadows in Burnout 2. - GLuint read_texture = bFromZBuffer ? Renderer::ResolveAndGetFakeZTarget(scaled_rect) : Renderer::ResolveAndGetRenderTarget(scaled_rect); + GLuint read_texture = bFromZBuffer ? Renderer::ResolveAndGetDepthTarget(scaled_rect) : Renderer::ResolveAndGetRenderTarget(scaled_rect); - Renderer::SetRenderMode(Renderer::RM_Normal); // set back to normal GL_REPORT_ERRORD(); // We have to run a pixel shader, for color conversion. 
@@ -716,29 +700,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool Renderer::SetFramebuffer(s_TempFramebuffer); Renderer::SetRenderTarget(entry.texture); GL_REPORT_ERRORD(); - - // create and attach the render target - std::map::iterator itdepth = mapDepthTargets.find((h << 16) | w); - if (itdepth == mapDepthTargets.end()) - { - DEPTHTARGET& depth = mapDepthTargets[(h << 16) | w]; - depth.framecount = frameCount; - - glGenRenderbuffersEXT(1, &depth.targ); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, depth.targ); - glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT, w, h); - GL_REPORT_ERRORD(); - - Renderer::SetDepthTarget(depth.targ); - GL_REPORT_ERRORD(); - } - else - { - itdepth->second.framecount = frameCount; - Renderer::SetDepthTarget(itdepth->second.targ); - GL_REPORT_ERRORD(); - } - + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); glActiveTexture(GL_TEXTURE0); glEnable(GL_TEXTURE_RECTANGLE_ARB); @@ -747,7 +709,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool glViewport(0, 0, w, h); glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, PixelShaderCache::GetColorMatrixProgram()); + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, bFromZBuffer ? 
PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation GL_REPORT_ERRORD(); @@ -765,9 +727,6 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool VertexShaderManager::SetViewportChanged(); TextureMngr::DisableStage(0); - if (bFromZBuffer) - Renderer::SetZBufferRender(); // notify for future settings - GL_REPORT_ERRORD(); if (g_Config.bDumpEFBTarget) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.h b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.h index e46ba59d95..0fb6c5e80d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.h @@ -57,19 +57,11 @@ public: bool IntersectsMemoryRange(u32 range_address, u32 range_size); }; - struct DEPTHTARGET - { - DEPTHTARGET() : targ(0), framecount(0) {} - GLuint targ; - int framecount; - }; - private: typedef std::map TexCache; static u8 *temp; static TexCache textures; - static std::map mapDepthTargets; public: static void Init(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index cd5452634e..a81151eb58 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -257,29 +257,6 @@ void Flush() FRAGMENTSHADER* ps = PixelShaderCache::GetShader(false); VERTEXSHADER* vs = VertexShaderCache::GetShader(g_nativeVertexFmt->m_components); - bool bRestoreBuffers = false; - if (Renderer::UseFakeZTarget()) - { - if (bpmem.zmode.updateenable) - { - if (!bpmem.blendmode.colorupdate) - { - Renderer::SetRenderMode(bpmem.blendmode.alphaupdate ? 
- Renderer::RM_ZBufferAlpha : - Renderer::RM_ZBufferOnly); - } - } - else - { - Renderer::SetRenderMode(Renderer::RM_Normal); - // remove temporarily - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - bRestoreBuffers = true; - } - } - else - Renderer::SetRenderMode(Renderer::RM_Normal); - // set global constants VertexShaderManager::SetConstants(g_Config.bProjHack1,g_Config.bPhackvalue1, g_Config.fhackvalue1, g_Config.bPhackvalue2, g_Config.fhackvalue2, g_Config.bFreeLook); PixelShaderManager::SetConstants(); @@ -330,8 +307,7 @@ void Flush() } // restore color mask - if (!bRestoreBuffers) - Renderer::SetColorMask(); + Renderer::SetColorMask(); if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract) glEnable(GL_BLEND); @@ -361,13 +337,6 @@ void Flush() GL_REPORT_ERRORD(); - if (bRestoreBuffers) - { - GLenum s_drawbuffers[2] = {GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT}; - glDrawBuffers(2, s_drawbuffers); - Renderer::SetColorMask(); - } - ResetBuffer(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index 5a780f7929..3e94fdfcdd 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -71,8 +71,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components) { DVSTARTPROFILE(); VERTEXSHADERUID uid; - u32 zbufrender = (bpmem.ztex2.op == ZTEXTURE_ADD) || Renderer::UseFakeZTarget(); - GetVertexShaderId(uid, components, zbufrender); + GetVertexShaderId(uid, components); VSCache::iterator iter = vshaders.find(uid); @@ -86,7 +85,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components) } VSCacheEntry& entry = vshaders[uid]; - const char *code = GenerateVertexShader(components, Renderer::UseFakeZTarget()); + const char *code = GenerateVertexShader(components); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_Config.iLog & CONF_SAVESHADERS && code) {