diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index a489d42b26..4d2590d7a0 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -582,7 +582,7 @@ enum NormalSSEOps } else if (bits == 8) { - + // Do nothing - can't bswap a single byte... } else { diff --git a/Source/Core/VideoCommon/Src/PixelShader.cpp b/Source/Core/VideoCommon/Src/PixelShader.cpp index 9b5615be18..779898bafa 100644 --- a/Source/Core/VideoCommon/Src/PixelShader.cpp +++ b/Source/Core/VideoCommon/Src/PixelShader.cpp @@ -24,6 +24,99 @@ #include "XFMemory.h" // for texture projection mode #include "BPMemory.h" +// Mash together all the inputs that contribute to the code of a generated pixel shader into +// a unique identifier, basically containing all the bits. Yup, it's a lot .... +void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0) +{ + u32 projtexcoords = 0; + for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) { + if (bpmem.tevorders[i/2].getEnable(i&1)) { + int texcoord = bpmem.tevorders[i/2].getTexCoord(i&1); + if (xfregs.texcoords[texcoord].texmtxinfo.projection ) + projtexcoords |= 1 << texcoord; + } + } + uid.values[0] = (u32)bpmem.genMode.numtevstages | + ((u32)bpmem.genMode.numindstages << 4) | + ((u32)bpmem.genMode.numtexgens << 7) | + ((u32)bpmem.dstalpha.enable << 11) | + ((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 12) | + (projtexcoords << 20) | + ((u32)bpmem.ztex2.op << 28) | + (zbufrender << 30) | + (zBufRenderToCol0 << 31); + + uid.values[0] = (uid.values[0] & ~0x0ff00000) | (projtexcoords << 20); + // swap table + for (int i = 0; i < 8; i += 2) + ((u8*)&uid.values[1])[i/2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4); + + uid.values[2] = s_texturemask; + int hdr = 3; + u32* pcurvalue = &uid.values[hdr]; + for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages+1; ++i) { + TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC; + TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC; + + u32 val0 = cc.hex&0xffffff; + u32 val1 = ac.hex&0xffffff; + val0 |= bpmem.tevksel[i/2].getKC(i&1)<<24; + val1 |= bpmem.tevksel[i/2].getKA(i&1)<<24; + pcurvalue[0] = val0; + pcurvalue[1] = val1; + pcurvalue += 2; + } + + for (u32 i = 0; i < ((u32)bpmem.genMode.numtevstages+1)/2; ++i) { + u32 val0, val1; + if (bpmem.tevorders[i].hex & 0x40) + val0 = bpmem.tevorders[i].hex & 0x3ff; + else + val0 = bpmem.tevorders[i].hex & 0x380; + if (bpmem.tevorders[i].hex & 0x40000) + val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12; + else + val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12; + + switch (i % 3) { + case 0: pcurvalue[0] = val0|(val1<<10); break; + case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break; + case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break; + } + } + + if ((bpmem.genMode.numtevstages + 1) & 1) { // odd + u32 val0; + if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40) + val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x3ff; + else + val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380; + + switch (bpmem.genMode.numtevstages % 3) { + case 0: pcurvalue[0] = val0; break; + case 1: pcurvalue[0] |= val0 << 20; break; + case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break; + } + } + + if ((bpmem.genMode.numtevstages % 3) != 2) + ++pcurvalue; + + uid.tevstages = (u32)(pcurvalue-&uid.values[0]-hdr); + + for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) { + u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits + switch (i%3) { + case 0: pcurvalue[0] = val; break; + case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break; + case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break; + } + } + + // yeah, well .... + uid.indstages = (u32)(pcurvalue - &uid.values[0] - 2 - uid.tevstages); +} + // old tev->pixelshader notes // // color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 diff --git a/Source/Core/VideoCommon/Src/PixelShader.h b/Source/Core/VideoCommon/Src/PixelShader.h index 9823a2870b..876daefb2f 100644 --- a/Source/Core/VideoCommon/Src/PixelShader.h +++ b/Source/Core/VideoCommon/Src/PixelShader.h @@ -39,6 +39,57 @@ #define C_COLORMATRIX (C_INDTEXMTX+6) +class PIXELSHADERUID +{ +public: + u32 values[4+32+6+11]; + u16 tevstages, indstages; + + PIXELSHADERUID() { + memset(values, 0, (4+32+6+11) * 4); + tevstages = indstages = 0; + } + PIXELSHADERUID(const PIXELSHADERUID& r) + { + tevstages = r.tevstages; + indstages = r.indstages; + int N = tevstages + indstages + 3; + _assert_(N <= 4+32+6+11); + for (int i = 0; i < N; ++i) + values[i] = r.values[i]; + } + int GetNumValues() const { + return tevstages + indstages + 3; // numTevStages*3/2+1 + } + bool operator <(const PIXELSHADERUID& _Right) const + { + if (values[0] < _Right.values[0]) + return true; + else if (values[0] > _Right.values[0]) + return false; + int N = GetNumValues(); + for (int i = 1; i < N; ++i) { + if (values[i] < _Right.values[i]) + return true; + else if (values[i] > _Right.values[i]) + return false; + } + return false; + } + bool operator ==(const PIXELSHADERUID& _Right) const + { + if (values[0] != _Right.values[0]) + return false; + int N = GetNumValues(); + for (int i = 1; i < N; ++i) { + if (values[i] != _Right.values[i]) + return false; + } + return true; + } +}; + char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0); +void GetPixelShaderId(PIXELSHADERUID &, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0); #endif diff --git a/Source/Core/VideoCommon/Src/VertexShader.cpp b/Source/Core/VideoCommon/Src/VertexShader.cpp index 04acf1c651..5fd307b881 100644 --- a/Source/Core/VideoCommon/Src/VertexShader.cpp +++ b/Source/Core/VideoCommon/Src/VertexShader.cpp @@ -23,6 +23,52 @@ #include "BPMemory.h" #include "VertexShader.h" +// Mash together all the inputs that contribute to the code of a generated vertex shader into +// a unique identifier, basically containing all the bits. Yup, it's a lot .... +void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender) +{ + vid.values[0] = components | + (xfregs.numTexGens << 23) | + (xfregs.nNumChans << 27) | + ((u32)xfregs.bEnableDualTexTransform << 29) | + (zbufrender << 30); + + for (int i = 0; i < 2; ++i) { + vid.values[1+i] = xfregs.colChans[i].color.enablelighting ? + (u32)xfregs.colChans[i].color.hex : + (u32)xfregs.colChans[i].color.matsource; + vid.values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ? + (u32)xfregs.colChans[i].alpha.hex : + (u32)xfregs.colChans[i].alpha.matsource) << 15; + } + + // fog + vid.values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30); + vid.values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30); + + u32* pcurvalue = &vid.values[3]; + for (int i = 0; i < xfregs.numTexGens; ++i) { + TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo; + if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) + tinfo.hex &= 0x7ff; + if (tinfo.texgentype != XF_TEXGEN_REGULAR) + tinfo.projection = 0; + + u32 val = ((tinfo.hex >> 1) & 0x1ffff); + if (xfregs.bEnableDualTexTransform && tinfo.texgentype == XF_TEXGEN_REGULAR) { + // rewrite normalization and post index + val |= ((u32)xfregs.texcoords[i].postmtxinfo.index << 17) | ((u32)xfregs.texcoords[i].postmtxinfo.normalize << 23); + } + + switch (i & 3) { + case 0: pcurvalue[0] |= val; break; + case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; + case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; + case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; + } + } +} + static char text[16384]; #define WRITE p+=sprintf diff --git a/Source/Core/VideoCommon/Src/VertexShader.h b/Source/Core/VideoCommon/Src/VertexShader.h index 309a6ab8b5..2680581acb 100644 --- a/Source/Core/VideoCommon/Src/VertexShader.h +++ b/Source/Core/VideoCommon/Src/VertexShader.h @@ -46,6 +46,55 @@ #define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES+32) #define C_FOGPARAMS (C_POSTTRANSFORMMATRICES+64) + +class VERTEXSHADERUID +{ +public: + u32 values[9]; + + VERTEXSHADERUID() { + memset(values, 0, sizeof(values)); + } + + VERTEXSHADERUID(const VERTEXSHADERUID& r) { + for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) + values[i] = r.values[i]; + } + + int GetNumValues() const { + return (((values[0] >> 23) & 0xf)*3 + 3)/4 + 3; // numTexGens*3/4+1 + } + + bool operator <(const VERTEXSHADERUID& _Right) const + { + if (values[0] < _Right.values[0]) + return true; + else if (values[0] > _Right.values[0]) + return false; + int N = GetNumValues(); + for (int i = 1; i < N; ++i) { + if (values[i] < _Right.values[i]) + return true; + else if (values[i] > _Right.values[i]) + return false; + } + return false; + } + + bool operator ==(const VERTEXSHADERUID& _Right) const + { + if (values[0] != _Right.values[0]) + return false; + int N = GetNumValues(); + for (int i = 1; i < N; ++i) { + if (values[i] != _Right.values[i]) + return false; + } + return true; + } +}; + char *GenerateVertexShader(u32 components, bool has_zbuffer_target); +void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender); #endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index 73ce4d43e6..48aaf120d3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -96,7 +96,7 @@ void BPWritten(int addr, int changes, int newval) else if(glIsEnabled(GL_CULL_FACE) == GL_TRUE) glDisable(GL_CULL_FACE); - PixelShaderMngr::SetGenModeChanged(); + PixelShaderManager::SetGenModeChanged(); } break; @@ -112,7 +112,7 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetIndMatrixChanged((addr-BPMEM_IND_MTX)/3); + PixelShaderManager::SetIndMatrixChanged((addr-BPMEM_IND_MTX)/3); } break; case BPMEM_RAS1_SS0: @@ -120,7 +120,7 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetIndTexScaleChanged(); + PixelShaderManager::SetIndTexScaleChanged(); } break; case BPMEM_ZMODE: @@ -152,7 +152,7 @@ void BPWritten(int addr, int changes, int newval) ((u32*)&bpmem)[addr] = newval; PRIM_LOG("alphacmp: ref0=%d, ref1=%d, comp0=%d, comp1=%d, logic=%d\n", bpmem.alphaFunc.ref0, bpmem.alphaFunc.ref1, bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1, bpmem.alphaFunc.logic); - PixelShaderMngr::SetAlpha(bpmem.alphaFunc); + PixelShaderManager::SetAlpha(bpmem.alphaFunc); } break; @@ -161,13 +161,13 @@ void BPWritten(int addr, int changes, int newval) VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; PRIM_LOG("constalpha: alp=%d, en=%d\n", bpmem.dstalpha.alpha, bpmem.dstalpha.enable); - PixelShaderMngr::SetDestAlpha(bpmem.dstalpha); + PixelShaderManager::SetDestAlpha(bpmem.dstalpha); } break; case BPMEM_LINEPTWIDTH: { - float fratio = VertexShaderMngr::GetPixelAspectRatio(); + float fratio = xfregs.rawViewport[0] != 0 ? (float)Renderer::GetTargetWidth() / 640.0f : 1.0f; if (bpmem.lineptwidth.linesize > 0) glLineWidth((float)bpmem.lineptwidth.linesize * fratio / 6.0f); // scale by ratio of widths if (bpmem.lineptwidth.pointsize > 0) @@ -363,7 +363,7 @@ void BPWritten(int addr, int changes, int newval) VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; PRIM_LOG("ztex bias=0x%x\n", bpmem.ztex1.bias); - PixelShaderMngr::SetZTextureBias(bpmem.ztex1.bias); + PixelShaderManager::SetZTextureBias(bpmem.ztex1.bias); } break; case BPMEM_ZTEX2: @@ -390,7 +390,7 @@ void BPWritten(int addr, int changes, int newval) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetTevKSelChanged(addr-0xf6); + PixelShaderManager::SetTevKSelChanged(addr-0xf6); } break; case 0x45: //GXSetDrawDone @@ -488,7 +488,7 @@ void BPWritten(int addr, int changes, int newval) glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom), (multirc.right - multirc.left), (multirc.bottom - multirc.top)); - VertexShaderMngr::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); // Since clear operations use the source rectangle, we have to do // regular renders (glClear clears the entire buffer) @@ -572,7 +572,7 @@ void BPWritten(int addr, int changes, int newval) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetTevOrderChanged(addr - 0x28); + PixelShaderManager::SetTevOrderChanged(addr - 0x28); } break; @@ -643,7 +643,7 @@ void BPWritten(int addr, int changes, int newval) VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; int num = (addr>>1)&0x3; - PixelShaderMngr::SetColorChanged(bpmem.tevregs[num].high.type, num); + PixelShaderManager::SetColorChanged(bpmem.tevregs[num].high.type, num); } else ((u32*)&bpmem)[addr] = newval; @@ -655,7 +655,7 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetTevIndirectChanged(addr-0x10); + PixelShaderManager::SetTevIndirectChanged(addr-0x10); } break; @@ -663,7 +663,7 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetTexDimsChanged((addr>>1)&0x7); + PixelShaderManager::SetTexDimsChanged((addr>>1)&0x7); } break; @@ -673,7 +673,7 @@ void BPWritten(int addr, int changes, int newval) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; - PixelShaderMngr::SetTevCombinerChanged((addr&0x1f)/2); + PixelShaderManager::SetTevCombinerChanged((addr&0x1f)/2); } break; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp index 449cee8df3..3eafc15be8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp @@ -32,12 +32,11 @@ #include "Render.h" #include "VertexShader.h" #include "PixelShaderManager.h" -#include "PixelShader.h" -PixelShaderMngr::PSCache PixelShaderMngr::pshaders; -FRAGMENTSHADER* PixelShaderMngr::pShaderLast = NULL; -PIXELSHADERUID PixelShaderMngr::s_curuid; +PixelShaderCache::PSCache PixelShaderCache::pshaders; +PIXELSHADERUID PixelShaderCache::s_curuid; +static FRAGMENTSHADER* pShaderLast = NULL; static int s_nMaxPixelInstructions; static int s_nColorsChanged[2]; // 0 - regular colors, 1 - k colors static int s_nIndTexMtxChanged = 0; @@ -53,31 +52,24 @@ static u32 lastZBias = 0; // lower byte describes if a texture is nonpow2 or pow2 // next byte describes whether the repeat wrap mode is enabled for the s channel // next byte is for t channel -u32 s_texturemask = 0; +static u32 s_texturemask = 0; static int maptocoord[8]; // indexed by texture map, holds the texcoord associated with the map static u32 maptocoord_mask = 0; static GLuint s_ColorMatrixProgram = 0; -void PixelShaderMngr::SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { +void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f1, f2, f3, f4); } -void PixelShaderMngr::SetPSConstant4fv(int const_number, const float *f) { +void SetPSConstant4fv(int const_number, const float *f) { glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f); } -void PixelShaderMngr::Init() +void PixelShaderCache::Init() { - s_nColorsChanged[0] = s_nColorsChanged[1] = 0; - s_nTexDimsChanged = 0; - s_nIndTexMtxChanged = 15; - s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; GL_REPORT_ERRORD(); - for (int i = 0; i < 8; ++i) maptocoord[i] = -1; - maptocoord_mask = 0; - memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, (GLint *)&s_nMaxPixelInstructions); @@ -110,7 +102,7 @@ void PixelShaderMngr::Init() } } -void PixelShaderMngr::Shutdown() +void PixelShaderCache::Shutdown() { glDeleteProgramsARB(1, &s_ColorMatrixProgram); s_ColorMatrixProgram = 0; @@ -120,11 +112,13 @@ void PixelShaderMngr::Shutdown() pshaders.clear(); } -FRAGMENTSHADER* PixelShaderMngr::GetShader() +FRAGMENTSHADER* PixelShaderCache::GetShader() { DVSTARTPROFILE(); PIXELSHADERUID uid; - GetPixelShaderId(uid); + u32 zbufrender = (Renderer::GetZBufferTarget() && bpmem.zmode.updateenable) ? 1 : 0; + u32 zBufRenderToCol0 = Renderer::GetRenderMode() != Renderer::RM_Normal; + GetPixelShaderId(uid, s_texturemask, zbufrender, zBufRenderToCol0); PSCache::iterator iter = pshaders.find(uid); @@ -168,7 +162,7 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader() return pShaderLast; } -void PixelShaderMngr::Cleanup() +void PixelShaderCache::Cleanup() { PSCache::iterator iter = pshaders.begin(); while (iter != pshaders.end()) { @@ -187,7 +181,7 @@ void PixelShaderMngr::Cleanup() SETSTAT(stats.numPixelShadersAlive,(int)pshaders.size()); } -bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) +bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) { char stropt[64]; sprintf(stropt, "MaxLocalParams=32,NumInstructionSlots=%d", s_nMaxPixelInstructions); @@ -238,7 +232,24 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro return true; } -void PixelShaderMngr::SetConstants() +void PixelShaderManager::Init() +{ + s_nColorsChanged[0] = s_nColorsChanged[1] = 0; + s_nTexDimsChanged = 0; + s_nIndTexMtxChanged = 15; + s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; + for (int i = 0; i < 8; ++i) + maptocoord[i] = -1; + maptocoord_mask = 0; + memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); +} + +void PixelShaderManager::Shutdown() +{ + +} + +void PixelShaderManager::SetConstants() { for (int i = 0; i < 2; ++i) { if (s_nColorsChanged[i]) { @@ -278,7 +289,7 @@ void PixelShaderMngr::SetConstants() if (s_nTexDimsChanged) { for (int i = 0; i < 8; ++i) { if (s_nTexDimsChanged & (1<> 16)) { lastAlpha = (lastAlpha & ~0xff0000) | ((alpha.hex & 0xff) << 16); @@ -436,7 +447,7 @@ void PixelShaderMngr::SetDestAlpha(const ConstantAlpha& alpha) } } -void PixelShaderMngr::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt) +void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt) { u32 wh = width | (height << 16) | (wraps << 28) | (wrapt << 30); if (lastTexDims[texmapid] != wh) { @@ -445,7 +456,7 @@ void PixelShaderMngr::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, } } -void PixelShaderMngr::SetZTextureBias(u32 bias) +void PixelShaderManager::SetZTextureBias(u32 bias) { if (lastZBias != bias) { s_bZBiasChanged = true; @@ -453,42 +464,42 @@ void PixelShaderMngr::SetZTextureBias(u32 bias) } } -void PixelShaderMngr::SetIndTexScaleChanged() +void PixelShaderManager::SetIndTexScaleChanged() { s_bIndTexScaleChanged = true; } -void PixelShaderMngr::SetIndMatrixChanged(int matrixidx) +void PixelShaderManager::SetIndMatrixChanged(int matrixidx) { s_nIndTexMtxChanged |= 1 << matrixidx; } -void PixelShaderMngr::SetGenModeChanged() +void PixelShaderManager::SetGenModeChanged() { } -void PixelShaderMngr::SetTevCombinerChanged(int id) +void PixelShaderManager::SetTevCombinerChanged(int id) { } -void PixelShaderMngr::SetTevKSelChanged(int id) +void PixelShaderManager::SetTevKSelChanged(int id) { } -void PixelShaderMngr::SetTevOrderChanged(int id) +void PixelShaderManager::SetTevOrderChanged(int id) { } -void PixelShaderMngr::SetTevIndirectChanged(int id) +void PixelShaderManager::SetTevIndirectChanged(int id) { } -void PixelShaderMngr::SetZTextureOpChanged() +void PixelShaderManager::SetZTextureOpChanged() { s_bZBiasChanged = true; } -void PixelShaderMngr::SetTexturesUsed(u32 nonpow2tex) +void PixelShaderManager::SetTexturesUsed(u32 nonpow2tex) { if (s_texturemask != nonpow2tex) { for (int i = 0; i < 8; ++i) { @@ -502,7 +513,7 @@ void PixelShaderMngr::SetTexturesUsed(u32 nonpow2tex) } } -void PixelShaderMngr::SetTexDimsChanged(int texmapid) +void PixelShaderManager::SetTexDimsChanged(int texmapid) { // this check was previously implicit, but should it be here? if (s_nTexDimsChanged) @@ -511,7 +522,7 @@ void PixelShaderMngr::SetTexDimsChanged(int texmapid) SetIndTexScaleChanged(); } -void PixelShaderMngr::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) +void PixelShaderManager::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) { SetPSConstant4fv(C_COLORMATRIX, pmatrix); SetPSConstant4fv(C_COLORMATRIX+1, pmatrix+4); @@ -520,102 +531,8 @@ void PixelShaderMngr::SetColorMatrix(const float* pmatrix, const float* pfConstA SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd); } -GLuint PixelShaderMngr::GetColorMatrixProgram() +GLuint PixelShaderManager::GetColorMatrixProgram() { return s_ColorMatrixProgram; } -// Mash together all the inputs that contribute to the code of a generated pixel shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -void PixelShaderMngr::GetPixelShaderId(PIXELSHADERUID &uid) -{ - u32 projtexcoords = 0; - for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) { - if (bpmem.tevorders[i/2].getEnable(i&1)) { - int texcoord = bpmem.tevorders[i/2].getTexCoord(i&1); - if (xfregs.texcoords[texcoord].texmtxinfo.projection ) - projtexcoords |= 1 << texcoord; - } - } - u32 zbufrender = (Renderer::GetZBufferTarget() && bpmem.zmode.updateenable) ? 1 : 0; - u32 zBufRenderToCol0 = Renderer::GetRenderMode() != Renderer::RM_Normal; - uid.values[0] = (u32)bpmem.genMode.numtevstages | - ((u32)bpmem.genMode.numindstages << 4) | - ((u32)bpmem.genMode.numtexgens << 7) | - ((u32)bpmem.dstalpha.enable << 11) | - ((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 12) | - (projtexcoords << 20) | - ((u32)bpmem.ztex2.op << 28) | - (zbufrender << 30) | - (zBufRenderToCol0 << 31); - - s_curuid.values[0] = (s_curuid.values[0] & ~0x0ff00000) | (projtexcoords << 20); - // swap table - for (int i = 0; i < 8; i += 2) - ((u8*)&uid.values[1])[i/2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4); - - uid.values[2] = s_texturemask; - int hdr = 3; - u32* pcurvalue = &uid.values[hdr]; - for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages+1; ++i) { - TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC; - TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC; - - u32 val0 = cc.hex&0xffffff; - u32 val1 = ac.hex&0xffffff; - val0 |= bpmem.tevksel[i/2].getKC(i&1)<<24; - val1 |= bpmem.tevksel[i/2].getKA(i&1)<<24; - pcurvalue[0] = val0; - pcurvalue[1] = val1; - pcurvalue += 2; - } - - for (u32 i = 0; i < ((u32)bpmem.genMode.numtevstages+1)/2; ++i) { - u32 val0, val1; - if (bpmem.tevorders[i].hex & 0x40) - val0 = bpmem.tevorders[i].hex & 0x3ff; - else - val0 = bpmem.tevorders[i].hex & 0x380; - if (bpmem.tevorders[i].hex & 0x40000) - val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12; - else - val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12; - - switch (i % 3) { - case 0: pcurvalue[0] = val0|(val1<<10); break; - case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break; - case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break; - } - } - - if ((bpmem.genMode.numtevstages + 1) & 1) { // odd - u32 val0; - if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40) - val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x3ff; - else - val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380; - - switch (bpmem.genMode.numtevstages % 3) { - case 0: pcurvalue[0] = val0; break; - case 1: pcurvalue[0] |= val0 << 20; break; - case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break; - } - } - - if ((bpmem.genMode.numtevstages % 3) != 2) - ++pcurvalue; - - uid.tevstages = (u32)(pcurvalue-&uid.values[0]-hdr); - - for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) { - u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits - switch (i%3) { - case 0: pcurvalue[0] = val; break; - case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break; - case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break; - } - } - - // yeah, well .... - uid.indstages = (u32)(pcurvalue - &uid.values[0] - 2 - uid.tevstages); -} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h index 0048326fd7..f1ea587f1a 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h @@ -22,124 +22,81 @@ #include #include "BPMemory.h" +#include "PixelShader.h" struct FRAGMENTSHADER { - FRAGMENTSHADER() : glprogid(0) { } - GLuint glprogid; // opengl program id + FRAGMENTSHADER() : glprogid(0) { } + GLuint glprogid; // opengl program id #if defined(_DEBUG) || defined(DEBUGFAST) std::string strprog; #endif }; -class PIXELSHADERUID +class PixelShaderCache { -public: - u32 values[4+32+6+11]; - u16 tevstages, indstages; + struct PSCacheEntry + { + FRAGMENTSHADER shader; + int frameCount; + PSCacheEntry() : frameCount(0) {} + ~PSCacheEntry() {} + void Destroy() { + // printf("Destroying ps %i\n", shader.glprogid); + glDeleteProgramsARB(1, &shader.glprogid); + shader.glprogid = 0; + } + }; - PIXELSHADERUID() { - memset(values, 0, (4+32+6+11) * 4); - tevstages = indstages = 0; - } - PIXELSHADERUID(const PIXELSHADERUID& r) - { - tevstages = r.tevstages; - indstages = r.indstages; - int N = tevstages + indstages + 3; - _assert_(N <= 4+32+6+11); - for (int i = 0; i < N; ++i) - values[i] = r.values[i]; - } - int GetNumValues() const { - return tevstages + indstages + 3; // numTevStages*3/2+1 - } - bool operator <(const PIXELSHADERUID& _Right) const - { - if (values[0] < _Right.values[0]) - return true; - else if (values[0] > _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) { - if (values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } - return false; - } - bool operator ==(const PIXELSHADERUID& _Right) const - { - if (values[0] != _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) { - if (values[i] != _Right.values[i]) - return false; - } - return true; - } + typedef std::map PSCache; + + static PSCache pshaders; + + static PIXELSHADERUID s_curuid; // the current pixel shader uid (progressively changed as memory is written) + +public: + static void Init(); + static void Cleanup(); + static void Shutdown(); + + static FRAGMENTSHADER* GetShader(); + static bool CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram); }; +void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4); +void SetPSConstant4fv(int const_number, const float *f); -class PixelShaderMngr +// The non-API dependent parts. +class PixelShaderManager { - struct PSCacheEntry - { - FRAGMENTSHADER shader; - int frameCount; - PSCacheEntry() : frameCount(0) {} - ~PSCacheEntry() {} - void Destroy() { - // printf("Destroying ps %i\n", shader.glprogid); - glDeleteProgramsARB(1, &shader.glprogid); - shader.glprogid = 0; - } - }; - - typedef std::map PSCache; - - static FRAGMENTSHADER* pShaderLast; // last used shader - static PSCache pshaders; - - static void GetPixelShaderId(PIXELSHADERUID&); - static PIXELSHADERUID s_curuid; // the current pixel shader uid (progressively changed as memory is written) - - static void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4); - static void SetPSConstant4fv(int const_number, const float *f); static void SetPSTextureDims(int texid); - public: - static void Init(); - static void Cleanup(); - static void Shutdown(); - static FRAGMENTSHADER* GetShader(); - static bool CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram); + static void Init(); + static void Shutdown(); - static void SetConstants(); // sets pixel shader constants + static void SetConstants(); // sets pixel shader constants - // constant management, should be called after memory is committed - static void SetColorChanged(int type, int index); - static void SetAlpha(const AlphaFunc& alpha); - static void SetDestAlpha(const ConstantAlpha& alpha); - static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt); - static void SetZTextureBias(u32 bias); - static void SetIndTexScaleChanged(); - static void SetIndMatrixChanged(int matrixidx); + // constant management, should be called after memory is committed + static void SetColorChanged(int type, int index); + static void SetAlpha(const AlphaFunc& alpha); + static void SetDestAlpha(const ConstantAlpha& alpha); + static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt); + static void SetZTextureBias(u32 bias); + static void SetIndTexScaleChanged(); + static void SetIndMatrixChanged(int matrixidx); - static void SetGenModeChanged(); - static void SetTevCombinerChanged(int id); - static void SetTevKSelChanged(int id); - static void SetTevOrderChanged(int id); - static void SetTevIndirectChanged(int id); - static void SetZTextureOpChanged(); - static void SetTexturesUsed(u32 nonpow2tex); - static void SetTexDimsChanged(int texmapid); + static void SetGenModeChanged(); + static void SetTevCombinerChanged(int id); + static void SetTevKSelChanged(int id); + static void SetTevOrderChanged(int id); + static void SetTevIndirectChanged(int id); + static void SetZTextureOpChanged(); + static void SetTexturesUsed(u32 nonpow2tex); + static void SetTexDimsChanged(int texmapid); - static void SetColorMatrix(const float* pmatrix, const float* pfConstAdd); - static GLuint GetColorMatrixProgram(); + static void SetColorMatrix(const float* pmatrix, const float* pfConstAdd); + static GLuint GetColorMatrixProgram(); }; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 31d2fc0706..b08e196d8c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -907,7 +907,7 @@ void Renderer::SwapBuffers() GL_REPORT_ERRORD(); //clean out old stuff from caches - PixelShaderMngr::Cleanup(); + PixelShaderCache::Cleanup(); TextureMngr::Cleanup(); frameCount++; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index 3c43ef3aed..f52e9a8dc0 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -56,7 +56,7 @@ void CreateRgbToYuyvProgram() " ocol0 = float4(y1, u0, y0, v1);\n" "}\n"; - if (!PixelShaderMngr::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) { + if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) { ERROR_LOG("Failed to create RGB to YUYV fragment program\n"); } } @@ -83,7 +83,7 @@ void CreateYuyvToRgbProgram() " 1.0f);\n" "}\n"; - if (!PixelShaderMngr::CompilePixelShader(s_yuyvToRgbProgram, FProgram)) { + if (!PixelShaderCache::CompilePixelShader(s_yuyvToRgbProgram, FProgram)) { ERROR_LOG("Failed to create YUYV to RGB fragment program\n"); } } @@ -161,7 +161,7 @@ void EncodeToRam(GLuint srcTexture, const TRectangle& sourceRc, Renderer::SetFramebuffer(0); Renderer::RestoreGLState(); - VertexShaderMngr::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); TextureMngr::DisableStage(0); @@ -215,7 +215,7 @@ void DecodeToTexture(u8* srcAddr, int srcWidth, int srcHeight, GLuint destTextur glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); TextureMngr::DisableStage(0); - VertexShaderMngr::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); Renderer::RestoreGLState(); GL_REPORT_ERRORD(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 77d68e4d06..e8d027fb07 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -578,8 +578,8 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool glViewport(0, 0, w, h); glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, PixelShaderMngr::GetColorMatrixProgram()); - PixelShaderMngr::SetColorMatrix(colmat, fConstAdd); // set transformation + glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, PixelShaderManager::GetColorMatrixProgram()); + PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation GL_REPORT_ERRORD(); glBegin(GL_QUADS); @@ -595,7 +595,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool Renderer::SetFramebuffer(0); Renderer::RestoreGLState(); - VertexShaderMngr::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); TextureMngr::DisableStage(0); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp index 34d81f1c12..897e387207 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp @@ -106,11 +106,11 @@ void LoadCPReg(u32 sub_cmd, u32 value) switch (sub_cmd & 0xF0) { case 0x30: - VertexShaderMngr::SetTexMatrixChangedA(value); + VertexShaderManager::SetTexMatrixChangedA(value); break; case 0x40: - VertexShaderMngr::SetTexMatrixChangedB(value); + VertexShaderManager::SetTexMatrixChangedB(value); break; case 0x50: diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index eb41aa7683..00361aca26 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -193,7 +193,7 @@ void Flush() if (tentry != NULL) { // texture loaded fine, set dims for pixel shader if (tentry->isNonPow2) { - PixelShaderMngr::SetTexDims(i, tentry->w, tentry->h, tentry->mode.wrap_s, tentry->mode.wrap_t); + PixelShaderManager::SetTexDims(i, tentry->w, tentry->h, tentry->mode.wrap_s, tentry->mode.wrap_t); nonpow2tex |= 1 << i; if (tentry->mode.wrap_s > 0) nonpow2tex |= 1 << (8 + i); if (tentry->mode.wrap_t > 0) nonpow2tex |= 1 << (16 + i); @@ -202,7 +202,7 @@ void Flush() // if texture is power of two, set to ones (since don't need scaling) else { - PixelShaderMngr::SetTexDims(i, tentry->w, tentry->h, 0, 0); + PixelShaderManager::SetTexDims(i, tentry->w, tentry->h, 0, 0); TextureMngr::EnableTex2D(i); } if (g_Config.iLog & CONF_PRIMLOG) { @@ -222,11 +222,11 @@ void Flush() } } - PixelShaderMngr::SetTexturesUsed(nonpow2tex); + PixelShaderManager::SetTexturesUsed(nonpow2tex); } - FRAGMENTSHADER* ps = PixelShaderMngr::GetShader(); - VERTEXSHADER* vs = VertexShaderMngr::GetShader(s_prevcomponents); + FRAGMENTSHADER* ps = PixelShaderCache::GetShader(); + VERTEXSHADER* vs = VertexShaderCache::GetShader(s_prevcomponents); bool bRestoreBuffers = false; if (Renderer::GetZBufferTarget()) { @@ -246,8 +246,8 @@ void Flush() } // set global constants - VertexShaderMngr::SetConstants(); - PixelShaderMngr::SetConstants(); + VertexShaderManager::SetConstants(); + PixelShaderManager::SetConstants(); // finally bind diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp index 21c68ed86d..47938f5482 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp @@ -36,12 +36,11 @@ #include "BPMemory.h" #include "XFMemory.h" -VertexShaderMngr::VSCache VertexShaderMngr::vshaders; -VERTEXSHADER* VertexShaderMngr::pShaderLast = NULL; +VertexShaderCache::VSCache VertexShaderCache::vshaders; -float GC_ALIGNED16(g_fProjectionMatrix[16]); +static VERTEXSHADER *pShaderLast = NULL; -extern int A, B; +static float GC_ALIGNED16(g_fProjectionMatrix[16]); // Internal Variables static int s_nMaxVertexInstructions; @@ -56,15 +55,15 @@ static int nNormalMatricesChanged[2]; // min,max static int nPostTransformMatricesChanged[2]; // min,max static int nLightsChanged[2]; // min,max -void VertexShaderMngr::SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) { +void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) { glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, const_number, f1, f2, f3, f4); } -void VertexShaderMngr::SetVSConstant4fv(int const_number, const float *f) { +void SetVSConstant4fv(int const_number, const float *f) { glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number, f); } -void VertexShaderMngr::Init() +void VertexShaderManager::Init() { nTransformMatricesChanged[0] = nTransformMatricesChanged[1] = -1; nNormalMatricesChanged[0] = nNormalMatricesChanged[1] = -1; @@ -76,26 +75,31 @@ void VertexShaderMngr::Init() memset(&xfregs, 0, sizeof(xfregs)); memset(xfmem, 0, sizeof(xfmem)); +} +void VertexShaderCache::Init() +{ glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions); } -void VertexShaderMngr::Shutdown() +void VertexShaderCache::Shutdown() { for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); iter++) iter->second.Destroy(); vshaders.clear(); } -float VertexShaderMngr::GetPixelAspectRatio() { - return xfregs.rawViewport[0] != 0 ? (float)Renderer::GetTargetWidth() / 640.0f : 1.0f; +void VertexShaderManager::Shutdown() +{ + } -VERTEXSHADER* VertexShaderMngr::GetShader(u32 components) +VERTEXSHADER* VertexShaderCache::GetShader(u32 components) { DVSTARTPROFILE(); VERTEXSHADERUID uid; - GetVertexShaderId(uid, components); + u32 zbufrender = (bpmem.ztex2.op == ZTEXTURE_ADD) || Renderer::GetZBufferTarget() != 0; + GetVertexShaderId(uid, components, zbufrender); VSCache::iterator iter = vshaders.find(uid); @@ -109,7 +113,7 @@ VERTEXSHADER* VertexShaderMngr::GetShader(u32 components) } VSCacheEntry& entry = vshaders[uid]; - char *code = GenerateVertexShader(components, Renderer::GetZBufferTarget() != 0); + const char *code = GenerateVertexShader(components, Renderer::GetZBufferTarget() != 0); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_Config.iLog & CONF_SAVESHADERS && code) { @@ -121,26 +125,25 @@ VERTEXSHADER* VertexShaderMngr::GetShader(u32 components) } #endif - if (!code || !VertexShaderMngr::CompileVertexShader(entry.shader, code)) { + if (!code || !VertexShaderCache::CompileVertexShader(entry.shader, code)) { ERROR_LOG("failed to create vertex shader\n"); return NULL; } //Make an entry in the table - entry.frameCount=frameCount; - + entry.frameCount = frameCount; pShaderLast = &entry.shader; INCSTAT(stats.numVertexShadersCreated); - SETSTAT(stats.numVertexShadersAlive,vshaders.size()); + SETSTAT(stats.numVertexShadersAlive, vshaders.size()); return pShaderLast; } -void VertexShaderMngr::Cleanup() +void VertexShaderCache::Cleanup() { - VSCache::iterator iter=vshaders.begin(); + VSCache::iterator iter = vshaders.begin(); while (iter != vshaders.end()) { VSCacheEntry &entry = iter->second; - if (entry.frameCount < frameCount-200) { + if (entry.frameCount < frameCount - 200) { entry.Destroy(); #ifdef _WIN32 iter = vshaders.erase(iter); @@ -153,20 +156,10 @@ void VertexShaderMngr::Cleanup() } } -// static int frame = 0; -// if( frame++ > 30 ) { -// VSCache::iterator iter=vshaders.begin(); -// while(iter!=vshaders.end()) { -// iter->second.Destroy(); -// ++iter; -// } -// vshaders.clear(); -// } - - SETSTAT(stats.numPixelShadersAlive,vshaders.size()); + SETSTAT(stats.numVertexShadersAlive, vshaders.size()); } -bool VertexShaderMngr::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram) +bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram) { char stropt[64]; sprintf(stropt, "MaxLocalParams=256,MaxInstructions=%d", s_nMaxVertexInstructions); @@ -211,16 +204,10 @@ bool VertexShaderMngr::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpro return true; } -const u16 s_mtrltable[16][2] = {{0, 0}, {0, 1}, {1, 1}, {0, 2}, - {2, 1}, {0, 3}, {1, 2}, {0, 3}, - {3, 1}, {0, 4}, {1, 3}, {0, 4}, - {2, 2}, {0, 4}, {1, 3}, {0, 4}}; - - // ======================================================================================= // Syncs the shader constant buffers with xfmem // ---------------- -void VertexShaderMngr::SetConstants() +void VertexShaderManager::SetConstants() { //nTransformMatricesChanged[0] = 0; nTransformMatricesChanged[1] = 256; //nNormalMatricesChanged[0] = 0; nNormalMatricesChanged[1] = 96; @@ -510,7 +497,7 @@ void VertexShaderMngr::SetConstants() } } -void VertexShaderMngr::InvalidateXFRange(int start, int end) +void VertexShaderManager::InvalidateXFRange(int start, int end) { if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx*4 && (u32)start < (u32)MatrixIndexA.PosNormalMtxIdx*4 + 12) || @@ -587,7 +574,7 @@ void VertexShaderMngr::InvalidateXFRange(int start, int end) } } -void VertexShaderMngr::SetTexMatrixChangedA(u32 Value) +void VertexShaderManager::SetTexMatrixChangedA(u32 Value) { if (MatrixIndexA.Hex != Value) { VertexManager::Flush(); @@ -598,7 +585,7 @@ void VertexShaderMngr::SetTexMatrixChangedA(u32 Value) } } -void VertexShaderMngr::SetTexMatrixChangedB(u32 Value) +void VertexShaderManager::SetTexMatrixChangedB(u32 Value) { if (MatrixIndexB.Hex != Value) { VertexManager::Flush(); @@ -607,7 +594,7 @@ void VertexShaderMngr::SetTexMatrixChangedB(u32 Value) } } -void VertexShaderMngr::SetViewport(float* _Viewport) +void VertexShaderManager::SetViewport(float* _Viewport) { // Workaround for paper mario, yep this is bizarre. for (size_t i = 0; i < ARRAYSIZE(xfregs.rawViewport); ++i) { @@ -618,69 +605,22 @@ void VertexShaderMngr::SetViewport(float* _Viewport) bViewportChanged = true; } -void VertexShaderMngr::SetViewportChanged() +void VertexShaderManager::SetViewportChanged() { bViewportChanged = true; } -void VertexShaderMngr::SetProjection(float* _pProjection, int constantIndex) +void VertexShaderManager::SetProjection(float* _pProjection, int constantIndex) { memcpy(xfregs.rawProjection, _pProjection, sizeof(xfregs.rawProjection)); bProjectionChanged = true; } -float* VertexShaderMngr::GetPosNormalMat() +float* VertexShaderManager::GetPosNormalMat() { return (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; } -// Mash together all the inputs that contribute to the code of a generated vertex shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -void VertexShaderMngr::GetVertexShaderId(VERTEXSHADERUID& vid, u32 components) -{ - u32 zbufrender = (bpmem.ztex2.op == ZTEXTURE_ADD) || Renderer::GetZBufferTarget() != 0; - vid.values[0] = components | - (xfregs.numTexGens << 23) | - (xfregs.nNumChans << 27) | - ((u32)xfregs.bEnableDualTexTransform << 29) | - (zbufrender << 30); - - for (int i = 0; i < 2; ++i) { - vid.values[1+i] = xfregs.colChans[i].color.enablelighting ? - (u32)xfregs.colChans[i].color.hex : - (u32)xfregs.colChans[i].color.matsource; - vid.values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ? - (u32)xfregs.colChans[i].alpha.hex : - (u32)xfregs.colChans[i].alpha.matsource) << 15; - } - - // fog - vid.values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30); - vid.values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30); - - u32* pcurvalue = &vid.values[3]; - for (int i = 0; i < xfregs.numTexGens; ++i) { - TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo; - if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) - tinfo.hex &= 0x7ff; - if (tinfo.texgentype != XF_TEXGEN_REGULAR) - tinfo.projection = 0; - - u32 val = ((tinfo.hex >> 1) & 0x1ffff); - if (xfregs.bEnableDualTexTransform && tinfo.texgentype == XF_TEXGEN_REGULAR) { - // rewrite normalization and post index - val |= ((u32)xfregs.texcoords[i].postmtxinfo.index << 17) | ((u32)xfregs.texcoords[i].postmtxinfo.normalize << 23); - } - - switch (i & 3) { - case 0: pcurvalue[0] |= val; break; - case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; - case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; - case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; - } - } -} - // LoadXFReg 0x10 void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) @@ -694,7 +634,7 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) if (address < 0x1000) { VertexManager::Flush(); - VertexShaderMngr::InvalidateXFRange(address, address + transferSize); + VertexShaderManager::InvalidateXFRange(address, address + transferSize); //PRIM_LOG("xfmem write: 0x%x-0x%x\n", address, address+transferSize); u32* p1 = &xfmem[address]; @@ -820,16 +760,16 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) break; case 0x1018: //_assert_msg_(GX_XF, 0, "XF matrixindex0"); - VertexShaderMngr::SetTexMatrixChangedA(data); //? + VertexShaderManager::SetTexMatrixChangedA(data); //? break; case 0x1019: //_assert_msg_(GX_XF, 0, "XF matrixindex1"); - VertexShaderMngr::SetTexMatrixChangedB(data); //? + VertexShaderManager::SetTexMatrixChangedB(data); //? break; case 0x101a: VertexManager::Flush(); - VertexShaderMngr::SetViewport((float*)&pData[i]); + VertexShaderManager::SetViewport((float*)&pData[i]); i += 6; break; @@ -840,7 +780,7 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) case 0x1020: VertexManager::Flush(); - VertexShaderMngr::SetProjection((float*)&pData[i]); + VertexShaderManager::SetProjection((float*)&pData[i]); i += 7; return; @@ -901,7 +841,7 @@ void LoadIndexedXF(u32 val, int array) //load stuff from array to address in xf mem VertexManager::Flush(); - VertexShaderMngr::InvalidateXFRange(address, address+size); + VertexShaderManager::InvalidateXFRange(address, address+size); //PRIM_LOG("xfmem iwrite: 0x%x-0x%x\n", address, address+size); for (int i = 0; i < size; i++) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h index 09d0548749..bad21a6e74 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h @@ -22,6 +22,7 @@ #include #include "GLUtil.h" +#include "VertexShader.h" struct VERTEXSHADER { @@ -33,56 +34,7 @@ struct VERTEXSHADER #endif }; - -class VERTEXSHADERUID -{ -public: - u32 values[9]; - - VERTEXSHADERUID() { - memset(values, 0, sizeof(values)); - } - - VERTEXSHADERUID(const VERTEXSHADERUID& r) { - for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) - values[i] = r.values[i]; - } - - int GetNumValues() const { - return (((values[0] >> 23) & 0xf)*3 + 3)/4 + 3; // numTexGens*3/4+1 - } - - bool operator <(const VERTEXSHADERUID& _Right) const - { - if (values[0] < _Right.values[0]) - return true; - else if (values[0] > _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) { - if (values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } - return false; - } - - bool operator ==(const VERTEXSHADERUID& _Right) const - { - if (values[0] != _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) { - if (values[i] != _Right.values[i]) - return false; - } - return true; - } -}; - - -class VertexShaderMngr +class VertexShaderCache { struct VSCacheEntry { @@ -99,18 +51,22 @@ class VertexShaderMngr typedef std::map VSCache; static VSCache vshaders; - static VERTEXSHADER* pShaderLast; - static void GetVertexShaderId(VERTEXSHADERUID& uid, u32 components); - - static void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4); - static void SetVSConstant4fv(int const_number, const float *f); public: static void Init(); static void Cleanup(); static void Shutdown(); - static VERTEXSHADER* GetShader(u32 components); + + static VERTEXSHADER* GetShader(u32 components); static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram); +}; + +// The non-API dependent parts. +class VertexShaderManager +{ +public: + static void Init(); + static void Shutdown(); // constant management static void SetConstants(); @@ -123,7 +79,6 @@ public: static void SetTexMatrixChangedB(u32 Value); static float* GetPosNormalMat(); - static float GetPixelAspectRatio(); }; #endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/XFB.cpp b/Source/Plugins/Plugin_VideoOGL/Src/XFB.cpp index ed04413a38..ad82da474c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/XFB.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/XFB.cpp @@ -175,7 +175,7 @@ void XFB_Write(u8 *xfb_in_ram, const TRectangle& sourceRc, u32 dstWd, u32 dstHt) Renderer::SetFramebuffer(0); Renderer::RestoreGLState(); - VertexShaderMngr::SetViewportChanged(); + VertexShaderManager::SetViewportChanged(); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); TextureMngr::DisableStage(0); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 60d743304e..d61571f947 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -274,8 +274,10 @@ void Video_Prepare(void) VertexManager::Init(); Fifo_Init(); // must be done before OpcodeDecoder_Init() OpcodeDecoder_Init(); - VertexShaderMngr::Init(); - PixelShaderMngr::Init(); + VertexShaderCache::Init(); + VertexShaderManager::Init(); + PixelShaderCache::Init(); + PixelShaderManager::Init(); GL_REPORT_ERRORD(); VertexLoaderManager::Init(); TextureConverter::Init(); @@ -285,8 +287,10 @@ void Video_Shutdown(void) { TextureConverter::Shutdown(); VertexLoaderManager::Shutdown(); - VertexShaderMngr::Shutdown(); - PixelShaderMngr::Shutdown(); + VertexShaderCache::Shutdown(); + VertexShaderManager::Shutdown(); + PixelShaderManager::Shutdown(); + PixelShaderCache::Shutdown(); Fifo_Shutdown(); VertexManager::Shutdown(); TextureMngr::Shutdown();