diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 106ff9143b..a1b6d44602 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -211,7 +211,7 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); + m_PSO = cachePSO.getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); return true; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index c0a36a241d..a1ee7947e8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -47,6 +47,7 @@ private: // std::vector m_vdata; // std::vector m_post_draw_objs; + PipelineStateObjectCache cachePSO; ID3D12PipelineState *m_PSO; int m_fp_buf_num; int m_vp_buf_num; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 4e83f034ae..7e74641c49 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -12,84 +12,6 @@ #pragma comment (lib, "d3dcompiler.lib") - -struct GLBufferInfo -{ - ID3D12PipelineState *prog_id; - u32 fp_id; - u32 vp_id; - std::vector fp_data; - std::vector vp_data; - std::string fp_shader; - std::string vp_shader; - Microsoft::WRL::ComPtr fp_bytecode; - Microsoft::WRL::ComPtr vp_bytecode; -}; - -enum class SHADER_TYPE -{ - SHADER_TYPE_VERTEX, - SHADER_TYPE_FRAGMENT -}; - -/** Storage for a shader -* Embeds the D3DBlob corresponding to -*/ -class Shader -{ -public: - Shader() : bytecode(nullptr) {} - ~Shader() {} - - u32 Id; - Microsoft::WRL::ComPtr bytecode; - std::vector RSXBinary; - - /** - * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. - * @param prog RSXShaderProgram specifying the location and size of the shader in memory - */ -// void Decompile(RSXFragmentProgram& prog) - - /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ - void Compile(SHADER_TYPE st) - { - static const char VSstring[] = - "float4 main(float4 pos : POSITION) : SV_POSITION" - "{" - " return pos;" - "}"; - static const char FSstring[] = - "float4 main() : SV_TARGET" - "{" - "return float4(1.0f, 1.0f, 1.0f, 1.0f);" - "}"; - HRESULT hr; - Microsoft::WRL::ComPtr errorBlob; - switch (st) - { - case SHADER_TYPE::SHADER_TYPE_VERTEX: - hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); - if (hr != S_OK) - LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); - break; - case SHADER_TYPE::SHADER_TYPE_FRAGMENT: - hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); - if (hr != S_OK) - LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); - break; - } - } -}; - -// Based on -// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp -union qword -{ - u64 dword[2]; - u32 word[4]; -}; - size_t getFPBinarySize(void *ptr) { const qword *instBuffer = (const qword*)ptr; @@ -104,187 +26,78 @@ size_t getFPBinarySize(void *ptr) } } -struct HashVertexProgram -{ - size_t operator()(const void *program) const - { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - bool end = false; - return 0; - while (true) - { - const qword inst = instbuffer[instIndex]; - bool end = inst.word[0] >> 31; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - } - return 0; - } -}; -struct HashFragmentProgram -{ - size_t operator()(const void *program) const - { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - while (true) - { - const qword& inst = instbuffer[instIndex]; - bool end = (inst.word[0] >> 8) & 0x1; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - } - return 0; - } -}; +PipelineStateObjectCache::PipelineStateObjectCache() : currentShaderId(0) +{} -struct VertexProgramCompare +bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { - bool operator()(const void *binary1, const void *binary2) const + binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); + if (It != cacheFS.end()) { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; + shader = It->second; return true; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; - } } -}; + return false; +} -struct FragmentProgramCompare +bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) { - bool operator()(const void *binary1, const void *binary2) const + binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); + if (It != cacheVS.end()) { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; - } + shader = It->second; + return true; } -}; + return false; +} -typedef std::unordered_map binary2VS; -typedef std::unordered_map binary2FS; - -class ProgramBuffer +ID3D12PipelineState *PipelineStateObjectCache::GetProg(u32 fp, u32 vp) const { -public: - binary2VS cacheVS; - binary2FS cacheFS; + u64 key = vp << 32 | fp; + std::unordered_map::const_iterator It = cachePSO.find(key); + if (It == cachePSO.end()) + return nullptr; + return It->second; +} - // Key is vertex << 32 | fragment ids - std::unordered_map cachePSO; +void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp) +{ + size_t actualVPSize = rsx_vp.data.size() * 4; + void *fpShadowCopy = malloc(actualVPSize); + memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); + vp.Id = currentShaderId++; + cacheVS.insert(std::make_pair(fpShadowCopy, vp)); +} - size_t currentShaderId; +void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) +{ + size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); + void *fpShadowCopy = malloc(actualFPSize); + memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); + fp.Id = currentShaderId++; + cacheFS.insert(std::make_pair(fpShadowCopy, fp)); +} - ProgramBuffer() : currentShaderId(0) - {} +void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) +{ + u64 key = vp.Id << 32 | fp.Id; + cachePSO.insert(std::make_pair(key, prog)); +} - bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) - { - binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); - if (It != cacheFS.end()) - { - shader = It->second; - return true; - } - return false; - } - - bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) - { - binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); - if (It != cacheVS.end()) - { - shader = It->second; - return true; - } - return false; - } - - ID3D12PipelineState *GetProg(u32 fp, u32 vp) const - { - u64 key = vp << 32 | fp; - std::unordered_map::const_iterator It = cachePSO.find(key); - if (It == cachePSO.end()) - return nullptr; - return It->second; - } - - void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp) - { - size_t actualVPSize = rsx_vp.data.size() * 4; - void *fpShadowCopy = malloc(actualVPSize); - memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - vp.Id = currentShaderId++; - cacheVS.insert(std::make_pair(fpShadowCopy, vp)); - } - - void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) - { - size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); - void *fpShadowCopy = malloc(actualFPSize); - memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); - fp.Id = currentShaderId++; - cacheFS.insert(std::make_pair(fpShadowCopy, fp)); - } - - void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) - { - u64 key = vp.Id << 32 | fp.Id; - cachePSO.insert(std::make_pair(key, prog)); - } -}; - -static ProgramBuffer g_cachedProgram; - -ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) +ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) { ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; - bool m_fp_buf_num = g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); - bool m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); + bool m_fp_buf_num = SearchFp(*fragmentShader, m_fragment_prog); + bool m_vp_buf_num = SearchVp(*vertexShader, m_vertex_prog); if (!m_fp_buf_num) { LOG_WARNING(RSX, "FP not found in buffer!"); -// Decompile(*fragmentShader); + // Decompile(*fragmentShader); m_fragment_prog.Compile(SHADER_TYPE::SHADER_TYPE_FRAGMENT); - g_cachedProgram.AddFragmentProgram(m_fragment_prog, *fragmentShader); + AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir //fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); @@ -293,49 +106,49 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg if (!m_vp_buf_num) { LOG_WARNING(RSX, "VP not found in buffer!"); -// m_vertex_prog.Decompile(*vertexShader); + // m_vertex_prog.Decompile(*vertexShader); m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); - g_cachedProgram.AddVertexProgram(m_vertex_prog, *vertexShader); + AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir // fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); } if (m_fp_buf_num && m_vp_buf_num) - result = g_cachedProgram.GetProg(m_fragment_prog.Id, m_vertex_prog.Id); + result = GetProg(m_fragment_prog.Id, m_vertex_prog.Id); if (result != nullptr) { return result; -/* // RSX Debugger: Check if this program was modified and update it - if (Ini.GSLogPrograms.GetValue()) - { - for (auto& program : m_debug_programs) - { - if (program.id == m_program.id && program.modified) + /* // RSX Debugger: Check if this program was modified and update it + if (Ini.GSLogPrograms.GetValue()) { - // TODO: This isn't working perfectly. Is there any better/shorter way to update the program - m_vertex_prog.shader = program.vp_shader; - m_fragment_prog.shader = program.fp_shader; - m_vertex_prog.Wait(); - m_vertex_prog.Compile(); - checkForGlError("m_vertex_prog.Compile"); - m_fragment_prog.Wait(); - m_fragment_prog.Compile(); - checkForGlError("m_fragment_prog.Compile"); - glAttachShader(m_program.id, m_vertex_prog.id); - glAttachShader(m_program.id, m_fragment_prog.id); - glLinkProgram(m_program.id); - checkForGlError("glLinkProgram"); - glDetachShader(m_program.id, m_vertex_prog.id); - glDetachShader(m_program.id, m_fragment_prog.id); - program.vp_id = m_vertex_prog.id; - program.fp_id = m_fragment_prog.id; - program.modified = false; + for (auto& program : m_debug_programs) + { + if (program.id == m_program.id && program.modified) + { + // TODO: This isn't working perfectly. Is there any better/shorter way to update the program + m_vertex_prog.shader = program.vp_shader; + m_fragment_prog.shader = program.fp_shader; + m_vertex_prog.Wait(); + m_vertex_prog.Compile(); + checkForGlError("m_vertex_prog.Compile"); + m_fragment_prog.Wait(); + m_fragment_prog.Compile(); + checkForGlError("m_fragment_prog.Compile"); + glAttachShader(m_program.id, m_vertex_prog.id); + glAttachShader(m_program.id, m_fragment_prog.id); + glLinkProgram(m_program.id); + checkForGlError("glLinkProgram"); + glDetachShader(m_program.id, m_vertex_prog.id); + glDetachShader(m_program.id, m_fragment_prog.id); + program.vp_id = m_vertex_prog.id; + program.fp_id = m_fragment_prog.id; + program.modified = false; + } + } } - } - } - m_program.Use();*/ + m_program.Use();*/ } else { @@ -350,7 +163,7 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); - g_cachedProgram.Add(result, m_fragment_prog, m_vertex_prog); + Add(result, m_fragment_prog, m_vertex_prog); // RSX Debugger /*if (Ini.GSLogPrograms.GetValue()) @@ -367,4 +180,33 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg } -#endif \ No newline at end of file +#endif + +void Shader::Compile(SHADER_TYPE st) +{ + static const char VSstring[] = + "float4 main(float4 pos : POSITION) : SV_POSITION" + "{" + " return pos;" + "}"; + static const char FSstring[] = + "float4 main() : SV_TARGET" + "{" + "return float4(1.0f, 1.0f, 1.0f, 1.0f);" + "}"; + HRESULT hr; + Microsoft::WRL::ComPtr errorBlob; + switch (st) + { + case SHADER_TYPE::SHADER_TYPE_VERTEX: + hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); + break; + case SHADER_TYPE::SHADER_TYPE_FRAGMENT: + hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); + break; + } +} diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index f4b428f2a5..81aca20d00 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -5,6 +5,159 @@ #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" -ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); + +enum class SHADER_TYPE +{ + SHADER_TYPE_VERTEX, + SHADER_TYPE_FRAGMENT +}; + +/** Storage for a shader +* Embeds the D3DBlob corresponding to +*/ +class Shader +{ +public: + Shader() : bytecode(nullptr) {} + ~Shader() {} + + u32 Id; + ID3DBlob *bytecode; + + /** + * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + */ + // void Decompile(RSXFragmentProgram& prog) + + /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ + void Compile(SHADER_TYPE st); +}; + +// Based on +// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp +union qword +{ + u64 dword[2]; + u32 word[4]; +}; + +struct HashVertexProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + bool end = false; + return 0; + while (true) + { + const qword inst = instbuffer[instIndex]; + bool end = inst.word[0] >> 31; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct HashFragmentProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + while (true) + { + const qword& inst = instbuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct VertexProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + return true; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +struct FragmentProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +typedef std::unordered_map binary2VS; +typedef std::unordered_map binary2FS; + +class PipelineStateObjectCache +{ +private: + size_t currentShaderId; + binary2VS cacheVS; + binary2FS cacheFS; + // Key is vertex << 32 | fragment ids + std::unordered_map cachePSO; + + bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader); + bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader); + ID3D12PipelineState *GetProg(u32 fp, u32 vp) const; + void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp); + void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp); + void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp); +public: + PipelineStateObjectCache(); + ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); +}; + + #endif \ No newline at end of file