From a52ea7f8700b4205a186ed4d99d69a929a54465c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 20 Mar 2018 14:14:45 +0300 Subject: [PATCH] rsx: Improve fragment and vertex program usage - Introduces a gpu program analyser step to examine shader contents before attempting compilation or cache search - Avoids detecting shader as being different because of unused textures having state changes - Adds better program size detection for vertex programs - Improved vertex program decompiler - Properly support CAL type instructions - Support jumping over instructions marked with a termination marker with BRA/CAL class opcodes - Fix SRC checks and abort - Fix CC register initialization - NOTE: Even unused SRC registers have to be valid (usually referencing in.POS) --- rpcs3/Emu/RSX/Common/ProgramStateCache.cpp | 81 ++++- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 15 +- .../RSX/Common/VertexProgramDecompiler.cpp | 294 ++++++++++-------- .../Emu/RSX/Common/VertexProgramDecompiler.h | 8 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 8 +- rpcs3/Emu/RSX/RSXFragmentProgram.h | 9 - rpcs3/Emu/RSX/RSXThread.cpp | 52 +--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 2 +- rpcs3/Emu/RSX/rsx_cache.h | 24 +- 9 files changed, 287 insertions(+), 206 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 39b2082793..e15a4e25f2 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -22,6 +22,46 @@ size_t vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgra return hash; } +vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const std::vector& data) +{ + u32 ucode_size = 0; + u32 current_instrution = 0; + u32 last_instruction_address = 0; + D3 d3; + D2 d2; + D1 d1; + + for (; ucode_size < data.size(); ucode_size += 4) + { + d1.HEX = data[ucode_size + 1]; + d3.HEX = data[ucode_size + 3]; + + switch (d1.sca_opcode) + { + case RSX_SCA_OPCODE_BRI: + case RSX_SCA_OPCODE_BRB: + case RSX_SCA_OPCODE_CAL: + case RSX_SCA_OPCODE_CLI: + case RSX_SCA_OPCODE_CLB: + { + d2.HEX = data[ucode_size + 2]; + + u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl) * 4; + last_instruction_address = std::max(last_instruction_address, jump_address); + break; + } + } + + if (d3.end && (ucode_size >= last_instruction_address)) + { + //Jumping over an end label is legal (verified) + break; + } + } + + return{ ucode_size + 4 }; +} + size_t vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const { size_t hash = vertex_program_utils::get_vertex_program_ucode_hash(program); @@ -84,21 +124,56 @@ size_t fragment_program_utils::get_fragment_program_ucode_size(void *ptr) } } -u32 fragment_program_utils::get_fragment_program_start(void *ptr) +fragment_program_utils::fragment_program_metadata fragment_program_utils::analyse_fragment_program(void *ptr) { const qword *instBuffer = (const qword*)ptr; size_t instIndex = 0; + s32 program_offset = -1; + u16 textures_mask = 0; + while (true) { const qword& inst = instBuffer[instIndex]; - u32 opcode = inst.word[0] >> 16 & 0x3F; + const u32 opcode = (inst.word[0] >> 16) & 0x3F; + if (opcode) + { + if (program_offset < 0) + program_offset = instIndex * 16; + + if (opcode == RSX_FP_OPCODE_TEX || + opcode == RSX_FP_OPCODE_TEXBEM || + opcode == RSX_FP_OPCODE_TXP || + opcode == RSX_FP_OPCODE_TXPBEM || + opcode == RSX_FP_OPCODE_TXD || + opcode == RSX_FP_OPCODE_TXB || + opcode == RSX_FP_OPCODE_TXL) + { + //Bits 17-20 of word 1, swapped within u16 sections + //Bits 16-23 are swapped into the upper 8 bits (24-31) + const u32 tex_num = (inst.word[0] >> 25) & 15; + textures_mask |= (1 << tex_num); + } + + if (is_constant(inst.word[1]) || is_constant(inst.word[2]) || is_constant(inst.word[3])) + { + //Instruction references constant, skip one slot occupied by data + instIndex++; + } + } + + if ((inst.word[0] >> 8) & 0x1) + { + if (program_offset < 0) + program_offset = instIndex * 16; + break; + } instIndex++; } - return instIndex * 16; + return{ (u32)program_offset, textures_mask }; } size_t fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentProgram& program) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 519f3a5ba4..96bd4aec76 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -26,7 +26,14 @@ namespace program_hash_util struct vertex_program_utils { + struct vertex_program_metadata + { + u32 ucode_size; + }; + static size_t get_vertex_program_ucode_hash(const RSXVertexProgram &program); + + static vertex_program_metadata analyse_vertex_program(const std::vector& data); }; struct vertex_program_storage_hash @@ -41,6 +48,12 @@ namespace program_hash_util struct fragment_program_utils { + struct fragment_program_metadata + { + u32 program_start_offset; + u16 referenced_textures_mask; + }; + /** * returns true if the given source Operand is a constant */ @@ -48,7 +61,7 @@ namespace program_hash_util static size_t get_fragment_program_ucode_size(void *ptr); - static u32 get_fragment_program_start(void *ptr); + static fragment_program_metadata analyse_fragment_program(void *ptr); static size_t get_fragment_program_ucode_hash(const RSXFragmentProgram &program); }; diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index b048a29501..9eb56ab462 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -158,7 +158,7 @@ void VertexProgramDecompiler::SetDST(bool is_sca, std::string value) if (d0.cond_update_enable_0 || d0.cond_update_enable_1) { - dest = m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0., 0., 0., 0.)") + mask; + dest = AddCondReg() + mask; } else if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) { @@ -174,24 +174,6 @@ void VertexProgramDecompiler::SetDST(bool is_sca, std::string value) AddCodeCond(Format(dest), value); } -std::string VertexProgramDecompiler::GetFunc() -{ - std::string name = "func$a"; - - for (const auto& func : m_funcs) { - if (func.name.compare(name) == 0) { - return name + "()"; - } - } - - m_funcs.emplace_back(); - FuncInfo &idx = m_funcs.back(); - idx.offset = GetAddr(); - idx.name = name; - - return name + "()"; -} - std::string VertexProgramDecompiler::GetTex() { return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(d2.tex_num)); @@ -210,17 +192,13 @@ std::string VertexProgramDecompiler::Format(const std::string& code) { "$am", std::bind(std::mem_fn(&VertexProgramDecompiler::AddAddrMask), this) }, { "$a", std::bind(std::mem_fn(&VertexProgramDecompiler::AddAddrReg), this) }, { "$vm", std::bind(std::mem_fn(&VertexProgramDecompiler::GetVecMask), this) }, - { "$t", std::bind(std::mem_fn(&VertexProgramDecompiler::GetTex), this) }, - - { "$fa", [this]()->std::string { return std::to_string(GetAddr()); } }, - { "$f()", std::bind(std::mem_fn(&VertexProgramDecompiler::GetFunc), this) }, { "$ifcond ", [this]() -> std::string - { - const std::string& cond = GetCond(); - if (cond == "true") return ""; - return "if(" + cond + ") "; - } + { + const std::string& cond = GetCond(); + if (cond == "true") return ""; + return "if(" + cond + ") "; + } }, { "$cond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetCond), this) }, { "$ifbcond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetOptionalBranchCond), this) } @@ -261,7 +239,7 @@ std::string VertexProgramDecompiler::GetCond() swizzle += f[d0.mask_w]; swizzle = swizzle == "xyzw" ? "" : "." + swizzle; - return "any(" + compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1) + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")"; + return "any(" + compareFunction(cond_string_table[d0.cond], AddCondReg() + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")"; } std::string VertexProgramDecompiler::GetOptionalBranchCond() @@ -315,7 +293,7 @@ void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::str swizzle = swizzle == "xyzw" ? "" : "." + swizzle; - std::string cond = compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1) + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)"); + std::string cond = compareFunction(cond_string_table[d0.cond], AddCondReg() + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)"); ShaderVariable dst_var(dst); dst_var.symplify(); @@ -353,6 +331,11 @@ std::string VertexProgramDecompiler::AddAddrRegWithoutMask() return m_parr.AddParam(PF_PARAM_NONE, getIntTypeName(4), "a" + std::to_string(d0.addr_reg_sel_1), getIntTypeName(4) + "(0, 0, 0, 0)"); } +std::string VertexProgramDecompiler::AddCondReg() +{ + return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0., 0., 0., 0.)"); +} + u32 VertexProgramDecompiler::GetAddr() { return (d2.iaddrh << 3) | d3.iaddrl; @@ -379,37 +362,6 @@ std::string VertexProgramDecompiler::NotZeroPositive(const std::string& code) return "max(" + code + ", 0.0000000001)"; } -std::string VertexProgramDecompiler::BuildFuncBody(const FuncInfo& func) -{ - std::string result; - - for (uint i = func.offset; ibody = "\tgl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"; } std::string VertexProgramDecompiler::Decompile() { for (unsigned i = 0; i < PF_PARAM_COUNT; i++) m_parr.params[i].clear(); - m_instr_count = 0; + + m_instr_count = m_data.size() / 4; for (int i = 0; i < m_max_instr_count; ++i) { @@ -480,11 +426,11 @@ std::string VertexProgramDecompiler::Decompile() } bool is_has_BRA = false; + bool program_end = false; + u32 i = 1; - for (u32 i = 1; m_instr_count < m_max_instr_count; m_instr_count++) + while (i < m_data.size()) { - m_cur_instr = &m_instructions[m_instr_count]; - if (is_has_BRA) { d3.HEX = m_data[i]; @@ -497,6 +443,7 @@ std::string VertexProgramDecompiler::Decompile() switch (d1.sca_opcode) { case RSX_SCA_OPCODE_BRA: + LOG_ERROR(RSX, "Unimplemented VP opcode BRA"); is_has_BRA = true; m_jump_lvls.clear(); d3.HEX = m_data[++i]; @@ -505,6 +452,9 @@ std::string VertexProgramDecompiler::Decompile() case RSX_SCA_OPCODE_BRB: case RSX_SCA_OPCODE_BRI: + case RSX_SCA_OPCODE_CAL: + case RSX_SCA_OPCODE_CLI: + case RSX_SCA_OPCODE_CLB: d2.HEX = m_data[i++]; d3.HEX = m_data[i]; i += 2; @@ -517,22 +467,9 @@ std::string VertexProgramDecompiler::Decompile() break; } } - - if (d3.end) - { - m_instr_count++; - - if (i < m_data.size()) - { - LOG_ERROR(RSX, "Program end before buffer end."); - } - - break; - } } uint jump_position = 0; - if (is_has_BRA || !m_jump_lvls.empty()) { m_cur_instr = &m_instructions[0]; @@ -561,9 +498,58 @@ std::string VertexProgramDecompiler::Decompile() return jump; }; - for (u32 i = 0; i < m_instr_count; ++i) + auto do_function_call = [this, &i](const std::string& condition) { - m_cur_instr = &m_instructions[i]; + //call function + m_call_stack.push(i+1); + AddCode(condition); + AddCode("{"); + m_cur_instr->open_scopes++; + i = GetAddr(); + }; + + auto do_function_return = [this, &i]() + { + if (!m_call_stack.empty()) + { + //TODO: Conditional returns + i = m_call_stack.top(); + m_call_stack.pop(); + m_cur_instr->close_scopes++; + AddCode("}"); + } + else + { + AddCode("$ifcond return"); + } + }; + + auto do_program_exit = [this, do_function_return, &i](bool abort) + { + if (abort) + { + AddCode("//ABORT"); + } + + while (!m_call_stack.empty()) + { + LOG_ERROR(RSX, "vertex program end in subroutine call!"); + do_function_return(); + } + + if ((i + 1) < m_instr_count) + { + //Forcefully exit + AddCode("return;"); + } + }; + + for (i = 0; i < m_instr_count; ++i) + { + if (m_call_stack.empty()) + { + m_cur_instr = &m_instructions[i]; + } d0.HEX = m_data[i * 4 + 0]; d1.HEX = m_data[i * 4 + 1]; @@ -576,22 +562,30 @@ std::string VertexProgramDecompiler::Decompile() src[2].src2l = d3.src2l; src[2].src2h = d2.src2h; - if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end())) + if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type) { - m_cur_instr->close_scopes++; - AddCode("}"); - AddCode(""); - - AddCode(fmt::format("if (jump_position <= %u)", jump_position++)); - AddCode("{"); - m_cur_instr->open_scopes++; + AddCode("//Src check failed. Aborting"); + do_program_exit(true); + break; } - if (!d1.sca_opcode && !d1.vec_opcode) + if (m_call_stack.empty()) { - AddCode("//nop"); + //TODO: Subroutines can also have arbitrary jumps! + if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end())) + { + m_cur_instr->close_scopes++; + AddCode("}"); + AddCode(""); + + AddCode(fmt::format("if (jump_position <= %u)", jump_position++)); + AddCode("{"); + m_cur_instr->open_scopes++; + } } + program_end = !!d3.end; + switch (d1.vec_opcode) { case RSX_VEC_OPCODE_NOP: break; @@ -624,7 +618,7 @@ std::string VertexProgramDecompiler::Decompile() default: AddCode(fmt::format("//Unknown vp opcode 0x%x", u32{ d1.vec_opcode })); LOG_ERROR(RSX, "Unknown vp opcode 0x%x", u32{ d1.vec_opcode }); - Emu.Pause(); + program_end = true; break; } @@ -644,39 +638,58 @@ std::string VertexProgramDecompiler::Decompile() break; case RSX_SCA_OPCODE_BRA: { - AddCode("$if ($cond) //BRA"); - AddCode("{"); - m_cur_instr->open_scopes++; - AddCode("jump_position = $a$am;"); - AddCode("continue;"); - m_cur_instr->close_scopes++; - AddCode("}"); + if (m_call_stack.empty()) + { + AddCode("$if ($cond) //BRA"); + AddCode("{"); + m_cur_instr->open_scopes++; + AddCode("jump_position = $a$am;"); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + } + else + { + //TODO + LOG_ERROR(RSX, "BRA opcode found in subroutine!"); + } } break; case RSX_SCA_OPCODE_BRI: // works differently (BRI o[1].x(TR) L0;) { - u32 jump_position = find_jump_lvl(GetAddr()); + if (m_call_stack.empty()) + { + u32 jump_position = find_jump_lvl(GetAddr()); - AddCode("$ifcond //BRI"); - AddCode("{"); - m_cur_instr->open_scopes++; - AddCode(fmt::format("jump_position = %u;", jump_position)); - AddCode("continue;"); - m_cur_instr->close_scopes++; - AddCode("}"); + AddCode("$ifcond //BRI"); + AddCode("{"); + m_cur_instr->open_scopes++; + AddCode(fmt::format("jump_position = %u;", jump_position)); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + } + else + { + //TODO + LOG_ERROR(RSX, "BRI opcode found in subroutine!"); + } } break; case RSX_SCA_OPCODE_CAL: // works same as BRI - AddCode("$ifcond $f(); //CAL"); + AddCode("//CAL"); + do_function_call("$ifcond"); break; case RSX_SCA_OPCODE_CLI: // works same as BRI - AddCode("$ifcond $f(); //CLI"); + LOG_ERROR(RSX, "Unimplemented VP opcode CLI"); + AddCode("//CLI"); + do_function_call("$ifcond"); break; case RSX_SCA_OPCODE_RET: // works like BRI but shorter (RET o[1].x(TR);) - AddCode("$ifcond return;"); + do_function_return(); break; case RSX_SCA_OPCODE_LG2: SetDSTSca("log2(" + NotZeroPositive("$s") + ")"); break; case RSX_SCA_OPCODE_EX2: SetDSTSca("exp2($s)"); break; @@ -686,29 +699,32 @@ std::string VertexProgramDecompiler::Decompile() // works differently (BRB o[1].x !b0, L0;) { LOG_WARNING(RSX, "sca_opcode BRB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX); - AddCode(fmt::format("//BRB opcode, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX)); - - u32 jump_position = find_jump_lvl(GetAddr()); - - AddCode("$ifbcond //BRB"); - AddCode("{"); - m_cur_instr->open_scopes++; - AddCode(fmt::format("jump_position = %u;", jump_position)); - AddCode("continue;"); - m_cur_instr->close_scopes++; - AddCode("}"); - AddCode(""); + + if (m_call_stack.empty()) + { + u32 jump_position = find_jump_lvl(GetAddr()); + + AddCode("$ifbcond //BRB"); + AddCode("{"); + m_cur_instr->open_scopes++; + AddCode(fmt::format("jump_position = %u;", jump_position)); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + AddCode(""); + } + else + { + //TODO + LOG_ERROR(RSX, "BRA opcode found in subroutine!"); + } break; } case RSX_SCA_OPCODE_CLB: break; // works same as BRB - LOG_WARNING(RSX, "sca_opcode CLB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX); AddCode("//CLB"); - - AddCode("$ifbcond $f(); //CLB"); - AddCode(""); - + do_function_call("$ifbcond"); break; case RSX_SCA_OPCODE_PSH: break; // works differently (PSH o[1].x A0;) @@ -722,7 +738,13 @@ std::string VertexProgramDecompiler::Decompile() default: AddCode(fmt::format("//Unknown vp sca_opcode 0x%x", u32{ d1.sca_opcode })); LOG_ERROR(RSX, "Unknown vp sca_opcode 0x%x", u32{ d1.sca_opcode }); - Emu.Pause(); + program_end = true; + break; + } + + if (program_end) + { + do_program_exit(!d3.end); break; } } @@ -741,10 +763,6 @@ std::string VertexProgramDecompiler::Decompile() m_jump_lvls.clear(); m_body.clear(); - if (m_funcs.size() > 2) - { - m_funcs.erase(m_funcs.begin() + 2, m_funcs.end()); - } return result; } diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h index b02cac3e6e..95afec7986 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h @@ -2,6 +2,7 @@ #include "Emu/RSX/RSXVertexProgram.h" #include #include +#include #include #include "ShaderParam.h" @@ -54,9 +55,7 @@ struct VertexProgramDecompiler std::set m_jump_lvls; std::vector m_body; - std::vector m_funcs; - - //wxString main; + std::stack m_call_stack; const std::vector& m_data; ParamArray m_parr; @@ -67,13 +66,13 @@ struct VertexProgramDecompiler std::string GetScaMask(); std::string GetDST(bool is_sca = false); std::string GetSRC(const u32 n); - std::string GetFunc(); std::string GetTex(); std::string GetCond(); std::string GetOptionalBranchCond(); //Conditional branch expression modified externally at runtime std::string AddAddrMask(); std::string AddAddrReg(); std::string AddAddrRegWithoutMask(); + std::string AddCondReg(); u32 GetAddr(); std::string Format(const std::string& code); @@ -82,7 +81,6 @@ struct VertexProgramDecompiler void SetDST(bool is_sca, std::string value); void SetDSTVec(const std::string& code); void SetDSTSca(const std::string& code); - std::string BuildFuncBody(const FuncInfo& func); std::string BuildCode(); protected: diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index fb4fa39a20..81886998f2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -24,7 +24,7 @@ namespace GLGSRender::GLGSRender() : GSRender() { - m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.2")); + m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.3")); if (g_cfg.video.disable_vertex_cache) m_vertex_cache.reset(new gl::null_vertex_cache()); @@ -610,9 +610,9 @@ void GLGSRender::on_init_thread() if (g_cfg.video.debug_output) gl::enable_debugging(); - LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VERSION)); - LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION)); - LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VENDOR)); + LOG_NOTICE(RSX, "GL RENDERER: %s (%s)", (const char*)glGetString(GL_RENDERER), (const char*)glGetString(GL_VENDOR)); + LOG_NOTICE(RSX, "GL VERSION: %s", (const char*)glGetString(GL_VERSION)); + LOG_NOTICE(RSX, "GLSL VERSION: %s", (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION)); auto& gl_caps = gl::get_driver_caps(); diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index a8d1a00943..d71ab1c6e2 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -241,15 +241,6 @@ struct RSXFragmentProgram return (rsx::texture_dimension_extended)((texture_dimensions >> (id * 2)) & 0x3); } - void set_texture_dimension(const std::array &dimensions) - { - texture_dimensions = 0; - for (u32 i = 0, offset = 0; i < 16; ++i, offset += 2) - { - texture_dimensions |= (u32)dimensions[i] << offset; - } - } - RSXFragmentProgram() { memset(this, 0, sizeof(RSXFragmentProgram)); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index b0bc2b6755..9a75b6bd91 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1338,23 +1338,11 @@ namespace rsx u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4); u32* ucode_dst = current_vertex_program.data.data(); - u32 ucode_size = 0; - D3 d3; - for (int i = transform_program_start; i < 512; ++i) - { - ucode_size += 4; - memcpy(ucode_dst, ucode_src, 4 * sizeof(u32)); + memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32)); - d3.HEX = ucode_src[3]; - if (d3.end) - break; - - ucode_src += 4; - ucode_dst += 4; - } - - current_vertex_program.data.resize(ucode_size); + auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data); + current_vertex_program.data.resize(program_info.ucode_size); const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); @@ -1562,10 +1550,10 @@ namespace rsx const u32 program_offset = (shader_program & ~0x3); result.addr = vm::base(rsx::get_address(program_offset, program_location)); - auto program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(result.addr); + const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); - result.addr = ((u8*)result.addr + program_start); - result.offset = program_offset + program_start; + result.addr = ((u8*)result.addr + program_info.program_start_offset); + result.offset = program_offset + program_info.program_start_offset; result.valid = true; result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); result.unnormalized_coords = 0; @@ -1577,7 +1565,6 @@ namespace rsx result.redirected_textures = 0; result.shadow_textures = 0; - std::array texture_dimensions; const auto resolution_scale = rsx::get_resolution_scale(); for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i) @@ -1587,14 +1574,10 @@ namespace rsx result.texture_scale[i][1] = sampler_descriptors[i]->scale_y; result.texture_scale[i][2] = (f32)tex.remap(); //Debug value - if (!tex.enabled()) - { - texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d; - } - else + if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i))) { u32 texture_control = 0; - texture_dimensions[i] = sampler_descriptors[i]->image_type; + result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1)); if (tex.alpha_kill_enabled()) { @@ -1669,8 +1652,6 @@ namespace rsx } } - result.set_texture_dimension(texture_dimensions); - //Sanity checks if (result.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { @@ -1694,10 +1675,10 @@ namespace rsx const u32 program_offset = (shader_program & ~0x3); result.addr = vm::base(rsx::get_address(program_offset, program_location)); - auto program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(result.addr); + auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); - result.addr = ((u8*)result.addr + program_start); - result.offset = program_offset + program_start; + result.addr = ((u8*)result.addr + program_info.program_start_offset); + result.offset = program_offset + program_info.program_start_offset; result.valid = true; result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); result.unnormalized_coords = 0; @@ -1709,7 +1690,6 @@ namespace rsx result.redirected_textures = 0; result.shadow_textures = 0; - std::array texture_dimensions; const auto resolution_scale = rsx::get_resolution_scale(); for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i) @@ -1720,13 +1700,9 @@ namespace rsx result.textures_alpha_kill[i] = 0; result.textures_zfunc[i] = 0; - if (!tex.enabled()) + if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i))) { - texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d; - } - else - { - texture_dimensions[i] = tex.get_extended_texture_dimension(); + result.texture_dimensions |= ((u32)tex.get_extended_texture_dimension() << (i << 1)); if (tex.alpha_kill_enabled()) { @@ -1801,8 +1777,6 @@ namespace rsx } } } - - result.set_texture_dimension(texture_dimensions); } void thread::reset() diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 5c84b1bc3f..8064ac7727 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -632,7 +632,7 @@ VKGSRender::VKGSRender() : GSRender() else m_vertex_cache.reset(new vk::weak_vertex_cache()); - m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.25")); + m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.3")); open_command_buffer(); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 5c72b64537..5887014ed5 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -306,7 +306,7 @@ namespace rsx return; } - std::string directory_path = root_path + "/pipelines/" + pipeline_class_name; + std::string directory_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix; if (!fs::is_dir(directory_path)) { @@ -331,6 +331,9 @@ namespace rsx root.rewind(); + // Invalid pipeline entries to be removed + std::vector invalid_entries; + // Progress dialog std::unique_ptr fallback_dlg; if (!dlg) @@ -348,11 +351,9 @@ namespace rsx if (tmp.name == "." || tmp.name == "..") continue; - if (tmp.name.compare(0, prefix_length, version_prefix) != 0) - continue; - + const auto filename = directory_path + "/" + tmp.name; std::vector bytes; - fs::file f(directory_path + "/" + tmp.name); + fs::file f(filename); processed++; dlg->update_msg(processed, entry_count); @@ -360,6 +361,7 @@ namespace rsx if (f.size() != sizeof(pipeline_data)) { LOG_ERROR(RSX, "Cached pipeline object %s is not binary compatible with the current shader cache", tmp.name.c_str()); + invalid_entries.push_back(filename); continue; } @@ -377,6 +379,16 @@ namespace rsx } } + if (!invalid_entries.empty()) + { + for (const auto &filename : invalid_entries) + { + fs::remove_file(filename); + } + + LOG_NOTICE(RSX, "shader cache: %d entries were marked as invalid and removed", invalid_entries.size()); + } + dlg->close(); } @@ -416,7 +428,7 @@ namespace rsx state_hash ^= rpcs3::hash_base(data.fp_zfunc_mask); std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash); - std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "-" + pipeline_file_name; + std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name; fs::file(pipeline_path, fs::rewrite).write(&data, sizeof(pipeline_data)); }