diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index ff0724f32e..2ecbf1e901 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -15,7 +15,7 @@ u64 GLGSRender::get_cycles() GLGSRender::GLGSRender() : GSRender() { - m_shaders_cache = std::make_unique(m_prog_buffer, "opengl", "v1.91"); + m_shaders_cache = std::make_unique(m_prog_buffer, "opengl", "v1.92"); if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx) m_vertex_cache = std::make_unique(); diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp index 24378816d6..9cabeda367 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp @@ -36,10 +36,10 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert //u32 last_instruction_address = 0; //u32 first_instruction_address = entry; - std::stack call_stack; - std::pair instruction_range{umax, 0}; - std::bitset<512> instructions_to_patch; + std::bitset instructions_to_patch; + std::pair instruction_range{ umax, 0 }; bool has_branch_instruction = false; + std::stack call_stack; D3 d3; D2 d2; @@ -54,7 +54,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert while (true) { - ensure(current_instruction < 512); + ensure(current_instruction < rsx::max_vertex_program_instructions); if (result.instruction_mask[current_instruction]) { @@ -120,8 +120,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert instructions_to_patch[current_instruction] = true; has_branch_instruction = true; + d0.HEX = instruction._u32[0]; d2.HEX = instruction._u32[2]; - const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl); + const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl; if (function_call) { @@ -162,7 +163,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert } if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) || - (current_instruction + 1) == 512) + (current_instruction + 1) == rsx::max_vertex_program_instructions) { break; } @@ -183,7 +184,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert { fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite); dump.write(&entry, 4); - dump.write(data, 512 * 16); + dump.write(data, rsx::max_vertex_program_instructions * 16); dump.close(); } @@ -215,14 +216,17 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert if (instructions_to_patch[i]) { + d0.HEX = dst[0]; d2.HEX = dst[2]; d3.HEX = dst[3]; - u32 address = ((d2.iaddrh << 3) | d3.iaddrl); + u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl; address -= instruction_range.first; - d2.iaddrh = (address >> 3); + d0.iaddrh2 = (address >> 9) & 0x1; + d2.iaddrh = (address >> 3) & 0x3F; d3.iaddrl = (address & 0x7); + dst[0] = d0.HEX; dst[2] = d2.HEX; dst[3] = d3.HEX; diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.h b/rpcs3/Emu/RSX/Program/ProgramStateCache.h index 54b12fcb1c..235ff6e9ca 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.h @@ -22,7 +22,7 @@ namespace program_hash_util { struct vertex_program_metadata { - std::bitset<512> instruction_mask; + std::bitset instruction_mask; u32 ucode_length; u32 referenced_textures_mask; }; diff --git a/rpcs3/Emu/RSX/Program/RSXVertexProgram.h b/rpcs3/Emu/RSX/Program/RSXVertexProgram.h index 92013bfde8..df7e243e07 100644 --- a/rpcs3/Emu/RSX/Program/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/Program/RSXVertexProgram.h @@ -93,6 +93,13 @@ union D0 u32 vec_result : 1; u32 : 1; }; + + struct + { + u32 : 23; + u32 iaddrh2 : 1; + u32 : 8; + }; }; union D1 @@ -239,7 +246,7 @@ struct RSXVertexProgram u32 base_address; u32 entry; - std::bitset<512> instruction_mask; + std::bitset instruction_mask; std::set jump_table; rsx::texture_dimension_extended get_texture_dimension(u8 id) const diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp index 79af20991c..feb6b4218c 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp @@ -330,7 +330,7 @@ std::string VertexProgramDecompiler::AddCondReg() u32 VertexProgramDecompiler::GetAddr() const { - return (d2.iaddrh << 3) | d3.iaddrl; + return (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl; } void VertexProgramDecompiler::AddCode(const std::string& code) diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h index 1ce4f96fd0..35a3c69f24 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h @@ -53,8 +53,7 @@ struct VertexProgramDecompiler } }; - static const usz m_max_instr_count = 512; - Instruction m_instructions[m_max_instr_count]; + Instruction m_instructions[rsx::max_vertex_program_instructions]; Instruction* m_cur_instr; usz m_instr_count; diff --git a/rpcs3/Emu/RSX/Program/program_util.h b/rpcs3/Emu/RSX/Program/program_util.h index f74af801b1..c99ab6ac48 100644 --- a/rpcs3/Emu/RSX/Program/program_util.h +++ b/rpcs3/Emu/RSX/Program/program_util.h @@ -5,6 +5,11 @@ namespace rsx { + enum program_limits + { + max_vertex_program_instructions = 544 + }; + #pragma pack(push, 1) // NOTE: This structure must be packed to match GPU layout. struct fragment_program_texture_config diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 240b2df2ab..6153b691c6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -501,7 +501,7 @@ VKGSRender::VKGSRender() : GSRender() else m_vertex_cache = std::make_unique(); - m_shaders_cache = std::make_unique(*m_prog_buffer, "vulkan", "v1.91"); + m_shaders_cache = std::make_unique(*m_prog_buffer, "vulkan", "v1.92"); open_command_buffer(); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index ba629ca7dc..d972841e94 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -30,7 +30,7 @@ namespace rsx u32 vp_ctrl; u32 vp_texture_dimensions; - u64 vp_instruction_mask[8]; + u64 vp_instruction_mask[9]; u32 vp_base_address; u32 vp_entry; @@ -365,7 +365,7 @@ namespace rsx vp.base_address = data.vp_base_address; vp.entry = data.vp_entry; - pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask); + pack_bitset(vp.instruction_mask, data.vp_instruction_mask); for (u8 index = 0; index < 32; ++index) { @@ -403,7 +403,7 @@ namespace rsx data_block.vp_base_address = vp.base_address; data_block.vp_entry = vp.entry; - unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask); + unpack_bitset(vp.instruction_mask, data_block.vp_instruction_mask); u8 index = 0; while (index < 32) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 4cce0bf476..1be9b80021 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -467,13 +467,10 @@ namespace rsx u32 rcount = count; if (const u32 max = load_pos * 4 + rcount + (index % 4); - max > 512 * 4) + max > max_vertex_program_instructions * 4) { - // PS3 seems to allow exceeding the program buffer by upto 32 instructions before crashing - // Discard the "excess" instructions to not overflow our transform program buffer - // TODO: Check if the instructions in the overflow area are executed by PS3 - rsx_log.warning("Program buffer overflow!"); - rcount -= max - (512 * 4); + rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4); + rcount -= max - (max_vertex_program_instructions * 4); } stream_data_to_memory_swapped_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4] diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index a9845c79e5..e1e9ff9be5 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -8,6 +8,7 @@ #include "rsx_vertex_data.h" #include "rsx_utils.h" #include "Emu/Cell/timers.hpp" +#include "Program/program_util.h" namespace rsx { @@ -492,7 +493,7 @@ namespace rsx std::array vertex_textures; - std::array transform_program{}; + std::array transform_program{}; std::array transform_constants{}; draw_clause current_draw_clause{}; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 120a5aab89..df0c0e03be 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -855,18 +855,17 @@ namespace rsx template void unpack_bitset(const std::bitset& block, u64* values) { - constexpr int count = N / 64; - for (int n = 0; n < count; ++n) + for (int bit = 0, n = -1, shift = 0; bit < N; ++bit, ++shift) { - int i = (n << 6); - values[n] = 0; - - for (int bit = 0; bit < 64; ++bit, ++i) + if ((bit % 64) == 0) { - if (block[i]) - { - values[n] |= (1ull << bit); - } + values[++n] = 0; + shift = 0; + } + + if (block[bit]) + { + values[n] |= (1ull << shift); } } } @@ -874,18 +873,11 @@ namespace rsx template void pack_bitset(std::bitset& block, u64* values) { - constexpr int count = N / 64; - for (int n = (count - 1); n >= 0; --n) + for (int n = 0, shift = 0; shift < N; ++n, shift += 64) { - if ((n + 1) < count) - { - block <<= 64; - } - - if (values[n]) - { - block |= values[n]; - } + std::bitset tmp = values[n]; + tmp <<= shift; + block |= tmp; } }