From 930bc9179d6ceac1172c301e01712e2d4cc65179 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 18 Apr 2020 20:38:56 +0300 Subject: [PATCH] rsx/interpreter: Improve instructions support - Must statically write the gl_ClipDistance registers else you get uninitialized trash. This problem is more readily apparent on NVIDIA technology but even AMD is not completely immune. --- .../Interpreter/FragmentInterpreter.glsl | 4 +- .../Common/Interpreter/VertexInterpreter.glsl | 46 +++++++++++-------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 1 + rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp | 28 +++++------ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 5 +- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 2 +- 6 files changed, 45 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl index f7550cc811..bb1ce2b6c0 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl +++ b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl @@ -176,9 +176,9 @@ vec4 read_src(const in int index) // TODO: wpos value = vec4(0.); break; case 1: - value = gl_FrontFacing? in_regs[1] : in_regs[3]; break; + value = gl_FrontFacing? in_regs[3] : in_regs[1]; break; case 2: - value = gl_FrontFacing? in_regs[2] : in_regs[4]; break; + value = gl_FrontFacing? 
in_regs[4] : in_regs[2]; break; case 3: value = fetch_fog_value(fog_mode, in_regs[5]); break; case 13: diff --git a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl index 5580c65cd0..8e67a4fe8c 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl +++ b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl @@ -258,6 +258,13 @@ void write_output(const in int oid, const in int mask_bit) } } +// Cannot dynamically index into the gl_ClipDistance array without causing problems due to its unknown size +#define write_clip_distance(plane, mask_bit, test, value)\ + if (test && attribute_enabled(1 << mask_bit))\ + gl_ClipDistance[plane] = value;\ + else\ + gl_ClipDistance[plane] = 0.5f;\ + ivec4 read_addr_reg() { return a[d0.addr_reg_sel_1]; @@ -524,15 +531,21 @@ void main() } } - // TODO: 2-sided lighting - if (!attribute_enabled(1 << 0 | 1 << 2)) - { - dest[1] = dest[3] = vec4(0, 0, 0, 1); - } + // Unconditionally update COLOR0 and SPECULAR0 + write_output(1, 0); + write_output(2, 1); - if (!attribute_enabled(1 << 1 | 1 << 3)) + // Conditionally update COLOR1 and SPECULAR1 depending on 2-sided mask + if (control == 0) { - dest[2] = dest[4] = vec4(0, 0, 0, 1); + dest[3] = dest[1]; + dest[4] = dest[2]; + } + else + { + // 2-sided lighting + write_output(3, 2); + write_output(4, 3); } if (!attribute_enabled(1 << 4)) @@ -549,19 +562,12 @@ void main() gl_PointSize = point_size; } - if (attribute_enabled(1 << 6 | 1 << 7 | 1 << 8)) - { - gl_ClipDistance[0] = (user_clip_enabled[0].x > 0)? dest[5].y * user_clip_factor[0].x : 0.5f; - gl_ClipDistance[1] = (user_clip_enabled[0].y > 0)? dest[5].z * user_clip_factor[0].y : 0.5f; - gl_ClipDistance[2] = (user_clip_enabled[0].z > 0)? dest[5].w * user_clip_factor[0].z : 0.5f; - } - - if (attribute_enabled(1 << 9 | 1 << 10 | 1 << 11)) - { - gl_ClipDistance[3] = (user_clip_enabled[0].w > 0)? 
dest[6].y * user_clip_factor[0].w : 0.5f; - gl_ClipDistance[4] = (user_clip_enabled[1].x > 0)? dest[6].z * user_clip_factor[1].x : 0.5f; - gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f; - } + write_clip_distance(0, 6, user_clip_enabled[0].x > 0, dest[5].y * user_clip_factor[0].x); + write_clip_distance(1, 7, user_clip_enabled[0].y > 0, dest[5].z * user_clip_factor[0].y); + write_clip_distance(2, 8, user_clip_enabled[0].z > 0, dest[5].w * user_clip_factor[0].z); + write_clip_distance(3, 9, user_clip_enabled[0].w > 0, dest[6].y * user_clip_factor[0].w); + write_clip_distance(4, 10, user_clip_enabled[1].x > 0, dest[6].z * user_clip_factor[1].x); + write_clip_distance(5, 11, user_clip_enabled[1].y > 0, dest[6].w * user_clip_factor[1].y); write_output(15, 12); write_output(6, 13); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3cc565f8e3..64c0ca3ef1 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -778,6 +778,7 @@ void GLGSRender::load_program_env() vp_config[0] = current_vertex_program.base_address; vp_config[1] = current_vertex_program.entry; vp_config[2] = current_vertex_program.output_mask; + vp_config[3] = rsx::method_registers.two_side_light_en() ? 1u : 0u; std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index f4d82c268d..256174abfe 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -114,7 +114,7 @@ namespace gl " uint base_address;\n" " uint entry;\n" " uint output_mask;\n" - " uint reserved;\n" + " uint control;\n" " uvec4 vp_instructions[];\n" "};\n\n"; @@ -285,26 +285,22 @@ namespace gl return; } - if (get_driver_caps().vendor_AMD) + // Overlapping texture bindings are trouble. 
Cannot bind one TIU to two types of samplers simultaneously + for (unsigned i = 0; i < replacement_map.size(); ++i) { - // AMD drivers don't like texture bindings overlapping which means workarounds are needed - // Technically this is accurate to spec, but makes efficient usage of shader resources difficult - for (unsigned i = 0; i < replacement_map.size(); ++i) + for (int j = 0; j < 4; ++j) { - for (int j = 0; j < 4; ++j) + auto& pool = allocator.pools[j]; + for (int k = pool.num_used; k < pool.pool_size; ++k) { - auto& pool = allocator.pools[j]; - for (int k = pool.num_used; k < pool.pool_size; ++k) + if (pool.allocated[k] == replacement_map[i].second) { - if (pool.allocated[k] == replacement_map[i].second) - { - pool.allocated[k] = replacement_map[i].first; - pool.flags |= static_cast(interpreter::texture_pool_flags::dirty); + pool.allocated[k] = replacement_map[i].first; + pool.flags |= static_cast(interpreter::texture_pool_flags::dirty); - // Exit nested loop - j = 4; - break; - } + // Exit nested loop + j = 4; + break; } } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index cc5a125b84..e6ef2727bd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -467,8 +467,8 @@ VKGSRender::VKGSRender() : GSRender() if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) { - m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "vertex instructions buffer", 512 * 16); - m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "fragment instructions buffer", 2048); + m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16); + m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048); } const auto limits = m_device->gpu().get_limits(); @@ -1759,6 +1759,7 @@ void 
VKGSRender::load_program_env() vp_config[0] = current_vertex_program.base_address; vp_config[1] = current_vertex_program.entry; vp_config[2] = current_vertex_program.output_mask; + vp_config[3] = rsx::method_registers.two_side_light_en()? 1u: 0u; std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); m_vertex_instructions_buffer.unmap(); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index a78e48c788..129e9339de 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -34,7 +34,7 @@ namespace vk " uint base_address;\n" " uint entry;\n" " uint output_mask;\n" - " uint reserved;\n" + " uint control;\n" " uvec4 vp_instructions[];\n" "};\n\n";