diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index dc8a2a331b..4d897e01db 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -818,7 +818,7 @@ namespace glsl case FUNCTION::FUNCTION_FRACT: return "fract($0)"; case FUNCTION::FUNCTION_REFL: - return "$Ty($0 - 2.0 * (dot($0, $1)) * $1)"; + return "reflect($0, $1)"; case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D: return "TEX1D($_i, $0.x)"; case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_BIAS: diff --git a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl index ac4d614937..6ca1cb0b3a 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl +++ b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl @@ -142,11 +142,22 @@ const float modifier_scale[] = {1.f, 2.f, 4.f, 8.f, 1.f, 0.5f, 0.25f, 0.125f}; vec4 regs16[48]; vec4 regs32[48]; -vec4 cc[2]; +vec4 cc[2] = { vec4(0.), vec4(0.) }; int inst_length = 1; int ip = -1; instruction_t inst; +#ifdef WITH_FLOW_CTRL +int test_addr = -1; +int jump_addr = -1; +int loop_start_addr = -1; +int loop_end_addr = -1; +int counter = 0; +#endif + +vec4 wpos = gl_FragCoord * vec4(abs(wpos_scale), wpos_scale, 1., 1.) + vec4(0., wpos_bias, 0., 0.); +vec4 fogc = fetch_fog_value(fog_mode, in_regs[5]); + vec4 read_src(const in int index) { const uint type = GET_BITS(index + 1, 0, 2); @@ -173,14 +184,13 @@ vec4 read_src(const in int index) switch (i) { case 0: - // TODO: wpos - value = vec4(0.); break; + value = wpos; break; case 1: value = gl_FrontFacing? in_regs[3] : in_regs[1]; break; case 2: value = gl_FrontFacing? 
in_regs[4] : in_regs[2]; break; case 3: - value = fetch_fog_value(fog_mode, in_regs[5]); break; + value = fogc; break; case 13: value = in_regs[6]; break; case 14: @@ -223,6 +233,40 @@ vec4 read_cond() return shuffle(cc[GET_BITS(1, 31, 1)], GET_BITS(1, 21, 8)); } +#if defined(WITH_FLOW_CTRL) || defined(WITH_KIL) + +bool check_cond() +{ + const uint exec_mask = GET_BITS(1, 18, 3); + if (exec_mask == 0x7) + { + return true; + } + else + { + const vec4 cond = read_cond(); + switch (exec_mask) + { + case EXEC_GT | EXEC_EQ: + return any(greaterThanEqual(cond, vec4(0.))); + case EXEC_LT | EXEC_EQ: + return any(lessThanEqual(cond, vec4(0.))); + case EXEC_LT | EXEC_GT: + return any(notEqual(cond, vec4(0.))); + case EXEC_GT: + return any(greaterThan(cond, vec4(0.))); + case EXEC_LT: + return any(lessThan(cond, vec4(0.))); + case EXEC_EQ: + return any(equal(cond, vec4(0.))); + default: + return false; + } + } +} + +#endif + #ifdef WITH_TEXTURES vec4 _texture(in vec4 coord, float bias) @@ -236,19 +280,25 @@ vec4 _texture(in vec4 coord, float bias) const uint type = bitfieldExtract(texture_control, int(tex_num + tex_num), 2); coord.xy *= texture_parameters[tex_num].scale; + vec4 value; switch (type) { case 0: - return texture(SAMPLER1D(tex_num), coord.x, bias); + value = texture(SAMPLER1D(tex_num), coord.x, bias); break; case 1: - return texture(SAMPLER2D(tex_num), coord.xy, bias); + value = texture(SAMPLER2D(tex_num), coord.xy, bias); break; case 2: - return texture(SAMPLER3D(tex_num), coord.xyz, bias); + value = texture(SAMPLERCUBE(tex_num), coord.xyz, bias); break; case 3: - return texture(SAMPLERCUBE(tex_num), coord.xyz, bias); + value = texture(SAMPLER3D(tex_num), coord.xyz, bias); break; } - return vec4(0.); + if (TEST_BIT(0, 21)) + { + value = fma(value, vec4(2.), vec4(-1.)); + } + + return value; } vec4 _textureLod(in vec4 coord, float lod) @@ -262,19 +312,25 @@ vec4 _textureLod(in vec4 coord, float lod) const uint type = bitfieldExtract(texture_control, int(tex_num + 
tex_num), 2); coord.xy *= texture_parameters[tex_num].scale; + vec4 value; switch (type) { case 0: - return textureLod(SAMPLER1D(tex_num), coord.x, lod); + value = textureLod(SAMPLER1D(tex_num), coord.x, lod); break; case 1: - return textureLod(SAMPLER2D(tex_num), coord.xy, lod); + value = textureLod(SAMPLER2D(tex_num), coord.xy, lod); break; case 2: - return textureLod(SAMPLER3D(tex_num), coord.xyz, lod); + value = textureLod(SAMPLERCUBE(tex_num), coord.xyz, lod); break; case 3: - return textureLod(SAMPLERCUBE(tex_num), coord.xyz, lod); + value = textureLod(SAMPLER3D(tex_num), coord.xyz, lod); break; } - return vec4(0.); + if (TEST_BIT(0, 21)) + { + value = fma(value, vec4(2.), vec4(-1.)); + } + + return value; } #endif @@ -359,7 +415,9 @@ void initialize() regs16[j++] = vec4(0.); register_count--; } -} +})" + +R"( void main() { @@ -374,6 +432,28 @@ void main() ip += inst_length; inst_length = 1; +#ifdef WITH_FLOW_CTRL + if (ip == test_addr) + { + ip = jump_addr; + test_addr = -1; + jump_addr = -1; + } + else if (ip == loop_end_addr) + { + if (counter > 0) + { + counter--; + ip = loop_start_addr; + } + else + { + loop_end_addr = -1; + loop_start_addr = -1; + } + } +#endif + // Decode instruction // endian swap + word swap inst.words = @@ -383,6 +463,64 @@ void main() inst.opcode = GET_BITS(0, 24, 6); inst.end = TEST_BIT(0, 0); +#ifdef WITH_FLOW_CTRL + if (TEST_BIT(2, 31)) + { + // Flow control + switch (inst.opcode | (1 << 6)) + { + //case RSX_FP_OPCODE_CAL: + // Function call not yet found in the wild for this hw class + case RSX_FP_OPCODE_RET: + inst.end = true; + continue; + case RSX_FP_OPCODE_IFE: + if (check_cond()) + { + // Go down IF path + if (inst.words.z < inst.words.w) + { + test_addr = int(inst.words.z >> 2); + jump_addr = int(inst.words.w >> 2); + } + // If simple IF..ENDIF, do nothing + } + else + { + // Go to ELSE path + ip = int(inst.words.z >> 2); + inst_length = 0; + } + continue; + case RSX_FP_OPCODE_LOOP: + case RSX_FP_OPCODE_REP: + if 
(check_cond()) + { + counter = int(GET_BITS(2, 2, 8) - GET_BITS(2, 10, 8)); + counter /= int(GET_BITS(2, 19, 8)); + loop_start_addr = ip + 1; + loop_end_addr = int(inst.words.w >> 2); + } + else + { + ip = int(inst.words.w >> 2); + inst_length = 0; + } + continue; + case RSX_FP_OPCODE_BRK: + if (loop_end_addr > 0) + { + ip = loop_end_addr; + inst_length = 0; + counter = 0; + } + continue; + } + + continue; + } +#endif + // Class 1, no input/output switch (inst.opcode) { @@ -390,8 +528,15 @@ void main() case RSX_FP_OPCODE_FENCT: case RSX_FP_OPCODE_FENCB: continue; +#ifdef WITH_KIL case RSX_FP_OPCODE_KIL: - discard; return; + if (check_cond()) + { + discard; + return; + } + continue; +#endif } // Class 2, 1 input @@ -431,6 +576,31 @@ void main() #ifdef WITH_TEXTURES case RSX_FP_OPCODE_TEX: value = _texture(s0, 0.f); break; + case RSX_FP_OPCODE_TXP: + value = _texture(vec4(s0.xyz / s0.w, s0.w), 0.f); break; +#endif + +#ifdef WITH_PACKING + case RSX_FP_OPCODE_PK2: + value = vec4(uintBitsToFloat(packHalf2x16(s0.xy))); break; + case RSX_FP_OPCODE_PK4: + value = vec4(uintBitsToFloat(packSnorm4x8(s0))); break; + case RSX_FP_OPCODE_PK16: + value = vec4(uintBitsToFloat(packSnorm2x16(s0.xy))); break; + case RSX_FP_OPCODE_PKG: + // Should be similar to PKB but with gamma correction, see description of PK4UBG in khronos page + case RSX_FP_OPCODE_PKB: + value = vec4(uintBitsToFloat(packUnorm4x8(s0))); break; + case RSX_FP_OPCODE_UP2: + value = unpackHalf2x16(floatBitsToUint(s0.x)).xyxy; break; + case RSX_FP_OPCODE_UP4: + value = unpackSnorm4x8(floatBitsToUint(s0.x)); break; + case RSX_FP_OPCODE_UP16: + value = unpackSnorm2x16(floatBitsToUint(s0.x)).xyxy; break; + case RSX_FP_OPCODE_UPG: + // Same as UPB with gamma correction + case RSX_FP_OPCODE_UPB: + value = unpackUnorm4x8(floatBitsToUint(s0.x)); break; #endif default: handled = false; @@ -474,12 +644,13 @@ void main() case RSX_FP_OPCODE_POW: value = pow(s0, s1).xxxx; break; case RSX_FP_OPCODE_DIV: - value = s0 / s1.xxxx; + 
value = s0 / s1.xxxx; break; case RSX_FP_OPCODE_DIVSQ: value = s0 * inversesqrt(s1.xxxx); break; + case RSX_FP_OPCODE_REFL: + value = reflect(s0, s1); break; #ifdef WITH_TEXTURES - //case RSX_FP_OPCODE_TXP: //case RSX_FP_OPCODE_TXD: case RSX_FP_OPCODE_TXL: value = _textureLod(s0, s1.x); break; @@ -507,48 +678,27 @@ void main() value = dot(s0.xy, s1.xy).xxxx + s2.xxxx; break; } } - - // Flow control -/* case RSX_FP_OPCODE_BRK: - case RSX_FP_OPCODE_CAL: - case RSX_FP_OPCODE_IFE: - case RSX_FP_OPCODE_LOOP: - case RSX_FP_OPCODE_REP: - case RSX_FP_OPCODE_RET: - +#if 0 // Other - case RSX_FP_OPCODE_PK4: - case RSX_FP_OPCODE_UP4: + case RSX_FP_OPCODE_BEM: + case RSX_FP_OPCODE_BEMLUM: case RSX_FP_OPCODE_LIT: case RSX_FP_OPCODE_LIF: - case RSX_FP_OPCODE_PK2: - case RSX_FP_OPCODE_FENCT: - case RSX_FP_OPCODE_FENCB: - case RSX_FP_OPCODE_UP2: - case RSX_FP_OPCODE_PKB: - case RSX_FP_OPCODE_UPB: - case RSX_FP_OPCODE_PK16: - case RSX_FP_OPCODE_UP16: - case RSX_FP_OPCODE_BEM: - case RSX_FP_OPCODE_PKG: - case RSX_FP_OPCODE_UPG: - case RSX_FP_OPCODE_BEMLUM: - case RSX_FP_OPCODE_REFL: - case RSX_FP_OPCODE_TIMESWTEX:*/ - + case RSX_FP_OPCODE_TIMESWTEX: +#endif write_dst(value); } #ifdef WITH_HALF_OUTPUT_REGISTER ocol0 = regs16[0]; ocol1 = regs16[4]; - ocol1 = regs16[6]; - ocol1 = regs16[8]; + ocol2 = regs16[6]; + ocol3 = regs16[8]; #else ocol0 = regs32[0]; ocol1 = regs32[2]; - ocol1 = regs32[3]; - ocol1 = regs32[4]; + ocol2 = regs32[3]; + ocol3 = regs32[4]; #endif #ifdef WITH_DEPTH_EXPORT @@ -560,7 +710,7 @@ void main() if (ocol0.a < alpha_ref) discard; // gequal #endif #ifdef ALPHA_TEST_GREATER - if (ocol0.a > alpha_ref) discard; // greater + if (ocol0.a <= alpha_ref) discard; // greater #endif #ifdef ALPHA_TEST_LESS if (ocol0.a >= alpha_ref) discard; // less diff --git a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl index 8e67a4fe8c..5e5427c2f4 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl +++ 
b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl @@ -191,6 +191,29 @@ vec4 _distance(const in vec4 a, const in vec4 b) return vec4(1., a.y * b.y, a.z, b.w); } +bvec4 test_cond(const in vec4 cond, const in uint mode) +{ + switch (mode) + { + case EXEC_GT | EXEC_EQ | EXEC_LT: + return bvec4(true); + case EXEC_GT | EXEC_EQ: + return greaterThanEqual(cond, vec4(0.)); + case EXEC_LT | EXEC_EQ: + return lessThanEqual(cond, vec4(0.)); + case EXEC_LT | EXEC_GT: + return notEqual(cond, vec4(0.)); + case EXEC_GT: + return greaterThan(cond, vec4(0.)); + case EXEC_LT: + return lessThan(cond, vec4(0.)); + case EXEC_EQ: + return equal(cond, vec4(0.)); + default: + return bvec4(false); + } +} + // Local registers uvec4 instr; vec4 temp[32]; @@ -202,6 +225,11 @@ D1 d1; D2 d2; D3 d3; +vec4 get_cond() +{ + return shuffle(cc[d0.cond_reg_sel_1], d0.swizzle); +} + void write_sca(in float value) { if (d0.saturate) @@ -229,23 +257,30 @@ void write_vec(in vec4 value) value = clamp(value, 0, 1); } + bvec4 write_mask = d3.vec_mask; + if (d0.cond_test_enable) + { + const bvec4 mask = test_cond(get_cond(), d0.cond); + write_mask = bvec4(uvec4(write_mask) & uvec4(mask)); + } + if (d0.dst_tmp == 0x3f && !d0.vec_result) { if (d0.cond_update_enable_1) { - reg_mov(cc[d0.cond_reg_sel_1], value, d3.vec_mask); + reg_mov(cc[d0.cond_reg_sel_1], value, write_mask); } } else { if (d0.vec_result && d3.dst < 16) { - reg_mov(dest[d3.dst], value, d3.vec_mask); + reg_mov(dest[d3.dst], value, write_mask); } if (d0.dst_tmp != 0x3f) { - reg_mov(temp[d0.dst_tmp], value, d3.vec_mask); + reg_mov(temp[d0.dst_tmp], value, write_mask); } } } @@ -286,34 +321,12 @@ bool static_branch() return (cond == actual); } -bvec4 test_cond(vec4 cond, uint mode) -{ - switch (mode) - { - case EXEC_GT | EXEC_EQ: - return greaterThanEqual(cond, vec4(0.)); - case EXEC_LT | EXEC_EQ: - return lessThanEqual(cond, vec4(0.)); - case EXEC_LT | EXEC_GT: - return notEqual(cond, vec4(0.)); - case EXEC_GT: - return greaterThan(cond, 
vec4(0.)); - case EXEC_LT: - return lessThan(cond, vec4(0.)); - case EXEC_EQ: - return equal(cond, vec4(0.)); - } - - return bvec4(false); -} - bool dynamic_branch() { if (d0.cond == (EXEC_LT | EXEC_GT | EXEC_EQ)) return true; if (d0.cond == 0) return false; - vec4 cond = shuffle(cc[d0.cond_reg_sel_1], d0.swizzle); - return any(test_cond(cond, d0.cond)); + return any(test_cond(get_cond(), d0.cond)); } vec4 read_src(const in int index) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 185417f7b6..86db857751 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -328,61 +328,81 @@ size_t fragment_program_utils::get_fragment_program_ucode_size(const void* ptr) fragment_program_utils::fragment_program_metadata fragment_program_utils::analyse_fragment_program(const void* ptr) { + fragment_program_utils::fragment_program_metadata result{}; + result.program_start_offset = UINT32_MAX; const auto instBuffer = ptr; s32 index = 0; - s32 program_offset = -1; - u32 ucode_size = 0; - u32 constants_size = 0; - u16 textures_mask = 0; while (true) { const auto inst = v128::loadu(instBuffer, index); - const u32 opcode = (inst._u32[0] >> 16) & 0x3F; - if (opcode) + // Check for opcode high bit which indicates a branch instruction (opcode 0x40...0x45) + if (inst._u32[2] & (1 << 23)) { - if (program_offset < 0) - program_offset = index * 16; + result.has_branch_instructions = true; + } + else + { + const u32 opcode = (inst._u32[0] >> 16) & 0x3F; + if (opcode) + { + if (result.program_start_offset == umax) + result.program_start_offset = index * 16; - switch(opcode) - { - case RSX_FP_OPCODE_TEX: - case RSX_FP_OPCODE_TEXBEM: - case RSX_FP_OPCODE_TXP: - case RSX_FP_OPCODE_TXPBEM: - case RSX_FP_OPCODE_TXD: - case RSX_FP_OPCODE_TXB: - case RSX_FP_OPCODE_TXL: - { - //Bits 17-20 of word 1, swapped within u16 sections - //Bits 16-23 are swapped into the upper 8 bits (24-31) - const
u32 tex_num = (inst._u32[0] >> 25) & 15; - textures_mask |= (1 << tex_num); - break; - } + switch (opcode) + { + case RSX_FP_OPCODE_TEX: + case RSX_FP_OPCODE_TEXBEM: + case RSX_FP_OPCODE_TXP: + case RSX_FP_OPCODE_TXPBEM: + case RSX_FP_OPCODE_TXD: + case RSX_FP_OPCODE_TXB: + case RSX_FP_OPCODE_TXL: + { + //Bits 17-20 of word 1, swapped within u16 sections + //Bits 16-23 are swapped into the upper 8 bits (24-31) + const u32 tex_num = (inst._u32[0] >> 25) & 15; + result.referenced_textures_mask |= (1 << tex_num); + break; + } + case RSX_FP_OPCODE_PK4: + case RSX_FP_OPCODE_UP4: + case RSX_FP_OPCODE_PK2: + case RSX_FP_OPCODE_UP2: + case RSX_FP_OPCODE_PKB: + case RSX_FP_OPCODE_UPB: + case RSX_FP_OPCODE_PK16: + case RSX_FP_OPCODE_UP16: + case RSX_FP_OPCODE_PKG: + case RSX_FP_OPCODE_UPG: + { + result.has_pack_instructions = true; + break; + } + } } if (is_constant(inst._u32[1]) || is_constant(inst._u32[2]) || is_constant(inst._u32[3])) { //Instruction references constant, skip one slot occupied by data index++; - ucode_size += 16; - constants_size += 16; + result.program_ucode_length += 16; + result.program_constants_buffer_length += 16; } } - if (program_offset >= 0) + if (result.program_start_offset != umax) { - ucode_size += 16; + result.program_ucode_length += 16; } if ((inst._u32[0] >> 8) & 0x1) { - if (program_offset < 0) + if (result.program_start_offset == umax) { - program_offset = index * 16; - ucode_size = 16; + result.program_start_offset = index * 16; + result.program_ucode_length = 16; } break; @@ -391,7 +411,7 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys index++; } - return{ static_cast(program_offset), ucode_size, constants_size, textures_mask }; + return result; } size_t fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentProgram& program) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 9cddb4f527..b78bb8200e 100644 ---
a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -50,6 +50,9 @@ namespace program_hash_util u32 program_ucode_length; u32 program_constants_buffer_length; u16 referenced_textures_mask; + + bool has_pack_instructions; + bool has_branch_instructions; }; /** diff --git a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h index c11bf1aae9..56b863bb65 100644 --- a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h @@ -16,6 +16,9 @@ namespace program_common COMPILER_OPT_ENABLE_ALPHA_TEST_L = 64, COMPILER_OPT_ENABLE_ALPHA_TEST_EQ = 128, COMPILER_OPT_ENABLE_ALPHA_TEST_NE = 256, + COMPILER_OPT_ENABLE_FLOW_CTRL = 512, + COMPILER_OPT_ENABLE_PACKING = 1024, + COMPILER_OPT_ENABLE_KIL = 2048 }; static std::string get_vertex_interpreter() diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index 24a98f0da3..97ab04a2b8 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -95,8 +95,10 @@ namespace gl if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; - + if (rsx::method_registers.shader_control() & RSX_SHADER_CONTROL_USES_KIL) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; + if (metadata.has_branch_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; + if (metadata.has_pack_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]] { @@ -250,6 +252,21 @@ namespace gl builder << 
"#define WITH_DEPTH_EXPORT\n"; } + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL) + { + builder << "#define WITH_FLOW_CTRL\n"; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING) + { + builder << "#define WITH_PACKING\n"; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL) + { + builder << "#define WITH_KIL\n"; + } + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) { builder << "#define WITH_TEXTURES\n\n"; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 1f5c377d26..585e6ae610 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -142,6 +142,21 @@ namespace vk builder << "#define WITH_DEPTH_EXPORT\n"; } + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL) + { + builder << "#define WITH_FLOW_CTRL\n"; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING) + { + builder << "#define WITH_PACKING\n"; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL) + { + builder << "#define WITH_KIL\n"; + } + const char* type_names[] = { "sampler1D", "sampler2D", "sampler3D", "samplerCube" }; if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) { @@ -561,7 +576,10 @@ namespace vk if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; + if (rsx::method_registers.shader_control() & RSX_SHADER_CONTROL_USES_KIL) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; if (metadata.referenced_textures_mask) key.compiler_opt |= 
program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; + if (metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; + if (metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; if (m_current_key == key) [[likely]] { diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index d717083d8e..bafc7a5f64 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "Utilities/types.h" namespace rsx