diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index fe55ed219e..353cccb537 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -4,6 +4,7 @@ #include "Emu/Memory/Memory.h" #include "Emu/Cell/SPUThread.h" #include "Emu/SysCalls/SysCalls.h" +#include "Crypto/sha1.h" #define UNIMPLEMENTED() UNK(__FUNCTION__) @@ -14,6 +15,8 @@ __m128d m128d; } __u32x4; */ +#define LOG2_OPCODE(...) //unsigned char cs[20]; sha1(&Memory[CPU.dmac.ls_offset], 256*1024, cs); ConLog.Write("Mem Dump: 0x%llx", *(u64*)cs); ConLog.Write(__FUNCTION__ "(): " __VA_ARGS__) + class SPUInterpreter : public SPUOpcodes { private: @@ -267,23 +270,55 @@ private: } void BIZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u32[3] == 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void BINZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u32[3] != 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void BIHZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u16[6] == 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void BIHNZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u16[6] != 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void STOPD(u32 rc, u32 ra, u32 rb) { @@ -304,14 +339,17 @@ private: } void BI(u32 ra) { - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void BISL(u32 rt, u32 ra) { - const u32 NewPC = CPU.GPR[ra]._u32[3]; + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(NewPC, 0)); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void IRET(u32 ra) { @@ -1048,8 +1086,16 @@ private: //0 - 8 void BRZ(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void STQA(u32 rt, s32 i16) { @@ -1065,18 +1111,42 @@ private: } void BRNZ(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void BRHZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + if (CPU.GPR[rt]._u16[6] == 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void BRHNZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + if (CPU.GPR[rt]._u16[6] != 0) + { + LOG2_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG2_OPCODE("not taken (0x%llx)", target); + } } void STQR(u32 rt, s32 i16) { @@ -1092,7 +1162,9 @@ private: } void BRA(s32 i16) { - CPU.SetBranch(branchTarget(0, i16)); + u64 target = branchTarget(0, i16); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void LQA(u32 rt, s32 i16) { @@ -1108,13 +1180,17 @@ private: } void BRASL(u32 rt, s32 i16) { + u64 target = branchTarget(0, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(0, i16)); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void BR(s32 i16) { - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void FSMBI(u32 rt, s32 i16) { @@ -1134,9 +1210,11 @@ private: } void BRSL(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(CPU.PC, i16)); + LOG2_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void LQR(u32 rt, s32 i16) { @@ -1236,6 +1314,7 @@ private: Emu.Pause(); return; } + //ConLog.Write("STQD(lsa=0x%x): GPR[%d] (0x%llx%llx)", lsa, rt, CPU.GPR[rt]._u64[1], CPU.GPR[rt]._u64[0]); CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); } void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 478d5d3d16..a1e636ee2b 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -87,7 +87,7 @@ public: struct SPURecEntry { - u16 host; // absolute position of first instruction of current block + //u16 host; // absolute position of first instruction of current block (not used now) u16 count; // count of instructions compiled from current point (and to be checked) u32 valid; // copy of valid opcode for validation void* pointer; // pointer to executable memory object @@ -114,6 +114,8 @@ public: #define imm_xmm(x) oword_ptr(*imm_var, offsetof(SPUImmTable, x)) +#define LOG_OPCODE(...) //ConLog.Write(__FUNCTION__ "()" __VA_ARGS__) + #define WRAPPER_BEGIN(a0, a1, a2, a3) struct opcode_wrapper \ { \ static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \ @@ -122,11 +124,13 @@ public: #define WRAPPER_END(a0, a1, a2, a3) } \ }; \ + c.mov(cpu_qword(PC), (u32)CPU.PC); \ X86X64CallNode* call = c.call(imm_ptr(&opcode_wrapper::opcode), kFuncConvHost, FuncBuilder4()); \ call->setArg(0, imm_u(a0)); \ call->setArg(1, imm_u(a1)); \ call->setArg(2, imm_u(a2)); \ - call->setArg(3, imm_u(a3)); + call->setArg(3, imm_u(a3)); \ + LOG_OPCODE(); class SPURecompiler : public SPUOpcodes @@ -156,26 +160,31 @@ private: WRAPPER_END(code, 0, 0, 0); c.mov(*pos_var, (CPU.PC >> 2) + 1); do_finalize = true; - ConLog.Write("STOP(code=%d)", code); } void LNOP() { - /*c.mov(*pos_var, (CPU.PC >> 2) + 1); + c.mov(cpu_qword(PC), (u32)CPU.PC); + /* do_finalize = true; - ConLog.Write("LNOP()");*/ + c.mov(*pos_var, (CPU.PC >> 2) + 1); + */ + LOG_OPCODE(); } void SYNC(u32 Cbit) { + c.mov(cpu_qword(PC), (u32)CPU.PC); // This instruction must be used following a store instruction that modifies the instruction stream. c.mfence(); c.mov(*pos_var, (CPU.PC >> 2) + 1); do_finalize = true; - ConLog.Write("SYNC()"); + LOG_OPCODE(); } void DSYNC() { + c.mov(cpu_qword(PC), (u32)CPU.PC); // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. c.mfence(); + LOG_OPCODE(); } void MFSPR(u32 rt, u32 sa) { @@ -563,27 +572,55 @@ private: } void BIZ(u32 rt, u32 ra) { - UNIMPLEMENTED(); - if(CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmovne(*pos_var, pos_next); + c.shr(*pos_var, 2); + LOG_OPCODE(); } void BINZ(u32 rt, u32 ra) { - UNIMPLEMENTED(); - if(CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmove(*pos_var, pos_next); + c.shr(*pos_var, 2); + LOG_OPCODE(); } void BIHZ(u32 rt, u32 ra) { - UNIMPLEMENTED(); - if(CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmovne(*pos_var, pos_next); + c.shr(*pos_var, 2); + LOG_OPCODE(); } void BIHNZ(u32 rt, u32 ra) { - UNIMPLEMENTED(); - if(CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmove(*pos_var, pos_next); + c.shr(*pos_var, 2); + LOG_OPCODE(); } void STOPD(u32 rc, u32 ra, u32 rb) { @@ -606,23 +643,26 @@ private: } void BI(u32 ra) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); c.shr(*pos_var, 2); - //ConLog.Write("BI(ra=%d)", ra); + LOG_OPCODE(); } void BISL(u32 rt, u32 ra) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; - c.int3(); + c.xor_(*pos_var, *pos_var); c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var); c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var); c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var); c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); - c.mov(cpu_dword(GPR[rt]._u32[3]), (CPU.PC >> 2) + 1); + c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4); c.shr(*pos_var, 2); - ConLog.Write("BISL(rt=%d,ra=%d)", rt, ra); + LOG_OPCODE(); } void IRET(u32 ra) { @@ -635,6 +675,7 @@ private: } void HBR(u32 p, u32 ro, u32 ra) { + LOG_OPCODE(); } void GB(u32 rt, u32 ra) { @@ -885,7 +926,7 @@ private: void CHD(u32 rt, u32 ra, s32 i7) { WRAPPER_BEGIN(rt, ra, i7, zz); - const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xE; + const int t = (CPU.GPR[ra]._u32[3] + (s32)i7) & 0xE; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; @@ -895,7 +936,7 @@ private: void CWD(u32 rt, u32 ra, s32 i7) { WRAPPER_BEGIN(rt, ra, i7, zz); - const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xC; + const int t = (CPU.GPR[ra]._u32[3] + (s32)i7) & 0xC; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; @@ -1045,6 +1086,7 @@ private: } void NOP(u32 rt) { + LOG_OPCODE(); } void CGT(u32 rt, u32 ra, u32 rb) { @@ -1096,8 +1138,11 @@ private: //HGT uses signed values. HLGT uses unsigned values void HGT(u32 rt, s32 ra, s32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); if(CPU.GPR[ra]._i32[3] > CPU.GPR[rb]._i32[3]) CPU.Stop(); + WRAPPER_END(rt, ra, rb, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } void CLZ(u32 rt, u32 ra) { @@ -1287,8 +1332,11 @@ private: } void HLGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); if(CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3]) CPU.Stop(); + WRAPPER_END(rt, ra, rb, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } void DFMA(u32 rt, u32 ra, u32 rb) { @@ -1567,8 +1615,11 @@ private: } void HEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); if(CPU.GPR[ra]._i32[3] == CPU.GPR[rb]._i32[3]) CPU.Stop(); + WRAPPER_END(rt, ra, rb, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } //0 - 9 @@ -1687,13 +1738,15 @@ private: //0 - 8 void BRZ(u32 rt, s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); - c.cmovnz(*pos_var, pos_next); - //ConLog.Write("BRZ(rt=%d,i16=%d)", rt, i16); + c.cmovne(*pos_var, pos_next); + LOG_OPCODE(); } void STQA(u32 rt, s32 i16) { @@ -1711,45 +1764,50 @@ private: } void BRNZ(u32 rt, s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); - c.cmovz(*pos_var, pos_next); - //ConLog.Write("BRNZ(rt=%d,i16=%d)", rt, i16); + c.cmove(*pos_var, pos_next); + LOG_OPCODE(); } void BRHZ(u32 rt, s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.cmp(cpu_word(GPR[rt]._u16[6]), 0); c.cmovnz(*pos_var, pos_next); - ConLog.Write("BRHZ(rt=%d,i16=%d)", rt, i16); + LOG_OPCODE(); } void BRHNZ(u32 rt, s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.cmp(cpu_word(GPR[rt]._u16[6]), 0); c.cmovz(*pos_var, pos_next); - ConLog.Write("BRHNZ(rt=%d,i16=%d)", rt, i16); + LOG_OPCODE(); } void STQR(u32 rt, s32 i16) { WRAPPER_BEGIN(rt, i16, PC, zz); - u32 lsa = branchTarget(PC, i16) & 0x3fff0; + u32 lsa = branchTarget(PC, (s32)i16) & 0x3fff0; if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQR: bad lsa (0x%x)", lsa); Emu.Pause(); return; } - CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); WRAPPER_END(rt, i16, CPU.PC, 0); /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; @@ -1765,8 +1823,11 @@ private: } void BRA(s32 i16) { - UNIMPLEMENTED(); - CPU.SetBranch(branchTarget(0, i16)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*pos_var, branchTarget(0, i16) >> 2); + LOG_OPCODE(); } void LQA(u32 rt, s32 i16) { @@ -1784,16 +1845,24 @@ private: } void BRASL(u32 rt, s32 i16) { - UNIMPLEMENTED(); - CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(0, i16)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + GpVar v0(c, kVarTypeUInt64); + c.xor_(v0, v0); + c.mov(cpu_qword(GPR[rt]._u64[1]), v0); + c.mov(cpu_qword(GPR[rt]._u64[0]), v0); + c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4); + c.mov(*pos_var, branchTarget(0, i16) >> 2); + LOG_OPCODE(); } void BR(s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); - //ConLog.Write("BR(i16=%d)", i16); + LOG_OPCODE(); } void FSMBI(u32 rt, s32 i16) { @@ -1818,27 +1887,27 @@ private: } void BRSL(u32 rt, s32 i16) { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + GpVar v0(c, kVarTypeUInt64); c.xor_(v0, v0); c.mov(cpu_qword(GPR[rt]._u64[1]), v0); c.mov(cpu_qword(GPR[rt]._u64[0]), v0); - c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4); - - do_finalize = true; + c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); - //ConLog.Write("BRSL(rt=%d,i16=%d)", rt, i16); + LOG_OPCODE(); } void LQR(u32 rt, s32 i16) { WRAPPER_BEGIN(rt, i16, PC, zz); - u32 lsa = branchTarget(PC, i16) & 0x3fff0; + u32 lsa = branchTarget(PC, (s32)i16) & 0x3fff0; if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQR: bad lsa (0x%x)", lsa); Emu.Pause(); return; } - CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); WRAPPER_END(rt, i16, CPU.PC, 0); /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; @@ -1858,7 +1927,7 @@ private: CPU.GPR[rt]._i32[0] = CPU.GPR[rt]._i32[1] = CPU.GPR[rt]._i32[2] = - CPU.GPR[rt]._i32[3] = i16; + CPU.GPR[rt]._i32[3] = (s32)i16; WRAPPER_END(rt, i16, 0, 0); /*XmmVar v0(c); if (i16 == 0) @@ -1879,7 +1948,7 @@ private: { WRAPPER_BEGIN(rt, i16, yy, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = i16 << 16; + CPU.GPR[rt]._i32[w] = (s32)i16 << 16; WRAPPER_END(rt, i16, 0, 0); /*XmmVar v0(c); if (i16 == 0) @@ -2013,10 +2082,10 @@ private: void AI(u32 rt, u32 ra, s32 i10) { WRAPPER_BEGIN(rt, ra, i10, zz); - CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + i10; - CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + i10; - CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + i10; - CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + i10; + CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + (s32)i10; + CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + (s32)i10; + CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + (s32)i10; + CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + (s32)i10; WRAPPER_END(rt, ra, i10, 0); /*XmmVar v0(c); if (i10 == 0) @@ -2050,13 +2119,14 @@ private: void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { WRAPPER_BEGIN(rt, i10, ra, zz); - const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; + const u32 lsa = (CPU.GPR[ra]._i32[3] + (s32)i10) & 0x3fff0; if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQD: bad lsa (0x%x)", lsa); Emu.Pause(); return; } + //ConLog.Write("wrapper::STQD (lsa=0x%x): GPR[%d] (0x%llx%llx)", lsa, rt, CPU.GPR[rt]._u64[1], CPU.GPR[rt]._u64[0]); CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); WRAPPER_END(rt, i10, ra, 0); /*GpVar lsa(c, kVarTypeUInt32); @@ -2076,7 +2146,7 @@ private: void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { WRAPPER_BEGIN(rt, i10, ra, zz); - const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; + const u32 lsa = (CPU.GPR[ra]._i32[3] + (s32)i10) & 0x3fff0; if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQD: bad lsa (0x%x)", lsa); @@ -2145,14 +2215,19 @@ private: } void HGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); - if(CPU.GPR[ra]._i32[3] > i10) CPU.Stop(); + WRAPPER_BEGIN(rt, ra, i10, zz); + if(CPU.GPR[ra]._i32[3] > (s32)i10) CPU.Stop(); + WRAPPER_END(rt, ra, i10, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } void CLGTI(u32 rt, u32 ra, s32 i10) { WRAPPER_BEGIN(rt, ra, i10, zz); - for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > (s32)i10 ? 0xffffffff : 0; + for (u32 i = 0; i < 4; ++i) + { + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000; + } WRAPPER_END(rt, ra, i10, 0); /*XmmVar v0(c); if (i10 == -1) @@ -2182,7 +2257,7 @@ private: WRAPPER_BEGIN(rt, ra, i10, zz); for(u32 i = 0; i < 8; ++i) { - CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)(s32)i10) ? 0xffff : 0x0000; + CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)i10) ? 0xffff : 0x0000; } WRAPPER_END(rt, ra, i10, 0); } @@ -2195,8 +2270,11 @@ private: } void HLGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); if(CPU.GPR[ra]._u32[3] > (u32)i10) CPU.Stop(); + WRAPPER_END(rt, ra, i10, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } void MPYI(u32 rt, u32 ra, s32 i10) { @@ -2236,20 +2314,22 @@ private: } void HEQI(u32 rt, u32 ra, s32 i10) { - // TODO - UNIMPLEMENTED(); - if(CPU.GPR[ra]._i32[3] == i10) CPU.Stop(); + WRAPPER_BEGIN(rt, ra, i10, zz); + if(CPU.GPR[ra]._i32[3] == (s32)i10) CPU.Stop(); + WRAPPER_END(rt, ra, i10, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; } //0 - 6 void HBRA(s32 ro, s32 i16) { //i16 is shifted left by 2 while decoding - //UNIMPLEMENTED(); + LOG_OPCODE(); } void HBRR(s32 ro, s32 i16) { - //UNIMPLEMENTED(); + LOG_OPCODE(); } void ILA(u32 rt, u32 i18) { @@ -2376,6 +2456,7 @@ private: void UNK(const std::string& err) { ConLog.Error(err + fmt::Format(" #pc: 0x%x", CPU.PC)); + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; Emu.Pause(); } diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp index 940ef44a4a..e7ecf4755e 100644 --- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -28,7 +28,8 @@ void SPURecompilerCore::Decode(const u32 code) // decode instruction and run wit void SPURecompilerCore::Compile(u16 pos) { compiler.addFunc(kFuncConvHost, FuncBuilder4()); - entry[pos].host = pos; + const u16 start = pos; + entry[start].count = 0; GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu"); compiler.setArg(0, cpu_var); @@ -60,32 +61,32 @@ void SPURecompilerCore::Compile(u16 pos) if (opcode) { (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode + entry[start].count++; } else { m_enc->do_finalize = true; } bool fin = m_enc->do_finalize; - entry[pos].valid = opcode; + entry[pos].valid = re(opcode); if (fin) break; CPU.PC += 4; pos++; - entry[pos].host = entry[pos - 1].host; } compiler.ret(pos_var); compiler.endFunc(); - entry[entry[pos].host].pointer = compiler.make(); + entry[start].pointer = compiler.make(); } u8 SPURecompilerCore::DecodeMemory(const u64 address) { - const u64 m_offset = address - CPU.PC; + const u64 m_offset = CPU.dmac.ls_offset; const u16 pos = (CPU.PC >> 2); //ConLog.Write("DecodeMemory: pos=%d", pos); - u32* ls = (u32*)Memory.VirtualToRealAddr(m_offset); + u32* ls = (u32*)&Memory[m_offset]; if (!pos) { @@ -98,7 +99,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) { // check data (hard way) bool is_valid = true; - for (u32 i = pos; i < entry[pos].count + pos; i++) + for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++) { if (entry[i].valid != ls[i]) { @@ -110,6 +111,9 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) if (!is_valid) { // TODO + ConLog.Error("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32)); + Emu.Pause(); + return 0; } } @@ -117,6 +121,12 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) { // compile from current position to nearest dynamic or statically unresolved branch, zero data or something other Compile(pos); + if (entry[pos].valid == 0) + { + ConLog.Error("SPURecompilerCore::Compile(ls_addr=0x%x): branch to 0x0 opcode", pos * sizeof(u32)); + Emu.Pause(); + return 0; + } } if (!entry[pos].pointer) @@ -128,16 +138,23 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) // jump typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos); - Func func = asmjit_cast(entry[entry[pos].host].pointer); + Func func = asmjit_cast(entry[pos].pointer); void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection - u16 res = (pos == entry[pos].host) ? 0 : pos; - res = (u16)func(cpu, ls, &g_spu_imm, res); + u16 res = pos; + res = (u16)func(cpu, &Memory[m_offset], &g_spu_imm, res); - CPU.SetBranch((u64)res << 2); - - return 0; + LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + if ((res - 1) == (CPU.PC >> 2)) + { + return 4; + } + else + { + CPU.SetBranch((u64)res << 2); + return 0; + } /*Decode(Memory.Read32(address)); return 4;*/ } \ No newline at end of file diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index a581130473..71d7bbc05e 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -300,19 +300,17 @@ public: #else static const bool x86 = true; #endif + private: union _CRT_ALIGN(8) { struct { volatile u32 m_index; u32 m_value[max_count]; }; - struct { - volatile u32 m_index2; - u16 m_val16[max_count * 2]; - }; volatile u64 m_indval; }; std::mutex m_lock; + public: Channel() { Init();