From 29d2ea7513fc18ddc700446114520a79af9fa05f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 17 Apr 2014 03:28:21 +0400 Subject: [PATCH] Some bugs fixed --- asmjit | 2 +- rpcs3/Emu/Cell/SPUInterpreter.h | 49 +++++++++++++++------------- rpcs3/Emu/Cell/SPURecompiler.h | 41 ++++++++++++++++------- rpcs3/Emu/Cell/SPURecompilerCore.cpp | 27 +++++++++++++-- 4 files changed, 80 insertions(+), 39 deletions(-) diff --git a/asmjit b/asmjit index 906f89bfc5..6c50029aa0 160000 --- a/asmjit +++ b/asmjit @@ -1 +1 @@ -Subproject commit 906f89bfc59138f0e4c7c43551f16f8c43887572 +Subproject commit 6c50029aa0aa23722b3c4c507113afa04191e5df diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 66f259e591..269fcd2043 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -20,7 +20,9 @@ unsigned char reg_h[20]; sha1((const unsigned char*)CPU.GPR, sizeof(CPU.GPR), reg_h); \ ConLog.Write("Mem hash: 0x%llx, reg hash: 0x%llx", *(u64*)mem_h, *(u64*)reg_h); -#define LOG2_OPCODE(...) // ConLog.Write(__FUNCTION__ "(): " __VA_ARGS__) +#define LOG2_OPCODE(...) //MEM_AND_REG_HASH(); ConLog.Write(__FUNCTION__ "(): " __VA_ARGS__) + +#define LOG5_OPCODE(...) /// class SPUInterpreter : public SPUOpcodes { @@ -41,6 +43,7 @@ private: void STOP(u32 code) { CPU.DoStop(code); + LOG2_OPCODE(); } void LNOP() { @@ -278,12 +281,12 @@ private: u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); if (CPU.GPR[rt]._u32[3] == 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void BINZ(u32 rt, u32 ra) @@ -291,12 +294,12 @@ private: u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); if (CPU.GPR[rt]._u32[3] != 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void BIHZ(u32 rt, u32 ra) @@ -304,12 +307,12 @@ private: u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); if (CPU.GPR[rt]._u16[6] == 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void BIHNZ(u32 rt, u32 ra) @@ -317,12 +320,12 @@ private: u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); if (CPU.GPR[rt]._u16[6] != 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void STOPD(u32 rc, u32 ra, u32 rb) @@ -345,7 +348,7 @@ private: void BI(u32 ra) { u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void BISL(u32 rt, u32 ra) @@ -353,7 +356,7 @@ private: u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void IRET(u32 ra) @@ -1094,12 +1097,12 @@ private: u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] == 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void STQA(u32 rt, s32 i16) @@ -1119,12 +1122,12 @@ private: u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] != 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void BRHZ(u32 rt, s32 i16) @@ -1132,12 +1135,12 @@ private: u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u16[6] == 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void BRHNZ(u32 rt, s32 i16) @@ -1145,12 +1148,12 @@ private: u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u16[6] != 0) { - LOG2_OPCODE("taken (0x%llx)", target); + LOG5_OPCODE("taken (0x%llx)", target); CPU.SetBranch(target); } else { - LOG2_OPCODE("not taken (0x%llx)", target); + LOG5_OPCODE("not taken (0x%llx)", target); } } void STQR(u32 rt, s32 i16) @@ -1168,7 +1171,7 @@ private: void BRA(s32 i16) { u64 target = branchTarget(0, i16); - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void LQA(u32 rt, s32 i16) @@ -1188,13 +1191,13 @@ private: u64 target = branchTarget(0, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void BR(s32 i16) { u64 target = branchTarget(CPU.PC, i16); - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void FSMBI(u32 rt, s32 i16) @@ -1218,7 +1221,7 @@ private: u64 target = branchTarget(CPU.PC, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - LOG2_OPCODE("branch (0x%llx)", target); + LOG5_OPCODE("branch (0x%llx)", target); CPU.SetBranch(target); } void LQR(u32 rt, s32 i16) diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 99a0d66c0c..82468f002b 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -212,7 +212,6 @@ public: xmm_var[i].taken = true; xmm_var[i].got = false; LOG4_OPCODE("free reg taken (i=%d)", i); - xmm_var[i].reg = -1; return xmm_var[i]; } } @@ -220,12 +219,12 @@ public: { if (!xmm_var[i].taken) { - //(saving cached data?) + // (saving cached data?) //c.movaps(cpu_xmm(GPR[xmm_var[i].reg]), *xmm_var[i].data); xmm_var[i].taken = true; xmm_var[i].got = false; LOG4_OPCODE("cached reg taken (i=%d): GPR[%d] lost", i, xmm_var[i].reg); - xmm_var[i].reg = -1; + xmm_var[i].reg = -1; // ??? return xmm_var[i]; } } @@ -245,7 +244,7 @@ public: if (xmm_var[i].taken) throw "XmmGet(): xmm_var is taken"; xmm_var[i].taken = true; xmm_var[i].got = false; - xmm_var[i].reg = -1; + //xmm_var[i].reg = -1; for (u32 j = i + 1; j < 16; j++) { if (xmm_var[j].reg == reg) throw "XmmGet(): xmm_var duplicate"; @@ -258,7 +257,7 @@ public: { res = &(XmmLink&)XmmAlloc(); c.movaps(*res->data, cpu_xmm(GPR[reg])); - res->reg = -1; + res->reg = -1; // ??? LOG4_OPCODE("* cached GPR[%d] not found", reg); } return *res; @@ -268,7 +267,7 @@ public: { XmmLink* res = &(XmmLink&)XmmAlloc(); c.movaps(*res->data, *from.data); - res->reg = -1; + res->reg = -1; // ??? LOG4_OPCODE("*"); return *res; } @@ -329,7 +328,7 @@ public: } LOG4_OPCODE("GPR[%d] finalized (i=%d), GPR[%d] replaced", reg, i, xmm_var[i].reg); // (to disable caching:) - reg = -1; + //reg = -1; xmm_var[i].reg = reg; xmm_var[i].taken = false; return; @@ -589,7 +588,9 @@ private: WRAPPER_END(rt, ra, rb, 0); // AVX2: masking with 0x3f + VPSLLVD may be better - /*for (u32 i = 0; i < 4; i++) + /*XmmInvalidate(rt); + + for (u32 i = 0; i < 4; i++) { GpVar v0(c, kVarTypeUInt32); c.mov(v0, cpu_dword(GPR[ra]._u32[i])); @@ -861,7 +862,9 @@ private: WRAPPER_END(ra, rt, 0, 0); // TODO - /*GpVar v(c, kVarTypeUInt32); + /*XmmInvalidate(rt); + + GpVar v(c, kVarTypeUInt32); c.mov(v, cpu_dword(GPR[rt]._u32[3])); switch (ra) { @@ -977,6 +980,8 @@ private: } void BISL(u32 rt, u32 ra) { + XmmInvalidate(rt); + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; @@ -1084,6 +1089,8 @@ private: } void LQX(u32 rt, u32 ra, u32 rb) { + XmmInvalidate(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); if (ra == rb) { @@ -2223,6 +2230,8 @@ private: } void LQA(u32 rt, s32 i16) { + XmmInvalidate(rt); + const u32 lsa = (i16 << 2) & 0x3fff0; c.mov(*qw0, qword_ptr(*ls_var, lsa)); c.mov(*qw1, qword_ptr(*ls_var, lsa + 8)); @@ -2234,6 +2243,8 @@ private: } void BRASL(u32 rt, s32 i16) { + XmmInvalidate(rt); + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; @@ -2272,6 +2283,8 @@ private: } void BRSL(u32 rt, s32 i16) { + XmmInvalidate(rt); + c.mov(cpu_qword(PC), (u32)CPU.PC); do_finalize = true; @@ -2285,6 +2298,8 @@ private: } void LQR(u32 rt, s32 i16) { + XmmInvalidate(rt); + const u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; c.mov(*qw0, qword_ptr(*ls_var, lsa)); c.mov(*qw1, qword_ptr(*ls_var, lsa + 8)); @@ -2303,7 +2318,7 @@ private: } else if (i16 == -1) { - c.cmpps(vr.get(), vr.get(), 0); + c.pcmpeqd(vr.get(), vr.get()); } else { @@ -2321,7 +2336,7 @@ private: } else if (i16 == -1) { - c.cmpps(vr.get(), vr.get(), 0); + c.pcmpeqd(vr.get(), vr.get()); c.pslld(vr.get(), 16); } else @@ -2362,7 +2377,7 @@ private: { // fill with 1 const XmmLink& v1 = XmmAlloc(); - c.cmpps(v1.get(), v1.get(), 0); + c.pcmpeqd(v1.get(), v1.get()); XmmFinalize(v1, rt); } else if (i10 == 0) @@ -2515,6 +2530,8 @@ private: } void LQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 while decoding { + XmmInvalidate(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); if (i10) c.add(*addr, i10); c.and_(*addr, 0x3fff0); diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp index a5519a9342..35f784d857 100644 --- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -144,6 +144,7 @@ void SPURecompilerCore::Compile(u16 pos) u8 SPURecompilerCore::DecodeMemory(const u64 address) { + assert(CPU.dmac.ls_offset == address - CPU.PC); const u64 m_offset = CPU.dmac.ls_offset; const u16 pos = (CPU.PC >> 2); @@ -179,10 +180,11 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) } } + bool did_compile = false; if (!entry[pos].pointer) { - // compile from current position to nearest dynamic or statically unresolved branch, zero data or something other Compile(pos); + did_compile = true; if (entry[pos].valid == 0) { ConLog.Error("SPURecompilerCore::Compile(ls_addr=0x%x): branch to 0x0 opcode", pos * sizeof(u32)); @@ -197,17 +199,36 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) Emu.Pause(); return 0; } - // jump + typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos); Func func = asmjit_cast(entry[pos].pointer); void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection + //if (did_compile) + { + //LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + //if (pos == 0x19c >> 2) + { + //Emu.Pause(); + //for (uint i = 0; i < 128; ++i) ConLog.Write("r%d = 0x%s", i, CPU.GPR[i].ToString().c_str()); + } + } + u16 res = pos; res = (u16)func(cpu, &Memory[m_offset], &g_spu_imm, res); - LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + if (did_compile) + { + //LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + //if (pos == 0x340 >> 2) + { + //Emu.Pause(); + //for (uint i = 0; i < 128; ++i) ConLog.Write("r%d = 0x%s", i, CPU.GPR[i].ToString().c_str()); + } + } + if ((res - 1) == (CPU.PC >> 2)) { return 4;