SPU JIT fix

Nothing changed in interpreter.
This commit is contained in:
Nekotekina 2014-04-07 17:06:13 +04:00
commit f9b68bc012
4 changed files with 280 additions and 105 deletions

View file

@ -4,6 +4,7 @@
#include "Emu/Memory/Memory.h" #include "Emu/Memory/Memory.h"
#include "Emu/Cell/SPUThread.h" #include "Emu/Cell/SPUThread.h"
#include "Emu/SysCalls/SysCalls.h" #include "Emu/SysCalls/SysCalls.h"
#include "Crypto/sha1.h"
#define UNIMPLEMENTED() UNK(__FUNCTION__) #define UNIMPLEMENTED() UNK(__FUNCTION__)
@ -14,6 +15,8 @@
__m128d m128d; __m128d m128d;
} __u32x4; */ } __u32x4; */
#define LOG2_OPCODE(...) //unsigned char cs[20]; sha1(&Memory[CPU.dmac.ls_offset], 256*1024, cs); ConLog.Write("Mem Dump: 0x%llx", *(u64*)cs); ConLog.Write(__FUNCTION__ "(): " __VA_ARGS__)
class SPUInterpreter : public SPUOpcodes class SPUInterpreter : public SPUOpcodes
{ {
private: private:
@ -267,23 +270,55 @@ private:
} }
void BIZ(u32 rt, u32 ra) void BIZ(u32 rt, u32 ra)
{ {
if(CPU.GPR[rt]._u32[3] == 0) u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); if (CPU.GPR[rt]._u32[3] == 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void BINZ(u32 rt, u32 ra) void BINZ(u32 rt, u32 ra)
{ {
if(CPU.GPR[rt]._u32[3] != 0) u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); if (CPU.GPR[rt]._u32[3] != 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void BIHZ(u32 rt, u32 ra) void BIHZ(u32 rt, u32 ra)
{ {
if(CPU.GPR[rt]._u16[6] == 0) u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); if (CPU.GPR[rt]._u16[6] == 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void BIHNZ(u32 rt, u32 ra) void BIHNZ(u32 rt, u32 ra)
{ {
if(CPU.GPR[rt]._u16[6] != 0) u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); if (CPU.GPR[rt]._u16[6] != 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void STOPD(u32 rc, u32 ra, u32 rb) void STOPD(u32 rc, u32 ra, u32 rb)
{ {
@ -304,14 +339,17 @@ private:
} }
void BI(u32 ra) void BI(u32 ra)
{ {
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void BISL(u32 rt, u32 ra) void BISL(u32 rt, u32 ra)
{ {
const u32 NewPC = CPU.GPR[ra]._u32[3]; u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.GPR[rt].Reset(); CPU.GPR[rt].Reset();
CPU.GPR[rt]._u32[3] = CPU.PC + 4; CPU.GPR[rt]._u32[3] = CPU.PC + 4;
CPU.SetBranch(branchTarget(NewPC, 0)); LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void IRET(u32 ra) void IRET(u32 ra)
{ {
@ -1048,8 +1086,16 @@ private:
//0 - 8 //0 - 8
void BRZ(u32 rt, s32 i16) void BRZ(u32 rt, s32 i16)
{ {
u64 target = branchTarget(CPU.PC, i16);
if (CPU.GPR[rt]._u32[3] == 0) if (CPU.GPR[rt]._u32[3] == 0)
CPU.SetBranch(branchTarget(CPU.PC, i16)); {
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void STQA(u32 rt, s32 i16) void STQA(u32 rt, s32 i16)
{ {
@ -1065,18 +1111,42 @@ private:
} }
void BRNZ(u32 rt, s32 i16) void BRNZ(u32 rt, s32 i16)
{ {
u64 target = branchTarget(CPU.PC, i16);
if (CPU.GPR[rt]._u32[3] != 0) if (CPU.GPR[rt]._u32[3] != 0)
CPU.SetBranch(branchTarget(CPU.PC, i16)); {
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void BRHZ(u32 rt, s32 i16) void BRHZ(u32 rt, s32 i16)
{ {
if (CPU.GPR[rt]._u16[6] == 0) u64 target = branchTarget(CPU.PC, i16);
CPU.SetBranch(branchTarget(CPU.PC, i16)); if (CPU.GPR[rt]._u16[6] == 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void BRHNZ(u32 rt, s32 i16) void BRHNZ(u32 rt, s32 i16)
{ {
if (CPU.GPR[rt]._u16[6] != 0) u64 target = branchTarget(CPU.PC, i16);
CPU.SetBranch(branchTarget(CPU.PC, i16)); if (CPU.GPR[rt]._u16[6] != 0)
{
LOG2_OPCODE("taken (0x%llx)", target);
CPU.SetBranch(target);
}
else
{
LOG2_OPCODE("not taken (0x%llx)", target);
}
} }
void STQR(u32 rt, s32 i16) void STQR(u32 rt, s32 i16)
{ {
@ -1092,7 +1162,9 @@ private:
} }
void BRA(s32 i16) void BRA(s32 i16)
{ {
CPU.SetBranch(branchTarget(0, i16)); u64 target = branchTarget(0, i16);
LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void LQA(u32 rt, s32 i16) void LQA(u32 rt, s32 i16)
{ {
@ -1108,13 +1180,17 @@ private:
} }
void BRASL(u32 rt, s32 i16) void BRASL(u32 rt, s32 i16)
{ {
u64 target = branchTarget(0, i16);
CPU.GPR[rt].Reset(); CPU.GPR[rt].Reset();
CPU.GPR[rt]._u32[3] = CPU.PC + 4; CPU.GPR[rt]._u32[3] = CPU.PC + 4;
CPU.SetBranch(branchTarget(0, i16)); LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void BR(s32 i16) void BR(s32 i16)
{ {
CPU.SetBranch(branchTarget(CPU.PC, i16)); u64 target = branchTarget(CPU.PC, i16);
LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void FSMBI(u32 rt, s32 i16) void FSMBI(u32 rt, s32 i16)
{ {
@ -1134,9 +1210,11 @@ private:
} }
void BRSL(u32 rt, s32 i16) void BRSL(u32 rt, s32 i16)
{ {
u64 target = branchTarget(CPU.PC, i16);
CPU.GPR[rt].Reset(); CPU.GPR[rt].Reset();
CPU.GPR[rt]._u32[3] = CPU.PC + 4; CPU.GPR[rt]._u32[3] = CPU.PC + 4;
CPU.SetBranch(branchTarget(CPU.PC, i16)); LOG2_OPCODE("branch (0x%llx)", target);
CPU.SetBranch(target);
} }
void LQR(u32 rt, s32 i16) void LQR(u32 rt, s32 i16)
{ {
@ -1236,6 +1314,7 @@ private:
Emu.Pause(); Emu.Pause();
return; return;
} }
//ConLog.Write("STQD(lsa=0x%x): GPR[%d] (0x%llx%llx)", lsa, rt, CPU.GPR[rt]._u64[1], CPU.GPR[rt]._u64[0]);
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
} }
void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding

View file

@ -87,7 +87,7 @@ public:
struct SPURecEntry struct SPURecEntry
{ {
u16 host; // absolute position of first instruction of current block //u16 host; // absolute position of first instruction of current block (not used now)
u16 count; // count of instructions compiled from current point (and to be checked) u16 count; // count of instructions compiled from current point (and to be checked)
u32 valid; // copy of valid opcode for validation u32 valid; // copy of valid opcode for validation
void* pointer; // pointer to executable memory object void* pointer; // pointer to executable memory object
@ -114,6 +114,8 @@ public:
#define imm_xmm(x) oword_ptr(*imm_var, offsetof(SPUImmTable, x)) #define imm_xmm(x) oword_ptr(*imm_var, offsetof(SPUImmTable, x))
#define LOG_OPCODE(...) //ConLog.Write(__FUNCTION__ "()" __VA_ARGS__)
#define WRAPPER_BEGIN(a0, a1, a2, a3) struct opcode_wrapper \ #define WRAPPER_BEGIN(a0, a1, a2, a3) struct opcode_wrapper \
{ \ { \
static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \ static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \
@ -122,11 +124,13 @@ public:
#define WRAPPER_END(a0, a1, a2, a3) } \ #define WRAPPER_END(a0, a1, a2, a3) } \
}; \ }; \
c.mov(cpu_qword(PC), (u32)CPU.PC); \
X86X64CallNode* call = c.call(imm_ptr(&opcode_wrapper::opcode), kFuncConvHost, FuncBuilder4<void, u32, u32, u32, u32>()); \ X86X64CallNode* call = c.call(imm_ptr(&opcode_wrapper::opcode), kFuncConvHost, FuncBuilder4<void, u32, u32, u32, u32>()); \
call->setArg(0, imm_u(a0)); \ call->setArg(0, imm_u(a0)); \
call->setArg(1, imm_u(a1)); \ call->setArg(1, imm_u(a1)); \
call->setArg(2, imm_u(a2)); \ call->setArg(2, imm_u(a2)); \
call->setArg(3, imm_u(a3)); call->setArg(3, imm_u(a3)); \
LOG_OPCODE();
class SPURecompiler : public SPUOpcodes class SPURecompiler : public SPUOpcodes
@ -156,26 +160,31 @@ private:
WRAPPER_END(code, 0, 0, 0); WRAPPER_END(code, 0, 0, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1); c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true; do_finalize = true;
ConLog.Write("STOP(code=%d)", code);
} }
void LNOP() void LNOP()
{ {
/*c.mov(*pos_var, (CPU.PC >> 2) + 1); c.mov(cpu_qword(PC), (u32)CPU.PC);
/*
do_finalize = true; do_finalize = true;
ConLog.Write("LNOP()");*/ c.mov(*pos_var, (CPU.PC >> 2) + 1);
*/
LOG_OPCODE();
} }
void SYNC(u32 Cbit) void SYNC(u32 Cbit)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
// This instruction must be used following a store instruction that modifies the instruction stream. // This instruction must be used following a store instruction that modifies the instruction stream.
c.mfence(); c.mfence();
c.mov(*pos_var, (CPU.PC >> 2) + 1); c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true; do_finalize = true;
ConLog.Write("SYNC()"); LOG_OPCODE();
} }
void DSYNC() void DSYNC()
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
// This instruction forces all earlier load, store, and channel instructions to complete before proceeding. // This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
c.mfence(); c.mfence();
LOG_OPCODE();
} }
void MFSPR(u32 rt, u32 sa) void MFSPR(u32 rt, u32 sa)
{ {
@ -563,27 +572,55 @@ private:
} }
void BIZ(u32 rt, u32 ra) void BIZ(u32 rt, u32 ra)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
if(CPU.GPR[rt]._u32[3] == 0) do_finalize = true;
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0));
GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (u32)CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmovne(*pos_var, pos_next);
c.shr(*pos_var, 2);
LOG_OPCODE();
} }
void BINZ(u32 rt, u32 ra) void BINZ(u32 rt, u32 ra)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
if(CPU.GPR[rt]._u32[3] != 0) do_finalize = true;
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0));
GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (u32)CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmove(*pos_var, pos_next);
c.shr(*pos_var, 2);
LOG_OPCODE();
} }
void BIHZ(u32 rt, u32 ra) void BIHZ(u32 rt, u32 ra)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
if(CPU.GPR[rt]._u16[6] == 0) do_finalize = true;
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0));
GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (u32)CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmovne(*pos_var, pos_next);
c.shr(*pos_var, 2);
LOG_OPCODE();
} }
void BIHNZ(u32 rt, u32 ra) void BIHNZ(u32 rt, u32 ra)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
if(CPU.GPR[rt]._u16[6] != 0) do_finalize = true;
CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0));
GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (u32)CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmove(*pos_var, pos_next);
c.shr(*pos_var, 2);
LOG_OPCODE();
} }
void STOPD(u32 rc, u32 ra, u32 rb) void STOPD(u32 rc, u32 ra, u32 rb)
{ {
@ -606,23 +643,26 @@ private:
} }
void BI(u32 ra) void BI(u32 ra)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.shr(*pos_var, 2); c.shr(*pos_var, 2);
//ConLog.Write("BI(ra=%d)", ra); LOG_OPCODE();
} }
void BISL(u32 rt, u32 ra) void BISL(u32 rt, u32 ra)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
c.int3();
c.xor_(*pos_var, *pos_var); c.xor_(*pos_var, *pos_var);
c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var); c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var);
c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var); c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var);
c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var); c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.mov(cpu_dword(GPR[rt]._u32[3]), (CPU.PC >> 2) + 1); c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4);
c.shr(*pos_var, 2); c.shr(*pos_var, 2);
ConLog.Write("BISL(rt=%d,ra=%d)", rt, ra); LOG_OPCODE();
} }
void IRET(u32 ra) void IRET(u32 ra)
{ {
@ -635,6 +675,7 @@ private:
} }
void HBR(u32 p, u32 ro, u32 ra) void HBR(u32 p, u32 ro, u32 ra)
{ {
LOG_OPCODE();
} }
void GB(u32 rt, u32 ra) void GB(u32 rt, u32 ra)
{ {
@ -885,7 +926,7 @@ private:
void CHD(u32 rt, u32 ra, s32 i7) void CHD(u32 rt, u32 ra, s32 i7)
{ {
WRAPPER_BEGIN(rt, ra, i7, zz); WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xE; const int t = (CPU.GPR[ra]._u32[3] + (s32)i7) & 0xE;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
@ -895,7 +936,7 @@ private:
void CWD(u32 rt, u32 ra, s32 i7) void CWD(u32 rt, u32 ra, s32 i7)
{ {
WRAPPER_BEGIN(rt, ra, i7, zz); WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xC; const int t = (CPU.GPR[ra]._u32[3] + (s32)i7) & 0xC;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
@ -1045,6 +1086,7 @@ private:
} }
void NOP(u32 rt) void NOP(u32 rt)
{ {
LOG_OPCODE();
} }
void CGT(u32 rt, u32 ra, u32 rb) void CGT(u32 rt, u32 ra, u32 rb)
{ {
@ -1096,8 +1138,11 @@ private:
//HGT uses signed values. HLGT uses unsigned values //HGT uses signed values. HLGT uses unsigned values
void HGT(u32 rt, s32 ra, s32 rb) void HGT(u32 rt, s32 ra, s32 rb)
{ {
UNIMPLEMENTED(); WRAPPER_BEGIN(rt, ra, rb, zz);
if(CPU.GPR[ra]._i32[3] > CPU.GPR[rb]._i32[3]) CPU.Stop(); if(CPU.GPR[ra]._i32[3] > CPU.GPR[rb]._i32[3]) CPU.Stop();
WRAPPER_END(rt, ra, rb, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
void CLZ(u32 rt, u32 ra) void CLZ(u32 rt, u32 ra)
{ {
@ -1287,8 +1332,11 @@ private:
} }
void HLGT(u32 rt, u32 ra, u32 rb) void HLGT(u32 rt, u32 ra, u32 rb)
{ {
UNIMPLEMENTED(); WRAPPER_BEGIN(rt, ra, rb, zz);
if(CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3]) CPU.Stop(); if(CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3]) CPU.Stop();
WRAPPER_END(rt, ra, rb, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
void DFMA(u32 rt, u32 ra, u32 rb) void DFMA(u32 rt, u32 ra, u32 rb)
{ {
@ -1567,8 +1615,11 @@ private:
} }
void HEQ(u32 rt, u32 ra, u32 rb) void HEQ(u32 rt, u32 ra, u32 rb)
{ {
UNIMPLEMENTED(); WRAPPER_BEGIN(rt, ra, rb, zz);
if(CPU.GPR[ra]._i32[3] == CPU.GPR[rb]._i32[3]) CPU.Stop(); if(CPU.GPR[ra]._i32[3] == CPU.GPR[rb]._i32[3]) CPU.Stop();
WRAPPER_END(rt, ra, rb, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
//0 - 9 //0 - 9
@ -1687,13 +1738,15 @@ private:
//0 - 8 //0 - 8
void BRZ(u32 rt, s32 i16) void BRZ(u32 rt, s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
GpVar pos_next(c, kVarTypeUInt32); GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(pos_next, (CPU.PC >> 2) + 1);
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmovnz(*pos_var, pos_next); c.cmovne(*pos_var, pos_next);
//ConLog.Write("BRZ(rt=%d,i16=%d)", rt, i16); LOG_OPCODE();
} }
void STQA(u32 rt, s32 i16) void STQA(u32 rt, s32 i16)
{ {
@ -1711,45 +1764,50 @@ private:
} }
void BRNZ(u32 rt, s32 i16) void BRNZ(u32 rt, s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
GpVar pos_next(c, kVarTypeUInt32); GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(pos_next, (CPU.PC >> 2) + 1);
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmovz(*pos_var, pos_next); c.cmove(*pos_var, pos_next);
//ConLog.Write("BRNZ(rt=%d,i16=%d)", rt, i16); LOG_OPCODE();
} }
void BRHZ(u32 rt, s32 i16) void BRHZ(u32 rt, s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
GpVar pos_next(c, kVarTypeUInt32); GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(pos_next, (CPU.PC >> 2) + 1);
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
c.cmp(cpu_word(GPR[rt]._u16[6]), 0); c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmovnz(*pos_var, pos_next); c.cmovnz(*pos_var, pos_next);
ConLog.Write("BRHZ(rt=%d,i16=%d)", rt, i16); LOG_OPCODE();
} }
void BRHNZ(u32 rt, s32 i16) void BRHNZ(u32 rt, s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
GpVar pos_next(c, kVarTypeUInt32); GpVar pos_next(c, kVarTypeUInt32);
c.mov(pos_next, (CPU.PC >> 2) + 1); c.mov(pos_next, (CPU.PC >> 2) + 1);
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
c.cmp(cpu_word(GPR[rt]._u16[6]), 0); c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmovz(*pos_var, pos_next); c.cmovz(*pos_var, pos_next);
ConLog.Write("BRHNZ(rt=%d,i16=%d)", rt, i16); LOG_OPCODE();
} }
void STQR(u32 rt, s32 i16) void STQR(u32 rt, s32 i16)
{ {
WRAPPER_BEGIN(rt, i16, PC, zz); WRAPPER_BEGIN(rt, i16, PC, zz);
u32 lsa = branchTarget(PC, i16) & 0x3fff0; u32 lsa = branchTarget(PC, (s32)i16) & 0x3fff0;
if (!CPU.IsGoodLSA(lsa)) if (!CPU.IsGoodLSA(lsa))
{ {
ConLog.Error("STQR: bad lsa (0x%x)", lsa); ConLog.Error("STQR: bad lsa (0x%x)", lsa);
Emu.Pause(); Emu.Pause();
return; return;
} }
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
WRAPPER_END(rt, i16, CPU.PC, 0); WRAPPER_END(rt, i16, CPU.PC, 0);
/*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
@ -1765,8 +1823,11 @@ private:
} }
void BRA(s32 i16) void BRA(s32 i16)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
CPU.SetBranch(branchTarget(0, i16)); do_finalize = true;
c.mov(*pos_var, branchTarget(0, i16) >> 2);
LOG_OPCODE();
} }
void LQA(u32 rt, s32 i16) void LQA(u32 rt, s32 i16)
{ {
@ -1784,16 +1845,24 @@ private:
} }
void BRASL(u32 rt, s32 i16) void BRASL(u32 rt, s32 i16)
{ {
UNIMPLEMENTED(); c.mov(cpu_qword(PC), (u32)CPU.PC);
CPU.GPR[rt].Reset(); do_finalize = true;
CPU.GPR[rt]._u32[3] = CPU.PC + 4;
CPU.SetBranch(branchTarget(0, i16)); GpVar v0(c, kVarTypeUInt64);
c.xor_(v0, v0);
c.mov(cpu_qword(GPR[rt]._u64[1]), v0);
c.mov(cpu_qword(GPR[rt]._u64[0]), v0);
c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4);
c.mov(*pos_var, branchTarget(0, i16) >> 2);
LOG_OPCODE();
} }
void BR(s32 i16) void BR(s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
//ConLog.Write("BR(i16=%d)", i16); LOG_OPCODE();
} }
void FSMBI(u32 rt, s32 i16) void FSMBI(u32 rt, s32 i16)
{ {
@ -1818,27 +1887,27 @@ private:
} }
void BRSL(u32 rt, s32 i16) void BRSL(u32 rt, s32 i16)
{ {
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true;
GpVar v0(c, kVarTypeUInt64); GpVar v0(c, kVarTypeUInt64);
c.xor_(v0, v0); c.xor_(v0, v0);
c.mov(cpu_qword(GPR[rt]._u64[1]), v0); c.mov(cpu_qword(GPR[rt]._u64[1]), v0);
c.mov(cpu_qword(GPR[rt]._u64[0]), v0); c.mov(cpu_qword(GPR[rt]._u64[0]), v0);
c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4); c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4);
do_finalize = true;
c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
//ConLog.Write("BRSL(rt=%d,i16=%d)", rt, i16); LOG_OPCODE();
} }
void LQR(u32 rt, s32 i16) void LQR(u32 rt, s32 i16)
{ {
WRAPPER_BEGIN(rt, i16, PC, zz); WRAPPER_BEGIN(rt, i16, PC, zz);
u32 lsa = branchTarget(PC, i16) & 0x3fff0; u32 lsa = branchTarget(PC, (s32)i16) & 0x3fff0;
if (!CPU.IsGoodLSA(lsa)) if (!CPU.IsGoodLSA(lsa))
{ {
ConLog.Error("LQR: bad lsa (0x%x)", lsa); ConLog.Error("LQR: bad lsa (0x%x)", lsa);
Emu.Pause(); Emu.Pause();
return; return;
} }
CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa);
WRAPPER_END(rt, i16, CPU.PC, 0); WRAPPER_END(rt, i16, CPU.PC, 0);
/*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
@ -1858,7 +1927,7 @@ private:
CPU.GPR[rt]._i32[0] = CPU.GPR[rt]._i32[0] =
CPU.GPR[rt]._i32[1] = CPU.GPR[rt]._i32[1] =
CPU.GPR[rt]._i32[2] = CPU.GPR[rt]._i32[2] =
CPU.GPR[rt]._i32[3] = i16; CPU.GPR[rt]._i32[3] = (s32)i16;
WRAPPER_END(rt, i16, 0, 0); WRAPPER_END(rt, i16, 0, 0);
/*XmmVar v0(c); /*XmmVar v0(c);
if (i16 == 0) if (i16 == 0)
@ -1879,7 +1948,7 @@ private:
{ {
WRAPPER_BEGIN(rt, i16, yy, zz); WRAPPER_BEGIN(rt, i16, yy, zz);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = i16 << 16; CPU.GPR[rt]._i32[w] = (s32)i16 << 16;
WRAPPER_END(rt, i16, 0, 0); WRAPPER_END(rt, i16, 0, 0);
/*XmmVar v0(c); /*XmmVar v0(c);
if (i16 == 0) if (i16 == 0)
@ -2013,10 +2082,10 @@ private:
void AI(u32 rt, u32 ra, s32 i10) void AI(u32 rt, u32 ra, s32 i10)
{ {
WRAPPER_BEGIN(rt, ra, i10, zz); WRAPPER_BEGIN(rt, ra, i10, zz);
CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + i10; CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + (s32)i10;
CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + i10; CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + (s32)i10;
CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + i10; CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + (s32)i10;
CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + i10; CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + (s32)i10;
WRAPPER_END(rt, ra, i10, 0); WRAPPER_END(rt, ra, i10, 0);
/*XmmVar v0(c); /*XmmVar v0(c);
if (i10 == 0) if (i10 == 0)
@ -2050,13 +2119,14 @@ private:
void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding
{ {
WRAPPER_BEGIN(rt, i10, ra, zz); WRAPPER_BEGIN(rt, i10, ra, zz);
const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; const u32 lsa = (CPU.GPR[ra]._i32[3] + (s32)i10) & 0x3fff0;
if (!CPU.IsGoodLSA(lsa)) if (!CPU.IsGoodLSA(lsa))
{ {
ConLog.Error("STQD: bad lsa (0x%x)", lsa); ConLog.Error("STQD: bad lsa (0x%x)", lsa);
Emu.Pause(); Emu.Pause();
return; return;
} }
//ConLog.Write("wrapper::STQD (lsa=0x%x): GPR[%d] (0x%llx%llx)", lsa, rt, CPU.GPR[rt]._u64[1], CPU.GPR[rt]._u64[0]);
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
WRAPPER_END(rt, i10, ra, 0); WRAPPER_END(rt, i10, ra, 0);
/*GpVar lsa(c, kVarTypeUInt32); /*GpVar lsa(c, kVarTypeUInt32);
@ -2076,7 +2146,7 @@ private:
void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding
{ {
WRAPPER_BEGIN(rt, i10, ra, zz); WRAPPER_BEGIN(rt, i10, ra, zz);
const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; const u32 lsa = (CPU.GPR[ra]._i32[3] + (s32)i10) & 0x3fff0;
if (!CPU.IsGoodLSA(lsa)) if (!CPU.IsGoodLSA(lsa))
{ {
ConLog.Error("LQD: bad lsa (0x%x)", lsa); ConLog.Error("LQD: bad lsa (0x%x)", lsa);
@ -2145,14 +2215,19 @@ private:
} }
void HGTI(u32 rt, u32 ra, s32 i10) void HGTI(u32 rt, u32 ra, s32 i10)
{ {
UNIMPLEMENTED(); WRAPPER_BEGIN(rt, ra, i10, zz);
if(CPU.GPR[ra]._i32[3] > i10) CPU.Stop(); if(CPU.GPR[ra]._i32[3] > (s32)i10) CPU.Stop();
WRAPPER_END(rt, ra, i10, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
void CLGTI(u32 rt, u32 ra, s32 i10) void CLGTI(u32 rt, u32 ra, s32 i10)
{ {
WRAPPER_BEGIN(rt, ra, i10, zz); WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++) for (u32 i = 0; i < 4; ++i)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > (s32)i10 ? 0xffffffff : 0; {
CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000;
}
WRAPPER_END(rt, ra, i10, 0); WRAPPER_END(rt, ra, i10, 0);
/*XmmVar v0(c); /*XmmVar v0(c);
if (i10 == -1) if (i10 == -1)
@ -2182,7 +2257,7 @@ private:
WRAPPER_BEGIN(rt, ra, i10, zz); WRAPPER_BEGIN(rt, ra, i10, zz);
for(u32 i = 0; i < 8; ++i) for(u32 i = 0; i < 8; ++i)
{ {
CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)(s32)i10) ? 0xffff : 0x0000; CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)i10) ? 0xffff : 0x0000;
} }
WRAPPER_END(rt, ra, i10, 0); WRAPPER_END(rt, ra, i10, 0);
} }
@ -2195,8 +2270,11 @@ private:
} }
void HLGTI(u32 rt, u32 ra, s32 i10) void HLGTI(u32 rt, u32 ra, s32 i10)
{ {
UNIMPLEMENTED(); WRAPPER_BEGIN(rt, ra, i10, zz);
if(CPU.GPR[ra]._u32[3] > (u32)i10) CPU.Stop(); if(CPU.GPR[ra]._u32[3] > (u32)i10) CPU.Stop();
WRAPPER_END(rt, ra, i10, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
void MPYI(u32 rt, u32 ra, s32 i10) void MPYI(u32 rt, u32 ra, s32 i10)
{ {
@ -2236,20 +2314,22 @@ private:
} }
void HEQI(u32 rt, u32 ra, s32 i10) void HEQI(u32 rt, u32 ra, s32 i10)
{ {
// TODO WRAPPER_BEGIN(rt, ra, i10, zz);
UNIMPLEMENTED(); if(CPU.GPR[ra]._i32[3] == (s32)i10) CPU.Stop();
if(CPU.GPR[ra]._i32[3] == i10) CPU.Stop(); WRAPPER_END(rt, ra, i10, 0);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
do_finalize = true;
} }
//0 - 6 //0 - 6
void HBRA(s32 ro, s32 i16) void HBRA(s32 ro, s32 i16)
{ //i16 is shifted left by 2 while decoding { //i16 is shifted left by 2 while decoding
//UNIMPLEMENTED(); LOG_OPCODE();
} }
void HBRR(s32 ro, s32 i16) void HBRR(s32 ro, s32 i16)
{ {
//UNIMPLEMENTED(); LOG_OPCODE();
} }
void ILA(u32 rt, u32 i18) void ILA(u32 rt, u32 i18)
{ {
@ -2376,6 +2456,7 @@ private:
void UNK(const std::string& err) void UNK(const std::string& err)
{ {
ConLog.Error(err + fmt::Format(" #pc: 0x%x", CPU.PC)); ConLog.Error(err + fmt::Format(" #pc: 0x%x", CPU.PC));
c.mov(cpu_qword(PC), (u32)CPU.PC);
do_finalize = true; do_finalize = true;
Emu.Pause(); Emu.Pause();
} }

View file

@ -28,7 +28,8 @@ void SPURecompilerCore::Decode(const u32 code) // decode instruction and run wit
void SPURecompilerCore::Compile(u16 pos) void SPURecompilerCore::Compile(u16 pos)
{ {
compiler.addFunc(kFuncConvHost, FuncBuilder4<u32, void*, void*, void*, u32>()); compiler.addFunc(kFuncConvHost, FuncBuilder4<u32, void*, void*, void*, u32>());
entry[pos].host = pos; const u16 start = pos;
entry[start].count = 0;
GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu"); GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu");
compiler.setArg(0, cpu_var); compiler.setArg(0, cpu_var);
@ -60,32 +61,32 @@ void SPURecompilerCore::Compile(u16 pos)
if (opcode) if (opcode)
{ {
(*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode
entry[start].count++;
} }
else else
{ {
m_enc->do_finalize = true; m_enc->do_finalize = true;
} }
bool fin = m_enc->do_finalize; bool fin = m_enc->do_finalize;
entry[pos].valid = opcode; entry[pos].valid = re(opcode);
if (fin) break; if (fin) break;
CPU.PC += 4; CPU.PC += 4;
pos++; pos++;
entry[pos].host = entry[pos - 1].host;
} }
compiler.ret(pos_var); compiler.ret(pos_var);
compiler.endFunc(); compiler.endFunc();
entry[entry[pos].host].pointer = compiler.make(); entry[start].pointer = compiler.make();
} }
u8 SPURecompilerCore::DecodeMemory(const u64 address) u8 SPURecompilerCore::DecodeMemory(const u64 address)
{ {
const u64 m_offset = address - CPU.PC; const u64 m_offset = CPU.dmac.ls_offset;
const u16 pos = (CPU.PC >> 2); const u16 pos = (CPU.PC >> 2);
//ConLog.Write("DecodeMemory: pos=%d", pos); //ConLog.Write("DecodeMemory: pos=%d", pos);
u32* ls = (u32*)Memory.VirtualToRealAddr(m_offset); u32* ls = (u32*)&Memory[m_offset];
if (!pos) if (!pos)
{ {
@ -98,7 +99,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
{ {
// check data (hard way) // check data (hard way)
bool is_valid = true; bool is_valid = true;
for (u32 i = pos; i < entry[pos].count + pos; i++) for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++)
{ {
if (entry[i].valid != ls[i]) if (entry[i].valid != ls[i])
{ {
@ -110,6 +111,9 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
if (!is_valid) if (!is_valid)
{ {
// TODO // TODO
ConLog.Error("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32));
Emu.Pause();
return 0;
} }
} }
@ -117,6 +121,12 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
{ {
// compile from current position to nearest dynamic or statically unresolved branch, zero data or something other // compile from current position to nearest dynamic or statically unresolved branch, zero data or something other
Compile(pos); Compile(pos);
if (entry[pos].valid == 0)
{
ConLog.Error("SPURecompilerCore::Compile(ls_addr=0x%x): branch to 0x0 opcode", pos * sizeof(u32));
Emu.Pause();
return 0;
}
} }
if (!entry[pos].pointer) if (!entry[pos].pointer)
@ -128,16 +138,23 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
// jump // jump
typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos); typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos);
Func func = asmjit_cast<Func>(entry[entry[pos].host].pointer); Func func = asmjit_cast<Func>(entry[pos].pointer);
void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection
u16 res = (pos == entry[pos].host) ? 0 : pos; u16 res = pos;
res = (u16)func(cpu, ls, &g_spu_imm, res); res = (u16)func(cpu, &Memory[m_offset], &g_spu_imm, res);
CPU.SetBranch((u64)res << 2); LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2);
if ((res - 1) == (CPU.PC >> 2))
return 0; {
return 4;
}
else
{
CPU.SetBranch((u64)res << 2);
return 0;
}
/*Decode(Memory.Read32(address)); /*Decode(Memory.Read32(address));
return 4;*/ return 4;*/
} }

View file

@ -300,19 +300,17 @@ public:
#else #else
static const bool x86 = true; static const bool x86 = true;
#endif #endif
private:
union _CRT_ALIGN(8) { union _CRT_ALIGN(8) {
struct { struct {
volatile u32 m_index; volatile u32 m_index;
u32 m_value[max_count]; u32 m_value[max_count];
}; };
struct {
volatile u32 m_index2;
u16 m_val16[max_count * 2];
};
volatile u64 m_indval; volatile u64 m_indval;
}; };
std::mutex m_lock; std::mutex m_lock;
public:
Channel() Channel()
{ {
Init(); Init();