From 94c30924dd3174a091f350d70ad92fc210f081de Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 8 Oct 2014 01:37:04 +0400 Subject: [PATCH] Some things for u128 --- Utilities/BEType.h | 73 ++++++++++++++++++++---- Utilities/GNU.h | 12 +--- rpcs3/Emu/CPU/CPUThread.cpp | 26 ++++++++- rpcs3/Emu/CPU/CPUThread.h | 1 + rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 73 ++++++++++++------------ 5 files changed, 124 insertions(+), 61 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 12bb176531..86f47f0815 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -2,9 +2,6 @@ union u128 { - __m128 vf; - __m128i vi; - u64 _u64[2]; s64 _s64[2]; @@ -87,6 +84,8 @@ union u128 float _f[4]; double _d[2]; + __m128 vf; + __m128i vi; class bit_array_128 { @@ -186,6 +185,20 @@ union u128 return from32(_0, _1, _2, _3); } + static u128 from32p(u32 value) + { + u128 ret; + ret.vi = _mm_set1_epi32((int)value); + return ret; + } + + static u128 from8p(u8 value) + { + u128 ret; + ret.vi = _mm_set1_epi8((char)value); + return ret; + } + static u128 fromBit(u32 bit) { u128 ret = {}; @@ -193,9 +206,41 @@ union u128 return ret; } - void setBit(u32 bit) + static u128 fromV(__m128i value) { - _bit[bit] = true; + u128 ret; + ret.vi = value; + return ret; + } + + static __forceinline u128 add8(const u128& left, const u128& right) + { + return fromV(_mm_add_epi8(left.vi, right.vi)); + } + + static __forceinline u128 sub8(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi8(left.vi, right.vi)); + } + + static __forceinline u128 minu8(const u128& left, const u128& right) + { + return fromV(_mm_min_epu8(left.vi, right.vi)); + } + + static __forceinline u128 eq8(const u128& left, const u128& right) + { + return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); + } + + static __forceinline u128 gtu8(const u128& left, const u128& right) + { + return fromV(_mm_cmpgt_epu8(left.vi, right.vi)); + } + + static __forceinline u128 leu8(const u128& left, const u128& right) + { + return fromV(_mm_cmple_epu8(left.vi, right.vi)); } bool operator == (const u128& right) const @@ -208,19 +253,19 @@ union u128 return (_u64[0] != right._u64[0]) || (_u64[1] != right._u64[1]); } - u128 operator | (const u128& right) const + __forceinline u128 operator | (const u128& right) const { - return from64(_u64[0] | right._u64[0], _u64[1] | right._u64[1]); + return fromV(_mm_or_si128(vi, right.vi)); } - u128 operator & (const u128& right) const + __forceinline u128 operator & (const u128& right) const { - return from64(_u64[0] & right._u64[0], _u64[1] & right._u64[1]); + return fromV(_mm_and_si128(vi, right.vi)); } - u128 operator ^ (const u128& right) const + __forceinline u128 operator ^ (const u128& right) const { - return from64(_u64[0] ^ right._u64[0], _u64[1] ^ right._u64[1]); + return fromV(_mm_xor_si128(vi, right.vi)); } u128 operator ~ () const @@ -228,6 +273,12 @@ union u128 return from64(~_u64[0], ~_u64[1]); } + // result = (~left) & (right) + static __forceinline u128 andnot(const u128& left, const u128& right) + { + return fromV(_mm_andnot_si128(left.vi, right.vi)); + } + void clear() { _u64[1] = _u64[0] = 0; diff --git a/Utilities/GNU.h b/Utilities/GNU.h index a56df8db94..9b01b57622 100644 --- a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -225,16 +225,6 @@ static __forceinline uint64_t cntlz64(uint64_t arg) #endif } -static __forceinline __m128i operator & (__m128i A, __m128i B) -{ - return _mm_and_si128(A, B); -} - -static __forceinline __m128i operator | (__m128i A, __m128i B) -{ - return _mm_or_si128(A, B); -} - // compare 16 packed unsigned byte values (greater than) static __forceinline __m128i _mm_cmpgt_epu8(__m128i A, __m128i B) { @@ -246,5 +236,5 @@ static __forceinline __m128i _mm_cmpgt_epu8(__m128i A, __m128i B) static __forceinline __m128i _mm_cmple_epu8(__m128i A, __m128i B) { // ((B xor 0x80) > (A xor 0x80)) || A == B - return _mm_cmpgt_epu8(B, A) | _mm_cmpeq_epi8(A, B); + return _mm_or_si128(_mm_cmpgt_epu8(B, A), _mm_cmpeq_epi8(A, B)); } diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index fac66da910..df95edf2da 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -28,6 +28,7 @@ CPUThread::CPUThread(CPUThreadType type) , m_is_branch(false) , m_status(Stopped) , m_last_syscall(0) + , m_trace_enabled(false) { } @@ -298,7 +299,7 @@ void _se_translator(unsigned int u, EXCEPTION_POINTERS* pExp) void CPUThread::Task() { - if (Ini.HLELogging.GetValue()) LOG_NOTICE(PPU, "%s enter", CPUThread::GetFName().c_str()); + if (Ini.HLELogging.GetValue()) LOG_NOTICE(GENERAL, "%s enter", CPUThread::GetFName().c_str()); const std::vector& bp = Emu.GetBreakPoints(); @@ -338,6 +339,7 @@ void CPUThread::Task() Step(); //if (PC - 0x13ED4 < 0x288) trace.push_back(PC); + if (m_trace_enabled) trace.push_back(PC); NextPc(m_dec->DecodeMemory(PC + m_offset)); if (status == CPUThread_Step) @@ -373,7 +375,25 @@ void CPUThread::Task() // TODO: linux version #endif - for (auto& v : trace) LOG_NOTICE(PPU, "PC = 0x%x", v); + if (trace.size()) + { + LOG_NOTICE(GENERAL, "Trace begin (%d elements)", trace.size()); - if (Ini.HLELogging.GetValue()) LOG_NOTICE(PPU, "%s leave", CPUThread::GetFName().c_str()); + u32 start = trace[0], prev = trace[0] - 4; + + for (auto& v : trace) //LOG_NOTICE(GENERAL, "PC = 0x%x", v); + { + if (v - prev != 4) + { + LOG_NOTICE(GENERAL, "Trace: 0x%08x .. 0x%08x", start, prev); + start = v; + } + prev = v; + } + + LOG_NOTICE(GENERAL, "Trace end: 0x%08x .. 0x%08x", start, prev); + } + + + if (Ini.HLELogging.GetValue()) LOG_NOTICE(GENERAL, "%s leave", CPUThread::GetFName().c_str()); } diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index e70007773c..3acb2c12b3 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -119,6 +119,7 @@ public: u32 nPC; u64 cycle; bool m_is_branch; + bool m_trace_enabled; bool m_is_interrupt; bool m_has_interrupt; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 50c6842697..a2d392c6ad 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -183,9 +183,9 @@ s64 spursInit( u128 wklB = vm::read128(spurs.addr() + 0x30); u128 savedA = SPU.ReadLS128(0x180); u128 savedB = SPU.ReadLS128(0x190); - u128 vAA; vAA.vi = _mm_sub_epi32(wklA.vi, savedA.vi); - u128 vBB; vBB.vi = _mm_sub_epi32(wklB.vi, savedB.vi); - u128 vAABB; vAABB.vi = (arg1 == 0) ? _mm_add_epi32(vAA.vi, _mm_andnot_si128(g_imm_table.fsmb_table[0x8000 >> var1], vBB.vi)) : vAA.vi; + u128 vAA = u128::sub8(wklA, savedA); + u128 vBB = u128::sub8(wklB, savedB); + u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(u128::fromV(g_imm_table.fsmb_table[0x8000 >> var1]), vBB)); u32 vNUM = 0x20; u64 vRES = 0x20ull << 32; @@ -205,34 +205,34 @@ s64 spursInit( u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); u128 savedC = SPU.ReadLS128(0x1A0); u128 savedD = SPU.ReadLS128(0x1B0); - u128 vRC; vRC.vi = _mm_add_epi32(_mm_min_epu8(wklReadyCount0.vi, _mm_set1_epi8(8)), _mm_min_epu8(wklReadyCount1.vi, _mm_set1_epi8(8))); + u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); u32 flagRecv = spurs->m.flagRecv.read_relaxed(); - u128 vFM; vFM.vi = g_imm_table.fsmb_table[wklFlag == 0 ? 0x8000 >> flagRecv : 0]; - u128 wklSet1; wklSet1.vi = g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]; - u128 vFMS1; vFMS1.vi = vFM.vi | wklSet1.vi; - u128 vFMV1; vFMV1.vi = g_imm_table.fsmb_table[(wklFlag == 0 ? 0x8000 >> flagRecv : 0) >> var1]; + u128 vFM = u128::fromV(g_imm_table.fsmb_table[wklFlag == 0 ? 0x8000 >> flagRecv : 0]); + u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); + u128 vFMS1 = vFM | wklSet1; + u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[0x8000 >> var1]); u32 var5 = SPU.ReadLS32(0x1ec); u128 wklMinCnt = vm::read128(spurs.addr() + 0x40); u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); - u128 vCC; vCC.vi = _mm_andnot_si128(vFMS1.vi, - _mm_cmpeq_epi8(wklReadyCount0.vi, _mm_set1_epi8(0)) | _mm_cmple_epu8(vRC.vi, vAABB.vi)) | - _mm_cmple_epu8(wklMaxCnt.vi, vAABB.vi) | - _mm_cmpeq_epi8(savedC.vi, _mm_set1_epi8(0)) | - g_imm_table.fsmb_table[(~var5) >> 16]; - u128 vCCH1; vCCH1.vi = _mm_andnot_si128(vCC.vi, - _mm_set1_epi8((char)0x80) & (vFMS1.vi | _mm_cmpgt_epu8(wklReadyCount0.vi, vAABB.vi)) | - _mm_set1_epi8(0x7f) & savedC.vi); - u128 vCCL1; vCCL1.vi = _mm_andnot_si128(vCC.vi, - _mm_set1_epi8((char)0x80) & vFMV1.vi | - _mm_set1_epi8(0x40) & _mm_cmpgt_epu8(vAABB.vi, _mm_set1_epi8(0)) & _mm_cmpgt_epu8(wklMinCnt.vi, vAABB.vi) | - _mm_set1_epi8(0x3c) & _mm_slli_epi32(_mm_sub_epi32(_mm_set1_epi8(8), vAABB.vi), 2) | - _mm_set1_epi8(0x02) & _mm_cmpeq_epi8(savedD.vi, _mm_set1_epi8((s8)var0)) | - _mm_set1_epi8(0x01)); - u128 vSTAT; vSTAT.vi = - _mm_set1_epi8(0x01) & _mm_cmpgt_epu8(wklReadyCount0.vi, vAABB.vi) | - _mm_set1_epi8(0x02) & wklSet1.vi | - _mm_set1_epi8(0x04) & vFM.vi; + u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) | + u128::leu8(wklMaxCnt, vAABB) | + u128::eq8(savedC, {}) | + u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]); + cellSpurs->Notice("vCC = %s", vCC.to_hex().c_str()); + u128 vCCH1 = u128::andnot(vCC, + u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) | + u128::from8p(0x7f) & savedC); + u128 vCCL1 = u128::andnot(vCC, + u128::from8p(0x80) & vFMV1 | + u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) | + u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) | + u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) | + u128::from8p(0x01)); + u128 vSTAT = + u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) | + u128::from8p(0x02) & wklSet1 | + u128::from8p(0x04) & vFM; for (s32 i = 0, max = -1; i < 0x10; i++) { @@ -246,7 +246,7 @@ s64 spursInit( if (vNUM < 0x10) { - vRES == ((u64)vNUM << 32) | vSTAT.u8r[vNUM]; + vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM]; vSET.u8r[vNUM] = 0x01; } @@ -255,18 +255,16 @@ s64 spursInit( if (!arg1 || var1 == vNUM) { spurs->m.wklSet1._and_not(be_t::make(0x8000 >> vNUM)); - } - - if (vNUM == flagRecv) - { - spurs->m.wklFlag.flag |= be_t::make(-1); + if (vNUM == flagRecv) + { + spurs->m.wklFlag.flag |= be_t::make(-1); + } } } if (arg1 == 0) { - vAA.vi = _mm_add_epi32(vAA.vi, vSET.vi); - vm::write128(spurs.addr() + 0x20, vAA); // update wklA + vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA SPU.WriteLS128(0x180, vSET); // update savedA SPU.WriteLS32(0x1dc, vNUM); // update var1 @@ -274,8 +272,7 @@ s64 spursInit( if (arg1 == 1 && vNUM != var1) { - vBB.vi = _mm_add_epi32(vBB.vi, vSET.vi); - vm::write128(spurs.addr() + 0x30, vBB); // update wklB + vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB SPU.WriteLS128(0x190, vSET); // update savedB } @@ -290,6 +287,7 @@ s64 spursInit( //{ // //}; + SPU.m_code3_func = nullptr; if (SPU.m_code3_func) { @@ -330,7 +328,10 @@ s64 spursInit( SPU.GPR[3]._u32[3] = 0x100; SPU.GPR[4]._u64[1] = wkl.data; SPU.GPR[5]._u32[3] = stat; + cellSpurs->Notice("In: [0x1e0] = %s", SPU.ReadLS128(0x1e0).to_hex().c_str()); + //SPU.m_trace_enabled = (num == 0 && wid == 0x20); SPU.FastCall(0xa00); + SPU.m_trace_enabled = false; } else {