diff --git a/asmjit.vcxproj b/asmjit.vcxproj
index 98dbfb40b4..4f37749805 100644
--- a/asmjit.vcxproj
+++ b/asmjit.vcxproj
@@ -22,6 +22,7 @@
+
@@ -44,6 +45,9 @@
+
+
+
{AC40FF01-426E-4838-A317-66354CEFAE88}
asmjit
diff --git a/asmjit.vcxproj.filters b/asmjit.vcxproj.filters
index bddd91cf68..43ebc37017 100644
--- a/asmjit.vcxproj.filters
+++ b/asmjit.vcxproj.filters
@@ -25,5 +25,9 @@
+
+
+
+
\ No newline at end of file
diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h
index 4d07456c0c..edb2695d66 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@@ -16,7 +16,7 @@ using namespace asmjit::host;
struct g_imm_table_struct
{
- u16 cntb_table[65536];
+ //u16 cntb_table[65536];
__m128i fsmb_table[65536];
__m128i fsmh_table[256];
@@ -28,7 +28,7 @@ struct g_imm_table_struct
g_imm_table_struct()
{
- static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0");
+ /*static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0");
for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++)
{
u32 cnt_low = 0, cnt_high = 0;
@@ -38,7 +38,7 @@ struct g_imm_table_struct
cnt_high += (i >> (j + 8)) & 1;
}
cntb_table[i] = (cnt_high << 8) | cnt_low;
- }
+ }*/
for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++)
{
for (u32 j = 0; j < 4; j++) fsm_table[i].m128i_u32[j] = (i & (1 << j)) ? ~0 : 0;
@@ -660,6 +660,7 @@ private:
}
void ROTMA(u32 rt, u32 ra, u32 rb)
{
+#ifdef _M_X64
XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++)
{
@@ -670,6 +671,14 @@ private:
c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32());
}
LOG_OPCODE();
+#else
+ WRAPPER_BEGIN(rt, ra, rb, zz);
+ CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : CPU.GPR[ra]._i32[0] >> 31;
+ CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : CPU.GPR[ra]._i32[1] >> 31;
+ CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : CPU.GPR[ra]._i32[2] >> 31;
+ CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : CPU.GPR[ra]._i32[3] >> 31;
+ WRAPPER_END(rt, ra, rb, 0);
+#endif
}
void SHL(u32 rt, u32 ra, u32 rb)
{
@@ -1922,12 +1931,19 @@ private:
}
// XSWD: sign-extends the 32-bit elements at indices 0 and 2 of GPR[ra] into
// the two 64-bit elements of GPR[rt]. Both preprocessor branches below perform
// the same two extensions; _M_X64 selects which one is compiled.
void XSWD(u32 rt, u32 ra)
{
+#ifdef _M_X64 // path that emits movsxd/mov through `c`; presumably qw0/qw1 are 64-bit scratch registers - confirm
c.movsxd(*qw0, cpu_dword(GPR[ra]._i32[0]));
c.movsxd(*qw1, cpu_dword(GPR[ra]._i32[2]));
c.mov(cpu_qword(GPR[rt]._i64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._i64[1]), *qw1);
// NOTE(review): XmmInvalidate is defined elsewhere; presumably it drops any
// cached XMM copy of rt because GPR[rt] was just written directly - confirm.
XmmInvalidate(rt);
LOG_OPCODE();
+#else // fallback when _M_X64 is not defined: the same operation written as plain C++ on CPU.GPR
+ WRAPPER_BEGIN(rt, ra, yy, zz); // NOTE(review): WRAPPER_BEGIN/WRAPPER_END are macros defined elsewhere - confirm their argument contract
+ CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[0]; // element 0 -> low 64-bit element
+ CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[2]; // element 2 -> high 64-bit element
+ WRAPPER_END(rt, ra, 0, 0);
+#endif
}
void XSHW(u32 rt, u32 ra)
{
@@ -1939,13 +1955,27 @@ private:
}
// CNTB: emits code that computes, for each of the 16 bytes of GPR[ra], the
// number of set bits in that byte, and hands the result to rt.
// The previous implementation (eight 16-bit lookups into cntb_table) is kept
// below inside a block comment; the active code uses two 16-entry nibble
// lookups done with pshufb and adds the results.
void CNTB(u32 rt, u32 ra)
{
- XmmInvalidate(rt);
+ /*XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movzx(*addr, cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, word_ptr(*g_imm_var, *addr, 1, offsetof(g_imm_table_struct, cntb_table[0])));
c.mov(cpu_word(GPR[rt]._u16[i]), addr->r16());
- }
+ }*/
+ const XmmLink& va = XmmGet(ra, rt); // NOTE(review): XmmGet/XmmCopy/XmmAlloc/XmmFinalize are defined elsewhere; presumably va holds the value of ra and is destined for rt - confirm
+ const XmmLink& v1 = XmmCopy(va);
+ const XmmLink& vm = XmmAlloc();
+ c.psrlw(v1.get(), 4); // shifts 16-bit lanes, so bits cross byte boundaries; the pand on v1 below discards them
+ c.pand(va.get(), XmmConst(_mm_set1_epi8(0xf))); // va = low nibble of every byte
+ c.pand(v1.get(), XmmConst(_mm_set1_epi8(0xf))); // v1 = high nibble of every byte
+ c.movdqa(vm.get(), XmmConst(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))); // _mm_set_epi8 lists byte 15 first: byte k of the table = popcount(k), k = 0..15
+ c.pshufb(vm.get(), va.get()); // table lookup indexed by low nibbles: vm = bit count of each low nibble
+ c.movdqa(va.get(), XmmConst(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))); // va is reloaded with the same table; its nibble values are no longer needed
+ c.pshufb(va.get(), v1.get()); // table lookup indexed by high nibbles: va = bit count of each high nibble
+ c.paddb(va.get(), vm.get()); // per-byte sum = bit count of the whole byte
+ XmmFinalize(va, rt);
+ XmmFinalize(v1);
+ XmmFinalize(vm);
LOG_OPCODE();
}
void XSBH(u32 rt, u32 ra)
diff --git a/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp b/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp
index a0418e3be5..035a1e405d 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp
@@ -299,10 +299,12 @@ int cellAudioInit()
// convert the data from float to u16 with clipping:
if (!first_mix)
{
- /*for (u32 i = 0; i < (sizeof(buffer) / sizeof(float)); i++)
+#ifndef _M_X64
+ for (u32 i = 0; i < (sizeof(buf2ch) / sizeof(float)); i++)
{
- oal_buffer[oal_pos][oal_buffer_offset + i] = (s16)(min(max(buffer[i] * 0x8000, -0x8000), 0x7fff));
- }*/
+ oal_buffer[oal_pos][oal_buffer_offset + i] = (s16)(min(max(buf2ch[i] * 0x8000, -0x8000), 0x7fff));
+ }
+#else
// 2x MULPS
// 2x MAXPS (optional)
// 2x MINPS (optional)
@@ -315,6 +317,7 @@ int cellAudioInit()
_mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i]), float2u16)),
_mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i + 4]), float2u16)));
}
+#endif
}
const u64 stamp1 = get_system_time();