diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index ca2f4f794f..92cfc5beb1 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -94,24 +94,24 @@ extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) extern __m128i sse_altivec_lvsl(u64 addr) { - alignas(16) static const u64 lvsl_values[0x10][2] = + alignas(16) static const u8 lvsl_values[0x10][0x10] = { - { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + { 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }, + { 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 }, + { 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 }, + { 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 }, + { 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 }, + { 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 }, + { 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 }, + { 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 }, + { 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 }, + { 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 }, + { 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a }, + { 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b }, + { 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c }, + { 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d }, + { 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e }, + { 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f }, }; return _mm_load_si128((__m128i*)lvsl_values[addr & 0xf]); @@ -119,24 +119,24 @@ extern __m128i sse_altivec_lvsl(u64 addr) extern __m128i sse_altivec_lvsr(u64 addr) { - alignas(16) static const u64 lvsr_values[0x10][2] = + alignas(16) static const u8 lvsr_values[0x10][0x10] = { - { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + { 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10 }, + { 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f }, + { 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e }, + { 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d }, + { 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c }, + { 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b }, + { 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a }, + { 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 }, + { 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 }, + { 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 }, + { 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 }, + { 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 }, + { 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 }, + { 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 }, + { 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 }, + { 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 }, }; return _mm_load_si128((__m128i*)lvsr_values[addr & 0xf]); @@ -2050,7 +2050,7 @@ bool ppu_interpreter::VSRB(ppu_thread& ppu, ppu_opcode_t op) auto& d = ppu.vr[op.vd]; const auto& a = ppu.vr[op.va]; const auto& b = ppu.vr[op.vb]; - + for (uint i = 0; i < 16; i++) { d._u8[i] = a._u8[i] >> (b._u8[i] & 0x7); @@ -2964,7 +2964,7 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op) if (op.lk) ppu.lr = link; return false; } - + return true; } @@ -3163,7 +3163,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op) const u32 n = cntlz32(op.crm) & 7; const u32 p = n * 4; const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0; - + ppu.gpr[op.rd] = v << (p ^ 0x1c); } else diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index cd4cd733ff..e3f535c1f3 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -122,7 +122,7 @@ Type* PPUTranslator::GetContextType() Function* PPUTranslator::Translate(const ppu_function& info) { m_function = m_module->getFunction(info.name); - + std::fill(std::begin(m_globals), std::end(m_globals), nullptr); std::fill(std::begin(m_locals), std::end(m_locals), nullptr); @@ -207,7 +207,7 @@ Value* PPUTranslator::GetAddr(u64 _add) // Load segment address from global variable, compute actual instruction address return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_ir->CreateLoad(m_segs[m_reloc - m_info.segs.data()])); } - + return m_ir->getInt64(m_addr + _add); } @@ -342,7 +342,7 @@ Value* PPUTranslator::Solid(Value* value) const u32 size = value->getType()->getPrimitiveSizeInBits(); /* Workarounds (casting bool vectors directly may produce invalid code) */ - + if (value->getType() == GetType()) { return m_ir->CreateBitCast(SExt(value, GetType()), m_ir->getIntNTy(128)); @@ -1759,7 +1759,7 @@ void PPUTranslator::B(ppu_opcode_t op) { RegStore(GetAddr(+4), m_lr); } - + FlushRegisters(); CallFunction(target); } @@ -1972,7 +1972,7 @@ void PPUTranslator::RLWNM(ppu_opcode_t op) // Generic op result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)), mask); } - + // Extend 32-bit op result result = ZExt(result); } @@ -2064,7 +2064,7 @@ void PPUTranslator::RLDICL(ppu_opcode_t op) // Generic op, including CLRLDI mnemonic result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask); } - + SetGpr(op.ra, result); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } @@ -2196,6 +2196,9 @@ void PPUTranslator::TW(ppu_opcode_t op) void PPUTranslator::LVSL(ppu_opcode_t op) { const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); + //const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType()), m_ir->getInt32(0)); + //const auto base = ConstantDataVector::get(m_context, std::vector{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + //SetVr(op.vd, m_ir->CreateAdd(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}))); SetVr(op.vd, Call(GetType(), m_pure_attr, "__lvsl", addr)); } @@ -2351,6 +2354,9 @@ void PPUTranslator::CMPL(ppu_opcode_t op) void PPUTranslator::LVSR(ppu_opcode_t op) { const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); + //const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType()), m_ir->getInt32(0)); + //const auto base = ConstantDataVector::get(m_context, std::vector{31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16}); + //SetVr(op.vd, m_ir->CreateSub(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}))); SetVr(op.vd, Call(GetType(), m_pure_attr, "__lvsr", addr)); }