diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp
index 681ba5f432..522a8a572a 100644
--- a/Source/Core/Common/Src/ABI.cpp
+++ b/Source/Core/Common/Src/ABI.cpp
@@ -107,6 +107,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
ABI_RestoreStack(1 * 4);
}
+// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
{
ABI_AlignStack(2 * 4);
@@ -216,18 +217,18 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
CALL(func);
}
-// Pass a register as a paremeter.
+// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
if (reg2 != ABI_PARAM1) {
if (reg1 != ABI_PARAM1)
- MOV(32, R(ABI_PARAM1), R(reg1));
+ MOV(64, R(ABI_PARAM1), R(reg1));
if (reg2 != ABI_PARAM2)
- MOV(32, R(ABI_PARAM2), R(reg2));
+ MOV(64, R(ABI_PARAM2), R(reg2));
} else {
if (reg2 != ABI_PARAM2)
- MOV(32, R(ABI_PARAM2), R(reg2));
+ MOV(64, R(ABI_PARAM2), R(reg2));
if (reg1 != ABI_PARAM1)
- MOV(32, R(ABI_PARAM1), R(reg1));
+ MOV(64, R(ABI_PARAM1), R(reg1));
}
CALL(func);
}
diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj
index 25a74714c0..c1d812659e 100644
--- a/Source/Core/Core/Core.vcproj
+++ b/Source/Core/Core/Core.vcproj
@@ -1938,6 +1938,18 @@
RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.cpp"
>
+
+
+
+
+
+
diff --git a/Source/Core/Core/Src/ConfigManager.cpp b/Source/Core/Core/Src/ConfigManager.cpp
index 766318d282..6c6fc5b555 100644
--- a/Source/Core/Core/Src/ConfigManager.cpp
+++ b/Source/Core/Core/Src/ConfigManager.cpp
@@ -108,7 +108,6 @@ void SConfig::SaveSettings()
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
ini.Set("Core", "DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot);
ini.Set("Core", "Apploader", m_LocalCoreStartupParameter.m_strApploader);
- ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers);
ini.Set("Core", "EnableCheats", m_LocalCoreStartupParameter.bEnableCheats);
ini.Set("Core", "SelectedLanguage", m_LocalCoreStartupParameter.SelectedLanguage);
ini.Set("Core", "MemcardA", m_strMemoryCardA);
@@ -225,7 +224,6 @@ void SConfig::LoadSettings()
ini.Get("Core", "DefaultGCM", &m_LocalCoreStartupParameter.m_strDefaultGCM);
ini.Get("Core", "DVDRoot", &m_LocalCoreStartupParameter.m_strDVDRoot);
ini.Get("Core", "Apploader", &m_LocalCoreStartupParameter.m_strApploader);
- ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true);
ini.Get("Core", "EnableCheats", &m_LocalCoreStartupParameter.bEnableCheats, false);
ini.Get("Core", "SelectedLanguage", &m_LocalCoreStartupParameter.SelectedLanguage, 0);
ini.Get("Core", "MemcardA", &m_strMemoryCardA);
diff --git a/Source/Core/Core/Src/CoreParameter.h b/Source/Core/Core/Src/CoreParameter.h
index c3079a8835..eedeebfb89 100644
--- a/Source/Core/Core/Src/CoreParameter.h
+++ b/Source/Core/Core/Src/CoreParameter.h
@@ -63,7 +63,6 @@ struct SCoreStartupParameter
bool bHLE_BS2;
bool bUseFastMem;
bool bLockThreads;
- bool bOptimizeQuantizers;
bool bEnableCheats;
bool bEnableIsoCache;
diff --git a/Source/Core/Core/Src/HW/MemmapFunctions.cpp b/Source/Core/Core/Src/HW/MemmapFunctions.cpp
index b14be8156b..28e44baecf 100644
--- a/Source/Core/Core/Src/HW/MemmapFunctions.cpp
+++ b/Source/Core/Core/Src/HW/MemmapFunctions.cpp
@@ -126,7 +126,7 @@ inline void hwWriteIOBridge(u32 var, u32 addr) {WII_IOBridge::Write32(var, addr)
inline void hwWriteIOBridge(u64 var, u32 addr) {PanicAlert("hwWriteIOBridge: There's no 64-bit HW write. %08x", addr);}
template
-void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
+inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
{
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
if ((em_address & 0xC8000000) == 0xC8000000)
@@ -204,7 +204,7 @@ void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XC
template
-void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
+inline void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
{
/* Debugging: CheckForBadAddresses##_type(em_address, data, false);*/
if ((em_address & 0xC8000000) == 0xC8000000)
@@ -343,13 +343,6 @@ u16 Read_U16(const u32 _Address)
u32 Read_U32(const u32 _Address)
{
- /*#if MAX_LOGLEVEL >= 4
- if (_Address == 0x00000000)
- {
- //PanicAlert("Program tried to read from [00000000]");
- //return 0x00000000;
- }
- #endif*/
u32 _var = 0;
ReadFromHardware(_var, _Address, _Address, FLAG_READ);
#ifdef ENABLE_MEM_CHECK
diff --git a/Source/Core/Core/Src/LuaInterface.cpp b/Source/Core/Core/Src/LuaInterface.cpp
index ef71d5355f..1fedd31a05 100644
--- a/Source/Core/Core/Src/LuaInterface.cpp
+++ b/Source/Core/Core/Src/LuaInterface.cpp
@@ -2751,7 +2751,6 @@ DEFINE_LUA_FUNCTION(emulua_loadrom, "filename")
// General settings
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
- game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
// Wii settings
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
index 5280f0e0ad..40d68e0655 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
@@ -42,6 +42,7 @@
#include "../PPCAnalyst.h"
#include "../JitCommon/JitCache.h"
+#include "../JitCommon/Jit_Util.h"
#include "JitRegCache.h"
#include "x64Emitter.h"
#include "x64Analyzer.h"
@@ -93,7 +94,7 @@ public:
};
-class Jit64 : public Gen::XCodeBlock
+class Jit64 : public EmuCodeBlock
{
private:
struct JitState
@@ -182,26 +183,14 @@ public:
void WriteRfiExitDestInEAX();
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
-
- void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
- void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
- void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
- void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
- void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
- void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void GenerateCarry(Gen::X64Reg temp_reg);
- void ForceSinglePrecisionS(Gen::X64Reg xmm);
- void ForceSinglePrecisionP(Gen::X64Reg xmm);
- void JitClearCA();
- void JitSetCA();
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
-
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);
void Default(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp
index 67567e9931..60c98721a7 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp
@@ -216,61 +216,6 @@ void AsmRoutineManager::Generate()
GenerateCommon();
}
-
-void AsmRoutineManager::GenFifoWrite(int size)
-{
- // Assume value in ABI_PARAM1
- PUSH(ESI);
- if (size != 32)
- PUSH(EDX);
- BSWAP(size, ABI_PARAM1);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- if (size != 32) {
- MOV(32, R(EDX), R(ABI_PARAM1));
- MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
- } else {
- MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
- }
- ADD(32, R(ESI), Imm8(size >> 3));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- if (size != 32)
- POP(EDX);
- POP(ESI);
- RET();
-}
-
-void AsmRoutineManager::GenFifoFloatWrite()
-{
- // Assume value in XMM0
- PUSH(ESI);
- PUSH(EDX);
- MOVSS(M(&temp32), XMM0);
- MOV(32, R(EDX), M(&temp32));
- BSWAP(32, EDX);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
- ADD(32, R(ESI), Imm8(4));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- POP(EDX);
- POP(ESI);
- RET();
-}
-
-void AsmRoutineManager::GenFifoXmm64Write()
-{
- // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
- PUSH(ESI);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
- ADD(32, R(ESI), Imm8(8));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- POP(ESI);
- RET();
-}
-
void AsmRoutineManager::GenerateCommon()
{
// USES_CR
@@ -298,7 +243,9 @@ void AsmRoutineManager::GenerateCommon()
fifoDirectWriteXmm64 = AlignCode4();
GenFifoXmm64Write();
- computeRcFp = AlignCode16();
+ GenQuantizedLoads();
+ GenQuantizedStores();
+
//CMPSD(R(XMM0), M(&zero),
// TODO
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h
index 923bee3469..ebfc871227 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h
@@ -19,6 +19,7 @@
#define _JITASM_H
#include "x64Emitter.h"
+#include "../JitCommon/JitAsmCommon.h"
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
// code at runtime. In the case of fixed code like this, after writing it, we write
@@ -34,14 +35,11 @@
// To add a new asm routine, just add another const here, and add the code to Generate.
// Also, possibly increase the size of the code buffer.
-class AsmRoutineManager : public Gen::XCodeBlock
+class AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void GenerateCommon();
- void GenFifoWrite(int size);
- void GenFifoFloatWrite();
- void GenFifoXmm64Write();
public:
void Init() {
@@ -65,7 +63,6 @@ public:
const u8 *fpException;
const u8 *computeRc;
- const u8 *computeRcFp;
const u8 *testExceptions;
const u8 *dispatchPcInEAX;
const u8 *doTiming;
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
index 6f28b4af72..43566e189e 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
@@ -70,7 +70,6 @@ protected:
PPCCachedReg saved_regs[32];
X64CachedReg saved_xregs[NUMXREGS];
- void DiscardRegContentsIfCached(int preg);
virtual const int *GetAllocationOrder(int &count) = 0;
XEmitter *emit;
@@ -79,6 +78,7 @@ public:
virtual ~RegCache() {}
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
+ void DiscardRegContentsIfCached(int preg);
void SetEmitter(XEmitter *emitter) {emit = emitter;}
void FlushR(X64Reg reg);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 758e547537..718e0ebe59 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -39,7 +39,7 @@ const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
static u64 GC_ALIGNED16(temp64);
-
+
// TODO(ector): Improve 64-bit version
static void WriteDual32(u64 value, u32 address)
{
@@ -95,27 +95,23 @@ void Jit64::psq_st(UGeckoInstruction inst)
JITDISABLE(LoadStorePaired)
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
- if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
+ if (js.blockSetsQuantizers || !inst.RA)
{
- Default(inst);
- return;
- }
- if (!inst.RA)
- {
- // This really should never happen. Unless we change this to also support stwux
+ // TODO: Support these cases if it becomes necessary.
Default(inst);
return;
}
- const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
- const EQuantizeType stType = static_cast(gqr.ST_TYPE);
- int stScale = gqr.ST_SCALE;
bool update = inst.OPCD == 61;
int offset = inst.SIMM_12;
int a = inst.RA;
int s = inst.RS; // Fp numbers
+ const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
+ const EQuantizeType stType = static_cast(gqr.ST_TYPE);
+ int stScale = gqr.ST_SCALE;
+
if (inst.W) {
// PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update);
// It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only
@@ -165,9 +161,11 @@ void Jit64::psq_st(UGeckoInstruction inst)
Default(inst);
return;
}
- return;
}
+ // Is this specialization still worth it? Let's keep it for now. It's probably
+ // not very risky since a game most likely wouldn't use the same code to process
+ // floats as integers (but you never know....).
if (stType == QUANTIZE_FLOAT)
{
if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3)
@@ -182,115 +180,30 @@ void Jit64::psq_st(UGeckoInstruction inst)
return;
}
}
+ }
- gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
- gpr.Lock(a);
- fpr.Lock(s);
- if (update)
- gpr.LoadToX64(a, true, true);
- MOV(32, R(ABI_PARAM2), gpr.R(a));
- if (offset)
- ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
- TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
- if (update && offset)
- MOV(32, gpr.R(a), R(ABI_PARAM2));
- CVTPD2PS(XMM0, fpr.R(s));
- SHUFPS(XMM0, R(XMM0), 1);
- MOVQ_xmm(M(&temp64), XMM0);
-#ifdef _M_X64
- MOV(64, R(ABI_PARAM1), M(&temp64));
- FixupBranch argh = J_CC(CC_NZ);
- BSWAP(64, ABI_PARAM1);
- MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
- FixupBranch arg2 = J();
- SetJumpTarget(argh);
- CALL(thunks.ProtectFunction((void *)&WriteDual32, 0));
+ gpr.FlushLockX(EAX, EDX);
+ gpr.FlushLockX(ECX);
+ if (update)
+ gpr.LoadToX64(inst.RA, true, true);
+ fpr.LoadToX64(inst.RS, true);
+ MOV(32, R(ECX), gpr.R(inst.RA));
+ if (offset)
+ ADD(32, R(ECX), Imm32((u32)offset));
+ if (update && offset)
+ MOV(32, gpr.R(a), R(ECX));
+ MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]));
+ MOVZX(32, 8, EDX, R(AL));
+ // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
+#ifdef _M_IX86
+ SHL(32, R(EDX), Imm8(2));
#else
- FixupBranch argh = J_CC(CC_NZ);
- MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
- BSWAP(32, ABI_PARAM1);
- AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
- MOV(32, R(ABI_PARAM1), M(&temp64));
- BSWAP(32, ABI_PARAM1);
- MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1));
- FixupBranch arg2 = J();
- SetJumpTarget(argh);
- MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
- ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
- MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
- ADD(32, R(ABI_PARAM2), Imm32(4));
- ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
+ SHL(32, R(EDX), Imm8(3));
#endif
- SetJumpTarget(arg2);
- gpr.UnlockAll();
- gpr.UnlockAllX();
- fpr.UnlockAll();
- }
- else if (stType == QUANTIZE_U8)
- {
- gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
- gpr.Lock(a);
- fpr.Lock(s);
- if (update)
- gpr.LoadToX64(a, true, update);
- MOV(32, R(ABI_PARAM2), gpr.R(a));
- if (offset)
- ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
- if (update && offset)
- MOV(32, gpr.R(a), R(ABI_PARAM2));
- MOVAPD(XMM0, fpr.R(s));
- MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
- MULPD(XMM0, R(XMM1));
- CVTPD2DQ(XMM0, R(XMM0));
- PACKSSDW(XMM0, R(XMM0));
- PACKUSWB(XMM0, R(XMM0));
- MOVD_xmm(M(&temp64), XMM0);
- MOV(16, R(ABI_PARAM1), M(&temp64));
-#ifdef _M_X64
- MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
-#else
- MOV(32, R(EAX), R(ABI_PARAM2));
- AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
-#endif
- if (update)
- MOV(32, gpr.R(a), R(ABI_PARAM2));
- gpr.UnlockAll();
- gpr.UnlockAllX();
- fpr.UnlockAll();
- }
- else if (stType == QUANTIZE_S16)
- {
- gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
- gpr.Lock(a);
- fpr.Lock(s);
- if (update)
- gpr.LoadToX64(a, true, update);
- MOV(32, R(ABI_PARAM2), gpr.R(a));
- if (offset)
- ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
- if (update)
- MOV(32, gpr.R(a), R(ABI_PARAM2));
- MOVAPD(XMM0, fpr.R(s));
- MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
- MULPD(XMM0, R(XMM1));
- SHUFPD(XMM0, R(XMM0), 1);
- CVTPD2DQ(XMM0, R(XMM0));
- PACKSSDW(XMM0, R(XMM0));
- MOVD_xmm(M(&temp64), XMM0);
- MOV(32, R(ABI_PARAM1), M(&temp64));
- SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
- gpr.UnlockAll();
- gpr.UnlockAllX();
- fpr.UnlockAll();
- }
- else {
- // Dodger uses this.
- // mario tennis
- //PanicAlert("st %i:%i", stType, inst.W);
- Default(inst);
- }
+ CVTPD2PS(XMM0, fpr.R(s));
+ CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
+ gpr.UnlockAll();
+ gpr.UnlockAllX();
}
void Jit64::psq_l(UGeckoInstruction inst)
@@ -300,144 +213,35 @@ void Jit64::psq_l(UGeckoInstruction inst)
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
- if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
+ if (js.blockSetsQuantizers || !inst.RA || inst.W)
{
Default(inst);
return;
}
- const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
- const EQuantizeType ldType = static_cast(gqr.LD_TYPE);
- int ldScale = gqr.LD_SCALE;
bool update = inst.OPCD == 57;
- if (!inst.RA || inst.W)
- {
- // 0 1 during load
- //PanicAlert("ld:%i %i", ldType, (int)inst.W);
- Default(inst);
- return;
- }
int offset = inst.SIMM_12;
- switch (ldType) {
- case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
- {
-#ifdef _M_X64
- gpr.LoadToX64(inst.RA, true, update);
- fpr.LoadToX64(inst.RS, false);
- if (cpu_info.bSSSE3) {
- X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
- MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
- PSHUFB(xd, M((void *)pbswapShuffle2x4));
- CVTPS2PD(xd, R(xd));
- } else {
- MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
- BSWAP(64, RAX);
- MOV(64, M(&psTemp[0]), R(RAX));
- X64Reg r = fpr.R(inst.RS).GetSimpleReg();
- CVTPS2PD(r, M(&psTemp[0]));
- SHUFPD(r, R(r), 1);
- }
- if (update && offset != 0)
- ADD(32, gpr.R(inst.RA), Imm32(offset));
- break;
-#else
- if (cpu_info.bSSSE3) {
- gpr.LoadToX64(inst.RA, true, update);
- fpr.LoadToX64(inst.RS, false);
- X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
- MOV(32, R(EAX), gpr.R(inst.RA));
- AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
- MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
- PSHUFB(xd, M((void *)pbswapShuffle2x4));
- CVTPS2PD(xd, R(xd));
- } else {
- gpr.FlushLockX(ECX);
- gpr.LoadToX64(inst.RA, true, update);
- // This can probably be optimized somewhat.
- LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
- BSWAP(32, RAX);
- MOV(32, M(&psTemp[0]), R(RAX));
- MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
- BSWAP(32, RAX);
- MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
- fpr.LoadToX64(inst.RS, false, true);
- X64Reg r = fpr.R(inst.RS).GetSimpleReg();
- CVTPS2PD(r, M(&psTemp[0]));
- gpr.UnlockAllX();
- }
- if (update && offset != 0)
- ADD(32, gpr.R(inst.RA), Imm32(offset));
- break;
-#endif
- }
- case QUANTIZE_U8:
- {
- gpr.LoadToX64(inst.RA, true, update);
-#ifdef _M_X64
- MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
-#else
- LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
- AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
- MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base));
-#endif
- MOV(32, M(&temp64), R(EAX));
- MOVD_xmm(XMM0, M(&temp64));
- // SSE4 optimization opportunity here.
- PXOR(XMM1, R(XMM1));
- PUNPCKLBW(XMM0, R(XMM1));
- PUNPCKLWD(XMM0, R(XMM1));
- CVTDQ2PD(XMM0, R(XMM0));
- fpr.LoadToX64(inst.RS, false, true);
- X64Reg r = fpr.R(inst.RS).GetSimpleReg();
- MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
- MULPD(r, R(XMM0));
- if (update && offset != 0)
- ADD(32, gpr.R(inst.RA), Imm32(offset));
- }
- break;
- case QUANTIZE_S16:
- {
- gpr.LoadToX64(inst.RA, true, update);
-#ifdef _M_X64
- MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
-#else
- LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
- AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
-#endif
- BSWAP(32, EAX);
- MOV(32, M(&temp64), R(EAX));
- fpr.LoadToX64(inst.RS, false, true);
- X64Reg r = fpr.R(inst.RS).GetSimpleReg();
- MOVD_xmm(XMM0, M(&temp64));
- PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
- PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
- CVTDQ2PD(XMM0, R(XMM0));
- MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
- MULPD(r, R(XMM0));
- SHUFPD(r, R(r), 1);
- if (update && offset != 0)
- ADD(32, gpr.R(inst.RA), Imm32(offset));
- }
- break;
- /*
- Dynamic quantizer. Todo when we have a test set.
- MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte.
- AND(32, R(EAX), Imm8(0x3F));
- MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD));
- MOVDDUP(r, MComplex(RCX, EAX, 8, 0));
- */
- default:
- // 4 0
- // 6 0 //power tennis
- // 5 0
- // PanicAlert("ld:%i %i", ldType, (int)inst.W);
- Default(inst);
- return;
- }
-
- //u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
+ gpr.FlushLockX(EAX, EDX);
+ gpr.FlushLockX(ECX);
+ gpr.LoadToX64(inst.RA, true, true);
+ fpr.LoadToX64(inst.RS, false, true);
+ if (offset)
+ LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset));
+ else
+ MOV(32, R(ECX), gpr.R(inst.RA));
+ if (update && offset)
+ MOV(32, gpr.R(inst.RA), R(ECX));
+ MOVZX(32, 16, EAX, M(((char *)&GQR(inst.I)) + 2));
+ MOVZX(32, 8, EDX, R(AL));
+ // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?)
+#ifdef _M_IX86
+ SHL(32, R(EDX), Imm8(2));
+#else
+ SHL(32, R(EDX), Imm8(3));
+#endif
+ CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
+ CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
+ gpr.UnlockAll();
+ gpr.UnlockAllX();
}
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h
index e0838321a0..b6b163a113 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h
@@ -32,6 +32,7 @@
#include "../PPCAnalyst.h"
#include "../JitCommon/JitCache.h"
+#include "../JitCommon/Jit_Util.h"
#include "x64Emitter.h"
#include "x64Analyzer.h"
#include "IR.h"
@@ -85,7 +86,7 @@ public:
};
-class Jit64 : public Gen::XCodeBlock
+class Jit64 : public EmuCodeBlock
{
private:
struct JitState
@@ -175,19 +176,10 @@ public:
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
- void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
- void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
- void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
- void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
-
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void GenerateCarry(Gen::X64Reg temp_reg);
- void ForceSinglePrecisionS(Gen::X64Reg xmm);
- void ForceSinglePrecisionP(Gen::X64Reg xmm);
- void JitClearCA();
- void JitSetCA();
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp
index 329e103524..0723b97a01 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp
@@ -215,403 +215,6 @@ void AsmRoutineManager::Generate()
GenerateCommon();
}
-const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
-
-const float m_quantizeTableS[] =
-{
- (1 << 0), (1 << 1), (1 << 2), (1 << 3),
- (1 << 4), (1 << 5), (1 << 6), (1 << 7),
- (1 << 8), (1 << 9), (1 << 10), (1 << 11),
- (1 << 12), (1 << 13), (1 << 14), (1 << 15),
- (1 << 16), (1 << 17), (1 << 18), (1 << 19),
- (1 << 20), (1 << 21), (1 << 22), (1 << 23),
- (1 << 24), (1 << 25), (1 << 26), (1 << 27),
- (1 << 28), (1 << 29), (1 << 30), (1 << 31),
- 1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29),
- 1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25),
- 1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21),
- 1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17),
- 1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13),
- 1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9),
- 1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5),
- 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
-};
-
-const float m_dequantizeTableS[] =
-{
- 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
- 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
- 1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11),
- 1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15),
- 1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19),
- 1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23),
- 1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27),
- 1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31),
- (1ULL << 32), (1 << 31), (1 << 30), (1 << 29),
- (1 << 28), (1 << 27), (1 << 26), (1 << 25),
- (1 << 24), (1 << 23), (1 << 22), (1 << 21),
- (1 << 20), (1 << 19), (1 << 18), (1 << 17),
- (1 << 16), (1 << 15), (1 << 14), (1 << 13),
- (1 << 12), (1 << 11), (1 << 10), (1 << 9),
- (1 << 8), (1 << 7), (1 << 6), (1 << 5),
- (1 << 4), (1 << 3), (1 << 2), (1 << 1),
-};
-
-float psTemp[2];
-
-const float m_65535 = 65535.0f;
-
-
-#define QUANTIZE_OVERFLOW_SAFE
-
-// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range
-// while it's OK for large negatives, it isn't for positives
-// I don't know whether the overflow actually happens in any games
-// but it potentially can cause problems, so we need some clamping
-
-// TODO(ector): Improve 64-bit version
-static void WriteDual32(u64 value, u32 address)
-{
- Memory::Write_U32((u32)(value >> 32), address);
- Memory::Write_U32((u32)value, address + 4);
-}
-
-void AsmRoutineManager::GenQuantizedStores() {
- const u8* storePairedIllegal = AlignCode4();
- UD2();
- const u8* storePairedFloat = AlignCode4();
- // IN: value = XMM0, two singles in bottom. PPC address = ECX.
-#ifdef _M_X64
- // INT3();
- MOVQ_xmm(M(&psTemp[0]), XMM0);
- MOV(64, R(RAX), M(&psTemp[0]));
- //INT3();
- //MOVQ_xmm(R(RAX), XMM0);
- //INT3();
- ROL(64, R(RAX), Imm8(32)); // Swap the two - the big BSWAP will unswap.
- TEST(32, R(ECX), Imm32(0x0C000000));
- FixupBranch argh = J_CC(CC_NZ);
- BSWAP(64, RAX);
- MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
- FixupBranch arg2 = J();
- SetJumpTarget(argh);
- ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
- SetJumpTarget(arg2);
-#else
- MOVQ_xmm(M(&psTemp[0]), XMM0);
- TEST(32, R(ECX), Imm32(0x0C000000));
- FixupBranch argh = J_CC(CC_NZ);
- MOV(32, R(EAX), M(&psTemp));
- BSWAP(32, EAX);
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
- MOV(32, R(EAX), M(((char*)&psTemp) + 4));
- BSWAP(32, EAX);
- MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
- FixupBranch arg2 = J();
- SetJumpTarget(argh);
- MOV(32, R(EAX), M(((char*)&psTemp)));
- ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
- MOV(32, R(EAX), M(((char*)&psTemp)+4));
- ADD(32, R(ECX), Imm32(4));
- ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
- SetJumpTarget(arg2);
-#endif
- RET();
-
- const u8* storePairedU8 = AlignCode4();
- //INT3();
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
-#ifdef QUANTIZE_OVERFLOW_SAFE
- MOVSS(XMM1, M((void *)&m_65535));
- PUNPCKLDQ(XMM1, R(XMM1));
- MINPS(XMM0, R(XMM1));
-#endif
- CVTPS2DQ(XMM0, R(XMM0));
- PACKSSDW(XMM0, R(XMM0));
- PACKUSWB(XMM0, R(XMM0));
- MOVD_xmm(R(EAX), XMM0);
-#ifdef _M_X64
- MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
-#endif
- RET();
-
- const u8* storePairedS8 = AlignCode4();
- //INT3();
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
-#ifdef QUANTIZE_OVERFLOW_SAFE
- MOVSS(XMM1, M((void *)&m_65535));
- PUNPCKLDQ(XMM1, R(XMM1));
- MINPS(XMM0, R(XMM1));
-#endif
- CVTPS2DQ(XMM0, R(XMM0));
- PACKSSDW(XMM0, R(XMM0));
- PACKSSWB(XMM0, R(XMM0));
- MOVD_xmm(R(EAX), XMM0);
-#ifdef _M_X64
- MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
-#endif
- RET();
-
- const u8* storePairedU16 = AlignCode4();
- //INT3();
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
-
- // PACKUSDW is available only in SSE4
- PXOR(XMM1, R(XMM1));
- MAXPS(XMM0, R(XMM1));
- MOVSS(XMM1, M((void *)&m_65535));
- PUNPCKLDQ(XMM1, R(XMM1));
- MINPS(XMM0, R(XMM1));
-
- CVTPS2DQ(XMM0, R(XMM0));
- MOVQ_xmm(M(psTemp), XMM0);
- // place ps[0] into the higher word, ps[1] into the lower
- // so no need in ROL after BSWAP
- MOVZX(32, 16, EAX, M((char*)psTemp + 0));
- SHL(32, R(EAX), Imm8(16));
- MOV(16, R(AX), M((char*)psTemp + 4));
-
- BSWAP(32, EAX);
- //ROL(32, R(EAX), Imm8(16));
-#ifdef _M_X64
- MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
-#endif
- RET();
-
- const u8* storePairedS16 = AlignCode4();
- //INT3();
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
-#ifdef QUANTIZE_OVERFLOW_SAFE
- MOVSS(XMM1, M((void *)&m_65535));
- PUNPCKLDQ(XMM1, R(XMM1));
- MINPS(XMM0, R(XMM1));
-#endif
- CVTPS2DQ(XMM0, R(XMM0));
- PACKSSDW(XMM0, R(XMM0));
- MOVD_xmm(R(EAX), XMM0);
- BSWAP(32, EAX);
- ROL(32, R(EAX), Imm8(16));
-#ifdef _M_X64
- MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
-#endif
- RET();
-
- pairedStoreQuantized[0] = storePairedFloat;
- pairedStoreQuantized[1] = storePairedIllegal;
- pairedStoreQuantized[2] = storePairedIllegal;
- pairedStoreQuantized[3] = storePairedIllegal;
- pairedStoreQuantized[4] = storePairedU8;
- pairedStoreQuantized[5] = storePairedU16;
- pairedStoreQuantized[6] = storePairedS8;
- pairedStoreQuantized[7] = storePairedS16;
-}
-
-void AsmRoutineManager::GenQuantizedLoads() {
- const u8* loadPairedIllegal = AlignCode4();
- UD2();
- const u8* loadPairedFloat = AlignCode4();
- if (cpu_info.bSSSE3) {
-#ifdef _M_X64
- MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
-#endif
- PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
- } else {
-#ifdef _M_X64
- MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
- BSWAP(64, RCX);
- ROL(64, R(RCX), Imm8(32));
- MOVQ_xmm(XMM0, R(RCX));
-#else
-#if 0
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
- PXOR(XMM1, R(XMM1));
- PSHUFLW(XMM0, R(XMM0), 0xB1);
- MOVAPD(XMM1, R(XMM0));
- PSRLW(XMM0, 8);
- PSLLW(XMM1, 8);
- POR(XMM0, R(XMM1));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
- BSWAP(32, EAX);
- MOV(32, M(&psTemp[0]), R(RAX));
- MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
- BSWAP(32, EAX);
- MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
- MOVQ_xmm(XMM0, M(&psTemp[0]));
-#endif
-#endif
- }
- RET();
-
- const u8* loadPairedU8 = AlignCode4();
-#ifdef _M_X64
- MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
-#endif
- MOVD_xmm(XMM0, R(ECX));
- PXOR(XMM1, R(XMM1));
- PUNPCKLBW(XMM0, R(XMM1));
- PUNPCKLWD(XMM0, R(XMM1));
- CVTDQ2PS(XMM0, R(XMM0));
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
- RET();
-
- const u8* loadPairedS8 = AlignCode4();
-#ifdef _M_X64
- MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
-#endif
- MOVD_xmm(XMM0, R(ECX));
- PUNPCKLBW(XMM0, R(XMM0));
- PUNPCKLWD(XMM0, R(XMM0));
- PSRAD(XMM0, 24);
- CVTDQ2PS(XMM0, R(XMM0));
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
- RET();
-
- const u8* loadPairedU16 = AlignCode4();
-#ifdef _M_X64
- MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
-#endif
- BSWAP(32, ECX);
- ROL(32, R(ECX), Imm8(16));
- MOVD_xmm(XMM0, R(ECX));
- PXOR(XMM1, R(XMM1));
- PUNPCKLWD(XMM0, R(XMM1));
- CVTDQ2PS(XMM0, R(XMM0));
- SHR(32, R(EAX), Imm8(6));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
- RET();
-
- const u8* loadPairedS16 = AlignCode4();
-#ifdef _M_X64
- MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
-#else
- AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
- MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
-#endif
- BSWAP(32, ECX);
- ROL(32, R(ECX), Imm8(16));
- MOVD_xmm(XMM0, R(ECX));
- PUNPCKLWD(XMM0, R(XMM0));
- PSRAD(XMM0, 16);
- CVTDQ2PS(XMM0, R(XMM0));
- SHR(32, R(EAX), Imm8(6));
- AND(32, R(EAX), Imm32(0xFC));
- MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
- PUNPCKLDQ(XMM1, R(XMM1));
- MULPS(XMM0, R(XMM1));
- RET();
-
- pairedLoadQuantized[0] = loadPairedFloat;
- pairedLoadQuantized[1] = loadPairedIllegal;
- pairedLoadQuantized[2] = loadPairedIllegal;
- pairedLoadQuantized[3] = loadPairedIllegal;
- pairedLoadQuantized[4] = loadPairedU8;
- pairedLoadQuantized[5] = loadPairedU16;
- pairedLoadQuantized[6] = loadPairedS8;
- pairedLoadQuantized[7] = loadPairedS16;
-}
-
-void AsmRoutineManager::GenFifoWrite(int size)
-{
- // Assume value in ABI_PARAM1
- PUSH(ESI);
- if (size != 32)
- PUSH(EDX);
- BSWAP(size, ABI_PARAM1);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- if (size != 32) {
- MOV(32, R(EDX), R(ABI_PARAM1));
- MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
- } else {
- MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
- }
- ADD(32, R(ESI), Imm8(size >> 3));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- if (size != 32)
- POP(EDX);
- POP(ESI);
- RET();
-}
-
-void AsmRoutineManager::GenFifoFloatWrite()
-{
- // Assume value in XMM0
- PUSH(ESI);
- PUSH(EDX);
- MOVSS(M(&temp32), XMM0);
- MOV(32, R(EDX), M(&temp32));
- BSWAP(32, EDX);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
- ADD(32, R(ESI), Imm8(4));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- POP(EDX);
- POP(ESI);
- RET();
-}
-
-void AsmRoutineManager::GenFifoXmm64Write()
-{
- // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
- PUSH(ESI);
- MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
- MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
- MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
- ADD(32, R(ESI), Imm8(8));
- MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
- POP(ESI);
- RET();
-}
-
void AsmRoutineManager::GenerateCommon()
{
// USES_CR
@@ -649,7 +252,6 @@ void AsmRoutineManager::GenerateCommon()
GenQuantizedLoads();
GenQuantizedStores();
- computeRcFp = AlignCode16();
//CMPSD(R(XMM0), M(&zero),
// TODO
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h
index fb296b2cf9..051c486290 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h
@@ -19,6 +19,7 @@
#define _JITASM_H
#include "x64Emitter.h"
+#include "../JitCommon/JitAsmCommon.h"
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
// code at runtime. In the case of fixed code like this, after writing it, we write
@@ -34,16 +35,11 @@
// To add a new asm routine, just add another const here, and add the code to Generate.
// Also, possibly increase the size of the code buffer.
-class AsmRoutineManager : public Gen::XCodeBlock
+class AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void GenerateCommon();
- void GenFifoWrite(int size);
- void GenFifoFloatWrite();
- void GenFifoXmm64Write(); // yes, 32 & 64-bit compatible
- void GenQuantizedLoads();
- void GenQuantizedStores();
public:
void Init() {
@@ -67,7 +63,6 @@ public:
const u8 *fpException;
const u8 *computeRc;
- const u8 *computeRcFp;
const u8 *testExceptions;
const u8 *dispatchPcInEAX;
const u8 *doTiming;
@@ -82,8 +77,6 @@ public:
const u8 *doReJit;
- const u8 *pairedLoadQuantized[8];
- const u8 *pairedStoreQuantized[8];
bool compareEnabled;
};
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp
index b39a3e1193..9c486efb54 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp
@@ -15,9 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
-// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
-// Should give a very noticable speed boost to paired single heavy code.
-
#include "Common.h"
#include "Thunk.h"
@@ -39,9 +36,8 @@
void Jit64::psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START
- DISABLE64
JITDISABLE(LoadStorePaired)
- if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
+ if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -55,9 +51,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
void Jit64::psq_l(UGeckoInstruction inst)
{
INSTRUCTION_START
- DISABLE64
JITDISABLE(LoadStorePaired)
- if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
+ if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
new file mode 100644
index 0000000000..ad8e5c31d5
--- /dev/null
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
@@ -0,0 +1,394 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "ABI.h"
+#include "Thunk.h"
+#include "CPUDetect.h"
+#include "x64Emitter.h"
+
+#include "../../HW/Memmap.h"
+
+#include "../PowerPC.h"
+#include "../../CoreTiming.h"
+#include "MemoryUtil.h"
+
+#include "ABI.h"
+#include "../JitCommon/JitCache.h"
+
+#include "../../HW/GPFifo.h"
+#include "../../Core.h"
+#include "JitAsmCommon.h"
+
+using namespace Gen;
+
+static int temp32;
+
+void CommonAsmRoutines::GenFifoWrite(int size)
+{
+ // Assume value in ABI_PARAM1
+ PUSH(ESI);
+ if (size != 32)
+ PUSH(EDX);
+ BSWAP(size, ABI_PARAM1);
+ MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
+ MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
+ if (size != 32) {
+ MOV(32, R(EDX), R(ABI_PARAM1));
+ MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
+ } else {
+ MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
+ }
+ ADD(32, R(ESI), Imm8(size >> 3));
+ MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
+ if (size != 32)
+ POP(EDX);
+ POP(ESI);
+ RET();
+}
+
+void CommonAsmRoutines::GenFifoFloatWrite()
+{
+ // Assume value in XMM0
+ PUSH(ESI);
+ PUSH(EDX);
+ MOVSS(M(&temp32), XMM0);
+ MOV(32, R(EDX), M(&temp32));
+ BSWAP(32, EDX);
+ MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
+ MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
+ MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
+ ADD(32, R(ESI), Imm8(4));
+ MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
+ POP(EDX);
+ POP(ESI);
+ RET();
+}
+
+void CommonAsmRoutines::GenFifoXmm64Write()
+{
+ // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
+ PUSH(ESI);
+ MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
+ MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
+ MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
+ ADD(32, R(ESI), Imm8(8));
+ MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
+ POP(ESI);
+ RET();
+}
+
+// Safe + Fast Quantizers, originally from JITIL by magumagu
+
+static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
+
+static const float GC_ALIGNED16(m_quantizeTableS[]) =
+{
+ (1 << 0), (1 << 1), (1 << 2), (1 << 3),
+ (1 << 4), (1 << 5), (1 << 6), (1 << 7),
+ (1 << 8), (1 << 9), (1 << 10), (1 << 11),
+ (1 << 12), (1 << 13), (1 << 14), (1 << 15),
+ (1 << 16), (1 << 17), (1 << 18), (1 << 19),
+ (1 << 20), (1 << 21), (1 << 22), (1 << 23),
+ (1 << 24), (1 << 25), (1 << 26), (1 << 27),
+ (1 << 28), (1 << 29), (1 << 30), (1 << 31),
+ 1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29),
+ 1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25),
+ 1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21),
+ 1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17),
+ 1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13),
+ 1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9),
+ 1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5),
+ 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
+};
+
+static const float GC_ALIGNED16(m_dequantizeTableS[]) =
+{
+ 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
+ 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
+ 1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11),
+ 1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15),
+ 1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19),
+ 1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23),
+ 1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27),
+ 1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31),
+ (1ULL << 32), (1 << 31), (1 << 30), (1 << 29),
+ (1 << 28), (1 << 27), (1 << 26), (1 << 25),
+ (1 << 24), (1 << 23), (1 << 22), (1 << 21),
+ (1 << 20), (1 << 19), (1 << 18), (1 << 17),
+ (1 << 16), (1 << 15), (1 << 14), (1 << 13),
+ (1 << 12), (1 << 11), (1 << 10), (1 << 9),
+ (1 << 8), (1 << 7), (1 << 6), (1 << 5),
+ (1 << 4), (1 << 3), (1 << 2), (1 << 1),
+};
+
+static float GC_ALIGNED16(psTemp[4]);
+
+static const float m_65535 = 65535.0f;
+
+
+#define QUANTIZE_OVERFLOW_SAFE
+
+// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range
+// while it's OK for large negatives, it isn't for positives
+// I don't know whether the overflow actually happens in any games
+// but it potentially can cause problems, so we need some clamping
+
+// TODO(ector): Improve 64-bit version
+static void WriteDual32(u64 value, u32 address)
+{
+ Memory::Write_U32((u32)(value >> 32), address);
+ Memory::Write_U32((u32)value, address + 4);
+}
+
+// See comment in header for in/outs.
+void CommonAsmRoutines::GenQuantizedStores() {
+ const u8* storePairedIllegal = AlignCode4();
+ UD2();
+ const u8* storePairedFloat = AlignCode4();
+
+#ifdef _M_X64
+ SHUFPS(XMM0, R(XMM0), 1);
+ MOVQ_xmm(M(&psTemp[0]), XMM0);
+ MOV(64, R(RAX), M(&psTemp[0]));
+ TEST(32, R(ECX), Imm32(0x0C000000));
+ FixupBranch too_complex = J_CC(CC_NZ);
+ BSWAP(64, RAX);
+ MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
+ FixupBranch skip_complex = J();
+ SetJumpTarget(too_complex);
+ ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
+ SetJumpTarget(skip_complex);
+ RET();
+#else
+ MOVQ_xmm(M(&psTemp[0]), XMM0);
+ TEST(32, R(ECX), Imm32(0x0C000000));
+ FixupBranch argh = J_CC(CC_NZ);
+ MOV(32, R(EAX), M(&psTemp));
+ BSWAP(32, EAX);
+ AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
+ MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
+ MOV(32, R(EAX), M(((char*)&psTemp) + 4));
+ BSWAP(32, EAX);
+ MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
+ FixupBranch arg2 = J();
+ SetJumpTarget(argh);
+ MOV(32, R(EAX), M(((char*)&psTemp)));
+ ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
+ MOV(32, R(EAX), M(((char*)&psTemp)+4));
+ ADD(32, R(ECX), Imm32(4));
+ ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
+ SetJumpTarget(arg2);
+ RET();
+#endif
+
+ const u8* storePairedU8 = AlignCode4();
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+#ifdef QUANTIZE_OVERFLOW_SAFE
+ MOVSS(XMM1, M((void *)&m_65535));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MINPS(XMM0, R(XMM1));
+#endif
+ CVTPS2DQ(XMM0, R(XMM0));
+ PACKSSDW(XMM0, R(XMM0));
+ PACKUSWB(XMM0, R(XMM0));
+ MOVD_xmm(R(EAX), XMM0);
+ SafeWriteRegToReg(AX, ECX, 16, 0, false);
+
+ RET();
+
+ const u8* storePairedS8 = AlignCode4();
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+#ifdef QUANTIZE_OVERFLOW_SAFE
+ MOVSS(XMM1, M((void *)&m_65535));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MINPS(XMM0, R(XMM1));
+#endif
+ CVTPS2DQ(XMM0, R(XMM0));
+ PACKSSDW(XMM0, R(XMM0));
+ PACKSSWB(XMM0, R(XMM0));
+ MOVD_xmm(R(EAX), XMM0);
+
+ SafeWriteRegToReg(AX, ECX, 16, 0, false);
+
+ RET();
+
+ const u8* storePairedU16 = AlignCode4();
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+
+ // PACKUSDW is available only in SSE4
+ PXOR(XMM1, R(XMM1));
+ MAXPS(XMM0, R(XMM1));
+ MOVSS(XMM1, M((void *)&m_65535));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MINPS(XMM0, R(XMM1));
+
+ CVTPS2DQ(XMM0, R(XMM0));
+ MOVQ_xmm(M(psTemp), XMM0);
+ // place ps[0] into the higher word, ps[1] into the lower
+ // so there is no need for a ROL after BSWAP
+ MOVZX(32, 16, EAX, M((char*)psTemp + 0));
+ SHL(32, R(EAX), Imm8(16));
+ MOV(16, R(AX), M((char*)psTemp + 4));
+
+ BSWAP(32, EAX);
+ SafeWriteRegToReg(EAX, ECX, 32, 0, false);
+
+ RET();
+
+ const u8* storePairedS16 = AlignCode4();
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
+ // SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though.
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+#ifdef QUANTIZE_OVERFLOW_SAFE
+ MOVSS(XMM1, M((void *)&m_65535));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MINPS(XMM0, R(XMM1));
+#endif
+ CVTPS2DQ(XMM0, R(XMM0));
+ PACKSSDW(XMM0, R(XMM0));
+ MOVD_xmm(R(EAX), XMM0);
+ BSWAP(32, EAX);
+ ROL(32, R(EAX), Imm8(16));
+ SafeWriteRegToReg(EAX, ECX, 32, 0, false);
+
+ RET();
+
+ pairedStoreQuantized[0] = storePairedFloat;
+ pairedStoreQuantized[1] = storePairedIllegal;
+ pairedStoreQuantized[2] = storePairedIllegal;
+ pairedStoreQuantized[3] = storePairedIllegal;
+ pairedStoreQuantized[4] = storePairedU8;
+ pairedStoreQuantized[5] = storePairedU16;
+ pairedStoreQuantized[6] = storePairedS8;
+ pairedStoreQuantized[7] = storePairedS16;
+}
+
+void CommonAsmRoutines::GenQuantizedLoads() {
+ const u8* loadPairedIllegal = AlignCode4();
+ UD2();
+ const u8* loadPairedFloat = AlignCode4();
+ if (cpu_info.bSSSE3) {
+#ifdef _M_X64
+ MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
+#else
+ AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
+ MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
+#endif
+ PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
+ } else {
+#ifdef _M_X64
+ MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
+ BSWAP(64, RCX);
+ ROL(64, R(RCX), Imm8(32));
+ MOVQ_xmm(XMM0, R(RCX));
+#else
+#if 0
+ AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
+ MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
+ PXOR(XMM1, R(XMM1));
+ PSHUFLW(XMM0, R(XMM0), 0xB1);
+ MOVAPD(XMM1, R(XMM0));
+ PSRLW(XMM0, 8);
+ PSLLW(XMM1, 8);
+ POR(XMM0, R(XMM1));
+#else
+ AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
+ MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
+ BSWAP(32, EAX);
+ MOV(32, M(&psTemp[0]), R(RAX));
+ MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
+ BSWAP(32, EAX);
+ MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
+ MOVQ_xmm(XMM0, M(&psTemp[0]));
+#endif
+#endif
+ }
+ RET();
+
+ const u8* loadPairedU8 = AlignCode4();
+ UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
+ MOVD_xmm(XMM0, R(ECX));
+ PXOR(XMM1, R(XMM1));
+ PUNPCKLBW(XMM0, R(XMM1));
+ PUNPCKLWD(XMM0, R(XMM1));
+ CVTDQ2PS(XMM0, R(XMM0));
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+ RET();
+
+ const u8* loadPairedS8 = AlignCode4();
+ UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
+ MOVD_xmm(XMM0, R(ECX));
+ PUNPCKLBW(XMM0, R(XMM0));
+ PUNPCKLWD(XMM0, R(XMM0));
+ PSRAD(XMM0, 24);
+ CVTDQ2PS(XMM0, R(XMM0));
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+ RET();
+
+ const u8* loadPairedU16 = AlignCode4();
+ UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
+ ROL(32, R(ECX), Imm8(16));
+ MOVD_xmm(XMM0, R(ECX));
+ PXOR(XMM1, R(XMM1));
+ PUNPCKLWD(XMM0, R(XMM1));
+ CVTDQ2PS(XMM0, R(XMM0));
+ SHR(32, R(EAX), Imm8(6));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+ RET();
+
+ const u8* loadPairedS16 = AlignCode4();
+ UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
+ ROL(32, R(ECX), Imm8(16));
+ MOVD_xmm(XMM0, R(ECX));
+ PUNPCKLWD(XMM0, R(XMM0));
+ PSRAD(XMM0, 16);
+ CVTDQ2PS(XMM0, R(XMM0));
+ SHR(32, R(EAX), Imm8(6));
+ AND(32, R(EAX), Imm32(0xFC));
+ MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
+ PUNPCKLDQ(XMM1, R(XMM1));
+ MULPS(XMM0, R(XMM1));
+ RET();
+
+ pairedLoadQuantized[0] = loadPairedFloat;
+ pairedLoadQuantized[1] = loadPairedIllegal;
+ pairedLoadQuantized[2] = loadPairedIllegal;
+ pairedLoadQuantized[3] = loadPairedIllegal;
+ pairedLoadQuantized[4] = loadPairedU8;
+ pairedLoadQuantized[5] = loadPairedU16;
+ pairedLoadQuantized[6] = loadPairedS8;
+ pairedLoadQuantized[7] = loadPairedS16;
+}
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h
new file mode 100644
index 0000000000..f84fa76fbf
--- /dev/null
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h
@@ -0,0 +1,47 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _JITASMCOMMON_H
+#define _JITASMCOMMON_H
+
+#include "../JitCommon/Jit_Util.h"
+
+class CommonAsmRoutines : public EmuCodeBlock {
+protected:
+ void GenQuantizedLoads();
+ void GenQuantizedStores();
+public:
+ void GenFifoWrite(int size);
+ void GenFifoXmm64Write();
+ void GenFifoFloatWrite();
+
+ // In: array index: GQR to use.
+ // In: ECX: Address to read from.
+ // Out: XMM0: Bottom two 32-bit slots hold the read value,
+ // converted to a pair of floats.
+ // Trashes: EAX ECX EDX
+ const u8 GC_ALIGNED16(*pairedLoadQuantized[8]);
+
+ // In: array index: GQR to use.
+ // In: ECX: Address to write to.
+ // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
+ // Out: Nothing.
+ // Trashes: EAX ECX EDX
+ const u8 GC_ALIGNED16(*pairedStoreQuantized[8]);
+};
+
+#endif
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp
index 98f067fbd3..e15ba7d5df 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp
@@ -288,7 +288,7 @@ bool JitBlock::ContainsAddress(u32 em_address)
block_numbers->push_back(i);
}
- u32 JitBlockCache::GetOriginalFirstOp(u32 block_num)
+ u32 JitBlockCache::GetOriginalFirstOp(int block_num)
{
if (block_num >= num_blocks)
{
@@ -298,9 +298,9 @@ bool JitBlock::ContainsAddress(u32 em_address)
return blocks[block_num].originalFirstOpcode;
}
- CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber)
+ CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int block_num)
{
- return (CompiledCode)blockCodePointers[blockNumber];
+ return (CompiledCode)blockCodePointers[block_num];
}
//Block linker
@@ -351,25 +351,25 @@ bool JitBlock::ContainsAddress(u32 em_address)
}
}
- void JitBlockCache::DestroyBlock(int blocknum, bool invalidate)
+ void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
{
- if (blocknum < 0 || blocknum >= num_blocks)
+ if (block_num < 0 || block_num >= num_blocks)
{
- PanicAlert("DestroyBlock: Invalid block number %d", blocknum);
+ PanicAlert("DestroyBlock: Invalid block number %d", block_num);
return;
}
- JitBlock &b = blocks[blocknum];
+ JitBlock &b = blocks[block_num];
if (b.invalid)
{
if (invalidate)
- PanicAlert("Invalidating invalid block %d", blocknum);
+ PanicAlert("Invalidating invalid block %d", block_num);
return;
}
b.invalid = true;
#ifdef JIT_UNLIMITED_ICACHE
Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode);
#else
- if (Memory::ReadFast32(b.originalAddress) == blocknum)
+ if (Memory::ReadFast32(b.originalAddress) == block_num)
Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress);
#endif
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h
index e72bb6b4e9..20f9d759bb 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h
@@ -130,12 +130,12 @@ public:
// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
void GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers);
- u32 GetOriginalFirstOp(u32 block_num);
- CompiledCode GetCompiledCodeFromBlock(int blockNumber);
+ u32 GetOriginalFirstOp(int block_num);
+ CompiledCode GetCompiledCodeFromBlock(int block_num);
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 em_address);
- void DestroyBlock(int blocknum, bool invalidate);
+ void DestroyBlock(int block_num, bool invalidate);
// Not currently used
//void DestroyBlocksWithFlag(BlockFlag death_flag);
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
index 8f3e643468..74784e7d30 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -39,17 +39,17 @@
using namespace Gen;
-void Jit64::JitClearCA()
+void EmuCodeBlock::JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
-void Jit64::JitSetCA()
+void EmuCodeBlock::JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
-void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
+void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
@@ -74,7 +74,17 @@ void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize
}
}
-void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
+void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
+{
+#ifdef _M_IX86
+ AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
+ MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
+#else
+ MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
+#endif
+}
+
+void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
@@ -96,12 +106,12 @@ void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signEx
SetJumpTarget(arg2);
}
-void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
+void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
- BSWAP(accessSize, reg_value);
+ if (swap) BSWAP(accessSize, reg_value);
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
@@ -111,7 +121,7 @@ void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSiz
}
// Destroys both arg registers
-void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
+void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
if (offset)
ADD(32, R(reg_addr), Imm32(offset));
@@ -125,11 +135,11 @@ void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize,
}
FixupBranch arg2 = J();
SetJumpTarget(argh);
- UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
+ UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
SetJumpTarget(arg2);
}
-void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
+void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
{
#ifdef _M_X64
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
@@ -138,7 +148,7 @@ void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 ad
#endif
}
-void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
+void EmuCodeBlock::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
{
#ifdef _M_X64
MOV(32, R(RAX), Imm32(address));
@@ -148,18 +158,18 @@ void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
#endif
}
-void Jit64::ForceSinglePrecisionS(X64Reg xmm) {
+void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
- if (jo.accurateSinglePrecision)
+ if (jit.jo.accurateSinglePrecision)
{
CVTSD2SS(xmm, R(xmm));
CVTSS2SD(xmm, R(xmm));
}
}
-void Jit64::ForceSinglePrecisionP(X64Reg xmm) {
+void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
- if (jo.accurateSinglePrecision)
+ if (jit.jo.accurateSinglePrecision)
{
CVTPD2PS(xmm, R(xmm));
CVTPS2PD(xmm, R(xmm));
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
new file mode 100644
index 0000000000..4fad3db64a
--- /dev/null
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _JITUTIL_H
+#define _JITUTIL_H
+
+#include "x64Emitter.h"
+
+// Like XCodeBlock but has some utilities for memory access.
+class EmuCodeBlock : public Gen::XCodeBlock {
+public:
+ void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
+ void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
+ void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
+ void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
+ void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
+
+ void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
+ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
+ void JitClearCA();
+ void JitSetCA();
+
+ void ForceSinglePrecisionS(Gen::X64Reg xmm);
+ void ForceSinglePrecisionP(Gen::X64Reg xmm);
+};
+
+#endif // _JITUTIL_H
diff --git a/Source/Core/Core/Src/SConscript b/Source/Core/Core/Src/SConscript
index c3b03c6e9f..3ea24ce9b0 100644
--- a/Source/Core/Core/Src/SConscript
+++ b/Source/Core/Core/Src/SConscript
@@ -85,7 +85,8 @@ files = ["ActionReplay.cpp",
"PowerPC/Interpreter/Interpreter_LoadStore.cpp",
"PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp",
"PowerPC/Interpreter/Interpreter_SystemRegisters.cpp",
- "PowerPC/Interpreter/Interpreter_Tables.cpp",
+ "PowerPC/Interpreter/Interpreter_Tables.cpp",
+ "PowerPC/JitCommon/JitAsmCommon.cpp",
"PowerPC/JitCommon/JitCache.cpp",
"PowerPC/JitCommon/JitBackpatch.cpp",
"PowerPC/JitCommon/Jit_Util.cpp",
diff --git a/Source/Core/DolphinWX/Src/BootManager.cpp b/Source/Core/DolphinWX/Src/BootManager.cpp
index cec4521124..10bef0d7bd 100644
--- a/Source/Core/DolphinWX/Src/BootManager.cpp
+++ b/Source/Core/DolphinWX/Src/BootManager.cpp
@@ -122,7 +122,6 @@ bool BootCore(const std::string& _rFilename)
// General settings
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
- game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
// Wii settings
diff --git a/Source/Core/DolphinWX/Src/ConfigMain.cpp b/Source/Core/DolphinWX/Src/ConfigMain.cpp
index 233a5f6937..2abeab78eb 100644
--- a/Source/Core/DolphinWX/Src/ConfigMain.cpp
+++ b/Source/Core/DolphinWX/Src/ConfigMain.cpp
@@ -60,13 +60,12 @@ EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_LEDS, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_SPEAKERS, CConfigMain::CoreSettingsChanged)
EVT_CHOICE(ID_INTERFACE_LANG, CConfigMain::CoreSettingsChanged)
-EVT_CHECKBOX(ID_ALLWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
+EVT_CHECKBOX(ID_ALWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
EVT_RADIOBUTTON(ID_RADIOJIT, CConfigMain::CoreSettingsChanged)
EVT_RADIOBUTTON(ID_RADIOINT, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_CPUTHREAD, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_DSPTHREAD, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_LOCKTHREADS, CConfigMain::CoreSettingsChanged)
-EVT_CHECKBOX(ID_OPTIMIZEQUANTIZERS, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_IDLESKIP, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_ENABLECHEATS, CConfigMain::CoreSettingsChanged)
EVT_CHOICE(ID_FRAMELIMIT, CConfigMain::CoreSettingsChanged)
@@ -142,7 +141,6 @@ void CConfigMain::UpdateGUI()
CPUThread->Disable();
DSPThread->Disable();
LockThreads->Disable();
- OptimizeQuantizers->Disable();
SkipIdle->Disable();
EnableCheats->Disable();
@@ -222,15 +220,13 @@ void CConfigMain::CreateGUIControls()
// Core Settings - Advanced
//
- AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALLWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
+ AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
AlwaysHLE_BS2->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2);
m_RadioJIT = new wxRadioButton(GeneralPage, ID_RADIOJIT, wxT("JIT Recompiler (recommended)"));
m_RadioInt = new wxRadioButton(GeneralPage, ID_RADIOINT, wxT("Interpreter (very slow)"));
SConfig::GetInstance().m_LocalCoreStartupParameter.bUseJIT ? m_RadioJIT->SetValue(true) : m_RadioInt->SetValue(true);
LockThreads = new wxCheckBox(GeneralPage, ID_LOCKTHREADS, wxT("Lock threads to cores"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
LockThreads->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads);
- OptimizeQuantizers = new wxCheckBox(GeneralPage, ID_OPTIMIZEQUANTIZERS, wxT("Optimize Quantizers (speedup)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
- OptimizeQuantizers->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers);
DSPThread = new wxCheckBox(GeneralPage, ID_DSPTHREAD, wxT("DSP on thread (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
DSPThread->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bDSPThread);
@@ -317,7 +313,6 @@ void CConfigMain::CreateGUIControls()
sizerCoreType->Add(m_RadioInt, 0, wxALL | wxEXPAND, 5);
sbAdvanced->Add(sizerCoreType, 0, wxALL, 5);
sbAdvanced->Add(LockThreads, 0, wxALL, 5);
- sbAdvanced->Add(OptimizeQuantizers, 0, wxALL, 5);
sbAdvanced->Add(DSPThread, 0, wxALL, 5);
sCore->Add(sbBasic, 0, wxEXPAND);
sCore->AddStretchSpacer();
@@ -690,7 +685,7 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
case ID_FRAMELIMIT:
SConfig::GetInstance().m_Framelimit = (u32)Framelimit->GetSelection();
break;
- case ID_ALLWAYS_HLE_BS2: // Core
+ case ID_ALWAYS_HLE_BS2: // Core
SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2 = AlwaysHLE_BS2->IsChecked();
break;
case ID_RADIOJIT:
@@ -710,9 +705,6 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
case ID_LOCKTHREADS:
SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads = LockThreads->IsChecked();
break;
- case ID_OPTIMIZEQUANTIZERS:
- SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers = OptimizeQuantizers->IsChecked();
- break;
case ID_IDLESKIP:
SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = SkipIdle->IsChecked();
break;
diff --git a/Source/Core/DolphinWX/Src/ConfigMain.h b/Source/Core/DolphinWX/Src/ConfigMain.h
index 79da4bdb66..584aec8640 100644
--- a/Source/Core/DolphinWX/Src/ConfigMain.h
+++ b/Source/Core/DolphinWX/Src/ConfigMain.h
@@ -68,7 +68,6 @@ private:
wxCheckBox* CPUThread;
wxCheckBox* DSPThread;
wxCheckBox* LockThreads;
- wxCheckBox* OptimizeQuantizers;
wxCheckBox* SkipIdle;
wxCheckBox* EnableCheats;
@@ -159,13 +158,12 @@ private:
ID_PATHSPAGE,
ID_PLUGINPAGE,
- ID_ALLWAYS_HLE_BS2,
+ ID_ALWAYS_HLE_BS2,
ID_RADIOJIT,
ID_RADIOINT,
ID_CPUTHREAD,
ID_DSPTHREAD,
ID_LOCKTHREADS,
- ID_OPTIMIZEQUANTIZERS,
ID_IDLESKIP,
ID_ENABLECHEATS,
diff --git a/Source/Core/DolphinWX/Src/ISOProperties.cpp b/Source/Core/DolphinWX/Src/ISOProperties.cpp
index 3b30c614b1..b6f14eafe6 100644
--- a/Source/Core/DolphinWX/Src/ISOProperties.cpp
+++ b/Source/Core/DolphinWX/Src/ISOProperties.cpp
@@ -290,7 +290,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
sbCoreOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Core"));
CPUThread = new wxCheckBox(m_GameConfig, ID_USEDUALCORE, _("Enable Dual Core"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
SkipIdle = new wxCheckBox(m_GameConfig, ID_IDLESKIP, _("Enable Idle Skipping"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
- OptimizeQuantizers = new wxCheckBox(m_GameConfig, ID_OPTIMIZEQUANTIZERS, _("Optimize Quantizers"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
TLBHack = new wxCheckBox(m_GameConfig, ID_TLBHACK, _("TLB Hack"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
// Wii Console
sbWiiOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Wii Console"));
@@ -347,7 +346,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
sbCoreOverrides->Add(CPUThread, 0, wxEXPAND|wxLEFT, 5);
sbCoreOverrides->Add(SkipIdle, 0, wxEXPAND|wxLEFT, 5);
sbCoreOverrides->Add(TLBHack, 0, wxEXPAND|wxLEFT, 5);
- sbCoreOverrides->Add(OptimizeQuantizers, 0, wxEXPAND|wxLEFT, 5);
sbWiiOverrides->Add(EnableProgressiveScan, 0, wxEXPAND|wxLEFT, 5);
sbWiiOverrides->Add(EnableWideScreen, 0, wxEXPAND|wxLEFT, 5);
sbVideoOverrides->Add(ForceFiltering, 0, wxEXPAND|wxLEFT, 5);
@@ -806,11 +804,6 @@ void CISOProperties::LoadGameConfig()
else
SkipIdle->Set3StateValue(wxCHK_UNDETERMINED);
- if (GameIni.Get("Core", "OptimizeQuantizers", &bTemp))
- OptimizeQuantizers->Set3StateValue((wxCheckBoxState)bTemp);
- else
- OptimizeQuantizers->Set3StateValue(wxCHK_UNDETERMINED);
-
if (GameIni.Get("Core", "TLBHack", &bTemp))
TLBHack->Set3StateValue((wxCheckBoxState)bTemp);
else
@@ -896,11 +889,6 @@ bool CISOProperties::SaveGameConfig()
else
GameIni.Set("Core", "SkipIdle", SkipIdle->Get3StateValue());
- if (OptimizeQuantizers->Get3StateValue() == wxCHK_UNDETERMINED)
- GameIni.DeleteKey("Core", "OptimizeQuantizers");
- else
- GameIni.Set("Core", "OptimizeQuantizers", OptimizeQuantizers->Get3StateValue());
-
if (TLBHack->Get3StateValue() == wxCHK_UNDETERMINED)
GameIni.DeleteKey("Core", "TLBHack");
else
diff --git a/Source/Core/DolphinWX/Src/ISOProperties.h b/Source/Core/DolphinWX/Src/ISOProperties.h
index bee7ad7742..347b2ce637 100644
--- a/Source/Core/DolphinWX/Src/ISOProperties.h
+++ b/Source/Core/DolphinWX/Src/ISOProperties.h
@@ -81,7 +81,7 @@ class CISOProperties : public wxDialog
wxStaticText *OverrideText;
// Core
- wxCheckBox *CPUThread, *SkipIdle, *OptimizeQuantizers, *TLBHack, *BPHack;
+ wxCheckBox *CPUThread, *SkipIdle, *TLBHack, *BPHack;
// Wii
wxCheckBox *EnableProgressiveScan, *EnableWideScreen;
// Video
@@ -172,7 +172,6 @@ class CISOProperties : public wxDialog
ID_RE0FIX,
ID_ENABLEPROGRESSIVESCAN,
ID_ENABLEWIDESCREEN,
- ID_OPTIMIZEQUANTIZERS,
ID_EDITCONFIG,
ID_EMUSTATE_TEXT,
ID_EMUSTATE,