mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-05-05 02:32:45 +00:00
TextureDecoder.cpp: new SSE2 optimized GX_TF_I8 decoder. Probably not ultimately optimal SSE2 code, but provably better (on my machine) than the memset version. Tested with __rdtsc counts in an independent project. I get about 6-7 FPS more on average during the intro movie playback in Mario Kart Wii. Hope this compiles for GCC okay.
TextureDecoder.cpp: merged two functionally identical decode5A3RGBA and decode5A3rgba methods. OpcodeDecoding.cpp and DLCache.cpp: optimization for GX_LOAD_XF_REG. The PSHUFB solution sounds better for SSSE3, but this is a small win for the default case. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6692 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
6cf9b3688d
commit
b038df64bf
5 changed files with 164 additions and 66 deletions
Source/Core/VideoCommon/Src
|
@ -49,6 +49,24 @@
|
|||
#endif
|
||||
|
||||
u8* g_pVideoData = 0;
|
||||
// Dispatch table of the DataReadU32xN<N> template instantiations for N = 1..16.
// Entry i holds DataReadU32xN<i + 1>, so the XF-load call sites index it with
// `transfer_size-1` and pass the destination u32 buffer as the only argument.
// At the visible call site transfer_size is ((Cmd2 >> 16) & 15) + 1, i.e. 1..16,
// which keeps the index within the 16 entries.
// NOTE(review): DataReadU32xN is defined outside this view; presumably it reads
// N u32 values from the command stream in place of the per-element
// DataReadU32() loop it replaces -- confirm against its definition.
DataReadU32xNfunc DataReadU32xFuncs[16] = {
|
||||
DataReadU32xN<1>,
|
||||
DataReadU32xN<2>,
|
||||
DataReadU32xN<3>,
|
||||
DataReadU32xN<4>,
|
||||
DataReadU32xN<5>,
|
||||
DataReadU32xN<6>,
|
||||
DataReadU32xN<7>,
|
||||
DataReadU32xN<8>,
|
||||
DataReadU32xN<9>,
|
||||
DataReadU32xN<10>,
|
||||
DataReadU32xN<11>,
|
||||
DataReadU32xN<12>,
|
||||
DataReadU32xN<13>,
|
||||
DataReadU32xN<14>,
|
||||
DataReadU32xN<15>,
|
||||
|
||||
DataReadU32xN<16>
|
||||
};
|
||||
|
||||
extern u8* FAKE_GetFifoStartPtr();
|
||||
extern u8* FAKE_GetFifoEndPtr();
|
||||
|
@ -233,12 +251,13 @@ static void Decode()
|
|||
{
|
||||
u32 Cmd2 = DataReadU32();
|
||||
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
// TODO - speed this up. pshufb?
|
||||
u32 data_buffer[16];
|
||||
for (int i = 0; i < transfer_size; i++)
|
||||
data_buffer[i] = DataReadU32();
|
||||
u32 data_buffer[16];
|
||||
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
||||
|
||||
LoadXFReg(transfer_size, xf_address, data_buffer);
|
||||
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
}
|
||||
break;
|
||||
|
@ -317,7 +336,7 @@ static void DecodeSemiNop()
|
|||
u8 sub_cmd = DataReadU8();
|
||||
u32 value = DataReadU32();
|
||||
LoadCPReg(sub_cmd, value);
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -328,10 +347,9 @@ static void DecodeSemiNop()
|
|||
u32 address = Cmd2 & 0xFFFF;
|
||||
// TODO - speed this up. pshufb?
|
||||
u32 data_buffer[16];
|
||||
for (int i = 0; i < transfer_size; i++)
|
||||
data_buffer[i] = DataReadU32();
|
||||
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
||||
LoadXFReg(transfer_size, address, data_buffer);
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue