From 346614b0da4dc74fe482624f2c0e903feda0bea1 Mon Sep 17 00:00:00 2001 From: Michael Yu Date: Mon, 19 May 2014 02:07:45 -0700 Subject: [PATCH 1/4] gitignore now ignores files generated by Visual Studio profiler, solution configured for profiling. --- .gitignore | 4 ++++ rpcs3.sln | 3 +++ 2 files changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 2e4c753253..46313b81af 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,10 @@ /bin/*.exp rpcs3/git-version.h +# Visual Studio Profiler Files +*.vspx +*.psess + # Copyrighted files /bin/data/ /bin/dev_flash/data/font diff --git a/rpcs3.sln b/rpcs3.sln index 7b0d30d2e0..d06a3dabd7 100644 --- a/rpcs3.sln +++ b/rpcs3.sln @@ -457,4 +457,7 @@ Global {23E1C437-A951-5943-8639-A17F3CF2E606} = {5812E712-6213-4372-B095-9EB9BAA1F2DF} {74827EBD-93DC-5110-BA95-3F2AB029B6B0} = {5812E712-6213-4372-B095-9EB9BAA1F2DF} EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection EndGlobal From 2834697f1f8dbad36aa7a932a5a2b9e60ecd3470 Mon Sep 17 00:00:00 2001 From: Michael Yu Date: Mon, 19 May 2014 02:09:54 -0700 Subject: [PATCH 2/4] MEMORY: CopyFromReal's source parameter should be immutable --- rpcs3/Emu/Memory/Memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Memory/Memory.h b/rpcs3/Emu/Memory/Memory.h index d1dcef50e2..61d7c872ff 100644 --- a/rpcs3/Emu/Memory/Memory.h +++ b/rpcs3/Emu/Memory/Memory.h @@ -333,11 +333,11 @@ public: return true; } - bool CopyFromReal(u32 to, void* real, u32 count) // (4K pages) copy from real to virtual memory + bool CopyFromReal(u32 to, const void* real, u32 count) // (4K pages) copy from real to virtual memory { if (!count) return true; - u8* from = (u8*)real; + const u8* from = (const u8*)real; if (u32 frag = to & 4095) { From 9bdb12e3da1f7f3f0607c6ff2673a54f1ed5dc72 Mon Sep 17 00:00:00 2001 From: Michael Yu Date: Mon, 19 May 2014 02:10:33 -0700 Subject: [PATCH 3/4] MEMORY: Added 
AppendRawBytes, which also advances the memory pointer unlike Memory.CopyFromReal --- rpcs3/Emu/Memory/Memory.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpcs3/Emu/Memory/Memory.h b/rpcs3/Emu/Memory/Memory.h index 61d7c872ff..e5d56a111c 100644 --- a/rpcs3/Emu/Memory/Memory.h +++ b/rpcs3/Emu/Memory/Memory.h @@ -795,6 +795,12 @@ public: return this->m_addr; } + u32 AppendRawBytes(const u8 * bytes, size_t count) { + Memory.CopyFromReal(this->m_addr, bytes, count); + this->m_addr += count; + return this->m_addr; + } + u32 Skip(const u32 offset) { return this->m_addr += offset; } operator be_t*() { return GetPtr(); } From 3aeb0b0f9552cbae1856f396189f1dff7d5ca8c6 Mon Sep 17 00:00:00 2001 From: Michael Yu Date: Mon, 19 May 2014 02:14:07 -0700 Subject: [PATCH 4/4] cellPngDecDecodeData handles CELL_PNGDEC_ARGB case much faster. Profiling done with two samples on Solar v2.1 from rpcs3 init to first frame. Before optimization, profiler found rpcs3 in cellPngDecDecodeData 15.3% of the time. Post-optimization, profiler finds rpcs3 in cellPngDecDecodeData 0.33% of the time for ~50x improvement. 
--- rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp index d169321c51..31886ab113 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp @@ -201,7 +201,7 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m case CELL_PNGDEC_ARGB: { - const char nComponents = 4; + const int nComponents = 4; image_size *= nComponents; if (bytesPerLine > width * nComponents || flip) //check if we need padding { @@ -225,13 +225,19 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m } else { - for (uint i = 0; i < image_size; i += nComponents) + uint* dest = (uint*)new char[image_size]; + uint* source_current = (uint*)&(image.get()[0]); + uint* dest_current = dest; + for (uint i = 0; i < image_size / nComponents; i++) { - data += image.get()[i + 3]; - data += image.get()[i + 0]; - data += image.get()[i + 1]; - data += image.get()[i + 2]; + uint val = *source_current; + *dest_current = (val >> 24) | (val << 8); // set alpha (A8) as leftmost byte + source_current++; + dest_current++; } + // NOTE: unlike Memory.CopyFromReal, AppendRawBytes also advances data's address by the number of bytes written + data.AppendRawBytes((u8*)dest, image_size); + delete[] dest; } } break;