From 35322b5d14803369d4e0a95b2162116bb4f98151 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 29 Dec 2020 20:19:01 +0300 Subject: [PATCH] Remove deprecated _bit accessor from v128 Complicated (a lot of code), confusing (ambiguous) --- rpcs3/Emu/CPU/CPUTranslator.cpp | 6 +- rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp | 84 ++++++++++++------------- rpcs3/util/v128.hpp | 14 ----- rpcs3/util/v128sse.hpp | 72 --------------------- 4 files changed, 43 insertions(+), 133 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index c0320c54d5..5d49f3af61 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -112,10 +112,8 @@ std::pair cpu_translator::get_const_vector(llvm::Value* c, u32 { auto cv = ci->getValue(); - for (int i = 0; i < 128; i++) - { - result._bit[i] = cv[i]; - } + result._u64[0] = cv.extractBitsAsZExtValue(64, 0); + result._u64[1] = cv.extractBitsAsZExtValue(64, 64); return {true, result}; } diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp index a279db99fb..c3d022a227 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp @@ -13,6 +13,7 @@ #include #include +#include "util/asm.hpp" #include "util/v128.hpp" #include "util/v128sse.hpp" @@ -1439,16 +1440,9 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i // Find the number of tasks that have become ready since the last iteration { - auto newlyReadyTasks = v128::andnot(ready, signalled | pready); + v128 newlyReadyTasks = v128::andnot(ready, signalled | pready); - // TODO: Optimize this shit with std::popcount when it's known to be fixed - for (auto i = 0; i < 128; i++) - { - if (newlyReadyTasks._bit[i]) - { - numNewlyReadyTasks++; - } - } + numNewlyReadyTasks = utils::popcnt128(newlyReadyTasks._u); } v128 readyButNotRunning; @@ -1456,38 +1450,40 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i v128 signalled0 = (signalled & (ready | pready)); v128 ready0 = (signalled | ready | pready); + u128 ctxtTaskIdMask = u128{1} << +(~ctxt->taskId & 127); + switch (request) { case SPURS_TASKSET_REQUEST_POLL_SIGNAL: { - rc = signalled0._bit[ctxt->taskId] ? 1 : 0; - signalled0._bit[ctxt->taskId] = false; + rc = signalled0._u & ctxtTaskIdMask ? 1 : 0; + signalled0._u &= ~ctxtTaskIdMask; break; } case SPURS_TASKSET_REQUEST_DESTROY_TASK: { numNewlyReadyTasks--; - running._bit[ctxt->taskId] = false; - enabled._bit[ctxt->taskId] = false; - signalled0._bit[ctxt->taskId] = false; - ready0._bit[ctxt->taskId] = false; + running._u &= ~ctxtTaskIdMask; + enabled._u &= ~ctxtTaskIdMask; + signalled0._u &= ~ctxtTaskIdMask; + ready0._u &= ~ctxtTaskIdMask; break; } case SPURS_TASKSET_REQUEST_YIELD_TASK: { - running._bit[ctxt->taskId] = false; - waiting._bit[ctxt->taskId] = true; + running._u &= ~ctxtTaskIdMask; + waiting._u |= ctxtTaskIdMask; break; } case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: { - if (signalled0._bit[ctxt->taskId] == false) + if (!(signalled0._u & ctxtTaskIdMask)) { numNewlyReadyTasks--; - running._bit[ctxt->taskId] = false; - waiting._bit[ctxt->taskId] = true; - signalled0._bit[ctxt->taskId] = false; - ready0._bit[ctxt->taskId] = false; + running._u &= ~ctxtTaskIdMask; + waiting._u |= ctxtTaskIdMask; + signalled0._u &= ~ctxtTaskIdMask; + ready0._u &= ~ctxtTaskIdMask; } break; } @@ -1496,10 +1492,10 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i readyButNotRunning = v128::andnot(running, ready0); if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { - readyButNotRunning._bit[taskset->wkl_flag_wait_task] = false; + readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127)); } - rc = readyButNotRunning != v128{} ? 1 : 0; + rc = readyButNotRunning._u ? 1 : 0; break; } case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG: @@ -1514,8 +1510,8 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i { // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag. taskset->wkl_flag_wait_task = ctxt->taskId; - running._bit[ctxt->taskId] = false; - waiting._bit[ctxt->taskId] = true; + running._u &= ~ctxtTaskIdMask; + waiting._u |= ctxtTaskIdMask; rc = 1; numNewlyReadyTasks--; } @@ -1531,13 +1527,13 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i readyButNotRunning = v128::andnot(running, ready0); if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { - readyButNotRunning._bit[taskset->wkl_flag_wait_task] = false; + readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127)); } // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness. for (selectedTaskId = taskset->last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++) { - if (readyButNotRunning._bit[selectedTaskId]) + if (readyButNotRunning._u & (u128{1} << (~selectedTaskId & 127))) { break; } @@ -1547,7 +1543,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i { for (selectedTaskId = 0; selectedTaskId < taskset->last_scheduled_task + 1; selectedTaskId++) { - if (readyButNotRunning._bit[selectedTaskId]) + if (readyButNotRunning._u & (u128{1} << (~selectedTaskId & 127))) { break; } @@ -1560,13 +1556,21 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i } *taskId = selectedTaskId; - *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0; + if (selectedTaskId != CELL_SPURS_MAX_TASK) { + const u128 selectedTaskIdMask = u128{1} << (~selectedTaskId & 127); + + *isWaiting = waiting._u & selectedTaskIdMask ? 1 : 0; taskset->last_scheduled_task = selectedTaskId; - running._bit[selectedTaskId] = true; - waiting._bit[selectedTaskId] = false; + running._u |= selectedTaskIdMask; + waiting._u &= ~selectedTaskIdMask; } + else + { + *isWaiting = waiting._u & (u128{1} << 127) ? 1 : 0; + } + break; } case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: @@ -1691,15 +1695,9 @@ s32 spursTasketSaveTaskContext(spu_thread& spu) } u32 allocLsBlocks = static_cast(taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F); - u32 lsBlocks = 0; v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]); - for (auto i = 0; i < 128; i++) - { - if (ls_pattern._bit[i]) - { - lsBlocks++; - } - } + + const u32 lsBlocks = utils::popcnt128(ls_pattern._u); if (lsBlocks > allocLsBlocks) { @@ -1709,7 +1707,7 @@ s32 spursTasketSaveTaskContext(spu_thread& spu) // Make sure the stack is area is specified in the ls pattern for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if (ls_pattern._bit[i] == false) + if (!(ls_pattern._u & (u128{1} << (i ^ 127)))) { return CELL_SPURS_TASK_ERROR_STAT; } @@ -1729,7 +1727,7 @@ s32 spursTasketSaveTaskContext(spu_thread& spu) // Save LS context for (auto i = 6; i < 128; i++) { - if (ls_pattern._bit[i]) + if (ls_pattern._u & (u128{1} << (i ^ 127))) { // TODO: Combine DMA requests for consecutive blocks into a single request std::memcpy(vm::base(contextSaveStorage + 0x400 + ((i - 6) << 11)), spu._ptr(CELL_SPURS_TASK_TOP + ((i - 6) << 11)), 0x800); @@ -1837,7 +1835,7 @@ void spursTasksetDispatch(spu_thread& spu) std::memcpy(spu._ptr(0x2C80), vm::base(contextSaveStorage), 0x380); for (auto i = 6; i < 128; i++) { - if (ls_pattern._bit[i]) + if (ls_pattern._u & (u128{1} << (i ^ 127))) { // TODO: Combine DMA requests for consecutive blocks into a single request std::memcpy(spu._ptr(CELL_SPURS_TASK_TOP + ((i - 6) << 11)), vm::base(contextSaveStorage + 0x400 + ((i - 6) << 11)), 0x800); diff --git a/rpcs3/util/v128.hpp b/rpcs3/util/v128.hpp index fef14bc543..fbfd102e58 100644 --- a/rpcs3/util/v128.hpp +++ b/rpcs3/util/v128.hpp @@ -92,20 +92,6 @@ union alignas(16) v128 __m128d vd; #endif - struct bit_array_128 - { - char m_data[16]; - - public: - class bit_element; - - // Index 0 returns the MSB and index 127 returns the LSB - [[deprecated]] bit_element operator[](u32 index); - - // Index 0 returns the MSB and index 127 returns the LSB - [[deprecated]] bool operator[](u32 index) const; - } _bit; - static v128 from64(u64 _0, u64 _1 = 0) { v128 ret; diff --git a/rpcs3/util/v128sse.hpp b/rpcs3/util/v128sse.hpp index eb543aac86..4dfe036c32 100644 --- a/rpcs3/util/v128sse.hpp +++ b/rpcs3/util/v128sse.hpp @@ -17,78 +17,6 @@ inline bool v128_use_fma = utils::has_fma3(); -class v128::bit_array_128::bit_element -{ - u64& data; - const u64 mask; - -public: - bit_element(u64& data, const u64 mask) - : data(data) - , mask(mask) - { - } - - operator bool() const - { - return (data & mask) != 0; - } - - bit_element& operator=(const bool right) - { - if (right) - { - data |= mask; - } - else - { - data &= ~mask; - } - return *this; - } - - bit_element& operator=(const bit_element& right) - { - if (right) - { - data |= mask; - } - else - { - data &= ~mask; - } - return *this; - } -}; - -[[deprecated]] inline v128::bit_array_128::bit_element v128::bit_array_128::operator[](u32 index) -{ - const auto data_ptr = reinterpret_cast(m_data); - - if constexpr (std::endian::little == std::endian::native) - { - return bit_element(data_ptr[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F)); - } - else - { - return bit_element(data_ptr[index >> 6], 0x8000000000000000ull >> (index & 0x3F)); - } -} - -[[deprecated]] inline bool v128::bit_array_128::operator[](u32 index) const -{ - const auto data_ptr = reinterpret_cast(m_data); - - if constexpr (std::endian::little == std::endian::native) - { - return (data_ptr[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0; - } - else - { - return (data_ptr[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0; - } -} - inline v128 v128::fromV(const __m128i& value) { v128 ret;