Remove deprecated _bit accessor from v128

The _bit accessor was complicated (a lot of code) and confusing (ambiguous indexing: index 0 meant the MSB).
Nekotekina, 2020-12-29 20:19:01 +03:00
commit 35322b5d14
4 changed files with 43 additions and 133 deletions
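
Note: the `_bit` proxy indexed the vector MSB-first (index 0 = most significant bit, index 127 = least significant), so every call site below rewrites `_bit[i]` into a mask operation on the raw `_u` (u128) field at bit position `127 - i`, which is what `~i & 127` computes. A quick reference for the equivalences used throughout (v and i are placeholders):

    v._bit[i]          ->  v._u & (u128{1} << (~i & 127))
    v._bit[i] = true;  ->  v._u |= u128{1} << (~i & 127);
    v._bit[i] = false; ->  v._u &= ~(u128{1} << (~i & 127));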

@@ -112,10 +112,8 @@ std::pair<bool, v128> cpu_translator::get_const_vector<v128>(llvm::Value* c, u32
 	{
 		auto cv = ci->getValue();
-		for (int i = 0; i < 128; i++)
-		{
-			result._bit[i] = cv[i];
-		}
+		result._u64[0] = cv.extractBitsAsZExtValue(64, 0);
+		result._u64[1] = cv.extractBitsAsZExtValue(64, 64);
 		return {true, result};
 	}
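
llvm::APInt::extractBitsAsZExtValue(numBits, bitPosition) returns the requested bit slice zero-extended to a u64, so the two calls copy the constant's low and high halves directly. Note that the removed loop wrote APInt bit i (LSB-first) into _bit[i] (MSB-first), so the straight copy also appears to change the bit order here. A standalone sketch of the new copy (function name hypothetical):

    #include <llvm/ADT/APInt.h>
    #include <cstdint>

    // Copy a 128-bit APInt into two 64-bit halves in natural (LSB-first) order.
    inline void copy_apint128(const llvm::APInt& cv, std::uint64_t out[2])
    {
        out[0] = cv.extractBitsAsZExtValue(64, 0);  // bits [0, 64)
        out[1] = cv.extractBitsAsZExtValue(64, 64); // bits [64, 128)
    }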

@@ -13,6 +13,7 @@
 #include <thread>
 #include <mutex>
+#include "util/asm.hpp"
 #include "util/v128.hpp"
 #include "util/v128sse.hpp"
@@ -1439,16 +1440,9 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 	// Find the number of tasks that have become ready since the last iteration
 	{
-		auto newlyReadyTasks = v128::andnot(ready, signalled | pready);
-		// TODO: Optimize this shit with std::popcount when it's known to be fixed
-		for (auto i = 0; i < 128; i++)
-		{
-			if (newlyReadyTasks._bit[i])
-			{
-				numNewlyReadyTasks++;
-			}
-		}
+		v128 newlyReadyTasks = v128::andnot(ready, signalled | pready);
+		numNewlyReadyTasks = utils::popcnt128(newlyReadyTasks._u);
 	}

 	v128 readyButNotRunning;
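
utils::popcnt128 (presumably why util/asm.hpp is now included) replaces the 128-iteration counting loop with a population count over the whole u128. A minimal sketch of what such a helper can look like, assuming a compiler-provided unsigned __int128 (an illustration, not the actual utils implementation):

    #include <bit>
    #include <cstdint>

    // Count set bits in a 128-bit value by splitting it into two 64-bit halves.
    inline int popcnt128_sketch(unsigned __int128 v)
    {
        return std::popcount(static_cast<std::uint64_t>(v)) +
               std::popcount(static_cast<std::uint64_t>(v >> 64));
    }
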
@@ -1456,38 +1450,40 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 	v128 signalled0 = (signalled & (ready | pready));
 	v128 ready0 = (signalled | ready | pready);
+	u128 ctxtTaskIdMask = u128{1} << +(~ctxt->taskId & 127);
 	switch (request)
 	{
 	case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
 	{
-		rc = signalled0._bit[ctxt->taskId] ? 1 : 0;
-		signalled0._bit[ctxt->taskId] = false;
+		rc = signalled0._u & ctxtTaskIdMask ? 1 : 0;
+		signalled0._u &= ~ctxtTaskIdMask;
 		break;
 	}
 	case SPURS_TASKSET_REQUEST_DESTROY_TASK:
 	{
 		numNewlyReadyTasks--;
-		running._bit[ctxt->taskId] = false;
-		enabled._bit[ctxt->taskId] = false;
-		signalled0._bit[ctxt->taskId] = false;
-		ready0._bit[ctxt->taskId] = false;
+		running._u &= ~ctxtTaskIdMask;
+		enabled._u &= ~ctxtTaskIdMask;
+		signalled0._u &= ~ctxtTaskIdMask;
+		ready0._u &= ~ctxtTaskIdMask;
 		break;
 	}
 	case SPURS_TASKSET_REQUEST_YIELD_TASK:
 	{
-		running._bit[ctxt->taskId] = false;
-		waiting._bit[ctxt->taskId] = true;
+		running._u &= ~ctxtTaskIdMask;
+		waiting._u |= ctxtTaskIdMask;
 		break;
 	}
 	case SPURS_TASKSET_REQUEST_WAIT_SIGNAL:
 	{
-		if (signalled0._bit[ctxt->taskId] == false)
+		if (!(signalled0._u & ctxtTaskIdMask))
 		{
 			numNewlyReadyTasks--;
-			running._bit[ctxt->taskId] = false;
-			waiting._bit[ctxt->taskId] = true;
-			signalled0._bit[ctxt->taskId] = false;
-			ready0._bit[ctxt->taskId] = false;
+			running._u &= ~ctxtTaskIdMask;
+			waiting._u |= ctxtTaskIdMask;
+			signalled0._u &= ~ctxtTaskIdMask;
+			ready0._u &= ~ctxtTaskIdMask;
 		}
 		break;
 	}
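
ctxtTaskIdMask precomputes the one-hot mask for the current task once, instead of re-deriving the bit position at every _bit access; the unary + forces integral promotion of the (presumably byte-sized) taskId before the shift amount is computed. A self-contained illustration of the mask arithmetic (the u128 alias is an assumption):

    #include <cassert>

    using u128 = unsigned __int128;

    int main()
    {
        const unsigned taskId = 5;
        // MSB-first indexing: task 0 lives at bit 127, task 127 at bit 0.
        assert((~taskId & 127) == 127 - taskId);

        u128 waiting = 0;
        const u128 mask = u128{1} << (~taskId & 127);
        waiting |= mask;            // old: waiting._bit[taskId] = true;
        assert(waiting & mask);     // old: if (waiting._bit[taskId])
        waiting &= ~mask;           // old: waiting._bit[taskId] = false;
        assert(!(waiting & mask));
        return 0;
    }
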
@@ -1496,10 +1492,10 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 		readyButNotRunning = v128::andnot(running, ready0);
 		if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK)
 		{
-			readyButNotRunning._bit[taskset->wkl_flag_wait_task] = false;
+			readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127));
 		}
-		rc = readyButNotRunning != v128{} ? 1 : 0;
+		rc = readyButNotRunning._u ? 1 : 0;
 		break;
 	}
 	case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG:
@@ -1514,8 +1510,8 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 		{
 			// No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag.
 			taskset->wkl_flag_wait_task = ctxt->taskId;
-			running._bit[ctxt->taskId] = false;
-			waiting._bit[ctxt->taskId] = true;
+			running._u &= ~ctxtTaskIdMask;
+			waiting._u |= ctxtTaskIdMask;
 			rc = 1;
 			numNewlyReadyTasks--;
 		}
@@ -1531,13 +1527,13 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 		readyButNotRunning = v128::andnot(running, ready0);
 		if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK)
 		{
-			readyButNotRunning._bit[taskset->wkl_flag_wait_task] = false;
+			readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127));
 		}
 		// Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness.
 		for (selectedTaskId = taskset->last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++)
 		{
-			if (readyButNotRunning._bit[selectedTaskId])
+			if (readyButNotRunning._u & (u128{1} << (~selectedTaskId & 127)))
 			{
 				break;
 			}
@@ -1547,7 +1543,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 		{
 			for (selectedTaskId = 0; selectedTaskId < taskset->last_scheduled_task + 1; selectedTaskId++)
 			{
-				if (readyButNotRunning._bit[selectedTaskId])
+				if (readyButNotRunning._u & (u128{1} << (~selectedTaskId & 127)))
 				{
 					break;
 				}
@@ -1560,13 +1556,21 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 		}
 		*taskId = selectedTaskId;
-		*isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0;
 		if (selectedTaskId != CELL_SPURS_MAX_TASK)
 		{
+			const u128 selectedTaskIdMask = u128{1} << (~selectedTaskId & 127);
+			*isWaiting = waiting._u & selectedTaskIdMask ? 1 : 0;
 			taskset->last_scheduled_task = selectedTaskId;
-			running._bit[selectedTaskId] = true;
-			waiting._bit[selectedTaskId] = false;
+			running._u |= selectedTaskIdMask;
+			waiting._u &= ~selectedTaskIdMask;
 		}
+		else
+		{
+			*isWaiting = waiting._u & (u128{1} << 127) ? 1 : 0;
+		}
 		break;
 	}
 	case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG:
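
The new else branch preserves the old fallback of reading waiting._bit[0] when no task was selected: index 0 is the MSB, so its mask is u128{1} << 127. A trivial compile-time check of that shift amount:

    static_assert((~0u & 127) == 127); // task 0 -> shift 127 -> the MSB mask
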
@@ -1691,15 +1695,9 @@ s32 spursTasketSaveTaskContext(spu_thread& spu)
 	}
 	u32 allocLsBlocks = static_cast<u32>(taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F);
-	u32 lsBlocks = 0;
 	v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
-	for (auto i = 0; i < 128; i++)
-	{
-		if (ls_pattern._bit[i])
-		{
-			lsBlocks++;
-		}
-	}
+	const u32 lsBlocks = utils::popcnt128(ls_pattern._u);
 	if (lsBlocks > allocLsBlocks)
 	{
@@ -1709,7 +1707,7 @@ s32 spursTasketSaveTaskContext(spu_thread& spu)
 	// Make sure the stack is area is specified in the ls pattern
 	for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++)
 	{
-		if (ls_pattern._bit[i] == false)
+		if (!(ls_pattern._u & (u128{1} << (i ^ 127))))
 		{
 			return CELL_SPURS_TASK_ERROR_STAT;
 		}
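
These loops use i ^ 127 rather than ~i & 127 for the shift amount; for 0 <= i < 128 the two are identical (both compute 127 - i), and the XOR form avoids masking the complement. A compile-time check of the equivalence:

    static_assert([] {
        for (unsigned i = 0; i < 128; i++)
        {
            if ((i ^ 127) != (~i & 127))
                return false;
        }
        return true;
    }());
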
@@ -1729,7 +1727,7 @@ s32 spursTasketSaveTaskContext(spu_thread& spu)
 	// Save LS context
 	for (auto i = 6; i < 128; i++)
 	{
-		if (ls_pattern._bit[i])
+		if (ls_pattern._u & (u128{1} << (i ^ 127)))
 		{
 			// TODO: Combine DMA requests for consecutive blocks into a single request
 			std::memcpy(vm::base(contextSaveStorage + 0x400 + ((i - 6) << 11)), spu._ptr<void>(CELL_SPURS_TASK_TOP + ((i - 6) << 11)), 0x800);
@@ -1837,7 +1835,7 @@ void spursTasksetDispatch(spu_thread& spu)
 	std::memcpy(spu._ptr<void>(0x2C80), vm::base(contextSaveStorage), 0x380);
 	for (auto i = 6; i < 128; i++)
 	{
-		if (ls_pattern._bit[i])
+		if (ls_pattern._u & (u128{1} << (i ^ 127)))
 		{
 			// TODO: Combine DMA requests for consecutive blocks into a single request
 			std::memcpy(spu._ptr<void>(CELL_SPURS_TASK_TOP + ((i - 6) << 11)), vm::base(contextSaveStorage + 0x400 + ((i - 6) << 11)), 0x800);

@@ -92,20 +92,6 @@ union alignas(16) v128
 	__m128d vd;
 #endif
-	struct bit_array_128
-	{
-		char m_data[16];
-	public:
-		class bit_element;
-		// Index 0 returns the MSB and index 127 returns the LSB
-		[[deprecated]] bit_element operator[](u32 index);
-		// Index 0 returns the MSB and index 127 returns the LSB
-		[[deprecated]] bool operator[](u32 index) const;
-	} _bit;
 	static v128 from64(u64 _0, u64 _1 = 0)
 	{
 		v128 ret;

@@ -17,78 +17,6 @@
 inline bool v128_use_fma = utils::has_fma3();
-class v128::bit_array_128::bit_element
-{
-	u64& data;
-	const u64 mask;
-public:
-	bit_element(u64& data, const u64 mask)
-		: data(data)
-		, mask(mask)
-	{
-	}
-	operator bool() const
-	{
-		return (data & mask) != 0;
-	}
-	bit_element& operator=(const bool right)
-	{
-		if (right)
-		{
-			data |= mask;
-		}
-		else
-		{
-			data &= ~mask;
-		}
-		return *this;
-	}
-	bit_element& operator=(const bit_element& right)
-	{
-		if (right)
-		{
-			data |= mask;
-		}
-		else
-		{
-			data &= ~mask;
-		}
-		return *this;
-	}
-};
-[[deprecated]] inline v128::bit_array_128::bit_element v128::bit_array_128::operator[](u32 index)
-{
-	const auto data_ptr = reinterpret_cast<u64*>(m_data);
-	if constexpr (std::endian::little == std::endian::native)
-	{
-		return bit_element(data_ptr[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F));
-	}
-	else
-	{
-		return bit_element(data_ptr[index >> 6], 0x8000000000000000ull >> (index & 0x3F));
-	}
-}
-[[deprecated]] inline bool v128::bit_array_128::operator[](u32 index) const
-{
-	const auto data_ptr = reinterpret_cast<const u64*>(m_data);
-	if constexpr (std::endian::little == std::endian::native)
-	{
-		return (data_ptr[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
-	}
-	else
-	{
-		return (data_ptr[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
-	}
-}
 inline v128 v128::fromV(const __m128i& value)
 {
 	v128 ret;
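
For anyone auditing the migration, the removed accessor's little-endian behavior is easy to reproduce as a free function and compare exhaustively against the new mask form. A standalone sketch (not RPCS3 code; assumes a little-endian host and a compiler-provided unsigned __int128):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    using u128 = unsigned __int128;

    // Reproduces the removed operator[]: index 0 is the MSB, index 127 the LSB.
    static bool bit_old(const u128& v, unsigned index)
    {
        std::uint64_t halves[2];
        std::memcpy(halves, &v, sizeof(halves)); // little-endian: [0]=low, [1]=high
        return (halves[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
    }

    // The replacement pattern used throughout this commit.
    static bool bit_new(const u128& v, unsigned index)
    {
        return (v & (u128{1} << (~index & 127))) != 0;
    }

    int main()
    {
        const u128 sample = (u128{0x0123456789abcdefull} << 64) | 0xfedcba9876543210ull;
        for (unsigned i = 0; i < 128; i++)
        {
            assert(bit_old(sample, i) == bit_new(sample, i));
        }
        return 0;
    }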