This commit is contained in:
digant 2024-12-30 22:19:35 +01:00
parent 673182f608
commit 720dc9dd72
55 changed files with 1101 additions and 1198 deletions

View file

@ -476,6 +476,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Common/TextureUtils.cpp
RSX/Common/texture_cache.cpp
RSX/Core/RSXContext.cpp
RSX/Core/RSXDrawCommands.cpp
RSX/gcm_enums.cpp
RSX/gcm_printing.cpp
RSX/GL/GLCommonDecompiler.cpp

View file

@ -622,7 +622,7 @@ bool spursKernel2SelectWorkload(spu_thread& spu)
void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus)
{
const auto ctxt = spu._ptr<SpursKernelContext>(0x100);
auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
const bool isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
auto pollStatus = static_cast<u32>(widAndPollStatus);
auto wid = static_cast<u32>(widAndPollStatus >> 32);
@ -674,7 +674,7 @@ void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus)
bool spursKernelWorkloadExit(spu_thread& spu)
{
const auto ctxt = spu._ptr<SpursKernelContext>(0x100);
auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
const bool isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
// Select next workload to run
spu.gpr[3].clear();
@ -701,7 +701,7 @@ bool spursKernelEntry(spu_thread& spu)
ctxt->spuNum = spu.gpr[3]._u32[3];
ctxt->spurs.set(spu.gpr[4]._u64[1]);
auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
const bool isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
// Initialise the SPURS context to its initial values
ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID;
@ -785,8 +785,8 @@ void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt)
}
}
bool allSpusIdle = nIdlingSpus == spurs->nSpus ? true : false;
bool exitIfNoWork = spurs->flags1 & SF1_EXIT_IF_NO_WORK ? true : false;
const bool allSpusIdle = nIdlingSpus == spurs->nSpus;
const bool exitIfNoWork = spurs->flags1 & SF1_EXIT_IF_NO_WORK ? true : false;
shouldExit = allSpusIdle && exitIfNoWork;
// Check if any workloads can be scheduled
@ -843,7 +843,7 @@ void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt)
}
}
bool spuIdling = spurs->spuIdling & (1 << ctxt->spuNum) ? true : false;
const bool spuIdling = spurs->spuIdling & (1 << ctxt->spuNum) ? true : false;
if (foundReadyWorkload && shouldExit == false)
{
spurs->spuIdling &= ~(1 << ctxt->spuNum);

View file

@ -341,6 +341,21 @@ extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
#endif
}
#ifdef _MSC_VER
__forceinline
#endif
extern u32 compute_rdata_hash32(const spu_rdata_t& _src)
{
const auto rhs = reinterpret_cast<const v128*>(_src);
const v128 a = gv_add32(rhs[0], rhs[1]);
const v128 c = gv_add32(rhs[4], rhs[5]);
const v128 b = gv_add32(rhs[2], rhs[3]);
const v128 d = gv_add32(rhs[6], rhs[7]);
const v128 r = gv_add32(gv_add32(a, b), gv_add32(c, d));
const v128 r1 = gv_add32(r, gv_shuffle32<1, 0, 3, 2>(r));
return r1._u32[0] + r1._u32[2];
}
#if defined(ARCH_X64)
static FORCE_INLINE void mov_rdata_nt_avx(__m256i* dst, const __m256i* src)
{
@ -4718,6 +4733,12 @@ bool spu_thread::process_mfc_cmd()
busy_wait(300);
}
if (getllar_spin_count == 3)
{
// Check other reservations in other threads
lv2_obj::notify_all();
}
// Reset perf
perf0.restart();
}
@ -4729,12 +4750,17 @@ bool spu_thread::process_mfc_cmd()
// Spinning, might as well yield cpu resources
state += cpu_flag::wait;
usz cache_line_waiter_index = umax;
if (auto wait_var = vm::reservation_notifier_begin_wait(addr, rtime))
{
cache_line_waiter_index = register_cache_line_waiter(addr);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{100'000});
vm::reservation_notifier_end_wait(*wait_var);
}
deregister_cache_line_waiter(cache_line_waiter_index);
static_cast<void>(test_stopped());
// Quick check if there were reservation changes
@ -5372,6 +5398,140 @@ bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data) const
return !res;
}
bool spu_thread::reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_lock)
{
if ((addr >> 28) < 2 || (addr >> 28) == 0xd)
{
// Always-allocated memory does not need strict checking (vm::main or vm::stack)
return compute_rdata_hash32(*vm::get_super_ptr<decltype(rdata)>(addr)) == hash;
}
// Ensure data is allocated (HACK: would raise LR event if not)
// Set range_lock first optimistically
range_lock->store(u64{128} << 32 | addr);
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
u64 old_lock = 0;
while (lock_val != old_lock)
{
// Since we want to read data, let's check readability first
if (!(lock_val & vm::range_readable))
{
// Only one abnormal operation is "unreadable"
if ((lock_val >> vm::range_pos) == (vm::range_locked >> vm::range_pos))
{
// All page flags are untouched and can be read safely
if (!vm::check_addr(addr))
{
// Assume our memory is being (de)allocated
range_lock->release(0);
break;
}
// g_shmem values are unchanged too
const u64 is_shmem = vm::g_shmem[addr >> 16];
const u64 test_addr = is_shmem ? (is_shmem | static_cast<u16>(addr)) / 128 : u64{addr} / 128;
const u64 lock_addr = lock_val / 128;
if (test_addr == lock_addr)
{
// Our reservation is locked
range_lock->release(0);
break;
}
break;
}
}
// Fallback to normal range check
const u64 lock_addr = static_cast<u32>(lock_val);
const u32 lock_size = static_cast<u32>(lock_val << 3 >> 35);
if (lock_addr + lock_size <= addr || lock_addr >= addr + 128)
{
// We are outside locked range, so page flags are unaffected
if (!vm::check_addr(addr))
{
range_lock->release(0);
break;
}
}
else if (!(lock_val & vm::range_readable))
{
range_lock->release(0);
break;
}
old_lock = std::exchange(lock_val, *std::prev(std::end(vm::g_range_lock_set)));
}
if (!range_lock->load()) [[unlikely]]
{
return true;
}
const bool res = compute_rdata_hash32(*vm::get_super_ptr<decltype(rdata)>(addr)) == hash;
range_lock->release(0);
return !res;
}
usz spu_thread::register_cache_line_waiter(u32 addr)
{
const u64 value = u64{compute_rdata_hash32(rdata)} << 32 | raddr;
for (usz i = 0; i < std::size(g_spu_waiters_by_value); i++)
{
auto [old, ok] = g_spu_waiters_by_value[i].fetch_op([value](u64& x)
{
if (x == 0)
{
x = value + 1;
return true;
}
if ((x & -128) == value)
{
x++;
return true;
}
return false;
});
if (ok)
{
return i;
}
}
return umax;
}
void spu_thread::deregister_cache_line_waiter(usz index)
{
if (index == umax)
{
return;
}
g_spu_waiters_by_value[index].fetch_op([](u64& x)
{
x--;
if ((x & 127) == 0)
{
x = 0;
}
return false;
});
}
std::pair<u32, u32> spu_thread::read_dec() const
{
const u64 res = ch_dec_value - (is_dec_frozen ? 0 : (get_timebased_time() - ch_dec_start_timestamp));
@ -5739,6 +5899,24 @@ s64 spu_thread::get_ch_value(u32 ch)
#else
const bool reservation_busy_waiting = (seed + ((raddr == spurs_addr) ? 50u : 0u)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
#endif
usz cache_line_waiter_index = umax;
auto check_cache_line_waiter = [&]()
{
if (cache_line_waiter_index == umax)
{
return true;
}
if ((g_spu_waiters_by_value[cache_line_waiter_index] & -128) == 0)
{
deregister_cache_line_waiter(cache_line_waiter_index);
cache_line_waiter_index = umax;
return false;
}
return true;
};
for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
{
@ -5746,12 +5924,22 @@ s64 spu_thread::get_ch_value(u32 ch)
if (is_stopped(old))
{
if (cache_line_waiter_index != umax)
{
g_spu_waiters_by_value[cache_line_waiter_index].release(0);
}
return -1;
}
// Optimized check
if (raddr)
if (raddr && mask1 & SPU_EVENT_LR)
{
if (cache_line_waiter_index == umax)
{
cache_line_waiter_index = register_cache_line_waiter(raddr);
}
bool set_lr = false;
if (!vm::check_addr(raddr) || rtime != vm::reservation_acquire(raddr))
@ -5819,13 +6007,20 @@ s64 spu_thread::get_ch_value(u32 ch)
}
}
// Check other reservations in other threads
lv2_obj::notify_all();
if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR)
{
// Wait with extended timeout, in this situation we have notifications for nearly all writes making it possible
// Abort notifications are handled specially for performance reasons
if (auto wait_var = vm::reservation_notifier_begin_wait(raddr, rtime))
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{300'000});
if (check_cache_line_waiter())
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{300'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
@ -5834,9 +6029,14 @@ s64 spu_thread::get_ch_value(u32 ch)
const u32 _raddr = this->raddr;
#ifdef __linux__
if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime))
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{50'000});
if (check_cache_line_waiter())
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{50'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
#else
@ -5891,13 +6091,20 @@ s64 spu_thread::get_ch_value(u32 ch)
return false;
}
// Check other reservations in other threads
lv2_obj::notify_all();
return true;
};
if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime))
{
atomic_wait_engine::set_one_time_use_wait_callback(wait_cb);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{80'000});
if (check_cache_line_waiter())
{
atomic_wait_engine::set_one_time_use_wait_callback(wait_cb);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{80'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
@ -5918,6 +6125,8 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old, 100);
}
deregister_cache_line_waiter(cache_line_waiter_index);
wakeup_delay();
if (is_paused(state - cpu_flag::suspend))
@ -6617,6 +6826,8 @@ bool spu_thread::stop_and_signal(u32 code)
}
}
lv2_obj::notify_all();
while (auto old = +state)
{
if (old & cpu_flag::signal && state.test_and_reset(cpu_flag::signal))
@ -7185,6 +7396,8 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
}
}
lv2_obj::notify_all();
const u32 wait_on_val = static_cast<u32>(((pop ? bit_occupy : 0) | bit_wait) >> 32);
while (true)
@ -7470,3 +7683,4 @@ void fmt_class_string<spu_channel_4_t>::format(std::string& out, u64 arg)
DECLARE(spu_thread::g_raw_spu_ctr){};
DECLARE(spu_thread::g_raw_spu_id){};
DECLARE(spu_thread::g_spu_work_count){};
DECLARE(spu_thread::g_spu_waiters_by_value){};

View file

@ -888,6 +888,9 @@ public:
// Returns true if reservation existed but was just discovered to be lost
// It is safe to use on any address, even if not directly accessed by SPU (so it's slower)
bool reservation_check(u32 addr, const decltype(rdata)& data) const;
static bool reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_lock);
usz register_cache_line_waiter(u32 addr);
void deregister_cache_line_waiter(usz index);
bool read_reg(const u32 addr, u32& value);
bool write_reg(const u32 addr, const u32 value);
@ -897,6 +900,8 @@ public:
static atomic_t<u32> g_raw_spu_id[5];
static atomic_t<u32> g_spu_work_count;
static atomic_t<u64> g_spu_waiters_by_value[6];
static u32 find_raw_spu(u32 id)
{
if (id < std::size(g_raw_spu_id)) [[likely]]

View file

@ -6,6 +6,8 @@
#include "Emu/Memory/vm_locking.h"
#include "Emu/Cell/PPUFunction.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/MFC.h"
#include "sys_sync.h"
@ -54,6 +56,7 @@
#include <algorithm>
#include <optional>
#include <deque>
#include <shared_mutex>
#include "util/tsc.hpp"
#include "util/sysinfo.hpp"
#include "util/init_mutex.hpp"
@ -75,6 +78,9 @@ namespace rsx
void set_rsx_yield_flag() noexcept;
}
using spu_rdata_t = decltype(spu_thread::rdata);
extern u32 compute_rdata_hash32(const spu_rdata_t& _src);
template <>
void fmt_class_string<ppu_syscall_code>::format(std::string& out, u64 arg)
{
@ -2215,3 +2221,129 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu)
vm::temporary_unlock(cpu);
cpu_counter::remove(&cpu);
}
void lv2_obj::notify_all() noexcept
{
for (auto cpu : g_to_notify)
{
if (!cpu)
{
break;
}
if (cpu != &g_to_notify)
{
const auto res_start = vm::reservation_notifier(0).second;
const auto res_end = vm::reservation_notifier(umax).second;
if (cpu >= res_start && cpu <= res_end)
{
atomic_wait_engine::notify_all(cpu);
}
else
{
// Note: by the time of notification the thread could have been deallocated which is why the direct function is used
atomic_wait_engine::notify_one(cpu);
}
}
}
g_to_notify[0] = nullptr;
g_postpone_notify_barrier = false;
const auto cpu = cpu_thread::get_current();
if (!cpu)
{
return;
}
if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend))
{
return;
}
std::optional<vm::writer_lock> lock;
constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value);
u32 notifies[total_waiters]{};
// There may be 6 waiters, but checking them all may be performance expensive
// Instead, check 2 at max, but use the CPU ID index to tell which index to start checking so the work would be distributed across all threads
atomic_t<u64, 64>* range_lock = nullptr;
for (usz i = 0, checked = 0; checked < 3 && i < total_waiters; i++)
{
auto& waiter = spu_thread::g_spu_waiters_by_value[(i + cpu->id) % total_waiters];
const u64 value = waiter.load();
u32 raddr = static_cast<u32>(value) & -128;
if (vm::check_addr(raddr))
{
if (((raddr >> 28) < 2 || (raddr >> 28) == 0xd))
{
checked++;
if (compute_rdata_hash32(*vm::get_super_ptr<spu_rdata_t>(raddr)) != static_cast<u32>(value >> 32))
{
// Clear address to avoid a race, keep waiter counter
if (waiter.fetch_op([&](u64& x)
{
if ((x & -128) == (value & -128))
{
x &= 127;
return true;
}
return false;
}).second)
{
notifies[i] = raddr;
}
}
continue;
}
if (!range_lock)
{
range_lock = vm::alloc_range_lock();
}
checked++;
if (spu_thread::reservation_check(raddr, static_cast<u32>(value >> 32), range_lock))
{
// Clear address to avoid a race, keep waiter counter
if (waiter.fetch_op([&](u64& x)
{
if ((x & -128) == (value & -128))
{
x &= 127;
return true;
}
return false;
}).second)
{
notifies[i] = raddr;
}
}
}
}
if (range_lock)
{
vm::free_range_lock(range_lock);
}
for (u32 addr : notifies)
{
if (addr)
{
vm::reservation_notifier_notify(addr);
}
}
}

View file

@ -166,8 +166,16 @@ s32 sys_ppu_thread_yield(ppu_thread& ppu)
sys_ppu_thread.trace("sys_ppu_thread_yield()");
const s32 success = lv2_obj::yield(ppu) ? CELL_OK : CELL_CANCEL;
if (success == CELL_CANCEL)
{
// Do other work in the meantime
lv2_obj::notify_all();
}
// Return 0 on successful context switch, 1 otherwise
return +!lv2_obj::yield(ppu);
return success;
}
error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr)

View file

@ -16,7 +16,7 @@ namespace
struct storage_manager
{
// This is probably wrong and should be assigned per fd or something
atomic_ptr<shared_ptr<lv2_event_queue>> asyncequeue;
atomic_ptr<lv2_event_queue> asyncequeue;
};
}
@ -137,7 +137,7 @@ error_code sys_storage_async_send_device_command(u32 dev_handle, u64 cmd, vm::pt
auto& manager = g_fxo->get<storage_manager>();
if (auto q = *manager.asyncequeue.load())
if (auto q = manager.asyncequeue.load())
{
q->send(0, unk, unk, unk);
}

View file

@ -6,6 +6,7 @@
#include "Emu/CPU/CPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/timers.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/IdManager.h"
#include "Emu/IPC.h"
@ -68,11 +69,6 @@ struct ppu_non_sleeping_count_t
u32 onproc_count;
};
namespace vm
{
extern u8 g_reservations[65536 / 128 * 64];
}
// Base class for some kernel objects (shared set of 8192 objects).
struct lv2_obj
{
@ -458,32 +454,7 @@ public:
static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false);
static inline void notify_all()
{
for (auto cpu : g_to_notify)
{
if (!cpu)
{
break;
}
if (cpu != &g_to_notify)
{
if (cpu >= vm::g_reservations && cpu <= vm::g_reservations + (std::size(vm::g_reservations) - 1))
{
atomic_wait_engine::notify_all(cpu);
}
else
{
// Note: by the time of notification the thread could have been deallocated which is why the direct function is used
atomic_wait_engine::notify_one(cpu);
}
}
}
g_to_notify[0] = nullptr;
g_postpone_notify_barrier = false;
}
static void notify_all() noexcept;
// Can be called before the actual sleep call in order to move it out of mutex scope
static void prepare_for_sleep(cpu_thread& cpu);

View file

@ -33,7 +33,7 @@ std::vector<std::pair<u128, id_manager::typeinfo>>& id_manager::get_typeinfo_map
return s_map;
}
id_manager::id_key* idm::allocate_id(std::span<id_manager::id_key> keys, usz& highest_index, u32 type_id, u32 dst_id, u32 base, u32 step, u32 count, bool uses_lowest_id, std::pair<u32, u32> invl_range)
id_manager::id_key* idm::allocate_id(std::span<id_manager::id_key> keys, u32& highest_index, u32 type_id, u32 dst_id, u32 base, u32 step, u32 count, bool uses_lowest_id, std::pair<u32, u32> invl_range)
{
if (dst_id != (base ? 0 : u32{umax}))
{
@ -41,7 +41,7 @@ id_manager::id_key* idm::allocate_id(std::span<id_manager::id_key> keys, usz& hi
const u32 index = id_manager::get_index(dst_id, base, step, count, invl_range);
ensure(index < count);
highest_index = std::max<usz>(highest_index, index + 1);
highest_index = std::max(highest_index, index + 1);
if (keys[index].type() != umax)
{

View file

@ -286,7 +286,7 @@ namespace id_manager
std::array<stx::atomic_ptr<T>, T::id_count> vec_data{};
std::array<stx::shared_ptr<T>, T::id_count> private_copy{};
std::array<id_key, T::id_count> vec_keys{};
usz highest_index = 0;
u32 highest_index = 0;
shared_mutex mutex{}; // TODO: Use this instead of global mutex
@ -330,11 +330,11 @@ namespace id_manager
// Simulate construction semantics (idm::last_id() value)
g_id = id;
const usz object_index = get_index(id, info->base, info->step, info->count, info->invl_range);
const u32 object_index = get_index(id, info->base, info->step, info->count, info->invl_range);
auto& obj = ::at32(vec_data, object_index);
ensure(!obj);
highest_index = std::max<usz>(highest_index, object_index + 1);
highest_index = std::max(highest_index, object_index + 1);
vec_keys[object_index] = id_key(id, static_cast<u32>(static_cast<u64>(type_init_pos >> 64)));
info->load(ar)(&obj);
@ -383,7 +383,7 @@ namespace id_manager
reader_lock lock(g_mutex);
// Save all entries
for (usz i = 0; i < highest_index; i++)
for (u32 i = 0; i < highest_index; i++)
{
private_copy[i] = vec_data[i].load();
}
@ -489,7 +489,7 @@ class idm
}
// Prepare new ID (returns nullptr if out of resources)
static id_manager::id_key* allocate_id(std::span<id_manager::id_key> vec, usz& highest_index, u32 type_id, u32 dst_id, u32 base, u32 step, u32 count, bool uses_lowest_id, std::pair<u32, u32> invl_range);
static id_manager::id_key* allocate_id(std::span<id_manager::id_key> keys, u32& highest_index, u32 type_id, u32 dst_id, u32 base, u32 step, u32 count, bool uses_lowest_id, std::pair<u32, u32> invl_range);
// Get object by internal index if exists (additionally check type if types are not equal)
template <typename T, typename Type>

View file

@ -238,7 +238,7 @@ matching_ctx::matching_ctx(vm::ptr<SceNpId> npId, vm::ptr<SceNpMatchingHandler>
this->handler = handler;
this->arg = arg;
}
void matching_ctx::queue_callback(u32 req_id, s32 event, s32 error_code)
void matching_ctx::queue_callback(u32 req_id, s32 event, s32 error_code) const
{
if (handler)
{
@ -249,7 +249,7 @@ void matching_ctx::queue_callback(u32 req_id, s32 event, s32 error_code)
});
}
}
void matching_ctx::queue_gui_callback(s32 event, s32 error_code)
void matching_ctx::queue_gui_callback(s32 event, s32 error_code) const
{
if (gui_handler)
{

View file

@ -289,8 +289,8 @@ struct matching_ctx
{
matching_ctx(vm::ptr<SceNpId> npid, vm::ptr<SceNpMatchingHandler> handler, vm::ptr<void> arg);
void queue_callback(u32 req_id, s32 event, s32 error_code);
void queue_gui_callback(s32 event, s32 error_code);
void queue_callback(u32 req_id, s32 event, s32 error_code) const;
void queue_gui_callback(s32 event, s32 error_code) const;
static const u32 id_base = 0x9001;
static const u32 id_step = 1;

View file

@ -513,7 +513,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
if (vertex_state & rsx::vertex_arrays_changed)
{
analyse_inputs_interleaved(m_vertex_layout);
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
}
else if (vertex_state & rsx::vertex_base_changed)
{

View file

@ -840,8 +840,8 @@ void GLGSRender::load_program_env()
// Vertex state
auto mapping = m_vertex_env_buffer->alloc_from_heap(144, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
m_draw_processor.fill_scale_offset_data(buf, false);
m_draw_processor.fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale();
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
@ -887,7 +887,7 @@ void GLGSRender::load_program_env()
// Fragment state
auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_fragment_state_buffer(buf, current_fragment_program);
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
m_fragment_env_buffer->bind_range(GL_FRAGMENT_STATE_BIND_SLOT, mapping.second, 32);
}
@ -988,7 +988,7 @@ void GLGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
: std::span<const u16>(m_vertex_prog->constant_ids);
buffer.reserve(transform_constants_size);
fill_vertex_program_constants_data(buffer.data(), constant_ids);
m_draw_processor.fill_vertex_program_constants_data(buffer.data(), constant_ids);
}
}
@ -1007,7 +1007,14 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
buf[1] = upload_info.vertex_index_offset;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_draw_processor.fill_vertex_layout_state(
m_vertex_layout,
current_vp_metadata,
upload_info.first_vertex,
upload_info.allocated_vertex_count,
reinterpret_cast<s32*>(buf),
upload_info.persistent_mapping_offset,
upload_info.volatile_mapping_offset);
m_vertex_layout_buffer->bind_range(GL_VERTEX_LAYOUT_BIND_SLOT, mapping.second, 128 + 16);

View file

@ -153,7 +153,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
m_profiler.start();
//Write index buffers and count verts
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), m_draw_processor.get_draw_command(rsx::method_registers));
const u32 vertex_count = (result.max_index - result.min_index) + 1;
u32 vertex_base = result.min_index;
@ -250,7 +250,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
}
//Write all the data
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
m_frame_stats.vertex_upload_time += m_profiler.duration();
return upload_info;

View file

@ -89,6 +89,52 @@ namespace rsx
}
}
bool draw_clause::check_trivially_instanced() const
{
if (pass_count() <= 1)
{
// Cannot instance one draw call or less
return false;
}
// For instancing all draw calls must be identical
const auto& ref = draw_command_ranges.front();
for (const auto& range : draw_command_ranges)
{
if (range.first != ref.first || range.count != ref.count)
{
return false;
}
}
if (draw_command_barriers.empty())
{
// Raise alarm here for investigation, we may be missing a corner case.
rsx_log.error("Instanced draw detected, but no command barriers found!");
return false;
}
// Barriers must exist, but can only involve updating transform constants (for now)
for (const auto& barrier : draw_command_barriers)
{
if (barrier.type != rsx::transform_constant_load_modifier_barrier &&
barrier.type != rsx::transform_constant_update_barrier)
{
ensure(barrier.draw_id < ::size32(draw_command_ranges));
if (draw_command_ranges[barrier.draw_id].count == 0)
{
// Dangling command barriers are ignored. We're also at the end of the command, so abort.
break;
}
// Fail. Only transform constant instancing is supported at the moment.
return false;
}
}
return true;
}
void draw_clause::reset(primitive_type type)
{
current_range_index = ~0u;
@ -97,6 +143,7 @@ namespace rsx
command = draw_command::none;
primitive = type;
primitive_barrier_enable = false;
is_trivial_instanced_draw = false;
draw_command_ranges.clear();
draw_command_barriers.clear();
@ -105,7 +152,7 @@ namespace rsx
is_disjoint_primitive = is_primitive_disjointed(primitive);
}
u32 draw_clause::execute_pipeline_dependencies(context* ctx) const
u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const
{
u32 result = 0u;
for (;
@ -151,7 +198,20 @@ namespace rsx
// Update transform constants
auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0);
auto buffer = std::span<const u32>(static_cast<const u32*>(vm::base(ptr)), barrier.arg1);
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer);
auto notify = [&](rsx::context*, u32 load, u32 count)
{
if (!instance_config)
{
return false;
}
instance_config->transform_constants_data_changed = true;
instance_config->patch_load_offset = load;
instance_config->patch_load_count = count;
return true;
};
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer, notify);
result |= transform_constants_changed;
break;
}

View file

@ -7,6 +7,14 @@
namespace rsx
{
struct instanced_draw_config_t
{
bool transform_constants_data_changed;
u32 patch_load_offset;
u32 patch_load_count;
};
class draw_clause
{
// Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
@ -51,6 +59,8 @@ namespace rsx
}
}
bool check_trivially_instanced() const;
public:
primitive_type primitive{};
draw_command command{};
@ -59,6 +69,7 @@ namespace rsx
bool is_disjoint_primitive{}; // Set if primitive type does not rely on adjacency information
bool primitive_barrier_enable{}; // Set once to signal that a primitive restart barrier can be inserted
bool is_rendering{}; // Set while we're actually pushing the draw calls to host GPU
bool is_trivial_instanced_draw{}; // Set if the draw call can be executed on the host GPU as a single instanced draw.
simple_array<u32> inline_vertex_array{};
@ -73,8 +84,8 @@ namespace rsx
{
// End draw call append mode
current_range_index = ~0u;
// TODO
// Check if we can instance on host
is_trivial_instanced_draw = check_trivially_instanced();
}
/**
@ -269,7 +280,7 @@ namespace rsx
/**
* Executes commands reqiured to make the current draw state valid
*/
u32 execute_pipeline_dependencies(struct context* ctx) const;
u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const;
const draw_range_t& get_range() const
{

View file

@ -18,7 +18,7 @@ namespace rsx
// NOTE: Push buffers still behave like register writes.
// You do not need to specify each attribute for each vertex, the register is referenced instead.
// This is classic OpenGL 1.x behavior as I remember.
RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
RSX(ctx)->GRAPH_frontend().append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
}
auto& info = REGS(ctx)->register_vertex_info[attrib_index];

View file

@ -30,7 +30,7 @@ namespace rsx
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
}
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args)
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify)
{
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
const u32 constant_id = index / 4;
@ -40,8 +40,15 @@ namespace rsx
auto dst = &REGS(ctx)->transform_constants[load + constant_id][subreg];
copy_data_swap_u32(dst, args.data(), ::size32(args));
// Notify
const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div
RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id);
const u32 load_index = load + constant_id;
const u32 load_count = last_constant_id - constant_id;
if (!notify || !notify(ctx, load_index, load_count))
{
RSX(ctx)->patch_transform_constants(ctx, load_index, load_count);
}
}
void set_transform_constant::impl(context* ctx, u32 reg, [[maybe_unused]] u32 arg)
@ -256,15 +263,15 @@ namespace rsx
{
if (RSX(ctx)->in_begin_end)
{
RSX(ctx)->append_array_element(arg & 0xFFFF);
RSX(ctx)->append_array_element(arg >> 16);
RSX(ctx)->GRAPH_frontend().append_array_element(arg & 0xFFFF);
RSX(ctx)->GRAPH_frontend().append_array_element(arg >> 16);
}
}
void set_array_element32(context* ctx, u32, u32 arg)
{
if (RSX(ctx)->in_begin_end)
RSX(ctx)->append_array_element(arg);
RSX(ctx)->GRAPH_frontend().append_array_element(arg);
}
void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg)
@ -353,8 +360,8 @@ namespace rsx
// Check if we have immediate mode vertex data in a driver-local buffer
if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none)
{
const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count();
const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count();
const u32 push_buffer_vertices_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_vertex_count();
const u32 push_buffer_index_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_index_count();
// Need to set this flag since it overrides some register contents
REGS(ctx)->current_draw_clause.is_immediate_draw = true;
@ -386,6 +393,12 @@ namespace rsx
return;
}
// Notify the backend if the drawing style changes (instanced vs non-instanced)
if (REGS(ctx)->current_draw_clause.is_trivial_instanced_draw != RSX(ctx)->is_current_vertex_program_instanced())
{
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::xform_instancing_state_dirty;
}
RSX(ctx)->end();
}
else

View file

@ -204,7 +204,7 @@ namespace rsx
static void decode_one(context* ctx, u32 reg, u32 arg);
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args);
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify = {});
};
struct set_transform_program

View file

@ -269,6 +269,7 @@ public:
struct
{
// Configuration properties (in)
u16 in_register_mask = 0;
u16 common_access_sampler_mask = 0;
@ -276,6 +277,7 @@ public:
u16 redirected_sampler_mask = 0;
u16 multisampled_sampler_mask = 0;
// Decoded properties (out)
bool has_lit_op = false;
bool has_gather_op = false;
bool has_no_output = false;

View file

@ -262,6 +262,11 @@ namespace glsl
}
}
if (props.require_instanced_render)
{
enabled_options.push_back("_ENABLE_INSTANCED_CONSTANTS");
}
// Import vertex header
program_common::define_glsl_switches(OS, enabled_options);

View file

@ -55,4 +55,23 @@ vec4 apply_zclip_xform(
}
#endif
#if defined(_ENABLE_INSTANCED_CONSTANTS)
// Workaround for GL vs VK builtin variable naming
#ifdef VULKAN
#define _gl_InstanceID gl_InstanceIndex
#else
#define _gl_InstanceID gl_InstanceID
#endif
vec4 _fetch_constant(const in int base_offset)
{
// Get virtual draw/instance id. Normally will be 1:1 based on instance index
const int indirection_offset = (_gl_InstanceID * CONSTANTS_ARRAY_LENGTH) + base_offset;
const int corrected_offset = constants_addressing_lookup[indirection_offset];
return instanced_constants_array[corrected_offset];
}
#else
#define _fetch_constant(x) vc[x]
#endif
)"

View file

@ -22,6 +22,7 @@ namespace glsl
// Applicable in vertex stage
bool require_lit_emulation : 1;
bool require_explicit_invariance : 1;
bool require_instanced_render : 1;
bool emulate_zclip_transform : 1;
bool emulate_depth_clip_only : 1;

View file

@ -341,6 +341,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
usz vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const
{
usz hash = vertex_program_utils::get_vertex_program_ucode_hash(program);
hash ^= program.ctrl;
hash ^= program.output_mask;
hash ^= program.texture_state.texture_dimensions;
hash ^= program.texture_state.multisampled_textures;
@ -351,6 +352,8 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
{
if (binary1.output_mask != binary2.output_mask)
return false;
if (binary1.ctrl != binary2.ctrl)
return false;
if (binary1.texture_state != binary2.texture_state)
return false;
if (binary1.data.size() != binary2.data.size())

View file

@ -131,7 +131,7 @@ std::string VertexProgramDecompiler::GetSRC(const u32 n)
m_parr.AddParam(PF_PARAM_UNIFORM, float4, std::string("vc[468]"));
properties.has_indexed_constants |= !!d3.index_const;
m_constant_ids.insert(static_cast<u16>(d1.const_src));
ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]";
fmt::append(ret, "_fetch_constant(%u%s)", d1.const_src, (d3.index_const ? " + " + AddAddrReg() : ""));
break;
default:
@ -362,14 +362,13 @@ std::string VertexProgramDecompiler::NotZeroPositive(const std::string& code)
std::string VertexProgramDecompiler::BuildCode()
{
std::string main_body;
for (uint i = 0, lvl = 1; i < m_instr_count; i++)
for (int i = 0, lvl = 1; i < static_cast<int>(m_instr_count); i++)
{
lvl -= m_instructions[i].close_scopes;
if (lvl < 1) lvl = 1;
lvl = std::max<int>(lvl - m_instructions[i].close_scopes, 0);
for (int j = 0; j < m_instructions[i].put_close_scopes; ++j)
{
--lvl;
if (lvl < 1) lvl = 1;
if (lvl > 1) --lvl;
main_body.append(lvl, '\t') += "}\n";
}
@ -380,6 +379,8 @@ std::string VertexProgramDecompiler::BuildCode()
lvl++;
}
ensure(lvl >= 0); // Underflow of indent level will cause crashes!!
for (const auto& instruction_body : m_instructions[i].body)
{
main_body.append(lvl, '\t') += instruction_body + "\n";
@ -409,7 +410,7 @@ std::string VertexProgramDecompiler::BuildCode()
{
const auto i = offset++;
if (i == index) continue; // Replace with self
reloc_table.emplace_back(fmt::format("vc[%d]", index), fmt::format("vc[%d]", i));
reloc_table.emplace_back(fmt::format("_fetch_constant(%d)", index), fmt::format("_fetch_constant(%d)", i));
}
// One-time patch

View file

@ -132,6 +132,10 @@ protected:
public:
struct
{
// Configuration properties (in)
// None
// Decoded properties (out)
bool has_lit_op = false;
bool has_indexed_constants = false;
}

View file

@ -409,12 +409,13 @@ namespace rsx
}
};
const auto element_push_buffer = render->draw_processor()->element_push_buffer();
if (index_size == 4)
{
if (!render->element_push_buffer.empty()) [[unlikely]]
if (!element_push_buffer.empty()) [[unlikely]]
{
// Indices provided via immediate mode
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u32{});
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u32{});
}
else
{
@ -424,10 +425,10 @@ namespace rsx
}
else
{
if (!render->element_push_buffer.empty()) [[unlikely]]
if (!element_push_buffer.empty()) [[unlikely]]
{
// Indices provided via immediate mode
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u16{});
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u16{});
}
else
{
@ -619,12 +620,12 @@ namespace rsx
ar(rsx::method_registers);
for (auto& v : vertex_push_buffers)
for (auto& v : m_draw_processor.m_vertex_push_buffers)
{
ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data);
}
ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
ar(m_draw_processor.m_element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer);
ar(enable_second_vhandler, requested_vsync);
ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info);
@ -696,6 +697,8 @@ namespace rsx
s_ctx.rsxthr = this;
m_ctx = &s_ctx;
m_draw_processor.init(m_ctx);
if (g_cfg.misc.use_native_interface && (g_cfg.video.renderer == video_renderer::opengl || g_cfg.video.renderer == video_renderer::vulkan))
{
m_overlay_manager = g_fxo->init<rsx::overlays::display_manager>(0);
@ -801,39 +804,6 @@ namespace rsx
in_begin_end = true;
}
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
{
if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
{
return;
}
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
}
u32 thread::get_push_buffer_vertex_count() const
{
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
return vertex_push_buffers[0].vertex_count;
}
void thread::append_array_element(u32 index)
{
// Endianness is swapped because common upload code expects input in BE
// TODO: Implement fast upload path for LE inputs and do away with this
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
}
u32 thread::get_push_buffer_index_count() const
{
return ::size32(element_push_buffer);
}
void thread::end()
{
if (capture_current_frame)
@ -850,20 +820,7 @@ namespace rsx
m_eng_interrupt_mask |= rsx::backend_interrupt;
ROP_sync_timestamp = rsx::get_shared_tag();
if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
{
for (auto& push_buf : vertex_push_buffers)
{
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
push_buf.clear();
}
m_graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
}
element_push_buffer.clear();
m_draw_processor.clear_push_buffers();
zcull_ctrl->on_draw();
@ -1197,180 +1154,6 @@ namespace rsx
state += cpu_flag::exit;
}
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();
float scale_x = rsx::method_registers.viewport_scale_x() / (clip_w / 2.f);
float offset_x = rsx::method_registers.viewport_offset_x() - (clip_w / 2.f);
offset_x /= clip_w / 2.f;
float scale_y = rsx::method_registers.viewport_scale_y() / (clip_h / 2.f);
float offset_y = (rsx::method_registers.viewport_offset_y() - (clip_h / 2.f));
offset_y /= clip_h / 2.f;
if (flip_y) scale_y *= -1;
if (flip_y) offset_y *= -1;
float scale_z = rsx::method_registers.viewport_scale_z();
float offset_z = rsx::method_registers.viewport_offset_z();
float one = 1.f;
utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
}
void thread::fill_user_clip_data(void *buffer) const
{
const rsx::user_clip_plane_op clip_plane_control[6] =
{
rsx::method_registers.clip_plane_0_enabled(),
rsx::method_registers.clip_plane_1_enabled(),
rsx::method_registers.clip_plane_2_enabled(),
rsx::method_registers.clip_plane_3_enabled(),
rsx::method_registers.clip_plane_4_enabled(),
rsx::method_registers.clip_plane_5_enabled(),
};
u8 data_block[64];
s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);
for (int index = 0; index < 6; ++index)
{
switch (clip_plane_control[index])
{
default:
rsx_log.error("bad clip plane control (0x%x)", static_cast<u8>(clip_plane_control[index]));
[[fallthrough]];
case rsx::user_clip_plane_op::disable:
clip_enabled_flags[index] = 0;
clip_distance_factors[index] = 0.f;
break;
case rsx::user_clip_plane_op::greater_or_equal:
clip_enabled_flags[index] = 1;
clip_distance_factors[index] = 1.f;
break;
case rsx::user_clip_plane_op::less_than:
clip_enabled_flags[index] = 1;
clip_distance_factors[index] = -1.f;
break;
}
}
memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
}
/**
* Fill buffer with vertex program constants.
* Buffer must be at least 512 float4 wide.
*/
void thread::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table)
{
if (!reloc_table.empty()) [[ likely ]]
{
char* dst = reinterpret_cast<char*>(buffer);
for (const auto& index : reloc_table)
{
utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]);
dst += 16;
}
}
else
{
memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float));
}
}
void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
{
ROP_control_t rop_control{};
if (rsx::method_registers.alpha_test_enabled())
{
const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func());
rop_control.set_alpha_test_func(alpha_func);
rop_control.enable_alpha_test();
}
if (rsx::method_registers.polygon_stipple_enabled())
{
rop_control.enable_polygon_stipple();
}
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
{
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
// Alpha values generate a coverage mask for order independent blending
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
// Simulated using combined alpha blend and alpha test
rop_control.enable_alpha_to_coverage();
if (rsx::method_registers.msaa_sample_mask())
{
rop_control.enable_MSAA_writes();
}
// Sample configuration bits
switch (rsx::method_registers.surface_antialias())
{
case rsx::surface_antialiasing::center_1_sample:
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
rop_control.set_msaa_control(1u);
break;
default:
rop_control.set_msaa_control(3u);
break;
}
}
const f32 fog0 = rsx::method_registers.fog_params_0();
const f32 fog1 = rsx::method_registers.fog_params_1();
const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation());
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (rsx::method_registers.surface_color())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats.
rop_control.enable_framebuffer_INT();
// Check if we want sRGB conversion.
if (rsx::method_registers.framebuffer_srgb_enabled())
{
rop_control.enable_framebuffer_sRGB();
}
break;
}
// Generate wpos coefficients
// wpos equation is now as follows:
// wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0)
// wpos.x = (frag_coord / resolution_scale)
// wpos.zw = frag_coord.zw
const auto window_origin = rsx::method_registers.shader_window_origin();
const u32 window_height = rsx::method_registers.shader_window_height();
const f32 resolution_scale = (window_height <= static_cast<u32>(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale();
const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale);
const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height;
const f32 alpha_ref = rsx::method_registers.alpha_ref();
u32 *dst = static_cast<u32*>(buffer);
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
}
u64 thread::timestamp()
{
const u64 freq = sys_time_get_timebase_frequency();
@ -1409,51 +1192,6 @@ namespace rsx
return t + timestamp_subvalue;
}
std::span<const std::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
{
if (!element_push_buffer.empty()) [[ unlikely ]]
{
// Indices provided via immediate mode
return {reinterpret_cast<const std::byte*>(element_push_buffer.data()), ::narrow<u32>(element_push_buffer.size() * sizeof(u32))};
}
const rsx::index_array_type type = rsx::method_registers.index_type();
const u32 type_size = get_index_type_size(type);
// Force aligned indices as realhw
const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
const u32 first = draw_indexed_clause.min_index();
const u32 count = draw_indexed_clause.get_elements_count();
const auto ptr = vm::_ptr<const std::byte>(address);
return { ptr + first * type_size, count * type_size };
}
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
thread::get_draw_command(const rsx::rsx_state& state) const
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
{
return draw_indexed_array_command
{
get_raw_index_array(state.current_draw_clause)
};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
return draw_array_command{};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
return draw_inlined_array{};
}
fmt::throw_exception("ill-formed draw command");
}
void thread::do_local_task(FIFO::state state)
{
m_eng_interrupt_mask.clear(rsx::backend_interrupt);
@ -2249,6 +1987,17 @@ namespace rsx
void thread::get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors)
{
if (m_graphics_state.test(rsx::pipeline_state::xform_instancing_state_dirty))
{
current_vertex_program.ctrl = 0;
if (rsx::method_registers.current_draw_clause.is_trivial_instanced_draw)
{
current_vertex_program.ctrl |= RSX_SHADER_CONTROL_INSTANCED_CONSTANTS;
}
m_graphics_state.clear(rsx::pipeline_state::xform_instancing_state_dirty);
}
if (!m_graphics_state.test(rsx::pipeline_state::vertex_program_dirty))
{
return;
@ -2256,7 +2005,6 @@ namespace rsx
ensure(!m_graphics_state.test(rsx::pipeline_state::vertex_program_ucode_dirty));
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.ctrl = 0; // Reserved
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
@ -2279,183 +2027,6 @@ namespace rsx
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
}
void thread::analyse_inputs_interleaved(vertex_input_layout& result)
{
const rsx_state& state = rsx::method_registers;
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
result.clear();
result.attribute_mask = static_cast<u16>(input_mask);
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{
interleaved_range_info& info = *result.alloc_interleaved_block();
info.interleaved = true;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
auto &vinfo = state.vertex_arrays_info[index];
result.attribute_placement[index] = attribute_buffer_placement::none;
if (vinfo.size() > 0)
{
// Stride must be updated even if the stream is disabled
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
info.locations.push_back({ index, false, 1 });
if (input_mask & (1u << index))
{
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
if (info.attribute_stride)
{
// At least one array feed must be enabled for vertex input
result.interleaved_blocks.push_back(&info);
}
return;
}
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
result.interleaved_blocks.reserve(16);
result.referenced_registers.reserve(16);
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
{
ensure(index < rsx::limits::vertex_count);
if (!(ref_mask & 1u))
{
// Nothing to do, uninitialized
continue;
}
// Always reset attribute placement by default
result.attribute_placement[index] = attribute_buffer_placement::none;
// Check for interleaving
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed)
{
// NOTE: In immediate rendering mode, all vertex setup is ignored
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
if (vertex_push_buffers[index].vertex_count > 1)
{
// Ensure consistent number of vertices per attribute.
vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);
// Read temp buffer (register array)
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
result.volatile_blocks.push_back(volatile_range_info);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
else if (state.register_vertex_info[index].size > 0)
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
// Fall back to the default register value if no source is specified via register
continue;
}
const auto& info = state.vertex_arrays_info[index];
if (!info.size())
{
if (state.register_vertex_info[index].size > 0)
{
//Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
continue;
}
}
else
{
result.attribute_placement[index] = attribute_buffer_placement::persistent;
const u32 base_address = info.offset() & 0x7fffffff;
bool alloc_new_block = true;
bool modulo = !!(frequency_divider_mask & (1 << index));
for (auto &block : result.interleaved_blocks)
{
if (block->single_vertex)
{
//Single vertex definition, continue
continue;
}
if (block->attribute_stride != info.stride())
{
//Stride does not match, continue
continue;
}
if (base_address > block->base_offset)
{
const u32 diff = base_address - block->base_offset;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
}
else
{
const u32 diff = block->base_offset - base_address;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
//Matches, and this address is lower than existing
block->base_offset = base_address;
}
alloc_new_block = false;
block->locations.push_back({ index, modulo, info.frequency() });
block->interleaved = true;
break;
}
if (alloc_new_block)
{
interleaved_range_info& block = *result.alloc_interleaved_block();
block.base_offset = base_address;
block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31;
block.locations.reserve(16);
block.locations.push_back({ index, modulo, info.frequency() });
if (block.attribute_stride == 0)
{
block.single_vertex = true;
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
result.interleaved_blocks.push_back(&block);
}
}
}
for (auto &info : result.interleaved_blocks)
{
//Calculate real data address to be used during upload
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
}
}
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{
if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_dirty))
@ -2908,267 +2479,6 @@ namespace rsx
return std::make_pair(persistent_memory_size, volatile_memory_size);
}
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
{
std::array<s32, 16> offset_in_block = {};
u32 volatile_offset = volatile_offset_base;
u32 persistent_offset = persistent_offset_base;
//NOTE: Order is important! Transient ayout is always push_buffers followed by register data
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
{
for (const auto &info : layout.volatile_blocks)
{
offset_in_block[info.first] = volatile_offset;
volatile_offset += info.second;
}
}
for (u8 index : layout.referenced_registers)
{
offset_in_block[index] = volatile_offset;
volatile_offset += 16;
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto &block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset;
for (const auto& attrib : block->locations)
{
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
offset_in_block[attrib.index] = inline_data_offset;
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
}
else
{
for (const auto &block : layout.interleaved_blocks)
{
for (const auto& attrib : block->locations)
{
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
}
const auto range = block->calculate_required_range(first_vertex, vertex_count);
persistent_offset += block->attribute_stride * range.second;
}
}
// Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride
// [8-24] attribute divisor
// [24-27] attribute type
// [27-30] attribute size
// [30-31] reserved
// [31-60] starting offset
// [60-21] swap bytes flag
// [61-22] volatile flag
// [62-63] modulo enable flag
const s32 default_frequency_mask = (1 << 8);
const s32 swap_storage_mask = (1 << 29);
const s32 volatile_storage_mask = (1 << 30);
const s32 modulo_op_frequency_mask = smin;
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
const auto max_index = (first_vertex + vertex_count) - 1;
for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
{
if (!(ref_mask & 1u))
{
// Unused input, ignore this
continue;
}
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
{
static constexpr u64 zero = 0;
std::memcpy(buffer + index * 2, &zero, sizeof(zero));
continue;
}
rsx::vertex_base_type type = {};
s32 size = 0;
s32 attrib0 = 0;
s32 attrib1 = 0;
if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto &info = rsx::method_registers.vertex_arrays_info[index];
if (!info.size())
{
// Register
const auto& reginfo = rsx::method_registers.register_vertex_info[index];
type = reginfo.type;
size = reginfo.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
else
{
// Array
type = info.type();
size = info.size();
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
}
}
else
{
// Data is either from an immediate render or register input
// Immediate data overrides register input
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
vertex_push_buffers[index].vertex_count > 1)
{
// Push buffer
const auto &info = vertex_push_buffers[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
}
else
{
// Register
const auto& info = rsx::method_registers.register_vertex_info[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
}
attrib1 |= volatile_storage_mask;
}
else
{
auto &info = rsx::method_registers.vertex_arrays_info[index];
type = info.type();
size = info.size();
auto stride = info.stride();
attrib0 = stride;
if (stride > 0) //when stride is 0, input is not an array but a single element
{
const u32 frequency = info.frequency();
switch (frequency)
{
case 0:
case 1:
{
attrib0 |= default_frequency_mask;
break;
}
default:
{
if (modulo_mask & (1 << index))
{
if (max_index >= frequency)
{
// Only set modulo mask if a modulo op is actually necessary!
// This requires that the uploaded range for this attr = [0, freq-1]
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
attrib0 |= (frequency << 8);
attrib1 |= modulo_op_frequency_mask;
}
else
{
attrib0 |= default_frequency_mask;
}
}
else
{
// Division
attrib0 |= (frequency << 8);
}
break;
}
}
}
} //end attribute placement check
// Special compressed 4 components into one 4-byte value. Decoded as one value.
if (type == rsx::vertex_base_type::cmp)
{
size = 1;
}
// All data is passed in in PS3-native order (BE) so swap flag should be set
attrib1 |= swap_storage_mask;
attrib0 |= (static_cast<s32>(type) << 24);
attrib0 |= (size << 27);
attrib1 |= offset_in_block[index];
buffer[index * 2 + 0] = attrib0;
buffer[index * 2 + 1] = attrib1;
}
}
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data)
{
auto transient = static_cast<char*>(volatile_data);
auto persistent = static_cast<char*>(persistent_data);
auto &draw_call = rsx::method_registers.current_draw_clause;
if (transient != nullptr)
{
if (draw_call.command == rsx::draw_command::inlined_array)
{
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
//Is it possible to reference data outside of the inlined array?
return;
}
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (draw_call.is_immediate_draw)
{
//NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
for (const auto &info : layout.volatile_blocks)
{
memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second);
transient += info.second;
}
}
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
}
if (persistent != nullptr)
{
for (interleaved_range_info* block : layout.interleaved_blocks)
{
auto range = block->calculate_required_range(first_vertex, vertex_count);
const u32 data_size = range.second * block->attribute_stride;
const u32 vertex_base = range.first * block->attribute_stride;
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}
}
void thread::flip(const display_flip_info_t& info)
{
m_eng_interrupt_mask.clear(rsx::display_interrupt);
@ -3651,6 +2961,9 @@ namespace rsx
void thread::on_notify_pre_memory_unmapped(u32 address, u32 size, std::vector<std::pair<u64, u64>>& event_data)
{
// Always flush MM if memory mapping is going to change.
rsx::mm_flush();
if (rsx_thread_running && address < rsx::constants::local_mem_base)
{
// Each bit represents io entry to be unmapped
@ -3828,7 +3141,7 @@ namespace rsx
u32 thread::get_load()
{
//Average load over around 30 frames
// Average load over around 30 frames
if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
{
const auto timestamp = get_system_time();

View file

@ -28,6 +28,8 @@
#include "Emu/IdManager.h"
#include "Core/RSXDisplay.h"
#include "Core/RSXDrawCommands.h"
#include "Core/RSXDriverState.h"
#include "Core/RSXFrameBuffer.h"
#include "Core/RSXContext.h"
#include "Core/RSXIOMap.hpp"
@ -59,52 +61,6 @@ namespace rsx
context_clear_all = context_clear_color | context_clear_depth
};
enum pipeline_state : u32
{
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
fragment_program_state_dirty = (1 << 2), // Fragment program state changed
vertex_program_state_dirty = (1 << 3), // Vertex program state changed
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc)
vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = (1 << 6), // Transform constants changed
fragment_constants_dirty = (1 << 7), // Fragment constants changed
framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed
fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed
vertex_texture_state_dirty = (1 << 10), // Fragment texture parameters changed
scissor_config_state_dirty = (1 << 11), // Scissor region changed
zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed
scissor_setup_invalid = (1 << 13), // Scissor configuration is broken
scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint
polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed
push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers)
polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed
depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed
pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint.
rtt_config_dirty = (1 << 21), // Render target configuration changed
rtt_config_contested = (1 << 22), // Render target configuration is indeterminate
rtt_config_valid = (1 << 23), // Render target configuration is valid
rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
// Vulkan-specific signals
invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,
all_dirty = ~0u
};
enum eng_interrupt_reason : u32
{
backend_interrupt = 0x0001, // Backend-related interrupt
@ -161,8 +117,6 @@ namespace rsx
void cpu_task() override;
protected:
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
s32 m_skip_frame_ctr = 0;
bool skip_current_frame = false;
@ -217,6 +171,9 @@ namespace rsx
// Host DMA
std::unique_ptr<RSXDMAWriter> m_host_dma_ctrl;
// Draw call management
draw_command_processor m_draw_processor;
public:
atomic_t<u64> new_get_put = u64{umax};
u32 restore_point = 0;
@ -225,7 +182,7 @@ namespace rsx
atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false };
atomic_t<u32> is_initialized{0};
rsx::simple_array<u32> element_push_buffer;
bool is_fifo_idle() const;
void flush_fifo();
@ -268,6 +225,8 @@ namespace rsx
void capture_frame(const std::string& name);
const backend_configuration& get_backend_config() const { return backend_config; }
const draw_command_processor* draw_processor() const { return &m_draw_processor; }
public:
shared_ptr<named_thread<ppu_thread>> intr_thread;
@ -301,11 +260,6 @@ namespace rsx
void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout);
bool get_scissor(areau& region, bool clip_viewport);
/**
* Analyze vertex inputs and group all interleaved blocks
*/
void analyse_inputs_interleaved(vertex_input_layout&);
RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {};
@ -424,21 +378,6 @@ namespace rsx
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
/**
* Immediate mode rendering requires a temp push buffer to hold attrib values
* Appends a value to the push buffer (currently only supports 32-wide types)
*/
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
u32 get_push_buffer_vertex_count() const;
void append_array_element(u32 index);
u32 get_push_buffer_index_count() const;
protected:
/**
@ -448,17 +387,6 @@ namespace rsx
*/
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);
/**
* Generates vertex input descriptors as an array of 16x4 s32s
*/
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
/**
* Uploads vertex data described in the layout descriptor
* Copies from local memory to the write-only output buffers provided in a sequential manner
*/
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
void evaluate_cpu_usage_reduction_limits();
private:
@ -468,29 +396,8 @@ namespace rsx
void handle_invalidated_memory_range();
public:
/**
* Fill buffer with 4x4 scale offset matrix.
* Vertex shader's position is to be multiplied by this matrix.
* if flip_y is set, the matrix is modified to use d3d convention.
*/
void fill_scale_offset_data(void *buffer, bool flip_y) const;
/**
* Fill buffer with user clip information
*/
void fill_user_clip_data(void *buffer) const;
/**
* Fill buffer with vertex program constants.
* Relocation table allows to do a partial fill with only selected registers.
*/
void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table);
/**
* Fill buffer with fragment rasterization state.
* Fills current fog values, alpha test parameters and texture scaling parameters
*/
void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program);
draw_command_processor& GRAPH_frontend() { return m_draw_processor; }
/**
* Notify that a section of memory has been mapped
@ -517,9 +424,17 @@ namespace rsx
*/
virtual void on_semaphore_acquire_wait() {}
/**
* Load an image from memory with optional scaling and rotation.
* Returns false to tell the HW decoder to perform the operation on the CPU as a fallback when the operation cannot be safely accelerated.
*/
virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }
// Program public "get" handlers
virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); }
virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }
bool is_current_vertex_program_instanced() const { return !!(current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS); }
public:
void reset();

View file

@ -730,7 +730,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
if (state_flags & rsx::vertex_arrays_changed)
{
analyse_inputs_interleaved(m_vertex_layout);
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
}
else if (state_flags & rsx::vertex_base_changed)
{
@ -929,7 +929,11 @@ void VKGSRender::emit_geometry(u32 sub_index)
if (!upload_info.index_info)
{
if (draw_call.is_single_draw())
if (draw_call.is_trivial_instanced_draw)
{
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0);
}
else if (draw_call.is_single_draw())
{
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
}
@ -951,10 +955,13 @@ void VKGSRender::emit_geometry(u32 sub_index)
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
if (rsx::method_registers.current_draw_clause.is_single_draw())
if (draw_call.is_trivial_instanced_draw)
{
const u32 index_count = upload_info.vertex_draw_count;
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0, 0);
}
else if (rsx::method_registers.current_draw_clause.is_single_draw())
{
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0, 0);
}
else
{
@ -1052,7 +1059,10 @@ void VKGSRender::end()
m_frame_stats.setup_time += m_profiler.duration();
// Apply write memory barriers
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) ds->write_barrier(*m_current_command_buffer);
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil))
{
ds->write_barrier(*m_current_command_buffer);
}
for (auto &rtt : m_rtts.m_bound_render_targets)
{
@ -1111,12 +1121,19 @@ void VKGSRender::end()
m_current_command_buffer->flags |= vk::command_buffer::cb_reload_dynamic_state;
}
rsx::method_registers.current_draw_clause.begin();
auto& draw_call = rsx::method_registers.current_draw_clause;
draw_call.begin();
do
{
emit_geometry(sub_index++);
if (draw_call.is_trivial_instanced_draw)
{
// We already completed. End the draw.
draw_call.end();
}
}
while (rsx::method_registers.current_draw_clause.next());
while (draw_call.next());
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
{

View file

@ -477,6 +477,22 @@ namespace
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.instancing_constants_buffer_slot;
bindings[idx].pImmutableSamplers = nullptr;
idx++;
for (auto binding = binding_table.textures_first_bind_slot;
binding < binding_table.vertex_textures_first_bind_slot;
binding++)
@ -643,7 +659,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) },
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 }
};
m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls);
@ -661,6 +677,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer");
m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "instancing data buffer");
const auto shadermode = g_cfg.video.shadermode.get();
@ -949,6 +966,7 @@ VKGSRender::~VKGSRender()
m_vertex_instructions_buffer.destroy();
m_fragment_instructions_buffer.destroy();
m_raster_env_ring_info.destroy();
m_instancing_buffer_ring_info.destroy();
// Fallback bindables
null_buffer.reset();
@ -1286,7 +1304,8 @@ void VKGSRender::check_heap_status(u32 flags)
m_fragment_constants_ring_info.is_critical() ||
m_transform_constants_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical() ||
m_raster_env_ring_info.is_critical();
m_raster_env_ring_info.is_critical() ||
m_instancing_buffer_ring_info.is_critical();
}
else
{
@ -1318,7 +1337,9 @@ void VKGSRender::check_heap_status(u32 flags)
heap_critical = m_vertex_layout_ring_info.is_critical();
break;
case VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE:
heap_critical = m_transform_constants_ring_info.is_critical();
heap_critical = (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
? m_instancing_buffer_ring_info.is_critical()
: m_transform_constants_ring_info.is_critical();
break;
case VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE:
heap_critical = m_fragment_constants_ring_info.is_critical();
@ -1361,6 +1382,7 @@ void VKGSRender::check_heap_status(u32 flags)
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_raster_env_ring_info.reset_allocation_stats();
m_instancing_buffer_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs();
m_last_heap_sync_time = rsx::get_shared_tag();
}
@ -2130,6 +2152,7 @@ void VKGSRender::load_program_env()
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty));
const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw;
if (update_vertex_env)
{
@ -2139,8 +2162,8 @@ void VKGSRender::load_program_env()
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
m_draw_processor.fill_scale_offset_data(buf, false);
m_draw_processor.fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale();
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
@ -2150,7 +2173,32 @@ void VKGSRender::load_program_env()
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 };
}
if (update_transform_constants)
if (update_instancing_data)
{
// Combines transform load + instancing lookup table
const auto alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment;
usz indirection_table_offset = 0;
usz constants_data_table_offset = 0;
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
{
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size);
});
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
{
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size);
});
m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, *m_vertex_prog);
m_instancing_buffer_ring_info.unmap();
m_instancing_indirection_buffer_info = { m_instancing_buffer_ring_info.heap->value, indirection_table_offset, indirection_table_buf.size() };
m_instancing_constants_array_buffer_info = { m_instancing_buffer_ring_info.heap->value, constants_data_table_offset, constants_array_buf.size() };
}
else if (update_transform_constants)
{
// Transform constants
usz mem_offset = 0;
@ -2200,7 +2248,7 @@ void VKGSRender::load_program_env()
auto mem = m_fragment_env_ring_info.alloc<256>(256);
auto buf = m_fragment_env_ring_info.map(mem, 32);
fill_fragment_state_buffer(buf, current_fragment_program);
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
m_fragment_env_ring_info.unmap();
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 };
}
@ -2295,13 +2343,24 @@ void VKGSRender::load_program_env()
m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
}
if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
{
m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
}
// Clear flags
m_graphics_state.clear(
rsx::pipeline_state::fragment_state_dirty |
u32 handled_flags = rsx::pipeline_state::fragment_state_dirty |
rsx::pipeline_state::vertex_state_dirty |
rsx::pipeline_state::transform_constants_dirty |
rsx::pipeline_state::fragment_constants_dirty |
rsx::pipeline_state::fragment_texture_state_dirty);
rsx::pipeline_state::fragment_texture_state_dirty;
if (!update_instancing_data)
{
handled_flags |= rsx::pipeline_state::transform_constants_dirty;
}
m_graphics_state.clear(handled_flags);
}
void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
@ -2317,7 +2376,7 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
const auto constant_ids = (transform_constants_size == 8192)
? std::span<const u16>{}
: std::span<const u16>(m_vertex_prog->constant_ids);
fill_vertex_program_constants_data(buf, constant_ids);
m_draw_processor.fill_vertex_program_constants_data(buf, constant_ids);
}
}
@ -2360,8 +2419,14 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast<s32*>(dst),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
m_draw_processor.fill_vertex_layout_state(
m_vertex_layout,
current_vp_metadata,
vertex_info.first_vertex,
vertex_info.allocated_vertex_count,
static_cast<s32*>(dst),
vertex_info.persistent_window_offset,
vertex_info.volatile_window_offset);
m_vertex_layout_ring_info.unmap();
}
@ -2482,7 +2547,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_index_buffer_ring_info.is_dirty() ||
m_transform_constants_ring_info.is_dirty() ||
m_texture_upload_buffer_ring_info.is_dirty() ||
m_raster_env_ring_info.is_dirty())
m_raster_env_ring_info.is_dirty() ||
m_instancing_buffer_ring_info.is_dirty())
{
auto secondary_command_buffer = m_secondary_cb_list.next();
secondary_command_buffer->begin();
@ -2497,6 +2563,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_transform_constants_ring_info.sync(*secondary_command_buffer);
m_texture_upload_buffer_ring_info.sync(*secondary_command_buffer);
m_raster_env_ring_info.sync(*secondary_command_buffer);
m_instancing_buffer_ring_info.sync(*secondary_command_buffer);
secondary_command_buffer->end();

View file

@ -149,6 +149,7 @@ private:
vk::data_heap m_index_buffer_ring_info; // Index data
vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap
vk::data_heap m_raster_env_ring_info; // Raster control such as polygon and line stipple
vk::data_heap m_instancing_buffer_ring_info; // Instanced rendering data (constants indirection table + instanced constants)
vk::data_heap m_fragment_instructions_buffer;
vk::data_heap m_vertex_instructions_buffer;
@ -160,6 +161,8 @@ private:
VkDescriptorBufferInfo m_fragment_constants_buffer_info {};
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info {};
VkDescriptorBufferInfo m_raster_env_buffer_info {};
VkDescriptorBufferInfo m_instancing_indirection_buffer_info {};
VkDescriptorBufferInfo m_instancing_constants_array_buffer_info{};
VkDescriptorBufferInfo m_vertex_instructions_buffer_info {};
VkDescriptorBufferInfo m_fragment_instructions_buffer_info {};

View file

@ -197,6 +197,7 @@ namespace vk
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
s64 rasterizer_env_heap_ptr = 0;
s64 instancing_heap_ptr = 0;
u64 last_frame_sync_time = 0;
@ -218,6 +219,7 @@ namespace vk
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr;
instancing_heap_ptr = other.instancing_heap_ptr;
}
// Exchange storage (non-copyable)
@ -229,7 +231,7 @@ namespace vk
void tag_frame_end(
s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc,
s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc,
s64 texture_loc, s64 rasterizer_loc)
s64 texture_loc, s64 rasterizer_loc, s64 instancing_loc)
{
attrib_heap_ptr = attrib_loc;
vtx_env_heap_ptr = vtxenv_loc;
@ -241,6 +243,7 @@ namespace vk
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
rasterizer_env_heap_ptr = rasterizer_loc;
instancing_heap_ptr = instancing_loc;
last_frame_sync_time = rsx::get_shared_tag();
}

View file

@ -163,7 +163,8 @@ void VKGSRender::advance_queued_frames()
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(),
m_raster_env_ring_info.get_current_put_pos_minus_one());
m_raster_env_ring_info.get_current_put_pos_minus_one(),
m_instancing_buffer_ring_info.get_current_put_pos_minus_one());
m_queued_frames.push_back(m_current_frame);
ensure(m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES);
@ -266,6 +267,8 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_raster_env_ring_info.m_get_pos = ctx->rasterizer_env_heap_ptr;
m_instancing_buffer_ring_info.m_get_pos = ctx->instancing_heap_ptr;
m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
@ -276,6 +279,8 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
m_raster_env_ring_info.notify();
m_instancing_buffer_ring_info.notify();
}
}

View file

@ -217,7 +217,7 @@ namespace
vk::vertex_upload_info VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
auto result = std::visit(visitor, m_draw_processor.get_draw_command(rsx::method_registers));
const u32 vertex_count = (result.max_index - result.min_index) + 1;
u32 vertex_base = result.min_index;
@ -294,7 +294,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
const usz volatile_offset_in_block = volatile_offset - persistent_offset;
void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
m_attrib_ring_info.unmap();
}
else
@ -302,14 +302,14 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (required.first > 0 && persistent_offset != umax)
{
void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_attrib_ring_info.unmap();
}
if (required.second > 0)
{
void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_attrib_ring_info.unmap();
}
}

View file

@ -32,31 +32,34 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "#version 450\n\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n\n";
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
OS << "{\n";
OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n";
OS << " float point_size;\n";
OS << " float z_near;\n";
OS << " float z_far;\n";
OS << "};\n\n";
OS <<
"layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"
"{\n"
" mat4 scale_offset_mat;\n"
" ivec4 user_clip_enabled[2];\n"
" vec4 user_clip_factor[2];\n"
" uint transform_branch_bits;\n"
" float point_size;\n"
" float z_near;\n"
" float z_far;\n"
"};\n\n";
if (m_device_props.emulate_conditional_rendering)
{
OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n";
OS << "{\n";
OS << " uint conditional_rendering_predicate;\n";
OS << "};\n\n";
OS <<
"layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n"
"{\n"
" uint conditional_rendering_predicate;\n"
"};\n\n";
}
OS << "layout(push_constant) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
OS << " uint draw_id;\n";
OS << " uint layout_ptr_offset;\n";
OS <<
"layout(push_constant) uniform VertexLayoutBuffer\n"
"{\n"
" uint vertex_base_index;\n"
" uint vertex_index_offset;\n"
" uint draw_id;\n"
" uint layout_ptr_offset;\n";
if (m_device_props.emulate_conditional_rendering)
{
@ -110,18 +113,50 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
{
if (PI.name.starts_with("vc["))
{
OS << "layout(std140, set=0, binding = " << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 " << PI.name << ";\n";
OS << "};\n\n";
if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS))
{
OS << "layout(std140, set=0, binding=" << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 " << PI.name << ";\n";
OS << "};\n\n";
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
continue;
inputs.push_back(in);
continue;
}
else
{
// 1. Bind indirection lookup buffer
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n";
OS << "{\n";
OS << " int constants_addressing_lookup[];\n";
OS << "};\n\n";
in.location = m_binding_table.instancing_lookup_table_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "InstancingData";
in.type = vk::glsl::input_type_storage_buffer;
inputs.push_back(in);
// 2. Bind actual constants buffer
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 instanced_constants_array[];\n";
OS << "};\n\n";
OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 468 : ::size32(m_constant_ids)) << "\n\n";
in.location = m_binding_table.instancing_constants_buffer_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_storage_buffer;
inputs.push_back(in);
continue;
}
}
if (PT.type == "sampler2D" ||
@ -209,6 +244,7 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64;
properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor());
properties2.require_explicit_invariance = (vk::is_NVIDIA(vk::get_driver_vendor()) && g_cfg.video.shader_precision != gpu_preset_level::low);
properties2.require_instanced_render = !!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS);
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_vulkan);

View file

@ -14,8 +14,10 @@ namespace vk
u8 vertex_buffers_first_bind_slot = 5;
u8 conditional_render_predicate_slot = 8;
u8 rasterizer_env_bind_slot = 9;
u8 textures_first_bind_slot = 10;
u8 vertex_textures_first_bind_slot = 10; // Invalid, has to be initialized properly
u8 instancing_lookup_table_bind_slot = 10;
u8 instancing_constants_buffer_slot = 11;
u8 textures_first_bind_slot = 12;
u8 vertex_textures_first_bind_slot = 12; // Invalid, has to be initialized properly
u8 total_descriptor_bindings = vertex_textures_first_bind_slot; // Invalid, has to be initialized properly
};
}

View file

@ -455,7 +455,8 @@ namespace gcm
RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used??
// Custom
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000 // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants
};
// GCM Reports

View file

@ -27,6 +27,10 @@
#elif defined(__APPLE__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma GCC diagnostic ignored "-Wnullability-completeness"
#pragma GCC diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
#include <ApplicationServices/ApplicationServices.h>
#include <Carbon/Carbon.h>
#pragma GCC diagnostic pop

View file

@ -104,6 +104,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp" />
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
@ -587,7 +588,10 @@
<ClInclude Include="Emu\IPC_socket.h" />
<ClInclude Include="Emu\localized_string.h" />
<ClInclude Include="Emu\localized_string_id.h" />
<ClInclude Include="Emu\NP\fb_helpers.h" />
<ClInclude Include="Emu\NP\generated\np2_structs_generated.h" />
<ClInclude Include="Emu\NP\np_contexts.h" />
<ClInclude Include="Emu\NP\np_gui_cache.h" />
<ClInclude Include="Emu\NP\np_handler.h" />
<ClInclude Include="Emu\NP\rpcn_countries.h" />
<ClInclude Include="Emu\NP\signaling_handler.h" />
@ -616,6 +620,8 @@
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXContext.h" />
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h" />
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />

View file

@ -1315,6 +1315,9 @@
<ClCompile Include="Emu\RSX\Host\MM.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2650,6 +2653,21 @@
<ClInclude Include="Emu\RSX\Host\MM.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
<ClInclude Include="Emu\NP\fb_helpers.h">
<Filter>Emu\NP</Filter>
</ClInclude>
<ClInclude Include="Emu\NP\np_contexts.h">
<Filter>Emu\NP</Filter>
</ClInclude>
<ClInclude Include="Emu\NP\np_gui_cache.h">
<Filter>Emu\NP</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

View file

@ -33,7 +33,7 @@
#ifdef _WIN32
#include "module_verifier.hpp"
#include "util/dyn_lib.hpp"
#include <shellapi.h>
// TODO(cjj19970505@live.cn)
// When compiling with WIN32_LEAN_AND_MEAN definition
@ -620,11 +620,26 @@ int main(int argc, char** argv)
std::string argument_str;
for (int i = 0; i < argc; i++)
{
if (i > 0) argument_str += " ";
argument_str += '\'' + std::string(argv[i]) + '\'';
if (i != argc - 1) argument_str += " ";
}
sys_log.notice("argc: %d, argv: %s", argc, argument_str);
#ifdef _WIN32
int n_args = 0;
if (LPWSTR* arg_list = CommandLineToArgvW(GetCommandLineW(), &n_args))
{
std::string utf8_args;
for (int i = 0; i < n_args; i++)
{
if (i > 0) utf8_args += " ";
utf8_args += '\'' + wchar_to_utf8(arg_list[i]) + '\'';
}
sys_log.notice("argv_utf8: %s", utf8_args);
}
#endif
// Before we proceed, run some sanity checks
run_platform_sanity_checks();

View file

@ -2069,6 +2069,7 @@
<None Include="..\buildfiles\msvc\rpcs3_debug.props" />
<None Include="..\buildfiles\msvc\rpcs3_default.props" />
<None Include="..\buildfiles\msvc\rpcs3_release.props" />
<None Include="..\darwin\util\sysinfo_darwin.mm" />
<None Include="..\Utilities\git-version-gen.cmd" />
<None Include="update_helper.sh" />
</ItemGroup>

View file

@ -190,6 +190,9 @@
<Filter Include="Io\Move">
<UniqueIdentifier>{f8a98f7b-dc23-47c0-8a5f-d0b76eaf0df5}</UniqueIdentifier>
</Filter>
<Filter Include="Darwin">
<UniqueIdentifier>{f6b701aa-7f4a-4816-b05f-80d24cb70e13}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="main.cpp">
@ -1806,5 +1809,8 @@
<None Include="..\.ci\optimize-mac.sh">
<Filter>CI</Filter>
</None>
<None Include="..\darwin\util\sysinfo_darwin.mm">
<Filter>Darwin</Filter>
</None>
</ItemGroup>
</Project>

View file

@ -13,6 +13,7 @@ enum Category
Home,
Media,
Data,
OS,
Unknown_Cat,
Others,
};
@ -50,5 +51,6 @@ namespace cat
const QStringList psp_games = { cat_psp_game, cat_psp_mini, cat_psp_rema };
const QStringList media = { cat_app_photo, cat_app_video, cat_bc_video, cat_app_music, cat_app_store, cat_app_tv, cat_web_tv };
const QStringList data = { cat_ps3_data, cat_ps2_data, cat_ps3_save, cat_psp_save };
const QStringList others = { cat_network, cat_store_fe, cat_ps3_os };
const QStringList os = { cat_ps3_os };
const QStringList others = { cat_network, cat_store_fe };
}

View file

@ -200,6 +200,7 @@ enum class emu_settings_type
KeyboardType,
EnterButtonAssignment,
EnableHostRoot,
EmptyHdd0Tmp,
LimitCacheSize,
MaximumCacheSize,
ConsoleTimeOffset,
@ -399,6 +400,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
{ emu_settings_type::KeyboardType, { "System", "Keyboard Type"} },
{ emu_settings_type::EnterButtonAssignment, { "System", "Enter button assignment"}},
{ emu_settings_type::EnableHostRoot, { "VFS", "Enable /host_root/"}},
{ emu_settings_type::EmptyHdd0Tmp, { "VFS", "Empty /dev_hdd0/tmp/"}},
{ emu_settings_type::LimitCacheSize, { "VFS", "Limit disk cache size"}},
{ emu_settings_type::MaximumCacheSize, { "VFS", "Disk cache maximum size (MB)"}},
{ emu_settings_type::ConsoleTimeOffset, { "System", "Console time offset (s)"}},

View file

@ -125,6 +125,7 @@ QStringList gui_settings::GetGameListCategoryFilters(bool is_list_mode) const
if (GetCategoryVisibility(Category::Home, is_list_mode)) filterList.append(cat::cat_home);
if (GetCategoryVisibility(Category::Media, is_list_mode)) filterList.append(cat::media);
if (GetCategoryVisibility(Category::Data, is_list_mode)) filterList.append(cat::data);
if (GetCategoryVisibility(Category::OS, is_list_mode)) filterList.append(cat::os);
if (GetCategoryVisibility(Category::Unknown_Cat, is_list_mode)) filterList.append(cat::cat_unknown);
if (GetCategoryVisibility(Category::Others, is_list_mode)) filterList.append(cat::others);
@ -205,7 +206,7 @@ bool gui_settings::GetBootConfirmation(QWidget* parent, const gui_save& gui_save
{
if (Emu.GetStatus(false) != system_state::stopping)
{
ensure(info == Emu.GetEmulationIdentifier(old_status == system_state::stopping ? true : false));
ensure(info == Emu.GetEmulationIdentifier(old_status == system_state::stopping));
return true;
}
@ -344,6 +345,7 @@ gui_save gui_settings::GetGuiSaveForCategory(int cat, bool is_list_mode)
case Category::PSP_Game: return is_list_mode ? gui::cat_psp_game : gui::grid_cat_psp_game;
case Category::Media: return is_list_mode ? gui::cat_audio_video : gui::grid_cat_audio_video;
case Category::Data: return is_list_mode ? gui::cat_game_data : gui::grid_cat_game_data;
case Category::OS: return is_list_mode ? gui::cat_os : gui::grid_cat_os;
case Category::Unknown_Cat: return is_list_mode ? gui::cat_unknown : gui::grid_cat_unknown;
case Category::Others: return is_list_mode ? gui::cat_other : gui::grid_cat_other;
default:

View file

@ -161,6 +161,7 @@ namespace gui
const gui_save cat_home = gui_save(game_list, "categoryVisibleHome", true);
const gui_save cat_audio_video = gui_save(game_list, "categoryVisibleAudioVideo", true);
const gui_save cat_game_data = gui_save(game_list, "categoryVisibleGameData", false);
const gui_save cat_os = gui_save(game_list, "categoryVisibleOS", false);
const gui_save cat_unknown = gui_save(game_list, "categoryVisibleUnknown", true);
const gui_save cat_other = gui_save(game_list, "categoryVisibleOther", true);
@ -172,6 +173,7 @@ namespace gui
const gui_save grid_cat_home = gui_save(game_list, "gridCategoryVisibleHome", true);
const gui_save grid_cat_audio_video = gui_save(game_list, "gridCategoryVisibleAudioVideo", true);
const gui_save grid_cat_game_data = gui_save(game_list, "gridCategoryVisibleGameData", false);
const gui_save grid_cat_os = gui_save(game_list, "gridCategoryVisibleOS", false);
const gui_save grid_cat_unknown = gui_save(game_list, "gridCategoryVisibleUnknown", true);
const gui_save grid_cat_other = gui_save(game_list, "gridCategoryVisibleOther", true);

View file

@ -2391,6 +2391,7 @@ void main_window::UpdateFilterActions()
ui->showCatHomeAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::Home, m_is_list_mode));
ui->showCatAudioVideoAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::Media, m_is_list_mode));
ui->showCatGameDataAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::Data, m_is_list_mode));
ui->showCatOSAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::OS, m_is_list_mode));
ui->showCatUnknownAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::Unknown_Cat, m_is_list_mode));
ui->showCatOtherAct->setChecked(m_gui_settings->GetCategoryVisibility(Category::Others, m_is_list_mode));
}
@ -2551,6 +2552,7 @@ void main_window::CreateActions()
m_category_visible_act_group->addAction(ui->showCatHomeAct);
m_category_visible_act_group->addAction(ui->showCatAudioVideoAct);
m_category_visible_act_group->addAction(ui->showCatGameDataAct);
m_category_visible_act_group->addAction(ui->showCatOSAct);
m_category_visible_act_group->addAction(ui->showCatUnknownAct);
m_category_visible_act_group->addAction(ui->showCatOtherAct);
m_category_visible_act_group->setExclusive(false);
@ -3227,16 +3229,17 @@ void main_window::CreateConnects()
const auto get_cats = [this](QAction* act, int& id) -> QStringList
{
QStringList categories;
if (act == ui->showCatHDDGameAct) { categories += cat::cat_hdd_game; id = Category::HDD_Game; }
else if (act == ui->showCatDiscGameAct) { categories += cat::cat_disc_game; id = Category::Disc_Game; }
else if (act == ui->showCatPS1GamesAct) { categories += cat::cat_ps1_game; id = Category::PS1_Game; }
else if (act == ui->showCatPS2GamesAct) { categories += cat::ps2_games; id = Category::PS2_Game; }
else if (act == ui->showCatPSPGamesAct) { categories += cat::psp_games; id = Category::PSP_Game; }
else if (act == ui->showCatHomeAct) { categories += cat::cat_home; id = Category::Home; }
else if (act == ui->showCatAudioVideoAct) { categories += cat::media; id = Category::Media; }
else if (act == ui->showCatGameDataAct) { categories += cat::data; id = Category::Data; }
else if (act == ui->showCatUnknownAct) { categories += cat::cat_unknown; id = Category::Unknown_Cat; }
else if (act == ui->showCatOtherAct) { categories += cat::others; id = Category::Others; }
if (act == ui->showCatHDDGameAct) { categories.append(cat::cat_hdd_game); id = Category::HDD_Game; }
else if (act == ui->showCatDiscGameAct) { categories.append(cat::cat_disc_game); id = Category::Disc_Game; }
else if (act == ui->showCatPS1GamesAct) { categories.append(cat::cat_ps1_game); id = Category::PS1_Game; }
else if (act == ui->showCatPS2GamesAct) { categories.append(cat::ps2_games); id = Category::PS2_Game; }
else if (act == ui->showCatPSPGamesAct) { categories.append(cat::psp_games); id = Category::PSP_Game; }
else if (act == ui->showCatHomeAct) { categories.append(cat::cat_home); id = Category::Home; }
else if (act == ui->showCatAudioVideoAct) { categories.append(cat::media); id = Category::Media; }
else if (act == ui->showCatGameDataAct) { categories.append(cat::data); id = Category::Data; }
else if (act == ui->showCatOSAct) { categories.append(cat::os); id = Category::OS; }
else if (act == ui->showCatUnknownAct) { categories.append(cat::cat_unknown); id = Category::Unknown_Cat; }
else if (act == ui->showCatOtherAct) { categories.append(cat::others); id = Category::Others; }
else { gui_log.warning("categoryVisibleActGroup: category action not found"); }
return categories;
};
@ -3276,6 +3279,7 @@ void main_window::CreateConnects()
set_cat_count(ui->showCatHomeAct, tr("Home"));
set_cat_count(ui->showCatAudioVideoAct, tr("Audio/Video"));
set_cat_count(ui->showCatGameDataAct, tr("Game Data"));
set_cat_count(ui->showCatOSAct, tr("Operating System"));
set_cat_count(ui->showCatUnknownAct, tr("Unknown"));
set_cat_count(ui->showCatOtherAct, tr("Other"));
});

View file

@ -355,6 +355,7 @@
<addaction name="showCatHomeAct"/>
<addaction name="showCatAudioVideoAct"/>
<addaction name="showCatGameDataAct"/>
<addaction name="showCatOSAct"/>
<addaction name="showCatUnknownAct"/>
<addaction name="showCatOtherAct"/>
</widget>
@ -883,9 +884,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>HDD Games</string>
</property>
@ -894,9 +892,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Disc Games</string>
</property>
@ -905,9 +900,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>PS1 Games</string>
</property>
@ -916,9 +908,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>PS2 Games</string>
</property>
@ -927,9 +916,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>PSP Games</string>
</property>
@ -938,9 +924,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Home</string>
</property>
@ -949,9 +932,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Audio/Video</string>
</property>
@ -960,9 +940,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Game Data</string>
</property>
@ -971,9 +948,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Unknown</string>
</property>
@ -1090,9 +1064,6 @@
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="text">
<string>Other</string>
</property>
@ -1402,6 +1373,14 @@
<string>PS Move</string>
</property>
</action>
<action name="showCatOSAct">
<property name="checkable">
<bool>true</bool>
</property>
<property name="text">
<string>Operating System</string>
</property>
</action>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources>

View file

@ -625,13 +625,18 @@ namespace gui
{
usz byte_unit = 0;
usz divisor = 1;
#if defined(__APPLE__)
constexpr usz multiplier = 1000;
static const QString s_units[]{"B", "kB", "MB", "GB", "TB", "PB"};
#else
constexpr usz multiplier = 1024;
static const QString s_units[]{"B", "KiB", "MiB", "GiB", "TiB", "PiB"};
#endif
static const QString s_units[]{"B", "KB", "MB", "GB", "TB", "PB"};
while (byte_unit < std::size(s_units) - 1 && size / divisor >= 1024)
while (byte_unit < std::size(s_units) - 1 && size / divisor >= multiplier)
{
byte_unit++;
divisor *= 1024;
divisor *= multiplier;
}
return QStringLiteral("%0 %1").arg(QString::number((size + 0.) / divisor, 'f', 2)).arg(s_units[byte_unit]);

View file

@ -1428,6 +1428,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->enableHostRoot, emu_settings_type::EnableHostRoot);
SubscribeTooltip(ui->enableHostRoot, tooltips.settings.enable_host_root);
m_emu_settings->EnhanceCheckBox(ui->emptyHdd0Tmp, emu_settings_type::EmptyHdd0Tmp);
SubscribeTooltip(ui->emptyHdd0Tmp, tooltips.settings.empty_hdd0_tmp);
m_emu_settings->EnhanceCheckBox(ui->enableCacheClearing, emu_settings_type::LimitCacheSize);
SubscribeTooltip(ui->gb_DiskCacheClearing, tooltips.settings.limit_cache_size);
if (game)

View file

@ -1900,206 +1900,237 @@
<attribute name="title">
<string>System</string>
</attribute>
<layout class="QVBoxLayout" name="systemTab_layout" stretch="0,0,0,0,1,0">
<layout class="QVBoxLayout" name="systemTab_layout" stretch="0,1,0">
<item>
<layout class="QHBoxLayout" name="systemTabLayout1" stretch="1,1,1">
<layout class="QHBoxLayout" name="systemTabHorizontalLayout" stretch="1,1,1">
<item>
<widget class="QGroupBox" name="gb_sysLang">
<property name="title">
<string>Console Language</string>
</property>
<layout class="QVBoxLayout" name="gb_sysLang_layout">
<item>
<widget class="QComboBox" name="sysLangBox"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_keyboardType">
<property name="title">
<string>Keyboard Type</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_keyboardType">
<item>
<widget class="QComboBox" name="keyboardType"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_homebrew">
<property name="title">
<string>Homebrew</string>
</property>
<layout class="QVBoxLayout" name="gb_homebrew_layout">
<item>
<widget class="QCheckBox" name="enableHostRoot">
<property name="text">
<string>Enable /host_root/</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="systemTabLayout2" stretch="1,1,1">
<item>
<widget class="QGroupBox" name="gb_console_region">
<property name="title">
<string>Console Region</string>
</property>
<layout class="QVBoxLayout" name="gb_console_region_layout">
<item>
<widget class="QComboBox" name="console_region"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_console_time">
<property name="title">
<string>Console Time</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<layout class="QVBoxLayout" name="systemTabLeftLayout">
<item>
<widget class="QGroupBox" name="gb_sysLang">
<property name="title">
<string>Console Language</string>
</property>
<layout class="QVBoxLayout" name="gb_sysLang_layout">
<item>
<widget class="QDateTimeEdit" name="console_time_edit">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="layoutDirection">
<enum>Qt::LayoutDirection::LeftToRight</enum>
</property>
<property name="wrapping">
<bool>false</bool>
</property>
<property name="frame">
<bool>true</bool>
</property>
<property name="alignment">
<set>Qt::AlignmentFlag::AlignCenter</set>
</property>
<property name="readOnly">
<bool>false</bool>
</property>
<property name="buttonSymbols">
<enum>QAbstractSpinBox::ButtonSymbols::UpDownArrows</enum>
</property>
<property name="accelerated">
<bool>true</bool>
</property>
<property name="showGroupSeparator" stdset="0">
<bool>false</bool>
</property>
<property name="calendarPopup">
<bool>true</bool>
<widget class="QComboBox" name="sysLangBox"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_console_region">
<property name="title">
<string>Console Region</string>
</property>
<layout class="QVBoxLayout" name="gb_console_region_layout">
<item>
<widget class="QComboBox" name="console_region"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_enterButtonAssignment">
<property name="title">
<string>Enter Button Assignment</string>
</property>
<layout class="QVBoxLayout" name="gb_enterButtonAssignment_layout">
<item>
<widget class="QRadioButton" name="enterButtonAssignCircle">
<property name="text">
<string notr="true">Enter with the Circle button</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="console_time_reset">
<widget class="QRadioButton" name="enterButtonAssignCross">
<property name="text">
<string>Set to Now</string>
<string notr="true">Enter with the Cross button</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_DiskCacheClearing">
<property name="title">
<string>Disk Cache</string>
</property>
<layout class="QVBoxLayout" name="gb_DiskCacheClearing_layout">
<item>
<widget class="QCheckBox" name="enableCacheClearing">
<property name="text">
<string>Clear cache automatically</string>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="maximumCacheSizeLabel">
<property name="text">
<string>Cache size: 3072 MB</string>
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="maximumCacheSize">
<property name="pageStep">
<number>512</number>
</property>
<property name="orientation">
<enum>Qt::Orientation::Horizontal</enum>
</property>
<property name="tickPosition">
<enum>QSlider::TickPosition::TicksBelow</enum>
</property>
<property name="tickInterval">
<number>1024</number>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="systemTabLeftVerticalSpacer">
<property name="orientation">
<enum>Qt::Orientation::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item>
<widget class="QWidget" name="systemTabSpacerWidget2" native="true"/>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="systemTabLayout3" stretch="1,1,1">
<item>
<widget class="QGroupBox" name="gb_enterButtonAssignment">
<property name="title">
<string>Enter Button Assignment</string>
</property>
<layout class="QVBoxLayout" name="gb_enterButtonAssignment_layout">
<item>
<widget class="QRadioButton" name="enterButtonAssignCircle">
<property name="text">
<string notr="true">Enter with the Circle button</string>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="enterButtonAssignCross">
<property name="text">
<string notr="true">Enter with the Cross button</string>
</property>
</widget>
</item>
</layout>
</widget>
<layout class="QVBoxLayout" name="systemTabMiddleLayout">
<item>
<widget class="QGroupBox" name="gb_keyboardType">
<property name="title">
<string>Keyboard Type</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_keyboardType">
<item>
<widget class="QComboBox" name="keyboardType"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_console_time">
<property name="title">
<string>Console Time</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QDateTimeEdit" name="console_time_edit">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="layoutDirection">
<enum>Qt::LayoutDirection::LeftToRight</enum>
</property>
<property name="wrapping">
<bool>false</bool>
</property>
<property name="frame">
<bool>true</bool>
</property>
<property name="alignment">
<set>Qt::AlignmentFlag::AlignCenter</set>
</property>
<property name="readOnly">
<bool>false</bool>
</property>
<property name="buttonSymbols">
<enum>QAbstractSpinBox::ButtonSymbols::UpDownArrows</enum>
</property>
<property name="accelerated">
<bool>true</bool>
</property>
<property name="showGroupSeparator" stdset="0">
<bool>false</bool>
</property>
<property name="calendarPopup">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="console_time_reset">
<property name="text">
<string>Set to Now</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="systemTabMiddleVerticalSpacer">
<property name="orientation">
<enum>Qt::Orientation::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item>
<widget class="QWidget" name="systemTabSpacerWidget3" native="true"/>
</item>
<item>
<widget class="QWidget" name="systemTabSpacerWidget4" native="true"/>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="systemTabLayout4" stretch="1,1,1">
<item>
<widget class="QGroupBox" name="gb_DiskCacheClearing">
<property name="title">
<string>Disk Cache</string>
</property>
<layout class="QVBoxLayout" name="gb_DiskCacheClearing_layout">
<item>
<widget class="QCheckBox" name="enableCacheClearing">
<property name="text">
<string>Clear cache automatically</string>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="maximumCacheSizeLabel">
<property name="text">
<string>Cache size: 3072 MB</string>
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="maximumCacheSize">
<property name="pageStep">
<number>512</number>
</property>
<property name="orientation">
<enum>Qt::Orientation::Horizontal</enum>
</property>
<property name="tickPosition">
<enum>QSlider::TickPosition::TicksBelow</enum>
</property>
<property name="tickInterval">
<number>1024</number>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QWidget" name="systemTabSpacerWidget5" native="true"/>
</item>
<item>
<widget class="QWidget" name="systemTabSpacerWidget6" native="true"/>
<layout class="QVBoxLayout" name="systemTabRightLayout">
<item>
<widget class="QGroupBox" name="gb_homebrew">
<property name="title">
<string>Homebrew</string>
</property>
<layout class="QVBoxLayout" name="gb_homebrew_layout">
<item>
<widget class="QCheckBox" name="enableHostRoot">
<property name="text">
<string>Enable /host_root/</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="emptyHdd0Tmp">
<property name="text">
<string>Empty /dev_hdd0/tmp/</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="systemTabRightVerticalSpacer">
<property name="orientation">
<enum>Qt::Orientation::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
</layout>
</item>

View file

@ -260,7 +260,8 @@ public:
const QString system_language = tr("Some games may fail to boot if the system language is not available in the game itself.\nOther games will switch language automatically to what is selected here.\nIt is recommended leaving this on a language supported by the game.");
const QString keyboard_type = tr("Sets the used keyboard layout.\nCurrently only US, Japanese and German layouts are fully supported at this moment.");
const QString enter_button_assignment = tr("The button used for enter/accept/confirm in system dialogs.\nChange this to use the Circle button instead, which is the default configuration on Japanese systems and in many Japanese games.\nIn these cases having the cross button assigned can often lead to confusion.");
const QString enable_host_root = tr("Required for some Homebrew.\nIf unsure, don't use this option.");
const QString enable_host_root = tr("Required for some Homebrew.\nIf unsure, do not use this option.");
const QString empty_hdd0_tmp = tr("Required for some Homebrew or Game Mods.\nIf unsure, do not use this option");
const QString limit_cache_size = tr("Automatically removes older files from disk cache on boot if it grows larger than the specified value.\nGames can use the cache folder to temporarily store data outside of system memory. It is not used for long-term storage.\n\nThis setting is only available in the global configuration.");
const QString console_time_offset = tr("Sets the time to be used within the console. This will be applied as an offset that tracks wall clock time.\nCan be reset to current wall clock time by clicking \"Set to Now\".");
} settings;

View file

@ -76,6 +76,8 @@ namespace stx
constexpr shared_data() noexcept = default;
};
struct null_ptr_t;
// Simplified unique pointer. In some cases, std::unique_ptr is preferred.
// This one is shared_ptr counterpart, it has a control block with refs and deleter.
// It's trivially convertible to shared_ptr, and back if refs == 1.
@ -603,9 +605,10 @@ namespace stx
template <typename T1>
static constexpr bool is_stx_pointer = false
|| is_instance_of<std::remove_cv_t<T1>, shared_ptr>::value
|| is_instance_of<std::remove_cv_t<T1>, single_ptr>::value
|| is_instance_of<std::remove_cv_t<T1>, atomic_ptr>::value;
|| is_instance_of<std::remove_cvref_t<T1>, shared_ptr>::value
|| is_instance_of<std::remove_cvref_t<T1>, single_ptr>::value
|| is_instance_of<std::remove_cvref_t<T1>, atomic_ptr>::value
|| std::is_same_v<std::remove_cvref_t<T1>, null_ptr_t>;
public:
using element_type = std::remove_extent_t<T>;