mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 12:05:23 +00:00
rsx: Minor refactoring RSXThread
- Part 1 of many
This commit is contained in:
parent
659ee81e80
commit
3dba894369
22 changed files with 637 additions and 525 deletions
|
@ -9,7 +9,7 @@
|
|||
#include "Emu/perf_meter.hpp"
|
||||
#include "Emu/Memory/vm_reservation.h"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/RSX/Core/RSXReservationLock.hpp"
|
||||
#include "Emu/VFS.h"
|
||||
#include "Emu/system_progress.hpp"
|
||||
#include "Emu/system_utils.hpp"
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "Emu/VFS.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Emu/perf_meter.hpp"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/lv2/sys_spu.h"
|
||||
|
@ -23,6 +22,9 @@
|
|||
#include "Emu/Cell/SPURecompiler.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "Emu/RSX/Core/RSXReservationLock.hpp"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cfenv>
|
||||
#include <thread>
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
#include "Emu/RSX/Core/RSXEngLock.hpp"
|
||||
#include "Emu/RSX/Core/RSXReservationLock.hpp"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "sys_event.h"
|
||||
|
|
73
rpcs3/Emu/RSX/Core/RSXDisplay.h
Normal file
73
rpcs3/Emu/RSX/Core/RSXDisplay.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include <util/logs.hpp>
|
||||
#include <deque>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
// Plain record of per-frame statistics: two unsigned counters followed by five signed
// 64-bit timing fields. Embedded by value in display_flip_info_t as `stats`.
// NOTE(review): the time unit and the code that fills/resets these fields are not
// visible in this header - confirm against the renderer backends.
struct frame_statistics_t
|
||||
{
|
||||
u32 draw_calls;
|
||||
u32 submit_count;
|
||||
|
||||
s64 setup_time;
|
||||
s64 vertex_upload_time;
|
||||
s64 textures_upload_time;
|
||||
s64 draw_exec_time;
|
||||
s64 flip_time;
|
||||
};
|
||||
|
||||
struct display_flip_info_t
|
||||
{
|
||||
std::deque<u32> buffer_queue;
|
||||
u32 buffer;
|
||||
bool skip_frame;
|
||||
bool emu_flip;
|
||||
bool in_progress;
|
||||
frame_statistics_t stats;
|
||||
|
||||
inline void push(u32 _buffer)
|
||||
{
|
||||
buffer_queue.push_back(_buffer);
|
||||
}
|
||||
|
||||
inline bool pop(u32 _buffer)
|
||||
{
|
||||
if (buffer_queue.empty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
const auto index = buffer_queue.front();
|
||||
buffer_queue.pop_front();
|
||||
|
||||
if (index == _buffer)
|
||||
{
|
||||
buffer = _buffer;
|
||||
return true;
|
||||
}
|
||||
} while (!buffer_queue.empty());
|
||||
|
||||
// Need to observe this happening in the wild
|
||||
rsx_log.error("Display queue was discarded while not empty!");
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Non-copyable holder for the shared handle of a named_thread that runs a
// std::function<void()>. All member functions are defined out of line.
class vblank_thread
|
||||
{
|
||||
// Handle installed through set_thread(); empty after default construction
std::shared_ptr<named_thread<std::function<void()>>> m_thread;
|
||||
|
||||
public:
|
||||
vblank_thread() = default;
|
||||
vblank_thread(const vblank_thread&) = delete;
|
||||
|
||||
// Takes the thread handle by value. NOTE(review): presumably stored in m_thread - confirm in the definition.
void set_thread(std::shared_ptr<named_thread<std::function<void()>>> thread);
|
||||
|
||||
// Allows `holder = thread_state::xxx;`. The effect on the held thread is defined out of line.
vblank_thread& operator=(thread_state);
|
||||
vblank_thread& operator=(const vblank_thread&) = delete;
|
||||
};
|
||||
}
|
31
rpcs3/Emu/RSX/Core/RSXEngLock.hpp
Normal file
31
rpcs3/Emu/RSX/Core/RSXEngLock.hpp
Normal file
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "../RSXThread.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
class eng_lock
|
||||
{
|
||||
rsx::thread* pthr;
|
||||
|
||||
public:
|
||||
eng_lock(rsx::thread* target)
|
||||
:pthr(target)
|
||||
{
|
||||
if (pthr->is_current_thread())
|
||||
{
|
||||
pthr = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
pthr->pause();
|
||||
}
|
||||
}
|
||||
|
||||
~eng_lock()
|
||||
{
|
||||
if (pthr) pthr->unpause();
|
||||
}
|
||||
};
|
||||
}
|
42
rpcs3/Emu/RSX/Core/RSXFrameBuffer.h
Normal file
42
rpcs3/Emu/RSX/Core/RSXFrameBuffer.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "../gcm_enums.h"
|
||||
#include "../GCM.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
// Describes one tiled memory region: two u32 address fields, the GcmTileInfo entry it
// refers to and a raw byte pointer. The pointers are not owned (nothing here frees them).
// NOTE(review): the exact meaning of `address` vs `base` is not visible in this header - confirm with the code that builds this struct.
struct tiled_region
|
||||
{
|
||||
u32 address;
|
||||
u32 base;
|
||||
GcmTileInfo* tile;
|
||||
u8* ptr;
|
||||
|
||||
// Declared only; defined out of line.
// NOTE(review): presumably copy a width x height image with the given pitch into / out of the region - confirm in the definitions.
void write(const void* src, u32 width, u32 height, u32 pitch);
|
||||
void read(void* dst, u32 width, u32 height, u32 pitch);
|
||||
};
|
||||
|
||||
// Snapshot of a framebuffer configuration: dimensions, up to four color targets
// (address, pitch, write-enable per slot), one depth ("zeta") target and the surface
// format/antialiasing enums.
// The member order and types are part of the contract because the struct is tagged
// with ENABLE_BITWISE_SERIALIZATION - do not reorder or resize fields.
struct framebuffer_layout
|
||||
{
|
||||
// Project macro. NOTE(review): by its name it opts this type into raw byte-wise serialization - confirm.
ENABLE_BITWISE_SERIALIZATION;
|
||||
|
||||
u16 width;
|
||||
u16 height;
|
||||
// The four std::array members below share the same slot index (0..3)
std::array<u32, 4> color_addresses;
|
||||
std::array<u32, 4> color_pitch;
|
||||
// NOTE(review): difference between `*_pitch` and `actual_*_pitch` is not visible here - confirm with the code that fills this struct
std::array<u32, 4> actual_color_pitch;
|
||||
std::array<bool, 4> color_write_enabled;
|
||||
u32 zeta_address;
|
||||
u32 zeta_pitch;
|
||||
u32 actual_zeta_pitch;
|
||||
bool zeta_write_enabled;
|
||||
rsx::surface_target target;
|
||||
rsx::surface_color_format color_format;
|
||||
rsx::surface_depth_format2 depth_format;
|
||||
rsx::surface_antialiasing aa_mode;
|
||||
rsx::surface_raster_type raster_type;
|
||||
// Two factors. NOTE(review): presumably horizontal then vertical sample scale - confirm
u32 aa_factors[2];
|
||||
bool ignore_change;
|
||||
};
|
||||
}
|
86
rpcs3/Emu/RSX/Core/RSXIOMap.hpp
Normal file
86
rpcs3/Emu/RSX/Core/RSXIOMap.hpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "Utilities/mutex.h"
|
||||
#include "Emu/CPU/CPUThread.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
struct rsx_iomap_table
|
||||
{
|
||||
static constexpr u32 c_lock_stride = 8192;
|
||||
|
||||
std::array<atomic_t<u32>, 4096> ea;
|
||||
std::array<atomic_t<u32>, 4096> io;
|
||||
std::array<shared_mutex, 0x1'0000'0000 / c_lock_stride> rs;
|
||||
|
||||
rsx_iomap_table() noexcept;
|
||||
|
||||
// Try to get the real address given a mapped address
|
||||
// Returns -1 on failure
|
||||
u32 get_addr(u32 offs) const noexcept
|
||||
{
|
||||
return this->ea[offs >> 20] | (offs & 0xFFFFF);
|
||||
}
|
||||
|
||||
template <bool IsFullLock, uint Stride>
|
||||
bool lock(u32 addr, u32 len, cpu_thread* self = nullptr) noexcept
|
||||
{
|
||||
if (len <= 1) return false;
|
||||
const u32 end = addr + len - 1;
|
||||
|
||||
bool added_wait = false;
|
||||
|
||||
for (u32 block = addr / c_lock_stride; block <= (end / c_lock_stride); block += Stride)
|
||||
{
|
||||
auto& mutex_ = rs[block];
|
||||
|
||||
if (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared()) [[ unlikely ]]
|
||||
{
|
||||
if (self)
|
||||
{
|
||||
added_wait |= !self->state.test_and_set(cpu_flag::wait);
|
||||
}
|
||||
|
||||
if (!self || self->id_type() != 0x55u)
|
||||
{
|
||||
IsFullLock ? mutex_.lock() : mutex_.lock_shared();
|
||||
}
|
||||
else
|
||||
{
|
||||
while (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared())
|
||||
{
|
||||
self->cpu_wait({});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (added_wait)
|
||||
{
|
||||
self->check_state();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <bool IsFullLock, uint Stride>
|
||||
void unlock(u32 addr, u32 len) noexcept
|
||||
{
|
||||
ensure(len >= 1);
|
||||
const u32 end = addr + len - 1;
|
||||
|
||||
for (u32 block = (addr / 8192); block <= (end / 8192); block += Stride)
|
||||
{
|
||||
if constexpr (IsFullLock)
|
||||
{
|
||||
rs[block].unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
rs[block].unlock_shared();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
106
rpcs3/Emu/RSX/Core/RSXReservationLock.hpp
Normal file
106
rpcs3/Emu/RSX/Core/RSXReservationLock.hpp
Normal file
|
@ -0,0 +1,106 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "../RSXThread.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
template<bool IsFullLock = false, uint Stride = 128>
|
||||
class reservation_lock
|
||||
{
|
||||
u32 addr = 0;
|
||||
u32 length = 0;
|
||||
|
||||
inline void lock_range(u32 addr, u32 length)
|
||||
{
|
||||
if (!get_current_renderer()->iomap_table.lock<IsFullLock, Stride>(addr, length, get_current_cpu_thread()))
|
||||
{
|
||||
length = 0;
|
||||
}
|
||||
|
||||
this->addr = addr;
|
||||
this->length = length;
|
||||
}
|
||||
|
||||
public:
|
||||
reservation_lock(u32 addr, u32 length)
|
||||
{
|
||||
if (g_cfg.core.rsx_accurate_res_access &&
|
||||
addr < constants::local_mem_base)
|
||||
{
|
||||
lock_range(addr, length);
|
||||
}
|
||||
}
|
||||
|
||||
reservation_lock(u32 addr, u32 length, bool setting)
|
||||
{
|
||||
if (setting)
|
||||
{
|
||||
lock_range(addr, length);
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-range lock. If ranges overlap, the combined range will be acquired.
|
||||
// If ranges do not overlap, the first range that is in main memory will be acquired.
|
||||
reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
|
||||
{
|
||||
if (g_cfg.core.rsx_accurate_res_access)
|
||||
{
|
||||
const auto range1 = utils::address_range::start_length(dst_addr, dst_length);
|
||||
const auto range2 = utils::address_range::start_length(src_addr, src_length);
|
||||
utils::address_range target_range;
|
||||
|
||||
if (!range1.overlaps(range2)) [[likely]]
|
||||
{
|
||||
target_range = (dst_addr < constants::local_mem_base) ? range1 : range2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Very unlikely
|
||||
target_range = range1.get_min_max(range2);
|
||||
}
|
||||
|
||||
if (target_range.start < constants::local_mem_base)
|
||||
{
|
||||
lock_range(target_range.start, target_range.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Very special utility for batched transfers (SPU related)
|
||||
template <typename T = void>
|
||||
void update_if_enabled(u32 addr, u32 _length, const std::add_pointer_t<T>& lock_release = std::add_pointer_t<void>{})
|
||||
{
|
||||
// This check is not perfect but it covers the important cases fast (this check is only an optimization - forcing true disables it)
|
||||
if (length && (this->addr / rsx_iomap_table::c_lock_stride != addr / rsx_iomap_table::c_lock_stride || (addr % rsx_iomap_table::c_lock_stride + _length) > rsx_iomap_table::c_lock_stride) && _length > 1)
|
||||
{
|
||||
if constexpr (!std::is_void_v<T>)
|
||||
{
|
||||
// See SPUThread.cpp
|
||||
lock_release->release(0);
|
||||
}
|
||||
|
||||
unlock();
|
||||
lock_range(addr, _length);
|
||||
}
|
||||
}
|
||||
|
||||
void unlock(bool destructor = false)
|
||||
{
|
||||
if (length)
|
||||
{
|
||||
get_current_renderer()->iomap_table.unlock<IsFullLock, Stride>(addr, length);
|
||||
|
||||
if (!destructor)
|
||||
{
|
||||
length = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~reservation_lock()
|
||||
{
|
||||
unlock(true);
|
||||
}
|
||||
};
|
||||
}
|
168
rpcs3/Emu/RSX/Core/RSXVertexTypes.h
Normal file
168
rpcs3/Emu/RSX/Core/RSXVertexTypes.h
Normal file
|
@ -0,0 +1,168 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "../Common/simple_array.hpp"
|
||||
#include "../gcm_enums.h"
|
||||
|
||||
#include <span>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
// Describes one vertex attribute whose data lives in a byte buffer: base type,
// attribute size, stride, a non-owning view of the bytes, the attribute index and an
// endianness flag.
// `data` is a std::span and does not own the bytes - the source buffer must outlive this struct.
struct vertex_array_buffer
|
||||
{
|
||||
rsx::vertex_base_type type;
|
||||
u8 attribute_size;
|
||||
u8 stride;
|
||||
std::span<const std::byte> data;
|
||||
u8 index;
|
||||
// NOTE(review): presumably true when `data` is big-endian - confirm with the producers of this struct
bool is_be;
|
||||
};
|
||||
|
||||
// Describes one vertex attribute whose value is stored inline as four 32-bit words
// (owned by value, unlike vertex_array_buffer which only views its data).
struct vertex_array_register
|
||||
{
|
||||
rsx::vertex_base_type type;
|
||||
u8 attribute_size;
|
||||
std::array<u32, 4> data;
|
||||
u8 index;
|
||||
};
|
||||
|
||||
// Carries only an attribute index and no data.
// NOTE(review): presumably marks an attribute slot with no source bound - confirm at the use sites.
struct empty_vertex_array
|
||||
{
|
||||
u8 index;
|
||||
};
|
||||
|
||||
// Draw command type with no payload other than a placeholder word.
// NOTE(review): presumably a tag type distinguished from draw_indexed_array_command /
// draw_inlined_array by type alone - confirm at the use sites.
// NOTE(review): identifiers containing a double underscore are reserved by the C++ standard.
struct draw_array_command
|
||||
{
|
||||
u32 __dummy;
|
||||
};
|
||||
|
||||
// Draw command that carries a non-owning byte view of an index buffer.
// The span does not own the bytes - the underlying storage must outlive this struct.
struct draw_indexed_array_command
|
||||
{
|
||||
std::span<const std::byte> raw_index_buffer;
|
||||
};
|
||||
|
||||
// Draw command type whose only members are two placeholder words.
// NOTE(review): presumably a tag type; the second placeholder makes its size differ
// from draw_array_command - confirm whether anything depends on that.
// NOTE(review): identifiers containing a double underscore are reserved by the C++ standard.
struct draw_inlined_array
|
||||
{
|
||||
u32 __dummy;
|
||||
u32 __dummy2;
|
||||
};
|
||||
|
||||
// One attribute entry of an interleaved block (element type of
// interleaved_range_info::locations): the attribute index plus a modulo flag and a
// 16-bit frequency value.
// NOTE(review): presumably `modulo`/`frequency` describe per-instance style attribute stepping - confirm at the use sites.
struct interleaved_attribute_t
|
||||
{
|
||||
u8 index;
|
||||
bool modulo;
|
||||
u16 frequency;
|
||||
};
|
||||
|
||||
// Describes one block of vertex data and the attributes read from it.
// All scalar members default to zero/false; vertex_input_layout::alloc_interleaved_block()
// resets them again and sets `interleaved` to true when it hands a block out.
struct interleaved_range_info
|
||||
{
|
||||
bool interleaved = false;
|
||||
bool single_vertex = false;
|
||||
u32 base_offset = 0;
|
||||
u32 real_offset_address = 0;
|
||||
u8 memory_location = 0;
|
||||
// Bytes per vertex; multiplied by a vertex count in calculate_interleaved_memory_requirements()
u8 attribute_stride = 0;
|
||||
|
||||
// Attributes sourced from this block
rsx::simple_array<interleaved_attribute_t> locations;
|
||||
|
||||
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
// Declared only; defined out of line. The caller in vertex_input_layout uses `.second` of the result as a vertex count.
|
||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
|
||||
};
|
||||
|
||||
// Per-attribute placement tag stored in vertex_input_layout::attribute_placement.
// Unscoped enum with u8 storage; vertex_input_layout::validate() treats `persistent`
// as always valid input, `transient` as valid unless the attribute is a referenced
// register, and `none` as no input.
enum attribute_buffer_placement : u8
|
||||
{
|
||||
none = 0,
|
||||
persistent = 1,
|
||||
transient = 2
|
||||
};
|
||||
|
||||
class vertex_input_layout
|
||||
{
|
||||
int m_num_used_blocks = 0;
|
||||
std::array<interleaved_range_info, 16> m_blocks_data{};
|
||||
|
||||
public:
|
||||
rsx::simple_array<interleaved_range_info*> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
|
||||
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
|
||||
|
||||
std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);
|
||||
|
||||
vertex_input_layout() = default;
|
||||
|
||||
interleaved_range_info* alloc_interleaved_block()
|
||||
{
|
||||
auto result = &m_blocks_data[m_num_used_blocks++];
|
||||
result->attribute_stride = 0;
|
||||
result->base_offset = 0;
|
||||
result->memory_location = 0;
|
||||
result->real_offset_address = 0;
|
||||
result->single_vertex = false;
|
||||
result->locations.clear();
|
||||
result->interleaved = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_num_used_blocks = 0;
|
||||
interleaved_blocks.clear();
|
||||
volatile_blocks.clear();
|
||||
referenced_registers.clear();
|
||||
}
|
||||
|
||||
bool validate() const
|
||||
{
|
||||
// Criteria: At least one array stream has to be defined to feed vertex positions
|
||||
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
|
||||
|
||||
if (!interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0)
|
||||
return true;
|
||||
|
||||
if (!volatile_blocks.empty())
|
||||
return true;
|
||||
|
||||
for (u8 index = 0; index < limits::vertex_count; ++index)
|
||||
{
|
||||
switch (attribute_placement[index])
|
||||
{
|
||||
case attribute_buffer_placement::transient:
|
||||
{
|
||||
// Ignore register reference
|
||||
if (std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end())
|
||||
continue;
|
||||
|
||||
// The source is inline array or immediate draw push buffer
|
||||
return true;
|
||||
}
|
||||
case attribute_buffer_placement::persistent:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
case attribute_buffer_placement::none:
|
||||
{
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Unreachable");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
|
||||
{
|
||||
u32 mem = 0;
|
||||
for (auto& block : interleaved_blocks)
|
||||
{
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
mem += range.second * block->attribute_stride;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
};
|
||||
}
|
|
@ -1037,11 +1037,11 @@ void GLGSRender::on_semaphore_acquire_wait()
|
|||
if (!work_queue.empty() ||
|
||||
(async_flip_requested & flip_request::emu_requested))
|
||||
{
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
do_local_task(rsx::FIFO::state::lock_wait);
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::do_local_task(rsx::FIFO_state state)
|
||||
void GLGSRender::do_local_task(rsx::FIFO::state state)
|
||||
{
|
||||
if (!work_queue.empty())
|
||||
{
|
||||
|
@ -1058,7 +1058,7 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
|
|||
q.processed = true;
|
||||
}
|
||||
}
|
||||
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
|
||||
else if (!in_begin_end && state != rsx::FIFO::state::lock_wait)
|
||||
{
|
||||
if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
|
||||
{
|
||||
|
@ -1071,7 +1071,7 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
|
|||
|
||||
rsx::thread::do_local_task(state);
|
||||
|
||||
if (state == rsx::FIFO_state::lock_wait)
|
||||
if (state == rsx::FIFO::state::lock_wait)
|
||||
{
|
||||
// Critical check finished
|
||||
return;
|
||||
|
|
|
@ -193,7 +193,7 @@ protected:
|
|||
void on_exit() override;
|
||||
void flip(const rsx::display_flip_info_t& info) override;
|
||||
|
||||
void do_local_task(rsx::FIFO_state state) override;
|
||||
void do_local_task(rsx::FIFO::state state) override;
|
||||
|
||||
bool on_access_violation(u32 address, bool is_writing) override;
|
||||
void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "RSXThread.h"
|
||||
#include "Capture/rsx_capture.h"
|
||||
#include "Common/time.hpp"
|
||||
#include "Core/RSXReservationLock.hpp"
|
||||
#include "Emu/Memory/vm_reservation.h"
|
||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||
#include "util/asm.hpp"
|
||||
|
@ -613,20 +614,20 @@ namespace rsx
|
|||
{
|
||||
case FIFO::FIFO_NOP:
|
||||
{
|
||||
if (performance_counters.state == FIFO_state::running)
|
||||
if (performance_counters.state == FIFO::state::running)
|
||||
{
|
||||
performance_counters.FIFO_idle_timestamp = rsx::uclock();
|
||||
performance_counters.state = FIFO_state::nop;
|
||||
performance_counters.state = FIFO::state::nop;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
case FIFO::FIFO_EMPTY:
|
||||
{
|
||||
if (performance_counters.state == FIFO_state::running)
|
||||
if (performance_counters.state == FIFO::state::running)
|
||||
{
|
||||
performance_counters.FIFO_idle_timestamp = rsx::uclock();
|
||||
performance_counters.state = FIFO_state::empty;
|
||||
performance_counters.state = FIFO::state::empty;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -658,13 +659,13 @@ namespace rsx
|
|||
if (offs == fifo_ctrl->get_pos())
|
||||
{
|
||||
//Jump to self. Often preceded by NOP
|
||||
if (performance_counters.state == FIFO_state::running)
|
||||
if (performance_counters.state == FIFO::state::running)
|
||||
{
|
||||
performance_counters.FIFO_idle_timestamp = rsx::uclock();
|
||||
sync_point_request.release(true);
|
||||
}
|
||||
|
||||
performance_counters.state = FIFO_state::spinning;
|
||||
performance_counters.state = FIFO::state::spinning;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -710,14 +711,14 @@ namespace rsx
|
|||
}
|
||||
|
||||
if (const auto state = performance_counters.state;
|
||||
state != FIFO_state::running)
|
||||
state != FIFO::state::running)
|
||||
{
|
||||
performance_counters.state = FIFO_state::running;
|
||||
performance_counters.state = FIFO::state::running;
|
||||
|
||||
// Hack: Delay FIFO wake-up according to setting
|
||||
// NOTE: The typical spin setup is a NOP followed by a jump-to-self
|
||||
// NOTE: There is a small delay when the jump address is dynamically edited by cell
|
||||
if (state != FIFO_state::nop)
|
||||
if (state != FIFO::state::nop)
|
||||
{
|
||||
fifo_wake_delay();
|
||||
}
|
||||
|
|
|
@ -32,6 +32,22 @@ namespace rsx
|
|||
EMIT_BARRIER = 2
|
||||
};
|
||||
|
||||
// State of the FIFO puller. Stored in performance_counters.state and passed to
// do_local_task() by the renderer code.
enum class state : u8
|
||||
{
|
||||
running = 0,
|
||||
empty = 1, // PUT == GET
|
||||
spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
|
||||
nop = 3, // Puller is processing a NOP command
|
||||
lock_wait = 4,// Puller is processing a lock acquire
|
||||
paused = 5, // Puller is paused externally
|
||||
};
|
||||
|
||||
// Hint kind passed as the first argument of rsx::thread::sync_hint().
// The base implementation ignores the value and only forwards the payload to zcull_ctrl.
enum class interrupt_hint : u8
|
||||
{
|
||||
conditional_render_eval = 1,
|
||||
zcull_sync = 2
|
||||
};
|
||||
|
||||
struct register_pair
|
||||
{
|
||||
u32 reg;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "Common/BufferUtils.h"
|
||||
#include "Core/RSXReservationLock.hpp"
|
||||
#include "RSXOffload.h"
|
||||
#include "RSXThread.h"
|
||||
|
||||
|
|
|
@ -5,12 +5,14 @@
|
|||
#include "Emu/Cell/SPUThread.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "Capture/rsx_capture.h"
|
||||
#include "Common/BufferUtils.h"
|
||||
#include "Common/buffer_stream.hpp"
|
||||
#include "Common/texture_cache.h"
|
||||
#include "Common/surface_store.h"
|
||||
#include "Common/time.hpp"
|
||||
#include "Capture/rsx_capture.h"
|
||||
#include "Core/RSXReservationLock.hpp"
|
||||
#include "Core/RSXEngLock.hpp"
|
||||
#include "rsx_methods.h"
|
||||
#include "gcm_printing.h"
|
||||
#include "RSXDisAsm.h"
|
||||
|
@ -733,7 +735,7 @@ namespace rsx
|
|||
if ((state & (cpu_flag::dbg_global_pause + cpu_flag::exit)) == cpu_flag::dbg_global_pause)
|
||||
{
|
||||
// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
|
||||
do_local_task(rsx::FIFO_state::paused);
|
||||
do_local_task(rsx::FIFO::state::paused);
|
||||
thread_ctrl::wait_on(state, old, 16000);
|
||||
}
|
||||
else
|
||||
|
@ -803,7 +805,7 @@ namespace rsx
|
|||
check_zcull_status(false);
|
||||
nv4097::set_render_mode(this, 0, method_registers.registers[NV4097_SET_RENDER_ENABLE]);
|
||||
|
||||
performance_counters.state = FIFO_state::empty;
|
||||
performance_counters.state = FIFO::state::empty;
|
||||
|
||||
const u64 event_flags = unsent_gcm_events.exchange(0);
|
||||
|
||||
|
@ -832,7 +834,7 @@ namespace rsx
|
|||
thread_ctrl::wait_for(1000);
|
||||
}
|
||||
|
||||
performance_counters.state = FIFO_state::running;
|
||||
performance_counters.state = FIFO::state::running;
|
||||
|
||||
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
|
||||
fifo_ctrl->set_get(ctrl->get);
|
||||
|
@ -994,7 +996,7 @@ namespace rsx
|
|||
|
||||
// Clear any pending flush requests to release threads
|
||||
std::this_thread::sleep_for(10ms);
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
do_local_task(rsx::FIFO::state::lock_wait);
|
||||
|
||||
g_fxo->get<rsx::dma_manager>().join();
|
||||
g_fxo->get<vblank_thread>() = thread_state::finished;
|
||||
|
@ -1261,7 +1263,7 @@ namespace rsx
|
|||
fmt::throw_exception("ill-formed draw command");
|
||||
}
|
||||
|
||||
void thread::do_local_task(FIFO_state state)
|
||||
void thread::do_local_task(FIFO::state state)
|
||||
{
|
||||
m_eng_interrupt_mask.clear(rsx::backend_interrupt);
|
||||
|
||||
|
@ -1272,7 +1274,7 @@ namespace rsx
|
|||
handle_emu_flip(async_flip_buffer);
|
||||
}
|
||||
|
||||
if (!in_begin_end && state != FIFO_state::lock_wait)
|
||||
if (!in_begin_end && state != FIFO::state::lock_wait)
|
||||
{
|
||||
if (atomic_storage<u32>::load(m_invalidated_memory_range.end) != 0)
|
||||
{
|
||||
|
@ -2845,7 +2847,7 @@ namespace rsx
|
|||
if (!result.queries.empty())
|
||||
{
|
||||
cond_render_ctrl.set_eval_sources(result.queries);
|
||||
sync_hint(FIFO_hint::hint_conditional_render_eval, { .query = cond_render_ctrl.eval_sources.front(), .address = ref });
|
||||
sync_hint(FIFO::interrupt_hint::conditional_render_eval, { .query = cond_render_ctrl.eval_sources.front(), .address = ref });
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2895,7 +2897,7 @@ namespace rsx
|
|||
//ensure(async_tasks_pending.load() == 0);
|
||||
}
|
||||
|
||||
void thread::sync_hint(FIFO_hint /*hint*/, rsx::reports::sync_hint_payload_t payload)
|
||||
void thread::sync_hint(FIFO::interrupt_hint /*hint*/, rsx::reports::sync_hint_payload_t payload)
|
||||
{
|
||||
zcull_ctrl->on_sync_hint(payload);
|
||||
}
|
||||
|
|
|
@ -28,6 +28,11 @@
|
|||
#include "Emu/IdManager.h"
|
||||
#include "Emu/system_config.h"
|
||||
|
||||
#include "Core/RSXDisplay.h"
|
||||
#include "Core/RSXFrameBuffer.h"
|
||||
#include "Core/RSXIOMap.hpp"
|
||||
#include "Core/RSXVertexTypes.h"
|
||||
|
||||
extern atomic_t<bool> g_user_asked_for_frame_capture;
|
||||
extern atomic_t<bool> g_disable_frame_limit;
|
||||
extern rsx::frame_trace_data frame_debug;
|
||||
|
@ -40,84 +45,6 @@ namespace rsx
|
|||
class display_manager;
|
||||
}
|
||||
|
||||
struct rsx_iomap_table
|
||||
{
|
||||
static constexpr u32 c_lock_stride = 8192;
|
||||
|
||||
std::array<atomic_t<u32>, 4096> ea;
|
||||
std::array<atomic_t<u32>, 4096> io;
|
||||
std::array<shared_mutex, 0x1'0000'0000 / c_lock_stride> rs;
|
||||
|
||||
rsx_iomap_table() noexcept;
|
||||
|
||||
// Try to get the real address given a mapped address
|
||||
// Returns -1 on failure
|
||||
u32 get_addr(u32 offs) const noexcept
|
||||
{
|
||||
return this->ea[offs >> 20] | (offs & 0xFFFFF);
|
||||
}
|
||||
|
||||
template <bool IsFullLock, uint Stride>
|
||||
bool lock(u32 addr, u32 len, cpu_thread* self = nullptr) noexcept
|
||||
{
|
||||
if (len <= 1) return false;
|
||||
const u32 end = addr + len - 1;
|
||||
|
||||
bool added_wait = false;
|
||||
|
||||
for (u32 block = addr / c_lock_stride; block <= (end / c_lock_stride); block += Stride)
|
||||
{
|
||||
auto& mutex_ = rs[block];
|
||||
|
||||
if (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared()) [[ unlikely ]]
|
||||
{
|
||||
if (self)
|
||||
{
|
||||
added_wait |= !self->state.test_and_set(cpu_flag::wait);
|
||||
}
|
||||
|
||||
if (!self || self->id_type() != 0x55u)
|
||||
{
|
||||
IsFullLock ? mutex_.lock() : mutex_.lock_shared();
|
||||
}
|
||||
else
|
||||
{
|
||||
while (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared())
|
||||
{
|
||||
self->cpu_wait({});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (added_wait)
|
||||
{
|
||||
self->check_state();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <bool IsFullLock, uint Stride>
|
||||
void unlock(u32 addr, u32 len) noexcept
|
||||
{
|
||||
ensure(len >= 1);
|
||||
const u32 end = addr + len - 1;
|
||||
|
||||
for (u32 block = (addr / 8192); block <= (end / 8192); block += Stride)
|
||||
{
|
||||
if constexpr (IsFullLock)
|
||||
{
|
||||
rs[block].unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
rs[block].unlock_shared();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
enum framebuffer_creation_context : u8
|
||||
{
|
||||
context_draw = 0,
|
||||
|
@ -175,22 +102,6 @@ namespace rsx
|
|||
all_interrupt_bits = memory_config_interrupt | backend_interrupt | display_interrupt | pipe_flush_interrupt
|
||||
};
|
||||
|
||||
enum FIFO_state : u8
|
||||
{
|
||||
running = 0,
|
||||
empty = 1, // PUT == GET
|
||||
spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
|
||||
nop = 3, // Puller is processing a NOP command
|
||||
lock_wait = 4,// Puller is processing a lock acquire
|
||||
paused = 5, // Puller is paused externallly
|
||||
};
|
||||
|
||||
enum FIFO_hint : u8
|
||||
{
|
||||
hint_conditional_render_eval = 1,
|
||||
hint_zcull_sync = 2
|
||||
};
|
||||
|
||||
enum result_flags: u8
|
||||
{
|
||||
result_none = 0,
|
||||
|
@ -206,264 +117,6 @@ namespace rsx
|
|||
const char* file = __builtin_FILE(),
|
||||
const char* func = __builtin_FUNCTION());
|
||||
|
||||
struct tiled_region
|
||||
{
|
||||
u32 address;
|
||||
u32 base;
|
||||
GcmTileInfo *tile;
|
||||
u8 *ptr;
|
||||
|
||||
void write(const void *src, u32 width, u32 height, u32 pitch);
|
||||
void read(void *dst, u32 width, u32 height, u32 pitch);
|
||||
};
|
||||
|
||||
struct vertex_array_buffer
|
||||
{
|
||||
rsx::vertex_base_type type;
|
||||
u8 attribute_size;
|
||||
u8 stride;
|
||||
std::span<const std::byte> data;
|
||||
u8 index;
|
||||
bool is_be;
|
||||
};
|
||||
|
||||
struct vertex_array_register
|
||||
{
|
||||
rsx::vertex_base_type type;
|
||||
u8 attribute_size;
|
||||
std::array<u32, 4> data;
|
||||
u8 index;
|
||||
};
|
||||
|
||||
struct empty_vertex_array
|
||||
{
|
||||
u8 index;
|
||||
};
|
||||
|
||||
struct draw_array_command
|
||||
{
|
||||
u32 __dummy;
|
||||
};
|
||||
|
||||
struct draw_indexed_array_command
|
||||
{
|
||||
std::span<const std::byte> raw_index_buffer;
|
||||
};
|
||||
|
||||
struct draw_inlined_array
|
||||
{
|
||||
u32 __dummy;
|
||||
u32 __dummy2;
|
||||
};
|
||||
|
||||
struct interleaved_attribute_t
|
||||
{
|
||||
u8 index;
|
||||
bool modulo;
|
||||
u16 frequency;
|
||||
};
|
||||
|
||||
struct interleaved_range_info
|
||||
{
|
||||
bool interleaved = false;
|
||||
bool single_vertex = false;
|
||||
u32 base_offset = 0;
|
||||
u32 real_offset_address = 0;
|
||||
u8 memory_location = 0;
|
||||
u8 attribute_stride = 0;
|
||||
|
||||
rsx::simple_array<interleaved_attribute_t> locations;
|
||||
|
||||
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
|
||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
|
||||
};
|
||||
|
||||
enum attribute_buffer_placement : u8
|
||||
{
|
||||
none = 0,
|
||||
persistent = 1,
|
||||
transient = 2
|
||||
};
|
||||
|
||||
class vertex_input_layout
|
||||
{
|
||||
int m_num_used_blocks = 0;
|
||||
std::array<interleaved_range_info, 16> m_blocks_data{};
|
||||
|
||||
public:
|
||||
rsx::simple_array<interleaved_range_info*> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
|
||||
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
|
||||
|
||||
std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);
|
||||
|
||||
vertex_input_layout() = default;
|
||||
|
||||
interleaved_range_info* alloc_interleaved_block()
|
||||
{
|
||||
auto result = &m_blocks_data[m_num_used_blocks++];
|
||||
result->attribute_stride = 0;
|
||||
result->base_offset = 0;
|
||||
result->memory_location = 0;
|
||||
result->real_offset_address = 0;
|
||||
result->single_vertex = false;
|
||||
result->locations.clear();
|
||||
result->interleaved = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_num_used_blocks = 0;
|
||||
interleaved_blocks.clear();
|
||||
volatile_blocks.clear();
|
||||
referenced_registers.clear();
|
||||
}
|
||||
|
||||
bool validate() const
|
||||
{
|
||||
// Criteria: At least one array stream has to be defined to feed vertex positions
|
||||
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
|
||||
|
||||
if (!interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0)
|
||||
return true;
|
||||
|
||||
if (!volatile_blocks.empty())
|
||||
return true;
|
||||
|
||||
for (u8 index = 0; index < limits::vertex_count; ++index)
|
||||
{
|
||||
switch (attribute_placement[index])
|
||||
{
|
||||
case attribute_buffer_placement::transient:
|
||||
{
|
||||
// Ignore register reference
|
||||
if (std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end())
|
||||
continue;
|
||||
|
||||
// The source is inline array or immediate draw push buffer
|
||||
return true;
|
||||
}
|
||||
case attribute_buffer_placement::persistent:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
case attribute_buffer_placement::none:
|
||||
{
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Unreachable");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
|
||||
{
|
||||
u32 mem = 0;
|
||||
for (auto &block : interleaved_blocks)
|
||||
{
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
mem += range.second * block->attribute_stride;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
};
|
||||
|
||||
// Plain-data description of a framebuffer configuration: dimensions, four color
// surface slots, one "zeta" surface, and the surface format/AA settings.
// NOTE(review): "zeta" presumably denotes the depth/stencil surface (cf. depth_format) - confirm.
struct framebuffer_layout
|
||||
{
|
||||
// NOTE(review): the macro name suggests instances are serialized as raw bytes, in which
// case member order and types are part of the stored format - confirm against the macro.
ENABLE_BITWISE_SERIALIZATION;
|
||||
|
||||
// Framebuffer dimensions
u16 width;
|
||||
u16 height;
|
||||
// Per-color-slot data, four entries each
std::array<u32, 4> color_addresses;
|
||||
std::array<u32, 4> color_pitch;
|
||||
// NOTE(review): relationship between *_pitch and actual_*_pitch is not visible here - confirm with the code that fills this struct.
std::array<u32, 4> actual_color_pitch;
|
||||
std::array<bool, 4> color_write_enabled;
|
||||
// Zeta surface data (single entry)
u32 zeta_address;
|
||||
u32 zeta_pitch;
|
||||
u32 actual_zeta_pitch;
|
||||
bool zeta_write_enabled;
|
||||
// Surface configuration enums
rsx::surface_target target;
|
||||
rsx::surface_color_format color_format;
|
||||
rsx::surface_depth_format2 depth_format;
|
||||
rsx::surface_antialiasing aa_mode;
|
||||
rsx::surface_raster_type raster_type;
|
||||
// NOTE(review): presumably the horizontal/vertical scale implied by aa_mode - confirm.
u32 aa_factors[2];
|
||||
bool ignore_change;
|
||||
};
|
||||
|
||||
// Per-frame counters and timing accumulators; display_flip_info_t carries one as its `stats` member.
// NOTE(review): the time unit of the s64 fields is not visible in this block - TODO confirm.
struct frame_statistics_t
|
||||
{
|
||||
// Counters
u32 draw_calls;
|
||||
u32 submit_count;
|
||||
|
||||
// Timings
s64 setup_time;
|
||||
s64 vertex_upload_time;
|
||||
s64 textures_upload_time;
|
||||
s64 draw_exec_time;
|
||||
s64 flip_time;
|
||||
};
|
||||
|
||||
struct display_flip_info_t
|
||||
{
|
||||
std::deque<u32> buffer_queue;
|
||||
u32 buffer;
|
||||
bool skip_frame;
|
||||
bool emu_flip;
|
||||
bool in_progress;
|
||||
frame_statistics_t stats;
|
||||
|
||||
inline void push(u32 _buffer)
|
||||
{
|
||||
buffer_queue.push_back(_buffer);
|
||||
}
|
||||
|
||||
inline bool pop(u32 _buffer)
|
||||
{
|
||||
if (buffer_queue.empty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
const auto index = buffer_queue.front();
|
||||
buffer_queue.pop_front();
|
||||
|
||||
if (index == _buffer)
|
||||
{
|
||||
buffer = _buffer;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
while (!buffer_queue.empty());
|
||||
|
||||
// Need to observe this happening in the wild
|
||||
rsx_log.error("Display queue was discarded while not empty!");
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class vblank_thread
|
||||
{
|
||||
std::shared_ptr<named_thread<std::function<void()>>> m_thread;
|
||||
|
||||
public:
|
||||
vblank_thread() = default;
|
||||
vblank_thread(const vblank_thread&) = delete;
|
||||
|
||||
void set_thread(std::shared_ptr<named_thread<std::function<void()>>> thread);
|
||||
|
||||
vblank_thread& operator=(thread_state);
|
||||
vblank_thread& operator=(const vblank_thread&) = delete;
|
||||
};
|
||||
|
||||
struct backend_configuration
|
||||
{
|
||||
bool supports_multidraw; // Draw call batching
|
||||
|
@ -493,6 +146,7 @@ namespace rsx
|
|||
u64 tsc;
|
||||
};
|
||||
|
||||
// TODO: This class is a mess, this needs to be broken into smaller chunks, like I did for RSXFIFO and RSXZCULL (kd)
|
||||
class thread : public cpu_thread
|
||||
{
|
||||
u64 timestamp_ctrl = 0;
|
||||
|
@ -586,7 +240,7 @@ namespace rsx
|
|||
atomic_t<u64> idle_time{ 0 }; // Time spent idling in microseconds
|
||||
u64 last_update_timestamp = 0; // Timestamp of last load update
|
||||
u64 FIFO_idle_timestamp = 0; // Timestamp of when FIFO queue becomes idle
|
||||
FIFO_state state = FIFO_state::running;
|
||||
FIFO::state state = FIFO::state::running;
|
||||
u32 approximate_load = 0;
|
||||
u32 sampled_frames = 0;
|
||||
}
|
||||
|
@ -736,7 +390,7 @@ namespace rsx
|
|||
/**
|
||||
* Execute a backend local task queue
|
||||
*/
|
||||
virtual void do_local_task(FIFO_state state);
|
||||
virtual void do_local_task(FIFO::state state);
|
||||
|
||||
virtual void emit_geometry(u32) {}
|
||||
|
||||
|
@ -778,7 +432,7 @@ namespace rsx
|
|||
// sync
|
||||
void sync();
|
||||
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
|
||||
virtual void sync_hint(FIFO_hint hint, reports::sync_hint_payload_t payload);
|
||||
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
|
||||
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
|
||||
|
||||
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
|
||||
|
@ -899,126 +553,4 @@ namespace rsx
|
|||
{
|
||||
return g_fxo->try_get<rsx::thread>();
|
||||
}
|
||||
|
||||
template<bool IsFullLock = false, uint Stride = 128>
|
||||
class reservation_lock
|
||||
{
|
||||
u32 addr = 0;
|
||||
u32 length = 0;
|
||||
|
||||
inline void lock_range(u32 addr, u32 length)
|
||||
{
|
||||
if (!get_current_renderer()->iomap_table.lock<IsFullLock, Stride>(addr, length, get_current_cpu_thread()))
|
||||
{
|
||||
length = 0;
|
||||
}
|
||||
|
||||
this->addr = addr;
|
||||
this->length = length;
|
||||
}
|
||||
|
||||
public:
|
||||
reservation_lock(u32 addr, u32 length)
|
||||
{
|
||||
if (g_cfg.core.rsx_accurate_res_access &&
|
||||
addr < constants::local_mem_base)
|
||||
{
|
||||
lock_range(addr, length);
|
||||
}
|
||||
}
|
||||
|
||||
reservation_lock(u32 addr, u32 length, bool setting)
|
||||
{
|
||||
if (setting)
|
||||
{
|
||||
lock_range(addr, length);
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-range lock. If ranges overlap, the combined range will be acquired.
|
||||
// If ranges do not overlap, the first range that is in main memory will be acquired.
|
||||
reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
|
||||
{
|
||||
if (g_cfg.core.rsx_accurate_res_access)
|
||||
{
|
||||
const auto range1 = utils::address_range::start_length(dst_addr, dst_length);
|
||||
const auto range2 = utils::address_range::start_length(src_addr, src_length);
|
||||
utils::address_range target_range;
|
||||
|
||||
if (!range1.overlaps(range2)) [[likely]]
|
||||
{
|
||||
target_range = (dst_addr < constants::local_mem_base) ? range1 : range2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Very unlikely
|
||||
target_range = range1.get_min_max(range2);
|
||||
}
|
||||
|
||||
if (target_range.start < constants::local_mem_base)
|
||||
{
|
||||
lock_range(target_range.start, target_range.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Very special utility for batched transfers (SPU related)
|
||||
template <typename T = void>
|
||||
void update_if_enabled(u32 addr, u32 _length, const std::add_pointer_t<T>& lock_release = std::add_pointer_t<void>{})
|
||||
{
|
||||
// This check is not perfect but it covers the important cases fast (this check is only an optimization - forcing true disables it)
|
||||
if (length && (this->addr / rsx_iomap_table::c_lock_stride != addr / rsx_iomap_table::c_lock_stride || (addr % rsx_iomap_table::c_lock_stride + _length) > rsx_iomap_table::c_lock_stride) && _length > 1)
|
||||
{
|
||||
if constexpr (!std::is_void_v<T>)
|
||||
{
|
||||
// See SPUThread.cpp
|
||||
lock_release->release(0);
|
||||
}
|
||||
|
||||
unlock();
|
||||
lock_range(addr, _length);
|
||||
}
|
||||
}
|
||||
|
||||
void unlock(bool destructor = false)
|
||||
{
|
||||
if (length)
|
||||
{
|
||||
get_current_renderer()->iomap_table.unlock<IsFullLock, Stride>(addr, length);
|
||||
|
||||
if (!destructor)
|
||||
{
|
||||
length = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~reservation_lock()
|
||||
{
|
||||
unlock(true);
|
||||
}
|
||||
};
|
||||
|
||||
class eng_lock
|
||||
{
|
||||
rsx::thread* pthr;
|
||||
public:
|
||||
eng_lock(rsx::thread* target)
|
||||
:pthr(target)
|
||||
{
|
||||
if (pthr->is_current_thread())
|
||||
{
|
||||
pthr = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
pthr->pause();
|
||||
}
|
||||
}
|
||||
|
||||
~eng_lock()
|
||||
{
|
||||
if (pthr) pthr->unpause();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
#include "Core/RSXEngLock.hpp"
|
||||
#include "Core/RSXReservationLock.hpp"
|
||||
#include "RSXThread.h"
|
||||
|
||||
namespace rsx
|
||||
|
@ -422,7 +424,7 @@ namespace rsx
|
|||
if (It->query->sync_tag > m_sync_tag)
|
||||
{
|
||||
// rsx_log.trace("[Performance warning] Query hint emit during sync command.");
|
||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = It->query });
|
||||
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = It->query });
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -531,7 +533,7 @@ namespace rsx
|
|||
{
|
||||
if (It->query->num_draws && It->query->sync_tag > m_sync_tag)
|
||||
{
|
||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = It->query });
|
||||
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = It->query });
|
||||
ensure(It->query->sync_tag <= m_sync_tag);
|
||||
}
|
||||
|
||||
|
@ -556,7 +558,7 @@ namespace rsx
|
|||
const auto elapsed = m_tsc - front.query->timestamp;
|
||||
if (elapsed > max_zcull_delay_us)
|
||||
{
|
||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = front.query });
|
||||
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = front.query });
|
||||
ensure(front.query->sync_tag <= m_sync_tag);
|
||||
}
|
||||
|
||||
|
@ -704,7 +706,7 @@ namespace rsx
|
|||
{
|
||||
if (query->sync_tag > m_sync_tag) [[unlikely]]
|
||||
{
|
||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = query });
|
||||
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = query });
|
||||
ensure(m_sync_tag >= query->sync_tag);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -673,7 +673,7 @@ VKGSRender::~VKGSRender()
|
|||
// Flush DMA queue
|
||||
while (!g_fxo->get<rsx::dma_manager>().sync())
|
||||
{
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
do_local_task(rsx::FIFO::state::lock_wait);
|
||||
}
|
||||
|
||||
//Wait for device to finish up with resources
|
||||
|
@ -895,7 +895,7 @@ void VKGSRender::on_semaphore_acquire_wait()
|
|||
(async_flip_requested & flip_request::emu_requested) ||
|
||||
(m_queue_status & flush_queue_state::deadlock))
|
||||
{
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
do_local_task(rsx::FIFO::state::lock_wait);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1602,7 +1602,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
|
|||
return true;
|
||||
}
|
||||
|
||||
void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_t payload)
|
||||
void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload)
|
||||
{
|
||||
rsx::thread::sync_hint(hint, payload);
|
||||
|
||||
|
@ -1615,7 +1615,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
|
|||
// Occlusion test result evaluation is coming up, avoid a hard sync
|
||||
switch (hint)
|
||||
{
|
||||
case rsx::FIFO_hint::hint_conditional_render_eval:
|
||||
case rsx::FIFO::interrupt_hint::conditional_render_eval:
|
||||
{
|
||||
// If a flush request is already enqueued, do nothing
|
||||
if (m_flush_requests.pending())
|
||||
|
@ -1645,7 +1645,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
|
|||
m_last_cond_render_eval_hint = now;
|
||||
break;
|
||||
}
|
||||
case rsx::FIFO_hint::hint_zcull_sync:
|
||||
case rsx::FIFO::interrupt_hint::zcull_sync:
|
||||
{
|
||||
// Check if the required report is synced to this CB
|
||||
auto& data = m_occlusion_map[payload.query->driver_handle];
|
||||
|
@ -1672,7 +1672,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
|
|||
}
|
||||
}
|
||||
|
||||
void VKGSRender::do_local_task(rsx::FIFO_state state)
|
||||
void VKGSRender::do_local_task(rsx::FIFO::state state)
|
||||
{
|
||||
if (m_queue_status & flush_queue_state::deadlock)
|
||||
{
|
||||
|
@ -1702,7 +1702,7 @@ void VKGSRender::do_local_task(rsx::FIFO_state state)
|
|||
m_flush_queue_mutex.unlock();
|
||||
}
|
||||
}
|
||||
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
|
||||
else if (!in_begin_end && state != rsx::FIFO::state::lock_wait)
|
||||
{
|
||||
if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
|
||||
{
|
||||
|
@ -1717,11 +1717,11 @@ void VKGSRender::do_local_task(rsx::FIFO_state state)
|
|||
|
||||
switch (state)
|
||||
{
|
||||
case rsx::FIFO_state::lock_wait:
|
||||
case rsx::FIFO::state::lock_wait:
|
||||
// Critical check finished
|
||||
return;
|
||||
//case rsx::FIFO_state::spinning:
|
||||
//case rsx::FIFO_state::empty:
|
||||
//case rsx::FIFO::state::spinning:
|
||||
//case rsx::FIFO::state::empty:
|
||||
// We have some time, check the present queue
|
||||
//check_present_status();
|
||||
//break;
|
||||
|
|
|
@ -248,7 +248,7 @@ public:
|
|||
void set_scissor(bool clip_viewport);
|
||||
void bind_viewport();
|
||||
|
||||
void sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_t payload) override;
|
||||
void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override;
|
||||
bool release_GCM_label(u32 address, u32 data) override;
|
||||
|
||||
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
|
@ -282,7 +282,7 @@ protected:
|
|||
|
||||
void renderctl(u32 request_code, void* args) override;
|
||||
|
||||
void do_local_task(rsx::FIFO_state state) override;
|
||||
void do_local_task(rsx::FIFO::state state) override;
|
||||
bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override;
|
||||
void notify_tile_unbound(u32 tile) override;
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "rsx_utils.h"
|
||||
#include "rsx_decode.h"
|
||||
#include "Common/time.hpp"
|
||||
#include "Core/RSXReservationLock.hpp"
|
||||
#include "Emu/Cell/PPUCallback.h"
|
||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
|
@ -1278,6 +1279,11 @@ namespace rsx
|
|||
out_pitch = out_bpp * out_w;
|
||||
}
|
||||
|
||||
if (in_pitch == 0)
|
||||
{
|
||||
in_pitch = in_bpp * in_w;
|
||||
}
|
||||
|
||||
if (in_bpp != out_bpp)
|
||||
{
|
||||
is_block_transfer = false;
|
||||
|
@ -1680,12 +1686,6 @@ namespace rsx
|
|||
const u8 in_format = method_registers.nv0039_input_format();
|
||||
const u32 notify = arg;
|
||||
|
||||
// The existing GCM commands use only the value 0x1 for inFormat and outFormat
|
||||
if (in_format != 0x01 || out_format != 0x01)
|
||||
{
|
||||
rsx_log.error("NV0039_BUFFER_NOTIFY: Unsupported format: inFormat=%d, outFormat=%d", in_format, out_format);
|
||||
}
|
||||
|
||||
if (!line_count || !line_length)
|
||||
{
|
||||
rsx_log.warning("NV0039_BUFFER_NOTIFY NOPed out: pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
|
||||
|
@ -1734,7 +1734,28 @@ namespace rsx
|
|||
(dst_offset >= src_offset && dst_offset < src_max);
|
||||
}();
|
||||
|
||||
if (is_overlapping)
|
||||
if (in_format > 1 || out_format > 1) [[ unlikely ]]
|
||||
{
|
||||
// The formats are just input channel strides. You can use this to do cool tricks like gathering channels
|
||||
// Very rare, only seen in use by Destiny
|
||||
// TODO: Hw accel
|
||||
for (u32 row = 0; row < line_count; ++row)
|
||||
{
|
||||
auto dst_ptr = dst;
|
||||
auto src_ptr = src;
|
||||
while (src_ptr < src + line_length)
|
||||
{
|
||||
*dst_ptr = *src_ptr;
|
||||
|
||||
src_ptr += in_format;
|
||||
dst_ptr += out_format;
|
||||
}
|
||||
|
||||
dst += out_pitch;
|
||||
src += in_pitch;
|
||||
}
|
||||
}
|
||||
else if (is_overlapping) [[ unlikely ]]
|
||||
{
|
||||
if (is_block_transfer)
|
||||
{
|
||||
|
|
|
@ -525,6 +525,12 @@
|
|||
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\time.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_cursor.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_list_view.hpp" />
|
||||
|
|
|
@ -76,6 +76,9 @@
|
|||
<Filter Include="Emu\GPU\RSX\Program\Interpreter">
|
||||
<UniqueIdentifier>{bc97b324-1eea-445a-8fa9-6fc49e3df47c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Core">
|
||||
<UniqueIdentifier>{99b3a1c9-93ea-4498-86b0-1000793013fa}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Crypto\aes.cpp">
|
||||
|
@ -2206,6 +2209,24 @@
|
|||
<ClInclude Include="Emu\Io\recording_config.h">
|
||||
<Filter>Emu\Io</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp">
|
||||
<Filter>Emu\GPU\RSX\Core</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||
|
|
Loading…
Add table
Reference in a new issue