rsx: ZCULL rewrite and other improvements

- ZCULL unit emulation rewritten
- ZCULL reports are now deferred, avoiding pipeline stalls
- Minor optimizations; replaced std::mutex with shared_mutex where contention is rare (see the locking sketch below)
- Silenced an unnecessary error message
- Small improvement to out-of-memory handling for Vulkan, and a slight bump to the vertex buffer heap size
kd-11 2018-03-05 14:09:43 +03:00
parent dece1e01f4
commit 315798b1f4
11 changed files with 697 additions and 321 deletions
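A minimal sketch of the locking pattern the commit moves to, assuming standard C++17 std::shared_mutex as a stand-in for RPCS3's own shared_mutex (Utilities/mutex.h); the class and names below are illustrative, not project API. Note that the call sites in this diff still take exclusive locks via std::lock_guard; the immediate gain is the cheaper uncontended fast path, with shared (reader) locking available for hot read paths later.

#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

// Illustrative cache mirroring the locking style adopted in this commit.
class sample_cache
{
    std::shared_mutex m_mut; // stand-in for RPCS3's shared_mutex
    std::unordered_map<std::uint64_t, int> m_data;

public:
    void store(std::uint64_t key, int value)
    {
        // Writers keep taking exclusive locks, exactly as the diff does
        // with std::lock_guard<shared_mutex>.
        std::lock_guard<std::shared_mutex> lock(m_mut);
        m_data[key] = value;
    }

    bool find(std::uint64_t key, int& out)
    {
        // Readers may take a shared lock; multiple readers proceed in parallel.
        std::shared_lock<std::shared_mutex> lock(m_mut);
        const auto it = m_data.find(key);
        if (it == m_data.end())
            return false;
        out = it->second;
        return true;
    }
};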

View file

@ -7,7 +7,7 @@
void data_cache::store_and_protect_data(u64 key, u32 start, size_t size, u8 format, size_t w, size_t h, size_t d, size_t m, ComPtr<ID3D12Resource> data)
{
std::lock_guard<std::mutex> lock(m_mut);
std::lock_guard<shared_mutex> lock(m_mut);
m_address_to_data[key] = std::make_pair(texture_entry(format, w, h, d, m), data);
protect_data(key, start, size);
}
@ -25,7 +25,7 @@ void data_cache::protect_data(u64 key, u32 start, size_t size)
bool data_cache::invalidate_address(u32 addr)
{
// In case 2 threads write to texture memory
std::lock_guard<std::mutex> lock(m_mut);
std::lock_guard<shared_mutex> lock(m_mut);
bool handled = false;
auto It = m_protected_ranges.begin(), E = m_protected_ranges.end();
for (; It != E;)
@ -49,7 +49,7 @@ bool data_cache::invalidate_address(u32 addr)
std::pair<texture_entry, ComPtr<ID3D12Resource> > *data_cache::find_data_if_available(u64 key)
{
std::lock_guard<std::mutex> lock(m_mut);
std::lock_guard<shared_mutex> lock(m_mut);
auto It = m_address_to_data.find(key);
if (It == m_address_to_data.end())
return nullptr;
@ -58,7 +58,7 @@ std::pair<texture_entry, ComPtr<ID3D12Resource> > *data_cache::find_data_if_avai
void data_cache::unprotect_all()
{
std::lock_guard<std::mutex> lock(m_mut);
std::lock_guard<shared_mutex> lock(m_mut);
for (auto &protectedTexture : m_protected_ranges)
{
u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);

View file

@ -98,7 +98,7 @@ private:
* Memory protection fault catch can be generated by any thread and
* modifies it.
*/
std::mutex m_mut;
shared_mutex m_mut;
std::unordered_map<u64, std::pair<texture_entry, ComPtr<ID3D12Resource>> > m_address_to_data; // Storage
std::list <std::tuple<u64, u32, u32> > m_protected_ranges; // address, start of protected range, size of protected range

View file

@ -209,7 +209,7 @@ void GLGSRender::end()
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
void* unused = nullptr;
bool update_framebuffer_sourced = false;
@ -598,6 +598,7 @@ void GLGSRender::set_viewport()
void GLGSRender::on_init_thread()
{
GSRender::on_init_thread();
zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
gl::init();
@ -768,7 +769,7 @@ void GLGSRender::on_init_thread()
for (u32 i = 0; i < occlusion_query_count; ++i)
{
GLuint handle = 0;
auto &query = occlusion_query_data[i];
auto &query = m_occlusion_query_data[i];
glGenQueries(1, &handle);
query.driver_handle = (u64)handle;
@ -853,6 +854,8 @@ void GLGSRender::on_init_thread()
void GLGSRender::on_exit()
{
zcull_ctrl.release();
m_prog_buffer.clear();
if (draw_fbo)
@ -920,7 +923,7 @@ void GLGSRender::on_exit()
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
auto &query = m_occlusion_query_data[i];
query.active = false;
query.pending = false;
@ -1424,7 +1427,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
return false;
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
@ -1452,7 +1455,7 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
m_gl_texture_cache.purge_dirty();
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
@ -1464,7 +1467,7 @@ void GLGSRender::do_local_task(bool /*idle*/)
if (!work_queue.empty())
{
std::lock_guard<std::mutex> lock(queue_guard);
std::lock_guard<shared_mutex> lock(queue_guard);
work_queue.remove_if([](work_item &q) { return q.received; });
@ -1505,7 +1508,7 @@ void GLGSRender::do_local_task(bool /*idle*/)
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
{
std::lock_guard<std::mutex> lock(queue_guard);
std::lock_guard<shared_mutex> lock(queue_guard);
work_queue.emplace_back();
work_item &result = work_queue.back();
@ -1537,31 +1540,38 @@ void GLGSRender::notify_tile_unbound(u32 tile)
//m_rtts.invalidate_surface_address(addr, false);
}
void GLGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
query->result = 0;
glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle);
}
void GLGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
void GLGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
glEndQuery(GL_ANY_SAMPLES_PASSED);
if (query->num_draws)
glEndQuery(GL_ANY_SAMPLES_PASSED);
}
bool GLGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
bool GLGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
{
if (!query->num_draws)
return true;
GLint status = GL_TRUE;
glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status);
return status != GL_FALSE;
}
void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
{
GLint result;
glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);
if (query->num_draws)
{
GLint result;
glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);
query->result += result;
query->result += result;
}
}
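For context, a self-contained sketch of the GL_ANY_SAMPLES_PASSED lifecycle these overrides wrap; this is plain OpenGL 3.3+ usage, not RPCS3 code, and assumes a current GL context:

// Create, record, poll and read one boolean occlusion query.
GLuint handle = 0;
glGenQueries(1, &handle);

glBeginQuery(GL_ANY_SAMPLES_PASSED, handle);
// ... issue the draw calls to be tested ...
glEndQuery(GL_ANY_SAMPLES_PASSED);

// Non-blocking poll, as in check_occlusion_query_status() above
GLint ready = GL_FALSE;
glGetQueryObjectiv(handle, GL_QUERY_RESULT_AVAILABLE, &ready);

if (ready != GL_FALSE)
{
    // Blocking read, as in get_occlusion_query_result() above
    GLint any_samples = 0;
    glGetQueryObjectiv(handle, GL_QUERY_RESULT, &any_samples);
}

glDeleteQueries(1, &handle);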
void GLGSRender::shell_do_cleanup()

View file

@ -265,7 +265,7 @@ struct driver_state
}
};
class GLGSRender : public GSRender
class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
GLFragmentProgram m_fragment_prog;
@ -311,7 +311,7 @@ private:
std::vector<u64> m_overlay_cleanup_requests;
std::mutex queue_guard;
shared_mutex queue_guard;
std::list<work_item> work_queue;
bool flush_draw_buffers = false;
@ -327,7 +327,7 @@ private:
//vaos are mandatory for core profile
gl::vao m_vao;
std::mutex m_sampler_mutex;
shared_mutex m_sampler_mutex;
u64 surface_store_tag = 0;
std::atomic_bool m_samplers_dirty = {true};
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
@ -363,10 +363,10 @@ public:
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
void begin_occlusion_query(rsx::occlusion_query_info* query) override;
void end_occlusion_query(rsx::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
protected:
void begin() override;

View file

@ -318,8 +318,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
}
}
if ((window_clip_width && window_clip_width != clip_horizontal) ||
(window_clip_height && window_clip_height != clip_vertical))
if ((window_clip_width && window_clip_width < clip_horizontal) ||
(window_clip_height && window_clip_height < clip_vertical))
{
LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
window_clip_width, window_clip_height, clip_horizontal, clip_vertical);
@ -428,7 +428,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
framebuffer_status_valid = draw_fbo.check();
if (!framebuffer_status_valid) return;
check_zcull_status(true, false);
check_zcull_status(true);
set_viewport();
switch (rsx::method_registers.surface_color_target())

View file

@ -349,8 +349,8 @@ namespace rsx
element_push_buffer.resize(0);
if (zcull_task_queue.active_query && zcull_task_queue.active_query->active)
zcull_task_queue.active_query->num_draws++;
if (zcull_ctrl->active)
zcull_ctrl->on_draw();
if (capture_current_frame)
{
@ -365,6 +365,12 @@ namespace rsx
reset();
if (!zcull_ctrl)
{
//Backend did not provide an implementation, provide NULL object
zcull_ctrl = std::make_unique<::rsx::reports::ZCULL_control>();
}
last_flip_time = get_system_time() - 1000000;
thread_ctrl::spawn(m_vblank_thread, "VBlank Thread", [this]()
@ -503,6 +509,9 @@ namespace rsx
//Execute backend-local tasks first
do_local_task(ctrl->put.load() == internal_get.load());
//Update sub-units
zcull_ctrl->update(this);
//Set up restore state if needed
if (sync_point_request)
{
@ -1140,6 +1149,12 @@ namespace rsx
void thread::do_internal_task()
{
if (zcull_ctrl->has_pending())
{
zcull_ctrl->sync(this);
return;
}
if (m_internal_tasks.empty())
{
std::this_thread::yield();
@ -1147,7 +1162,7 @@ namespace rsx
else
{
fmt::throw_exception("Disabled" HERE);
//std::lock_guard<std::mutex> lock{ m_mtx_task };
//std::lock_guard<shared_mutex> lock{ m_mtx_task };
//internal_task_entry &front = m_internal_tasks.front();
@ -1161,7 +1176,7 @@ namespace rsx
//std::future<void> thread::add_internal_task(std::function<bool()> callback)
//{
// std::lock_guard<std::mutex> lock{ m_mtx_task };
// std::lock_guard<shared_mutex> lock{ m_mtx_task };
// m_internal_tasks.emplace_back(callback);
// return m_internal_tasks.back().promise.get_future();
@ -2075,10 +2090,20 @@ namespace rsx
skip_frame = (m_skip_frame_ctr < 0);
}
//Reset zcull ctrl
zcull_ctrl->set_active(this, false);
zcull_ctrl->clear();
if (zcull_ctrl->has_pending())
{
LOG_ERROR(RSX, "Dangling reports found, discarding...");
zcull_ctrl->sync(this);
}
performance_counters.sampled_frames++;
}
void thread::check_zcull_status(bool framebuffer_swap, bool force_read)
void thread::check_zcull_status(bool framebuffer_swap)
{
if (g_cfg.video.disable_zcull_queries)
return;
@ -2108,35 +2133,8 @@ namespace rsx
}
}
occlusion_query_info* query = nullptr;
if (zcull_task_queue.task_stack.size() > 0)
query = zcull_task_queue.active_query;
if (query && query->active)
{
if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active))
{
end_occlusion_query(query);
query->active = false;
query->pending = true;
}
}
else
{
if (zcull_rendering_enabled && testing_enabled && zcull_surface_active)
{
//Find query
u32 free_index = synchronize_zcull_stats();
query = &occlusion_query_data[free_index];
zcull_task_queue.add(query);
begin_occlusion_query(query);
query->active = true;
query->result = 0;
query->num_draws = 0;
}
}
zcull_ctrl->set_enabled(this, zcull_rendering_enabled);
zcull_ctrl->set_active(this, zcull_rendering_enabled && testing_enabled && zcull_surface_active);
}
void thread::clear_zcull_stats(u32 type)
@ -2144,113 +2142,50 @@ namespace rsx
if (g_cfg.video.disable_zcull_queries)
return;
if (type == CELL_GCM_ZPASS_PIXEL_CNT)
{
if (zcull_task_queue.active_query &&
zcull_task_queue.active_query->active &&
zcull_task_queue.active_query->num_draws > 0)
{
//discard active query results
check_zcull_status(false, true);
zcull_task_queue.active_query->pending = false;
zcull_ctrl->clear();
}
//re-enable cull stats if stats are enabled
check_zcull_status(false, false);
zcull_task_queue.active_query->num_draws = 0;
void thread::get_zcull_stats(u32 type, vm::addr_t sink)
{
u32 value = 0;
if (!g_cfg.video.disable_zcull_queries)
{
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
{
zcull_ctrl->read_report(this, sink, type);
return;
}
current_zcull_stats.clear();
}
}
u32 thread::get_zcull_stats(u32 type)
{
if (g_cfg.video.disable_zcull_queries)
return 0u;
if (zcull_task_queue.active_query &&
zcull_task_queue.active_query->active &&
current_zcull_stats.zpass_pixel_cnt == 0 &&
type == CELL_GCM_ZPASS_PIXEL_CNT)
{
//The zcull unit is still bound as the read is happening and there are no results ready
check_zcull_status(false, true); //close current query
check_zcull_status(false, false); //start new query since stat counting is still active
}
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
{
if (current_zcull_stats.zpass_pixel_cnt > 0)
return UINT16_MAX;
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0) ? UINT16_MAX : 0;
}
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
//TODO
return UINT16_MAX;
case CELL_GCM_ZCULL_STATS3:
{
//Some kind of inverse value
if (current_zcull_stats.zpass_pixel_cnt > 0)
return 0;
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX;
}
default:
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
return 0;
}
}
u32 thread::synchronize_zcull_stats(bool hard_sync)
{
if (!zcull_rendering_enabled || zcull_task_queue.pending == 0)
return 0;
u32 result = UINT16_MAX;
for (auto &query : zcull_task_queue.task_stack)
{
if (query == nullptr || query->active)
continue;
bool status = check_occlusion_query_status(query);
if (status == false && !hard_sync)
continue;
get_occlusion_query_result(query);
current_zcull_stats.zpass_pixel_cnt += query->result;
query->pending = false;
query = nullptr;
zcull_task_queue.pending--;
}
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
if (!query.pending && !query.active)
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
case CELL_GCM_ZCULL_STATS3:
{
result = i;
//TODO
value = (type != CELL_GCM_ZCULL_STATS3)? UINT16_MAX : 0;
break;
}
default:
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
break;
}
}
if (result == UINT16_MAX && !hard_sync)
return synchronize_zcull_stats(true);
vm::ptr<CellGcmReportData> result = sink;
result->value = value;
result->padding = 0;
result->timer = timestamp();
}
return result;
void thread::sync()
{
zcull_ctrl->sync(this);
}
void thread::notify_zcull_info_changed()
{
check_zcull_status(false, false);
check_zcull_status(false);
}
//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
@ -2356,4 +2291,385 @@ namespace rsx
return false;
}
namespace reports
{
void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state)
{
if (state != enabled)
{
enabled = state;
if (active && !enabled)
set_active(ptimer, false);
}
}
void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state)
{
if (state != active)
{
active = state;
if (state)
{
verify(HERE), enabled && m_current_task == nullptr;
allocate_new_query(ptimer);
begin_occlusion_query(m_current_task);
}
else
{
verify(HERE), m_current_task;
if (m_current_task->num_draws)
{
end_occlusion_query(m_current_task);
m_current_task->active = false;
m_current_task->pending = true;
m_pending_writes.push_back({});
m_pending_writes.back().query = m_current_task;
}
else
{
discard_occlusion_query(m_current_task);
m_current_task->active = false;
}
m_current_task = nullptr;
}
}
}
void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type)
{
if (m_current_task)
{
m_current_task->owned = true;
end_occlusion_query(m_current_task);
m_pending_writes.push_back({});
m_current_task->active = false;
m_current_task->pending = true;
m_pending_writes.back().query = m_current_task;
allocate_new_query(ptimer);
begin_occlusion_query(m_current_task);
}
else
{
//Spam; send null query down the pipeline to copy the last result
//Might be used to capture a timestamp (verify)
m_pending_writes.push_back({});
}
auto forwarder = &m_pending_writes.back();
for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++)
{
if (!It->sink)
{
It->counter_tag = m_statistics_tag_id;
It->due_tsc = m_tsc + m_cycles_delay;
It->sink = sink;
It->type = type;
if (forwarder != &(*It))
{
//Not the last one in the chain, forward the writing operation to the last writer
It->forwarder = forwarder;
It->query->owned = true;
}
continue;
}
break;
}
}
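The key idea above: read_report() never stalls on the GPU. It queues a write with a due tick, and when several report reads stack up before any result is ready, earlier entries delegate the memory write to the newest writer so each sink is still filled exactly once. A simplified, self-contained sketch of that chaining (names hypothetical):

#include <vector>

// Hypothetical, reduced form of the reverse walk in read_report().
struct pending_write
{
    unsigned sink = 0;                  // guest address to receive the report
    pending_write* forwarder = nullptr; // newer entry that writes on our behalf
};

void chain_write(std::vector<pending_write>& queue, unsigned sink)
{
    queue.push_back({});                // entry for the current request
    pending_write* last = &queue.back();

    // Claim every older, still-unclaimed entry for this sink; all but the
    // newest defer the actual write to 'last'. (As in the real code, the raw
    // pointers stay valid only while the vector is not reallocated.)
    for (auto it = queue.rbegin(); it != queue.rend(); ++it)
    {
        if (it->sink)
            break;                      // older entries are already claimed

        it->sink = sink;
        if (&*it != last)
            it->forwarder = last;
    }
}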
void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
{
int retries = 0;
while (!Emu.IsStopped())
{
for (int n = 0; n < occlusion_query_count; ++n)
{
if (m_occlusion_query_data[n].pending || m_occlusion_query_data[n].active)
continue;
m_current_task = &m_occlusion_query_data[n];
m_current_task->num_draws = 0;
m_current_task->result = 0;
m_current_task->sync_timestamp = 0;
m_current_task->active = true;
m_current_task->owned = false;
return;
}
if (retries > 0)
{
LOG_ERROR(RSX, "ZCULL report queue is overflowing!!");
m_statistics_map[m_statistics_tag_id] = 1;
verify(HERE), m_pending_writes.front().sink == 0;
m_pending_writes.resize(0);
for (auto &query : m_occlusion_query_data)
{
discard_occlusion_query(&query);
query.pending = false;
}
m_current_task = &m_occlusion_query_data[0];
m_current_task->num_draws = 0;
m_current_task->result = 0;
m_current_task->sync_timestamp = 0;
m_current_task->active = true;
m_current_task->owned = false;
return;
}
//All slots are occupied, try to pop the earliest entry
m_tsc += max_zcull_cycles_delay;
update(ptimer);
retries++;
}
}
void ZCULL_control::clear()
{
if (!m_pending_writes.empty())
{
//Remove any dangling/unclaimed queries as the information is lost anyway
auto valid_size = m_pending_writes.size();
for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It)
{
if (!It->sink)
{
discard_occlusion_query(It->query);
It->query->pending = false;
valid_size--;
continue;
}
break;
}
m_pending_writes.resize(valid_size);
}
m_statistics_tag_id++;
m_statistics_map[m_statistics_tag_id] = 0;
}
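clear() illustrates the counter-generation trick: each clear bumps m_statistics_tag_id, and retired queries accumulate into the generation they were recorded under, so late GPU results cannot leak into a newer frame's count. A tiny sketch of the idea (names hypothetical):

#include <cstdint>
#include <unordered_map>

std::unordered_map<std::uint32_t, std::uint32_t> stats; // tag -> accumulated hits
std::uint32_t current_tag = 0;

// New generation on every clear; stale retirements land in their old tag.
void on_clear() { stats[++current_tag] = 0; }
void on_retired(std::uint32_t tag, std::uint32_t result) { stats[tag] += result; }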
void ZCULL_control::on_draw()
{
if (m_current_task)
m_current_task->num_draws++;
m_cycles_delay = max_zcull_cycles_delay;
}
void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 value)
{
verify(HERE), sink;
vm::ptr<CellGcmReportData> out = sink;
out->value = value;
out->timer = timestamp;
out->padding = 0;
}
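For reference, write() fills the 16-byte report block that libgcm exposes as CellGcmReportData; a plain-layout sketch (RPCS3 declares the fields as big-endian be_t<> wrappers, since this lands in guest memory):

#include <cstdint>

// Field order as consumed by the guest; see write() above.
struct report_block
{
    std::uint64_t timer;   // RSX timestamp at retirement
    std::uint32_t value;   // e.g. 0 or UINT16_MAX for ZPASS_PIXEL_CNT
    std::uint32_t padding; // always written as zero
};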
void ZCULL_control::sync(::rsx::thread* ptimer)
{
if (!m_pending_writes.empty())
{
u32 processed = 0;
const bool has_unclaimed = (m_pending_writes.back().sink == 0);
//Write all claimed reports unconditionally
for (auto &writer : m_pending_writes)
{
if (!writer.sink)
break;
auto query = writer.query;
u32 result = m_statistics_map[writer.counter_tag];
if (query)
{
verify(HERE), query->pending;
if (!result && query->num_draws)
{
get_occlusion_query_result(query);
if (query->result)
{
result += query->result;
m_statistics_map[writer.counter_tag] = result;
}
}
else
{
//Already have a hit, no need to retest
discard_occlusion_query(query);
}
query->pending = false;
}
if (!writer.forwarder)
//No other queries in the chain, write result
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
processed++;
}
if (!has_unclaimed)
{
verify(HERE), processed == m_pending_writes.size();
m_pending_writes.resize(0);
}
else
{
auto remaining = m_pending_writes.size() - processed;
verify(HERE), remaining > 0;
if (remaining == 1)
{
m_pending_writes.front() = m_pending_writes.back();
m_pending_writes.resize(1);
}
else
{
std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin());
m_pending_writes.resize(remaining);
}
}
//Delete all statistics caches but leave the current one
for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); )
{
if (It->first == m_statistics_tag_id)
++It;
else
It = m_statistics_map.erase(It);
}
}
//Critical, since it's likely a WAIT_FOR_IDLE type has been processed, all results are considered available
m_cycles_delay = 2;
}
void ZCULL_control::update(::rsx::thread* ptimer)
{
m_tsc++;
if (m_pending_writes.empty())
return;
u32 stat_tag_to_remove = m_statistics_tag_id;
u32 processed = 0;
for (auto &writer : m_pending_writes)
{
if (!writer.sink)
break;
if (writer.counter_tag != stat_tag_to_remove &&
stat_tag_to_remove != m_statistics_tag_id)
{
//If the stat id is different from this stat id and the queue is advancing,
//it's guaranteed that the previous tag has no remaining writes as the queue is ordered
m_statistics_map.erase(stat_tag_to_remove);
stat_tag_to_remove = m_statistics_tag_id;
}
auto query = writer.query;
u32 result = m_statistics_map[writer.counter_tag];
if (query)
{
verify(HERE), query->pending;
if (UNLIKELY(writer.due_tsc < m_tsc))
{
if (!result && query->num_draws)
{
get_occlusion_query_result(query);
if (query->result)
{
result += query->result;
m_statistics_map[writer.counter_tag] = result;
}
}
else
{
//No need to read this
discard_occlusion_query(query);
}
}
else
{
if (result || !query->num_draws)
{
//Not necessary to read the result anymore
discard_occlusion_query(query);
}
else
{
//Maybe we get lucky and results are ready
if (check_occlusion_query_status(query))
{
get_occlusion_query_result(query);
if (query->result)
{
result += query->result;
m_statistics_map[writer.counter_tag] = result;
}
}
else
{
//Too early; abort
break;
}
}
}
query->pending = false;
}
stat_tag_to_remove = writer.counter_tag;
//only zpass supported right now
if (!writer.forwarder)
//No other queries in the chain, write result
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
processed++;
}
if (stat_tag_to_remove != m_statistics_tag_id)
m_statistics_map.erase(stat_tag_to_remove);
if (processed)
{
auto remaining = m_pending_writes.size() - processed;
if (remaining == 1)
{
m_pending_writes.front() = m_pending_writes.back();
m_pending_writes.resize(1);
}
else if (remaining)
{
std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin());
m_pending_writes.resize(remaining);
}
else
{
m_pending_writes.resize(0);
}
}
}
}
}
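Both sync() and update() retire a completed prefix of m_pending_writes; rather than erasing from the front of the vector one element at a time, the code shifts the surviving tail down and resizes. A generic sketch of that pattern:

#include <algorithm>
#include <cstddef>
#include <vector>

// Drop 'processed' leading entries, keeping the vector's capacity.
template <typename T>
void retire_prefix(std::vector<T>& queue, std::size_t processed)
{
    const std::size_t remaining = queue.size() - processed;

    if (remaining == 0)
    {
        queue.resize(0);
    }
    else
    {
        // The real code special-cases remaining == 1 with a single copy;
        // std::move handles the general overlapping shift.
        std::move(queue.begin() + processed, queue.end(), queue.begin());
        queue.resize(remaining);
    }
}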

View file

@ -157,64 +157,91 @@ namespace rsx
std::array<attribute_buffer_placement, 16> attribute_placement;
};
struct zcull_statistics
namespace reports
{
u32 zpass_pixel_cnt;
u32 zcull_stats;
u32 zcull_stats1;
u32 zcull_stats2;
u32 zcull_stats3;
void clear()
struct occlusion_query_info
{
zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0;
}
};
u32 driver_handle;
u32 result;
u32 num_draws;
bool pending;
bool active;
bool owned;
struct occlusion_query_info
{
u32 driver_handle;
u32 result;
u32 num_draws;
bool pending;
bool active;
u64 sync_timestamp;
};
u64 sync_timestamp;
u64 external_flags;
};
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
struct queued_report_write
{
active_query = query;
u32 type = CELL_GCM_ZPASS_PIXEL_CNT;
u32 counter_tag;
occlusion_query_info* query;
queued_report_write* forwarder;
vm::addr_t sink;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
u32 due_tsc;
};
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
struct ZCULL_control
{
//Delay in 'cycles' before a report update operation is forced to retire
//Larger values might give more performance but some engines (UE3) don't seem to wait for results and will flicker
//TODO: Determine the real max delay in real hardware
const u32 max_zcull_cycles_delay = 10;
task_stack.push_back(query);
pending++;
}
};
//Number of occlusion query slots available. Real hardware actually has far fewer units before choking
const u32 occlusion_query_count = 128;
bool active = false;
bool enabled = false;
std::array<occlusion_query_info, 128> m_occlusion_query_data = {};
occlusion_query_info* m_current_task = nullptr;
u32 m_statistics_tag_id = 0;
u32 m_tsc = 0;
u32 m_cycles_delay = 10;
std::vector<queued_report_write> m_pending_writes;
std::unordered_map<u32, u32> m_statistics_map;
ZCULL_control() {}
~ZCULL_control() {}
void set_enabled(class ::rsx::thread* ptimer, bool enabled);
void set_active(class ::rsx::thread* ptimer, bool active);
void write(vm::addr_t sink, u32 timestamp, u32 value);
//Read current zcull statistics into the address provided
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
//Sets up a new query slot and sets it to the current task
void allocate_new_query(class ::rsx::thread* ptimer);
//clears current stat block and increments stat_tag_id
void clear();
//forcefully flushes all pending writes
void sync(class ::rsx::thread* ptimer);
//call once every 'tick' to update
void update(class ::rsx::thread* ptimer);
//Draw call notification
void on_draw();
//Check for pending writes
bool has_pending() const { return (m_pending_writes.size() != 0); }
//Backend methods (optional, will return everything as always visible by default)
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {}
};
}
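Worth calling out: the backends multiply-inherit ZCULL_control and lend themselves to the frontend's zcull_ctrl unique_ptr (see the on_init_thread() hunks), which is why teardown uses release() rather than reset(); destroying the pointer would delete the renderer out from under itself. A reduced sketch of the pattern with stand-in types:

#include <memory>

struct zcull_base
{
    virtual ~zcull_base() = default;
    // No-op defaults act as the NULL object when no backend implementation
    // is supplied (everything reports as visible).
    virtual void on_draw() {}
};

struct frontend
{
    std::unique_ptr<zcull_base> zcull_ctrl;
};

struct backend : frontend, zcull_base
{
    void init() { zcull_ctrl.reset(static_cast<zcull_base*>(this)); }

    void exit()
    {
        // Relinquish without deleting 'this'; the unique_ptr never owned us.
        zcull_ctrl.release();
    }
};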
struct sampled_image_descriptor_base;
@ -236,11 +263,7 @@ namespace rsx
//occlusion query
bool zcull_surface_active = false;
zcull_statistics current_zcull_stats;
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
occlusion_task zcull_task_queue = {};
std::unique_ptr<reports::ZCULL_control> zcull_ctrl;
//framebuffer setup
rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
@ -382,17 +405,14 @@ namespace rsx
virtual void notify_tile_unbound(u32 /*tile*/) {}
//zcull
virtual void notify_zcull_info_changed();
virtual void clear_zcull_stats(u32 type);
virtual u32 get_zcull_stats(u32 type);
virtual void check_zcull_status(bool framebuffer_swap, bool force_read);
virtual u32 synchronize_zcull_stats(bool hard_sync = false);
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
void notify_zcull_info_changed();
void clear_zcull_stats(u32 type);
void check_zcull_status(bool framebuffer_swap);
void get_zcull_stats(u32 type, vm::addr_t sink);
//sync
void sync();
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
@ -433,7 +453,7 @@ namespace rsx
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
private:
std::mutex m_mtx_task;
shared_mutex m_mtx_task;
struct internal_task_entry
{

View file

@ -566,7 +566,7 @@ VKGSRender::VKGSRender() : GSRender()
//Occlusion
m_occlusion_query_pool.create((*m_device), DESCRIPTOR_MAX_DRAW_CALLS); //Enough for 4k draw calls per pass
for (int n = 0; n < 128; ++n)
occlusion_query_data[n].driver_handle = n;
m_occlusion_query_data[n].driver_handle = n;
//Generate frame contexts
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
@ -769,7 +769,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
{
vk::texture_cache::thrashed_set result;
{
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()));
}
@ -777,7 +777,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
return false;
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
@ -795,7 +795,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
vm::temporary_unlock();
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
std::lock_guard<shared_mutex> lock(m_flush_queue_mutex);
m_flush_requests.post(sync_timestamp == 0ull);
has_queue_ref = true;
@ -846,13 +846,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
if (m_texture_cache.invalidate_range(address_base, size, true, true, false,
m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()).violation_handled)
{
m_texture_cache.purge_dirty();
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
@ -866,7 +866,7 @@ void VKGSRender::notify_tile_unbound(u32 tile)
//m_rtts.invalidate_surface_address(addr, false);
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
m_samplers_dirty.store(true);
}
}
@ -903,6 +903,7 @@ void VKGSRender::check_heap_status()
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs();
m_last_heap_sync_time = get_system_time();
}
else
{
@ -1063,7 +1064,7 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> textures_start = vertex_end;
//Load textures
{
std::lock_guard<std::mutex> lock(m_sampler_mutex);
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
bool update_framebuffer_sourced = false;
if (surface_store_tag != m_rtts.cache_tag)
@ -1356,40 +1357,15 @@ void VKGSRender::end()
occlusion_id = m_occlusion_query_pool.find_free_slot();
if (occlusion_id == UINT32_MAX)
{
bool free_slot_found = false;
u32 index_to_free = UINT32_MAX;
u64 earliest_timestamp = UINT64_MAX;
m_tsc += 100;
update(this);
//flush occlusion queries
for (auto It : m_occlusion_map)
occlusion_id = m_occlusion_query_pool.find_free_slot();
if (occlusion_id == UINT32_MAX)
{
u32 index = It.first;
auto query = &occlusion_query_data[index];
if (check_occlusion_query_status(query))
{
free_slot_found = true;
get_occlusion_query_result(query);
break;
}
if (query->sync_timestamp < earliest_timestamp)
{
index_to_free = index;
earliest_timestamp = query->sync_timestamp;
}
LOG_ERROR(RSX, "Occlusion pool overflow");
if (m_current_task) m_current_task->result = 1;
}
if (free_slot_found)
{
occlusion_id = m_occlusion_query_pool.find_free_slot();
}
else
{
get_occlusion_query_result(&occlusion_query_data[index_to_free]);
occlusion_id = m_occlusion_query_pool.find_free_slot();
}
verify(HERE), occlusion_id != UINT32_MAX;
}
}
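The new overflow path retries after pumping update() and, failing that, fakes a visible result instead of stalling. For context, a sketch of the non-blocking status check this strategy leans on, using the raw Vulkan API (the surrounding pool bookkeeping is RPCS3-specific and omitted):

#include <vulkan/vulkan.h>

// Poll a single query without waiting: with no VK_QUERY_RESULT_WAIT_BIT,
// vkGetQueryPoolResults returns VK_NOT_READY until the GPU retires it.
bool query_ready(VkDevice dev, VkQueryPool pool, uint32_t index, uint32_t& value)
{
    const VkResult res = vkGetQueryPoolResults(
        dev, pool,
        index, 1,               // first query, query count
        sizeof(value), &value,  // destination buffer
        sizeof(value),          // stride between results
        0);                     // flags: no wait, 32-bit results
    return res == VK_SUCCESS;
}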
@ -1441,7 +1417,7 @@ void VKGSRender::end()
const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
const bool single_draw = !supports_multidraw || (!is_emulated_restart && (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive));
if (m_occlusion_query_active)
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
@ -1500,7 +1476,7 @@ void VKGSRender::end()
}
}
if (m_occlusion_query_active)
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//End query
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
@ -1565,6 +1541,7 @@ void VKGSRender::on_init_thread()
GSRender::on_init_thread();
rsx_thread = std::this_thread::get_id();
zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
if (!supports_native_ui)
{
@ -1627,6 +1604,7 @@ void VKGSRender::on_init_thread()
void VKGSRender::on_exit()
{
zcull_ctrl.release();
return GSRender::on_exit();
}
@ -2002,7 +1980,7 @@ void VKGSRender::do_local_task(bool /*idle*/)
{
if (m_flush_requests.pending())
{
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
std::lock_guard<shared_mutex> lock(m_flush_queue_mutex);
//TODO: Determine if a hard sync is necessary
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
@ -2610,8 +2588,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
}
}
if ((window_clip_width && window_clip_width != clip_width) ||
(window_clip_height && window_clip_height != clip_height))
if ((window_clip_width && window_clip_width < clip_width) ||
(window_clip_height && window_clip_height < clip_height))
{
LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
window_clip_width, window_clip_height, clip_width, clip_height);
@ -2818,7 +2796,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, fbo_width, fbo_height, std::move(fbo_images)));
}
check_zcull_status(true, false);
check_zcull_status(true);
}
void VKGSRender::reinitialize_swapchain()
@ -3194,7 +3172,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
check_heap_status();
//Stop all parallel operations until this is finished
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
m_current_command_buffer->begin();
@ -3240,31 +3218,32 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
return false;
}
void VKGSRender::clear_zcull_stats(u32 type)
{
rsx::thread::clear_zcull_stats(type);
m_occlusion_map.clear();
m_occlusion_query_pool.reset_all(*m_current_command_buffer);
}
void VKGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
query->result = 0;
query->sync_timestamp = get_system_time();
//query->sync_timestamp = get_system_time();
m_active_query_info = query;
m_occlusion_query_active = true;
}
void VKGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
m_occlusion_query_active = false;
m_active_query_info = nullptr;
flush_command_queue();
//Avoid stalling later if this query is already tied to a report
if (query->num_draws && query->owned && !m_flush_requests.pending())
{
m_flush_requests.post(false);
m_flush_requests.remove_one();
}
}
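The post(false)/remove_one() pair above queues a command-buffer flush for the RSX thread without blocking the caller on it. A hedged sketch of how such a coalescing latch can be built; flush_request_task's real interface lives in VKGSRender.h and may differ in detail:

#include <atomic>

struct flush_latch
{
    std::atomic<bool> pending_flag{false};
    std::atomic<int>  num_waiters{0};

    bool pending() const { return pending_flag.load(); }

    void post()               { num_waiters++; pending_flag = true; } // request a flush
    void remove_one()         { num_waiters--; }   // caller will not wait on it
    void clear_pending_flag() { pending_flag = false; } // consumer took the batch
};

Posting and then immediately calling remove_one(), as end_occlusion_query() does, schedules a flush without the backend ever waiting on its completion.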
bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
{
if (!query->num_draws)
return true;
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
return true;
@ -3274,16 +3253,26 @@ bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
return true;
if (data.command_buffer_to_wait == m_current_command_buffer)
{
if (!m_flush_requests.pending())
{
//Likely to be read at some point in the near future, submit now to avoid stalling later
m_flush_requests.post(false);
m_flush_requests.remove_one();
}
return false;
}
if (data.command_buffer_to_wait->pending)
//Don't bother poking the state, a flush later will likely do it for free
return false;
u32 oldest = data.indices.front();
return m_occlusion_query_pool.check_query_status(oldest);
}
void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
{
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
@ -3293,20 +3282,32 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
if (data.indices.size() == 0)
return;
if (data.command_buffer_to_wait == m_current_command_buffer)
flush_command_queue(); //Should hard sync, but this should almost never ever happen
if (data.command_buffer_to_wait->pending)
data.command_buffer_to_wait->wait();
//Gather data
for (const auto occlusion_id : data.indices)
if (query->num_draws)
{
//We only need one hit
if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id))
if (data.command_buffer_to_wait == m_current_command_buffer)
{
query->result = 1;
break;
flush_command_queue();
//Clear any deferred flush requests from previous call to get_query_status()
if (m_flush_requests.pending())
{
m_flush_requests.clear_pending_flag();
m_flush_requests.consumer_wait();
}
}
if (data.command_buffer_to_wait->pending)
data.command_buffer_to_wait->wait();
//Gather data
for (const auto occlusion_id : data.indices)
{
//We only need one hit
if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id))
{
query->result = 1;
break;
}
}
}
@ -3314,6 +3315,26 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
m_occlusion_map.erase(query->driver_handle);
}
void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
{
if (m_active_query_info == query)
{
m_occlusion_query_active = false;
m_active_query_info = nullptr;
}
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
return;
auto &data = found->second;
if (data.indices.size() == 0)
return;
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_map.erase(query->driver_handle);
}
void VKGSRender::shell_do_cleanup()
{
//TODO: Guard this

View file

@ -38,7 +38,7 @@ namespace vk
//Heap allocation sizes in MB
//NOTE: Texture uploads can be huge, up to 16 MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 128
#define VK_INDEX_RING_BUFFER_SIZE_M 64
@ -55,7 +55,7 @@ struct command_buffer_chunk: public vk::command_buffer
std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 };
std::mutex guard_mutex;
shared_mutex guard_mutex;
command_buffer_chunk()
{}
@ -97,7 +97,7 @@ struct command_buffer_chunk: public vk::command_buffer
{
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
{
std::lock_guard<std::mutex> lock(guard_mutex);
std::lock_guard<shared_mutex> lock(guard_mutex);
if (pending)
{
@ -111,7 +111,7 @@ struct command_buffer_chunk: public vk::command_buffer
void wait()
{
std::lock_guard<std::mutex> lock(guard_mutex);
std::lock_guard<shared_mutex> lock(guard_mutex);
if (!pending)
return;
@ -244,7 +244,7 @@ struct flush_request_task
}
};
class VKGSRender : public GSRender
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
VKFragmentProgram m_fragment_prog;
@ -265,7 +265,7 @@ private:
std::unique_ptr<vk::depth_scaling_pass> m_depth_scaler;
std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
std::mutex m_sampler_mutex;
shared_mutex m_sampler_mutex;
u64 surface_store_tag = 0;
std::atomic_bool m_samplers_dirty = { true };
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
@ -292,10 +292,10 @@ private:
vk::command_pool m_command_buffer_pool;
vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false;
rsx::occlusion_query_info *m_active_query_info = nullptr;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::unordered_map<u32, occlusion_data> m_occlusion_map;
std::mutex m_secondary_cb_guard;
shared_mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
@ -346,7 +346,7 @@ private:
bool m_flush_draw_buffers = false;
std::atomic<int> m_last_flushable_cb = {-1 };
std::mutex m_flush_queue_mutex;
shared_mutex m_flush_queue_mutex;
flush_request_task m_flush_requests;
std::thread::id rsx_thread;
@ -400,11 +400,11 @@ public:
void write_buffers();
void set_viewport();
void clear_zcull_stats(u32 type) override;
void begin_occlusion_query(rsx::occlusion_query_info* query) override;
void end_occlusion_query(rsx::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
protected:
void begin() override;

View file

@ -1,7 +1,7 @@
#include "stdafx.h"
#include "VKHelpers.h"
#include <mutex>
#include "Utilities/mutex.h"
namespace vk
{
@ -24,7 +24,7 @@ namespace vk
u64 g_num_total_frames = 0;
//global submit guard to prevent race condition on queue submit
std::mutex g_submit_mutex;
shared_mutex g_submit_mutex;
VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{

View file

@ -57,6 +57,7 @@ namespace rsx
{
void set_reference(thread* rsx, u32 _reg, u32 arg)
{
rsx->sync();
rsx->ctrl->ref.exchange(arg);
}
@ -112,6 +113,7 @@ namespace rsx
void semaphore_release(thread* rsx, u32 _reg, u32 arg)
{
rsx->sync();
rsx->sync_point_request = true;
const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e());
@ -164,6 +166,8 @@ namespace rsx
{
//
}
rsx->sync();
auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
sema.semaphore[index].val = arg;
sema.semaphore[index].pad = 0;
@ -177,8 +181,9 @@ namespace rsx
{
//
}
u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
rsx->sync();
u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
sema.semaphore[index].val = val;
sema.semaphore[index].pad = 0;
@ -433,16 +438,14 @@ namespace rsx
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
case CELL_GCM_ZCULL_STATS3:
result->value = rsx->get_zcull_stats(type);
LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
rsx->get_zcull_stats(type, address_ptr);
break;
default:
LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
result->timer = rsx->timestamp();
result->padding = 0;
break;
}
result->timer = rsx->timestamp();
result->padding = 0;
}
void clear_report_value(thread* rsx, u32 _reg, u32 arg)
@ -450,10 +453,7 @@ namespace rsx
switch (arg)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
break;
case CELL_GCM_ZCULL_STATS:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
break;
default:
LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
@ -492,6 +492,7 @@ namespace rsx
return;
}
rsx->sync();
vm::ptr<CellGcmReportData> result = address_ptr;
rsx->conditional_render_test_failed = (result->value == 0);
}
@ -514,6 +515,11 @@ namespace rsx
rsx->notify_zcull_info_changed();
}
void sync(thread* rsx, u32, u32)
{
rsx->sync();
}
void set_surface_dirty_bit(thread* rsx, u32, u32)
{
rsx->m_rtts_dirty = true;
@ -1678,6 +1684,9 @@ namespace rsx
bind<NV4097_SET_STENCIL_TEST_ENABLE, nv4097::set_surface_options_dirty_bit>();
bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
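Finally, the new bindings route WAIT_FOR_IDLE, ZCULL_SYNC and SET_CONTEXT_DMA_REPORT through rsx->sync() so deferred reports are drained before any command that can observe them. A self-contained sketch of the dispatch-table idea behind bind<>() (offsets and names here are illustrative only):

#include <array>
#include <cstdint>

struct rsx_thread_stub { void sync() { /* drain zcull_ctrl here */ } };
using method_fn = void (*)(rsx_thread_stub*, std::uint32_t reg, std::uint32_t arg);

// One handler slot per 32-bit method register.
std::array<method_fn, 0x4000> g_methods{};

void sync_method(rsx_thread_stub* rsx, std::uint32_t, std::uint32_t)
{
    rsx->sync();
}

void bind_sync_methods()
{
    constexpr std::uint32_t WAIT_FOR_IDLE = 0x0110; // illustrative offset
    g_methods[WAIT_FOR_IDLE >> 2] = sync_method;
}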