rsx: Preserve memory order when doing flush

- Orders flushing to preserve memory at all costs
- Avoids a false positive where flushing overlapping sections can falsely invalidate another section via the head/tail test
This commit is contained in:
kd-11 2018-09-19 01:21:57 +03:00 committed by kd-11
commit fc486a1bac
5 changed files with 128 additions and 80 deletions

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "../rsx_cache.h" #include "../rsx_cache.h"
#include "../rsx_utils.h" #include "../rsx_utils.h"
@ -74,12 +74,28 @@ namespace rsx
std::deque<u32> read_history; std::deque<u32> read_history;
u64 cache_tag = 0; u64 cache_tag = 0;
u64 last_write_tag = 0;
memory_read_flags readback_behaviour = memory_read_flags::flush_once; memory_read_flags readback_behaviour = memory_read_flags::flush_once;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;
rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d; rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d;
// Re-initializes this section for the range [rsx_address, rsx_address + rsx_size).
// Strict rendering mode selects full-range protection; otherwise the conservative
// policy is used. All flush/sync bookkeeping and both tags are returned to zero.
void reset(u32 rsx_address, u32 rsx_size)
{
	rsx::protection_policy policy = rsx::protection_policy::protect_policy_conservative;
	if (g_cfg.video.strict_rendering_mode)
	{
		policy = rsx::protection_policy::protect_policy_full_range;
	}

	rsx::buffered_section::reset(rsx_address, rsx_size, policy);

	// Tags and timestamps
	cache_tag = 0ull;
	last_write_tag = 0ull;
	sync_timestamp = 0ull;

	// Flush/sync state
	synchronized = false;
	flushed = false;

	// TODO: Fix write tracking and reset stats
}
bool matches(u32 rsx_address, u32 rsx_size) bool matches(u32 rsx_address, u32 rsx_size)
{ {
return rsx::buffered_section::matches(rsx_address, rsx_size); return rsx::buffered_section::matches(rsx_address, rsx_size);
@ -110,9 +126,10 @@ namespace rsx
return false; return false;
} }
void touch() void touch(u64 tag)
{ {
num_writes++; num_writes++;
last_write_tag = tag;
} }
void reset_write_statistics() void reset_write_statistics()
@ -645,6 +662,47 @@ namespace rsx
return result; return result;
} }
// Flushes every section queued in data.sections_to_flush, then moves each one to
// data.sections_to_unprotect and empties the flush queue.
//
// data   - thrashed set whose sections_to_flush list is consumed by this call
// extras - backend-specific arguments handed to copy_texture() and flush()
//
// Sections are processed in ascending last_write_tag order so that the most recently
// written section is flushed last and its contents win over older, overlapping ones.
template <typename ...Args>
void flush_set(thrashed_set& data, Args&&... extras)
{
	if (data.sections_to_flush.size() > 1)
	{
		// Sort with oldest data first
		// Ensures that new data tramples older data
		std::sort(data.sections_to_flush.begin(), data.sections_to_flush.end(), [](const auto& a, const auto& b)
		{
			return (a->last_write_tag < b->last_write_tag);
		});
	}

	// NOTE: extras are intentionally passed as lvalues below instead of being forwarded.
	// The same pack is consumed by several calls across all loop iterations; forwarding
	// it would allow the first callee to move from an rvalue argument and leave every
	// later call with a moved-from value.
	for (auto &surface : data.sections_to_flush)
	{
		if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
		{
			// This region is set to always read from itself (unavoidable hard sync)
			const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
			if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
			{
				// The previously synchronized copy is older than the ROP timestamp; count it and copy again
				m_num_cache_mispredictions++;
				m_num_cache_misses++;
				surface->copy_texture(true, extras...);
			}
		}

		if (!surface->flush(extras...))
		{
			// Missed address, note this
			// TODO: Lower severity when successful to keep the cache from overworking
			record_cache_miss(*surface);
		}

		m_num_flush_requests++;

		// Flushed sections are queued for unprotection regardless of the flush result
		data.sections_to_unprotect.push_back(surface);
	}

	data.sections_to_flush.clear();
}
void unprotect_set(thrashed_set& data) void unprotect_set(thrashed_set& data)
{ {
auto release_set = [this](std::vector<section_storage_type*>& _set) auto release_set = [this](std::vector<section_storage_type*>& _set)
@ -844,33 +902,9 @@ namespace rsx
result.sections_to_unprotect.push_back(obj.first); result.sections_to_unprotect.push_back(obj.first);
} }
else if (!allow_flush)
{
result.sections_to_flush.push_back(obj.first);
}
else else
{ {
if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always) result.sections_to_flush.push_back(obj.first);
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (obj.first->is_synchronized() && ROP_timestamp > obj.first->get_sync_timestamp())
{
m_num_cache_mispredictions++;
m_num_cache_misses++;
obj.first->copy_texture(true, std::forward<Args>(extras)...);
}
}
if (!obj.first->flush(std::forward<Args>(extras)...))
{
//Missed address, note this
//TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(*obj.first);
}
m_num_flush_requests++;
result.sections_to_unprotect.push_back(obj.first);
} }
continue; continue;
@ -897,19 +931,25 @@ namespace rsx
obj.second->remove_one(); obj.second->remove_one();
} }
if (deferred_flush && result.sections_to_flush.size()) if (!result.sections_to_flush.empty())
{ {
result.num_flushable = static_cast<int>(result.sections_to_flush.size()); if (deferred_flush)
result.address_base = address; {
result.address_range = range; result.num_flushable = static_cast<int>(result.sections_to_flush.size());
result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); result.address_base = address;
return result; result.address_range = range;
} result.cache_tag = m_cache_update_tag.load(std::memory_order_consume);
else return result;
{ }
unprotect_set(result); else
{
verify(HERE), allow_flush;
flush_set(result, std::forward<Args>(extras)...);
}
} }
unprotect_set(result);
//Everything has been handled //Everything has been handled
result = {}; result = {};
result.violation_handled = true; result.violation_handled = true;
@ -1184,7 +1224,7 @@ namespace rsx
region.set_context(texture_upload_context::framebuffer_storage); region.set_context(texture_upload_context::framebuffer_storage);
region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d);
region.set_memory_read_flags(memory_read_flags::flush_always); region.set_memory_read_flags(memory_read_flags::flush_always);
region.touch(); region.touch(m_cache_update_tag);
m_flush_always_cache[memory_address] = memory_size; m_flush_always_cache[memory_address] = memory_size;
@ -1376,30 +1416,7 @@ namespace rsx
if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag) if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
{ {
//1. Write memory to cpu side //1. Write memory to cpu side
for (auto &tex : data.sections_to_flush) flush_set(data, std::forward<Args>(extras)...);
{
if (tex->is_locked())
{
if (tex->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (tex->is_synchronized() && ROP_timestamp > tex->get_sync_timestamp())
{
m_num_cache_mispredictions++;
m_num_cache_misses++;
tex->copy_texture(true, std::forward<Args>(extras)...);
}
}
if (!tex->flush(std::forward<Args>(extras)...))
{
record_cache_miss(*tex);
}
m_num_flush_requests++;
}
}
//2. Release all obsolete sections //2. Release all obsolete sections
unprotect_set(data); unprotect_set(data);
@ -2521,7 +2538,8 @@ namespace rsx
verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size(); verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size();
cached_dest->reprotect(utils::protection::no, { mem_base, mem_length }); cached_dest->reprotect(utils::protection::no, { mem_base, mem_length });
cached_dest->touch(); cached_dest->touch(m_cache_update_tag);
update_cache_tag();
} }
else else
{ {

View file

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "Emu/Memory/vm.h" #include "Emu/Memory/vm.h"
#include "Emu/System.h" #include "Emu/System.h"
#include "GLGSRender.h" #include "GLGSRender.h"
@ -1473,10 +1473,27 @@ void GLGSRender::flip(int buffer)
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{ {
buffer_width = render_target_texture->width(); if (render_target_texture->last_use_tag == m_rtts.write_tag)
buffer_height = render_target_texture->height(); {
image = render_target_texture->raw_handle();
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
image = render_target_texture->raw_handle(); if (overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
} }
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height)) else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height))
{ {
@ -1484,7 +1501,8 @@ void GLGSRender::flip(int buffer)
//The render might have been done offscreen or in software and a blit used to display //The render might have been done offscreen or in software and a blit used to display
image = surface->get_raw_texture()->id(); image = surface->get_raw_texture()->id();
} }
else
if (!image)
{ {
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "stdafx.h" #include "stdafx.h"
@ -252,12 +252,7 @@ namespace gl
void reset(u32 base, u32 size, bool /*flushable*/=false) void reset(u32 base, u32 size, bool /*flushable*/=false)
{ {
rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; rsx::cached_texture_section::reset(base, size);
rsx::buffered_section::reset(base, size, policy);
flushed = false;
synchronized = false;
sync_timestamp = 0ull;
vram_texture = nullptr; vram_texture = nullptr;
managed_texture.reset(); managed_texture.reset();

View file

@ -2922,6 +2922,9 @@ void VKGSRender::flip(int buffer)
u32 buffer_width = display_buffers[buffer].width; u32 buffer_width = display_buffers[buffer].width;
u32 buffer_height = display_buffers[buffer].height; u32 buffer_height = display_buffers[buffer].height;
u32 buffer_pitch = display_buffers[buffer].pitch;
if (!buffer_pitch) buffer_pitch = buffer_width * 4; // TODO: Check avconf
coordi aspect_ratio; coordi aspect_ratio;
@ -3005,7 +3008,21 @@ void VKGSRender::flip(int buffer)
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{ {
image_to_flip = render_target_texture; if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
} }
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height)) else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height))
{ {
@ -3013,7 +3030,8 @@ void VKGSRender::flip(int buffer)
//The render might have been done offscreen or in software and a blit used to display //The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture(); image_to_flip = surface->get_raw_texture();
} }
else
if (!image_to_flip)
{ {
//Read from cell //Read from cell
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height); image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "stdafx.h" #include "stdafx.h"
#include "VKRenderTargets.h" #include "VKRenderTargets.h"
#include "VKGSRender.h" #include "VKGSRender.h"
@ -32,8 +32,7 @@ namespace vk
if (length > cpu_address_range) if (length > cpu_address_range)
release_dma_resources(); release_dma_resources();
rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; rsx::cached_texture_section::reset(base, length);
rsx::buffered_section::reset(base, length, policy);
} }
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false) void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)