Misc. Texture Cache fixes

Rui Pinheiro 2018-11-01 01:31:12 +00:00 committed by kd-11
parent 9d1cdccb1a
commit bcdf91edbb
5 changed files with 245 additions and 148 deletions

View file

@ -55,6 +55,7 @@ namespace rsx
std::vector<section_storage_type*> sections_to_exclude; // These sections are to be excluded from protection manipulation (subtracted from other sections)
u32 num_flushable = 0;
u64 cache_tag = 0;
address_range fault_range;
address_range invalidate_range;
@ -300,7 +301,7 @@ namespace rsx
public:
virtual void destroy() = 0;
virtual bool is_depth_texture(u32, u32) = 0;
virtual void on_section_destroyed(section_storage_type& section)
virtual void on_section_destroyed(section_storage_type& /*section*/)
{}
@ -405,6 +406,24 @@ namespace rsx
}
surface->flush(std::forward<Args>(extras)...);
// Exclude this region when flushing other sections that should not trample it
// If we overlap an excluded RO, set it as dirty
for (auto &other : data.sections_to_exclude)
{
AUDIT(other != surface);
if (!other->is_flushable())
{
if (other->overlaps(*surface, section_bounds::full_range))
{
other->set_dirty(true);
}
}
else if (surface->last_write_tag > other->last_write_tag)
{
other->add_flush_exclusion(surface->get_confirmed_range());
}
}
}
data.flushed = true;
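For clarity on the exclusion loop added above: after a flushable surface is written back, any overlapping read-only section is marked dirty (its cached copy is now stale), while an overlapping flushable section with an older last_write_tag has the flushed surface's confirmed range added to its flush exclusions, so a later flush of the older section cannot overwrite the fresher data. A minimal standalone sketch of that rule, using hypothetical simplified stand-ins for section_storage_type and address_range:

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for the real cache types (half-open ranges).
struct range
{
    std::uint32_t start, end;
    bool overlaps(const range& o) const { return start < o.end && o.start < end; }
};

struct section
{
    bool flushable = false;
    bool dirty = false;
    std::uint64_t last_write_tag = 0;
    range confirmed{};
    std::vector<range> flush_exclusions; // ranges skipped when this section is flushed

    void add_flush_exclusion(const range& r) { flush_exclusions.push_back(r); }
};

// Mirrors the loop above: 'surface' was just flushed; excluded sections must not trample it.
inline void apply_exclusions(section& surface, const std::vector<section*>& sections_to_exclude)
{
    for (section* other : sections_to_exclude)
    {
        if (!other->flushable)
        {
            if (other->confirmed.overlaps(surface.confirmed))
                other->dirty = true; // overlapping read-only copy is now stale
        }
        else if (surface.last_write_tag > other->last_write_tag)
        {
            // An older flushable section must skip this range when it eventually flushes
            other->add_flush_exclusion(surface.confirmed);
        }
    }
}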
@ -483,7 +502,7 @@ namespace rsx
// Sanity checks
AUDIT(exclusion_range.is_page_range());
AUDIT(!exclusion_range.overlaps(data.fault_range));
AUDIT(data.cause.is_read() && !excluded->is_flushable() || !exclusion_range.overlaps(data.fault_range));
// Apply exclusion
ranges_to_unprotect.exclude(exclusion_range);
@ -590,10 +609,6 @@ namespace rsx
const auto new_range = tex.get_min_max(invalidate_range, bounds).to_page_range();
AUDIT(new_range.is_page_range() && invalidate_range.inside(new_range));
const s32 signed_distance = tex.signed_distance(fault_range, section_bounds::locked_range);
const s32 distance = signed_distance < 0 ? -signed_distance : signed_distance;
const bool is_after_fault = (signed_distance >= 0);
// The various chaining policies behave differently
bool extend_invalidate_range = tex.overlaps(fault_range, bounds);
@ -662,7 +677,7 @@ namespace rsx
AUDIT(fault_range_in.valid());
address_range fault_range = fault_range_in.to_page_range();
auto trampled_set = std::move(get_intersecting_set(fault_range));
intersecting_set trampled_set = std::move(get_intersecting_set(fault_range));
thrashed_set result = {};
result.cause = cause;
@ -685,11 +700,12 @@ namespace rsx
{
if (g_cfg.video.strict_texture_flushing && tex.is_flushable())
{
// TODO: Flush only the part outside the fault_range
LOG_TODO(RSX, "Flushable section data may have been lost");
tex.add_flush_exclusion(fault_range);
}
else
{
tex.set_dirty(true);
}
tex.set_dirty(true);
}
}
@ -729,8 +745,7 @@ namespace rsx
if (
// RO sections during a read invalidation can be ignored (unless there are flushables in trampled_set, since those could overwrite RO data)
// TODO: Also exclude RO sections even if there are flushables
(invalidation_keep_ro_during_read && !trampled_set.has_flushables && cause.is_read() && tex.get_protection() == utils::protection::ro) ||
(invalidation_keep_ro_during_read && !trampled_set.has_flushables && cause.is_read() && !tex.is_flushable()) ||
// Sections that are not fully contained in invalidate_range can be ignored
!tex.inside(trampled_set.invalidate_range, bounds) ||
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
@ -1080,36 +1095,25 @@ namespace rsx
template <typename ...FlushArgs, typename ...Args>
void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, const std::tuple<FlushArgs...>& flush_extras, Args&&... extras)
{
AUDIT(g_cfg.video.write_color_buffers); // this method is only called when WCB is enabled
AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
std::lock_guard lock(m_cache_mutex);
// Find a cached section to use
section_storage_type& region = *find_cached_texture(rsx_range, true, false);
if (!region.is_locked())
{
// Invalidate sections from surface cache occupying same address range
std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo), flush_extras));
}
section_storage_type& region = *find_cached_texture(rsx_range, true, true, width, height);
// Prepare and initialize fbo region
if (region.exists() && region.get_context() != texture_upload_context::framebuffer_storage)
{
AUDIT(region.matches(rsx_range));
//This space was being used for purposes other than framebuffer storage
//Delete used resources before attaching it to framebuffer memory
read_only_tex_invalidate = true;
}
// We are going to reprotect this section in a second, so discard it here
if (region.is_locked())
{
region.discard();
}
// Destroy the resources
region.destroy();
if (!region.is_locked() || region.get_context() != texture_upload_context::framebuffer_storage)
{
// Invalidate sections from surface cache occupying same address range
std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo), flush_extras));
}
if (!region.is_locked() || region.can_be_reused())
@ -1129,6 +1133,9 @@ namespace rsx
}
region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
tag_framebuffer(region.get_section_base());
region.set_dirty(false);
region.touch(m_cache_update_tag);
@ -1143,8 +1150,6 @@ namespace rsx
AUDIT(m_flush_always_cache.find(region.get_section_range()) != m_flush_always_cache.end());
}
// Delay protection until here in case the invalidation block above has unprotected pages in this range
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
update_cache_tag();
#ifdef TEXTURE_CACHE_DEBUG

View file

@ -1001,6 +1001,8 @@ namespace rsx
rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;
rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d;
address_range_vector flush_exclusions; // Address ranges that will be skipped during flush
predictor_type *m_predictor = nullptr;
size_t m_predictor_key_hash = 0;
predictor_entry_type *m_predictor_entry = nullptr;
@ -1015,9 +1017,9 @@ namespace rsx
}
cached_texture_section() = default;
cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache()), m_predictor(&m_tex_cache->get_predictor())
cached_texture_section(ranged_storage_block_type *block)
{
update_unreleased();
initialize(block);
}
void initialize(ranged_storage_block_type *block)
@ -1073,6 +1075,8 @@ namespace rsx
context = rsx::texture_upload_context::shader_read;
image_type = rsx::texture_dimension_extended::texture_dimension_2d;
flush_exclusions.clear();
// Set to dirty
set_dirty(true);
@ -1324,6 +1328,8 @@ namespace rsx
{
get_predictor_entry().on_flush();
}
flush_exclusions.clear();
}
void on_speculative_flush()
@ -1346,12 +1352,144 @@ namespace rsx
{
m_tex_cache->on_misprediction();
}
flush_exclusions.clear();
}
/**
* Flush
*/
private:
void imp_flush_memcpy(u32 vm_dst, u8* src, u32 len) const
{
u8 *dst = get_ptr<u8>(vm_dst);
address_range copy_range = address_range::start_length(vm_dst, len);
if (flush_exclusions.empty() || !copy_range.overlaps(flush_exclusions))
{
// Normal case = no flush exclusions, or no overlap
memcpy(dst, src, len);
return;
}
else if (copy_range.inside(flush_exclusions))
{
// Nothing to copy
return;
}
// Otherwise, we need to filter the memcpy with our flush exclusions
// Should be relatively rare
address_range_vector vec;
vec.merge(copy_range);
vec.exclude(flush_exclusions);
for (const auto& rng : vec)
{
if (!rng.valid())
continue;
AUDIT(rng.inside(copy_range));
u32 offset = rng.start - vm_dst;
memcpy(dst + offset, src + offset, rng.length());
}
}
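A concrete way to read imp_flush_memcpy above: only the parts of the destination range not covered by flush_exclusions are copied. The sketch below reproduces that filtering with plain half-open ranges instead of rsx::address_range_vector (the helper name and range type are hypothetical); copying 0x1000..0x2000 while 0x1400..0x1800 is excluded ends up as two memcpys, for 0x1000..0x1400 and 0x1800..0x2000.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical half-open range; the real code uses rsx::address_range / address_range_vector.
struct hrange { std::uint32_t start, end; };

// Copy guest range [dst, dst + len) from src, skipping any byte covered by an exclusion.
// 'base' maps guest address A to host pointer base + A (stand-in for get_ptr<u8>()).
inline void filtered_memcpy(std::uint8_t* base, std::uint32_t dst, const std::uint8_t* src,
                            std::uint32_t len, const std::vector<hrange>& exclusions)
{
    std::uint32_t cur = dst;
    const std::uint32_t end = dst + len;

    // Assumes exclusions are sorted and non-overlapping, similar to address_range_vector after exclude()
    for (const hrange& ex : exclusions)
    {
        const std::uint32_t ex_start = std::max(ex.start, cur);
        const std::uint32_t ex_end   = std::min(ex.end, end);
        if (ex_start >= ex_end)
            continue; // exclusion does not intersect what is left to copy

        if (ex_start > cur)
            std::memcpy(base + cur, src + (cur - dst), ex_start - cur); // copy up to the exclusion
        cur = ex_end; // skip the excluded bytes
    }

    if (cur < end)
        std::memcpy(base + cur, src + (cur - dst), end - cur); // copy the tail
}

// filtered_memcpy(base, 0x1000, src, 0x1000, {{0x1400, 0x1800}}) copies
// [0x1000, 0x1400) and [0x1800, 0x2000), leaving the excluded bytes untouched.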
void imp_flush()
{
AUDIT(synchronized);
ASSERT(real_pitch > 0);
// Calculate valid range
const auto valid_range = get_confirmed_range();
AUDIT(valid_range.valid());
const auto valid_length = valid_range.length();
const auto valid_offset = valid_range.start - get_section_base();
AUDIT(valid_length > 0);
// Obtain pointers to the source and destination memory regions
u8 *src = static_cast<u8*>(derived()->map_synchronized(valid_offset, valid_length));
u32 dst = valid_range.start;
ASSERT(src != nullptr);
// Copy from src to dst
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
imp_flush_memcpy(dst, src, valid_length);
}
else
{
ASSERT(valid_length % rsx_pitch == 0);
u8 *_src = src;
u32 _dst = dst;
const auto num_rows = valid_length / rsx_pitch;
for (u32 row = 0; row < num_rows; ++row)
{
imp_flush_memcpy(_dst, _src, real_pitch);
_src += real_pitch;
_dst += rsx_pitch;
}
}
}
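One note on the pitch handling in imp_flush above: when real_pitch >= rsx_pitch (or the whole valid range fits in one row) the downloaded data is laid out exactly as the guest expects, so a single filtered copy suffices; otherwise each row is tightly packed at real_pitch in the staging buffer but must land rsx_pitch bytes apart in guest memory, hence the per-row loop. Illustrative numbers only (a hypothetical 640x480, 4 bytes-per-pixel surface):

#include <cstdint>

constexpr std::uint32_t width      = 640;
constexpr std::uint32_t bpp        = 4;
constexpr std::uint32_t real_pitch = width * bpp; // 2560: packed row size in the download buffer
constexpr std::uint32_t rsx_pitch  = 4096;        // row stride the guest surface actually uses
constexpr std::uint32_t valid_len  = 480 * rsx_pitch;

static_assert(valid_len % rsx_pitch == 0, "flush assumes whole rows");
constexpr std::uint32_t num_rows = valid_len / rsx_pitch; // 480 copies of real_pitch bytes,
                                                          // destinations advancing by rsx_pitch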
public:
// Returns false if there was a cache miss
template <typename ...Args>
bool flush(Args&&... extras)
{
if (flushed) return true;
bool miss = false;
// Sanity checks
ASSERT(exists());
AUDIT(is_locked());
// If we are fully inside the flush exclusion regions, we just mark ourselves as flushed and return
if (get_confirmed_range().inside(flush_exclusions))
{
flushed = true;
flush_exclusions.clear();
on_flush(miss);
return !miss;
}
// If we are not synchronized, we must synchronize before proceeding (hard fault)
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
derived()->synchronize(true, std::forward<Args>(extras)...);
miss = true;
ASSERT(synchronized); // TODO ruipin: This might be possible in OGL. Revisit
}
// Copy flush result to guest memory
imp_flush();
// Finish up
// It's highly likely that this surface will be reused, so we just leave resources in place
flushed = true;
derived()->finish_flush();
flush_exclusions.clear();
on_flush(miss);
return !miss;
}
void add_flush_exclusion(const address_range& rng)
{
AUDIT(exists() && is_locked() && is_flushable());
const auto _rng = rng.get_intersect(get_section_range());
flush_exclusions.merge(_rng);
}
/**
* Misc
*/
public:
predictor_entry_type& get_predictor_entry()
{
// If we don't have a predictor entry, or the key has changed

View file

@ -484,46 +484,39 @@ namespace gl
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
}
bool flush()
/**
* Flush
*/
void synchronize(bool blocking)
{
ASSERT(exists());
if (synchronized)
return;
if (flushed) return true; //Already written, ignore
AUDIT(is_locked());
copy_texture(blocking);
bool result = true;
if (!synchronized)
if (blocking)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture(true);
if (!synchronized)
{
LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on...");
protect(utils::protection::ro);
return false;
}
result = false;
m_fence.wait_for_signal();
}
}
verify(HERE), real_pitch > 0;
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized);
m_fence.wait_for_signal();
flushed = true;
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT(valid_length > 0);
void *dst = get_ptr(get_section_base() + valid_offset);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_offset, valid_length, GL_MAP_READ_BIT);
return glMapBufferRange(GL_PIXEL_PACK_BUFFER, offset, size, GL_MAP_READ_BIT);
}
//throw if map failed since we'll segfault anyway
verify(HERE), src != nullptr;
void finish_flush()
{
// Free resources
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
// Shuffle
bool require_manual_shuffle = false;
if (pack_unpack_swap_bytes)
{
@ -531,27 +524,10 @@ namespace gl
require_manual_shuffle = true;
}
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(dst, src, valid_length);
}
else
{
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}
u8 *_src = (u8*)src;
u8 *_dst = (u8*)dst;
const auto num_rows = valid_length / rsx_pitch;
for (u32 row = 0; row < num_rows; ++row)
{
memcpy(_dst, _src, real_pitch);
_src += real_pitch;
_dst += rsx_pitch;
}
}
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
void *dst = get_ptr(get_section_base() + valid_offset);
if (require_manual_shuffle)
{
@ -560,6 +536,7 @@ namespace gl
}
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
{
//AMD driver bug - cannot use pack_swap_bytes
//Manually byteswap texel data
switch (type)
@ -609,15 +586,13 @@ namespace gl
}
}
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
baseclass::on_flush(!result);
return result;
}
/**
* Misc
*/
void destroy()
{
if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty() && managed_texture.get() == nullptr)

View file

@ -310,69 +310,38 @@ namespace vk
sync_timestamp = get_system_time();
}
bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
/**
* Flush
*/
void synchronize(bool blocking, vk::command_buffer& cmd, VkQueue submit_queue)
{
ASSERT(exists());
if (flushed) return true;
AUDIT(is_locked());
if (synchronized)
return;
if (m_device == nullptr)
{
m_device = &cmd.get_command_pool().get_owner();
}
// Return false if a flush occurred 'late', i.e. we had a miss
bool result = true;
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture(true, cmd, submit_queue);
result = false;
}
verify(HERE), real_pitch > 0;
flushed = true;
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT(valid_length > 0);
void* pixels_src = dma_buffer->map(valid_offset, valid_length);
void* pixels_dst = get_ptr(get_section_base() + valid_offset);
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(pixels_dst, pixels_src, valid_length);
}
else
{
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}
const u32 num_rows = valid_length / rsx_pitch;
auto _src = (u8*)pixels_src;
auto _dst = (u8*)pixels_dst;
for (u32 y = 0; y < num_rows; ++y)
{
memcpy(_dst, _src, real_pitch);
_src += real_pitch;
_dst += real_pitch;
}
}
dma_buffer->unmap();
baseclass::on_flush(!result);
//It's highly likely that this surface will be reused, so we just leave resources in place
return result;
copy_texture(blocking, cmd, submit_queue);
}
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized);
return dma_buffer->map(offset, size);
}
void finish_flush()
{
dma_buffer->unmap();
}
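Both backends (GL above in this commit, and this Vulkan section) now expose the same three hooks that the shared flush() in the common cached_texture_section drives through derived(): synchronize() starts (and, if blocking, waits for) the GPU-to-host copy, map_synchronized() returns a CPU-visible pointer into the download buffer, and finish_flush() releases it. A bare-bones sketch of that shape; the hook names match the diff, everything else (cached_section_base, write_to_guest, the fixed buffer) is a hypothetical simplification:

#include <cstdint>

// Simplified CRTP skeleton; the real baseclass carries locking, ranges and predictor state.
template <typename Derived>
struct cached_section_base
{
    bool flushed = false;

    void flush()
    {
        auto* self = static_cast<Derived*>(this);
        self->synchronize(true);                              // hard sync: block until the copy completes
        void* src = self->map_synchronized(0, self->size());  // CPU-visible staging memory
        self->write_to_guest(src);                            // e.g. the exclusion-filtered copy shown earlier
        self->finish_flush();                                  // unmap / release backend resources
        flushed = true;
    }
};

struct vk_like_section : cached_section_base<vk_like_section>
{
    std::uint32_t size() const { return 4096; }
    void  synchronize(bool /*blocking*/) {}                    // submit and wait on the transfer
    void* map_synchronized(std::uint32_t, std::uint32_t) { return buffer; }
    void  write_to_guest(void* /*src*/) {}                     // copy rows into guest memory
    void  finish_flush() {}                                    // unmap the download buffer

    unsigned char buffer[4096]{};
};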
/**
* Misc
*/
void set_unpack_swap_bytes(bool swap_bytes)
{
pack_unpack_swap_bytes = swap_bytes;

View file

@ -225,6 +225,11 @@ namespace rsx
return get_bounds(bounds).overlaps(other);
}
inline bool overlaps(const address_range_vector &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other);
}
inline bool overlaps(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other.get_bounds(bounds));
@ -235,6 +240,11 @@ namespace rsx
return get_bounds(bounds).inside(other);
}
inline bool inside(const address_range_vector &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other);
}
inline bool inside(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other.get_bounds(bounds));
@ -316,7 +326,7 @@ namespace rsx
* Super Pointer
*/
template <typename T = void>
inline T* get_ptr(u32 address)
inline T* get_ptr(u32 address) const
{
return reinterpret_cast<T*>(vm::g_sudo_addr + address);
}