Texture cache section management fixups

Fixes VRAM leaks and incorrect destruction of resources, which could
lead to driver crashes.

Additionally, lock_memory_region is now able to flush superseded
sections. However, because this costs performance for little gain, the
behaviour is gated behind a new debug setting ("Strict Texture
Flushing") in config.yaml.
Authored by Rui Pinheiro on 2018-10-18 23:22:00 +01:00, committed by kd-11
commit af360b78f2
7 changed files with 180 additions and 222 deletions
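
For reference, the switch registered in the last hunk of this diff is named "Strict Texture Flushing" and defaults to false. Assuming it is exposed under the Video node of config.yaml like the neighbouring video options (the placement is an assumption; only the key name comes from the diff), enabling it would look roughly like:

    Video:
      Strict Texture Flushing: true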


@@ -74,7 +74,7 @@ namespace rsx
 //-------------------------
 // It is illegal to have only exclusions except when reading from a range with only RO sections
-ASSERT(flush_and_unprotect_count > 0 || exclude_count == 0 || !cause.is_write());
+ASSERT(flush_and_unprotect_count > 0 || exclude_count == 0 || cause.is_read());
 if (flush_and_unprotect_count == 0 && exclude_count > 0)
 {
 // double-check that only RO sections exists
@@ -87,8 +87,8 @@ namespace rsx
 const auto min_overlap_fault_no_ro = tex_cache_checker.get_minimum_number_of_sections(fault_range);
 const auto min_overlap_invalidate_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range);
-const u16 min_overlap_fault = min_overlap_fault_no_ro.first + (cause.is_write() ? min_overlap_fault_no_ro.second : 0);
-const u16 min_overlap_invalidate = min_overlap_invalidate_no_ro.first + (cause.is_write() ? min_overlap_invalidate_no_ro.second : 0);
+const u16 min_overlap_fault = min_overlap_fault_no_ro.first + (cause.is_read() ? 0 : min_overlap_fault_no_ro.second);
+const u16 min_overlap_invalidate = min_overlap_invalidate_no_ro.first + (cause.is_read() ? 0 : min_overlap_invalidate_no_ro.second);
 AUDIT(min_overlap_fault <= min_overlap_invalidate);
 const u16 min_flush_or_unprotect = min_overlap_fault;
@@ -326,7 +326,7 @@ namespace rsx
 inline bool region_intersects_cache(const address_range &test_range, bool is_writing)
 {
-AUDIT( test_range.valid() );
+AUDIT(test_range.valid());
 // Quick range overlaps with cache tests
 if (!is_writing)
@@ -411,7 +411,7 @@ namespace rsx
 for (const auto &section : sections)
 {
 const auto &new_range = section->get_locked_range();
-AUDIT( new_range.is_page_range() );
+AUDIT(new_range.is_page_range());
 result.merge(new_range);
 }
@@ -473,7 +473,7 @@ namespace rsx
 exclusion_range.intersect(data.invalidate_range);
 // Sanity checks
-AUDIT( exclusion_range.is_page_range() );
+AUDIT(exclusion_range.is_page_range());
 AUDIT(!exclusion_range.overlaps(data.fault_range));
 // Apply exclusion
@@ -508,10 +508,10 @@ namespace rsx
 }
 }
 }
-AUDIT( !ranges_to_unprotect.empty() );
+AUDIT(!ranges_to_unprotect.empty());
 // Exclude the fault range if told to do so (this means the fault_range got unmapped or is otherwise invalid)
-if (data.cause.exclude_fault_range())
+if (data.cause.keep_fault_range_protection())
 {
 ranges_to_unprotect.exclude(data.fault_range);
 ranges_to_protect_ro.exclude(data.fault_range);
@@ -521,10 +521,10 @@ namespace rsx
 }
 else
 {
-AUDIT( ranges_to_unprotect.inside(data.invalidate_range) );
-AUDIT( ranges_to_protect_ro.inside(data.invalidate_range) );
+AUDIT(ranges_to_unprotect.inside(data.invalidate_range));
+AUDIT(ranges_to_protect_ro.inside(data.invalidate_range));
 }
-AUDIT( !ranges_to_protect_ro.overlaps(ranges_to_unprotect) );
+AUDIT(!ranges_to_protect_ro.overlaps(ranges_to_unprotect));
 // Unprotect and discard
 protect_ranges(ranges_to_unprotect, utils::protection::rw);
@@ -540,11 +540,9 @@ namespace rsx
 // Return a set containing all sections that should be flushed/unprotected/reprotected
 std::atomic<u64> m_last_section_cache_tag = 0;
-intersecting_set get_intersecting_set(const address_range &fault_range, bool is_writing)
+intersecting_set get_intersecting_set(const address_range &fault_range)
 {
-(void)is_writing; // silence unused formal parameter warning; used only for debug purposes if TEXTURE_CACHE_DEBUG is defined
-AUDIT( fault_range.is_page_range() );
+AUDIT(fault_range.is_page_range());
 const u64 cache_tag = ++m_last_section_cache_tag;
@@ -623,7 +621,7 @@ namespace rsx
 }
 }
-AUDIT( result.invalidate_range.is_page_range() );
+AUDIT(result.invalidate_range.is_page_range());
 #ifdef TEXTURE_CACHE_DEBUG
 // naive check that sections are not duplicated in the results
@@ -636,12 +634,6 @@ namespace rsx
 }
 verify(HERE), count == 1;
 }
-// Check that the number of sections we "found" matches the sections known to be in the invalidation range
-const u32 count = static_cast<u32>(result.sections.size());
-const auto inv_min_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range);
-const u16 inv_min = inv_min_no_ro.first + (is_writing ? inv_min_no_ro.second : 0);
-ASSERT(count >= inv_min);
 #endif //TEXTURE_CACHE_DEBUG
 return result;
@@ -657,18 +649,19 @@ namespace rsx
 tex_cache_checker.verify();
 #endif // TEXTURE_CACHE_DEBUG
-AUDIT( cause.valid() );
-AUDIT( fault_range_in.valid() );
+AUDIT(cause.valid());
+AUDIT(fault_range_in.valid());
 address_range fault_range = fault_range_in.to_page_range();
-auto trampled_set = std::move(get_intersecting_set(fault_range, cause.is_write()));
+auto trampled_set = std::move(get_intersecting_set(fault_range));
 thrashed_set result = {};
 result.cause = cause;
 result.fault_range = fault_range;
 result.invalidate_range = trampled_set.invalidate_range;
-if (cause == invalidation_cause::unmap && !trampled_set.sections.empty())
+// Fast code-path for keeping the fault range protection when not flushing anything
+if (cause.keep_fault_range_protection() && cause.skip_flush() && !trampled_set.sections.empty())
 {
 // We discard all sections fully inside fault_range
 for (auto &obj : trampled_set.sections)
@@ -676,16 +669,15 @@ namespace rsx
 auto &tex = *obj;
 if (tex.inside(fault_range, section_bounds::locked_range))
 {
-// Discard and destroy - this section won't be needed any more
-tex.discard();
-tex.destroy();
+// Discard - this section won't be needed any more
+tex.discard(/* set_dirty */ true);
 }
-else
+else if (tex.overlaps(fault_range, section_bounds::locked_range))
 {
-if (tex.is_flushable())
+if (g_cfg.video.strict_texture_flushing && tex.is_flushable())
 {
 // TODO: Flush only the part outside the fault_range
-LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (invalidate_range_impl_base)", tex.get_section_base());
+LOG_TODO(RSX, "Flushable section data may have been lost");
 }
 tex.set_dirty(true);
@@ -709,15 +701,6 @@ namespace rsx
 return result;
 }
 AUDIT(fault_range.inside(invalidate_range));
-// There are textures that need to be invalidated, we now trigger another intersecting_set search on it
-// "invalidate_range" contains the range of sections that overlaps the unmap boundary
-// We set "fault_range = invalidate_range" to cause a forced invalidation over that whole range,
-// and proceed as normal.
-// NOTE: result.fault_range *must* stay the original fault_range otherwise everything breaks
-fault_range = invalidate_range;
-trampled_set = std::move(get_intersecting_set(fault_range, true));
-result.invalidate_range = trampled_set.invalidate_range;
 }
@@ -730,16 +713,19 @@ namespace rsx
 {
 auto &tex = *obj;
+if (!tex.is_locked())
+continue;
 const rsx::section_bounds bounds = tex.get_overlap_test_bounds();
 if (
 // RO sections during a read invalidation can be ignored (unless there are flushables in trampled_set, since those could overwrite RO data)
 // TODO: Also exclude RO sections even if there are flushables
-(invalidation_keep_ro_during_read && !trampled_set.has_flushables && !cause.is_write() && tex.get_protection() == utils::protection::ro) ||
+(invalidation_keep_ro_during_read && !trampled_set.has_flushables && cause.is_read() && tex.get_protection() == utils::protection::ro) ||
 // Sections that are not fully contained in invalidate_range can be ignored
 !tex.inside(trampled_set.invalidate_range, bounds) ||
-// Unsynchronized sections that do not overlap the fault range directly can also be ignored
-(invalidation_ignore_unsynchronized && tex.is_flushable() && !tex.is_synchronized() && !tex.overlaps(fault_range, bounds))
+// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
+(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !tex.overlaps(fault_range, bounds))
 )
 {
 // False positive
@@ -749,12 +735,12 @@ namespace rsx
 if (tex.is_flushable())
 {
-//Write if and only if no one else has trashed section memory already
-//TODO: Proper section management should prevent this from happening
-//TODO: Blit engine section merge support and/or partial texture memory buffering
+// Write if and only if no one else has trashed section memory already
+// TODO: Proper section management should prevent this from happening
+// TODO: Blit engine section merge support and/or partial texture memory buffering
 if (tex.is_dirty() || !tex.test_memory_head() || !tex.test_memory_tail())
 {
-//Contents clobbered, destroy this
+// Contents clobbered, destroy this
 if (!tex.is_dirty())
 {
 tex.set_dirty(true);
@@ -771,10 +757,10 @@ namespace rsx
 }
 else
 {
-//allow_flush = false and not synchronized
+// deferred_flush = true and not synchronized
 if (!tex.is_dirty())
 {
-AUDIT( tex.get_memory_read_flags() != memory_read_flags::flush_always );
+AUDIT(tex.get_memory_read_flags() != memory_read_flags::flush_always);
 tex.set_dirty(true);
 }
@@ -795,7 +781,7 @@ namespace rsx
 const bool has_flushables = !result.sections_to_flush.empty();
 const bool has_unprotectables = !result.sections_to_unprotect.empty();
-if (cause.is_deferred() && has_flushables)
+if (cause.deferred_flush() && has_flushables)
 {
 // There is something to flush, but we've been asked to defer it
 result.num_flushable = static_cast<int>(result.sections_to_flush.size());
@@ -804,24 +790,24 @@ namespace rsx
 }
 else if (has_flushables || has_unprotectables)
 {
-AUDIT(!has_flushables || cause.allow_flush());
+AUDIT(!has_flushables || !cause.deferred_flush());
 // We have something to flush and are allowed to flush now
 // or there is nothing to flush but we have something to unprotect
-if (has_flushables)
+if (has_flushables && !cause.skip_flush())
 {
 flush_set(result, std::forward<Args>(extras)...);
 }
 unprotect_set(result);
-//Everything has been handled
+// Everything has been handled
 result.clear_sections();
 }
 else
 {
 // This is a read and all overlapping sections were RO and were excluded
-AUDIT(!cause.is_write() && !result.sections_to_exclude.empty());
+AUDIT(cause.is_read() && !result.sections_to_exclude.empty());
 // We did not handle this violation
 result.clear_sections();
@@ -981,8 +967,7 @@ namespace rsx
 auto &block = m_storage.block_for(range);
 section_storage_type *best_fit = nullptr;
-section_storage_type *first_dirty = nullptr;
-section_storage_type *mismatch = nullptr;
+section_storage_type *reuse = nullptr;
 #ifdef TEXTURE_CACHE_DEBUG
 section_storage_type *res = nullptr;
 #endif
@@ -1003,10 +988,6 @@ namespace rsx
 res = &tex;
 #endif
 }
-else
-{
-mismatch = &tex;
-}
 }
 else if (best_fit == nullptr && tex.can_be_reused())
 {
@@ -1014,9 +995,9 @@ namespace rsx
 best_fit = &tex;
 }
 }
-else if (first_dirty == nullptr && tex.can_be_reused())
+else if (reuse == nullptr && tex.can_be_reused())
 {
-first_dirty = &tex;
+reuse = &tex;
 }
 }
@@ -1025,9 +1006,9 @@ namespace rsx
 return res;
 #endif
-if (mismatch != nullptr)
+if (best_fit != nullptr)
 {
-auto &tex = *mismatch;
+auto &tex = *best_fit;
 LOG_WARNING(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)",
 range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps());
 }
@@ -1036,19 +1017,25 @@ namespace rsx
 return nullptr;
 // If found, use the best fitting section
-if (best_fit)
+if (best_fit != nullptr)
 {
-best_fit->destroy();
+if (best_fit->exists())
+{
+best_fit->destroy();
+}
 return best_fit;
 }
 // Return the first dirty section found, if any
-if (first_dirty != nullptr)
+if (reuse != nullptr)
 {
-first_dirty->destroy();
-return first_dirty;
+if (reuse->exists())
+{
+reuse->destroy();
+}
+return reuse;
 }
 // Create and return a new section
@@ -1072,30 +1059,42 @@ namespace rsx
 return nullptr;
 }
-template <typename ...Args>
-void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
+template <typename ...FlushArgs, typename ...Args>
+void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, const std::tuple<FlushArgs...>& flush_extras, Args&&... extras)
 {
-AUDIT( g_cfg.video.write_color_buffers ); // this method is only called when WCB is enabled
+AUDIT(g_cfg.video.write_color_buffers); // this method is only called when WCB is enabled
 std::lock_guard lock(m_cache_mutex);
+// Find a cached section to use
 section_storage_type& region = *find_cached_texture(rsx_range, true, false);
-if (region.get_context() != texture_upload_context::framebuffer_storage && region.exists())
+if (!region.is_locked())
 {
+// Invalidate sections from surface cache occupying same address range
+std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo), flush_extras));
+}
+// Prepare and initialize fbo region
+if (region.exists() && region.get_context() != texture_upload_context::framebuffer_storage)
+{
+AUDIT(region.matches(rsx_range));
 //This space was being used for other purposes other than framebuffer storage
 //Delete used resources before attaching it to framebuffer memory
 read_only_tex_invalidate = true;
 // We are going to reprotect this section in a second, so discard it here
 if (region.is_locked())
+{
 region.discard();
+}
 // Destroy the resources
 region.destroy();
 }
-if (!region.is_locked())
+if (!region.is_locked() || region.can_be_reused())
 {
 // New region, we must prepare it
 region.reset(rsx_range);
@@ -1105,8 +1104,8 @@ namespace rsx
 }
 else
 {
-// Re-using locked fbo region
-AUDIT(region.matches(rsx_range));
+// Re-using clean fbo region
+ASSERT(region.matches(rsx_range));
 ASSERT(region.get_context() == texture_upload_context::framebuffer_storage);
 ASSERT(region.get_image_type() == rsx::texture_dimension_extended::texture_dimension_2d);
 }
@@ -1126,63 +1125,6 @@ namespace rsx
 AUDIT(m_flush_always_cache.find(region.get_section_range()) != m_flush_always_cache.end());
 }
-// Test for invalidated sections from surface cache occupying same address range
-const auto mem_range = rsx_range.to_page_range();
-const auto &overlapped = find_texture_from_range(mem_range);
-bool found_region = false;
-for (auto* surface : overlapped)
-{
-if (surface == &region)
-{
-found_region = true;
-continue;
-}
-if (surface->get_context() == rsx::texture_upload_context::framebuffer_storage)
-{
-if (surface->get_section_base() != rsx_range.start)
-// HACK: preserve other overlapped sections despite overlap unless new section is superseding
-// TODO: write memory to cell or redesign sections to preserve the data
-// TODO ruipin: can this be done now?
-continue;
-}
-// Memory is shared with another surface
-// Discard it - the backend should ensure memory contents are preserved if needed
-// TODO ruipin: This fails the protection checker. Refactor to use invalidate_range_impl_base
-surface->set_dirty(true);
-if (surface->is_locked())
-{
-AUDIT(surface->get_memory_read_flags() != memory_read_flags::flush_always);
-if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail())
-{
-if (!surface->inside(rsx_range, rsx::section_bounds::full_range))
-{
-// TODO: This section contains data that should be flushed
-LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (lock_memory_region)", surface->get_section_base());
-}
-}
-if (surface->inside(region, rsx::section_bounds::locked_range))
-{
-// Since this surface falls inside "region", we don't need to unlock and can simply discard
-surface->discard();
-}
-else
-{
-// TODO: Exclude other NA/RO regions overlapping this one but not "region".
-// Exclude region to avoid having the region's locked_range unprotected for a split second
-const auto &srfc_rng = surface->get_section_range();
-LOG_TODO(RSX, "Valid region data may have been incorrectly unprotected (0x%x-0x%x)", srfc_rng.start, srfc_rng.end);
-surface->unprotect();
-}
-}
-}
-ASSERT(found_region);
 // Delay protection until here in case the invalidation block above has unprotected pages in this range
 region.reprotect(utils::protection::no, { 0, rsx_range.length() });
 update_cache_tag();
@@ -1200,14 +1142,14 @@ namespace rsx
 auto* region_ptr = find_cached_texture(memory_range, false, false);
 if (region_ptr == nullptr)
 {
-AUDIT( m_flush_always_cache.find(memory_range) == m_flush_always_cache.end() );
-LOG_ERROR(RSX, "set_memory_flags(0x%x, 0x%x, %d): region_ptr == nullptr");
+AUDIT(m_flush_always_cache.find(memory_range) == m_flush_always_cache.end());
+LOG_ERROR(RSX, "set_memory_flags(0x%x, 0x%x, %d): region_ptr == nullptr", memory_range.start, memory_range.end, static_cast<u32>(flags));
 return;
 }
 auto& region = *region_ptr;
-if (region.is_dirty() || !region.exists() || region.get_context() != texture_upload_context::framebuffer_storage)
+if (!region.exists() || region.is_dirty() || region.get_context() != texture_upload_context::framebuffer_storage)
 {
 #ifdef TEXTURE_CACHE_DEBUG
 if (!region.is_dirty())
@@ -1309,7 +1251,7 @@ namespace rsx
 {
 //Test before trying to acquire the lock
 const auto range = page_for(address);
-if (!region_intersects_cache(range, cause.is_write()))
+if (!region_intersects_cache(range, !cause.is_read()))
 return{};
 std::lock_guard lock(m_cache_mutex);
@@ -1320,7 +1262,7 @@ namespace rsx
 thrashed_set invalidate_range(const address_range &range, invalidation_cause cause, Args&&... extras)
 {
 //Test before trying to acquire the lock
-if (!region_intersects_cache(range, cause.is_write()))
+if (!region_intersects_cache(range, !cause.is_read()))
 return {};
 std::lock_guard lock(m_cache_mutex);
@@ -1332,7 +1274,7 @@ namespace rsx
 {
 std::lock_guard lock(m_cache_mutex);
-AUDIT(data.cause.is_deferred());
+AUDIT(data.cause.deferred_flush());
 AUDIT(!data.flushed);
 if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
@@ -2306,7 +2248,7 @@ namespace rsx
 //TODO: Check for other types of format mismatch
 const address_range dst_range = address_range::start_length(dst_address, dst.pitch * dst.height);
-AUDIT( cached_dest == nullptr || cached_dest->overlaps(dst_range, section_bounds::full_range) );
+AUDIT(cached_dest == nullptr || cached_dest->overlaps(dst_range, section_bounds::full_range));
 if (format_mismatch)
 {
 lock.upgrade();
@@ -2528,7 +2470,7 @@ namespace rsx
 return m_storage.m_unreleased_texture_objects;
 }
-virtual const u32 get_texture_memory_in_use() const
+virtual const u64 get_texture_memory_in_use() const
 {
 return m_storage.m_texture_memory_in_use;
 }
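
The reworked lock_memory_region above forwards a caller-supplied tuple of backend flush arguments into invalidate_range_impl_base via std::tuple_cat and std::apply. A minimal, self-contained sketch of that forwarding pattern follows; the class and function names are illustrative stand-ins, not the RPCS3 API:

    #include <cstdio>
    #include <tuple>

    struct texture_cache_sketch
    {
        // Worker that needs backend-specific extras (illustrative signature).
        template <typename... FlushArgs>
        void invalidate_superseded(int range, const FlushArgs&... extras)
        {
            std::printf("invalidating range %d with %zu flush extra(s)\n", range, sizeof...(FlushArgs));
        }

        // The caller packs the extras into a tuple; "this" and the fixed arguments
        // are spliced in front of it with std::tuple_cat, and the whole pack is
        // invoked through std::apply -- the same shape as the new lock_memory_region.
        template <typename... FlushArgs>
        void lock_memory_region(int range, const std::tuple<FlushArgs...>& flush_extras)
        {
            std::apply(&texture_cache_sketch::invalidate_superseded<FlushArgs...>,
                       std::tuple_cat(std::make_tuple(this, range), flush_extras));
        }
    };

    int main()
    {
        texture_cache_sketch cache;
        cache.lock_memory_region(1, std::make_tuple(42, 3.5f)); // stand-ins for a command buffer and a queue
        cache.lock_memory_region(2, std::tuple<>{});            // a backend that needs nothing extra
    }

The advantage of the tuple is that the flush arguments stay strongly typed while the trailing Args&&... extras keep their old meaning for the upload path.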


@@ -47,57 +47,61 @@ namespace rsx
 deferred_read,
 write,
 deferred_write,
-unmap
+unmap, // fault range is being unmapped
+reprotect, // we are going to reprotect the fault range
+superseded_by_fbo // used by texture_cache::locked_memory_region
 } cause;
-bool valid() const
+constexpr bool valid() const
 {
 return cause != invalid;
 }
-bool is_read() const
+constexpr bool is_read() const
 {
 AUDIT(valid());
 return (cause == read || cause == deferred_read);
 }
-bool is_write() const
-{
-AUDIT(valid());
-return (cause == write || cause == deferred_write || cause == unmap);
-}
-bool is_deferred() const
+constexpr bool deferred_flush() const
 {
 AUDIT(valid());
 return (cause == deferred_read || cause == deferred_write);
 }
-bool allow_flush() const
-{
-return (cause == read || cause == write || cause == unmap);
-}
-bool exclude_fault_range() const
+constexpr bool destroy_fault_range() const
 {
+AUDIT(valid());
 return (cause == unmap);
 }
-invalidation_cause undefer() const
+constexpr bool keep_fault_range_protection() const
 {
-AUDIT(is_deferred());
-if (is_read())
+AUDIT(valid());
+return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
+}
+bool skip_flush() const
+{
+AUDIT(valid());
+return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
+}
+constexpr invalidation_cause undefer() const
+{
+AUDIT(deferred_flush());
+if (cause == deferred_read)
 return read;
-else if (is_write())
+else if (cause == deferred_write)
 return write;
 else
 fmt::throw_exception("Unreachable " HERE);
 }
-invalidation_cause() : cause(invalid) {}
-invalidation_cause(enum_type _cause) : cause(_cause) {}
+constexpr invalidation_cause() : cause(invalid) {}
+constexpr invalidation_cause(enum_type _cause) : cause(_cause) {}
 operator enum_type&() { return cause; }
-operator enum_type() const { return cause; }
+constexpr operator enum_type() const { return cause; }
 };
 struct typeless_xfer
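
For readers skimming the invalidation_cause hunk above: the write/unmap-centric predicates are replaced by intent-specific ones, and flushing of sections superseded by an FBO is skipped unless the new strict flushing option is set. A condensed, standalone sketch of how the predicates relate (g_strict_flushing stands in for g_cfg.video.strict_texture_flushing; this is an illustration, not the exact class):

    #include <cassert>

    static bool g_strict_flushing = false; // stand-in for g_cfg.video.strict_texture_flushing

    struct invalidation_cause
    {
        enum enum_type { invalid, read, deferred_read, write, deferred_write, unmap, reprotect, superseded_by_fbo } cause = invalid;

        constexpr bool is_read() const             { return cause == read || cause == deferred_read; }
        constexpr bool deferred_flush() const      { return cause == deferred_read || cause == deferred_write; }
        constexpr bool destroy_fault_range() const { return cause == unmap; }
        constexpr bool keep_fault_range_protection() const
        {
            return cause == unmap || cause == reprotect || cause == superseded_by_fbo;
        }
        // Superseded sections are only written back when "Strict Texture Flushing" is enabled.
        bool skip_flush() const
        {
            return cause == unmap || (!g_strict_flushing && cause == superseded_by_fbo);
        }
    };

    int main()
    {
        invalidation_cause c{ invalidation_cause::superseded_by_fbo };
        assert(c.keep_fault_range_protection() && c.skip_flush());
        g_strict_flushing = true;
        assert(!c.skip_flush()); // strict flushing forces the superseded data to be flushed
    }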
@@ -417,6 +421,9 @@ namespace rsx
 {
 for (auto &section : *this)
 {
+if (section.is_locked())
+section.unprotect();
 section.destroy();
 }
@@ -580,7 +587,7 @@ namespace rsx
 public:
 std::atomic<u32> m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory
-std::atomic<u32> m_texture_memory_in_use = { 0 };
+std::atomic<u64> m_texture_memory_in_use = { 0 };
 // Constructor
 ranged_storage(texture_cache_type *tex_cache) :
@@ -716,8 +723,8 @@ namespace rsx
 void on_section_resources_destroyed(const section_storage_type &section)
 {
-u32 size = section.get_section_size();
-u32 prev_size = m_texture_memory_in_use.fetch_sub(size);
+u64 size = section.get_section_size();
+u64 prev_size = m_texture_memory_in_use.fetch_sub(size);
 ASSERT(prev_size >= size);
 }
@@ -1036,8 +1043,8 @@ namespace rsx
 AUDIT(memory_range.valid());
 AUDIT(!is_locked());
-// Invalidate if necessary
-invalidate_range();
+// Destroy if necessary
+destroy();
 // Superclass
 rsx::buffered_section::reset(memory_range);
@@ -1083,10 +1090,6 @@ namespace rsx
 */
 inline bool is_destroyed() const { return !exists(); } // this section is currently destroyed
-inline bool can_destroy() const {
-return !is_destroyed() && is_tracked();
-} // This section may be destroyed
 protected:
 void on_section_resources_created()
 {
@@ -1107,16 +1110,12 @@ namespace rsx
 triggered_exists_callbacks = false;
 AUDIT(valid_range());
+ASSERT(!is_locked());
+ASSERT(is_managed());
 // Set dirty
 set_dirty(true);
-// Unlock
-if (is_locked())
-{
-unprotect();
-}
 // Trigger callbacks
 m_block->on_section_resources_destroyed(*derived());
 m_storage->on_section_resources_destroyed(*derived());
@@ -1204,14 +1203,9 @@ namespace rsx
 /**
 * Misc.
 */
-bool is_tracked() const
-{
-return !exists() || (get_context() != framebuffer_storage);
-}
 bool is_unreleased() const
 {
-return is_tracked() && exists() && is_dirty() && !is_locked();
+return exists() && is_dirty() && !is_locked();
 }
 bool can_be_reused() const
@@ -1530,12 +1524,17 @@ namespace rsx
 /**
 * Derived wrappers
 */
-inline void destroy()
+void destroy()
 {
 derived()->destroy();
 }
-inline bool exists() const
+bool is_managed() const
+{
+return derived()->is_managed();
+}
+bool exists() const
 {
 return derived()->exists();
 }
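
Two details in the hunks above work together on the accounting side of the VRAM leaks: the in-use counter is widened to a 64-bit atomic, and on_section_resources_destroyed verifies that the subtraction cannot underflow. A minimal sketch of that pattern, with illustrative names:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    static std::atomic<std::uint64_t> g_texture_memory_in_use{ 0 }; // stand-in for ranged_storage's counter

    void on_section_resources_created(std::uint64_t size)
    {
        g_texture_memory_in_use.fetch_add(size);
    }

    void on_section_resources_destroyed(std::uint64_t size)
    {
        // fetch_sub returns the previous value, so destroying more memory than was
        // ever accounted for (e.g. a double destroy) is caught immediately.
        [[maybe_unused]] const std::uint64_t prev_size = g_texture_memory_in_use.fetch_sub(size);
        assert(prev_size >= size);
    }

    int main()
    {
        on_section_resources_created(512ull << 20);   // a 512 MiB section
        on_section_resources_destroyed(512ull << 20); // balances out; the assert fires on a mismatch
    }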


@@ -390,7 +390,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
 m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
-color_format.format, color_format.type, color_format.swap_bytes);
+std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
 }
 }
@@ -401,7 +401,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
 const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
 m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
-depth_format_gl.format, depth_format_gl.type, true);
+std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
 }
 }


@@ -253,18 +253,12 @@ namespace gl
 public:
 using baseclass::cached_texture_section;
-void reset(const utils::address_range &memory_range)
-{
-vram_texture = nullptr;
-managed_texture.reset();
-baseclass::reset(memory_range);
-}
 void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,
 gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes)
 {
-vram_texture = static_cast<gl::viewable_image*>(image);
+auto new_texture = static_cast<gl::viewable_image*>(image);
+ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
+vram_texture = new_texture;
 if (read_only)
 {
@@ -277,6 +271,7 @@ namespace gl
 init_buffer();
 aa_mode = static_cast<gl::render_target*>(image)->read_aa_mode;
+ASSERT(managed_texture.get() == nullptr);
 }
 flushed = false;
@@ -302,6 +297,8 @@ namespace gl
 void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps)
 {
+ASSERT(!exists() || !is_managed() || vram_texture == image);
 //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle
 this->width = width;
 this->height = height;
@@ -353,6 +350,8 @@ namespace gl
 void copy_texture(bool=false)
 {
+ASSERT(exists());
 if (!pbo_id)
 {
 init_buffer();
@@ -466,8 +465,10 @@ namespace gl
 bool flush()
 {
+ASSERT(exists());
 if (flushed) return true; //Already written, ignore
-AUDIT( is_locked() );
+AUDIT(is_locked());
 bool result = true;
 if (!synchronized)
@@ -493,7 +494,7 @@ namespace gl
 const auto valid_range = get_confirmed_range_delta();
 const u32 valid_offset = valid_range.first;
 const u32 valid_length = valid_range.second;
-AUDIT( valid_length > 0 );
+AUDIT(valid_length > 0);
 void *dst = get_ptr(get_section_base() + valid_offset);
 glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
@@ -624,11 +625,16 @@ namespace gl
 baseclass::on_section_resources_destroyed();
 }
-inline bool exists() const
+bool exists() const
 {
 return (vram_texture != nullptr);
 }
+bool is_managed() const
+{
+return !exists() || managed_texture.get() != nullptr;
+}
 texture::format get_format() const
 {
 return format;
@@ -951,7 +957,7 @@ namespace gl
 if (context != rsx::texture_upload_context::blit_engine_dst)
 {
-AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always );
+AUDIT(cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always);
 read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. Ask kd-11
 cached.protect(utils::protection::ro);
 }
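
Both the GL section above and the Vulkan one below gain an is_managed() query next to exists(): a section exists once it points at a GPU image, but it is only managed when the cache owns that image's lifetime (render-target images are borrowed from the surface cache). A stripped-down sketch of that ownership split, assuming a unique_ptr models the owned case as in the real sections:

    #include <cassert>
    #include <memory>

    struct gpu_image {};

    struct cached_section_sketch
    {
        gpu_image* vram_texture = nullptr;          // view of whatever image backs this section
        std::unique_ptr<gpu_image> managed_texture; // set only when the cache owns the image

        bool exists() const     { return vram_texture != nullptr; }
        bool is_managed() const { return !exists() || managed_texture != nullptr; }

        void attach_owned(std::unique_ptr<gpu_image> image)
        {
            managed_texture = std::move(image);
            vram_texture = managed_texture.get();
        }

        void attach_borrowed(gpu_image* image) // e.g. a framebuffer owned by the surface cache
        {
            managed_texture.reset();
            vram_texture = image;
        }
    };

    int main()
    {
        cached_section_sketch ro, fbo;
        ro.attach_owned(std::make_unique<gpu_image>());
        gpu_image render_target;
        fbo.attach_borrowed(&render_target);
        assert(ro.exists() && ro.is_managed());
        assert(fbo.exists() && !fbo.is_managed()); // must never be handed to the discard pool
    }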


@@ -2921,7 +2921,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
 m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
-m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
+m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<vk::command_buffer&, VkQueue>{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
 }
 }
@@ -2932,7 +2932,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
 const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
 m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
-m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
+m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<vk::command_buffer&, VkQueue>{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, gcm_format, false);
 }
 }
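
The Vulkan call sites above pack a reference to the current command buffer plus the graphics queue into the flush tuple, while the GL sites pass an empty tuple. The useful property here is that std::tuple can carry a reference member, so the command buffer is not copied on the way through; a small illustration with placeholder types (not the real vk:: wrappers):

    #include <cstdio>
    #include <tuple>

    struct command_buffer { int id = 7; };
    using queue_handle = int;

    template <typename... FlushArgs>
    void lock_memory_region(const std::tuple<FlushArgs...>& flush_extras)
    {
        // Only unpacked if a superseded section actually has to be flushed.
        if constexpr (sizeof...(FlushArgs) > 0)
        {
            std::printf("flushing with command buffer %d\n", std::get<0>(flush_extras).id); // still the caller's buffer
        }
    }

    int main()
    {
        command_buffer cmd;
        queue_handle gfx_queue = 0;

        lock_memory_region(std::tuple<>{});                                              // GL-style: no extras needed
        lock_memory_region(std::tuple<command_buffer&, queue_handle>{ cmd, gfx_queue }); // VK-style: reference + handle
    }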


@@ -28,16 +28,12 @@ namespace vk
 public:
 using baseclass::cached_texture_section;
-void reset(const utils::address_range &memory_range)
-{
-if (memory_range.length() > get_section_size())
-release_dma_resources();
-baseclass::reset(memory_range);
-}
 void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)
 {
+auto new_texture = static_cast<vk::viewable_image*>(image);
+ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
+vram_texture = new_texture;
 width = w;
 height = h;
 this->depth = depth;
@@ -46,8 +42,6 @@ namespace vk
 this->gcm_format = gcm_format;
 this->pack_unpack_swap_bytes = pack_swap_bytes;
-vram_texture = static_cast<vk::viewable_image*>(image);
 if (managed)
 {
 managed_texture.reset(vram_texture);
@@ -85,18 +79,27 @@ namespace vk
 void destroy()
 {
+if (!exists())
+return;
 m_tex_cache->on_section_destroyed(*this);
 vram_texture = nullptr;
+ASSERT(managed_texture.get() == nullptr);
 release_dma_resources();
 baseclass::on_section_resources_destroyed();
 }
-inline bool exists() const
+bool exists() const
 {
 return (vram_texture != nullptr);
 }
+bool is_managed() const
+{
+return !exists() || managed_texture.get() != nullptr;
+}
 vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap)
 {
 return vram_texture->get_view(remap_encoding, remap);
@@ -130,6 +133,8 @@ namespace vk
 void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, VkQueue submit_queue)
 {
+ASSERT(exists());
 if (m_device == nullptr)
 {
 m_device = &cmd.get_command_pool().get_owner();
@@ -282,8 +287,10 @@ namespace vk
 bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
 {
+ASSERT(exists());
 if (flushed) return true;
-AUDIT( is_locked() );
+AUDIT(is_locked());
 if (m_device == nullptr)
 {
@@ -306,7 +313,7 @@ namespace vk
 const auto valid_range = get_confirmed_range_delta();
 const u32 valid_offset = valid_range.first;
 const u32 valid_length = valid_range.second;
-AUDIT( valid_length > 0 );
+AUDIT(valid_length > 0);
 void* pixels_src = dma_buffer->map(valid_offset, valid_length);
 void* pixels_dst = get_ptr(get_section_base() + valid_offset);
@@ -398,7 +405,7 @@ namespace vk
 view = std::move(_view);
 }
-discarded_storage(cached_texture_section& tex)
+discarded_storage(vk::cached_texture_section& tex)
 {
 combined_image = std::move(tex.get_texture());
 block_size = tex.get_section_size();
@@ -415,8 +422,11 @@ namespace vk
 public:
 virtual void on_section_destroyed(cached_texture_section& tex)
 {
-m_discarded_memory_size += tex.get_section_size();
-m_discardable_storage.push_back(tex);
+if (tex.is_managed())
+{
+m_discarded_memory_size += tex.get_section_size();
+m_discardable_storage.push_back(tex);
+}
 }
 private:
@@ -1233,7 +1243,7 @@ namespace vk
 return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
 }
-const u32 get_texture_memory_in_use() const override
+const u64 get_texture_memory_in_use() const override
 {
 return m_storage.m_texture_memory_in_use;
 }
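
The Vulkan section's destroy() above is now safe to call on a section that never got resources, and it asserts that the cache-owned image has already been handed off (to the discardable pool, which now only accepts managed sections) before the DMA buffer is released. A tiny sketch of that guard, with placeholder resource types rather than the real vk:: members:

    #include <cassert>
    #include <memory>

    struct image {};
    struct buffer {};

    struct section_sketch
    {
        image* vram_texture = nullptr;
        std::unique_ptr<image> managed_texture;
        std::unique_ptr<buffer> dma_buffer;

        bool exists() const { return vram_texture != nullptr; }

        void destroy()
        {
            if (!exists())
                return; // idempotent: destroying a never-created or already-destroyed section is a no-op

            // by this point any owned image must already have been moved out (or never existed)
            vram_texture = nullptr;
            assert(managed_texture == nullptr);

            dma_buffer.reset(); // stands in for release_dma_resources()
        }
    };

    int main()
    {
        section_sketch s;
        s.destroy(); // nothing to do
        s.destroy(); // still fine
    }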


@@ -437,6 +437,7 @@ struct cfg_root : cfg::node
 cfg::_bool disable_vulkan_mem_allocator{this, "Disable Vulkan Memory Allocator", false};
 cfg::_bool full_rgb_range_output{this, "Use full RGB output range", true}; // Video out dynamic range
 cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false};
+cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false};
 cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1};
 cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1};
 cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100};