From e3944bc67fdd6ae542aae4b2fec75441f7f804a5 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 8 May 2021 19:08:32 +0300 Subject: [PATCH] rsx: Handle transfer_read differently from transfer_write - Transfer writes are expected to clobber surface cache contents. Do NOT reload from CPU memory for writes. - TODO: During transfer write to surface cache objects, lock memory if it was unlocked to avoid silly problems. --- rpcs3/Emu/RSX/Common/TextureUtils.h | 56 ++++++++++++++++++-- rpcs3/Emu/RSX/Common/surface_store.h | 7 ++- rpcs3/Emu/RSX/Common/surface_utils.h | 2 +- rpcs3/Emu/RSX/Common/texture_cache.h | 29 +++++----- rpcs3/Emu/RSX/Common/texture_cache_helpers.h | 8 +-- rpcs3/Emu/RSX/GL/GLPresent.cpp | 4 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 16 +++++- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 4 +- rpcs3/Emu/RSX/VK/VKPresent.cpp | 4 +- rpcs3/Emu/RSX/VK/VKRenderTargets.cpp | 8 +-- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 4 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 2 +- 12 files changed, 106 insertions(+), 38 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 29727b28f9..41b1a6b183 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -36,11 +36,59 @@ namespace rsx bytes = 2 }; - enum surface_access : u32 + class surface_access // This is simply a modified enum class { - read = 0, - write = 1, - transfer = 2 + public: + // Publicly visible enumerators. Each one is a distinct non-zero bit: a zero value would pass every is_*() mask test below. + enum + { + shader_read = 1, + shader_write = 2, + transfer_read = 4, + transfer_write = 8, + }; + + private: + // Meta + enum + { + all_writes = (shader_write | transfer_write), + all_reads = (shader_read | transfer_read), + all_transfer = (transfer_read | transfer_write) + }; + + u32 value_; + + public: + // Ctor + surface_access(u32 value) : value_(value) + {} + + // Quick helpers + inline bool is_read() const + { + return !(value_ & ~all_reads); + } + + inline bool is_write() const + { + return !(value_ & ~all_writes); + } + + inline 
bool is_transfer() const + { + return !(value_ & ~all_transfer); + } + + bool operator == (const surface_access& other) const + { + return value_ == other.value_; + } + + bool operator == (u32 other) const + { + return value_ == other; + } }; // Defines how the underlying PS3-visible memory backed by a texture is accessed diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 285451dfc4..5e942bcc61 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -837,8 +837,11 @@ namespace rsx continue; auto surface = tex_info.second.get(); - if (access == rsx::surface_access::transfer && surface->write_through()) + if (access.is_transfer() && access.is_read() && surface->write_through()) + { + // The surface has no data other than what can be loaded from CPU continue; + } if (!rsx::pitch_compatible(surface, required_pitch, required_height)) continue; @@ -1128,7 +1131,7 @@ namespace rsx if (surface->dirty()) { // Force memory barrier to release some resources - surface->memory_barrier(cmd, rsx::surface_access::read); + surface->memory_barrier(cmd, rsx::surface_access::shader_read); } else if (!surface->test()) { diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index be6318f351..7e4e04503e 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -623,7 +623,7 @@ namespace rsx if (spp == 1 || sample_layout == rsx::surface_sample_layout::ps3) return; - ensure(access_type != rsx::surface_access::write); + ensure(access_type.is_read() || access_type.is_transfer()); transform_samples_to_pixels(region); } }; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 32234b4581..6ba428fb0c 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1630,7 +1630,7 @@ namespace rsx if (options.prefer_surface_cache) { const u16 block_h = (attr.depth * 
attr.slice_h); - overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read); + overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); if (!overlapping_fbos.empty()) { @@ -1695,7 +1695,7 @@ namespace rsx { // Now check for surface cache hits const u16 block_h = (attr.depth * attr.slice_h); - overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read); + overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); } if (!overlapping_fbos.empty() || !overlapping_locals.empty()) @@ -2171,9 +2171,9 @@ namespace rsx src_address += (src.width - src_w) * src_bpp; } - auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info + auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, rsx::flags32_t access, bool allow_clipped) -> typename surface_store_type::surface_overlap_info { - const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer); + const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, access); if (list.empty()) { return {}; @@ -2256,11 +2256,18 @@ namespace rsx // Check if src/dst are parts of render targets typename surface_store_type::surface_overlap_info dst_subres; bool use_null_region = false; + + // TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate + // NOTE: Grab the src first as requirements for reading are more 
strict than requirements for writing + auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, surface_access::transfer_read, false); + src_is_render_target = src_subres.surface != nullptr; + + if (get_location(dst_address) == CELL_GCM_LOCATION_LOCAL) { // TODO: HACK // After writing, it is required to lock the memory range from access! - dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false); + dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, surface_access::transfer_write, false); dst_is_render_target = dst_subres.surface != nullptr; } else @@ -2272,10 +2279,6 @@ namespace rsx m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch* dst_h)); } - // TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false); - src_is_render_target = src_subres.surface != nullptr; - if (src_is_render_target) { const auto surf = src_subres.surface; @@ -2543,7 +2546,7 @@ namespace rsx // Destination dimensions are relaxed (true) dst_area = dst_subres.src_area; - dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer); + dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer_write); typeless_info.dst_context = texture_upload_context::framebuffer_storage; dst_is_depth_surface = typeless_info.dst_is_typeless ? 
false : dst_subres.is_depth; @@ -2692,7 +2695,7 @@ namespace rsx else { src_area = src_subres.src_area; - vram_texture = src_subres.surface->get_surface(rsx::surface_access::read); + vram_texture = src_subres.surface->get_surface(rsx::surface_access::transfer_read); typeless_info.src_context = texture_upload_context::framebuffer_storage; } @@ -2879,7 +2882,7 @@ namespace rsx std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_area.x2, src_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation - src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, src_area); + src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area); } if (dst_is_render_target) @@ -2890,7 +2893,7 @@ namespace rsx std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_area.x2, dst_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation - dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, dst_area); + dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area); } if (helpers::is_gcm_depth_format(typeless_info.src_gcm_format) != diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index a97b0fcbda..636fb989d4 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -317,7 +317,7 @@ namespace rsx out.push_back ({ - section.surface->get_surface(rsx::surface_access::read), + section.surface->get_surface(rsx::surface_access::shader_read), surface_transform::identity, 0, static_cast(src_x), @@ -558,7 +558,7 @@ namespace rsx const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class(); const auto command = surface_is_rop_target ? 
deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; - return { texptr->get_surface(rsx::surface_access::read), command, attr2, {}, + return { texptr->get_surface(rsx::surface_access::shader_read), command, attr2, {}, texture_upload_context::framebuffer_storage, format_class, scale, extended_dimension, decoded_remap }; } @@ -569,7 +569,7 @@ namespace rsx if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d) { - return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap, + return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::_3d_unwrap, attr2, {}, texture_upload_context::framebuffer_storage, texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; @@ -577,7 +577,7 @@ namespace rsx ensure(extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap); - return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap, + return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::cubemap_unwrap, attr2, {}, texture_upload_context::framebuffer_storage, texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index 3b73f0dd06..6c64e2ece8 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -12,7 +12,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons gl::command_context cmd = { gl_state }; const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, - info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); + info->address, info->width, info->height, info->pitch, format_bpp, 
rsx::surface_access::shader_read); if (!overlap_info.empty()) { @@ -46,7 +46,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons if (viable) { surface->read_barrier(cmd); - image = section.surface->get_surface(rsx::surface_access::read); + image = section.surface->get_surface(rsx::surface_access::shader_read); std::tie(info->width, info->height) = rsx::apply_resolution_scale( std::min(surface_width, static_cast(info->width)), diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 334ce8e436..605319538e 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -456,7 +456,21 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, bool /*read_ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access) { - const bool read_access = (access != rsx::surface_access::write); + const bool read_access = access.is_read(); + const bool is_depth = is_depth_surface(); + const bool should_read_buffers = is_depth ? 
!!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; + + if (should_read_buffers) + { + // TODO: Decide what to do when memory loads are disabled but the underlying has memory changed + // NOTE: Assume test() is expensive when in a pinch + if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test()) + { + // TODO: Figure out why merely returning and failing the test does not work when reading (TLoU) + // The result should have been the same either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } if (old_contents.empty()) { diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index a12dd2efee..c5315bda6a 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -111,8 +111,8 @@ namespace gl } void memory_barrier(gl::command_context& cmd, rsx::surface_access access); - void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::read); } - void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::write); } + void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); } + void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); } }; struct framebuffer_holder : public gl::fbo, public rsx::ref_counted diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 059a89f93c..4635afb1a6 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -281,7 +281,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const // Check the surface store first const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, - info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); + info->address, info->width, 
info->height, info->pitch, format_bpp, rsx::surface_access::shader_read); if (!overlap_info.empty()) { @@ -315,7 +315,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const if (viable) { surface->read_barrier(*m_current_command_buffer); - image_to_flip = section.surface->get_surface(rsx::surface_access::read); + image_to_flip = section.surface->get_surface(rsx::surface_access::shader_read); std::tie(info->width, info->height) = rsx::apply_resolution_scale( std::min(surface_width, static_cast(info->width)), diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp index 4407083020..3a8ef45787 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp @@ -304,7 +304,7 @@ namespace vk vk::viewable_image* render_target::get_surface(rsx::surface_access access_type) { - if (samples() == 1 || access_type == rsx::surface_access::write) + if (samples() == 1 || access_type == rsx::surface_access::shader_write) { return this; } @@ -369,7 +369,7 @@ namespace vk void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) { - const bool read_access = (access != rsx::surface_access::write); + const bool read_access = access.is_read(); const bool is_depth = is_depth_surface(); const bool should_read_buffers = is_depth ? 
!!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; @@ -533,8 +533,8 @@ namespace vk hw_blitter.scale_image( cmd, - src_texture->get_surface(rsx::surface_access::read), - this->get_surface(rsx::surface_access::transfer), + src_texture->get_surface(rsx::surface_access::transfer_read), + this->get_surface(rsx::surface_access::transfer_write), src_area, dst_area, /*linear?*/false, typeless_info); diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index ff86380f8c..5a5a227cda 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -53,8 +53,8 @@ namespace vk // Synchronization void texture_barrier(vk::command_buffer& cmd); void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access); - void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); } - void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); } + void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); } + void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); } }; static inline vk::render_target* as_rtt(vk::image* t) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index ba8ee54ccc..41b81467e2 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -197,7 +197,7 @@ namespace vk { auto surface = vk::as_rtt(vram_texture); surface->read_barrier(cmd); - locked_resource = surface->get_surface(rsx::surface_access::read); + locked_resource = surface->get_surface(rsx::surface_access::shader_read); transfer_width *= surface->samples_x; transfer_height *= surface->samples_y; }