diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index eeb7b2372a..5d42a0a398 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -109,18 +109,24 @@ namespace rsx std::vector data; //Stored data std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks u32 max_range = 0; //Largest stored block + u32 max_addr = 0; + u32 min_addr = UINT32_MAX; - void notify(u32 data_size) + void notify(u32 addr, u32 data_size) { verify(HERE), valid_count >= 0; max_range = std::max(data_size, max_range); + max_addr = std::max(max_addr, addr); + min_addr = std::min(min_addr, addr); valid_count++; } - void add(section_storage_type& section, u32 data_size) + void add(section_storage_type& section, u32 addr, u32 data_size) { verify(HERE), valid_count >= 0; max_range = std::max(data_size, max_range); + max_addr = std::max(max_addr, addr); + min_addr = std::min(min_addr, addr); valid_count++; data.push_back(std::move(section)); @@ -141,7 +147,6 @@ namespace rsx texture_format format; }; - std::atomic_bool in_access_violation_handler = { false }; shared_mutex m_cache_mutex; std::unordered_map m_cache; @@ -149,6 +154,9 @@ namespace rsx std::pair no_access_range = std::make_pair(0xFFFFFFFF, 0); std::unordered_map m_cache_miss_statistics_table; + + //Set when a hw blit engine incompatibility is detected + bool blit_engine_incompatibility_warning_raised = false; //Memory usage const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated @@ -165,12 +173,15 @@ namespace rsx virtual void enforce_surface_creation_type(section_storage_type& section, const texture_create_flags expected) = 0; virtual void insert_texture_barrier() = 0; + constexpr u32 get_block_size() const { return 0x1000000; } + inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } + private: //Internal implementation methods bool invalidate_range_impl(u32 address, u32 range, bool unprotect) { bool response = false; - u32 last_dirty_block = 0; + u32 last_dirty_block = UINT32_MAX; std::pair trampled_range = std::make_pair(address, address + range); for (auto It = m_cache.begin(); It != m_cache.end(); It++) @@ -185,7 +196,7 @@ namespace rsx if (trampled_range.first < trampled_range.second) { //Only if a valid range, ignore empty sets - if (trampled_range.first >= (base + range_data.max_range + get_block_size()) || base >= trampled_range.second) + if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) continue; } @@ -239,7 +250,7 @@ namespace rsx bool flush_address_impl(u32 address, Args&&... extras) { bool response = false; - u32 last_dirty_block = 0; + u32 last_dirty_block = UINT32_MAX; std::pair trampled_range = std::make_pair(0xffffffff, 0x0); std::vector sections_to_flush; @@ -255,7 +266,7 @@ namespace rsx if (trampled_range.first < trampled_range.second) { //Only if a valid range, ignore empty sets - if (trampled_range.first >= (base + range_data.max_range + get_block_size()) || base >= trampled_range.second) + if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second) continue; } @@ -307,8 +318,19 @@ namespace rsx return response; } - constexpr u32 get_block_size() const { return 0x1000000; } - inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } + bool is_hw_blit_engine_compatible(const u32 format) const + { + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH24_D8: + return true; + default: + return false; + } + } public: @@ -316,7 +338,7 @@ namespace rsx ~texture_cache() {} virtual void destroy() = 0; - virtual bool is_depth_texture(const u32) = 0; + virtual bool is_depth_texture(const u32, const u32) = 0; virtual void on_frame_end() = 0; std::vector find_texture_from_range(u32 rsx_address, u32 range) @@ -372,10 +394,13 @@ namespace rsx { if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) { - if (!confirm_dimensions) return tex; + if (!confirm_dimensions || tex.matches(rsx_address, width, height, mipmaps)) + { + if (!tex.is_locked()) + range_data.notify(rsx_address, rsx_size); - if (tex.matches(rsx_address, width, height, mipmaps)) return tex; + } else { LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address); @@ -394,14 +419,14 @@ namespace rsx free_texture_section(tex); } - range_data.notify(rsx_size); + range_data.notify(rsx_address, rsx_size); return tex; } } } section_storage_type tmp; - m_cache[block_address].add(tmp, rsx_size); + m_cache[block_address].add(tmp, rsx_address, rsx_size); return m_cache[block_address].data.back(); } @@ -483,7 +508,7 @@ namespace rsx address > no_access_range.second) return std::make_tuple(false, nullptr); - rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); + reader_lock lock(m_cache_mutex); auto found = m_cache.find(get_block_address(address)); if (found != m_cache.end()) @@ -494,7 +519,7 @@ namespace rsx if (tex.is_dirty()) continue; if (!tex.is_flushable()) continue; - if (tex.overlaps(address)) + if (tex.overlaps(address, false)) return std::make_tuple(true, &tex); } } @@ -518,7 +543,7 @@ namespace rsx if (tex.is_dirty()) continue; if (!tex.is_flushable()) continue; - if (tex.overlaps(address)) + if (tex.overlaps(address, false)) return std::make_tuple(true, &tex); } } @@ -533,7 +558,7 @@ namespace rsx address > no_access_range.second) return false; - rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); + writer_lock lock(m_cache_mutex); return flush_address_impl(address, std::forward(extras)...); } @@ -555,7 +580,7 @@ namespace rsx return false; } - rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); + writer_lock lock(m_cache_mutex); return invalidate_range_impl(address, range, unprotect); } @@ -651,17 +676,16 @@ namespace rsx image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts) { const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - const u32 range = (u32)get_texture_size(tex); + const u32 tex_size = (u32)get_texture_size(tex); const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); const u32 tex_width = tex.width(); const u32 tex_height = tex.height(); - const u32 native_pitch = (tex_width * get_format_block_size_in_bytes(format)); - const u32 tex_pitch = (tex.pitch() == 0) ? native_pitch : tex.pitch(); + const u32 tex_pitch = (tex_size / tex_height); //NOTE: Compressed textures dont have a real pitch (tex_size = (w*h)/6) - if (!texaddr || !range) + if (!texaddr || !tex_size) { - LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, range); + LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, tex_size); return 0; } @@ -718,30 +742,42 @@ namespace rsx return cached_texture->get_raw_view(); } - //Find based on range instead - auto overlapping_surfaces = find_texture_from_range(texaddr, tex_pitch * tex_height); - if (!overlapping_surfaces.empty()) + if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format)) { - for (auto surface : overlapping_surfaces) + //Find based on range instead + auto overlapping_surfaces = find_texture_from_range(texaddr, tex_size); + if (!overlapping_surfaces.empty()) { - if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) - continue; - - if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) + for (auto surface : overlapping_surfaces) { - u16 offset_x = 0, offset_y = 0; - if (const u32 address_offset = texaddr - surface->get_section_base()) - { - const auto bpp = get_format_block_size_in_bytes(format); - offset_y = address_offset / tex_pitch; - offset_x = (address_offset % tex_pitch) / bpp; - } + if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) + continue; - if ((offset_x + tex_width) <= surface->get_width() && - (offset_y + tex_height) <= surface->get_height()) + if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) { - auto src_image = surface->get_raw_texture(); - return create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height); + u16 offset_x = 0, offset_y = 0; + if (const u32 address_offset = texaddr - surface->get_section_base()) + { + const auto bpp = get_format_block_size_in_bytes(format); + offset_y = address_offset / tex_pitch; + offset_x = (address_offset % tex_pitch) / bpp; + } + + if ((offset_x + tex_width) <= surface->get_width() && + (offset_y + tex_height) <= surface->get_height()) + { + if (!blit_engine_incompatibility_warning_raised && !is_hw_blit_engine_compatible(format)) + { + LOG_ERROR(RSX, "Format 0x%X is not compatible with the hardware blit acceleration." + " Consider turning off GPU texture scaling in the options to partially handle textures on your CPU.", format); + blit_engine_incompatibility_warning_raised = true; + break; + } + + auto src_image = surface->get_raw_texture(); + if (auto result = create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height)) + return result; + } } } } @@ -754,6 +790,7 @@ namespace rsx * a bound render target. We can bypass the expensive download in this case */ + const u32 native_pitch = tex_width * get_format_block_size_in_bytes(format); const f32 internal_scale = (f32)tex_pitch / native_pitch; const u32 internal_width = (const u32)(tex_width * internal_scale); @@ -917,7 +954,7 @@ namespace rsx bool is_memcpy = false; u32 memcpy_bytes_length = 0; - if (dst_is_argb8 == src_is_argb8 && !dst.swizzled) + if (!src_is_render_target && !dst_is_render_target && dst_is_argb8 == src_is_argb8 && !dst.swizzled) { if ((src.slice_h == 1 && dst.clip_height == 1) || (dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch)) @@ -930,54 +967,42 @@ namespace rsx reader_lock lock(m_cache_mutex); section_storage_type* cached_dest = nullptr; + bool invalidate_dst_range = false; if (!dst_is_render_target) { - //Apply region offsets - dst_area.x1 += dst.offset_x; - dst_area.x2 += dst.offset_x; - dst_area.y1 += dst.offset_y; - dst_area.y2 += dst.offset_y; - - //First check if this surface exists in VRAM with exact dimensions - //Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible - cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height); - //Check for any available region that will fit this one - if (!cached_dest) + auto overlapping_surfaces = find_texture_from_range(dst_address, dst.pitch * dst.clip_height); + + for (auto surface: overlapping_surfaces) { - auto overlapping_surfaces = find_texture_from_range(dst_address, dst.pitch * dst.clip_height); + if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) + continue; - for (auto surface: overlapping_surfaces) + const auto old_dst_area = dst_area; + if (const u32 address_offset = dst_address - surface->get_section_base()) { - if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) - continue; + const u16 bpp = dst_is_argb8 ? 4 : 2; + const u16 offset_y = address_offset / dst.pitch; + const u16 offset_x = address_offset % dst.pitch; + const u16 offset_x_in_block = offset_x / bpp; - const auto old_dst_area = dst_area; - if (const u32 address_offset = dst_address - surface->get_section_base()) - { - const u16 bpp = dst_is_argb8 ? 4 : 2; - const u16 offset_y = address_offset / dst.pitch; - const u16 offset_x = address_offset % dst.pitch; - const u16 offset_x_in_block = offset_x / bpp; + dst_area.x1 += offset_x_in_block; + dst_area.x2 += offset_x_in_block; + dst_area.y1 += offset_y; + dst_area.y2 += offset_y; + } - dst_area.x1 += offset_x_in_block; - dst_area.x2 += offset_x_in_block; - dst_area.y1 += offset_y; - dst_area.y2 += offset_y; - } - - //Validate clipping region - if ((unsigned)dst_area.x2 <= surface->get_width() && - (unsigned)dst_area.y2 <= surface->get_height()) - { - cached_dest = surface; - break; - } - else - { - dst_area = old_dst_area; - } + //Validate clipping region + if ((unsigned)dst_area.x2 <= surface->get_width() && + (unsigned)dst_area.y2 <= surface->get_height()) + { + cached_dest = surface; + break; + } + else + { + dst_area = old_dst_area; } } @@ -999,6 +1024,10 @@ namespace rsx memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; } + else if (overlapping_surfaces.size() > 0) + { + invalidate_dst_range = true; + } } else { @@ -1115,6 +1144,11 @@ namespace rsx dest_texture = 0; cached_dest = nullptr; } + else if (invalidate_dst_range) + { + lock.upgrade(); + invalidate_range_impl(dst_address, dst.pitch * dst.height, true); + } //Validate clipping region if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c98d67f7cf..1f847bf210 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -939,7 +939,7 @@ bool GLGSRender::check_program_state() { surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - if (!surface && m_gl_texture_cache.is_depth_texture(texaddr)) + if (!surface && m_gl_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex))) return std::make_tuple(true, 0); } @@ -1094,17 +1094,8 @@ void GLGSRender::flip(int buffer) m_flip_fbo.recreate(); m_flip_fbo.bind(); - //The render might have been done offscreen and a blit used to display - //Check the texture cache for a blitted copy const u32 size = buffer_pitch * buffer_height; - auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address); - - if (surface != nullptr) - { - m_flip_fbo.color = surface->get_raw_view(); - m_flip_fbo.read_buffer(m_flip_fbo.color); - } - else if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) + if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) { buffer_width = render_target_texture->width(); buffer_height = render_target_texture->height(); @@ -1112,6 +1103,13 @@ void GLGSRender::flip(int buffer) m_flip_fbo.color = *render_target_texture; m_flip_fbo.read_buffer(m_flip_fbo.color); } + else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address)) + { + //Hack - this should be the first location to check for output + //The render might have been done offscreen or in software and a blit used to display + m_flip_fbo.color = surface->get_raw_view(); + m_flip_fbo.read_buffer(m_flip_fbo.color); + } else { LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); @@ -1248,7 +1246,7 @@ void GLGSRender::do_local_task() //Check if the suggested section is valid if (!q.section_to_flush->is_flushed()) { - q.section_to_flush->flush(); + m_gl_texture_cache.flush_address(q.address_to_flush); q.result = true; } else diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 3bc06e2a1d..1cfca7f480 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -19,7 +19,7 @@ namespace gl case CELL_GCM_TEXTURE_A8R8G8B8: return GL_RGBA8; case CELL_GCM_TEXTURE_G8B8: return GL_RG8; case CELL_GCM_TEXTURE_R6G5B5: return GL_RGB565; - case CELL_GCM_TEXTURE_DEPTH24_D8: return GL_DEPTH_COMPONENT24; + case CELL_GCM_TEXTURE_DEPTH24_D8: return GL_DEPTH24_STENCIL8; case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return GL_DEPTH_COMPONENT24; case CELL_GCM_TEXTURE_DEPTH16: return GL_DEPTH_COMPONENT16; case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return GL_DEPTH_COMPONENT16; @@ -442,7 +442,7 @@ namespace gl } } - void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type, + void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type, std::vector& subresources_layout, std::pair, std::array>& decoded_remap, bool static_state) { const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index f2987be24a..335003298d 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -25,7 +25,7 @@ namespace gl * - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture * static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX) */ - void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type, + void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type, std::vector& subresources_layout, std::pair, std::array>& decoded_remap, bool static_state); class sampler_state diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 07b7920d2e..56b5a0b190 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -476,6 +476,19 @@ namespace gl { u32 dst_id = 0; + GLenum ifmt; + glBindTexture(GL_TEXTURE_2D, src_id); + glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt); + + switch (ifmt) + { + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH24_STENCIL8: + sized_internal_fmt = ifmt; + break; + } + glGenTextures(1, &dst_id); glBindTexture(GL_TEXTURE_2D, dst_id); @@ -552,12 +565,15 @@ namespace gl } auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height); - cached.protect(utils::protection::ro); cached.set_dirty(false); cached.set_depth_flag(depth_flag); cached.set_view_flags(flags); cached.set_context(context); + //Its not necessary to lock blit dst textures as they are just reused as necessary + if (context != rsx::texture_upload_context::blit_engine_dst || g_cfg.video.strict_rendering_mode) + cached.protect(utils::protection::ro); + return &cached; } @@ -572,7 +588,7 @@ namespace gl //Swizzling is ignored for blit engine copy and emulated using remapping bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src)? false : swizzled; - gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, input_swizzled, type, subresource_layout, remap_vector, false); + gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, input_swizzled, type, subresource_layout, remap_vector, false); return section; } @@ -620,11 +636,11 @@ namespace gl m_hw_blitter.destroy(); } - bool is_depth_texture(const u32 rsx_address) override + bool is_depth_texture(const u32 rsx_address, const u32 rsx_size) override { reader_lock lock(m_cache_mutex); -/* auto found = m_cache.find(rsx_address); + auto found = m_cache.find(get_block_address(rsx_address)); if (found == m_cache.end()) return false; @@ -636,8 +652,12 @@ namespace gl if (tex.is_dirty()) continue; - return tex.is_depth_texture(); - }*/ + if (!tex.overlaps(rsx_address, true)) + continue; + + if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) + return tex.is_depth_texture(); + } return false; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 461439504c..149c083df8 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -394,6 +394,7 @@ namespace rsx // Deferred calls are used to batch draws together u32 deferred_primitive_type = 0; u32 deferred_call_size = 0; + s32 deferred_begin_end = 0; std::vector deferred_stack; bool has_deferred_call = false; @@ -453,6 +454,10 @@ namespace rsx if (emit_end) methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + if (deferred_begin_end > 0) //Hanging draw call (useful for immediate rendering where the begin call needs to be noted) + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type); + + deferred_begin_end = 0; deferred_primitive_type = 0; deferred_call_size = 0; has_deferred_call = false; @@ -569,6 +574,11 @@ namespace rsx case NV4097_SET_BEGIN_END: { // Hook; Allows begin to go through, but ignores end + if (value) + deferred_begin_end++; + else + deferred_begin_end--; + if (value && value != deferred_primitive_type) deferred_primitive_type = value; else @@ -1065,8 +1075,7 @@ namespace rsx current_vertex_program.skip_vertex_input_check = false; current_vertex_program.rsx_vertex_inputs.resize(0); - current_vertex_program.data.resize(512 * 4); - current_vertex_program.rsx_vertex_inputs.reserve(rsx::limits::vertex_count); + current_vertex_program.data.resize((512 - transform_program_start) * 4); u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4); u32* ucode_dst = current_vertex_program.data.data(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b113babd90..b946f87ab3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1696,7 +1696,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::check_program_status() { - auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple { vk::render_target *surface = nullptr; @@ -1706,7 +1706,7 @@ bool VKGSRender::check_program_status() { surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - if (!surface && m_texture_cache.is_depth_texture(texaddr)) + if (!surface && m_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex))) return std::make_tuple(true, 0); } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 302d16c756..2f0b2f6d81 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -6,10 +6,10 @@ namespace vk context* g_current_vulkan_ctx = nullptr; render_device g_current_renderer; - texture g_null_texture; + std::unique_ptr g_null_texture; + std::unique_ptr g_null_image_view; VkSampler g_null_sampler = nullptr; - VkImageView g_null_image_view = nullptr; bool g_cb_no_interrupt_flag = false; @@ -131,6 +131,47 @@ namespace vk fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format); } + u8 get_format_texel_width(const VkFormat format) + { + switch (format) + { + case VK_FORMAT_R8_UNORM: + return 1; + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return 2; + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + return 4; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return 8; + case VK_FORMAT_R32G32B32A32_SFLOAT: + return 16; + case VK_FORMAT_D16_UNORM: + return 2; + case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8 + case VK_FORMAT_D24_UNORM_S8_UINT: + return 4; + } + + fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); + } + VkAllocationCallbacks default_callbacks() { VkAllocationCallbacks callbacks; @@ -170,22 +211,28 @@ namespace vk VkImageView null_image_view() { if (g_null_image_view) - return g_null_image_view; + return g_null_image_view->value; - g_null_texture.create(g_current_renderer, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT, 4, 4); - g_null_image_view = g_null_texture; - return g_null_image_view; + g_null_texture.reset(new image(g_current_renderer, get_memory_mapping(g_current_renderer.gpu()).device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT, 0)); + + g_null_image_view.reset(new image_view(g_current_renderer, g_null_texture->value, VK_IMAGE_VIEW_TYPE_2D, + VK_FORMAT_B8G8R8A8_UNORM, {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1})); + + return g_null_image_view->value; } void destroy_global_resources() { - g_null_texture.destroy(); + g_null_texture.reset(); + g_null_image_view .reset(); if (g_null_sampler) vkDestroySampler(g_current_renderer, g_null_sampler, nullptr); g_null_sampler = nullptr; - g_null_image_view = nullptr; } void set_current_thread_ctx(const vk::context &ctx) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index d5a810ec79..b4daff043b 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -81,6 +81,7 @@ namespace vk void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect); VkFormat get_compatible_sampler_format(u32 format); + u8 get_format_texel_width(const VkFormat format); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); @@ -475,57 +476,6 @@ namespace vk VkDevice m_device; }; - class texture - { - VkImageView m_view = nullptr; - VkImage m_image_contents = nullptr; - VkMemoryRequirements m_memory_layout; - VkFormat m_internal_format; - VkImageUsageFlags m_flags; - VkImageAspectFlagBits m_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImageViewType m_view_type = VK_IMAGE_VIEW_TYPE_2D; - VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT; - VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR; - - vk::memory_block_deprecated vram_allocation; - vk::render_device *owner = nullptr; - - u32 m_width; - u32 m_height; - u32 m_mipmaps; - - vk::texture *staging_texture = nullptr; - bool ready = false; - - public: - texture(vk::swap_chain_image &img); - texture() {} - ~texture() {} - - void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle); - void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle); - void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps = 1, bool gpu_only = false, VkComponentMapping swizzle = default_component_map()); - void destroy(); - - void init(rsx::fragment_texture &tex, vk::command_buffer &cmd, bool ignore_checks = false); - void flush(vk::command_buffer & cmd); - - //Fill with debug color 0xFF - void init_debug(); - - void change_layout(vk::command_buffer &cmd, VkImageLayout new_layout); - VkImageLayout get_layout(); - - const u32 width(); - const u32 height(); - const u16 mipmaps(); - const VkFormat get_format(); - - operator VkImageView(); - operator VkImage(); - }; - struct buffer { VkBuffer value; @@ -779,11 +729,6 @@ namespace vk { return view; } - - operator vk::texture() - { - return vk::texture(*this); - } }; class swap_chain diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 68937af42d..3253fd7b05 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -147,14 +147,6 @@ namespace vk change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); } - void copy_texture(VkCommandBuffer cmd, texture &src, texture &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) - { - VkImage isrc = (VkImage)src; - VkImage idst = (VkImage)dst; - - copy_image(cmd, isrc, idst, srcLayout, dstLayout, width, height, mipmaps, aspect); - } - void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, VkImageAspectFlags flags, vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer) @@ -188,347 +180,4 @@ namespace vk mipmap_level++; } } - - texture::texture(vk::swap_chain_image &img) - { - m_image_contents = img; - m_view = img; - - //We did not create this object, do not allow internal modification! - owner = nullptr; - } - - void texture::create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) - { - owner = &device; - - //First create the image - VkImageCreateInfo image_info = {}; - - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.imageType = image_type; - image_info.format = format; - image_info.extent = { width, height, 1 }; - image_info.mipLevels = mipmaps; - image_info.arrayLayers = (image_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)? 6: 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = tiling; - image_info.usage = usage; - image_info.flags = image_flags; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - - CHECK_RESULT(vkCreateImage(device, &image_info, nullptr, &m_image_contents)); - - vkGetImageMemoryRequirements(device, m_image_contents, &m_memory_layout); - vram_allocation.allocate_from_pool(device, m_memory_layout.size, !gpu_only, m_memory_layout.memoryTypeBits); - - CHECK_RESULT(vkBindImageMemory(device, m_image_contents, vram_allocation, 0)); - - VkImageViewCreateInfo view_info = {}; - view_info.format = format; - view_info.image = m_image_contents; - view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.viewType = view_type; - view_info.components = swizzle; - view_info.subresourceRange = get_image_subresource_range(0, 0, 1, mipmaps, VK_IMAGE_ASPECT_COLOR_BIT); - - if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - { - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - m_image_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - } - - CHECK_RESULT(vkCreateImageView(device, &view_info, nullptr, &m_view)); - - m_width = width; - m_height = height; - m_mipmaps = mipmaps; - m_internal_format = format; - m_flags = usage; - m_view_type = view_type; - m_usage = usage; - m_tiling = tiling; - - ready = true; - } - - void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) - { - create(device, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, 0, usage, tiling, width, height, mipmaps, gpu_only, swizzle); - } - - void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) - { - VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; - - /* The spec mandates checking against all usage bits for support in either linear or optimal tiling modes. - * Ideally, no assumptions should be made, but for simplification, we'll assume optimal mode suppoorts everything - */ - - VkFormatProperties props; - vkGetPhysicalDeviceFormatProperties(device.gpu(), format, &props); - - bool linear_is_supported = true; - - if (!!(usage & VK_IMAGE_USAGE_SAMPLED_BIT)) - { - if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) - linear_is_supported = false; - } - - if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) - { - if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) - linear_is_supported = false; - } - - if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) - { - if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) - linear_is_supported = false; - } - - if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_STORAGE_BIT)) - { - if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) - linear_is_supported = false; - } - - if (linear_is_supported) - tiling = VK_IMAGE_TILING_LINEAR; - else - usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - create(device, format, usage, tiling, width, height, mipmaps, gpu_only, swizzle); - } - - void texture::init(rsx::fragment_texture& tex, vk::command_buffer &cmd, bool ignore_checks) - { - VkImageViewType best_type = VK_IMAGE_VIEW_TYPE_2D; - - if (tex.cubemap() && m_view_type != VK_IMAGE_VIEW_TYPE_CUBE) - { - vk::render_device &dev = (*owner); - VkFormat format = m_internal_format; - VkImageUsageFlags usage = m_usage; - VkImageTiling tiling = m_tiling; - - destroy(); - create(dev, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_CUBE, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, usage, tiling, tex.width(), tex.height(), tex.get_exact_mipmap_count(), false, default_component_map()); - } - - if (!tex.cubemap() && tex.depth() > 1 && m_view_type != VK_IMAGE_VIEW_TYPE_3D) - { - best_type = VK_IMAGE_VIEW_TYPE_3D; - - vk::render_device &dev = (*owner); - VkFormat format = m_internal_format; - VkImageUsageFlags usage = m_usage; - VkImageTiling tiling = m_tiling; - - destroy(); - create(dev, format, VK_IMAGE_TYPE_3D, VK_IMAGE_VIEW_TYPE_3D, 0, usage, tiling, tex.width(), tex.height(), tex.get_exact_mipmap_count(), false, default_component_map()); - } - - VkImageSubresource subres = {}; - subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subres.mipLevel = 0; - subres.arrayLayer = 0; - - u8 *data; - - VkFormatProperties props; - vk::physical_device dev = owner->gpu(); - vkGetPhysicalDeviceFormatProperties(dev, m_internal_format, &props); - - if (ignore_checks || props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - { - std::vector> layout_alignment(tex.get_exact_mipmap_count()); - - for (u32 i = 0; i < tex.get_exact_mipmap_count(); ++i) - { - layout_alignment[i].first = 4096; - vkGetImageSubresourceLayout((*owner), m_image_contents, &subres, &layout_alignment[i].second); - - if (m_view_type == VK_IMAGE_VIEW_TYPE_CUBE) - layout_alignment[i].second.size *= 6; - - while (layout_alignment[i].first > 1) - { - //Test if is wholly divisible by alignment.. - if (!(layout_alignment[i].second.rowPitch & (layout_alignment[i].first - 1))) - break; - - layout_alignment[i].first >>= 1; - } - - subres.mipLevel++; - } - - if (tex.get_exact_mipmap_count() == 1) - { - u64 buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); - if (buffer_size != layout_alignment[0].second.size) - { - if (buffer_size > layout_alignment[0].second.size) - { - LOG_ERROR(RSX, "Layout->pitch = %d, size=%d, height=%d", layout_alignment[0].second.rowPitch, layout_alignment[0].second.size, tex.height()); - LOG_ERROR(RSX, "Computed alignment would have been %d, which yielded a size of %d", layout_alignment[0].first, buffer_size); - LOG_ERROR(RSX, "Retrying..."); - - //layout_alignment[0].first >>= 1; - buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); - - if (buffer_size != layout_alignment[0].second.size) - fmt::throw_exception("Bad texture alignment computation!" HERE); - } - else - { - LOG_ERROR(RSX, "Bad texture alignment computation: expected size=%d bytes, computed=%d bytes, alignment=%d, hw pitch=%d", - layout_alignment[0].second.size, buffer_size, layout_alignment[0].first, layout_alignment[0].second.rowPitch); - } - } - - CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); - gsl::span mapped{ (gsl::byte*)(data + layout_alignment[0].second.offset), ::narrow(layout_alignment[0].second.size) }; - - const std::vector &subresources_layout = get_subresources_layout(tex); - for (const rsx_subresource_layout &layout : subresources_layout) - { - upload_texture_subresource(mapped, layout, tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN), !(tex.format() & CELL_GCM_TEXTURE_LN), layout_alignment[0].first); - } - vkUnmapMemory((*owner), vram_allocation); - } - else - { - auto &layer_props = layout_alignment[layout_alignment.size() - 1].second; - u64 max_size = layer_props.offset + layer_props.size; - - if (m_memory_layout.size < max_size) - { - fmt::throw_exception("Failed to upload texture. Invalid memory block size." HERE); - } - - int index= 0; - std::vector> layout_offset_info(tex.get_exact_mipmap_count()); - - for (auto &mip_info : layout_offset_info) - { - auto &alignment = layout_alignment[index].first; - auto &layout = layout_alignment[index++].second; - - mip_info = std::make_pair(layout.offset, (u32)layout.rowPitch); - } - - CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); - gsl::span mapped{ (gsl::byte*)(data), ::narrow(m_memory_layout.size) }; - - const std::vector &subresources_layout = get_subresources_layout(tex); - size_t idx = 0; - for (const rsx_subresource_layout &layout : subresources_layout) - { - const auto &dst_layout = layout_offset_info[idx++]; - upload_texture_subresource(mapped.subspan(dst_layout.first), layout, tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN), !(tex.format() & CELL_GCM_TEXTURE_LN), dst_layout.second); - } - vkUnmapMemory((*owner), vram_allocation); - } - } - else if (!ignore_checks) - { - if (!staging_texture) - { - staging_texture = new texture(); - staging_texture->create((*owner), m_internal_format, VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_TILING_LINEAR, m_width, m_height, tex.get_exact_mipmap_count(), false, default_component_map()); - } - - staging_texture->init(tex, cmd, true); - staging_texture->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - ready = false; - } - } - - void texture::flush(vk::command_buffer &cmd) - { - if (!ready) - { - vk::copy_texture(cmd, *staging_texture, *this, staging_texture->get_layout(), m_layout, m_width, m_height, m_mipmaps, m_image_aspect); - ready = true; - } - } - - void texture::init_debug() - { - void *data; - CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); - - memset(data, 0xFF, m_memory_layout.size); - vkUnmapMemory((*owner), vram_allocation); - } - - void texture::change_layout(vk::command_buffer &cmd, VkImageLayout new_layout) - { - if (m_layout == new_layout) return; - - vk::change_image_layout(cmd, m_image_contents, m_layout, new_layout, vk::get_image_subresource_range(0, 0, 1, 1, m_image_aspect)); - m_layout = new_layout; - } - - VkImageLayout texture::get_layout() - { - return m_layout; - } - - const u32 texture::width() - { - return m_width; - } - - const u32 texture::height() - { - return m_height; - } - - const u16 texture::mipmaps() - { - return m_mipmaps; - } - - void texture::destroy() - { - if (!owner) return; - - //Destroy all objects managed by this object - vkDestroyImageView((*owner), m_view, nullptr); - vkDestroyImage((*owner), m_image_contents, nullptr); - - vram_allocation.destroy(); - - owner = nullptr; - m_view = nullptr; - m_image_contents = nullptr; - - if (staging_texture) - { - staging_texture->destroy(); - delete staging_texture; - staging_texture = nullptr; - } - } - - const VkFormat texture::get_format() - { - return m_internal_format; - } - - texture::operator VkImage() - { - return m_image_contents; - } - - texture::operator VkImageView() - { - return m_view; - } - } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 89ce2217f5..b4f125f91b 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -39,7 +39,7 @@ namespace vk rsx::buffered_section::reset(base, length, policy); } - void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch = 0, bool managed=true) + void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch=0, bool managed=true) { width = w; height = h; @@ -52,8 +52,12 @@ namespace vk if (managed) managed_texture.reset(image); //TODO: Properly compute these values - this->rsx_pitch = rsx_pitch; - real_pitch = cpu_address_range / height; + if (rsx_pitch > 0) + this->rsx_pitch = rsx_pitch; + else + this->rsx_pitch = cpu_address_range / height; + + real_pitch = vk::get_format_texel_width(image->info.format) * width; //Even if we are managing the same vram section, we cannot guarantee contents are static //The create method is only invoked when a new mangaged session is required @@ -493,10 +497,13 @@ namespace vk cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, 0); region.reset(rsx_address, rsx_size); region.create(width, height, depth, mipmaps, view, image); - region.protect(utils::protection::ro); region.set_dirty(false); region.set_context(context); + //Its not necessary to lock blit dst textures as they are just reused as necessary + if (context != rsx::texture_upload_context::blit_engine_dst || g_cfg.video.strict_rendering_mode) + region.protect(utils::protection::ro); + read_only_range = region.get_min_max(read_only_range); return ®ion; } @@ -580,11 +587,11 @@ namespace vk purge_cache(); } - bool is_depth_texture(const u32 texaddr) override + bool is_depth_texture(const u32 rsx_address, const u32 rsx_size) override { reader_lock lock(m_cache_mutex); - auto found = m_cache.find(texaddr); + auto found = m_cache.find(get_block_address(rsx_address)); if (found == m_cache.end()) return false; @@ -596,14 +603,20 @@ namespace vk if (tex.is_dirty()) continue; - switch (tex.get_format()) + if (!tex.overlaps(rsx_address, true)) + continue; + + if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) { - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - return true; - default: - return false; + switch (tex.get_format()) + { + case VK_FORMAT_D16_UNORM: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return true; + default: + return false; + } } } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index e449c2d558..51af8ddb06 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -134,21 +134,24 @@ namespace rsx locked = false; } + /** + * Check if range overlaps with this section. + * ignore_protection_range - if true, the test should not check against the aligned protection range, instead + * tests against actual range of contents in memory + */ bool overlaps(std::pair range) const { return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); } - bool overlaps(u32 address) const + bool overlaps(u32 address, bool ignore_protection_range) const { - return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); + if (!ignore_protection_range) + return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); + else + return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range); } - /** - * Check if range overlaps with this section. - * ignore_protection_range - if true, the test should not check against the aligned protection range, instead - * tests against actual range of contents in memory - */ bool overlaps(std::pair range, bool ignore_protection_range) const { if (!ignore_protection_range) diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index dbb7c0dfb7..e29af6db59 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -207,35 +207,4 @@ namespace rsx return std::make_tuple(x, y, width, height); } - - // Conditional mutex lock for shared mutex types - // May silently fail to acquire the lock - template - struct conditional_lock - { - lock_type& _ref; - std::atomic_bool& _flag; - bool acquired = false; - - conditional_lock(std::atomic_bool& flag, lock_type& mutex): - _ref(mutex), _flag(flag) - { - const bool _false = false; - if (flag.compare_exchange_weak(const_cast(_false), true)) - { - mutex.lock_shared(); - acquired = true; - } - } - - ~conditional_lock() - { - if (acquired) - { - _ref.unlock_shared(); - _flag.store(false); - acquired = false; - } - } - }; }