// Unified diff (whitespace-collapsed) touching four headers: GLTextureCache.h,
// VKTextureCache.h, rsx_cache.h and rsx_utils.h.
//
// What the added ('+') lines change, as visible in the hunks below:
//  * get() / get_raw_ptr() gain `bool no_sync = false`. On the non-contiguous path
//    get() now calls sync() only when `!synchronized && !no_sync`, so passing true
//    returns the io_cache pointer without a prior sync().
//  * flush() / flush_io() gain `u32 offset = 0, u32 len = 0`. flush() still copies
//    every block in full when offset is 0 and len is 0 or equal to io_cache.size();
//    otherwise it walks _blocks and copies only the bytes that fall inside
//    [offset, offset + len), where len == 0 means "up to io_cache.size()".
//  * The GL and VK hunks fetch the destination pointer with no_sync = true and
//    pass valid_range.first / valid_range.second to flush_io().
//  * The rsx_cache.h hunk that writes *first / *last replaces the single full
//    flush() with two 4-byte flushes, one at each written offset.
//
// NOTE(review): line breaks and indentation inside the hunks have been lost, so the
// line counts in the @@ headers cannot be checked here and this text will not apply
// as a patch as-is.
// NOTE(review): anything that would sit between angle brackets seems to be missing
// (`template - T* get_raw_ptr`, `std::pair get_confirmed_range()`), presumably
// stripped during extraction - restore from the upstream commit before applying.
// NOTE(review): `remaining_bytes` is `auto` deduced from
// `len ? len : io_cache.size() - offset`; if that ends up with a different type than
// `block.second - write_offset`, the plain `std::min(...)` call would not deduce a
// single type - possibly another stripped `<...>`; confirm against upstream.
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 4eaf1aa029..d8a8e2c84d 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -478,7 +478,7 @@ namespace gl flushed = true; const auto valid_range = get_confirmed_range(); - void *dst = get_raw_ptr(valid_range.first); + void *dst = get_raw_ptr(valid_range.first, true); glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT); @@ -559,7 +559,7 @@ namespace gl } } - flush_io(); + flush_io(valid_range.first, valid_range.second); glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 1f21b80aa6..3ca5cda174 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -245,7 +245,7 @@ namespace vk const auto valid_range = get_confirmed_range(); void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second); - void* pixels_dst = get_raw_ptr(valid_range.first); + void* pixels_dst = get_raw_ptr(valid_range.first, true); const auto texel_layout = vk::get_format_element_size(vram_texture->info.format); const auto elem_size = texel_layout.first; @@ -323,7 +323,7 @@ namespace vk } } - flush_io(); + flush_io(valid_range.first, valid_range.second); dma_buffer->unmap(); reset_write_statistics(); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index f59d963f75..8762696e7b 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -38,13 +38,14 @@ namespace rsx if (locked_memory_ptr) { const u32 valid_limit = (confirmed_range.second) ? 
confirmed_range.first + confirmed_range.second : cpu_address_range; - u32* first = locked_memory_ptr.get(confirmed_range.first); - u32* last = locked_memory_ptr.get(valid_limit - 4); + u32* first = locked_memory_ptr.get(confirmed_range.first, true); + u32* last = locked_memory_ptr.get(valid_limit - 4, true); *first = cpu_address_base + confirmed_range.first; *last = cpu_address_base + valid_limit - 4; - locked_memory_ptr.flush(); + locked_memory_ptr.flush(confirmed_range.first, 4); + locked_memory_ptr.flush(valid_limit - 4, 4); } } @@ -321,10 +322,10 @@ namespace rsx } template - T* get_raw_ptr(u32 offset = 0) + T* get_raw_ptr(u32 offset = 0, bool no_sync = false) { verify(HERE), locked_memory_ptr; - return locked_memory_ptr.get(offset); + return locked_memory_ptr.get(offset, no_sync); } bool test_memory_head() @@ -350,9 +351,9 @@ namespace rsx return (*last == (cpu_address_base + valid_limit - 4)); } - void flush_io() const + void flush_io(u32 offset = 0, u32 len = 0) const { - locked_memory_ptr.flush(); + locked_memory_ptr.flush(offset, len); } std::pair get_confirmed_range() const diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 74706180e9..7301ce39cf 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -97,7 +97,7 @@ namespace rsx } template - T* get(u32 offset = 0) + T* get(u32 offset = 0, bool no_sync = false) { if (contiguous) { @@ -105,7 +105,7 @@ namespace rsx } else { - if (!synchronized) + if (!synchronized && !no_sync) sync(); return (T*)(io_cache.data() + offset); @@ -127,16 +127,70 @@ namespace rsx synchronized = true; } - void flush() const + void flush(u32 offset = 0, u32 len = 0) const { if (contiguous) return; u8* src = (u8*)io_cache.data(); - for (const auto &block : _blocks) + + if (!offset && (!len || len == io_cache.size())) { - memcpy(block.first.get(), src, block.second); - src += block.second; + for (const auto &block : _blocks) + { + memcpy(block.first.get(), src, block.second); + src += 
block.second; + } + } + else + { + auto remaining_bytes = len? len : io_cache.size() - offset; + const auto write_end = remaining_bytes + offset; + + u32 write_offset; + u32 write_length; + u32 base_offset = 0; + + for (const auto &block : _blocks) + { + const u32 block_end = base_offset + block.second; + + if (offset >= base_offset && offset < block_end) + { + // Head + write_offset = (offset - base_offset); + write_length = std::min(block.second - write_offset, remaining_bytes); + } + else if (base_offset > offset && block_end <= write_end) + { + // Completely spanned + write_offset = 0; + write_length = block.second; + } + else if (base_offset > offset && write_end < block_end) + { + // Tail + write_offset = 0; + write_length = remaining_bytes; + } + else + { + // No overlap; skip + write_length = 0; + } + + if (write_length) + { + memcpy(block.first.get() + write_offset, src + (base_offset + write_offset), write_length); + + verify(HERE), write_length <= remaining_bytes; + remaining_bytes -= write_length; + if (!remaining_bytes) + break; + } + + base_offset += block.second; + } } }