mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
Optimized cached write-through
- Allows grabbing an unsynchronized memory block when its contents are about to be overwritten anyway
- Allows flushing only a specified range of memory
This commit is contained in:
parent
f8d999b384
commit
d2bf04796f
4 changed files with 72 additions and 17 deletions
|
@ -478,7 +478,7 @@ namespace gl
|
|||
flushed = true;
|
||||
|
||||
const auto valid_range = get_confirmed_range();
|
||||
void *dst = get_raw_ptr(valid_range.first);
|
||||
void *dst = get_raw_ptr(valid_range.first, true);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
||||
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
|
||||
|
@ -559,7 +559,7 @@ namespace gl
|
|||
}
|
||||
}
|
||||
|
||||
flush_io();
|
||||
flush_io(valid_range.first, valid_range.second);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
|
|
|
@ -245,7 +245,7 @@ namespace vk
|
|||
|
||||
const auto valid_range = get_confirmed_range();
|
||||
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
|
||||
void* pixels_dst = get_raw_ptr(valid_range.first);
|
||||
void* pixels_dst = get_raw_ptr(valid_range.first, true);
|
||||
|
||||
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
|
||||
const auto elem_size = texel_layout.first;
|
||||
|
@ -323,7 +323,7 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
flush_io();
|
||||
flush_io(valid_range.first, valid_range.second);
|
||||
dma_buffer->unmap();
|
||||
reset_write_statistics();
|
||||
|
||||
|
|
|
@ -38,13 +38,14 @@ namespace rsx
|
|||
if (locked_memory_ptr)
|
||||
{
|
||||
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
|
||||
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
|
||||
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
|
||||
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
|
||||
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);
|
||||
|
||||
*first = cpu_address_base + confirmed_range.first;
|
||||
*last = cpu_address_base + valid_limit - 4;
|
||||
|
||||
locked_memory_ptr.flush();
|
||||
locked_memory_ptr.flush(confirmed_range.first, 4);
|
||||
locked_memory_ptr.flush(valid_limit - 4, 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -321,10 +322,10 @@ namespace rsx
|
|||
}
|
||||
|
||||
template <typename T = void>
|
||||
T* get_raw_ptr(u32 offset = 0)
|
||||
T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
|
||||
{
|
||||
verify(HERE), locked_memory_ptr;
|
||||
return locked_memory_ptr.get<T>(offset);
|
||||
return locked_memory_ptr.get<T>(offset, no_sync);
|
||||
}
|
||||
|
||||
bool test_memory_head()
|
||||
|
@ -350,9 +351,9 @@ namespace rsx
|
|||
return (*last == (cpu_address_base + valid_limit - 4));
|
||||
}
|
||||
|
||||
void flush_io() const
|
||||
void flush_io(u32 offset = 0, u32 len = 0) const
|
||||
{
|
||||
locked_memory_ptr.flush();
|
||||
locked_memory_ptr.flush(offset, len);
|
||||
}
|
||||
|
||||
std::pair<u32, u32> get_confirmed_range() const
|
||||
|
|
|
@ -97,7 +97,7 @@ namespace rsx
|
|||
}
|
||||
|
||||
template <typename T = void>
|
||||
T* get(u32 offset = 0)
|
||||
T* get(u32 offset = 0, bool no_sync = false)
|
||||
{
|
||||
if (contiguous)
|
||||
{
|
||||
|
@ -105,7 +105,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
if (!synchronized)
|
||||
if (!synchronized && !no_sync)
|
||||
sync();
|
||||
|
||||
return (T*)(io_cache.data() + offset);
|
||||
|
@ -127,16 +127,70 @@ namespace rsx
|
|||
synchronized = true;
|
||||
}
|
||||
|
||||
void flush() const
|
||||
void flush(u32 offset = 0, u32 len = 0) const
|
||||
{
|
||||
if (contiguous)
|
||||
return;
|
||||
|
||||
u8* src = (u8*)io_cache.data();
|
||||
for (const auto &block : _blocks)
|
||||
|
||||
if (!offset && (!len || len == io_cache.size()))
|
||||
{
|
||||
memcpy(block.first.get(), src, block.second);
|
||||
src += block.second;
|
||||
for (const auto &block : _blocks)
|
||||
{
|
||||
memcpy(block.first.get(), src, block.second);
|
||||
src += block.second;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto remaining_bytes = len? len : io_cache.size() - offset;
|
||||
const auto write_end = remaining_bytes + offset;
|
||||
|
||||
u32 write_offset;
|
||||
u32 write_length;
|
||||
u32 base_offset = 0;
|
||||
|
||||
for (const auto &block : _blocks)
|
||||
{
|
||||
const u32 block_end = base_offset + block.second;
|
||||
|
||||
if (offset >= base_offset && offset < block_end)
|
||||
{
|
||||
// Head
|
||||
write_offset = (offset - base_offset);
|
||||
write_length = std::min<u32>(block.second - write_offset, remaining_bytes);
|
||||
}
|
||||
else if (base_offset > offset && block_end <= write_end)
|
||||
{
|
||||
// Completely spanned
|
||||
write_offset = 0;
|
||||
write_length = block.second;
|
||||
}
|
||||
else if (base_offset > offset && write_end < block_end)
|
||||
{
|
||||
// Tail
|
||||
write_offset = 0;
|
||||
write_length = remaining_bytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
// No overlap; skip
|
||||
write_length = 0;
|
||||
}
|
||||
|
||||
if (write_length)
|
||||
{
|
||||
memcpy(block.first.get() + write_offset, src + (base_offset + write_offset), write_length);
|
||||
|
||||
verify(HERE), write_length <= remaining_bytes;
|
||||
remaining_bytes -= write_length;
|
||||
if (!remaining_bytes)
|
||||
break;
|
||||
}
|
||||
|
||||
base_offset += block.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue