Optimized cached write-through

- Allows grabbing an unsynchronized memory block if overwriting contents
anyway
- Allows flushing only specified range of memory
This commit is contained in:
kd-11 2018-05-21 10:58:49 +03:00 committed by kd-11
parent f8d999b384
commit d2bf04796f
4 changed files with 72 additions and 17 deletions

View file

@@ -478,7 +478,7 @@ namespace gl
flushed = true;
const auto valid_range = get_confirmed_range();
void *dst = get_raw_ptr(valid_range.first);
void *dst = get_raw_ptr(valid_range.first, true);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
@@ -559,7 +559,7 @@ namespace gl
}
}
flush_io();
flush_io(valid_range.first, valid_range.second);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);

View file

@@ -245,7 +245,7 @@ namespace vk
const auto valid_range = get_confirmed_range();
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
void* pixels_dst = get_raw_ptr(valid_range.first);
void* pixels_dst = get_raw_ptr(valid_range.first, true);
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
const auto elem_size = texel_layout.first;
@@ -323,7 +323,7 @@ namespace vk
}
}
flush_io();
flush_io(valid_range.first, valid_range.second);
dma_buffer->unmap();
reset_write_statistics();

View file

@@ -38,13 +38,14 @@ namespace rsx
if (locked_memory_ptr)
{
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);
*first = cpu_address_base + confirmed_range.first;
*last = cpu_address_base + valid_limit - 4;
locked_memory_ptr.flush();
locked_memory_ptr.flush(confirmed_range.first, 4);
locked_memory_ptr.flush(valid_limit - 4, 4);
}
}
@@ -321,10 +322,10 @@ namespace rsx
}
template <typename T = void>
T* get_raw_ptr(u32 offset = 0)
T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
{
verify(HERE), locked_memory_ptr;
return locked_memory_ptr.get<T>(offset);
return locked_memory_ptr.get<T>(offset, no_sync);
}
bool test_memory_head()
@@ -350,9 +351,9 @@ namespace rsx
return (*last == (cpu_address_base + valid_limit - 4));
}
void flush_io() const
void flush_io(u32 offset = 0, u32 len = 0) const
{
locked_memory_ptr.flush();
locked_memory_ptr.flush(offset, len);
}
std::pair<u32, u32> get_confirmed_range() const

View file

@@ -97,7 +97,7 @@ namespace rsx
}
template <typename T = void>
T* get(u32 offset = 0)
T* get(u32 offset = 0, bool no_sync = false)
{
if (contiguous)
{
@@ -105,7 +105,7 @@ namespace rsx
}
else
{
if (!synchronized)
if (!synchronized && !no_sync)
sync();
return (T*)(io_cache.data() + offset);
@@ -127,16 +127,70 @@ namespace rsx
synchronized = true;
}
void flush() const
void flush(u32 offset = 0, u32 len = 0) const
{
if (contiguous)
return;
u8* src = (u8*)io_cache.data();
for (const auto &block : _blocks)
if (!offset && (!len || len == io_cache.size()))
{
memcpy(block.first.get(), src, block.second);
src += block.second;
for (const auto &block : _blocks)
{
memcpy(block.first.get(), src, block.second);
src += block.second;
}
}
else
{
auto remaining_bytes = len? len : io_cache.size() - offset;
const auto write_end = remaining_bytes + offset;
u32 write_offset;
u32 write_length;
u32 base_offset = 0;
for (const auto &block : _blocks)
{
const u32 block_end = base_offset + block.second;
if (offset >= base_offset && offset < block_end)
{
// Head
write_offset = (offset - base_offset);
write_length = std::min<u32>(block.second - write_offset, remaining_bytes);
}
else if (base_offset > offset && block_end <= write_end)
{
// Completely spanned
write_offset = 0;
write_length = block.second;
}
else if (base_offset > offset && write_end < block_end)
{
// Tail
write_offset = 0;
write_length = remaining_bytes;
}
else
{
// No overlap; skip
write_length = 0;
}
if (write_length)
{
memcpy(block.first.get() + write_offset, src + (base_offset + write_offset), write_length);
verify(HERE), write_length <= remaining_bytes;
remaining_bytes -= write_length;
if (!remaining_bytes)
break;
}
base_offset += block.second;
}
}
}