rsx: Handle transfer_read differently from transfer_write

- Transfer writes are expected to clobber surface cache contents. Do NOT reload from CPU memory for writes.
- TODO: During transfer write to surface cache objects, lock memory if it was unlocked to avoid silly problems.
This commit is contained in:
kd-11 2021-05-08 19:08:32 +03:00 committed by kd-11
parent b085284c55
commit e3944bc67f
12 changed files with 106 additions and 38 deletions

View file

@ -36,11 +36,59 @@ namespace rsx
bytes = 2
};
enum surface_access : u32
class surface_access // This is simply a modified enum class
{
read = 0,
write = 1,
transfer = 2
public:
// Publicly visible enumerators
enum
{
shader_read = 0,
shader_write = 1,
transfer_read = 2,
transfer_write = 4,
};
private:
// Meta
enum
{
all_writes = (shader_write | transfer_write),
all_reads = (shader_read | transfer_read),
all_transfer = (transfer_read | transfer_write)
};
u32 value_;
public:
// Ctor
surface_access(u32 value) : value_(value)
{}
// Quick helpers
inline bool is_read() const
{
return !(value_ & ~all_reads);
}
// True when the value is a write access: at least one bit is set and
// no bit outside of (shader_write | transfer_write) is set.
inline bool is_write() const
{
	// shader_read is encoded as 0 and therefore has no bits outside of ANY mask.
	// Reject it explicitly, otherwise a plain shader read is reported as a write.
	return value_ != 0 && !(value_ & ~all_writes);
}
// True when the value is a transfer access: at least one bit is set and
// no bit outside of (transfer_read | transfer_write) is set.
inline bool is_transfer() const
{
	// shader_read is encoded as 0 and therefore has no bits outside of ANY mask.
	// Reject it explicitly, otherwise a plain shader read is reported as a transfer
	// (and "is_transfer() && is_read()" would match shader reads as well).
	return value_ != 0 && !(value_ & ~all_transfer);
}
// Two access objects are equal when they wrap the same raw value.
bool operator == (const surface_access& other) const
{
	const bool same_value = (other.value_ == value_);
	return same_value;
}
// Compares the wrapped raw value against a plain integer (e.g. one of the public enumerators).
bool operator == (u32 other) const
{
	const bool same_value = (other == value_);
	return same_value;
}
};
// Defines how the underlying PS3-visible memory backed by a texture is accessed

View file

@ -837,8 +837,11 @@ namespace rsx
continue;
auto surface = tex_info.second.get();
if (access == rsx::surface_access::transfer && surface->write_through())
if (access.is_transfer() && access.is_read() && surface->write_through())
{
// The surface has no data other than what can be loaded from CPU
continue;
}
if (!rsx::pitch_compatible(surface, required_pitch, required_height))
continue;
@ -1128,7 +1131,7 @@ namespace rsx
if (surface->dirty())
{
// Force memory barrier to release some resources
surface->memory_barrier(cmd, rsx::surface_access::read);
surface->memory_barrier(cmd, rsx::surface_access::shader_read);
}
else if (!surface->test())
{

View file

@ -623,7 +623,7 @@ namespace rsx
if (spp == 1 || sample_layout == rsx::surface_sample_layout::ps3)
return;
ensure(access_type != rsx::surface_access::write);
ensure(access_type.is_read() || access_type.is_transfer());
transform_samples_to_pixels(region);
}
};

View file

@ -1630,7 +1630,7 @@ namespace rsx
if (options.prefer_surface_cache)
{
const u16 block_h = (attr.depth * attr.slice_h);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
if (!overlapping_fbos.empty())
{
@ -1695,7 +1695,7 @@ namespace rsx
{
// Now check for surface cache hits
const u16 block_h = (attr.depth * attr.slice_h);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
}
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
@ -2171,9 +2171,9 @@ namespace rsx
src_address += (src.width - src_w) * src_bpp;
}
auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, rsx::flags32_t access, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
{
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer);
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, access);
if (list.empty())
{
return {};
@ -2256,11 +2256,18 @@ namespace rsx
// Check if src/dst are parts of render targets
typename surface_store_type::surface_overlap_info dst_subres;
bool use_null_region = false;
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
// NOTE: Grab the src first as requirements for reading are more strict than requirements for writing
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, surface_access::transfer_read, false);
src_is_render_target = src_subres.surface != nullptr;
if (get_location(dst_address) == CELL_GCM_LOCATION_LOCAL)
{
// TODO: HACK
// After writing, it is required to lock the memory range from access!
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, surface_access::transfer_write, false);
dst_is_render_target = dst_subres.surface != nullptr;
}
else
@ -2272,10 +2279,6 @@ namespace rsx
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch* dst_h));
}
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false);
src_is_render_target = src_subres.surface != nullptr;
if (src_is_render_target)
{
const auto surf = src_subres.surface;
@ -2543,7 +2546,7 @@ namespace rsx
// Destination dimensions are relaxed (true)
dst_area = dst_subres.src_area;
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer);
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer_write);
typeless_info.dst_context = texture_upload_context::framebuffer_storage;
dst_is_depth_surface = typeless_info.dst_is_typeless ? false : dst_subres.is_depth;
@ -2692,7 +2695,7 @@ namespace rsx
else
{
src_area = src_subres.src_area;
vram_texture = src_subres.surface->get_surface(rsx::surface_access::read);
vram_texture = src_subres.surface->get_surface(rsx::surface_access::transfer_read);
typeless_info.src_context = texture_upload_context::framebuffer_storage;
}
@ -2879,7 +2882,7 @@ namespace rsx
std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale<true>(src_area.x2, src_area.y2, surface_width, surface_height);
// The resource is of surface type; possibly disabled AA emulation
src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, src_area);
src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area);
}
if (dst_is_render_target)
@ -2890,7 +2893,7 @@ namespace rsx
std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale<true>(dst_area.x2, dst_area.y2, surface_width, surface_height);
// The resource is of surface type; possibly disabled AA emulation
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, dst_area);
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area);
}
if (helpers::is_gcm_depth_format(typeless_info.src_gcm_format) !=

View file

@ -317,7 +317,7 @@ namespace rsx
out.push_back
({
section.surface->get_surface(rsx::surface_access::read),
section.surface->get_surface(rsx::surface_access::shader_read),
surface_transform::identity,
0,
static_cast<u16>(src_x),
@ -558,7 +558,7 @@ namespace rsx
const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class();
const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static;
return { texptr->get_surface(rsx::surface_access::read), command, attr2, {},
return { texptr->get_surface(rsx::surface_access::shader_read), command, attr2, {},
texture_upload_context::framebuffer_storage, format_class, scale,
extended_dimension, decoded_remap };
}
@ -569,7 +569,7 @@ namespace rsx
if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d)
{
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap,
return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::_3d_unwrap,
attr2, {},
texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap };
@ -577,7 +577,7 @@ namespace rsx
ensure(extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap);
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap,
return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::cubemap_unwrap,
attr2, {},
texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap };

View file

@ -12,7 +12,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
gl::command_context cmd = { gl_state };
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
if (!overlap_info.empty())
{
@ -46,7 +46,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
if (viable)
{
surface->read_barrier(cmd);
image = section.surface->get_surface(rsx::surface_access::read);
image = section.surface->get_surface(rsx::surface_access::shader_read);
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
std::min(surface_width, static_cast<u16>(info->width)),

View file

@ -456,7 +456,21 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, bool /*read_
void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access)
{
const bool read_access = (access != rsx::surface_access::write);
const bool read_access = access.is_read();
const bool is_depth = is_depth_surface();
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
if (should_read_buffers)
{
// TODO: Decide what to do when memory loads are disabled but the underlying memory has changed
// NOTE: Assume test() is expensive when in a pinch
if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test())
{
// TODO: Figure out why merely returning and failing the test does not work when reading (TLoU)
// The result should have been the same either way
state_flags |= rsx::surface_state_flags::erase_bkgnd;
}
}
if (old_contents.empty())
{

View file

@ -111,8 +111,8 @@ namespace gl
}
void memory_barrier(gl::command_context& cmd, rsx::surface_access access);
void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::read); }
void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::write); }
void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); }
void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); }
};
struct framebuffer_holder : public gl::fbo, public rsx::ref_counted

View file

@ -281,7 +281,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
// Check the surface store first
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
if (!overlap_info.empty())
{
@ -315,7 +315,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
if (viable)
{
surface->read_barrier(*m_current_command_buffer);
image_to_flip = section.surface->get_surface(rsx::surface_access::read);
image_to_flip = section.surface->get_surface(rsx::surface_access::shader_read);
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
std::min(surface_width, static_cast<u16>(info->width)),

View file

@ -304,7 +304,7 @@ namespace vk
vk::viewable_image* render_target::get_surface(rsx::surface_access access_type)
{
if (samples() == 1 || access_type == rsx::surface_access::write)
if (samples() == 1 || access_type == rsx::surface_access::shader_write)
{
return this;
}
@ -369,7 +369,7 @@ namespace vk
void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access)
{
const bool read_access = (access != rsx::surface_access::write);
const bool read_access = access.is_read();
const bool is_depth = is_depth_surface();
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
@ -533,8 +533,8 @@ namespace vk
hw_blitter.scale_image(
cmd,
src_texture->get_surface(rsx::surface_access::read),
this->get_surface(rsx::surface_access::transfer),
src_texture->get_surface(rsx::surface_access::transfer_read),
this->get_surface(rsx::surface_access::transfer_write),
src_area,
dst_area,
/*linear?*/false, typeless_info);

View file

@ -53,8 +53,8 @@ namespace vk
// Synchronization
void texture_barrier(vk::command_buffer& cmd);
void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access);
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); }
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); }
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); }
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); }
};
static inline vk::render_target* as_rtt(vk::image* t)

View file

@ -197,7 +197,7 @@ namespace vk
{
auto surface = vk::as_rtt(vram_texture);
surface->read_barrier(cmd);
locked_resource = surface->get_surface(rsx::surface_access::read);
locked_resource = surface->get_surface(rsx::surface_access::shader_read);
transfer_width *= surface->samples_x;
transfer_height *= surface->samples_y;
}