mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
rsx: Handle transfer_read differently from transfer_write
- Transfer writes are expected to clobber surface cache contents. Do NOT reload from CPU memory for writes.
- TODO: During transfer write to surface cache objects, lock memory if it was unlocked to avoid silly problems.
This commit is contained in:
parent
b085284c55
commit
e3944bc67f
12 changed files with 106 additions and 38 deletions
|
@ -36,11 +36,59 @@ namespace rsx
|
|||
bytes = 2
|
||||
};
|
||||
|
||||
enum surface_access : u32
|
||||
class surface_access // This is simply a modified enum class
|
||||
{
|
||||
read = 0,
|
||||
write = 1,
|
||||
transfer = 2
|
||||
public:
|
||||
// Publicly visible enumerators
|
||||
enum
|
||||
{
|
||||
shader_read = 0,
|
||||
shader_write = 1,
|
||||
transfer_read = 2,
|
||||
transfer_write = 4,
|
||||
};
|
||||
|
||||
private:
|
||||
// Meta
|
||||
enum
|
||||
{
|
||||
all_writes = (shader_write | transfer_write),
|
||||
all_reads = (shader_read | transfer_read),
|
||||
all_transfer = (transfer_read | transfer_write)
|
||||
};
|
||||
|
||||
u32 value_;
|
||||
|
||||
public:
|
||||
// Ctor
|
||||
surface_access(u32 value) : value_(value)
|
||||
{}
|
||||
|
||||
// Quick helpers
|
||||
inline bool is_read() const
|
||||
{
|
||||
return !(value_ & ~all_reads);
|
||||
}
|
||||
|
||||
inline bool is_write() const
|
||||
{
|
||||
return !(value_ & ~all_writes);
|
||||
}
|
||||
|
||||
inline bool is_transfer() const
|
||||
{
|
||||
return !(value_ & ~all_transfer);
|
||||
}
|
||||
|
||||
bool operator == (const surface_access& other) const
|
||||
{
|
||||
return value_ == other.value_;
|
||||
}
|
||||
|
||||
bool operator == (u32 other) const
|
||||
{
|
||||
return value_ == other;
|
||||
}
|
||||
};
|
||||
|
||||
// Defines how the underlying PS3-visible memory backed by a texture is accessed
|
||||
|
|
|
@ -837,8 +837,11 @@ namespace rsx
|
|||
continue;
|
||||
|
||||
auto surface = tex_info.second.get();
|
||||
if (access == rsx::surface_access::transfer && surface->write_through())
|
||||
if (access.is_transfer() && access.is_read() && surface->write_through())
|
||||
{
|
||||
// The surface has no data other than what can be loaded from CPU
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!rsx::pitch_compatible(surface, required_pitch, required_height))
|
||||
continue;
|
||||
|
@ -1128,7 +1131,7 @@ namespace rsx
|
|||
if (surface->dirty())
|
||||
{
|
||||
// Force memory barrier to release some resources
|
||||
surface->memory_barrier(cmd, rsx::surface_access::read);
|
||||
surface->memory_barrier(cmd, rsx::surface_access::shader_read);
|
||||
}
|
||||
else if (!surface->test())
|
||||
{
|
||||
|
|
|
@ -623,7 +623,7 @@ namespace rsx
|
|||
if (spp == 1 || sample_layout == rsx::surface_sample_layout::ps3)
|
||||
return;
|
||||
|
||||
ensure(access_type != rsx::surface_access::write);
|
||||
ensure(access_type.is_read() || access_type.is_transfer());
|
||||
transform_samples_to_pixels(region);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1630,7 +1630,7 @@ namespace rsx
|
|||
if (options.prefer_surface_cache)
|
||||
{
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
|
||||
if (!overlapping_fbos.empty())
|
||||
{
|
||||
|
@ -1695,7 +1695,7 @@ namespace rsx
|
|||
{
|
||||
// Now check for surface cache hits
|
||||
const u16 block_h = (attr.depth * attr.slice_h);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read);
|
||||
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
|
||||
}
|
||||
|
||||
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
|
||||
|
@ -2171,9 +2171,9 @@ namespace rsx
|
|||
src_address += (src.width - src_w) * src_bpp;
|
||||
}
|
||||
|
||||
auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
|
||||
auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, rsx::flags32_t access, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
|
||||
{
|
||||
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer);
|
||||
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, access);
|
||||
if (list.empty())
|
||||
{
|
||||
return {};
|
||||
|
@ -2256,11 +2256,18 @@ namespace rsx
|
|||
// Check if src/dst are parts of render targets
|
||||
typename surface_store_type::surface_overlap_info dst_subres;
|
||||
bool use_null_region = false;
|
||||
|
||||
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||
// NOTE: Grab the src first as requirements for reading are more strict than requirements for writing
|
||||
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, surface_access::transfer_read, false);
|
||||
src_is_render_target = src_subres.surface != nullptr;
|
||||
|
||||
|
||||
if (get_location(dst_address) == CELL_GCM_LOCATION_LOCAL)
|
||||
{
|
||||
// TODO: HACK
|
||||
// After writing, it is required to lock the memory range from access!
|
||||
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
|
||||
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, surface_access::transfer_write, false);
|
||||
dst_is_render_target = dst_subres.surface != nullptr;
|
||||
}
|
||||
else
|
||||
|
@ -2272,10 +2279,6 @@ namespace rsx
|
|||
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch* dst_h));
|
||||
}
|
||||
|
||||
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false);
|
||||
src_is_render_target = src_subres.surface != nullptr;
|
||||
|
||||
if (src_is_render_target)
|
||||
{
|
||||
const auto surf = src_subres.surface;
|
||||
|
@ -2543,7 +2546,7 @@ namespace rsx
|
|||
// Destination dimensions are relaxed (true)
|
||||
dst_area = dst_subres.src_area;
|
||||
|
||||
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer);
|
||||
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer_write);
|
||||
typeless_info.dst_context = texture_upload_context::framebuffer_storage;
|
||||
dst_is_depth_surface = typeless_info.dst_is_typeless ? false : dst_subres.is_depth;
|
||||
|
||||
|
@ -2692,7 +2695,7 @@ namespace rsx
|
|||
else
|
||||
{
|
||||
src_area = src_subres.src_area;
|
||||
vram_texture = src_subres.surface->get_surface(rsx::surface_access::read);
|
||||
vram_texture = src_subres.surface->get_surface(rsx::surface_access::transfer_read);
|
||||
typeless_info.src_context = texture_upload_context::framebuffer_storage;
|
||||
}
|
||||
|
||||
|
@ -2879,7 +2882,7 @@ namespace rsx
|
|||
std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale<true>(src_area.x2, src_area.y2, surface_width, surface_height);
|
||||
|
||||
// The resource is of surface type; possibly disabled AA emulation
|
||||
src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, src_area);
|
||||
src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area);
|
||||
}
|
||||
|
||||
if (dst_is_render_target)
|
||||
|
@ -2890,7 +2893,7 @@ namespace rsx
|
|||
std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale<true>(dst_area.x2, dst_area.y2, surface_width, surface_height);
|
||||
|
||||
// The resource is of surface type; possibly disabled AA emulation
|
||||
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, dst_area);
|
||||
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area);
|
||||
}
|
||||
|
||||
if (helpers::is_gcm_depth_format(typeless_info.src_gcm_format) !=
|
||||
|
|
|
@ -317,7 +317,7 @@ namespace rsx
|
|||
|
||||
out.push_back
|
||||
({
|
||||
section.surface->get_surface(rsx::surface_access::read),
|
||||
section.surface->get_surface(rsx::surface_access::shader_read),
|
||||
surface_transform::identity,
|
||||
0,
|
||||
static_cast<u16>(src_x),
|
||||
|
@ -558,7 +558,7 @@ namespace rsx
|
|||
const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class();
|
||||
const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static;
|
||||
|
||||
return { texptr->get_surface(rsx::surface_access::read), command, attr2, {},
|
||||
return { texptr->get_surface(rsx::surface_access::shader_read), command, attr2, {},
|
||||
texture_upload_context::framebuffer_storage, format_class, scale,
|
||||
extended_dimension, decoded_remap };
|
||||
}
|
||||
|
@ -569,7 +569,7 @@ namespace rsx
|
|||
|
||||
if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d)
|
||||
{
|
||||
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap,
|
||||
return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::_3d_unwrap,
|
||||
attr2, {},
|
||||
texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
|
||||
rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap };
|
||||
|
@ -577,7 +577,7 @@ namespace rsx
|
|||
|
||||
ensure(extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap);
|
||||
|
||||
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap,
|
||||
return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::cubemap_unwrap,
|
||||
attr2, {},
|
||||
texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
|
||||
rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap };
|
||||
|
|
|
@ -12,7 +12,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
|
|||
gl::command_context cmd = { gl_state };
|
||||
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd,
|
||||
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
|
||||
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
|
||||
|
||||
if (!overlap_info.empty())
|
||||
{
|
||||
|
@ -46,7 +46,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
|
|||
if (viable)
|
||||
{
|
||||
surface->read_barrier(cmd);
|
||||
image = section.surface->get_surface(rsx::surface_access::read);
|
||||
image = section.surface->get_surface(rsx::surface_access::shader_read);
|
||||
|
||||
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
|
||||
std::min(surface_width, static_cast<u16>(info->width)),
|
||||
|
|
|
@ -456,7 +456,21 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, bool /*read_
|
|||
|
||||
void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access)
|
||||
{
|
||||
const bool read_access = (access != rsx::surface_access::write);
|
||||
const bool read_access = access.is_read();
|
||||
const bool is_depth = is_depth_surface();
|
||||
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
|
||||
|
||||
if (should_read_buffers)
|
||||
{
|
||||
// TODO: Decide what to do when memory loads are disabled but the underlying has memory changed
|
||||
// NOTE: Assume test() is expensive when in a pinch
|
||||
if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test())
|
||||
{
|
||||
// TODO: Figure out why merely returning and failing the test does not work when reading (TLoU)
|
||||
// The result should have been the same either way
|
||||
state_flags |= rsx::surface_state_flags::erase_bkgnd;
|
||||
}
|
||||
}
|
||||
|
||||
if (old_contents.empty())
|
||||
{
|
||||
|
|
|
@ -111,8 +111,8 @@ namespace gl
|
|||
}
|
||||
|
||||
void memory_barrier(gl::command_context& cmd, rsx::surface_access access);
|
||||
void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::read); }
|
||||
void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::write); }
|
||||
void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); }
|
||||
void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); }
|
||||
};
|
||||
|
||||
struct framebuffer_holder : public gl::fbo, public rsx::ref_counted
|
||||
|
|
|
@ -281,7 +281,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
|
|||
// Check the surface store first
|
||||
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer,
|
||||
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
|
||||
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
|
||||
|
||||
if (!overlap_info.empty())
|
||||
{
|
||||
|
@ -315,7 +315,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
|
|||
if (viable)
|
||||
{
|
||||
surface->read_barrier(*m_current_command_buffer);
|
||||
image_to_flip = section.surface->get_surface(rsx::surface_access::read);
|
||||
image_to_flip = section.surface->get_surface(rsx::surface_access::shader_read);
|
||||
|
||||
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
|
||||
std::min(surface_width, static_cast<u16>(info->width)),
|
||||
|
|
|
@ -304,7 +304,7 @@ namespace vk
|
|||
|
||||
vk::viewable_image* render_target::get_surface(rsx::surface_access access_type)
|
||||
{
|
||||
if (samples() == 1 || access_type == rsx::surface_access::write)
|
||||
if (samples() == 1 || access_type == rsx::surface_access::shader_write)
|
||||
{
|
||||
return this;
|
||||
}
|
||||
|
@ -369,7 +369,7 @@ namespace vk
|
|||
|
||||
void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access)
|
||||
{
|
||||
const bool read_access = (access != rsx::surface_access::write);
|
||||
const bool read_access = access.is_read();
|
||||
const bool is_depth = is_depth_surface();
|
||||
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
|
||||
|
||||
|
@ -533,8 +533,8 @@ namespace vk
|
|||
|
||||
hw_blitter.scale_image(
|
||||
cmd,
|
||||
src_texture->get_surface(rsx::surface_access::read),
|
||||
this->get_surface(rsx::surface_access::transfer),
|
||||
src_texture->get_surface(rsx::surface_access::transfer_read),
|
||||
this->get_surface(rsx::surface_access::transfer_write),
|
||||
src_area,
|
||||
dst_area,
|
||||
/*linear?*/false, typeless_info);
|
||||
|
|
|
@ -53,8 +53,8 @@ namespace vk
|
|||
// Synchronization
|
||||
void texture_barrier(vk::command_buffer& cmd);
|
||||
void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access);
|
||||
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); }
|
||||
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); }
|
||||
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_read); }
|
||||
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::shader_write); }
|
||||
};
|
||||
|
||||
static inline vk::render_target* as_rtt(vk::image* t)
|
||||
|
|
|
@ -197,7 +197,7 @@ namespace vk
|
|||
{
|
||||
auto surface = vk::as_rtt(vram_texture);
|
||||
surface->read_barrier(cmd);
|
||||
locked_resource = surface->get_surface(rsx::surface_access::read);
|
||||
locked_resource = surface->get_surface(rsx::surface_access::shader_read);
|
||||
transfer_width *= surface->samples_x;
|
||||
transfer_height *= surface->samples_y;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue