mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 03:55:32 +00:00
rsx: Experiments with nul sink
This commit is contained in:
parent
212ac19c11
commit
858014b718
12 changed files with 420 additions and 301 deletions
|
@ -12,7 +12,8 @@ namespace rsx
|
|||
shader_read = 1,
|
||||
blit_engine_src = 2,
|
||||
blit_engine_dst = 4,
|
||||
framebuffer_storage = 8
|
||||
framebuffer_storage = 8,
|
||||
dma = 16
|
||||
};
|
||||
|
||||
enum texture_colorspace : u32
|
||||
|
|
|
@ -775,6 +775,9 @@ namespace rsx
|
|||
continue;
|
||||
|
||||
auto surface = tex_info.second.get();
|
||||
if (access == rsx::surface_access::transfer && surface->write_through())
|
||||
continue;
|
||||
|
||||
if (!rsx::pitch_compatible(surface, required_pitch, required_height))
|
||||
continue;
|
||||
|
||||
|
|
|
@ -309,6 +309,11 @@ namespace rsx
|
|||
return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty();
|
||||
}
|
||||
|
||||
bool write_through() const
|
||||
{
|
||||
return (state_flags & rsx::surface_state_flags::erase_bkgnd) && old_contents.empty();
|
||||
}
|
||||
|
||||
#if (ENABLE_SURFACE_CACHE_DEBUG)
|
||||
u64 hash_block() const
|
||||
{
|
||||
|
|
|
@ -362,6 +362,7 @@ namespace rsx
|
|||
rsx::texture_upload_context context, rsx::texture_dimension_extended type, texture_create_flags flags) = 0;
|
||||
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, texture_upload_context context,
|
||||
const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0;
|
||||
virtual section_storage_type* create_nul_section(commandbuffer_type&, const address_range &rsx_range, bool memory_load) = 0;
|
||||
virtual void enforce_surface_creation_type(section_storage_type& section, u32 gcm_format, texture_create_flags expected) = 0;
|
||||
virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0;
|
||||
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& remap_vector) = 0;
|
||||
|
@ -2429,6 +2430,7 @@ namespace rsx
|
|||
|
||||
// Check if src/dst are parts of render targets
|
||||
typename surface_store_type::surface_overlap_info dst_subres;
|
||||
bool use_null_region = false;
|
||||
if (dst_address > 0xc0000000)
|
||||
{
|
||||
// TODO: HACK
|
||||
|
@ -2442,6 +2444,7 @@ namespace rsx
|
|||
// 1. Invalidate surfaces in range
|
||||
// 2. Proceed as normal, blit into a 'normal' surface and any upload routines should catch it
|
||||
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch * dst_h));
|
||||
use_null_region = (scale_x == 1.f && scale_y == 1.f);
|
||||
}
|
||||
|
||||
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||
|
@ -2545,7 +2548,9 @@ namespace rsx
|
|||
if (!dst_is_render_target)
|
||||
{
|
||||
// Check for any available region that will fit this one
|
||||
auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height), dst.pitch, rsx::texture_upload_context::blit_engine_dst);
|
||||
const auto required_type = (use_null_region) ? texture_upload_context::dma : texture_upload_context::blit_engine_dst;
|
||||
const auto dst_range = address_range::start_length(dst_address, dst.pitch * dst.clip_height);
|
||||
auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type);
|
||||
for (const auto &surface : overlapping_surfaces)
|
||||
{
|
||||
if (!surface->is_locked())
|
||||
|
@ -2561,6 +2566,17 @@ namespace rsx
|
|||
continue;
|
||||
}
|
||||
|
||||
if (use_null_region)
|
||||
{
|
||||
if (dst_range.inside(surface->get_section_range()))
|
||||
{
|
||||
// Attach to existing region
|
||||
cached_dest = surface;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto this_address = surface->get_section_base();
|
||||
if (this_address > dst_address)
|
||||
{
|
||||
|
@ -2609,9 +2625,9 @@ namespace rsx
|
|||
|
||||
// Check if available target is acceptable
|
||||
// TODO: Check for other types of format mismatch
|
||||
bool format_mismatch = false;
|
||||
if (cached_dest)
|
||||
if (cached_dest && !use_null_region)
|
||||
{
|
||||
bool format_mismatch = false;
|
||||
if (cached_dest->is_depth_texture() != src_subres.is_depth)
|
||||
{
|
||||
// Dest surface has the wrong 'aspect'
|
||||
|
@ -2635,14 +2651,14 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (format_mismatch)
|
||||
{
|
||||
// The invalidate call before creating a new target will remove this section
|
||||
cached_dest = nullptr;
|
||||
dest_texture = 0;
|
||||
dst_area = old_dst_area;
|
||||
if (format_mismatch)
|
||||
{
|
||||
// The invalidate call before creating a new target will remove this section
|
||||
cached_dest = nullptr;
|
||||
dest_texture = 0;
|
||||
dst_area = old_dst_area;
|
||||
}
|
||||
}
|
||||
|
||||
// Create source texture if does not exist
|
||||
|
@ -2795,7 +2811,7 @@ namespace rsx
|
|||
else
|
||||
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
|
||||
|
||||
if (cached_dest)
|
||||
if (cached_dest && !use_null_region)
|
||||
{
|
||||
// Prep surface
|
||||
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
|
||||
|
@ -2847,9 +2863,9 @@ namespace rsx
|
|||
|
||||
const auto modified_range = utils::address_range::start_length(dst_address, mem_length);
|
||||
|
||||
if (dest_texture == 0)
|
||||
if (!cached_dest && !dst_is_render_target)
|
||||
{
|
||||
verify(HERE), !dst_is_render_target;
|
||||
verify(HERE), !dest_texture;
|
||||
|
||||
// Need to calculate the minium required size that will fit the data, anchored on the rsx_address
|
||||
// If the application starts off with an 'inseted' section, the guessed dimensions may not fit!
|
||||
|
@ -2859,55 +2875,72 @@ namespace rsx
|
|||
const u32 section_length = std::max(write_end, expected_end) - dst.rsx_address;
|
||||
dst_dimensions.height = section_length / dst.pitch;
|
||||
|
||||
// render target data is already in correct swizzle layout
|
||||
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
|
||||
dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
|
||||
rsx::texture_create_flags::swapped_native_component_order;
|
||||
|
||||
// Translate dst_area into the 'full' dst block based on dst.rsx_address as (0, 0)
|
||||
dst_area.x1 += dst.offset_x;
|
||||
dst_area.x2 += dst.offset_x;
|
||||
dst_area.y1 += dst.offset_y;
|
||||
dst_area.y2 += dst.offset_y;
|
||||
|
||||
lock.upgrade();
|
||||
|
||||
// NOTE: Write flag set to remove all other overlapping regions (e.g shader_read or blit_src)
|
||||
const auto rsx_range = address_range::start_length(dst.rsx_address, section_length);
|
||||
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
|
||||
|
||||
if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height)
|
||||
if (LIKELY(use_null_region))
|
||||
{
|
||||
cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
|
||||
gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d,
|
||||
channel_order);
|
||||
bool force_dma_load = false;
|
||||
if ((dst_w * dst_bpp) != dst.pitch)
|
||||
{
|
||||
// Keep Cell from touching the range we need
|
||||
const auto prot_range = modified_range.to_page_range();
|
||||
utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no);
|
||||
|
||||
force_dma_load = true;
|
||||
}
|
||||
|
||||
cached_dest = create_nul_section(cmd, rsx_range, force_dma_load);
|
||||
}
|
||||
else
|
||||
{
|
||||
// HACK: workaround for data race with Cell
|
||||
// Pre-lock the memory range we'll be touching, then load with super_ptr
|
||||
const auto prot_range = modified_range.to_page_range();
|
||||
utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no);
|
||||
// render target data is already in correct swizzle layout
|
||||
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
|
||||
dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
|
||||
rsx::texture_create_flags::swapped_native_component_order;
|
||||
|
||||
const u16 pitch_in_block = dst.pitch / dst_bpp;
|
||||
std::vector<rsx_subresource_layout> subresource_layout;
|
||||
rsx_subresource_layout subres = {};
|
||||
subres.width_in_block = dst_dimensions.width;
|
||||
subres.height_in_block = dst_dimensions.height;
|
||||
subres.pitch_in_block = pitch_in_block;
|
||||
subres.depth = 1;
|
||||
subres.data = { reinterpret_cast<const gsl::byte*>(vm::get_super_ptr(dst.rsx_address)), dst.pitch * dst_dimensions.height };
|
||||
subresource_layout.push_back(subres);
|
||||
// Translate dst_area into the 'full' dst block based on dst.rsx_address as (0, 0)
|
||||
dst_area.x1 += dst.offset_x;
|
||||
dst_area.x2 += dst.offset_x;
|
||||
dst_area.y1 += dst.offset_y;
|
||||
dst_area.y2 += dst.offset_y;
|
||||
|
||||
cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
|
||||
gcm_format, rsx::texture_upload_context::blit_engine_dst, subresource_layout,
|
||||
rsx::texture_dimension_extended::texture_dimension_2d, false);
|
||||
if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height)
|
||||
{
|
||||
cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
|
||||
gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d,
|
||||
channel_order);
|
||||
}
|
||||
else
|
||||
{
|
||||
// HACK: workaround for data race with Cell
|
||||
// Pre-lock the memory range we'll be touching, then load with super_ptr
|
||||
const auto prot_range = modified_range.to_page_range();
|
||||
utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no);
|
||||
|
||||
enforce_surface_creation_type(*cached_dest, gcm_format, channel_order);
|
||||
const u16 pitch_in_block = dst.pitch / dst_bpp;
|
||||
std::vector<rsx_subresource_layout> subresource_layout;
|
||||
rsx_subresource_layout subres = {};
|
||||
subres.width_in_block = dst_dimensions.width;
|
||||
subres.height_in_block = dst_dimensions.height;
|
||||
subres.pitch_in_block = pitch_in_block;
|
||||
subres.depth = 1;
|
||||
subres.data = { reinterpret_cast<const gsl::byte*>(vm::get_super_ptr(dst.rsx_address)), dst.pitch * dst_dimensions.height };
|
||||
subresource_layout.push_back(subres);
|
||||
|
||||
cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
|
||||
gcm_format, rsx::texture_upload_context::blit_engine_dst, subresource_layout,
|
||||
rsx::texture_dimension_extended::texture_dimension_2d, false);
|
||||
|
||||
enforce_surface_creation_type(*cached_dest, gcm_format, channel_order);
|
||||
}
|
||||
|
||||
dest_texture = cached_dest->get_raw_texture();
|
||||
typeless_info.dst_context = texture_upload_context::blit_engine_dst;
|
||||
}
|
||||
|
||||
dest_texture = cached_dest->get_raw_texture();
|
||||
typeless_info.dst_context = texture_upload_context::blit_engine_dst;
|
||||
}
|
||||
|
||||
verify(HERE), cached_dest || dst_is_render_target;
|
||||
|
@ -2979,8 +3012,15 @@ namespace rsx
|
|||
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, dst_area);
|
||||
}
|
||||
|
||||
typeless_info.analyse();
|
||||
blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info);
|
||||
if (!use_null_region)
|
||||
{
|
||||
typeless_info.analyse();
|
||||
blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info);
|
||||
}
|
||||
else
|
||||
{
|
||||
cached_dest->dma_transfer(cmd, vram_texture, src_area, modified_range, dst.pitch);
|
||||
}
|
||||
|
||||
blit_op_result result = true;
|
||||
result.is_depth = is_depth_blit;
|
||||
|
|
|
@ -1504,7 +1504,7 @@ namespace rsx
|
|||
|
||||
void add_flush_exclusion(const address_range& rng)
|
||||
{
|
||||
AUDIT(exists() && is_locked() && is_flushable());
|
||||
AUDIT(is_locked() && is_flushable());
|
||||
const auto _rng = rng.get_intersect(get_section_range());
|
||||
flush_exclusions.merge(_rng);
|
||||
}
|
||||
|
@ -1710,7 +1710,14 @@ namespace rsx
|
|||
|
||||
bool exists() const
|
||||
{
|
||||
return derived()->exists();
|
||||
if (derived()->exists())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return (context == rsx::texture_upload_context::dma && is_locked());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -883,7 +883,7 @@ namespace gl
|
|||
|
||||
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
|
||||
{
|
||||
verify(HERE), m_memory_type == memory_type::undefined;
|
||||
verify(HERE), m_memory_type != memory_type::local;
|
||||
|
||||
target target_ = current_target();
|
||||
save_binding_state save(target_, *this);
|
||||
|
|
|
@ -61,72 +61,6 @@ namespace gl
|
|||
texture::format format = texture::format::rgba;
|
||||
texture::type type = texture::type::ubyte;
|
||||
|
||||
u8 get_pixel_size(texture::format fmt_, texture::type type_)
|
||||
{
|
||||
u8 size = 1;
|
||||
switch (type_)
|
||||
{
|
||||
case texture::type::ubyte:
|
||||
case texture::type::sbyte:
|
||||
break;
|
||||
case texture::type::ushort:
|
||||
case texture::type::sshort:
|
||||
case texture::type::f16:
|
||||
size = 2;
|
||||
break;
|
||||
case texture::type::ushort_5_6_5:
|
||||
case texture::type::ushort_5_6_5_rev:
|
||||
case texture::type::ushort_4_4_4_4:
|
||||
case texture::type::ushort_4_4_4_4_rev:
|
||||
case texture::type::ushort_5_5_5_1:
|
||||
case texture::type::ushort_1_5_5_5_rev:
|
||||
return 2;
|
||||
case texture::type::uint_8_8_8_8:
|
||||
case texture::type::uint_8_8_8_8_rev:
|
||||
case texture::type::uint_10_10_10_2:
|
||||
case texture::type::uint_2_10_10_10_rev:
|
||||
case texture::type::uint_24_8:
|
||||
return 4;
|
||||
case texture::type::f32:
|
||||
case texture::type::sint:
|
||||
case texture::type::uint:
|
||||
size = 4;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(RSX, "Unsupported texture type");
|
||||
}
|
||||
|
||||
switch (fmt_)
|
||||
{
|
||||
case texture::format::r:
|
||||
break;
|
||||
case texture::format::rg:
|
||||
size *= 2;
|
||||
break;
|
||||
case texture::format::rgb:
|
||||
case texture::format::bgr:
|
||||
size *= 3;
|
||||
break;
|
||||
case texture::format::rgba:
|
||||
case texture::format::bgra:
|
||||
size *= 4;
|
||||
break;
|
||||
|
||||
//Depth formats..
|
||||
case texture::format::depth:
|
||||
size = 2;
|
||||
break;
|
||||
case texture::format::depth_stencil:
|
||||
size = 4;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(RSX, "Unsupported rtt format %d", (GLenum)fmt_);
|
||||
size = 4;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
void init_buffer(const gl::texture* src)
|
||||
{
|
||||
const u32 vram_size = src->pitch() * src->height();
|
||||
|
@ -218,6 +152,61 @@ namespace gl
|
|||
}
|
||||
}
|
||||
|
||||
void dma_transfer(gl::command_context& cmd, gl::texture* src, const areai& /*src_area*/, const utils::address_range& /*valid_range*/, u32 pitch)
|
||||
{
|
||||
init_buffer(src);
|
||||
|
||||
glGetError();
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
||||
|
||||
if (context == rsx::texture_upload_context::dma)
|
||||
{
|
||||
// Determine unpack config dynamically
|
||||
const auto format_info = gl::get_format_type(src->get_internal_format());
|
||||
format = static_cast<gl::texture::format>(std::get<0>(format_info));
|
||||
type = static_cast<gl::texture::type>(std::get<1>(format_info));
|
||||
|
||||
if ((src->aspect() & gl::image_aspect::stencil) == 0)
|
||||
{
|
||||
pack_unpack_swap_bytes = std::get<2>(format_info);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Z24S8 decode is done on the CPU for now
|
||||
pack_unpack_swap_bytes = false;
|
||||
}
|
||||
}
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
pack_settings.swap_bytes(pack_unpack_swap_bytes);
|
||||
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
real_pitch = src->pitch();
|
||||
rsx_pitch = pitch;
|
||||
|
||||
if (auto error = glGetError())
|
||||
{
|
||||
if (error == GL_OUT_OF_MEMORY && ::gl::get_driver_caps().vendor_AMD)
|
||||
{
|
||||
// AMD driver bug
|
||||
// Pixel transfer fails with GL_OUT_OF_MEMORY. Usually happens with float textures or operations attempting to swap endianness.
|
||||
// Failed operations also leak a large amount of memory
|
||||
LOG_ERROR(RSX, "Memory transfer failure (AMD bug). Please update your driver to Adrenalin 19.4.3 or newer. Format=0x%x, Type=0x%x, Swap=%d", (u32)format, (u32)type, pack_unpack_swap_bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_ERROR(RSX, "Memory transfer failed with error 0x%x. Format=0x%x, Type=0x%x", error, (u32)format, (u32)type);
|
||||
}
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
m_fence.reset();
|
||||
synchronized = true;
|
||||
sync_timestamp = get_system_time();
|
||||
}
|
||||
|
||||
void copy_texture(gl::command_context& cmd, bool miss)
|
||||
{
|
||||
ASSERT(exists());
|
||||
|
@ -284,38 +273,7 @@ namespace gl
|
|||
}
|
||||
}
|
||||
|
||||
init_buffer(target_texture);
|
||||
|
||||
glGetError();
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
pack_settings.swap_bytes(pack_unpack_swap_bytes);
|
||||
|
||||
target_texture->copy_to(nullptr, format, type, pack_settings);
|
||||
real_pitch = target_texture->pitch();
|
||||
|
||||
if (auto error = glGetError())
|
||||
{
|
||||
if (error == GL_OUT_OF_MEMORY && ::gl::get_driver_caps().vendor_AMD)
|
||||
{
|
||||
// AMD driver bug
|
||||
// Pixel transfer fails with GL_OUT_OF_MEMORY. Usually happens with float textures or operations attempting to swap endianness.
|
||||
// Failed operations also leak a large amount of memory
|
||||
LOG_ERROR(RSX, "Memory transfer failure (AMD bug). Please update your driver to Adrenalin 19.4.3 or newer. Format=0x%x, Type=0x%x, Swap=%d", (u32)format, (u32)type, pack_unpack_swap_bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_ERROR(RSX, "Memory transfer failed with error 0x%x. Format=0x%x, Type=0x%x", error, (u32)format, (u32)type);
|
||||
}
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
m_fence.reset();
|
||||
synchronized = true;
|
||||
sync_timestamp = get_system_time();
|
||||
dma_transfer(cmd, target_texture, {}, {}, rsx_pitch);
|
||||
}
|
||||
|
||||
void fill_texture(gl::texture* tex)
|
||||
|
@ -889,6 +847,21 @@ namespace gl
|
|||
return &cached;
|
||||
}
|
||||
|
||||
cached_texture_section* create_nul_section(gl::command_context& cmd, const utils::address_range& rsx_range, bool memory_load) override
|
||||
{
|
||||
auto& cached = *find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, true, false);
|
||||
ASSERT(!cached.is_locked());
|
||||
|
||||
// Prepare section
|
||||
cached.reset(rsx_range);
|
||||
cached.set_context(rsx::texture_upload_context::dma);
|
||||
cached.set_dirty(false);
|
||||
|
||||
no_access_range = cached.get_min_max(no_access_range, rsx::section_bounds::locked_range);
|
||||
update_cache_tag();
|
||||
return &cached;
|
||||
}
|
||||
|
||||
cached_texture_section* upload_image_from_cpu(gl::command_context &cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
|
||||
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override
|
||||
{
|
||||
|
|
|
@ -443,6 +443,7 @@ namespace vk
|
|||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_gather_d24x8 : cs_interleave_task
|
||||
{
|
||||
cs_gather_d24x8()
|
||||
|
@ -456,13 +457,24 @@ namespace vk
|
|||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = data[stencil_offset + s_offset];\n"
|
||||
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
|
||||
" value = (depth << 8) | stencil;\n"
|
||||
" value = (depth << 8) | stencil;\n";
|
||||
|
||||
if constexpr (!_SwapBytes)
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = value;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = bswap_u32(value);\n";
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_gather_d32x8 : cs_interleave_task
|
||||
{
|
||||
cs_gather_d32x8()
|
||||
|
@ -476,8 +488,18 @@ namespace vk
|
|||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = data[stencil_offset + s_offset];\n"
|
||||
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
|
||||
" value = (depth << 8) | stencil;\n"
|
||||
" value = (depth << 8) | stencil;\n";
|
||||
|
||||
if constexpr (!_SwapBytes)
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = value;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = bswap_u32(value);\n";
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
|
|
@ -2949,7 +2949,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
||||
m_texture_cache.lock_memory_region(
|
||||
*m_current_command_buffer, m_rtts.m_bound_depth_stencil.second, surface_range, true,
|
||||
m_depth_surface_info.width, m_depth_surface_info.height, m_framebuffer_layout.actual_zeta_pitch, gcm_format, false);
|
||||
m_depth_surface_info.width, m_depth_surface_info.height, m_framebuffer_layout.actual_zeta_pitch, gcm_format, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -148,7 +148,7 @@ namespace vk
|
|||
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
|
||||
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
|
||||
|
||||
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region);
|
||||
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes = false);
|
||||
void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
|
||||
|
||||
void copy_image_typeless(const command_buffer &cmd, image *src, image *dst, const areai& src_rect, const areai& dst_rect,
|
||||
|
|
|
@ -56,7 +56,7 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
|
||||
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes)
|
||||
{
|
||||
// Always validate
|
||||
verify("Invalid image layout!" HERE),
|
||||
|
@ -66,6 +66,7 @@ namespace vk
|
|||
{
|
||||
default:
|
||||
{
|
||||
verify("Implicit byteswap option not supported for speficied format" HERE), !swap_bytes;
|
||||
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, ®ion);
|
||||
break;
|
||||
}
|
||||
|
@ -83,8 +84,9 @@ namespace vk
|
|||
const auto allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
|
||||
verify(HERE), dst->size() >= allocation_end;
|
||||
|
||||
const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
|
||||
const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z_offset = align<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = align<u32>(z_offset + in_depth_size, 256);
|
||||
|
||||
// 1. Copy the depth and stencil blocks to separate banks
|
||||
VkBufferImageCopy sub_regions[2];
|
||||
|
@ -97,20 +99,34 @@ namespace vk
|
|||
|
||||
// 2. Interleave the separated data blocks with a compute job
|
||||
vk::cs_interleave_task *job;
|
||||
if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
if (LIKELY(!swap_bytes))
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d24x8>();
|
||||
if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d24x8<false>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d32x8<false>>();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d32x8>();
|
||||
if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d24x8<true>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
job = vk::get_compute_task<vk::cs_gather_d32x8<true>>();
|
||||
}
|
||||
}
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, dst->value, z_offset, in_depth_size + in_stencil_size,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
job->run(cmd, dst, (u32)region.bufferOffset, packed_length, (u32)z_offset, (u32)s_offset);
|
||||
job->run(cmd, dst, data_offset, packed_length, z_offset, s_offset);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
|
@ -145,8 +161,9 @@ namespace vk
|
|||
const auto allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
|
||||
verify("Out of memory (compute heap). Lower your resolution scale setting." HERE), src->size() >= allocation_end;
|
||||
|
||||
const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
|
||||
const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z_offset = align<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = align<u32>(z_offset + in_depth_size, 256);
|
||||
|
||||
// Zero out the stencil block
|
||||
vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);
|
||||
|
@ -166,7 +183,7 @@ namespace vk
|
|||
job = vk::get_compute_task<vk::cs_scatter_d32x8>();
|
||||
}
|
||||
|
||||
job->run(cmd, src, (u32)region.bufferOffset, packed_length, (u32)z_offset, (u32)s_offset);
|
||||
job->run(cmd, src, data_offset, packed_length, z_offset, s_offset);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, src->value, z_offset, in_depth_size + in_stencil_size,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
|
|
|
@ -151,8 +151,13 @@ namespace vk
|
|||
|
||||
VkFormat get_format() const
|
||||
{
|
||||
if (context == rsx::texture_upload_context::dma)
|
||||
{
|
||||
return VK_FORMAT_R32_UINT;
|
||||
}
|
||||
|
||||
ASSERT(vram_texture != nullptr);
|
||||
return vram_texture->info.format;
|
||||
return vram_texture->format();
|
||||
}
|
||||
|
||||
bool is_flushed() const
|
||||
|
@ -161,18 +166,9 @@ namespace vk
|
|||
return flushed;
|
||||
}
|
||||
|
||||
void copy_texture(vk::command_buffer& cmd, bool miss)
|
||||
void dma_transfer(vk::command_buffer& cmd, vk::image* src, const areai& src_area, const utils::address_range& valid_range, u32 pitch)
|
||||
{
|
||||
ASSERT(exists());
|
||||
|
||||
if (LIKELY(!miss))
|
||||
{
|
||||
baseclass::on_speculative_flush();
|
||||
}
|
||||
else
|
||||
{
|
||||
baseclass::on_miss();
|
||||
}
|
||||
verify(HERE), src->samples() == 1;
|
||||
|
||||
if (m_device == nullptr)
|
||||
{
|
||||
|
@ -186,9 +182,146 @@ namespace vk
|
|||
vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
|
||||
}
|
||||
|
||||
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
|
||||
const auto internal_bpp = vk::get_format_texel_width(src->format());
|
||||
const auto transfer_width = (u32)src_area.width();
|
||||
const auto transfer_height = (u32)src_area.height();
|
||||
real_pitch = internal_bpp * transfer_width;
|
||||
rsx_pitch = pitch;
|
||||
|
||||
const bool is_depth_stencil = !!(src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
if (is_depth_stencil || pack_unpack_swap_bytes)
|
||||
{
|
||||
const auto section_length = valid_range.length();
|
||||
const auto transfer_pitch = real_pitch;
|
||||
const auto task_length = transfer_pitch * src_area.height();
|
||||
|
||||
auto working_buffer = vk::get_scratch_buffer();
|
||||
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
|
||||
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||
vk::copy_image_to_buffer(cmd, src, working_buffer, region, (is_depth_stencil && pack_unpack_swap_bytes));
|
||||
|
||||
// NOTE: For depth-stencil formats, copying to buffer and byteswap are combined into one step above
|
||||
if (pack_unpack_swap_bytes && !is_depth_stencil)
|
||||
{
|
||||
const auto texel_layout = vk::get_format_element_size(src->format());
|
||||
const auto elem_size = texel_layout.first;
|
||||
vk::cs_shuffle_base *shuffle_kernel;
|
||||
|
||||
if (elem_size == 2)
|
||||
{
|
||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_16>();
|
||||
}
|
||||
else if (elem_size == 4)
|
||||
{
|
||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
shuffle_kernel->run(cmd, working_buffer, task_length);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||
}
|
||||
|
||||
if (LIKELY(rsx_pitch == real_pitch))
|
||||
{
|
||||
VkBufferCopy copy = {};
|
||||
copy.dstOffset = final_mapping.first;
|
||||
copy.size = section_length;
|
||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, 1, ©);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<VkBufferCopy> copy;
|
||||
copy.reserve(transfer_height);
|
||||
|
||||
u32 dst_offset = final_mapping.first;
|
||||
u32 src_offset = 0;
|
||||
|
||||
for (unsigned row = 0; row < transfer_height; ++row)
|
||||
{
|
||||
copy.push_back({ src_offset, dst_offset, transfer_pitch });
|
||||
src_offset += real_pitch;
|
||||
dst_offset += rsx_pitch;
|
||||
}
|
||||
|
||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, transfer_height, copy.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VkBufferImageCopy region = {};
|
||||
region.bufferRowLength = (rsx_pitch / internal_bpp);
|
||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||
|
||||
auto mapping = vk::map_dma(cmd, valid_range.start, valid_range.length());
|
||||
region.bufferOffset = mapping.first;
|
||||
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, mapping.second->value, 1, ®ion);
|
||||
}
|
||||
|
||||
src->pop_layout(cmd);
|
||||
|
||||
if (UNLIKELY(synchronized))
|
||||
{
|
||||
// Replace the wait event with a new one to avoid premature signaling!
|
||||
vk::get_resource_manager()->dispose(dma_fence);
|
||||
|
||||
VkEventCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
|
||||
vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If this is speculated, it should only occur once
|
||||
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
|
||||
}
|
||||
|
||||
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
|
||||
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
|
||||
|
||||
synchronized = true;
|
||||
sync_timestamp = get_system_time();
|
||||
}
|
||||
|
||||
void copy_texture(vk::command_buffer& cmd, bool miss)
|
||||
{
|
||||
ASSERT(exists());
|
||||
|
||||
if (LIKELY(!miss))
|
||||
{
|
||||
verify(HERE), !synchronized;
|
||||
baseclass::on_speculative_flush();
|
||||
}
|
||||
else
|
||||
{
|
||||
baseclass::on_miss();
|
||||
}
|
||||
|
||||
if (m_device == nullptr)
|
||||
{
|
||||
m_device = &cmd.get_command_pool().get_owner();
|
||||
}
|
||||
|
||||
vk::image *locked_resource = vram_texture;
|
||||
u32 transfer_width = width;
|
||||
u32 transfer_height = height;
|
||||
u32 transfer_x = 0, transfer_y = 0;
|
||||
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
|
@ -199,12 +332,7 @@ namespace vk
|
|||
transfer_height *= surface->samples_y;
|
||||
}
|
||||
|
||||
verify(HERE), locked_resource->samples() == 1;
|
||||
|
||||
vk::image* target = locked_resource;
|
||||
locked_resource->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
real_pitch = vk::get_format_texel_width(locked_resource->info.format) * locked_resource->width();
|
||||
|
||||
if (transfer_width != locked_resource->width() || transfer_height != locked_resource->height())
|
||||
{
|
||||
// TODO: Synchronize access to typeles textures
|
||||
|
@ -221,14 +349,9 @@ namespace vk
|
|||
target->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
}
|
||||
|
||||
verify(HERE), target->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
||||
|
||||
// TODO: Read back stencil values (is this really necessary?)
|
||||
const auto internal_bpp = vk::get_format_texel_width(vram_texture->format());
|
||||
const auto valid_range = get_confirmed_range();
|
||||
real_pitch = internal_bpp * transfer_width;
|
||||
|
||||
u32 transfer_x = 0, transfer_y = 0;
|
||||
if (const auto section_range = get_section_range(); section_range != valid_range)
|
||||
{
|
||||
if (const auto offset = (valid_range.start - get_section_base()))
|
||||
|
@ -250,111 +373,12 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
if ((vram_texture->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) ||
|
||||
pack_unpack_swap_bytes)
|
||||
{
|
||||
const auto section_length = valid_range.length();
|
||||
const auto transfer_pitch = transfer_width * internal_bpp;
|
||||
const auto task_length = transfer_pitch * transfer_height;
|
||||
|
||||
auto working_buffer = vk::get_scratch_buffer();
|
||||
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
|
||||
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageSubresource = { vram_texture->aspect(), 0, 0, 1 };
|
||||
region.imageOffset = { (s32)transfer_x, (s32)transfer_y, 0 };
|
||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||
vk::copy_image_to_buffer(cmd, target, working_buffer, region);
|
||||
|
||||
const auto texel_layout = vk::get_format_element_size(vram_texture->format());
|
||||
const auto elem_size = texel_layout.first;
|
||||
vk::cs_shuffle_base *shuffle_kernel;
|
||||
|
||||
if (elem_size == 2)
|
||||
{
|
||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_16>();
|
||||
}
|
||||
else if (elem_size == 4)
|
||||
{
|
||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
shuffle_kernel->run(cmd, working_buffer, task_length);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||
|
||||
if (LIKELY(rsx_pitch == real_pitch))
|
||||
{
|
||||
VkBufferCopy copy = {};
|
||||
copy.dstOffset = final_mapping.first;
|
||||
copy.size = section_length;
|
||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, 1, ©);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<VkBufferCopy> copy;
|
||||
copy.reserve(transfer_height);
|
||||
|
||||
u32 dst_offset = final_mapping.first;
|
||||
u32 src_offset = 0;
|
||||
|
||||
for (unsigned row = 0; row < transfer_height; ++row)
|
||||
{
|
||||
copy.push_back({src_offset, dst_offset, transfer_pitch});
|
||||
src_offset += real_pitch;
|
||||
dst_offset += rsx_pitch;
|
||||
}
|
||||
|
||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, transfer_height, copy.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VkBufferImageCopy region = {};
|
||||
region.bufferRowLength = (rsx_pitch / internal_bpp);
|
||||
region.imageSubresource = { vram_texture->aspect(), 0, 0, 1 };
|
||||
region.imageOffset = { (s32)transfer_x, (s32)transfer_y, 0 };
|
||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||
|
||||
auto mapping = vk::map_dma(cmd, valid_range.start, valid_range.length());
|
||||
region.bufferOffset = mapping.first;
|
||||
vkCmdCopyImageToBuffer(cmd, target->value, target->current_layout, mapping.second->value, 1, ®ion);
|
||||
}
|
||||
|
||||
locked_resource->pop_layout(cmd);
|
||||
|
||||
if (UNLIKELY(synchronized))
|
||||
{
|
||||
verify(HERE), miss;
|
||||
|
||||
// Replace the wait event with a new one to avoid premature signaling!
|
||||
vk::get_resource_manager()->dispose(dma_fence);
|
||||
|
||||
VkEventCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
|
||||
vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If this is speculated, it should only occur once
|
||||
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
|
||||
}
|
||||
|
||||
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
|
||||
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
|
||||
|
||||
synchronized = true;
|
||||
sync_timestamp = get_system_time();
|
||||
areai src_area;
|
||||
src_area.x1 = (s32)transfer_x;
|
||||
src_area.y1 = (s32)transfer_y;
|
||||
src_area.x2 = s32(transfer_x + transfer_width);
|
||||
src_area.y2 = s32(transfer_y + transfer_height);
|
||||
dma_transfer(cmd, target, src_area, valid_range, rsx_pitch);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1079,24 +1103,51 @@ namespace vk
|
|||
region.create(width, height, section_depth, mipmaps, image, pitch, true, gcm_format);
|
||||
region.set_dirty(false);
|
||||
|
||||
//Its not necessary to lock blit dst textures as they are just reused as necessary
|
||||
if (context != rsx::texture_upload_context::blit_engine_dst)
|
||||
// Its not necessary to lock blit dst textures as they are just reused as necessary
|
||||
switch (context)
|
||||
{
|
||||
case rsx::texture_upload_context::shader_read:
|
||||
case rsx::texture_upload_context::blit_engine_src:
|
||||
region.protect(utils::protection::ro);
|
||||
read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range);
|
||||
}
|
||||
else
|
||||
{
|
||||
//TODO: Confirm byte swap patterns
|
||||
//NOTE: Protection is handled by the caller
|
||||
region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
break;
|
||||
case rsx::texture_upload_context::blit_engine_dst:
|
||||
region.set_unpack_swap_bytes(true);
|
||||
no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range);
|
||||
break;
|
||||
case rsx::texture_upload_context::dma:
|
||||
case rsx::texture_upload_context::framebuffer_storage:
|
||||
// Should not initialized with this method
|
||||
default:
|
||||
fmt::throw_exception("Unexpected upload context 0x%x", u32(context));
|
||||
}
|
||||
|
||||
update_cache_tag();
|
||||
return ®ion;
|
||||
}
|
||||
|
||||
cached_texture_section* create_nul_section(vk::command_buffer& cmd, const utils::address_range& rsx_range, bool memory_load) override
{
	// Fetch (or allocate) a cache slot covering this address range.
	// The GCM format is irrelevant here since no backing image is attached.
	auto& section = *find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, true, false);
	ASSERT(!section.is_locked());

	// Configure the slot as a DMA-context section with no texture storage
	section.reset(rsx_range);
	section.set_context(rsx::texture_upload_context::dma);
	section.set_dirty(false);
	section.set_unpack_swap_bytes(true);

	if (memory_load)
	{
		// Seed the DMA shadow buffer with the current contents of guest memory
		// NOTE(review): map before load — presumably load_dma requires the range to be mapped; confirm
		vk::map_dma(cmd, rsx_range.start, rsx_range.length());
		vk::load_dma(rsx_range.start, rsx_range.length());
	}

	// Track this range for access-violation handling and bump the cache generation
	no_access_range = section.get_min_max(no_access_range, rsx::section_bounds::locked_range);
	update_cache_tag();
	return &section;
}
|
||||
|
||||
cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
|
||||
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue