rsx: Implement mipmap gathering from texture cache

This commit is contained in:
kd-11 2019-10-09 22:29:23 +03:00 committed by kd-11
parent d6d8766f8d
commit 840b52fe80
6 changed files with 263 additions and 50 deletions

View file

@ -316,6 +316,7 @@ namespace rsx
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& remap_vector) = 0;
virtual image_view_type generate_3d_from_2d_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& remap_vector) = 0;
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
virtual image_view_type generate_2d_mipmaps_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
virtual void prepare_for_dma_transfers(commandbuffer_type&) = 0;
@ -1354,6 +1355,7 @@ namespace rsx
{
desc.external_handle,
surface_transform::coordinate_transform,
0,
0, (u16)(desc.slice_h * n),
0, 0, n,
desc.width, desc.height,
@ -1379,6 +1381,7 @@ namespace rsx
{
desc.external_handle,
surface_transform::coordinate_transform,
0,
0, (u16)(desc.slice_h * n),
0, 0, n,
desc.width, desc.height,
@ -1400,6 +1403,11 @@ namespace rsx
result = create_temporary_subresource_view(cmd, &desc.external_handle, desc.gcm_format, desc.x, desc.y, desc.width, desc.height, desc.remap);
break;
}
case deferred_request_command::mipmap_gather:
{
result = generate_2d_mipmaps_from_images(cmd, desc.gcm_format, desc.width, desc.height, desc.sections_to_copy, desc.remap);
break;
}
default:
{
//Throw
@ -1407,7 +1415,7 @@ namespace rsx
}
}
if (result)
if (result && !desc.do_not_cache)
{
m_temporary_subresource_cache.insert({ desc.address,{ desc, result } });
}
@ -1438,6 +1446,7 @@ namespace rsx
u32 encoded_remap,
const texture_channel_remap_t& remap,
bool is_compressed_format,
bool skip_texture_barriers,
const utils::address_range& memory_range,
rsx::texture_dimension_extended extended_dimension,
surface_store_type& m_rtts, Args&& ... extras)
@ -1463,7 +1472,10 @@ namespace rsx
auto result = texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
cmd, texptr, attr, scale, extended_dimension, encoded_remap, remap, true, force_convert);
insert_texture_barrier(cmd, texptr);
if (!skip_texture_barriers)
{
insert_texture_barrier(cmd, texptr);
}
return result;
}
}
@ -1645,7 +1657,8 @@ namespace rsx
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
const auto extended_dimension = tex.get_extended_texture_dimension();
u32 tex_size = 0, required_surface_height, subsurface_count;
u32 tex_size = 0, required_surface_height;
u8 subsurface_count;
size2f scale{ 1.f, 1.f };
if (LIKELY(!is_swizzled))
@ -1708,54 +1721,92 @@ namespace rsx
reader_lock lock(m_cache_mutex);
auto result = fast_texture_search(cmd, attributes, scale, tex.remap(), tex.decoded_remap(),
is_compressed_format, lookup_range, extended_dimension, m_rtts,
is_compressed_format, false, lookup_range, extended_dimension, m_rtts,
std::forward<Args>(extras)...);
if (result.validate())
{
#if 0
if (subsurface_count <= 1 ||
(result.image_handle && result.upload_context == rsx::texture_upload_context::shader_read))
if (subsurface_count == 1)
{
// Full result exists
return result;
}
if (result.upload_context != rsx::texture_upload_context::blit_engine_dst &&
result.upload_context != rsx::texture_upload_context::framebuffer_storage)
switch (result.upload_context)
{
LOG_ERROR(RSX, "Unexpected surface context %d", (u32)result.upload_context);
case rsx::texture_upload_context::blit_engine_dst:
case rsx::texture_upload_context::framebuffer_storage:
break;
case rsx::texture_upload_context::shader_read:
if (!result.image_handle)
break;
// Conditional fallthrough
default:
return result;
}
// Traverse mipmap tree
auto scan_address = texaddr + (tex_pitch * tex_height);
auto scan_pitch = swizzled? (tex_pitch / 2) : tex_pitch;
auto scan_width = (tex_width / 2);
auto scan_height = (tex_height / 2);
// Traverse the mipmap tree
// Some guarantees here include:
// 1. Only 2D images will invoke this routine
// 2. The image has to have been generated on the GPU (fbo or blit target only)
std::vector<sampled_image_descriptor> sections;
sections.reserve(subsurface_count - 1);
std::vector<copy_region_descriptor> sections;
const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100);
for (u32 subsurface = 1; subsurface < subsurface_count; ++subsurface)
if (UNLIKELY(!texture_cache_helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes)))
{
const auto range = utils::address_range::start_length(scan_address, scan_pitch * scan_height);
// Abort if mip0 is not compatible
return result;
}
auto ret = fast_texture_search(cmd, scan_address, format, scan_width, scan_height, 1, scan_height,
scan_pitch, bpp, is_compressed_format, range, extended_dimension, m_rtts,
std::forward<Args>(extras)...);
auto attr2 = attributes;
sections.reserve(subsurface_count);
if (LIKELY(ret.validate()))
for (u8 subsurface = 1; subsurface < subsurface_count; ++subsurface)
{
attr2.address += (attr2.pitch * attr2.height);
attr2.width = std::max(attr2.width / 2, 1);
attr2.height = std::max(attr2.height / 2, 1);
if (is_swizzled)
{
sections.push_back(ret);
attr2.pitch = attr2.width * attr2.bpp;
}
else
const auto range = utils::address_range::start_length(attr2.address, attr2.pitch * attr2.height);
auto ret = fast_texture_search(cmd, attr2, scale, tex.remap(), tex.decoded_remap(),
false, true, range, extended_dimension, m_rtts, std::forward<Args>(extras)...);
if (!ret.validate() ||
!texture_cache_helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes))
{
// Abort
break;
}
}
#endif
return result;
if (UNLIKELY(sections.size() == 1))
{
return result;
}
else
{
// NOTE: Do not disable 'cyclic ref' since the texture_barrier may have already been issued!
result.image_handle = 0;
result.external_subresource_desc = { 0, deferred_request_command::mipmap_gather, attributes, {}, tex.decoded_remap() };
if (use_upscaling)
{
// Grab the correct image dimensions from the base mipmap level
const auto& mip0 = sections.front();
result.external_subresource_desc.width = mip0.dst_w;
result.external_subresource_desc.height = mip0.dst_h;
}
// Disable caching until the subresources store actual memory ranges!
result.external_subresource_desc.do_not_cache = true;
result.external_subresource_desc.sections_to_copy = std::move(sections);
return result;
}
}
// Do direct upload from CPU as the last resort

View file

@ -20,6 +20,7 @@ namespace rsx
{
image_resource_type src;
flags32_t xform;
u8 level;
u16 src_x;
u16 src_y;
u16 dst_x;
@ -233,6 +234,7 @@ namespace rsx
({
section.surface->get_surface(rsx::surface_access::read),
surface_transform::identity,
0,
rsx::apply_resolution_scale(src_x, true),
rsx::apply_resolution_scale(src_y, true),
rsx::apply_resolution_scale(dst_x, true),
@ -292,6 +294,7 @@ namespace rsx
({
section->get_raw_texture(),
surface_transform::identity,
0,
(u16)std::get<0>(clipped).x,
(u16)std::get<0>(clipped).y,
rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true),
@ -309,6 +312,7 @@ namespace rsx
({
section->get_raw_texture(),
surface_transform::identity,
0,
(u16)std::get<0>(clipped).x,
(u16)std::get<0>(clipped).y,
(u16)std::get<1>(clipped).x,
@ -581,5 +585,74 @@ namespace rsx
result.simplify();
return result;
}
// Appends a copy descriptor for one mipmap level to 'sections'.
// The source image comes either from the level's image view (when one is set) or from a
// deferred copy_image_static/copy_image_dynamic request. Any other deferred operation is
// not handled; in that case nothing is appended and false is returned.
// When 'apply_upscaling' is set, the destination size is passed through apply_resolution_scale
// together with the corresponding dimension of the base level.
template<typename sampled_image_descriptor, typename copy_region_descriptor_type>
bool append_mipmap_level(
	std::vector<copy_region_descriptor_type>& sections,  // Destination list
	const sampled_image_descriptor& level,               // Descriptor for the image level being checked
	const image_section_attributes_t& attr,              // Attributes of image level
	u8 mipmap_level,                                     // Level index
	bool apply_upscaling,                                // Whether to upscale the results or not
	const image_section_attributes_t& level0_attr)       // Attributes of the first mipmap level
{
	// Fields that are the same no matter where the source image comes from
	copy_region_descriptor_type entry{};
	entry.xform = surface_transform::coordinate_transform;
	entry.level = mipmap_level;
	entry.dst_w = attr.width;
	entry.dst_h = attr.height;

	if (level.image_handle)
	{
		entry.src = level.image_handle->image();

		if (level.upload_context == rsx::texture_upload_context::framebuffer_storage)
		{
			// Framebuffer sources get their extent run through the resolution scale
			entry.src_w = rsx::apply_resolution_scale(attr.width, true);
			entry.src_h = rsx::apply_resolution_scale(attr.height, true);
		}
		else
		{
			entry.src_w = attr.width;
			entry.src_h = attr.height;
		}
	}
	else
	{
		const auto& request = level.external_subresource_desc;

		if (request.op != deferred_request_command::copy_image_dynamic &&
			request.op != deferred_request_command::copy_image_static)
		{
			// TODO
			return false;
		}

		entry.src = request.external_handle;
		entry.src_w = request.width;
		entry.src_h = request.height;
	}

	// Check for upscaling if requested
	if (apply_upscaling)
	{
		entry.dst_w = rsx::apply_resolution_scale(entry.dst_w, true, level0_attr.width);
		entry.dst_h = rsx::apply_resolution_scale(entry.dst_h, true, level0_attr.height);
	}

	sections.push_back(entry);
	return true;
}
};
}

View file

@ -502,6 +502,7 @@ namespace gl
{{
src,
rsx::surface_transform::coordinate_transform,
0,
x, y, 0, 0, 0,
width, height, width, height
}};
@ -620,7 +621,7 @@ namespace gl
if (src_w == slice.dst_w && src_h == slice.dst_h)
{
glCopyImageSubData(src_image->id(), GL_TEXTURE_2D, 0, src_x, src_y, 0,
dst_image->id(), (GLenum)dst_image->get_target(), 0, slice.dst_x, slice.dst_y, slice.dst_z, src_w, src_h, 1);
dst_image->id(), (GLenum)dst_image->get_target(), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, src_w, src_h, 1);
}
else
{
@ -630,10 +631,13 @@ namespace gl
const areai src_rect = { src_x, src_y, src_x + src_w, src_y + src_h };
const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };
auto _dst = dst_image;
if (UNLIKELY(src_image->get_internal_format() != dst_image->get_internal_format()))
gl::texture* _dst;
if (src_image->get_internal_format() == dst_image->get_internal_format() && slice.level == 0)
{
_dst = dst_image;
}
else
{
verify(HERE), !typeless;
tmp = std::make_unique<texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, (GLenum)slice.src->get_internal_format());
_dst = tmp.get();
}
@ -645,7 +649,7 @@ namespace gl
{
// Data cast comes after scaling
glCopyImageSubData(tmp->id(), GL_TEXTURE_2D, 0, slice.dst_x, slice.dst_y, 0,
dst_image->id(), (GLenum)dst_image->get_target(), 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.dst_w, slice.dst_h, 1);
dst_image->id(), (GLenum)dst_image->get_target(), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, slice.dst_w, slice.dst_h, 1);
}
}
}
@ -757,12 +761,33 @@ namespace gl
return result;
}
// Builds a temporary GL_TEXTURE_2D image with one mip level per entry of 'sections_to_copy',
// fills it through copy_transfer_regions_impl and returns a view using the requested channel remap.
// The internal format and native component layout are taken from the source image of the first section.
// Ownership of the image is handed to m_temporary_surfaces before returning.
// NOTE: 'sections_to_copy' must not be empty; front() is read unconditionally.
// 'gcm_format' is not used by this implementation.
gl::texture_view* generate_2d_mipmaps_from_images(gl::command_context& cmd, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy,
	const rsx::texture_channel_remap_t& remap_vector) override
{
	const auto _template = sections_to_copy.front().src;
	const GLenum ifmt = static_cast<GLenum>(_template->get_internal_format());
	const u8 mipmaps = static_cast<u8>(sections_to_copy.size());
	const auto swizzle = _template->get_native_component_layout();

	// Own the image from the moment it is created so it is released if the copy or view creation throws
	auto image = std::make_unique<gl::viewable_image>(GL_TEXTURE_2D, width, height, 1, mipmaps, ifmt);
	image->set_native_component_layout(swizzle);

	copy_transfer_regions_impl(cmd, image.get(), sections_to_copy);

	// The view must be requested while the derived (viewable) type is still accessible
	auto view = image->get_view(get_remap_encoding(remap_vector), remap_vector);

	std::unique_ptr<gl::texture> dst_image(std::move(image));
	m_temporary_surfaces.emplace_back(dst_image);
	return view;
}
void update_image_contents(gl::command_context& cmd, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override
{
std::vector<copy_region_descriptor> region =
{{
src,
rsx::surface_transform::identity,
0,
0, 0, 0, 0, 0,
width, height, width, height
}};

View file

@ -1346,16 +1346,37 @@ void VKGSRender::end()
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
if (sampler_state->upload_context == rsx::texture_upload_context::shader_read &&
rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
{
min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8);
max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8);
lod_bias = rsx::method_registers.fragment_textures[i].bias();
}
else
{
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
f32 actual_mipmaps;
if (sampler_state->upload_context == rsx::texture_upload_context::shader_read)
{
actual_mipmaps = (f32)rsx::method_registers.fragment_textures[i].get_exact_mipmap_count();
}
else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather)
{
// Clamp min and max lod
actual_mipmaps = (f32)sampler_state->external_subresource_desc.sections_to_copy.size();
}
else
{
actual_mipmaps = 1.f;
}
if (actual_mipmaps > 1.f)
{
min_lod = std::min(min_lod, actual_mipmaps - 1.f);
max_lod = std::min(max_lod, actual_mipmaps - 1.f);
}
else
{
min_lod = max_lod = lod_bias = 0.f;
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
}
if (fs_sampler_handles[i] && m_textures_dirty[i])

View file

@ -1188,6 +1188,11 @@ private:
return info.extent.depth;
}
// Returns the mipLevels value stored in this image's info structure.
u32 mipmaps() const
{
return info.mipLevels;
}
u8 samples() const
{
return u8(info.samples);

View file

@ -466,14 +466,15 @@ namespace vk
return ref_frame > 0 && frame_tag <= ref_frame;
}
bool matches(VkFormat format, u16 w, u16 h, u16 d, VkFlags flags) const
bool matches(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkFlags flags) const
{
if (combined_image &&
combined_image->info.flags == flags &&
combined_image->format() == format &&
combined_image->width() == w &&
combined_image->height() == h &&
combined_image->depth() == d)
combined_image->depth() == d &&
combined_image->mipmaps() == mipmaps)
{
return true;
}
@ -634,6 +635,7 @@ namespace vk
else
{
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
copy_rgn.dstSubresource.mipLevel = section.level;
}
vkCmdCopyImage(cmd, src_image->value, src_image->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
@ -645,14 +647,13 @@ namespace vk
u16 dst_x = section.dst_x, dst_y = section.dst_y;
vk::image* _dst;
if (LIKELY(src_image->info.format == dst->info.format))
if (LIKELY(src_image->info.format == dst->info.format && section.level == 0))
{
_dst = dst;
}
else
{
verify(HERE), !typeless;
// Either a bitcast is required or a scale+copy to mipmap level
_dst = vk::get_typeless_helper(src_image->info.format, dst->width(), dst->height() * 2);
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
@ -724,7 +725,7 @@ namespace vk
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { s32(dst_x), s32(dst_y), 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 };
copy_rgn.dstSubresource = { dst_aspect, section.level, 0, 1 };
copy_rgn.srcSubresource = { _dst->aspect(), 0, 0, 1 };
copy_rgn.extent = { section.dst_w, section.dst_h, 1 };
@ -771,12 +772,12 @@ namespace vk
return result;
}
std::unique_ptr<vk::viewable_image> find_temporary_image(VkFormat format, u16 w, u16 h, u16 d)
std::unique_ptr<vk::viewable_image> find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps)
{
const auto current_frame = vk::get_current_frame_id();
for (auto &e : m_temporary_storage)
{
if (e.frame_tag != current_frame && e.matches(format, w, h, d, 0))
if (e.frame_tag != current_frame && e.matches(format, w, h, d, mipmaps, 0))
{
m_temporary_memory_size -= e.block_size;
e.block_size = 0;
@ -792,7 +793,7 @@ namespace vk
const auto current_frame = vk::get_current_frame_id();
for (auto &e : m_temporary_storage)
{
if (e.frame_tag != current_frame && e.matches(format, size, size, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT))
if (e.frame_tag != current_frame && e.matches(format, size, size, 1, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT))
{
m_temporary_memory_size -= e.block_size;
e.block_size = 0;
@ -814,7 +815,7 @@ namespace vk
if (LIKELY(!image_flags))
{
image = find_temporary_image(dst_format, w, h, 1);
image = find_temporary_image(dst_format, w, h, 1, 1);
}
else
{
@ -855,6 +856,7 @@ namespace vk
{{
source,
rsx::surface_transform::coordinate_transform,
0,
x, y, 0, 0, 0,
w, h, w, h
}};
@ -944,7 +946,7 @@ namespace vk
VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst_format);
if (image = find_temporary_image(dst_format, width, height, depth); !image)
if (image = find_temporary_image(dst_format, width, height, depth, 1); !image)
{
image = std::make_unique<vk::viewable_image>(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_3D,
@ -1018,12 +1020,48 @@ namespace vk
return result;
}
// Assembles a mipmapped 2D image from the given copy regions, one mip level per entry of
// 'sections_to_copy', and returns a view using the requested channel remap.
// An image returned by find_temporary_image is reused when available; otherwise a new image is
// created with the image type, format and component layout of the first section's source.
// The image is stored in m_temporary_storage before returning.
// NOTE: 'sections_to_copy' must not be empty; front() is read unconditionally.
// 'gcm_format' is not used by this implementation.
vk::image_view* generate_2d_mipmaps_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height,
	const std::vector<copy_region_descriptor>& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override
{
	const auto reference = sections_to_copy.front().src;
	const auto level_count = static_cast<u8>(sections_to_copy.size());

	std::unique_ptr<vk::viewable_image> image = find_temporary_image(reference->format(), width, height, 1, level_count);
	if (!image)
	{
		// Nothing reusable was found; allocate a fresh image modelled on the first source
		image = std::make_unique<vk::viewable_image>(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			reference->info.imageType,
			reference->info.format,
			width, height, 1, level_count, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);

		image->set_native_component_layout(reference->native_component_map);
	}

	auto view = image->get_view(get_remap_encoding(remap_vector), remap_vector);

	// The layout transitions cover every mip level of the single array layer
	VkImageSubresourceRange all_levels = { reference->aspect(), 0, level_count, 0, 1 };
	vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, all_levels);
	copy_transfer_regions_impl(cmd, image.get(), sections_to_copy);
	vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, all_levels);

	// Rough approximate
	const u32 resource_memory = width * height * 2 * 4;

	m_temporary_storage.emplace_back(image);
	m_temporary_storage.back().block_size = resource_memory;
	m_temporary_memory_size += resource_memory;

	return view;
}
void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override
{
std::vector<copy_region_descriptor> region =
{ {
src,
rsx::surface_transform::identity,
0,
0, 0, 0, 0, 0,
width, height, width, height
}};