rsx: Improve ARGB8->D24S8 casting

- Set up partial transfers
- Force clear of target before starting the transfer
This commit is contained in:
kd-11 2018-12-31 15:20:29 +03:00 committed by kd-11
parent 475cc99117
commit 95245bdd83
5 changed files with 75 additions and 24 deletions

View file

@ -214,12 +214,20 @@ void GLGSRender::end()
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
// Handle special memory barrier for ARGB8->D24S8 in an active DSV
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == static_cast<gl::render_target*>(ds->old_contents)->get_rsx_pitch() &&
ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8)
if (ds && ds->old_contents != nullptr &&
ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8 &&
ds->get_rsx_pitch() == static_cast<gl::render_target*>(ds->old_contents)->get_rsx_pitch())
{
// TODO: Partial memory transfer
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
// TODO: Stencil transfer
gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
const auto region = rsx::get_transferable_region(ds);
m_depth_converter.run({0, 0, std::get<0>(region), std::get<1>(region)},
{0, 0, std::get<2>(region), std::get<3>(region)},
ds->old_contents, ds);
ds->on_write();
}

View file

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "stdafx.h"
#include "GLHelpers.h"
@ -250,6 +250,7 @@ namespace gl
vs_src =
{
"#version 420\n\n"
"uniform vec2 tex_scale;\n"
"out vec2 tc0;\n"
"\n"
"void main()\n"
@ -257,7 +258,7 @@ namespace gl
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
" gl_Position = vec4(positions[gl_VertexID % 4], 0., 1.);\n"
" tc0 = coords[gl_VertexID % 4];\n"
" tc0 = coords[gl_VertexID % 4] * tex_scale;\n"
"}\n"
};
@ -275,12 +276,17 @@ namespace gl
};
}
void run(u16 w, u16 h, GLuint target, GLuint source)
void run(const areai& src_area, const areai& dst_area, gl::texture* source, gl::texture* target)
{
glActiveTexture(GL_TEXTURE31);
glBindTexture(GL_TEXTURE_2D, source);
const auto src_ratio_x = f32(src_area.x2) / source->width();
const auto src_ratio_y = f32(src_area.y2) / source->height();
overlay_pass::run(w, h, target, true);
program_handle.uniforms["tex_scale"] = color2f(src_ratio_x, src_ratio_y);
glActiveTexture(GL_TEXTURE31);
glBindTexture(GL_TEXTURE_2D, source->id());
overlay_pass::run(dst_area.x2, dst_area.y2, target->id(), true);
}
};

View file

@ -1334,21 +1334,32 @@ void VKGSRender::end()
// Check for data casts
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && ds->old_contents)
if (ds && ds->old_contents &&
ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM &&
ds->get_rsx_pitch() == static_cast<vk::render_target*>(ds->old_contents)->get_rsx_pitch())
{
if (UNLIKELY(ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM))
{
// TODO: Partial memory transfer
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE;
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE;
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds,
static_cast<vk::render_target*>(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector),
render_pass, m_framebuffers_to_clean);
VkClearDepthStencilValue clear = { 1.f, 0xFF };
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
ds->on_write();
}
// Clear explicitly before starting the inheritance transfer
vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range);
vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
// TODO: Stencil transfer
const auto region = rsx::get_transferable_region(ds);
m_depth_converter->run(*m_current_command_buffer,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region), std::get<3>(region) },
static_cast<vk::render_target*>(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector),
ds, render_pass, m_framebuffers_to_clean);
// TODO: Flush management to avoid pass running out of ubo space (very unlikely)
ds->on_write();
}
//Load textures

View file

@ -396,12 +396,16 @@ namespace vk
struct depth_convert_pass : public overlay_pass
{
f32 src_scale_x;
f32 src_scale_y;
depth_convert_pass()
{
vs_src =
{
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
"layout(location=0) out vec2 tc0;\n"
"\n"
"void main()\n"
@ -409,7 +413,7 @@ namespace vk
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
" gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
" tc0 = coords[gl_VertexIndex % 4];\n"
" tc0 = coords[gl_VertexIndex % 4] * regs[0].xy;\n"
"}\n"
};
@ -430,7 +434,28 @@ namespace vk
renderpass_config.set_depth_mask(true);
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
renderpass_config.enable_stencil_test(VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_COMPARE_OP_ALWAYS, 0xFF, 0xFF);
}
// Writes the source scale factors into a freshly allocated uniform block.
// The vertex shader of this pass multiplies its texture coordinates by
// regs[0].xy, so only the first vec4 of the block is filled in here:
// (src_scale_x, src_scale_y, 0, 0).
// The program argument is unused.
void update_uniforms(vk::glsl::program* /*program*/) override
{
    // Size in bytes of the region requested from the UBO heap.
    // NOTE(review): the <256> template argument is presumably the allocation
    // alignment - confirm against the heap implementation.
    constexpr u32 block_size = 128;

    m_ubo_offset = static_cast<u32>(m_ubo.alloc<256>(block_size));
    f32* const regs = reinterpret_cast<f32*>(m_ubo.map(m_ubo_offset, block_size));

    // Contents of regs[0]: scale in .xy, padding zeros in .zw
    const f32 reg0[4] = { src_scale_x, src_scale_y, 0.f, 0.f };
    for (u32 i = 0; i < 4; ++i)
    {
        regs[i] = reg0[i];
    }

    m_ubo.unmap();
}
// Runs the depth conversion pass from `src` into `dst`.
//
// The source scale stored in src_scale_x / src_scale_y is the ratio of
// src_area.x2 / src_area.y2 to the full width / height of the image behind
// `src`. The pass itself is then executed by overlay_pass::run using
// dst_area.x2 / dst_area.y2 as its dimensions.
// Only the x2 / y2 members of both areas are read; x1 / y1 are ignored.
void run(vk::command_buffer& cmd, const areai& src_area, const areai& dst_area, vk::image_view* src, vk::image* dst, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
    // The view must be backed by an image; its size is needed for the ratios
    auto source_image = src->image();
    verify(HERE), source_image;

    src_scale_x = static_cast<f32>(src_area.x2) / source_image->width();
    src_scale_y = static_cast<f32>(src_area.y2) / source_image->height();

    overlay_pass::run(cmd, dst_area.x2, dst_area.y2, dst, src, render_pass, framebuffer_resources);
}
};

View file

@ -448,6 +448,7 @@ namespace rsx
/**
* Calculates the regions used for memory transfer between rendertargets on succession events
* Returns <src_w, src_h, dst_w, dst_h>
*/
template <typename SurfaceType>
std::tuple<u16, u16, u16, u16> get_transferable_region(SurfaceType* surface)