rsx: Minor fixes

- vk: Clear dirty textures before copying 'old contents' in case the old data does not fill the new region
- rsx: Properly decode border color - seems to be in BGRA format
- vk: better approximation of border color to better choose between the presets
- vk: Individually clear color images outside render pass and without scissor
- vk: Fix renderpass selection for clear overlay pass
- vk: Include scissor region when emulating clear mask

NOTES:
- vk: Completely avoid using vkClearXXXXimage - it's 'broken' on nvidia drivers
  The spec is vague about the function, so it's not an actual bug
  ClearAttachment is clearly defined as bypassing bound state, which works correctly
- TODO: Implement memory sampling to simulate loading precleared memory if cell used memset to preinitialize the framebuffer
  Autoclear depth to 1|255 and color to 0 is hacky!
This commit is contained in:
kd-11 2018-04-15 01:53:38 +03:00 committed by kd-11
parent da99f3cb9a
commit 91a6091d26
10 changed files with 128 additions and 110 deletions

View file

@ -259,7 +259,7 @@ void GLGSRender::end()
if (clear_depth)
{
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(1.0);
gl_state.clear_depth(1.f);
gl_state.clear_stencil(255);
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}

View file

@ -206,13 +206,12 @@ namespace gl
//Apply sampler state settings
void sampler_state::apply(rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image)
{
const f32 border_color = (f32)tex.border_color() / 255;
const f32 border_color_array[] = { border_color, border_color, border_color, border_color };
const color4f border_color = rsx::decode_border_color(tex.border_color());
glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_S, wrap_mode(tex.wrap_s()));
glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_T, wrap_mode(tex.wrap_t()));
glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_R, wrap_mode(tex.wrap_r()));
glSamplerParameterfv(samplerHandle, GL_TEXTURE_BORDER_COLOR, border_color_array);
glSamplerParameterfv(samplerHandle, GL_TEXTURE_BORDER_COLOR, border_color.rgba);
if (sampled_image->upload_context != rsx::texture_upload_context::shader_read ||
tex.get_exact_mipmap_count() <= 1)

View file

@ -68,13 +68,27 @@ namespace vk
fmt::throw_exception("Invalid mag filter (0x%x)" HERE, (u32)mag_filter);
}
// Maps an RSX border color register value onto the closest preset VkBorderColor.
// color: packed border color, unpacked via rsx::decode_border_color().
// Returns opaque white for bright, non-transparent colors; otherwise opaque or
// transparent black depending on the alpha channel.
VkBorderColor get_border_color(u32 color)
{
	//TODO: Improve accuracy
	const auto color4 = rsx::decode_border_color(color);

	// Mean channel brightness, in [0, 1] when the channels are normalized.
	// NOTE: A sqrt(r^2 + g^2 + b^2) * 0.3333 metric tops out at ~0.577 for pure white,
	// so comparing it against 0.8 could never select the white preset.
	const f32 brightness = (color4.r + color4.g + color4.b) / 3.f;

	if (brightness > 0.5f && color4.a > 0.f)
	{
		//If color elements are brighter than roughly 0.5 average, use white border
		return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
	}

	//Dark colors: alpha decides between the two black presets
	if (color4.a > 0.5f)
		return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;

	return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
}
VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap)

View file

@ -11,7 +11,7 @@ namespace vk
};
gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device);
VkBorderColor get_border_color(u8 color);
VkBorderColor get_border_color(u32 color);
VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format);
VkFormat get_compatible_sampler_format(u32 format);

View file

@ -1029,6 +1029,10 @@ void VKGSRender::update_draw_state()
//Update depth bounds min/max
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
else
{
vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f);
}
set_viewport();
@ -1084,25 +1088,54 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> textures_start = vertex_end;
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Clear any 'dirty' surfaces - possible is a recycled cache surface is used
std::vector<VkClearAttachment> buffers_to_clear;
buffers_to_clear.reserve(4);
const auto targets = rsx::utility::get_rtt_indexes(rsx::method_registers.surface_color_target());
//Check for memory clears
if (ds && ds->dirty)
{
//Clear this surface before drawing on it
VkClearValue clear_value = {};
clear_value.depthStencil = { 1.f, 255 };
buffers_to_clear.push_back({ vk::get_aspect_flags(ds->info.format), 0, clear_value });
ds->dirty = false;
}
for (u32 index = 0; index < targets.size(); ++index)
{
if (auto rtt = std::get<1>(m_rtts.m_bound_render_targets[index]))
{
if (rtt->dirty)
{
buffers_to_clear.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, index, {} });
rtt->dirty = false;
}
}
}
if (buffers_to_clear.size() > 0)
{
begin_render_pass();
VkClearRect rect = { {{0, 0}, {m_draw_fbo->width(), m_draw_fbo->height()}}, 0, 1 };
vkCmdClearAttachments(*m_current_command_buffer, (u32)buffers_to_clear.size(),
buffers_to_clear.data(), 1, &rect);
close_render_pass();
}
//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
//This routine does not recover stencil data, initialize to 255
VkClearDepthStencilValue clear_depth = { 1.f, 255 };
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_depth, 1, &range);
change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
ds->old_contents = nullptr;
ds->dirty = false;
}
else if (!g_cfg.video.strict_rendering_mode)
{
@ -1406,41 +1439,6 @@ void VKGSRender::end()
begin_render_pass();
//Clear any 'dirty' surfaces - possible is a recycled cache surface is used
std::vector<VkClearAttachment> buffers_to_clear;
buffers_to_clear.reserve(4);
const auto targets = rsx::utility::get_rtt_indexes(rsx::method_registers.surface_color_target());
if (ds && ds->dirty)
{
//Clear this surface before drawing on it
VkClearValue depth_clear_value;
depth_clear_value.depthStencil.depth = 1.f;
depth_clear_value.depthStencil.stencil = 255;
VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value };
buffers_to_clear.push_back(clear_desc);
ds->dirty = false;
}
for (int index = 0; index < targets.size(); ++index)
{
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0 && std::get<1>(m_rtts.m_bound_render_targets[index])->dirty)
{
const u32 real_index = (index == 1 && targets.size() == 1) ? 0 : static_cast<u32>(index);
buffers_to_clear.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, real_index, {} });
std::get<1>(m_rtts.m_bound_render_targets[index])->dirty = false;
}
}
if (buffers_to_clear.size() > 0)
{
VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 };
vkCmdClearAttachments(*m_current_command_buffer, static_cast<u32>(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect);
}
bool primitive_emulated = false;
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
@ -1756,15 +1754,19 @@ void VKGSRender::clear_surface(u32 mask)
color_clear_values.color.float32[3]
};
const auto fbo_format = vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first;
const auto rp_index = vk::get_render_pass_location(fbo_format, VK_FORMAT_UNDEFINED, 1);
const auto renderpass = m_render_passes[rp_index];
m_attachment_clear_pass->update_config(colormask, clear_color);
for (u32 index = 0; index < m_draw_buffers_count; ++index)
{
if (auto rtt = std::get<1>(m_rtts.m_bound_render_targets[index]))
{
vk::insert_texture_barrier(*m_current_command_buffer, rtt);
m_attachment_clear_pass->run(*m_current_command_buffer, rtt->width(), rtt->height(),
rtt, rtt->get_view(0xAAE4, rsx::default_remap_vector),
m_draw_fbo->info.renderPass, m_framebuffers_to_clean);
m_attachment_clear_pass->run(*m_current_command_buffer, rtt,
region.rect, renderpass, m_framebuffers_to_clean);
}
else
fmt::throw_exception("Unreachable" HERE);
@ -1800,13 +1802,9 @@ void VKGSRender::clear_surface(u32 mask)
if (clear_descriptors.size() > 0)
{
//TODO: Implement lw_graphics_pipe objects to manage the color write mask!
vk::enter_uninterruptible();
begin_render_pass();
vkCmdClearAttachments(*m_current_command_buffer, (u32)clear_descriptors.size(), clear_descriptors.data(), 1, &region);
close_render_pass();
vk::leave_uninterruptible();
}
}
@ -2234,20 +2232,21 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
// Rasterizer state
properties.state.set_attachment_count(m_draw_buffers_count);
properties.state.set_depth_mask(rsx::method_registers.depth_write_enabled());
properties.state.set_front_face(vk::get_front_face(rsx::method_registers.front_face_mode()));
properties.state.enable_depth_clamp(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled());
properties.state.enable_depth_bias(true);
properties.state.enable_depth_bounds_test(true);
if (rsx::method_registers.depth_test_enabled())
{
//NOTE: Like stencil, depth write is meaningless without depth test
properties.state.set_depth_mask(rsx::method_registers.depth_write_enabled());
properties.state.enable_depth_test(vk::get_compare_func(rsx::method_registers.depth_func()));
}
if (rsx::method_registers.logic_op_enabled())
properties.state.enable_logic_op(vk::get_logic_op(rsx::method_registers.logic_operation()));
if (rsx::method_registers.depth_bounds_test_enabled())
properties.state.enable_depth_bounds_test();
if (rsx::method_registers.cull_face_enabled())
properties.state.enable_cull_face(vk::get_cull_face(rsx::method_registers.cull_face_mode()));
@ -2315,36 +2314,28 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
{
if (!rsx::method_registers.two_sided_stencil_test_enabled())
{
properties.state.set_stencil_mask(rsx::method_registers.stencil_mask());
properties.state.enable_stencil_test(
vk::get_stencil_op(rsx::method_registers.stencil_op_fail()),
vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()),
vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()),
vk::get_compare_func(rsx::method_registers.stencil_func()),
rsx::method_registers.stencil_func_mask(),
rsx::method_registers.stencil_func_ref());
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
}
else
{
properties.state.set_stencil_mask_separate(0, rsx::method_registers.stencil_mask());
properties.state.set_stencil_mask_separate(1, rsx::method_registers.back_stencil_mask());
properties.state.enable_stencil_test_separate(0,
vk::get_stencil_op(rsx::method_registers.stencil_op_fail()),
vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()),
vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()),
vk::get_compare_func(rsx::method_registers.stencil_func()),
rsx::method_registers.stencil_func_mask(),
rsx::method_registers.stencil_func_ref());
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
properties.state.enable_stencil_test_separate(1,
vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()),
vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()),
vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()),
vk::get_compare_func(rsx::method_registers.back_stencil_func()),
rsx::method_registers.back_stencil_func_mask(),
rsx::method_registers.back_stencil_func_ref());
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
}
}

View file

@ -80,7 +80,7 @@ namespace vk
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);
VkImageSubresource default_image_subresource();
VkImageSubresourceRange get_image_subresource_range(uint32_t base_layer, uint32_t base_mip, uint32_t layer_count, uint32_t level_count, VkImageAspectFlags aspect);
VkImageAspectFlagBits get_aspect_flags(VkFormat format);
VkImageAspectFlags get_aspect_flags(VkFormat format);
VkSampler null_sampler();
VkImageView null_image_view(vk::command_buffer&);

View file

@ -319,19 +319,23 @@ namespace vk
vkCmdDraw(cmd, num_drawable_elements, 1, first_vertex, 0);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass)
virtual void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h)
{
load_program(cmd, render_pass, src);
VkViewport vp{};
vp.width = (f32)w;
vp.height = (f32)h;
vp.width = (f32)max_w;
vp.height = (f32)max_h;
vp.minDepth = 0.f;
vp.maxDepth = 1.f;
vkCmdSetViewport(cmd, 0, 1, &vp);
VkRect2D vs = { { 0, 0 },{ 0u + w, 0u + h } };
VkRect2D vs = { { 0, 0 }, { 0u + max_w, 0u + max_h } };
vkCmdSetScissor(cmd, 0, 1, &vs);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass)
{
load_program(cmd, render_pass, src);
set_up_viewport(cmd, w, h);
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
@ -731,6 +735,7 @@ namespace vk
{
color4f clear_color = { 0.f, 0.f, 0.f, 0.f };
color4f colormask = { 1.f, 1.f, 1.f, 1.f };
VkRect2D region = {};
attachment_clear_pass()
{
@ -793,6 +798,18 @@ namespace vk
m_ubo->unmap();
}
// Sets the dynamic viewport to cover max_w x max_h at origin (0, 0) with depth range [0, 1],
// and sets the dynamic scissor to the 'region' member instead of the full target.
void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override
{
	//VkViewport member order: x, y, width, height, minDepth, maxDepth
	const VkViewport viewport = { 0.f, 0.f, (f32)max_w, (f32)max_h, 0.f, 1.f };
	vkCmdSetViewport(cmd, 0, 1, &viewport);

	//Restrict rasterization to the stored clear rectangle
	vkCmdSetScissor(cmd, 0, 1, &region);
}
bool update_config(u32 clearmask, color4f color)
{
color4f mask = { 0.f, 0.f, 0.f, 0.f };
@ -810,5 +827,14 @@ namespace vk
return false;
}
// Runs the clear pass over 'target', limited to 'rect'.
// Stores 'rect' in the 'region' member, then forwards to overlay_pass::run using the
// target's own dimensions and its view as the source image.
// NOTE(review): 'region' is presumably consumed by set_up_viewport() during the base run - confirm
void run(vk::command_buffer &cmd, vk::render_target* target, VkRect2D rect, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
	region = rect;

	const auto target_w = target->width();
	const auto target_h = target->height();
	const auto src_view = target->get_view(0xAAE4, rsx::default_remap_vector)->value;

	overlay_pass::run(cmd, target_w, target_h, target, src_view, render_pass, framebuffer_resources);
}
};
}

View file

@ -138,29 +138,17 @@ namespace rsx
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT,
0));
change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_GENERAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
//Clear new surface
VkClearColorValue clear_color;
VkImageSubresourceRange range = vk::get_image_subresource_range(0,0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
clear_color.float32[0] = 0.f;
clear_color.float32[1] = 0.f;
clear_color.float32[2] = 0.f;
clear_color.float32[3] = 0.f;
vkCmdClearColorImage(*cmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
rtt->native_component_map = fmt.second;
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format);
rtt->surface_width = (u16)width;
rtt->surface_height = (u16)height;
rtt->dirty = true;
if (old_surface != nullptr && old_surface->info.format == requested_format)
{
rtt->old_contents = old_surface;
rtt->dirty = true;
}
return rtt;
}
@ -193,15 +181,6 @@ namespace rsx
0));
ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R };
change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_GENERAL, range);
//Clear new surface..
VkClearDepthStencilValue clear_depth = {};
clear_depth.depth = 1.f;
clear_depth.stencil = 255;
vkCmdClearDepthStencilImage(*cmd, ds->value, VK_IMAGE_LAYOUT_GENERAL, &clear_depth, 1, &range);
change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
ds->native_pitch = (u16)width * 2;
@ -211,12 +190,10 @@ namespace rsx
ds->attachment_aspect_flag = range.aspectMask;
ds->surface_width = (u16)width;
ds->surface_height = (u16)height;
ds->dirty = true;
if (old_surface != nullptr && old_surface->info.format == requested_format)
{
ds->old_contents = old_surface;
ds->dirty = true;
}
return ds;
}

View file

@ -41,17 +41,17 @@ namespace vk
return subres;
}
VkImageAspectFlagBits get_aspect_flags(VkFormat format)
VkImageAspectFlags get_aspect_flags(VkFormat format)
{
switch (format)
{
default:
return VkImageAspectFlagBits(VK_IMAGE_ASPECT_COLOR_BIT);
return VK_IMAGE_ASPECT_COLOR_BIT;
case VK_FORMAT_D16_UNORM:
return VkImageAspectFlagBits(VK_IMAGE_ASPECT_DEPTH_BIT);
return VK_IMAGE_ASPECT_DEPTH_BIT;
case VK_FORMAT_D24_UNORM_S8_UINT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return VkImageAspectFlagBits(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
}

View file

@ -1,6 +1,7 @@
#pragma once
#include "../System.h"
#include "Utilities/geometry.h"
#include "gcm_enums.h"
#include <atomic>
@ -470,4 +471,14 @@ namespace rsx
blue = false;
alpha = false;
}
// Unpacks a 32-bit border color into normalized float channels.
// Byte layout, lowest byte first: blue, green, red, alpha; each byte is divided by 255.
inline color4f decode_border_color(u32 colorref)
{
	//Extracts the 8-bit channel starting at bit 'shift' and scales it to [0, 1]
	const auto channel = [colorref](u32 shift)
	{
		return ((colorref >> shift) & 0xFF) / 255.f;
	};

	color4f decoded;
	decoded.b = channel(0);
	decoded.g = channel(8);
	decoded.r = channel(16);
	decoded.a = channel(24);
	return decoded;
}
}