forced sync2 + better barriers

psucien 2024-08-31 20:51:04 +02:00
parent 72112271c0
commit 253cc12ee7
7 changed files with 61 additions and 29 deletions

View file

@@ -118,6 +118,25 @@ public:
         return buffer;
     }
 
+    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_access_mask,
+                                                       vk::PipelineStageFlagBits2 dst_stage) {
+        if (dst_access_mask == access_mask && stage == dst_stage) {
+            return {};
+        }
+        auto barrier = vk::BufferMemoryBarrier2{
+            .srcStageMask = stage,
+            .srcAccessMask = access_mask,
+            .dstStageMask = dst_stage,
+            .dstAccessMask = dst_access_mask,
+            .buffer = buffer.buffer,
+            .size = size_bytes,
+        };
+        access_mask = dst_access_mask;
+        stage = dst_stage;
+        return barrier;
+    }
+
 public:
     VAddr cpu_addr = 0;
     bool is_picked{};
@@ -128,6 +147,8 @@ public:
     const Vulkan::Instance* instance{};
     MemoryUsage usage;
     UniqueBuffer buffer;
+    vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
+    vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
 
     struct BufferView {
         u32 offset;
         u32 size;
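A note on the technique: GetBarrier makes each buffer remember the access mask and pipeline stage of its last recorded use, and returns a vk::BufferMemoryBarrier2 only when the requested state differs, so back-to-back uses from the same stage cost nothing. Below is a compilable sketch of the same idea; TrackedBuffer is an illustrative stand-in for the cache's Buffer class, and it uses vulkan.hpp's always-available setters rather than designated initializers (which the diff presumably gets away with because the project defines VULKAN_HPP_NO_CONSTRUCTORS).

```cpp
#include <cstdio>
#include <optional>
#include <vulkan/vulkan.hpp>

// Illustrative stand-in for the cache's Buffer: remembers the last recorded
// access mask and pipeline stage, and emits a barrier only on a state change.
struct TrackedBuffer {
    vk::Buffer handle{}; // VK_NULL_HANDLE in this sketch; a live buffer in practice
    vk::DeviceSize size_bytes{};
    vk::AccessFlags2 access_mask = vk::AccessFlagBits2::eNone;
    vk::PipelineStageFlags2 stage = vk::PipelineStageFlagBits2::eNone;

    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlags2 dst_access,
                                                       vk::PipelineStageFlags2 dst_stage) {
        if (dst_access == access_mask && dst_stage == stage) {
            return std::nullopt; // same state as last use: barrier elided
        }
        const auto barrier = vk::BufferMemoryBarrier2{}
                                 .setSrcStageMask(stage)
                                 .setSrcAccessMask(access_mask)
                                 .setDstStageMask(dst_stage)
                                 .setDstAccessMask(dst_access)
                                 .setBuffer(handle)
                                 .setSize(size_bytes);
        access_mask = dst_access; // record the new state for the next caller
        stage = dst_stage;
        return barrier;
    }
};

int main() {
    TrackedBuffer buf{.size_bytes = 256};
    const auto first = buf.GetBarrier(vk::AccessFlagBits2::eShaderRead,
                                      vk::PipelineStageFlagBits2::eComputeShader);
    const auto second = buf.GetBarrier(vk::AccessFlagBits2::eShaderRead,
                                       vk::PipelineStageFlagBits2::eComputeShader);
    // Prints "first: barrier, second: none": the repeated request was elided.
    std::printf("first: %s, second: %s\n", first ? "barrier" : "none",
                second ? "barrier" : "none");
}
```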

View file

@@ -248,11 +248,11 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
     return {&buffer, buffer.Offset(device_addr)};
 }
 
-std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
+std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
     const u64 page = gpu_addr >> CACHING_PAGEBITS;
     const BufferId buffer_id = page_table[page];
     if (buffer_id) {
-        const Buffer& buffer = slot_buffers[buffer_id];
+        Buffer& buffer = slot_buffers[buffer_id];
         if (buffer.IsInBounds(gpu_addr, size)) {
             return {&buffer, buffer.Offset(gpu_addr)};
         }
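Dropping const from ObtainTempBuffer (and from the slot_buffers reference) is what lets the texture cache call GetBarrier, which mutates the buffer's tracked access_mask and stage; the header change in the next hunk mirrors this.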

View file

@@ -70,7 +70,7 @@ public:
                                              bool is_texel_buffer = false);
 
     /// Obtains a temporary buffer for usage in texture cache.
-    [[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
+    [[nodiscard]] std::pair<Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
 
     /// Return true when a region is registered on the cache
     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);

View file

@@ -104,6 +104,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
     boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
+    boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
     Shader::PushData push_data{};
     u32 binding{};
@@ -183,6 +184,13 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             }
             buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                           vsharp.GetDataFmt(), vsharp.GetNumberFmt());
+            if (auto barrier =
+                    vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
+                                                          : vk::AccessFlagBits2::eShaderRead,
+                                          vk::PipelineStageFlagBits2::eComputeShader)) {
+                buffer_barriers.emplace_back(*barrier);
+            }
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
@@ -224,7 +232,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         const auto ssharp = sampler.GetSharp(*info);
         if (ssharp.force_degamma) {
             LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
-        }
+        }
         const auto vk_sampler = texture_cache.GetSampler(ssharp);
         image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         set_writes.push_back({
@@ -242,6 +250,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
     }
 
     const auto cmdbuf = scheduler.CommandBuffer();
+
+    if (!buffer_barriers.empty()) {
+        auto dependencies = vk::DependencyInfo{
+            .bufferMemoryBarrierCount = u32(buffer_barriers.size()),
+            .pBufferMemoryBarriers = buffer_barriers.data(),
+        };
+        cmdbuf.pipelineBarrier2(dependencies);
+    }
+
     cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
                          &push_data);
     cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
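Compared with the removed vkCmdPipelineBarrier path, synchronization2 moves the stage masks out of the call signature and into each barrier struct, so barriers with different source stages batch naturally into one vkCmdPipelineBarrier2. A hedged sketch of an equivalent submission using vulkan.hpp's range setter (cmdbuf and buffer_barriers as in the hunk above; the designated-initializer form in the diff is the same thing spelled differently):

```cpp
// setBufferMemoryBarriers fills bufferMemoryBarrierCount and
// pBufferMemoryBarriers from one contiguous range, so they cannot drift apart.
if (!buffer_barriers.empty()) {
    const auto dependencies = vk::DependencyInfo{}.setBufferMemoryBarriers(buffer_barriers);
    cmdbuf.pipelineBarrier2(dependencies);
}
```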

View file

@@ -359,7 +359,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
     boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
-    boost::container::small_vector<vk::BufferMemoryBarrier, 16> buffer_barriers;
+    boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
     Shader::PushData push_data{};
     u32 binding{};
@@ -425,15 +425,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
             }
             buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written,
                                           vsharp.GetDataFmt(), vsharp.GetNumberFmt());
-            if (buffer_cache.IsRegionGpuModified(address, size)) {
-                buffer_barriers.push_back(vk::BufferMemoryBarrier{
-                    .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
-                    .dstAccessMask = vk::AccessFlagBits::eShaderRead,
-                    .buffer = vk_buffer->Handle(),
-                    .offset = offset,
-                    .size = size,
-                });
-            }
+            const auto dst_access = tex_buffer.is_written ? vk::AccessFlagBits2::eShaderWrite
+                                                          : vk::AccessFlagBits2::eShaderRead;
+            if (auto barrier = vk_buffer->GetBarrier(
+                    dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
+                buffer_barriers.emplace_back(*barrier);
+            }
         }
 
         set_writes.push_back({
@@ -501,10 +497,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
 
     const auto cmdbuf = scheduler.CommandBuffer();
     if (!buffer_barriers.empty()) {
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader |
-                                   vk::PipelineStageFlagBits::eComputeShader,
-                               vk::PipelineStageFlagBits::eFragmentShader,
-                               vk::DependencyFlagBits::eByRegion, {}, buffer_barriers, {});
+        auto dependencies = vk::DependencyInfo{
+            .bufferMemoryBarrierCount = u32(buffer_barriers.size()),
+            .pBufferMemoryBarriers = buffer_barriers.data(),
+        };
+        cmdbuf.pipelineBarrier2(dependencies);
     }
 
     if (!set_writes.empty()) {

View file

@@ -228,6 +228,7 @@ bool Instance::CreateDevice() {
     const bool maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
     add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
     add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
+    add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
 #ifdef __APPLE__
     // Required by Vulkan spec if supported.
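One caveat worth noting: requesting VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME only exposes the entry points; vkCmdPipelineBarrier2 and friends stay invalid unless the synchronization2 feature is also enabled at device creation. A minimal sketch of the usual pattern, assuming a pNext feature chain like the one CreateDevice would build (the chain shown is illustrative, not the emulator's exact one):

```cpp
// The feature struct must be linked into VkDeviceCreateInfo::pNext,
// otherwise the sync2 commands are not actually enabled.
auto sync2_features =
    vk::PhysicalDeviceSynchronization2FeaturesKHR{}.setSynchronization2(VK_TRUE);
auto device_info = vk::DeviceCreateInfo{}.setPNext(&sync2_features);
```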

View file

@@ -314,6 +314,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
     // Update tracked image usage
     image.info.usage.depth_target = true;
+    image.info.usage.stencil = has_stencil;
 
     return RegisterImageView(image_id, view_info);
 }
@@ -383,18 +384,13 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     // The obtained buffer may be written by a shader so we need to emit a barrier to prevent
     // RAW hazard
     if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) {
-        const vk::BufferMemoryBarrier post_barrier{
-            .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
-            .dstAccessMask = vk::AccessFlagBits::eTransferRead,
-            .buffer = buffer,
-            .offset = offset,
-            .size = image_size,
-        };
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader |
-                                   vk::PipelineStageFlagBits::eComputeShader,
-                               vk::PipelineStageFlagBits::eTransfer,
-                               vk::DependencyFlagBits::eByRegion, {}, post_barrier, {});
+        if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
+                                                 vk::PipelineStageFlagBits2::eTransfer)) {
+            auto dependencies = vk::DependencyInfo{
+                .bufferMemoryBarrierCount = 1,
+                .pBufferMemoryBarriers = &barrier.value(),
+            };
+            cmdbuf.pipelineBarrier2(dependencies);
+        }
     }
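For context, the hazard this hunk closes: the temporary buffer may have just been written by a shader, and the upcoming copy into the image reads it, a classic read-after-write. Since GetBarrier compares against the buffer's last recorded state, a write-to-transfer-read transition always yields a barrier. A short continuation of the TrackedBuffer sketch from the first hunk illustrates the sequence:

```cpp
// Appended to main() in the earlier TrackedBuffer sketch (add <cassert>).
TrackedBuffer staging{.size_bytes = 4096};
// A compute pass records its write...
auto write_barrier = staging.GetBarrier(vk::AccessFlagBits2::eShaderWrite,
                                        vk::PipelineStageFlagBits2::eComputeShader);
// ...so the transfer read that follows sees a state change: the RAW guard
// barrier is always emitted, never elided.
auto raw_guard = staging.GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                    vk::PipelineStageFlagBits2::eTransfer);
assert(write_barrier && raw_guard);
```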