Implement transform feedback queries and draws.
This commit is contained in:
parent
3fded314f2
commit
88f0014bc7
20 changed files with 252 additions and 71 deletions
|
@ -82,6 +82,7 @@ void LogSettings() {
|
||||||
values.use_asynchronous_gpu_emulation.GetValue());
|
values.use_asynchronous_gpu_emulation.GetValue());
|
||||||
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
|
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
|
||||||
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
|
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
|
||||||
|
log_setting("Renderer_TransformFeedbackQuery", values.transform_feedback_query.GetValue());
|
||||||
log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
|
log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
|
||||||
log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
|
log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
|
||||||
log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
|
log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
|
||||||
|
@ -246,6 +247,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||||
values.use_asynchronous_gpu_emulation.SetGlobal(true);
|
values.use_asynchronous_gpu_emulation.SetGlobal(true);
|
||||||
values.nvdec_emulation.SetGlobal(true);
|
values.nvdec_emulation.SetGlobal(true);
|
||||||
values.accelerate_astc.SetGlobal(true);
|
values.accelerate_astc.SetGlobal(true);
|
||||||
|
values.transform_feedback_query.SetGlobal(true);
|
||||||
values.async_astc.SetGlobal(true);
|
values.async_astc.SetGlobal(true);
|
||||||
values.astc_recompression.SetGlobal(true);
|
values.astc_recompression.SetGlobal(true);
|
||||||
values.use_reactive_flushing.SetGlobal(true);
|
values.use_reactive_flushing.SetGlobal(true);
|
||||||
|
|
|
@ -468,6 +468,7 @@ struct Values {
|
||||||
SwitchableSetting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
|
SwitchableSetting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
|
||||||
SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
|
SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
|
||||||
SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
|
SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
|
||||||
|
SwitchableSetting<bool> transform_feedback_query{false, "transform_feedback_query"};
|
||||||
SwitchableSetting<bool> async_astc{false, "async_astc"};
|
SwitchableSetting<bool> async_astc{false, "async_astc"};
|
||||||
Setting<VSyncMode, true> vsync_mode{VSyncMode::FIFO, VSyncMode::Immediate,
|
Setting<VSyncMode, true> vsync_mode{VSyncMode::FIFO, VSyncMode::Immediate,
|
||||||
VSyncMode::FIFORelaxed, "use_vsync"};
|
VSyncMode::FIFORelaxed, "use_vsync"};
|
||||||
|
|
|
@ -255,6 +255,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
|
||||||
AddField(field_type, "Renderer_NvdecEmulation",
|
AddField(field_type, "Renderer_NvdecEmulation",
|
||||||
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
|
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
|
||||||
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
|
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
|
||||||
|
AddField(field_type, "Renderer_TransformFeedbackQuery",
|
||||||
|
Settings::values.transform_feedback_query.GetValue());
|
||||||
AddField(field_type, "Renderer_UseVsync",
|
AddField(field_type, "Renderer_UseVsync",
|
||||||
TranslateVSyncMode(Settings::values.vsync_mode.GetValue()));
|
TranslateVSyncMode(Settings::values.vsync_mode.GetValue()));
|
||||||
AddField(field_type, "Renderer_ShaderBackend",
|
AddField(field_type, "Renderer_ShaderBackend",
|
||||||
|
|
|
@ -631,6 +631,22 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||||
rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed,
|
rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed,
|
||||||
system.GPU().GetTicks());
|
system.GPU().GetTicks());
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
|
case Regs::ReportSemaphore::Report::StreamingPrimitivesSucceeded:
|
||||||
|
if (Settings::values.transform_feedback_query) {
|
||||||
|
if (regs.report_semaphore.query.sub_report == 0) {
|
||||||
|
ASSERT(regs.transform_feedback.controls[0].stride != 0);
|
||||||
|
return rasterizer->GetTransformFeedbackByteCount() /
|
||||||
|
regs.transform_feedback.controls[0].stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
case Regs::ReportSemaphore::Report::StreamingByteCount:
|
||||||
|
if (Settings::values.transform_feedback_query) {
|
||||||
|
if (regs.report_semaphore.query.sub_report == 0) {
|
||||||
|
return rasterizer->GetTransformFeedbackByteCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
default:
|
default:
|
||||||
LOG_DEBUG(HW_GPU, "Unimplemented query report type {}",
|
LOG_DEBUG(HW_GPU, "Unimplemented query report type {}",
|
||||||
regs.report_semaphore.query.report.Value());
|
regs.report_semaphore.query.report.Value());
|
||||||
|
|
|
@ -462,6 +462,25 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class HLE_B5F74EDB717278EC final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_B5F74EDB717278EC(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
|
||||||
|
maxwell3d.regs.draw.begin = parameters[0];
|
||||||
|
maxwell3d.regs.draw_auto_stride = parameters[1];
|
||||||
|
maxwell3d.regs.draw_auto_byte_count = parameters[2];
|
||||||
|
|
||||||
|
if (maxwell3d.ShouldExecute()) {
|
||||||
|
maxwell3d.draw_manager->DrawArray(
|
||||||
|
maxwell3d.regs.draw.topology, 0,
|
||||||
|
maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||||
|
@ -536,6 +555,11 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||||
return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__);
|
return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__);
|
||||||
}));
|
}));
|
||||||
|
builders.emplace(0xB5F74EDB717278ECULL,
|
||||||
|
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||||
|
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||||
|
return std::make_unique<HLE_B5F74EDB717278EC>(maxwell3d__);
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
HLEMacro::~HLEMacro() = default;
|
HLEMacro::~HLEMacro() = default;
|
||||||
|
|
|
@ -176,5 +176,7 @@ public:
|
||||||
virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
|
virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
|
||||||
|
|
||||||
virtual void ReleaseChannel(s32 channel_id) {}
|
virtual void ReleaseChannel(s32 channel_id) {}
|
||||||
|
|
||||||
|
virtual u32 GetTransformFeedbackByteCount() = 0;
|
||||||
};
|
};
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -100,5 +100,8 @@ void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loadin
|
||||||
void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {}
|
void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {}
|
||||||
void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {}
|
void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {}
|
||||||
void RasterizerNull::ReleaseChannel(s32 channel_id) {}
|
void RasterizerNull::ReleaseChannel(s32 channel_id) {}
|
||||||
|
/// Always reports a transform feedback byte count of zero.
u32 RasterizerNull::GetTransformFeedbackByteCount() {
    constexpr u32 bytes_written = 0;
    return bytes_written;
}
|
||||||
|
|
||||||
} // namespace Null
|
} // namespace Null
|
||||||
|
|
|
@ -84,6 +84,7 @@ public:
|
||||||
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
|
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
|
||||||
void BindChannel(Tegra::Control::ChannelState& channel) override;
|
void BindChannel(Tegra::Control::ChannelState& channel) override;
|
||||||
void ReleaseChannel(s32 channel_id) override;
|
void ReleaseChannel(s32 channel_id) override;
|
||||||
|
u32 GetTransformFeedbackByteCount() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Tegra::GPU& m_gpu;
|
Tegra::GPU& m_gpu;
|
||||||
|
|
|
@ -67,9 +67,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||||
state_tracker, gpu.ShaderNotify()),
|
state_tracker, gpu.ShaderNotify()),
|
||||||
query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
|
query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
|
||||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||||
blit_image(program_manager_) {}
|
blit_image(program_manager_) {
|
||||||
|
glGenQueries(1, &transform_query);
|
||||||
|
}
|
||||||
|
|
||||||
/// Deletes the single query object whose name is stored in transform_query.
RasterizerOpenGL::~RasterizerOpenGL() {
    constexpr GLsizei num_queries = 1;
    glDeleteQueries(num_queries, &transform_query);
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SyncVertexFormats() {
|
void RasterizerOpenGL::SyncVertexFormats() {
|
||||||
auto& flags = maxwell3d->dirty.flags;
|
auto& flags = maxwell3d->dirty.flags;
|
||||||
|
@ -233,12 +237,15 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||||
|
|
||||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
|
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
|
||||||
|
|
||||||
|
if (host_tfb_enabled && !maxwell3d->regs.transform_feedback_enabled) {
|
||||||
|
EndTransformFeedback();
|
||||||
|
} else {
|
||||||
BeginTransformFeedback(pipeline, primitive_mode);
|
BeginTransformFeedback(pipeline, primitive_mode);
|
||||||
|
}
|
||||||
|
|
||||||
draw_func(primitive_mode);
|
draw_func(primitive_mode);
|
||||||
|
|
||||||
EndTransformFeedback();
|
|
||||||
|
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||||
}
|
}
|
||||||
|
@ -1251,27 +1258,44 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
||||||
|
|
||||||
/// Starts or resumes host transform feedback according to the guest's transform_feedback_enabled
/// register.
///
/// - If host transform feedback is already running, only prev_tfb_enabled is refreshed.
/// - If the guest register is set now and was set on the previous update, the host transform
///   feedback is resumed.
/// - If the guest register is set now but was not before, a primitives-written query and a new
///   host transform feedback are begun.
///
/// @param program        Pipeline whose transform feedback state gets configured.
/// @param primitive_mode Host primitive mode of the upcoming draw; only GL_POINTS is implemented
///                       when a new transform feedback is begun.
void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
    const auto& regs = maxwell3d->regs;
    const bool guest_tfb_enabled = regs.transform_feedback_enabled != 0;

    // This function is reached for draws that do not use transform feedback as well, so the
    // tessellation limitation must only be reported while the guest has transform feedback
    // enabled. Otherwise every tessellated draw would be flagged as unimplemented.
    UNIMPLEMENTED_IF(guest_tfb_enabled &&
                     (regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
                      regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)));

    if (host_tfb_enabled) {
        // Already capturing on the host; just remember the guest state.
        prev_tfb_enabled = guest_tfb_enabled;
        return;
    }

    program->ConfigureTransformFeedback();

    if (guest_tfb_enabled) {
        if (prev_tfb_enabled) {
            // Current and previous guest states are both enabled, so we're resuming.
            glResumeTransformFeedback();
        } else {
            UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);

            // We may have to call BeginTransformFeedbackNV here since they seem to call different
            // implementations on Nvidia's driver (the pointer is different) but we are using
            // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
            // extension doesn't define BeginTransformFeedback (without NV) interactions. It just
            // works.
            glBeginQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, transform_query);
            glBeginTransformFeedback(GL_POINTS);
        }
        host_tfb_enabled = true;
    }
    prev_tfb_enabled = guest_tfb_enabled;
}
|
||||||
|
|
||||||
void RasterizerOpenGL::EndTransformFeedback() {
|
void RasterizerOpenGL::EndTransformFeedback(bool force) {
|
||||||
if (maxwell3d->regs.transform_feedback_enabled != 0) {
|
if (!force && host_tfb_enabled && maxwell3d->regs.transform_feedback_enabled) {
|
||||||
|
// guest is still active, so pause
|
||||||
|
glPauseTransformFeedback();
|
||||||
|
host_tfb_enabled = false;
|
||||||
|
} else if (host_tfb_enabled) {
|
||||||
glEndTransformFeedback();
|
glEndTransformFeedback();
|
||||||
|
glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);
|
||||||
|
host_tfb_enabled = false;
|
||||||
}
|
}
|
||||||
|
prev_tfb_enabled = maxwell3d->regs.transform_feedback_enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
void RasterizerOpenGL::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
||||||
|
@ -1311,6 +1335,15 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
|
||||||
query_cache.EraseChannel(channel_id);
|
query_cache.EraseChannel(channel_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Force-ends any running host transform feedback, reads back the result of transform_query and
/// converts it to bytes by multiplying with the stride of transform feedback control 0.
///
/// @return Query result multiplied by regs.transform_feedback.controls[0].stride.
u32 RasterizerOpenGL::GetTransformFeedbackByteCount() {
    EndTransformFeedback(true);

    // Stays zero if the driver does not write a result.
    GLuint primitives_written = 0;
    glGetQueryObjectuiv(transform_query, GL_QUERY_RESULT, &primitives_written);

    const auto& tfb_control = maxwell3d->regs.transform_feedback.controls[0];
    return primitives_written * tfb_control.stride;
}
|
||||||
|
|
||||||
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_)
|
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_)
|
||||||
: buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {}
|
: buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {}
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,8 @@ public:
|
||||||
|
|
||||||
void ReleaseChannel(s32 channel_id) override;
|
void ReleaseChannel(s32 channel_id) override;
|
||||||
|
|
||||||
|
u32 GetTransformFeedbackByteCount() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr size_t MAX_TEXTURES = 192;
|
static constexpr size_t MAX_TEXTURES = 192;
|
||||||
static constexpr size_t MAX_IMAGES = 48;
|
static constexpr size_t MAX_IMAGES = 48;
|
||||||
|
@ -222,7 +224,7 @@ private:
|
||||||
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
|
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
|
||||||
|
|
||||||
/// End a transform feedback
|
/// End a transform feedback
|
||||||
void EndTransformFeedback();
|
void EndTransformFeedback(bool force = false);
|
||||||
|
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
|
|
||||||
|
@ -254,6 +256,9 @@ private:
|
||||||
bool has_written_global_memory = false;
|
bool has_written_global_memory = false;
|
||||||
|
|
||||||
u32 last_clip_distance_mask = 0;
|
u32 last_clip_distance_mask = 0;
|
||||||
|
bool host_tfb_enabled{};
|
||||||
|
bool prev_tfb_enabled{};
|
||||||
|
GLuint transform_query{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -610,4 +610,20 @@ void BufferCacheRuntime::ReserveNullBuffer() {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Allocates the 4-byte device-local buffer used as a transform feedback counter.
///
/// The buffer is created with transform-feedback-counter, transfer-source and indirect-buffer
/// usage, and with exclusive sharing mode.
///
/// @return The newly created buffer.
vk::Buffer BufferCacheRuntime::CreateTransformCounterBuffer() {
    const VkBufferUsageFlags usage_flags = VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT |
                                           VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                           VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    const VkBufferCreateInfo counter_ci{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = 4,
        .usage = usage_flags,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    };
    return memory_allocator.CreateBuffer(counter_ci, MemoryUsage::DeviceLocal);
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -83,7 +83,7 @@ public:
|
||||||
|
|
||||||
void PreCopyBarrier();
|
void PreCopyBarrier();
|
||||||
|
|
||||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
void CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||||
|
|
||||||
void PostCopyBarrier();
|
void PostCopyBarrier();
|
||||||
|
@ -124,6 +124,8 @@ public:
|
||||||
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
|
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::Buffer CreateTransformCounterBuffer();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||||
guest_descriptor_queue.AddBuffer(buffer, offset, size);
|
guest_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||||
|
|
|
@ -176,6 +176,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||||
wfi_event(device.GetLogical().CreateEvent()) {
|
wfi_event(device.GetLogical().CreateEvent()) {
|
||||||
scheduler.SetQueryCache(query_cache);
|
scheduler.SetQueryCache(query_cache);
|
||||||
|
transform_counter = buffer_cache_runtime.CreateTransformCounterBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||||
|
@ -206,13 +207,15 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||||
pipeline->SetEngine(maxwell3d, gpu_memory);
|
pipeline->SetEngine(maxwell3d, gpu_memory);
|
||||||
pipeline->Configure(is_indexed);
|
pipeline->Configure(is_indexed);
|
||||||
|
|
||||||
|
if (host_tfb_enabled && !maxwell3d->regs.transform_feedback_enabled) {
|
||||||
|
EndTransformFeedback();
|
||||||
|
} else {
|
||||||
BeginTransformFeedback();
|
BeginTransformFeedback();
|
||||||
|
}
|
||||||
|
|
||||||
UpdateDynamicStates();
|
UpdateDynamicStates();
|
||||||
|
|
||||||
draw_func();
|
draw_func();
|
||||||
|
|
||||||
EndTransformFeedback();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||||
|
@ -902,30 +905,46 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::BeginTransformFeedback() {
|
void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
const auto& regs = maxwell3d->regs;
|
|
||||||
if (regs.transform_feedback_enabled == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!device.IsExtTransformFeedbackSupported()) {
|
if (!device.IsExtTransformFeedbackSupported()) {
|
||||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const auto& regs = maxwell3d->regs;
|
||||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
|
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
|
||||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
|
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
|
||||||
scheduler.Record(
|
|
||||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
if (host_tfb_enabled) {
|
||||||
|
prev_tfb_enabled = regs.transform_feedback_enabled;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regs.transform_feedback_enabled && prev_tfb_enabled) {
|
||||||
|
// if current and previous tfbs are enabled, we're resuming.
|
||||||
|
scheduler.Record([buffer = *transform_counter](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.BeginTransformFeedbackEXT(0, 1, &buffer, nullptr);
|
||||||
|
});
|
||||||
|
host_tfb_enabled = true;
|
||||||
|
} else if (regs.transform_feedback_enabled) {
|
||||||
|
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||||
|
});
|
||||||
|
host_tfb_enabled = true;
|
||||||
|
}
|
||||||
|
prev_tfb_enabled = regs.transform_feedback_enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::EndTransformFeedback() {
|
void RasterizerVulkan::EndTransformFeedback() {
|
||||||
const auto& regs = maxwell3d->regs;
|
|
||||||
if (regs.transform_feedback_enabled == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!device.IsExtTransformFeedbackSupported()) {
|
if (!device.IsExtTransformFeedbackSupported()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
scheduler.Record(
|
|
||||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
if (host_tfb_enabled) {
|
||||||
|
scheduler.Record([buffer = *transform_counter](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.EndTransformFeedbackEXT(0, 1, &buffer, nullptr);
|
||||||
|
});
|
||||||
|
host_tfb_enabled = false;
|
||||||
|
}
|
||||||
|
prev_tfb_enabled = maxwell3d->regs.transform_feedback_enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
|
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||||
|
@ -1494,4 +1513,22 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
|
||||||
query_cache.EraseChannel(channel_id);
|
query_cache.EraseChannel(channel_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ends any running host transform feedback, copies the 4-byte contents of transform_counter into
/// a download staging buffer, waits for the scheduler to finish and returns the copied value.
///
/// @return The 32-bit value read back from the counter buffer.
u32 RasterizerVulkan::GetTransformFeedbackByteCount() {
    EndTransformFeedback();

    constexpr size_t counter_size = 4;
    auto staging = buffer_cache_runtime.DownloadStagingBuffer(counter_size);

    // One copy region: from offset 0 of the counter buffer to the staging allocation's offset.
    std::array copy_regions{VideoCommon::BufferCopy{
        0,
        staging.offset,
        counter_size,
    }};
    buffer_cache_runtime.CopyBuffer(staging.buffer, *transform_counter,
                                    std::span<VideoCommon::BufferCopy>(copy_regions));

    // The staging memory is read right after this call returns.
    scheduler.Finish();

    u32 byte_count = 0;
    std::memcpy(&byte_count, staging.mapped_span.data(), counter_size);
    return byte_count;
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -131,6 +131,8 @@ public:
|
||||||
|
|
||||||
void ReleaseChannel(s32 channel_id) override;
|
void ReleaseChannel(s32 channel_id) override;
|
||||||
|
|
||||||
|
u32 GetTransformFeedbackByteCount() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr size_t MAX_TEXTURES = 192;
|
static constexpr size_t MAX_TEXTURES = 192;
|
||||||
static constexpr size_t MAX_IMAGES = 48;
|
static constexpr size_t MAX_IMAGES = 48;
|
||||||
|
@ -206,6 +208,9 @@ private:
|
||||||
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
|
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
|
||||||
|
|
||||||
u32 draw_counter = 0;
|
u32 draw_counter = 0;
|
||||||
|
bool host_tfb_enabled{};
|
||||||
|
bool prev_tfb_enabled{};
|
||||||
|
vk::Buffer transform_counter;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -100,6 +100,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkCmdDrawIndexedIndirect);
|
X(vkCmdDrawIndexedIndirect);
|
||||||
X(vkCmdDrawIndirectCount);
|
X(vkCmdDrawIndirectCount);
|
||||||
X(vkCmdDrawIndexedIndirectCount);
|
X(vkCmdDrawIndexedIndirectCount);
|
||||||
|
X(vkCmdDrawIndirectByteCountEXT);
|
||||||
X(vkCmdEndQuery);
|
X(vkCmdEndQuery);
|
||||||
X(vkCmdEndRenderPass);
|
X(vkCmdEndRenderPass);
|
||||||
X(vkCmdEndTransformFeedbackEXT);
|
X(vkCmdEndTransformFeedbackEXT);
|
||||||
|
|
|
@ -224,6 +224,7 @@ struct DeviceDispatch : InstanceDispatch {
|
||||||
PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
|
PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
|
||||||
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
|
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
|
||||||
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
|
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
|
||||||
|
PFN_vkCmdDrawIndirectByteCountEXT vkCmdDrawIndirectByteCountEXT{};
|
||||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||||
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
||||||
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
||||||
|
@ -1196,6 +1197,13 @@ public:
|
||||||
count_offset, draw_count, stride);
|
count_offset, draw_count, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calls vkCmdDrawIndirectByteCountEXT through the dispatch table with this wrapper's handle,
/// forwarding every argument unchanged.
void DrawIndirectByteCountEXT(u32 instance_count, u32 first_instance, VkBuffer counter_buffer,
                              VkDeviceSize counter_buffer_offset, u32 counter_offset,
                              u32 vertex_stride) const noexcept {
    const auto draw_byte_count = dld->vkCmdDrawIndirectByteCountEXT;
    draw_byte_count(handle, instance_count, first_instance, counter_buffer, counter_buffer_offset,
                    counter_offset, vertex_stride);
}
|
||||||
|
|
||||||
void ClearAttachments(Span<VkClearAttachment> attachments,
|
void ClearAttachments(Span<VkClearAttachment> attachments,
|
||||||
Span<VkClearRect> rects) const noexcept {
|
Span<VkClearRect> rects) const noexcept {
|
||||||
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
|
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
|
||||||
|
|
|
@ -764,6 +764,7 @@ void Config::ReadRendererValues() {
|
||||||
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
|
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
|
||||||
ReadGlobalSetting(Settings::values.nvdec_emulation);
|
ReadGlobalSetting(Settings::values.nvdec_emulation);
|
||||||
ReadGlobalSetting(Settings::values.accelerate_astc);
|
ReadGlobalSetting(Settings::values.accelerate_astc);
|
||||||
|
ReadGlobalSetting(Settings::values.transform_feedback_query);
|
||||||
ReadGlobalSetting(Settings::values.async_astc);
|
ReadGlobalSetting(Settings::values.async_astc);
|
||||||
ReadGlobalSetting(Settings::values.astc_recompression);
|
ReadGlobalSetting(Settings::values.astc_recompression);
|
||||||
ReadGlobalSetting(Settings::values.use_reactive_flushing);
|
ReadGlobalSetting(Settings::values.use_reactive_flushing);
|
||||||
|
@ -1429,6 +1430,7 @@ void Config::SaveRendererValues() {
|
||||||
static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
|
static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
|
||||||
Settings::values.nvdec_emulation.UsingGlobal());
|
Settings::values.nvdec_emulation.UsingGlobal());
|
||||||
WriteGlobalSetting(Settings::values.accelerate_astc);
|
WriteGlobalSetting(Settings::values.accelerate_astc);
|
||||||
|
WriteGlobalSetting(Settings::values.transform_feedback_query);
|
||||||
WriteGlobalSetting(Settings::values.async_astc);
|
WriteGlobalSetting(Settings::values.async_astc);
|
||||||
WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
|
WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
|
||||||
static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
|
static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
|
||||||
|
|
|
@ -45,6 +45,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue());
|
ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue());
|
||||||
ui->barrier_feedback_loops_checkbox->setChecked(
|
ui->barrier_feedback_loops_checkbox->setChecked(
|
||||||
Settings::values.barrier_feedback_loops.GetValue());
|
Settings::values.barrier_feedback_loops.GetValue());
|
||||||
|
ui->transform_feedback_query->setChecked(Settings::values.transform_feedback_query.GetValue());
|
||||||
|
|
||||||
if (Settings::IsConfiguringGlobal()) {
|
if (Settings::IsConfiguringGlobal()) {
|
||||||
ui->gpu_accuracy->setCurrentIndex(
|
ui->gpu_accuracy->setCurrentIndex(
|
||||||
|
@ -99,6 +100,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.barrier_feedback_loops,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.barrier_feedback_loops,
|
||||||
ui->barrier_feedback_loops_checkbox,
|
ui->barrier_feedback_loops_checkbox,
|
||||||
barrier_feedback_loops);
|
barrier_feedback_loops);
|
||||||
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.transform_feedback_query,
|
||||||
|
ui->transform_feedback_query,
|
||||||
|
transform_feedback_query);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
|
void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
|
||||||
|
@ -137,6 +141,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
Settings::values.use_video_framerate.UsingGlobal());
|
Settings::values.use_video_framerate.UsingGlobal());
|
||||||
ui->barrier_feedback_loops_checkbox->setEnabled(
|
ui->barrier_feedback_loops_checkbox->setEnabled(
|
||||||
Settings::values.barrier_feedback_loops.UsingGlobal());
|
Settings::values.barrier_feedback_loops.UsingGlobal());
|
||||||
|
ui->transform_feedback_query->setEnabled(
|
||||||
|
Settings::values.transform_feedback_query.UsingGlobal());
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -167,6 +173,10 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
ConfigurationShared::SetColoredTristate(ui->barrier_feedback_loops_checkbox,
|
ConfigurationShared::SetColoredTristate(ui->barrier_feedback_loops_checkbox,
|
||||||
Settings::values.barrier_feedback_loops,
|
Settings::values.barrier_feedback_loops,
|
||||||
barrier_feedback_loops);
|
barrier_feedback_loops);
|
||||||
|
ConfigurationShared::SetColoredTristate(ui->transform_feedback_query,
|
||||||
|
Settings::values.transform_feedback_query,
|
||||||
|
transform_feedback_query);
|
||||||
|
|
||||||
ConfigurationShared::SetColoredComboBox(
|
ConfigurationShared::SetColoredComboBox(
|
||||||
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
||||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
||||||
|
|
|
@ -49,6 +49,7 @@ private:
|
||||||
ConfigurationShared::CheckState enable_compute_pipelines;
|
ConfigurationShared::CheckState enable_compute_pipelines;
|
||||||
ConfigurationShared::CheckState use_video_framerate;
|
ConfigurationShared::CheckState use_video_framerate;
|
||||||
ConfigurationShared::CheckState barrier_feedback_loops;
|
ConfigurationShared::CheckState barrier_feedback_loops;
|
||||||
|
ConfigurationShared::CheckState transform_feedback_query;
|
||||||
|
|
||||||
const Core::System& system;
|
const Core::System& system;
|
||||||
};
|
};
|
||||||
|
|
|
@ -211,6 +211,16 @@ Compute pipelines are always enabled on all other drivers.</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
 <!-- Checkbox backing Settings::values.transform_feedback_query. -->
 <widget class="QCheckBox" name="transform_feedback_query">
  <property name="text">
   <string>Enable Transform Feedback queries</string>
  </property>
  <property name="toolTip">
   <string>Enables querying the TFB counter for TFB draws. Can noticeably reduce performance of some games if they query TFB but do not rely on it to draw.</string>
  </property>
 </widget>
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QWidget" name="af_layout" native="true">
|
<widget class="QWidget" name="af_layout" native="true">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue