mirror of
				https://github.com/dolphin-emu/dolphin.git
				synced 2025-10-26 18:09:20 +00:00 
			
		
		
		
	SPDX standardizes how source code conveys its copyright and licensing information. See https://spdx.github.io/spdx-spec/1-rationale/ . SPDX tags are adopted in many large projects, including things like the Linux kernel.
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2012 Dolphin Emulator Project
 | |
| // SPDX-License-Identifier: GPL-2.0-or-later
 | |
| 
 | |
| #include "VideoBackends/OGL/OGLPerfQuery.h"
 | |
| 
 | |
| #include <memory>
 | |
| 
 | |
| #include "Common/CommonTypes.h"
 | |
| #include "Common/GL/GLExtensions/GLExtensions.h"
 | |
| 
 | |
| #include "VideoBackends/OGL/OGLRender.h"
 | |
| #include "VideoCommon/VideoCommon.h"
 | |
| #include "VideoCommon/VideoConfig.h"
 | |
| 
 | |
| namespace OGL
 | |
| {
 | |
| std::unique_ptr<PerfQueryBase> GetPerfQuery()
 | |
| {
 | |
|   const bool is_gles = static_cast<Renderer*>(g_renderer.get())->IsGLES();
 | |
|   if (is_gles && GLExtensions::Supports("GL_NV_occlusion_query_samples"))
 | |
|     return std::make_unique<PerfQueryGLESNV>();
 | |
|   else if (is_gles)
 | |
|     return std::make_unique<PerfQueryGL>(GL_ANY_SAMPLES_PASSED);
 | |
|   else
 | |
|     return std::make_unique<PerfQueryGL>(GL_SAMPLES_PASSED);
 | |
| }
 | |
| 
 | |
| PerfQuery::PerfQuery() : m_query_read_pos()
 | |
| {
 | |
|   ResetQuery();
 | |
| }
 | |
| 
 | |
| void PerfQuery::EnableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   m_query->EnableQuery(type);
 | |
| }
 | |
| 
 | |
| void PerfQuery::DisableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   m_query->DisableQuery(type);
 | |
| }
 | |
| 
 | |
| bool PerfQuery::IsFlushed() const
 | |
| {
 | |
|   return m_query_count.load(std::memory_order_relaxed) == 0;
 | |
| }
 | |
| 
 | |
| // TODO: could selectively flush things, but I don't think that will do much
 | |
| void PerfQuery::FlushResults()
 | |
| {
 | |
|   m_query->FlushResults();
 | |
| }
 | |
| 
 | |
| void PerfQuery::ResetQuery()
 | |
| {
 | |
|   m_query_count.store(0, std::memory_order_relaxed);
 | |
|   for (size_t i = 0; i < m_results.size(); ++i)
 | |
|     m_results[i].store(0, std::memory_order_relaxed);
 | |
| }
 | |
| 
 | |
| u32 PerfQuery::GetQueryResult(PerfQueryType type)
 | |
| {
 | |
|   u32 result = 0;
 | |
| 
 | |
|   if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
 | |
|   {
 | |
|     result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
 | |
|   }
 | |
|   else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
 | |
|   {
 | |
|     result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
 | |
|   }
 | |
|   else if (type == PQ_BLEND_INPUT)
 | |
|   {
 | |
|     result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
 | |
|              m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
 | |
|   }
 | |
|   else if (type == PQ_EFB_COPY_CLOCKS)
 | |
|   {
 | |
|     result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
 | |
|   }
 | |
| 
 | |
|   return result;
 | |
| }
 | |
| 
 | |
| // Implementations
 | |
| PerfQueryGL::PerfQueryGL(GLenum query_type) : m_query_type(query_type)
 | |
| {
 | |
|   for (ActiveQuery& query : m_query_buffer)
 | |
|     glGenQueries(1, &query.query_id);
 | |
| }
 | |
| 
 | |
| PerfQueryGL::~PerfQueryGL()
 | |
| {
 | |
|   for (ActiveQuery& query : m_query_buffer)
 | |
|     glDeleteQueries(1, &query.query_id);
 | |
| }
 | |
| 
 | |
| void PerfQueryGL::EnableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   u32 query_count = m_query_count.load(std::memory_order_relaxed);
 | |
| 
 | |
|   // Is this sane?
 | |
|   if (query_count > m_query_buffer.size() / 2)
 | |
|   {
 | |
|     WeakFlush();
 | |
|     query_count = m_query_count.load(std::memory_order_relaxed);
 | |
|   }
 | |
| 
 | |
|   if (m_query_buffer.size() == query_count)
 | |
|   {
 | |
|     FlushOne();
 | |
|     query_count = m_query_count.load(std::memory_order_relaxed);
 | |
|     // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
 | |
|   }
 | |
| 
 | |
|   // start query
 | |
|   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
 | |
|   {
 | |
|     auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
 | |
| 
 | |
|     glBeginQuery(m_query_type, entry.query_id);
 | |
|     entry.query_type = type;
 | |
| 
 | |
|     m_query_count.fetch_add(1, std::memory_order_relaxed);
 | |
|   }
 | |
| }
 | |
| void PerfQueryGL::DisableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   // stop query
 | |
|   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
 | |
|   {
 | |
|     glEndQuery(m_query_type);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PerfQueryGL::WeakFlush()
 | |
| {
 | |
|   while (!IsFlushed())
 | |
|   {
 | |
|     auto& entry = m_query_buffer[m_query_read_pos];
 | |
| 
 | |
|     GLuint result = GL_FALSE;
 | |
|     glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
 | |
| 
 | |
|     if (GL_TRUE == result)
 | |
|     {
 | |
|       FlushOne();
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PerfQueryGL::FlushOne()
 | |
| {
 | |
|   auto& entry = m_query_buffer[m_query_read_pos];
 | |
| 
 | |
|   GLuint result = 0;
 | |
|   glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
 | |
| 
 | |
|   // NOTE: Reported pixel metrics should be referenced to native resolution
 | |
|   // TODO: Dropping the lower 2 bits from this count should be closer to actual
 | |
|   // hardware behavior when drawing triangles.
 | |
|   result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
 | |
|            (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
 | |
| 
 | |
|   // Adjust for multisampling
 | |
|   if (g_ActiveConfig.iMultisamples > 1)
 | |
|     result /= g_ActiveConfig.iMultisamples;
 | |
| 
 | |
|   m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);
 | |
| 
 | |
|   m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
 | |
|   m_query_count.fetch_sub(1, std::memory_order_relaxed);
 | |
| }
 | |
| 
 | |
| // TODO: could selectively flush things, but I don't think that will do much
 | |
| void PerfQueryGL::FlushResults()
 | |
| {
 | |
|   while (!IsFlushed())
 | |
|     FlushOne();
 | |
| }
 | |
| 
 | |
| PerfQueryGLESNV::PerfQueryGLESNV()
 | |
| {
 | |
|   for (ActiveQuery& query : m_query_buffer)
 | |
|     glGenOcclusionQueriesNV(1, &query.query_id);
 | |
| }
 | |
| 
 | |
| PerfQueryGLESNV::~PerfQueryGLESNV()
 | |
| {
 | |
|   for (ActiveQuery& query : m_query_buffer)
 | |
|     glDeleteOcclusionQueriesNV(1, &query.query_id);
 | |
| }
 | |
| 
 | |
| void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   u32 query_count = m_query_count.load(std::memory_order_relaxed);
 | |
| 
 | |
|   // Is this sane?
 | |
|   if (query_count > m_query_buffer.size() / 2)
 | |
|   {
 | |
|     WeakFlush();
 | |
|     query_count = m_query_count.load(std::memory_order_relaxed);
 | |
|   }
 | |
| 
 | |
|   if (m_query_buffer.size() == query_count)
 | |
|   {
 | |
|     FlushOne();
 | |
|     query_count = m_query_count.load(std::memory_order_relaxed);
 | |
|     // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
 | |
|   }
 | |
| 
 | |
|   // start query
 | |
|   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
 | |
|   {
 | |
|     auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
 | |
| 
 | |
|     glBeginOcclusionQueryNV(entry.query_id);
 | |
|     entry.query_type = type;
 | |
| 
 | |
|     m_query_count.fetch_add(1, std::memory_order_relaxed);
 | |
|   }
 | |
| }
 | |
| void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
 | |
| {
 | |
|   // stop query
 | |
|   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
 | |
|   {
 | |
|     glEndOcclusionQueryNV();
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PerfQueryGLESNV::WeakFlush()
 | |
| {
 | |
|   while (!IsFlushed())
 | |
|   {
 | |
|     auto& entry = m_query_buffer[m_query_read_pos];
 | |
| 
 | |
|     GLuint result = GL_FALSE;
 | |
|     glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result);
 | |
| 
 | |
|     if (GL_TRUE == result)
 | |
|     {
 | |
|       FlushOne();
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PerfQueryGLESNV::FlushOne()
 | |
| {
 | |
|   auto& entry = m_query_buffer[m_query_read_pos];
 | |
| 
 | |
|   GLuint result = 0;
 | |
|   glGetOcclusionQueryuivNV(entry.query_id, GL_OCCLUSION_TEST_RESULT_HP, &result);
 | |
| 
 | |
|   // NOTE: Reported pixel metrics should be referenced to native resolution
 | |
|   // TODO: Dropping the lower 2 bits from this count should be closer to actual
 | |
|   // hardware behavior when drawing triangles.
 | |
|   const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
 | |
|                                 (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
 | |
|   m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
 | |
|                                         std::memory_order_relaxed);
 | |
| 
 | |
|   m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
 | |
|   m_query_count.fetch_sub(1, std::memory_order_relaxed);
 | |
| }
 | |
| 
 | |
| // TODO: could selectively flush things, but I don't think that will do much
 | |
| void PerfQueryGLESNV::FlushResults()
 | |
| {
 | |
|   while (!IsFlushed())
 | |
|     FlushOne();
 | |
| }
 | |
| 
 | |
| }  // namespace OGL
 |