From 81fa3da101cce716ecba2157a9806f36e38000a2 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 21 Sep 2022 21:52:47 +0300 Subject: [PATCH] gl: Minor optimization around test..set patterns in the state tracker --- rpcs3/Emu/RSX/GL/GLDraw.cpp | 10 +- rpcs3/Emu/RSX/GL/GLOverlays.cpp | 46 ++++-- rpcs3/Emu/RSX/GL/GLOverlays.h | 6 +- rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp | 157 +++++++++++++-------- 4 files changed, 146 insertions(+), 73 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index f43625a91c..9dc5adbb16 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -165,21 +165,21 @@ void GLGSRender::update_draw_state() if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST)) { - glStencilFunc(gl::comparison_op(rsx::method_registers.stencil_func()), + gl_state.stencil_func(gl::comparison_op(rsx::method_registers.stencil_func()), rsx::method_registers.stencil_func_ref(), rsx::method_registers.stencil_func_mask()); - glStencilOp(gl::stencil_op(rsx::method_registers.stencil_op_fail()), gl::stencil_op(rsx::method_registers.stencil_op_zfail()), + gl_state.stencil_op(gl::stencil_op(rsx::method_registers.stencil_op_fail()), gl::stencil_op(rsx::method_registers.stencil_op_zfail()), gl::stencil_op(rsx::method_registers.stencil_op_zpass())); if (rsx::method_registers.two_sided_stencil_test_enabled()) { - glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask()); + gl_state.stencil_back_mask(rsx::method_registers.back_stencil_mask()); - glStencilFuncSeparate(GL_BACK, gl::comparison_op(rsx::method_registers.back_stencil_func()), + gl_state.stencil_back_func(gl::comparison_op(rsx::method_registers.back_stencil_func()), rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask()); - glStencilOpSeparate(GL_BACK, gl::stencil_op(rsx::method_registers.back_stencil_op_fail()), + 
gl_state.stencil_back_op(gl::stencil_op(rsx::method_registers.back_stencil_op_fail()), gl::stencil_op(rsx::method_registers.back_stencil_op_zfail()), gl::stencil_op(rsx::method_registers.back_stencil_op_zpass())); } } diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 835181b9b8..172f002a19 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -84,7 +84,7 @@ namespace gl glBindVertexArray(old_vao); } - void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending) + void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending) { if (!compiled) { @@ -116,6 +116,9 @@ namespace gl default: fmt::throw_exception("Unsupported image aspect combination 0x%x", image_aspect_bits); } + + enable_depth_writes = (image_aspect_bits & m_write_aspect_mask) & gl::image_aspect::depth; + enable_stencil_writes = (image_aspect_bits & m_write_aspect_mask) & gl::image_aspect::stencil; } if (!target_texture || fbo.check()) @@ -128,15 +131,34 @@ namespace gl cmd->color_maski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); cmd->depth_mask(image_aspect_bits == gl::image_aspect::color ? GL_FALSE : GL_TRUE); - // Disabling depth test will also disable depth writes which is not desired - cmd->depth_func(GL_ALWAYS); - cmd->enable(GL_DEPTH_TEST); - - cmd->disable(GL_SCISSOR_TEST); cmd->disable(GL_CULL_FACE); - cmd->disable(GL_STENCIL_TEST); + cmd->disable(GL_SCISSOR_TEST); - if (use_blending) + if (enable_depth_writes) + { + // Disabling depth test will also disable depth writes which is not desired + cmd->depth_func(GL_ALWAYS); + cmd->enable(GL_DEPTH_TEST); + } + else + { + cmd->disable(GL_DEPTH_TEST); + } + + if (enable_stencil_writes) + { + // Disabling stencil test also disables stencil writes. 
+ cmd->enable(GL_STENCIL_TEST); + cmd->stencil_mask(0xFF); + cmd->stencil_func(GL_ALWAYS, 0xFF, 0xFF); + cmd->stencil_op(GL_KEEP, GL_KEEP, GL_REPLACE); + } + else + { + cmd->disable(GL_STENCIL_TEST); + } + + if (enable_blending) { cmd->enablei(GL_BLEND, 0); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); @@ -605,15 +627,21 @@ namespace gl #include "../Program/GLSLSnippets/CopyBufferToGenericImage.glsl" ; + const bool stencil_export_supported = gl::get_driver_caps().ARB_shader_stencil_export_supported; std::pair repl_list[] = { { "%set, ", "" }, { "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) }, { "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) }, - { "%stencil_export_supported", gl::get_driver_caps().ARB_shader_stencil_export_supported ? "1" : "0" } + { "%stencil_export_supported", stencil_export_supported ? "1" : "0" } }; fs_src = fmt::replace_all(fs_src, repl_list); + + if (stencil_export_supported) + { + m_write_aspect_mask |= gl::image_aspect::stencil; + } } void rp_ssbo_to_generic_texture::run(gl::command_context& cmd, diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index b332555283..3c62b2f783 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -35,6 +35,10 @@ namespace gl GLenum primitives = GL_TRIANGLE_STRIP; GLenum input_filter = GL_NEAREST; + u32 m_write_aspect_mask = gl::image_aspect::color | gl::image_aspect::depth; + bool enable_depth_writes = false; + bool enable_stencil_writes = false; + void create(); void destroy(); @@ -52,7 +56,7 @@ namespace gl virtual void emit_geometry(); - void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending = false); + void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending = false); }; struct ui_overlay_renderer : public overlay_pass diff --git a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp 
b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp index ec3430aac2..b83e8a0d20 100644 --- a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp +++ b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp @@ -7,12 +7,17 @@ namespace gl { - struct driver_state + class driver_state { - const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; - const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; - const u32 DEPTH_RANGE_MIN = 0xFFFF0003; - const u32 DEPTH_RANGE_MAX = 0xFFFF0004; + const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; + const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; + const u32 DEPTH_RANGE_MIN = 0xFFFF0003; + const u32 DEPTH_RANGE_MAX = 0xFFFF0004; + const u32 STENCIL_FRONT_FUNC = 0xFFFF0005; + const u32 STENCIL_BACK_FUNC = 0xFFFF0006; + const u32 STENCIL_FRONT_OP = 0xFFFF0007; + const u32 STENCIL_BACK_OP = 0xFFFF0008; + const u32 STENCIL_BACK_MASK = 0xFFFF0009; std::unordered_map properties = {}; std::unordered_map> indexed_properties = {}; @@ -20,6 +25,36 @@ namespace gl GLuint current_program = GL_NONE; std::array, 48> bound_textures{ {} }; + bool test_and_set_property(GLenum property, u32 test) /* Returns true when 'property' is already cached as 'test'; otherwise stores 'test' and returns false, so callers issue the GL call only on false. */ + { + auto found = properties.find(property); + if (found != properties.end() && found->second == test) + return true; + + properties[property] = test; + return false; + } + + bool test_and_set_property(GLenum property, u32 test, GLint index) /* Indexed overload: same contract, applied to slot 'index' of 'property'. */ + { + auto found = indexed_properties.find(property); + if (found != indexed_properties.end()) + { + if (found->second[index] == test) + { + return true; + } + + found->second[index] = test; + return false; + } + + indexed_properties[property][index] = test; + return false; + } + + public: + bool enable(u32 test, GLenum cap) { auto found = properties.find(cap); @@ -82,77 +117,97 @@ namespace gl return enablei(GL_FALSE, cap, index); } - inline bool test_property(GLenum property, u32 test) const - { - auto found = properties.find(property); - if (found == properties.end()) - return false; - - return (found->second == test); - } - - inline bool test_propertyi(GLenum property, u32 test, GLint 
index) const - { - auto found = indexed_properties.find(property); - if (found == indexed_properties.end()) - return false; - - return found->second[index] == test; - } - void depth_func(GLenum func) { - if (!test_property(GL_DEPTH_FUNC, func)) + if (!test_and_set_property(GL_DEPTH_FUNC, func)) { glDepthFunc(func); - properties[GL_DEPTH_FUNC] = func; } } void depth_mask(GLboolean mask) { - if (!test_property(GL_DEPTH_WRITEMASK, mask)) + if (!test_and_set_property(GL_DEPTH_WRITEMASK, mask)) { glDepthMask(mask); - properties[GL_DEPTH_WRITEMASK] = mask; } } void clear_depth(GLfloat depth) { u32 value = std::bit_cast(depth); - if (!test_property(GL_DEPTH_CLEAR_VALUE, value)) + if (!test_and_set_property(GL_DEPTH_CLEAR_VALUE, value)) { glClearDepth(depth); - properties[GL_DEPTH_CLEAR_VALUE] = value; } } void stencil_mask(GLuint mask) { - if (!test_property(GL_STENCIL_WRITEMASK, mask)) + if (!test_and_set_property(GL_STENCIL_WRITEMASK, mask)) { glStencilMask(mask); - properties[GL_STENCIL_WRITEMASK] = mask; + } + } + + void stencil_back_mask(GLuint mask) + { + if (!test_and_set_property(STENCIL_BACK_MASK, mask)) + { + glStencilMaskSeparate(GL_BACK, mask); } } void clear_stencil(GLint stencil) { - u32 value = std::bit_cast(stencil); - if (!test_property(GL_STENCIL_CLEAR_VALUE, value)) + const u32 value = std::bit_cast(stencil); + if (!test_and_set_property(GL_STENCIL_CLEAR_VALUE, value)) { glClearStencil(stencil); - properties[GL_STENCIL_CLEAR_VALUE] = value; + } + } + + void stencil_func(GLenum func, GLint ref, GLuint mask) + { + const u32 value = func | ref << 16u | mask << 24; + if (!test_and_set_property(STENCIL_FRONT_FUNC, value)) + { + glStencilFunc(func, ref, mask); /* NOTE(review): glStencilFunc sets front and back state, yet only STENCIL_FRONT_FUNC is refreshed here, so a cached STENCIL_BACK_FUNC entry may go stale; confirm. */ + } + } + + void stencil_back_func(GLenum func, GLint ref, GLuint mask) + { + const u32 value = func | ref << 16u | mask << 24; + if (!test_and_set_property(STENCIL_BACK_FUNC, value)) + { + glStencilFuncSeparate(GL_BACK, func, ref, mask); /* Back face only, matching the glStencilFuncSeparate(GL_BACK, ...) call this setter replaces in GLDraw.cpp. */ + } + } + + void stencil_op(GLenum fail, GLenum zfail, GLenum zpass) + { + const 
u32 value = (fail & 0xFF) << 16 | (zfail & 0xFF) << 8 | (zpass & 0xFF); /* NOTE(review): only the low byte of each op is kept, so GL_KEEP (0x1E00) and GL_ZERO (0) pack to the same key; confirm and widen if needed. */ + if (!test_and_set_property(STENCIL_FRONT_OP, value)) + { + glStencilOp(fail, zfail, zpass); + } + } + + void stencil_back_op(GLenum fail, GLenum zfail, GLenum zpass) + { + const u32 value = (fail & 0xFF) << 16 | (zfail & 0xFF) << 8 | (zpass & 0xFF); + if (!test_and_set_property(STENCIL_BACK_OP, value)) /* Back face has its own cache slot; sharing STENCIL_FRONT_OP would let front and back updates mask each other. */ + { + glStencilOpSeparate(GL_BACK, fail, zfail, zpass); } } void color_maski(GLint index, u32 mask) { - if (!test_propertyi(GL_COLOR_WRITEMASK, mask, index)) + if (!test_and_set_property(GL_COLOR_WRITEMASK, mask, index)) { glColorMaski(index, ((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0)); - indexed_properties[GL_COLOR_WRITEMASK][index] = mask; } } @@ -170,10 +225,9 @@ namespace gl void clear_color(u8 r, u8 g, u8 b, u8 a) { u32 value = u32{ r } | u32{ g } << 8 | u32{ b } << 16 | u32{ a } << 24; - if (!test_property(GL_COLOR_CLEAR_VALUE, value)) + if (!test_and_set_property(GL_COLOR_CLEAR_VALUE, value)) { glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); - properties[GL_COLOR_CLEAR_VALUE] = value; } } @@ -187,7 +241,7 @@ namespace gl u32 depth_min = std::bit_cast(min); u32 depth_max = std::bit_cast(max); - if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max)) + if (const bool min_cached = test_and_set_property(DEPTH_BOUNDS_MIN, depth_min), max_cached = test_and_set_property(DEPTH_BOUNDS_MAX, depth_max); !min_cached || !max_cached) /* Both slots are refreshed before testing; calling the helper on each side of '||' would skip the second update when the first value changed. */ { if (get_driver_caps().NV_depth_buffer_float_supported) { @@ -197,9 +251,6 @@ namespace gl { glDepthBoundsEXT(min, max); } - - properties[DEPTH_BOUNDS_MIN] = depth_min; - properties[DEPTH_BOUNDS_MAX] = depth_max; } } @@ -208,7 +259,7 @@ namespace gl u32 depth_min = std::bit_cast(min); u32 depth_max = std::bit_cast(max); - if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max)) + if (const bool min_cached = test_and_set_property(DEPTH_RANGE_MIN, depth_min), max_cached = test_and_set_property(DEPTH_RANGE_MAX, depth_max); !min_cached || !max_cached) /* Both slots are refreshed before testing; calling the helper on each side of '||' would skip the second update when the first value changed. */ { 
if (get_driver_caps().NV_depth_buffer_float_supported) { @@ -218,18 +269,14 @@ namespace gl { glDepthRange(min, max); } - - properties[DEPTH_RANGE_MIN] = depth_min; - properties[DEPTH_RANGE_MAX] = depth_max; } } void logic_op(GLenum op) { - if (!test_property(GL_COLOR_LOGIC_OP, op)) + if (!test_and_set_property(GL_COLOR_LOGIC_OP, op)) { glLogicOp(op); - properties[GL_COLOR_LOGIC_OP] = op; } } @@ -237,28 +284,25 @@ namespace gl { u32 value = std::bit_cast(width); - if (!test_property(GL_LINE_WIDTH, value)) + if (!test_and_set_property(GL_LINE_WIDTH, value)) { glLineWidth(width); - properties[GL_LINE_WIDTH] = value; } } void front_face(GLenum face) { - if (!test_property(GL_FRONT_FACE, face)) + if (!test_and_set_property(GL_FRONT_FACE, face)) { glFrontFace(face); - properties[GL_FRONT_FACE] = face; } } void cull_face(GLenum mode) { - if (!test_property(GL_CULL_FACE_MODE, mode)) + if (!test_and_set_property(GL_CULL_FACE_MODE, mode)) { glCullFace(mode); - properties[GL_CULL_FACE_MODE] = mode; } } @@ -267,12 +311,9 @@ namespace gl u32 _units = std::bit_cast(units); u32 _factor = std::bit_cast(factor); - if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor)) + if (const bool units_cached = test_and_set_property(GL_POLYGON_OFFSET_UNITS, _units), factor_cached = test_and_set_property(GL_POLYGON_OFFSET_FACTOR, _factor); !units_cached || !factor_cached) /* Both slots are refreshed before testing; calling the helper on each side of '||' would skip the second update when the first value changed. */ { glPolygonOffset(factor, units); - - properties[GL_POLYGON_OFFSET_UNITS] = _units; - properties[GL_POLYGON_OFFSET_FACTOR] = _factor; } }