diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index ccbf6eb8eb..13908a106b 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -484,7 +484,7 @@ s32 cellGcmSetDisplayBuffer(u8 id, u32 offset, u32 pitch, u32 width, u32 height) m_config->gcm_buffers[id].width = width; m_config->gcm_buffers[id].height = height; - if (id + 1 > render->display_buffers_count) + if (id + 1u > render->display_buffers_count) { render->display_buffers_count = id + 1; } diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index a2dccee99a..9b09d57af9 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -328,7 +328,26 @@ s32 sys_rsx_context_attribute(s32 context_id, u32 package_id, u64 a3, u64 a4, u6 break; case 0x301: // Depth-buffer (Z-cull) - break; + { + //a4 high = region = (1 << 0) | (zFormat << 4) | (aaFormat << 8); + //a4 low = size = ((width >> 6) << 22) | ((height >> 6) << 6); + //a5 high = start = cullStart&(~0xFFF); + //a5 low = offset = offset; + //a6 high = status0 = (zcullDir << 1) | (zcullFormat << 2) | ((sFunc & 0xF) << 12) | (sRef << 16) | (sMask << 24); + //a6 low = status1 = (0x2000 << 0) | (0x20 << 16); + + auto &zcull = render->zculls[a3]; + zcull.zFormat = ((a4 >> 32) >> 4) & 0xF; + zcull.aaFormat = ((a4 >> 32) >> 8) & 0xF; + zcull.width = ((a4 & 0xFFFFFFFF) >> 22) << 6; + zcull.height = (((a4 & 0xFFFFFFFF) >> 6) & 0xFF) << 6; + zcull.cullStart = (a5 >> 32); + zcull.offset = (a5 & 0xFFFFFFFF); + zcull.binded = (a6 & 0xFFFFFFFF) != 0; + //TODO: Set zculldir, format, sfunc, sref, smask + } + break; + case 0x302: // something with zcull break; case 0x600: // Framebuffer setup diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 9bcd2e1d82..95059ee1b7 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -33,34 +33,6 @@ GLGSRender::GLGSRender() : GSRender() m_vertex_cache.reset(new gl::weak_vertex_cache()); } -u32 GLGSRender::enable(u32 condition, u32 cap) -{ - if (condition) - { - glEnable(cap); - } - else - { - glDisable(cap); - } - - return condition; -} - -u32 GLGSRender::enable(u32 condition, u32 cap, u32 index) -{ - if (condition) - { - glEnablei(cap, index); - } - else - { - glDisablei(cap, index); - } - - return condition; -} - extern CellGcmContextData current_context; namespace @@ -194,6 +166,9 @@ void GLGSRender::begin() if (skip_frame) return; + if (conditional_render_enabled && conditional_render_test_failed) + return; + init_buffers(); if (!framebuffer_status_valid) @@ -206,87 +181,91 @@ void GLGSRender::begin() bool color_mask_r = rsx::method_registers.color_mask_r(); bool color_mask_a = rsx::method_registers.color_mask_a(); - __glcheck glColorMask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); - __glcheck glDepthMask(rsx::method_registers.depth_write_enabled()); - __glcheck glStencilMask(rsx::method_registers.stencil_mask()); + gl_state.color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); + gl_state.stencil_mask(rsx::method_registers.stencil_mask()); - if (__glcheck enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST)) + if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST)) { - __glcheck glDepthFunc(comparison_op(rsx::method_registers.depth_func())); + gl_state.depth_func(comparison_op(rsx::method_registers.depth_func())); } - if (glDepthBoundsEXT && (__glcheck enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT))) + if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT))) { - __glcheck glDepthBoundsEXT(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); + gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); } - //__glcheck glDepthRange(rsx::method_registers.clip_min(), rsx::method_registers.clip_max()); - __glcheck enable(rsx::method_registers.dither_enabled(), GL_DITHER); + gl_state.depth_range(rsx::method_registers.clip_min(), rsx::method_registers.clip_max()); + gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER); - if (__glcheck enable(rsx::method_registers.blend_enabled(), GL_BLEND)) + if (gl_state.enable(rsx::method_registers.blend_enabled(), GL_BLEND)) { - __glcheck glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()), + glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()), blend_factor(rsx::method_registers.blend_func_dfactor_rgb()), blend_factor(rsx::method_registers.blend_func_sfactor_a()), blend_factor(rsx::method_registers.blend_func_dfactor_a())); auto blend_colors = rsx::get_constant_blend_colors(); - __glcheck glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]); + glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]); - __glcheck glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()), + glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()), blend_equation(rsx::method_registers.blend_equation_a())); } - if (__glcheck enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST)) + if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST)) { - __glcheck glStencilFunc(comparison_op(rsx::method_registers.stencil_func()), rsx::method_registers.stencil_func_ref(), + glStencilFunc(comparison_op(rsx::method_registers.stencil_func()), + rsx::method_registers.stencil_func_ref(), rsx::method_registers.stencil_func_mask()); - __glcheck glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()), + + glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()), stencil_op(rsx::method_registers.stencil_op_zpass())); if (rsx::method_registers.two_sided_stencil_test_enabled()) { - __glcheck glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask()); - __glcheck glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()), + glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask()); + + glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()), rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask()); - __glcheck glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()), + + glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()), stencil_op(rsx::method_registers.back_stencil_op_zfail()), stencil_op(rsx::method_registers.back_stencil_op_zpass())); } } - __glcheck enable(rsx::method_registers.blend_enabled_surface_1(), GL_BLEND, 1); - __glcheck enable(rsx::method_registers.blend_enabled_surface_2(), GL_BLEND, 2); - __glcheck enable(rsx::method_registers.blend_enabled_surface_3(), GL_BLEND, 3); + gl_state.enablei(rsx::method_registers.blend_enabled_surface_1(), GL_BLEND, 1); + gl_state.enablei(rsx::method_registers.blend_enabled_surface_2(), GL_BLEND, 2); + gl_state.enablei(rsx::method_registers.blend_enabled_surface_3(), GL_BLEND, 3); - if (__glcheck enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP)) + if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP)) { - __glcheck glLogicOp(logic_op(rsx::method_registers.logic_operation())); + gl_state.logic_op(logic_op(rsx::method_registers.logic_operation())); } - __glcheck glLineWidth(rsx::method_registers.line_width()); - __glcheck enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH); + gl_state.line_width(rsx::method_registers.line_width()); + gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH); + + gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT); + gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE); + gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL); + + gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias()); + + if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE)) + { + gl_state.cull_face(cull_face(rsx::method_registers.cull_face_mode())); + } + + gl_state.front_face(front_face(rsx::method_registers.front_face_mode())); //TODO //NV4097_SET_ANISO_SPREAD - - __glcheck enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT); - __glcheck enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE); - __glcheck enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL); - - __glcheck glPolygonOffset(rsx::method_registers.poly_offset_scale(), - rsx::method_registers.poly_offset_bias()); - //NV4097_SET_SPECULAR_ENABLE //NV4097_SET_TWO_SIDE_LIGHT_EN //NV4097_SET_FLAT_SHADE_OP //NV4097_SET_EDGE_FLAG - if (__glcheck enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE)) - { - __glcheck glCullFace(cull_face(rsx::method_registers.cull_face_mode())); - } - __glcheck glFrontFace(front_face(rsx::method_registers.front_face_mode())); //NV4097_SET_COLOR_KEY_COLOR //NV4097_SET_SHADER_CONTROL @@ -330,7 +309,7 @@ void GLGSRender::end() std::chrono::time_point program_start = steady_clock::now(); //Load program here since it is dependent on vertex state - if (skip_frame || !framebuffer_status_valid || !load_program()) + if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !load_program()) { rsx::thread::end(); return; @@ -474,12 +453,12 @@ void GLGSRender::end() int texture_index = i + rsx::limits::fragment_textures_count; int location; -/* if (!rsx::method_registers.vertex_textures[i].enabled()) + if (!rsx::method_registers.vertex_textures[i].enabled()) { - glActiveTexture(GL_TEXTURE0 + texture_index); - glBindTexture(GL_TEXTURE_2D, 0); + //glActiveTexture(GL_TEXTURE0 + texture_index); + //glBindTexture(GL_TEXTURE_2D, 0); continue; - } */ + } if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location)) { @@ -520,7 +499,7 @@ void GLGSRender::end() if (indexed_draw_info || (skip_upload && m_last_draw_indexed == true)) { - if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) + if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { GLenum index_type = (skip_upload)? m_last_ib_type: std::get<0>(indexed_draw_info.value()); __glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); @@ -697,6 +676,17 @@ void GLGSRender::on_init_thread() m_gl_sampler_states[i].bind(i); } + //Occlusion query + for (u32 i = 0; i < occlusion_query_count; ++i) + { + auto &query = occlusion_query_data[i]; + glGenQueries(1, &query.handle); + + query.pending = false; + query.active = false; + query.result = 0; + } + //Clip planes are shader controlled; enable all planes driver-side glEnable(GL_CLIP_DISTANCE0 + 0); glEnable(GL_CLIP_DISTANCE0 + 1); @@ -772,6 +762,15 @@ void GLGSRender::on_exit() m_text_printer.close(); m_gl_texture_cache.close(); + for (u32 i = 0; i < occlusion_query_count; ++i) + { + auto &query = occlusion_query_data[i]; + query.active = false; + query.pending = false; + + glDeleteQueries(1, &query.handle); + } + return GSRender::on_exit(); } @@ -790,8 +789,8 @@ void GLGSRender::clear_surface(u32 arg) u32 max_depth_value = get_max_depth_value(surface_depth_format); u32 clear_depth = rsx::method_registers.z_clear_value(surface_depth_format == rsx::surface_depth_format::z24s8); - glDepthMask(GL_TRUE); - glClearDepth(double(clear_depth) / max_depth_value); + gl_state.depth_mask(GL_TRUE); + gl_state.clear_depth(f32(clear_depth) / max_depth_value); mask |= GLenum(gl::buffers::depth); gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); @@ -806,8 +805,8 @@ void GLGSRender::clear_surface(u32 arg) { u8 clear_stencil = rsx::method_registers.stencil_clear_value(); - __glcheck glStencilMask(rsx::method_registers.stencil_mask()); - glClearStencil(clear_stencil); + gl_state.stencil_mask(rsx::method_registers.stencil_mask()); + gl_state.clear_stencil(clear_stencil); mask |= GLenum(gl::buffers::stencil); } @@ -819,8 +818,8 @@ void GLGSRender::clear_surface(u32 arg) u8 clear_g = rsx::method_registers.clear_color_g(); u8 clear_b = rsx::method_registers.clear_color_b(); - glColorMask(((arg & 0x20) ? 1 : 0), ((arg & 0x40) ? 1 : 0), ((arg & 0x80) ? 1 : 0), ((arg & 0x10) ? 1 : 0)); - glClearColor(clear_r / 255.f, clear_g / 255.f, clear_b / 255.f, clear_a / 255.f); + gl_state.color_mask(arg & 0xf0); + gl_state.clear_color(clear_r, clear_g, clear_b, clear_a); mask |= GLenum(gl::buffers::color); @@ -1064,10 +1063,8 @@ void GLGSRender::flip(int buffer) } // Blit source image to the screen - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + // Disable scissor test (affects blit) glDisable(GL_SCISSOR_TEST); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); gl::screen.clear(gl::buffers::color_depth_stencil); __glcheck m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); @@ -1183,3 +1180,148 @@ bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst { return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts); } + +void GLGSRender::check_zcull_status(bool framebuffer_swap) +{ + if (framebuffer_swap) + { + zcull_surface_active = false; + const u32 zeta_address = depth_surface_info.address; + + if (zeta_address) + { + //Find zeta address in bound zculls + for (int i = 0; i < rsx::limits::zculls_count; i++) + { + if (zculls[i].binded) + { + const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL); + if (rsx_address == zeta_address) + { + zcull_surface_active = true; + break; + } + } + } + } + } + + bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled; + occlusion_query_info* query = nullptr; + + if (zcull_task_queue.task_stack.size() > 0) + query = zcull_task_queue.active_query; + + if (query && query->active) + { + if (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active) + { + glEndQuery(GL_ANY_SAMPLES_PASSED); + query->active = false; + query->pending = true; + } + } + else + { + if (zcull_rendering_enabled && testing_enabled && zcull_surface_active) + { + //Find query + u32 free_index = synchronize_zcull_stats(); + query = &occlusion_query_data[free_index]; + zcull_task_queue.add(query); + + glBeginQuery(GL_ANY_SAMPLES_PASSED, query->handle); + query->active = true; + query->result = 0; + } + } +} + +void GLGSRender::clear_zcull_stats(u32 type) +{ + if (type == CELL_GCM_ZPASS_PIXEL_CNT) + { + //synchronize_zcull_stats(true); + current_zcull_stats.clear(); + } +} + +u32 GLGSRender::get_zcull_stats(u32 type) +{ + switch (type) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + { + if (current_zcull_stats.zpass_pixel_cnt > 0) + return UINT32_MAX; + + //If we have no results, we might as well synchronize here and wait for results to become available + synchronize_zcull_stats(true); + return (current_zcull_stats.zpass_pixel_cnt > 0)? UINT32_MAX: 0; + } + case CELL_GCM_ZCULL_STATS: + case CELL_GCM_ZCULL_STATS1: + case CELL_GCM_ZCULL_STATS2: + //TODO + return UINT32_MAX; + case CELL_GCM_ZCULL_STATS3: + { + //Some kind of inverse value + if (current_zcull_stats.zpass_pixel_cnt > 0) + return 0; + + synchronize_zcull_stats(true); + return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT32_MAX; + } + default: + LOG_ERROR(RSX, "Unknown zcull stat type %d", type); + return 0; + } +} + +u32 GLGSRender::synchronize_zcull_stats(bool hard_sync) +{ + if (!zcull_rendering_enabled || zcull_task_queue.pending == 0) + return 0; + + u32 result = UINT16_MAX; + GLint count, status; + + for (auto &query : zcull_task_queue.task_stack) + { + if (query == nullptr || query->active) + continue; + + glGetQueryObjectiv(query->handle, GL_QUERY_RESULT_AVAILABLE, &status); + + if (status == GL_FALSE && !hard_sync) + continue; + + glGetQueryObjectiv(query->handle, GL_QUERY_RESULT, &count); + query->pending = false; + query = nullptr; + + current_zcull_stats.zpass_pixel_cnt += count; + zcull_task_queue.pending--; + } + + for (u32 i = 0; i < occlusion_query_count; ++i) + { + auto &query = occlusion_query_data[i]; + if (!query.pending && !query.active) + { + result = i; + break; + } + } + + if (result == UINT16_MAX && !hard_sync) + return synchronize_zcull_stats(true); + + return result; +} + +void GLGSRender::notify_zcull_info_changed() +{ + check_zcull_status(false); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index c84aee96d4..3754032e1d 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -101,12 +101,284 @@ private: //vaos are mandatory for core profile gl::vao m_vao; + //occlusion query + bool zcull_surface_active = false; + + struct occlusion_query_info + { + GLuint handle; + GLint result; + bool pending; + bool active; + }; + + struct + { + u32 zpass_pixel_cnt; + u32 zcull_stats; + u32 zcull_stats1; + u32 zcull_stats2; + u32 zcull_stats3; + + void clear() + { + zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; + } + } + current_zcull_stats; + + struct occlusion_task + { + std::vector task_stack; + occlusion_query_info* active_query = nullptr; + u32 pending = 0; + + //Add one query to the task + void add(occlusion_query_info* query) + { + active_query = query; + + if (task_stack.size() > 0 && pending == 0) + task_stack.resize(0); + + const auto empty_slots = task_stack.size() - pending; + if (empty_slots >= 4) + { + for (auto &_query : task_stack) + { + if (_query == nullptr) + { + _query = query; + pending++; + return; + } + } + } + + task_stack.push_back(query); + pending++; + } + } + zcull_task_queue = {}; + + const u32 occlusion_query_count = 128; + std::array occlusion_query_data = {}; + public: GLGSRender(); private: - static u32 enable(u32 enable, u32 cap); - static u32 enable(u32 enable, u32 cap, u32 index); + + struct + { + const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; + const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; + const u32 DEPTH_RANGE_MIN = 0xFFFF0003; + const u32 DEPTH_RANGE_MAX = 0xFFFF0004; + + std::unordered_map properties = {}; + std::unordered_map> indexed_properties = {}; + + bool enable(u32 test, GLenum cap) + { + auto found = properties.find(cap); + if (found != properties.end() && found->second == test) + return !!test; + + properties[cap] = test; + + if (test) + glEnable(cap); + else + glDisable(cap); + + return !!test; + } + + bool enablei(u32 test, GLenum cap, u32 index) + { + auto found = indexed_properties.find(cap); + const bool exists = found != indexed_properties.end(); + + if (!exists) + { + indexed_properties[cap] = {}; + indexed_properties[cap][index] = test; + } + else + { + if (found->second[index] == test) + return !!test; + + found->second[index] = test; + } + + if (test) + glEnablei(cap, index); + else + glDisablei(cap, index); + + return !!test; + } + + void depth_func(GLenum func) + { + if (properties[GL_DEPTH_FUNC] != func) + { + glDepthFunc(func); + properties[GL_DEPTH_FUNC] = func; + } + } + + void depth_mask(GLboolean mask) + { + if (properties[GL_DEPTH_WRITEMASK] != mask) + { + glDepthMask(mask); + properties[GL_DEPTH_WRITEMASK] = mask; + } + } + + void clear_depth(GLfloat depth) + { + u32 value = (u32&)depth; + if (properties[GL_DEPTH_CLEAR_VALUE] != value) + { + glClearDepth(value); + properties[GL_DEPTH_CLEAR_VALUE] = value; + } + } + + void stencil_mask(GLuint mask) + { + if (properties[GL_STENCIL_WRITEMASK] != mask) + { + glStencilMask(mask); + properties[GL_STENCIL_WRITEMASK] = mask; + } + } + + void clear_stencil(GLint stencil) + { + u32 value = (u32&)stencil; + if (properties[GL_STENCIL_CLEAR_VALUE] != value) + { + glClearStencil(value); + properties[GL_STENCIL_CLEAR_VALUE] = value; + } + } + + void color_mask(u32 mask) + { + if (properties[GL_COLOR_WRITEMASK] != mask) + { + glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 1 : 0)); + properties[GL_COLOR_WRITEMASK] = mask; + } + } + + void color_mask(bool r, bool g, bool b, bool a) + { + u32 mask = 0; + if (r) mask |= 0x20; + if (g) mask |= 0x40; + if (b) mask |= 0x80; + if (a) mask |= 0x10; + + color_mask(mask); + } + + void clear_color(u8 r, u8 g, u8 b, u8 a) + { + u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24; + if (properties[GL_COLOR_CLEAR_VALUE] != value) + { + glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); + properties[GL_COLOR_CLEAR_VALUE] = value; + } + } + + void depth_bounds(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (properties[DEPTH_BOUNDS_MIN] != depth_min || properties[DEPTH_BOUNDS_MAX] != depth_max) + { + glDepthBoundsEXT(min, max); + + properties[DEPTH_BOUNDS_MIN] = depth_min; + properties[DEPTH_BOUNDS_MAX] = depth_max; + } + } + + void depth_range(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (properties[DEPTH_RANGE_MIN] != depth_min || properties[DEPTH_RANGE_MAX] != depth_max) + { + glDepthRange(min, max); + + properties[DEPTH_RANGE_MIN] = depth_min; + properties[DEPTH_RANGE_MAX] = depth_max; + } + } + + void logic_op(GLenum op) + { + if (properties[GL_COLOR_LOGIC_OP] != op) + { + glLogicOp(op); + properties[GL_COLOR_LOGIC_OP] = op; + } + } + + void line_width(GLfloat width) + { + u32 value = (u32&)width; + + if (properties[GL_LINE_WIDTH] != value) + { + glLineWidth(width); + properties[GL_LINE_WIDTH] = value; + } + } + + void front_face(GLenum face) + { + if (properties[GL_FRONT_FACE] != face) + { + glFrontFace(face); + properties[GL_FRONT_FACE] = face; + } + } + + void cull_face(GLenum mode) + { + if (properties[GL_CULL_FACE_MODE] != mode) + { + glCullFace(mode); + properties[GL_CULL_FACE_MODE] = mode; + } + } + + void polygon_offset(float factor, float units) + { + u32 _units = (u32&)units; + u32 _factor = (u32&)factor; + + if (properties[GL_POLYGON_OFFSET_UNITS] != _units || properties[GL_POLYGON_OFFSET_FACTOR] != _factor) + { + glPolygonOffset(factor, units); + + properties[GL_POLYGON_OFFSET_UNITS] = _units; + properties[GL_POLYGON_OFFSET_FACTOR] = _factor; + } + } + } + gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer std::tuple > > set_vertex_buffer(); @@ -124,6 +396,9 @@ public: work_item& post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; + + void check_zcull_status(bool framebuffer_swap); + u32 synchronize_zcull_stats(bool hard_sync = false); protected: void begin() override; @@ -137,6 +412,10 @@ protected: void do_local_task() override; + void notify_zcull_info_changed() override; + void clear_zcull_stats(u32 type) override; + u32 get_zcull_stats(u32 type) override; + bool on_access_violation(u32 address, bool is_writing) override; virtual std::array, 4> copy_render_targets_to_memory() override; diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 2eb4c399bd..92c06e3fa7 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -183,6 +183,13 @@ OPENGL_PROC(PFNGLSAMPLERPARAMETERIPROC, SamplerParameteri); OPENGL_PROC(PFNGLSAMPLERPARAMETERFPROC, SamplerParameterf); OPENGL_PROC(PFNGLSAMPLERPARAMETERFVPROC, SamplerParameterfv); +//Occlusion Query +OPENGL_PROC(PFNGLGENQUERIESPROC, GenQueries); +OPENGL_PROC(PFNGLDELETEQUERIESPROC, DeleteQueries); +OPENGL_PROC(PFNGLGETQUERYOBJECTIVPROC, GetQueryObjectiv); +OPENGL_PROC(PFNGLBEGINQUERYPROC, BeginQuery); +OPENGL_PROC(PFNGLENDQUERYPROC, EndQuery); + //Texture Buffers OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT); OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEPROC, TextureBufferRange); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index b6bf9bd859..ea7073a801 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -161,6 +161,7 @@ void GLGSRender::init_buffers(bool skip_reading) synchronize_buffers(); m_rtts_dirty = false; + zcull_surface_active = false; const u16 clip_horizontal = rsx::method_registers.surface_clip_width(); const u16 clip_vertical = rsx::method_registers.surface_clip_height(); @@ -245,6 +246,8 @@ void GLGSRender::init_buffers(bool skip_reading) framebuffer_status_valid = draw_fbo.check(); if (!framebuffer_status_valid) return; + check_zcull_status(true); + draw_fbo.bind(); set_viewport(); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index a8e93eb8b7..05bbb54db9 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -184,6 +184,12 @@ namespace rsx bool invalid_command_interrupt_raised = false; bool in_begin_end = false; + bool conditional_render_test_failed = false; + bool conditional_render_enabled = false; + bool zcull_stats_enabled = false; + bool zcull_rendering_enabled = false; + bool zcull_pixel_cnt_enabled = false; + protected: thread(); virtual ~thread(); @@ -212,6 +218,11 @@ namespace rsx virtual u64 timestamp() const; virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; } + //zcull + virtual void notify_zcull_info_changed() {} + virtual void clear_zcull_stats(u32 type) {} + virtual u32 get_zcull_stats(u32 type) { return UINT32_MAX; } + gsl::span get_raw_index_array(const std::vector >& draw_indexed_clause) const; gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index f8e1066cb8..93199f1e06 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -5,7 +5,7 @@ namespace rsx { enum class vertex_base_type : u8 { - s1, ///< signed byte + s1, ///< signed normalized 16-bit int f, ///< float sf, ///< half float ub, ///< unsigned byte interpreted as 0.f and 1.f diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index bcb392747d..9e22aa3499 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -327,12 +327,10 @@ namespace rsx } } - void get_report(thread* rsx, u32 _reg, u32 arg) + vm::addr_t get_report_data_impl(u32 offset) { - u8 type = arg >> 24; - u32 offset = arg & 0xffffff; + u32 location = 0; blit_engine::context_dma report_dma = method_registers.context_dma_report(); - u32 location; switch (report_dma) { @@ -340,33 +338,42 @@ namespace rsx case blit_engine::context_dma::report_location_main: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN; break; case blit_engine::context_dma::memory_host_buffer: location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER; break; default: - LOG_WARNING(RSX, "nv4097::get_report: bad report dma: 0x%x", (u8)report_dma); + return vm::addr_t(0); + } + + return vm::cast(get_address(offset, location)); + } + + void get_report(thread* rsx, u32 _reg, u32 arg) + { + u8 type = arg >> 24; + u32 offset = arg & 0xffffff; + + auto address_ptr = get_report_data_impl(offset); + if (!address_ptr) + { + LOG_ERROR(RSX, "Bad argument passed to NV4097_GET_REPORT, arg=0x%X", arg); return; } - vm::ps3::ptr result = vm::cast(get_address(offset, location)); - - result->timer = rsx->timestamp(); + vm::ps3::ptr result = address_ptr; switch (type) { case CELL_GCM_ZPASS_PIXEL_CNT: - // todo: actual zculling, here we just report max, which seems to be enough for most games, but causes them to render *everything* - result->value = 0xFFFFFFFF; - break; case CELL_GCM_ZCULL_STATS: case CELL_GCM_ZCULL_STATS1: case CELL_GCM_ZCULL_STATS2: case CELL_GCM_ZCULL_STATS3: - result->value = 0; + result->value = rsx->get_zcull_stats(type); + LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type); break; - default: - result->value = 0; LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type); break; } - // This padding is needed to be set to 0, as games may use it for sync + + result->timer = rsx->timestamp(); result->padding = 0; } @@ -384,6 +391,58 @@ namespace rsx LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); break; } + + rsx->clear_zcull_stats(arg); + } + + void set_render_mode(thread* rsx, u32, u32 arg) + { + const u32 mode = arg >> 24; + switch (mode) + { + case 1: + rsx->conditional_render_enabled = false; + rsx->conditional_render_test_failed = false; + return; + case 2: + rsx->conditional_render_enabled = true; + break; + default: + rsx->conditional_render_enabled = false; + LOG_ERROR(RSX, "Unknown render mode %d", mode); + return; + } + + const u32 offset = arg & 0xffffff; + auto address_ptr = get_report_data_impl(offset); + + if (!address_ptr) + { + rsx->conditional_render_test_failed = false; + LOG_ERROR(RSX, "Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg); + return; + } + + vm::ps3::ptr result = address_ptr; + rsx->conditional_render_test_failed = (result->value == 0); + } + + void set_zcull_render_enable(thread* rsx, u32, u32 arg) + { + rsx->zcull_rendering_enabled = !!arg; + rsx->notify_zcull_info_changed(); + } + + void set_zcull_stats_enable(thread* rsx, u32, u32 arg) + { + rsx->zcull_stats_enabled = !!arg; + rsx->notify_zcull_info_changed(); + } + + void set_zcull_pixel_count_enable(thread* rsx, u32, u32 arg) + { + rsx->zcull_pixel_cnt_enabled = !!arg; + rsx->notify_zcull_info_changed(); } void set_surface_dirty_bit(thread* rsx, u32 _reg, u32) @@ -1486,6 +1545,10 @@ namespace rsx bind_range(); bind_range(); bind(); + bind(); + bind(); + bind(); + bind(); //NV308A bind_range(); diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.h b/rpcs3/Emu/RSX/rsx_vertex_data.h index 9cd09fdd86..4ba9bc4849 100644 --- a/rpcs3/Emu/RSX/rsx_vertex_data.h +++ b/rpcs3/Emu/RSX/rsx_vertex_data.h @@ -86,6 +86,7 @@ struct push_buffer_vertex_info case vertex_base_type::ub: case vertex_base_type::ub256: return 1; + case vertex_base_type::s1: case vertex_base_type::s32k: return size / 2; default: @@ -117,11 +118,11 @@ struct push_buffer_vertex_info case vertex_base_type::f: *(u32*)dst = se_storage::swap(arg); break; - case vertex_base_type::s1: case vertex_base_type::ub: case vertex_base_type::ub256: *(u32*)dst = arg; break; + case vertex_base_type::s1: case vertex_base_type::s32k: ((u16*)dst)[0] = se_storage::swap((u16)(arg & 0xffff)); ((u16*)dst)[1] = se_storage::swap((u16)(arg >> 16));