Implement hardware zcull emulation

rsx/gl: Support s1 immediate values; ogl minor refactoring
This commit is contained in:
kd-11 2017-07-27 19:04:55 +03:00
parent 7ab1792ef7
commit fcb7072fee
10 changed files with 628 additions and 103 deletions

View file

@ -484,7 +484,7 @@ s32 cellGcmSetDisplayBuffer(u8 id, u32 offset, u32 pitch, u32 width, u32 height)
m_config->gcm_buffers[id].width = width;
m_config->gcm_buffers[id].height = height;
if (id + 1 > render->display_buffers_count)
if (id + 1u > render->display_buffers_count)
{
render->display_buffers_count = id + 1;
}

View file

@ -328,7 +328,26 @@ s32 sys_rsx_context_attribute(s32 context_id, u32 package_id, u64 a3, u64 a4, u6
break;
case 0x301: // Depth-buffer (Z-cull)
break;
{
//a4 high = region = (1 << 0) | (zFormat << 4) | (aaFormat << 8);
//a4 low = size = ((width >> 6) << 22) | ((height >> 6) << 6);
//a5 high = start = cullStart&(~0xFFF);
//a5 low = offset = offset;
//a6 high = status0 = (zcullDir << 1) | (zcullFormat << 2) | ((sFunc & 0xF) << 12) | (sRef << 16) | (sMask << 24);
//a6 low = status1 = (0x2000 << 0) | (0x20 << 16);
auto &zcull = render->zculls[a3];
zcull.zFormat = ((a4 >> 32) >> 4) & 0xF;
zcull.aaFormat = ((a4 >> 32) >> 8) & 0xF;
zcull.width = ((a4 & 0xFFFFFFFF) >> 22) << 6;
zcull.height = (((a4 & 0xFFFFFFFF) >> 6) & 0xFF) << 6;
zcull.cullStart = (a5 >> 32);
zcull.offset = (a5 & 0xFFFFFFFF);
zcull.binded = (a6 & 0xFFFFFFFF) != 0;
//TODO: Set zculldir, format, sfunc, sref, smask
}
break;
case 0x302: // something with zcull
break;
case 0x600: // Framebuffer setup

View file

@ -33,34 +33,6 @@ GLGSRender::GLGSRender() : GSRender()
m_vertex_cache.reset(new gl::weak_vertex_cache());
}
u32 GLGSRender::enable(u32 condition, u32 cap)
{
if (condition)
{
glEnable(cap);
}
else
{
glDisable(cap);
}
return condition;
}
u32 GLGSRender::enable(u32 condition, u32 cap, u32 index)
{
if (condition)
{
glEnablei(cap, index);
}
else
{
glDisablei(cap, index);
}
return condition;
}
extern CellGcmContextData current_context;
namespace
@ -194,6 +166,9 @@ void GLGSRender::begin()
if (skip_frame)
return;
if (conditional_render_enabled && conditional_render_test_failed)
return;
init_buffers();
if (!framebuffer_status_valid)
@ -206,87 +181,91 @@ void GLGSRender::begin()
bool color_mask_r = rsx::method_registers.color_mask_r();
bool color_mask_a = rsx::method_registers.color_mask_a();
__glcheck glColorMask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
__glcheck glDepthMask(rsx::method_registers.depth_write_enabled());
__glcheck glStencilMask(rsx::method_registers.stencil_mask());
gl_state.color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
gl_state.stencil_mask(rsx::method_registers.stencil_mask());
if (__glcheck enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
{
__glcheck glDepthFunc(comparison_op(rsx::method_registers.depth_func()));
gl_state.depth_func(comparison_op(rsx::method_registers.depth_func()));
}
if (glDepthBoundsEXT && (__glcheck enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
{
__glcheck glDepthBoundsEXT(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
//__glcheck glDepthRange(rsx::method_registers.clip_min(), rsx::method_registers.clip_max());
__glcheck enable(rsx::method_registers.dither_enabled(), GL_DITHER);
gl_state.depth_range(rsx::method_registers.clip_min(), rsx::method_registers.clip_max());
gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);
if (__glcheck enable(rsx::method_registers.blend_enabled(), GL_BLEND))
if (gl_state.enable(rsx::method_registers.blend_enabled(), GL_BLEND))
{
__glcheck glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
blend_factor(rsx::method_registers.blend_func_sfactor_a()),
blend_factor(rsx::method_registers.blend_func_dfactor_a()));
auto blend_colors = rsx::get_constant_blend_colors();
__glcheck glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);
glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);
__glcheck glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()),
glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()),
blend_equation(rsx::method_registers.blend_equation_a()));
}
if (__glcheck enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
{
__glcheck glStencilFunc(comparison_op(rsx::method_registers.stencil_func()), rsx::method_registers.stencil_func_ref(),
glStencilFunc(comparison_op(rsx::method_registers.stencil_func()),
rsx::method_registers.stencil_func_ref(),
rsx::method_registers.stencil_func_mask());
__glcheck glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()),
glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()),
stencil_op(rsx::method_registers.stencil_op_zpass()));
if (rsx::method_registers.two_sided_stencil_test_enabled())
{
__glcheck glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());
__glcheck glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()),
glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());
glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()),
rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());
__glcheck glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()),
glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()),
stencil_op(rsx::method_registers.back_stencil_op_zfail()), stencil_op(rsx::method_registers.back_stencil_op_zpass()));
}
}
__glcheck enable(rsx::method_registers.blend_enabled_surface_1(), GL_BLEND, 1);
__glcheck enable(rsx::method_registers.blend_enabled_surface_2(), GL_BLEND, 2);
__glcheck enable(rsx::method_registers.blend_enabled_surface_3(), GL_BLEND, 3);
gl_state.enablei(rsx::method_registers.blend_enabled_surface_1(), GL_BLEND, 1);
gl_state.enablei(rsx::method_registers.blend_enabled_surface_2(), GL_BLEND, 2);
gl_state.enablei(rsx::method_registers.blend_enabled_surface_3(), GL_BLEND, 3);
if (__glcheck enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
{
__glcheck glLogicOp(logic_op(rsx::method_registers.logic_operation()));
gl_state.logic_op(logic_op(rsx::method_registers.logic_operation()));
}
__glcheck glLineWidth(rsx::method_registers.line_width());
__glcheck enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);
gl_state.line_width(rsx::method_registers.line_width());
gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);
gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
{
gl_state.cull_face(cull_face(rsx::method_registers.cull_face_mode()));
}
gl_state.front_face(front_face(rsx::method_registers.front_face_mode()));
//TODO
//NV4097_SET_ANISO_SPREAD
__glcheck enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
__glcheck enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
__glcheck enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
__glcheck glPolygonOffset(rsx::method_registers.poly_offset_scale(),
rsx::method_registers.poly_offset_bias());
//NV4097_SET_SPECULAR_ENABLE
//NV4097_SET_TWO_SIDE_LIGHT_EN
//NV4097_SET_FLAT_SHADE_OP
//NV4097_SET_EDGE_FLAG
if (__glcheck enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
{
__glcheck glCullFace(cull_face(rsx::method_registers.cull_face_mode()));
}
__glcheck glFrontFace(front_face(rsx::method_registers.front_face_mode()));
//NV4097_SET_COLOR_KEY_COLOR
//NV4097_SET_SHADER_CONTROL
@ -330,7 +309,7 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (skip_frame || !framebuffer_status_valid || !load_program())
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !load_program())
{
rsx::thread::end();
return;
@ -474,12 +453,12 @@ void GLGSRender::end()
int texture_index = i + rsx::limits::fragment_textures_count;
int location;
/* if (!rsx::method_registers.vertex_textures[i].enabled())
if (!rsx::method_registers.vertex_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + texture_index);
glBindTexture(GL_TEXTURE_2D, 0);
//glActiveTexture(GL_TEXTURE0 + texture_index);
//glBindTexture(GL_TEXTURE_2D, 0);
continue;
} */
}
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
{
@ -520,7 +499,7 @@ void GLGSRender::end()
if (indexed_draw_info || (skip_upload && m_last_draw_indexed == true))
{
if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{
GLenum index_type = (skip_upload)? m_last_ib_type: std::get<0>(indexed_draw_info.value());
__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
@ -697,6 +676,17 @@ void GLGSRender::on_init_thread()
m_gl_sampler_states[i].bind(i);
}
//Occlusion query
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
glGenQueries(1, &query.handle);
query.pending = false;
query.active = false;
query.result = 0;
}
//Clip planes are shader controlled; enable all planes driver-side
glEnable(GL_CLIP_DISTANCE0 + 0);
glEnable(GL_CLIP_DISTANCE0 + 1);
@ -772,6 +762,15 @@ void GLGSRender::on_exit()
m_text_printer.close();
m_gl_texture_cache.close();
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
query.active = false;
query.pending = false;
glDeleteQueries(1, &query.handle);
}
return GSRender::on_exit();
}
@ -790,8 +789,8 @@ void GLGSRender::clear_surface(u32 arg)
u32 max_depth_value = get_max_depth_value(surface_depth_format);
u32 clear_depth = rsx::method_registers.z_clear_value(surface_depth_format == rsx::surface_depth_format::z24s8);
glDepthMask(GL_TRUE);
glClearDepth(double(clear_depth) / max_depth_value);
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(f32(clear_depth) / max_depth_value);
mask |= GLenum(gl::buffers::depth);
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
@ -806,8 +805,8 @@ void GLGSRender::clear_surface(u32 arg)
{
u8 clear_stencil = rsx::method_registers.stencil_clear_value();
__glcheck glStencilMask(rsx::method_registers.stencil_mask());
glClearStencil(clear_stencil);
gl_state.stencil_mask(rsx::method_registers.stencil_mask());
gl_state.clear_stencil(clear_stencil);
mask |= GLenum(gl::buffers::stencil);
}
@ -819,8 +818,8 @@ void GLGSRender::clear_surface(u32 arg)
u8 clear_g = rsx::method_registers.clear_color_g();
u8 clear_b = rsx::method_registers.clear_color_b();
glColorMask(((arg & 0x20) ? 1 : 0), ((arg & 0x40) ? 1 : 0), ((arg & 0x80) ? 1 : 0), ((arg & 0x10) ? 1 : 0));
glClearColor(clear_r / 255.f, clear_g / 255.f, clear_b / 255.f, clear_a / 255.f);
gl_state.color_mask(arg & 0xf0);
gl_state.clear_color(clear_r, clear_g, clear_b, clear_a);
mask |= GLenum(gl::buffers::color);
@ -1064,10 +1063,8 @@ void GLGSRender::flip(int buffer)
}
// Blit source image to the screen
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
// Disable scissor test (affects blit)
glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
gl::screen.clear(gl::buffers::color_depth_stencil);
__glcheck m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
@ -1183,3 +1180,148 @@ bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
{
return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts);
}
void GLGSRender::check_zcull_status(bool framebuffer_swap)
{
if (framebuffer_swap)
{
zcull_surface_active = false;
const u32 zeta_address = depth_surface_info.address;
if (zeta_address)
{
//Find zeta address in bound zculls
for (int i = 0; i < rsx::limits::zculls_count; i++)
{
if (zculls[i].binded)
{
const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL);
if (rsx_address == zeta_address)
{
zcull_surface_active = true;
break;
}
}
}
}
}
bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled;
occlusion_query_info* query = nullptr;
if (zcull_task_queue.task_stack.size() > 0)
query = zcull_task_queue.active_query;
if (query && query->active)
{
if (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active)
{
glEndQuery(GL_ANY_SAMPLES_PASSED);
query->active = false;
query->pending = true;
}
}
else
{
if (zcull_rendering_enabled && testing_enabled && zcull_surface_active)
{
//Find query
u32 free_index = synchronize_zcull_stats();
query = &occlusion_query_data[free_index];
zcull_task_queue.add(query);
glBeginQuery(GL_ANY_SAMPLES_PASSED, query->handle);
query->active = true;
query->result = 0;
}
}
}
void GLGSRender::clear_zcull_stats(u32 type)
{
if (type == CELL_GCM_ZPASS_PIXEL_CNT)
{
//synchronize_zcull_stats(true);
current_zcull_stats.clear();
}
}
u32 GLGSRender::get_zcull_stats(u32 type)
{
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
{
if (current_zcull_stats.zpass_pixel_cnt > 0)
return UINT32_MAX;
//If we have no results, we might as well synchronize here and wait for results to become available
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0)? UINT32_MAX: 0;
}
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
//TODO
return UINT32_MAX;
case CELL_GCM_ZCULL_STATS3:
{
//Some kind of inverse value
if (current_zcull_stats.zpass_pixel_cnt > 0)
return 0;
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT32_MAX;
}
default:
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
return 0;
}
}
u32 GLGSRender::synchronize_zcull_stats(bool hard_sync)
{
if (!zcull_rendering_enabled || zcull_task_queue.pending == 0)
return 0;
u32 result = UINT16_MAX;
GLint count, status;
for (auto &query : zcull_task_queue.task_stack)
{
if (query == nullptr || query->active)
continue;
glGetQueryObjectiv(query->handle, GL_QUERY_RESULT_AVAILABLE, &status);
if (status == GL_FALSE && !hard_sync)
continue;
glGetQueryObjectiv(query->handle, GL_QUERY_RESULT, &count);
query->pending = false;
query = nullptr;
current_zcull_stats.zpass_pixel_cnt += count;
zcull_task_queue.pending--;
}
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
if (!query.pending && !query.active)
{
result = i;
break;
}
}
if (result == UINT16_MAX && !hard_sync)
return synchronize_zcull_stats(true);
return result;
}
void GLGSRender::notify_zcull_info_changed()
{
check_zcull_status(false);
}

View file

@ -101,12 +101,284 @@ private:
//vaos are mandatory for core profile
gl::vao m_vao;
//occlusion query
bool zcull_surface_active = false;
struct occlusion_query_info
{
GLuint handle;
GLint result;
bool pending;
bool active;
};
struct
{
u32 zpass_pixel_cnt;
u32 zcull_stats;
u32 zcull_stats1;
u32 zcull_stats2;
u32 zcull_stats3;
void clear()
{
zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0;
}
}
current_zcull_stats;
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
{
active_query = query;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
task_stack.push_back(query);
pending++;
}
}
zcull_task_queue = {};
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
public:
GLGSRender();
private:
static u32 enable(u32 enable, u32 cap);
static u32 enable(u32 enable, u32 cap, u32 index);
struct
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002;
const u32 DEPTH_RANGE_MIN = 0xFFFF0003;
const u32 DEPTH_RANGE_MAX = 0xFFFF0004;
std::unordered_map<GLenum, u32> properties = {};
std::unordered_map<GLenum, std::array<u32, 4>> indexed_properties = {};
bool enable(u32 test, GLenum cap)
{
auto found = properties.find(cap);
if (found != properties.end() && found->second == test)
return !!test;
properties[cap] = test;
if (test)
glEnable(cap);
else
glDisable(cap);
return !!test;
}
bool enablei(u32 test, GLenum cap, u32 index)
{
auto found = indexed_properties.find(cap);
const bool exists = found != indexed_properties.end();
if (!exists)
{
indexed_properties[cap] = {};
indexed_properties[cap][index] = test;
}
else
{
if (found->second[index] == test)
return !!test;
found->second[index] = test;
}
if (test)
glEnablei(cap, index);
else
glDisablei(cap, index);
return !!test;
}
void depth_func(GLenum func)
{
if (properties[GL_DEPTH_FUNC] != func)
{
glDepthFunc(func);
properties[GL_DEPTH_FUNC] = func;
}
}
void depth_mask(GLboolean mask)
{
if (properties[GL_DEPTH_WRITEMASK] != mask)
{
glDepthMask(mask);
properties[GL_DEPTH_WRITEMASK] = mask;
}
}
void clear_depth(GLfloat depth)
{
u32 value = (u32&)depth;
if (properties[GL_DEPTH_CLEAR_VALUE] != value)
{
glClearDepth(value);
properties[GL_DEPTH_CLEAR_VALUE] = value;
}
}
void stencil_mask(GLuint mask)
{
if (properties[GL_STENCIL_WRITEMASK] != mask)
{
glStencilMask(mask);
properties[GL_STENCIL_WRITEMASK] = mask;
}
}
void clear_stencil(GLint stencil)
{
u32 value = (u32&)stencil;
if (properties[GL_STENCIL_CLEAR_VALUE] != value)
{
glClearStencil(value);
properties[GL_STENCIL_CLEAR_VALUE] = value;
}
}
void color_mask(u32 mask)
{
if (properties[GL_COLOR_WRITEMASK] != mask)
{
glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 1 : 0));
properties[GL_COLOR_WRITEMASK] = mask;
}
}
void color_mask(bool r, bool g, bool b, bool a)
{
u32 mask = 0;
if (r) mask |= 0x20;
if (g) mask |= 0x40;
if (b) mask |= 0x80;
if (a) mask |= 0x10;
color_mask(mask);
}
void clear_color(u8 r, u8 g, u8 b, u8 a)
{
u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24;
if (properties[GL_COLOR_CLEAR_VALUE] != value)
{
glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f);
properties[GL_COLOR_CLEAR_VALUE] = value;
}
}
void depth_bounds(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (properties[DEPTH_BOUNDS_MIN] != depth_min || properties[DEPTH_BOUNDS_MAX] != depth_max)
{
glDepthBoundsEXT(min, max);
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
}
}
void depth_range(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (properties[DEPTH_RANGE_MIN] != depth_min || properties[DEPTH_RANGE_MAX] != depth_max)
{
glDepthRange(min, max);
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;
}
}
void logic_op(GLenum op)
{
if (properties[GL_COLOR_LOGIC_OP] != op)
{
glLogicOp(op);
properties[GL_COLOR_LOGIC_OP] = op;
}
}
void line_width(GLfloat width)
{
u32 value = (u32&)width;
if (properties[GL_LINE_WIDTH] != value)
{
glLineWidth(width);
properties[GL_LINE_WIDTH] = value;
}
}
void front_face(GLenum face)
{
if (properties[GL_FRONT_FACE] != face)
{
glFrontFace(face);
properties[GL_FRONT_FACE] = face;
}
}
void cull_face(GLenum mode)
{
if (properties[GL_CULL_FACE_MODE] != mode)
{
glCullFace(mode);
properties[GL_CULL_FACE_MODE] = mode;
}
}
void polygon_offset(float factor, float units)
{
u32 _units = (u32&)units;
u32 _factor = (u32&)factor;
if (properties[GL_POLYGON_OFFSET_UNITS] != _units || properties[GL_POLYGON_OFFSET_FACTOR] != _factor)
{
glPolygonOffset(factor, units);
properties[GL_POLYGON_OFFSET_UNITS] = _units;
properties[GL_POLYGON_OFFSET_FACTOR] = _factor;
}
}
}
gl_state;
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
@ -124,6 +396,9 @@ public:
work_item& post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
void check_zcull_status(bool framebuffer_swap);
u32 synchronize_zcull_stats(bool hard_sync = false);
protected:
void begin() override;
@ -137,6 +412,10 @@ protected:
void do_local_task() override;
void notify_zcull_info_changed() override;
void clear_zcull_stats(u32 type) override;
u32 get_zcull_stats(u32 type) override;
bool on_access_violation(u32 address, bool is_writing) override;
virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;

View file

@ -183,6 +183,13 @@ OPENGL_PROC(PFNGLSAMPLERPARAMETERIPROC, SamplerParameteri);
OPENGL_PROC(PFNGLSAMPLERPARAMETERFPROC, SamplerParameterf);
OPENGL_PROC(PFNGLSAMPLERPARAMETERFVPROC, SamplerParameterfv);
//Occlusion Query
OPENGL_PROC(PFNGLGENQUERIESPROC, GenQueries);
OPENGL_PROC(PFNGLDELETEQUERIESPROC, DeleteQueries);
OPENGL_PROC(PFNGLGETQUERYOBJECTIVPROC, GetQueryObjectiv);
OPENGL_PROC(PFNGLBEGINQUERYPROC, BeginQuery);
OPENGL_PROC(PFNGLENDQUERYPROC, EndQuery);
//Texture Buffers
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEPROC, TextureBufferRange);

View file

@ -161,6 +161,7 @@ void GLGSRender::init_buffers(bool skip_reading)
synchronize_buffers();
m_rtts_dirty = false;
zcull_surface_active = false;
const u16 clip_horizontal = rsx::method_registers.surface_clip_width();
const u16 clip_vertical = rsx::method_registers.surface_clip_height();
@ -245,6 +246,8 @@ void GLGSRender::init_buffers(bool skip_reading)
framebuffer_status_valid = draw_fbo.check();
if (!framebuffer_status_valid) return;
check_zcull_status(true);
draw_fbo.bind();
set_viewport();

View file

@ -184,6 +184,12 @@ namespace rsx
bool invalid_command_interrupt_raised = false;
bool in_begin_end = false;
bool conditional_render_test_failed = false;
bool conditional_render_enabled = false;
bool zcull_stats_enabled = false;
bool zcull_rendering_enabled = false;
bool zcull_pixel_cnt_enabled = false;
protected:
thread();
virtual ~thread();
@ -212,6 +218,11 @@ namespace rsx
virtual u64 timestamp() const;
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
//zcull
virtual void notify_zcull_info_changed() {}
virtual void clear_zcull_stats(u32 type) {}
virtual u32 get_zcull_stats(u32 type) { return UINT32_MAX; }
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;

View file

@ -5,7 +5,7 @@ namespace rsx
{
enum class vertex_base_type : u8
{
s1, ///< signed byte
s1, ///< signed normalized 16-bit int
f, ///< float
sf, ///< half float
ub, ///< unsigned byte interpreted as 0.f and 1.f

View file

@ -327,12 +327,10 @@ namespace rsx
}
}
void get_report(thread* rsx, u32 _reg, u32 arg)
vm::addr_t get_report_data_impl(u32 offset)
{
u8 type = arg >> 24;
u32 offset = arg & 0xffffff;
u32 location = 0;
blit_engine::context_dma report_dma = method_registers.context_dma_report();
u32 location;
switch (report_dma)
{
@ -340,33 +338,42 @@ namespace rsx
case blit_engine::context_dma::report_location_main: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN; break;
case blit_engine::context_dma::memory_host_buffer: location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER; break;
default:
LOG_WARNING(RSX, "nv4097::get_report: bad report dma: 0x%x", (u8)report_dma);
return vm::addr_t(0);
}
return vm::cast(get_address(offset, location));
}
void get_report(thread* rsx, u32 _reg, u32 arg)
{
u8 type = arg >> 24;
u32 offset = arg & 0xffffff;
auto address_ptr = get_report_data_impl(offset);
if (!address_ptr)
{
LOG_ERROR(RSX, "Bad argument passed to NV4097_GET_REPORT, arg=0x%X", arg);
return;
}
vm::ps3::ptr<CellGcmReportData> result = vm::cast(get_address(offset, location));
result->timer = rsx->timestamp();
vm::ps3::ptr<CellGcmReportData> result = address_ptr;
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
// todo: actual zculling, here we just report max, which seems to be enough for most games, but causes them to render *everything*
result->value = 0xFFFFFFFF;
break;
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
case CELL_GCM_ZCULL_STATS3:
result->value = 0;
result->value = rsx->get_zcull_stats(type);
LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
break;
default:
result->value = 0;
LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
break;
}
// This padding is needed to be set to 0, as games may use it for sync
result->timer = rsx->timestamp();
result->padding = 0;
}
@ -384,6 +391,58 @@ namespace rsx
LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
break;
}
rsx->clear_zcull_stats(arg);
}
void set_render_mode(thread* rsx, u32, u32 arg)
{
const u32 mode = arg >> 24;
switch (mode)
{
case 1:
rsx->conditional_render_enabled = false;
rsx->conditional_render_test_failed = false;
return;
case 2:
rsx->conditional_render_enabled = true;
break;
default:
rsx->conditional_render_enabled = false;
LOG_ERROR(RSX, "Unknown render mode %d", mode);
return;
}
const u32 offset = arg & 0xffffff;
auto address_ptr = get_report_data_impl(offset);
if (!address_ptr)
{
rsx->conditional_render_test_failed = false;
LOG_ERROR(RSX, "Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg);
return;
}
vm::ps3::ptr<CellGcmReportData> result = address_ptr;
rsx->conditional_render_test_failed = (result->value == 0);
}
void set_zcull_render_enable(thread* rsx, u32, u32 arg)
{
rsx->zcull_rendering_enabled = !!arg;
rsx->notify_zcull_info_changed();
}
void set_zcull_stats_enable(thread* rsx, u32, u32 arg)
{
rsx->zcull_stats_enabled = !!arg;
rsx->notify_zcull_info_changed();
}
void set_zcull_pixel_count_enable(thread* rsx, u32, u32 arg)
{
rsx->zcull_pixel_cnt_enabled = !!arg;
rsx->notify_zcull_info_changed();
}
void set_surface_dirty_bit(thread* rsx, u32 _reg, u32)
@ -1486,6 +1545,10 @@ namespace rsx
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 1, 16, nv4097::set_vertex_array_dirty_bit>();
bind<NV4097_SET_INDEX_ARRAY_ADDRESS, nv4097::set_idbuf_dirty_bit>();
bind<NV4097_SET_RENDER_ENABLE, nv4097::set_render_mode>();
bind<NV4097_SET_ZCULL_EN, nv4097::set_zcull_render_enable>();
bind<NV4097_SET_ZCULL_STATS_ENABLE, nv4097::set_zcull_stats_enable>();
bind<NV4097_SET_ZPASS_PIXEL_COUNT_ENABLE, nv4097::set_zcull_pixel_count_enable>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();

View file

@ -86,6 +86,7 @@ struct push_buffer_vertex_info
case vertex_base_type::ub:
case vertex_base_type::ub256:
return 1;
case vertex_base_type::s1:
case vertex_base_type::s32k:
return size / 2;
default:
@ -117,11 +118,11 @@ struct push_buffer_vertex_info
case vertex_base_type::f:
*(u32*)dst = se_storage<u32>::swap(arg);
break;
case vertex_base_type::s1:
case vertex_base_type::ub:
case vertex_base_type::ub256:
*(u32*)dst = arg;
break;
case vertex_base_type::s1:
case vertex_base_type::s32k:
((u16*)dst)[0] = se_storage<u16>::swap((u16)(arg & 0xffff));
((u16*)dst)[1] = se_storage<u16>::swap((u16)(arg >> 16));