From 81fcadac3804eff064cf7d4d6f25f5e509208115 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 19 Jan 2016 18:43:16 +0100 Subject: [PATCH] gl: Use shared surface store (updated) Update surface cache optimization to master; minor fixes Fix mixed EOL Remove unused include; change null bindings to 0 --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 453 +------------------------ rpcs3/Emu/RSX/GL/GLGSRender.h | 8 +- rpcs3/Emu/RSX/GL/gl_helpers.h | 1 + rpcs3/Emu/RSX/GL/gl_render_targets.cpp | 349 +++++++++++++++++++ rpcs3/Emu/RSX/GL/gl_render_targets.h | 196 +++++++++++ rpcs3/Emu/RSX/GL/gl_texture_cache.h | 54 +-- rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 9 +- rpcs3/GLGSRender.vcxproj | 2 + rpcs3/GLGSRender.vcxproj.filters | 3 + 9 files changed, 594 insertions(+), 481 deletions(-) create mode 100644 rpcs3/Emu/RSX/GL/gl_render_targets.cpp create mode 100644 rpcs3/Emu/RSX/GL/gl_render_targets.h diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 002cfc85d4..e517efb382 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -21,16 +21,6 @@ namespace throw EXCEPTION("Unknow depth format"); } - u8 get_pixel_size(rsx::surface_depth_format format) - { - switch (format) - { - case rsx::surface_depth_format::z16: return 2; - case rsx::surface_depth_format::z24s8: return 4; - } - throw EXCEPTION("Unknow depth format"); - } - u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size) { /** @@ -417,14 +407,14 @@ void GLGSRender::end() if (!textures[i].enabled()) { glActiveTexture(GL_TEXTURE0 + i); - glBindTexture(target, NULL); + glBindTexture(target, 0); glProgramUniform1i(m_program->id(), location, i); continue; } m_gl_textures[i].set_target(target); - __glcheck m_gl_texture_cache.upload_texture(i, textures[i], m_gl_textures[i]); + __glcheck m_gl_texture_cache.upload_texture(i, textures[i], m_gl_textures[i], m_rtts); glProgramUniform1i(m_program->id(), location, i); } } @@ -500,7 +490,7 @@ void GLGSRender::end() if (!vertex_info.size) // disabled, bind a null sampler { glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count); - glBindTexture(GL_TEXTURE_BUFFER, NULL); + glBindTexture(GL_TEXTURE_BUFFER, 0); glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count); continue; } @@ -567,7 +557,7 @@ void GLGSRender::end() if (!enabled) { glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count); - glBindTexture(GL_TEXTURE_BUFFER, NULL); + glBindTexture(GL_TEXTURE_BUFFER, 0); glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count); continue; } @@ -664,7 +654,7 @@ void GLGSRender::end() else { glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count); - glBindTexture(GL_TEXTURE_BUFFER, NULL); + glBindTexture(GL_TEXTURE_BUFFER, 0); glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count); continue; } @@ -790,12 +780,6 @@ void GLGSRender::on_exit() if (draw_fbo) draw_fbo.remove(); - for (auto &tex : m_draw_tex_color) - if (tex) tex.remove(); - - if (m_draw_tex_depth_stencil) - m_draw_tex_depth_stencil.remove(); - if (m_flip_fbo) m_flip_fbo.remove(); @@ -987,433 +971,6 @@ bool GLGSRender::load_program() return true; } -struct color_swizzle -{ - gl::texture::channel a = gl::texture::channel::a; - gl::texture::channel r = gl::texture::channel::r; - gl::texture::channel g = gl::texture::channel::g; - gl::texture::channel b = gl::texture::channel::b; - - color_swizzle() = default; - color_swizzle(gl::texture::channel a, gl::texture::channel r, gl::texture::channel g, gl::texture::channel b) - : a(a), r(r), g(g), b(b) - { - } -}; - -struct color_format -{ - gl::texture::type type; - gl::texture::format format; - bool swap_bytes; - int channel_count; - int channel_size; - color_swizzle swizzle; -}; - -color_format surface_color_format_to_gl(rsx::surface_color_format color_format) -{ - //color format - switch (color_format) - { - case rsx::surface_color_format::r5g6b5: - return{ gl::texture::type::ushort_5_6_5, gl::texture::format::bgr, false, 3, 2 }; - - case rsx::surface_color_format::a8r8g8b8: - return{ gl::texture::type::uint_8_8_8_8, gl::texture::format::bgra, false, 4, 1 }; - - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - return{ gl::texture::type::uint_8_8_8_8, gl::texture::format::bgra, false, 4, 1, - { gl::texture::channel::one, gl::texture::channel::r, gl::texture::channel::g, gl::texture::channel::b } }; - - case rsx::surface_color_format::w16z16y16x16: - return{ gl::texture::type::f16, gl::texture::format::rgba, true, 4, 2 }; - - case rsx::surface_color_format::w32z32y32x32: - return{ gl::texture::type::f32, gl::texture::format::rgba, true, 4, 4 }; - - case rsx::surface_color_format::b8: - case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::g8b8: - case rsx::surface_color_format::x32: - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::a8b8g8r8: - default: - LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); - return{ gl::texture::type::uint_8_8_8_8, gl::texture::format::bgra, false, 4, 1 }; - } -} - -std::pair surface_depth_format_to_gl(rsx::surface_depth_format depth_format) -{ - switch (depth_format) - { - case rsx::surface_depth_format::z16: - return std::make_pair(gl::texture::type::ushort, gl::texture::format::depth); - - default: - LOG_ERROR(RSX, "Surface depth buffer: Unsupported surface depth format (0x%x)", depth_format); - case rsx::surface_depth_format::z24s8: - return std::make_pair(gl::texture::type::uint_24_8, gl::texture::format::depth_stencil); - //return std::make_pair(gl::texture::type::f32, gl::texture::format::depth); - } -} - -void GLGSRender::init_buffers(bool skip_reading) -{ - u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; - - u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; - u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; - - u32 clip_width = clip_horizontal >> 16; - u32 clip_height = clip_vertical >> 16; - u32 clip_x = clip_horizontal; - u32 clip_y = clip_vertical; - - if (!draw_fbo || m_surface.format != surface_format) - { - m_surface.unpack(surface_format); - m_surface.width = clip_width; - m_surface.height = clip_height; - - LOG_WARNING(RSX, "surface: %dx%d", clip_width, clip_height); - - draw_fbo.recreate(); - m_draw_tex_depth_stencil.recreate(gl::texture::target::texture2D); - - auto format = surface_color_format_to_gl(m_surface.color_format); - - for (int i = 0; i < rsx::limits::color_buffers_count; ++i) - { - m_draw_tex_color[i].recreate(gl::texture::target::texture2D); - __glcheck m_draw_tex_color[i].config() - .size({ (int)m_surface.width, (int)m_surface.height }) - .type(format.type) - .format(format.format) - .swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a); - - __glcheck m_draw_tex_color[i].pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1); - __glcheck m_draw_tex_color[i].pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1); - - __glcheck draw_fbo.color[i] = m_draw_tex_color[i]; - __glcheck draw_fbo.check(); - } - - switch (m_surface.depth_format) - { - case rsx::surface_depth_format::z16: - { - __glcheck m_draw_tex_depth_stencil.config() - .size({ (int)m_surface.width, (int)m_surface.height }) - .type(gl::texture::type::ushort) - .format(gl::texture::format::depth) - .internal_format(gl::texture::internal_format::depth16); - - __glcheck draw_fbo.depth = m_draw_tex_depth_stencil; - break; - } - - case rsx::surface_depth_format::z24s8: - { - __glcheck m_draw_tex_depth_stencil.config() - .size({ (int)m_surface.width, (int)m_surface.height }) - .type(gl::texture::type::uint_24_8) - .format(gl::texture::format::depth_stencil) - .internal_format(gl::texture::internal_format::depth24_stencil8); - - __glcheck draw_fbo.depth_stencil = m_draw_tex_depth_stencil; - break; - } - - default: - { - LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format); - assert(0); - break; - } - } - - __glcheck m_draw_tex_depth_stencil.pixel_pack_settings().aligment(1); - __glcheck m_draw_tex_depth_stencil.pixel_unpack_settings().aligment(1); - } - - if (!skip_reading) - { - read_buffers(); - } - - set_viewport(); - - switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) - { - case rsx::surface_target::none: break; - - case rsx::surface_target::surface_a: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surface_b: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[1] ); - break; - - case rsx::surface_target::surfaces_a_b: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); - break; - - case rsx::surface_target::surfaces_a_b_c: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); - break; - - default: - LOG_ERROR(RSX, "Bad surface color target: %d", rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]); - break; - } -} - -static const u32 mr_color_offset[rsx::limits::color_buffers_count] = -{ - NV4097_SET_SURFACE_COLOR_AOFFSET, - NV4097_SET_SURFACE_COLOR_BOFFSET, - NV4097_SET_SURFACE_COLOR_COFFSET, - NV4097_SET_SURFACE_COLOR_DOFFSET -}; - -static const u32 mr_color_dma[rsx::limits::color_buffers_count] = -{ - NV4097_SET_CONTEXT_DMA_COLOR_A, - NV4097_SET_CONTEXT_DMA_COLOR_B, - NV4097_SET_CONTEXT_DMA_COLOR_C, - NV4097_SET_CONTEXT_DMA_COLOR_D -}; - -static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = -{ - NV4097_SET_SURFACE_PITCH_A, - NV4097_SET_SURFACE_PITCH_B, - NV4097_SET_SURFACE_PITCH_C, - NV4097_SET_SURFACE_PITCH_D -}; - -void GLGSRender::read_buffers() -{ - if (!draw_fbo) - return; - - glDisable(GL_STENCIL_TEST); - - if (rpcs3::state.config.rsx.opengl.read_color_buffers) - { - auto color_format = surface_color_format_to_gl(m_surface.color_format); - - auto read_color_buffers = [&](int index, int count) - { - u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - /** - * Read color buffers is useless if write buffers is enabled. I havent encountered a case where it is necessary - * since the output is usually fed back into the pipeline as a fragment shader input texture - * It is included here for completeness - */ - for (int i = index; i < index + count; ++i) - { - u32 offset = rsx::method_registers[mr_color_offset[i]]; - u32 location = rsx::method_registers[mr_color_dma[i]]; - u32 pitch = rsx::method_registers[mr_color_pitch[i]]; - - if (pitch <= 64) - continue; - - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - - bool success = m_gl_texture_cache.explicit_writeback(m_draw_tex_color[i], texaddr, pitch); - - //Fall back to slower methods if the image could not be fetched. - if (!success) - { - if (!color_buffer.tile) - { - m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type); - } - else - { - u32 range = pitch * height; - m_gl_texture_cache.remove_in_range(texaddr, range); - - std::unique_ptr buffer(new u8[pitch * height]); - color_buffer.read(buffer.get(), width, height, pitch); - - __glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type); - } - } - } - }; - - switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) - { - case rsx::surface_target::none: - break; - - case rsx::surface_target::surface_a: - read_color_buffers(0, 1); - break; - - case rsx::surface_target::surface_b: - read_color_buffers(1, 1); - break; - - case rsx::surface_target::surfaces_a_b: - read_color_buffers(0, 2); - break; - - case rsx::surface_target::surfaces_a_b_c: - read_color_buffers(0, 3); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - read_color_buffers(0, 4); - break; - } - } - - if (rpcs3::state.config.rsx.opengl.read_depth_buffer) - { - //TODO: use pitch - u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; - - if (pitch <= 64) - return; - - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); - bool in_cache = m_gl_texture_cache.explicit_writeback(m_draw_tex_depth_stencil, depth_address, pitch); - - if (in_cache) - return; - - //Read failed. Fall back to slow s/w path... - - auto depth_format = surface_depth_format_to_gl(m_surface.depth_format); - int pixel_size = get_pixel_size(m_surface.depth_format); - gl::buffer pbo_depth; - - __glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size); - __glcheck pbo_depth.map([&](GLubyte* pixels) - { - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); - - if (m_surface.depth_format == rsx::surface_depth_format::z16) - { - u16 *dst = (u16*)pixels; - const be_t* src = vm::ps3::_ptr(depth_address); - for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i) - { - dst[i] = src[i]; - } - } - else - { - u32 *dst = (u32*)pixels; - const be_t* src = vm::ps3::_ptr(depth_address); - for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i) - { - dst[i] = src[i]; - } - } - }, gl::buffer::access::write); - - __glcheck m_draw_tex_depth_stencil.copy_from(pbo_depth, depth_format.second, depth_format.first); - } -} - -void GLGSRender::write_buffers() -{ - if (!draw_fbo) - return; - - if (rpcs3::state.config.rsx.opengl.write_color_buffers) - { - auto color_format = surface_color_format_to_gl(m_surface.color_format); - - auto write_color_buffers = [&](int index, int count) - { - u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - for (int i = index; i < index + count; ++i) - { - u32 offset = rsx::method_registers[mr_color_offset[i]]; - u32 location = rsx::method_registers[mr_color_dma[i]]; - u32 pitch = rsx::method_registers[mr_color_pitch[i]]; - - if (pitch <= 64) - continue; - - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - u32 range = pitch * height; - - /**Even tiles are loaded as whole textures during read_buffers from testing. - * Need further evaluation to determine correct behavior. Separate paths for both show no difference, - * but using the GPU to perform the caching is many times faster. - */ - __glcheck m_gl_texture_cache.save_render_target(texaddr, range, m_draw_tex_color[i]); - } - }; - - switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) - { - case rsx::surface_target::none: - break; - - case rsx::surface_target::surface_a: - write_color_buffers(0, 1); - break; - - case rsx::surface_target::surface_b: - write_color_buffers(1, 1); - break; - - case rsx::surface_target::surfaces_a_b: - write_color_buffers(0, 2); - break; - - case rsx::surface_target::surfaces_a_b_c: - write_color_buffers(0, 3); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - write_color_buffers(0, 4); - break; - } - } - - if (rpcs3::state.config.rsx.opengl.write_depth_buffer) - { - //TODO: use pitch - u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; - - if (pitch <= 64) - return; - - auto depth_format = surface_depth_format_to_gl(m_surface.depth_format); - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); - u32 range = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height() * 2; - - if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2; - - m_gl_texture_cache.save_render_target(depth_address, range, m_draw_tex_depth_stencil); - } -} - void GLGSRender::flip(int buffer) { //LOG_NOTICE(Log::RSX, "flip(%d)", buffer); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 4a7c150657..81b0b04d33 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -3,6 +3,7 @@ #include "gl_helpers.h" #include "rsx_gl_texture.h" #include "gl_texture_cache.h" +#include "gl_render_targets.h" #define RSX_DEBUG 1 @@ -22,6 +23,7 @@ private: gl::glsl::program *m_program; rsx::surface_info m_surface; + gl_render_targets m_rtts; struct texture_buffer_pair { @@ -38,9 +40,6 @@ public: private: GLProgramBuffer m_prog_buffer; - gl::texture m_draw_tex_color[rsx::limits::color_buffers_count]; - gl::texture m_draw_tex_depth_stencil; - //buffer gl::fbo m_flip_fbo; gl::texture m_flip_tex_color; @@ -78,4 +77,7 @@ protected: u64 timestamp() const override; bool on_access_violation(u32 address, bool is_writing) override; + + virtual std::array, 4> copy_render_targets_to_memory() override; + virtual std::array, 2> copy_depth_stencil_buffer_to_memory() override; }; diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 73b78b77a7..af2f38970b 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -387,6 +387,7 @@ namespace gl public: buffer() = default; + buffer(const buffer&) = delete; buffer(GLuint id) { diff --git a/rpcs3/Emu/RSX/GL/gl_render_targets.cpp b/rpcs3/Emu/RSX/GL/gl_render_targets.cpp new file mode 100644 index 0000000000..5adae235f9 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/gl_render_targets.cpp @@ -0,0 +1,349 @@ +#include "stdafx.h" +#include "../rsx_methods.h" +#include "GLGSRender.h" +#include "Emu/state.h" + +color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_format color_format) +{ + //color format + switch (color_format) + { + case rsx::surface_color_format::r5g6b5: + return{ ::gl::texture::type::ushort_5_6_5, ::gl::texture::format::bgr, false, 3, 2 }; + + case rsx::surface_color_format::a8r8g8b8: + return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 }; + + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1, + { ::gl::texture::channel::one, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::b } }; + + case rsx::surface_color_format::w16z16y16x16: + return{ ::gl::texture::type::f16, ::gl::texture::format::rgba, true, 4, 2 }; + + case rsx::surface_color_format::w32z32y32x32: + return{ ::gl::texture::type::f32, ::gl::texture::format::rgba, true, 4, 4 }; + + case rsx::surface_color_format::b8: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::x32: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::a8b8g8r8: + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); + return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 }; + } +} + +depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_format depth_format) +{ + switch (depth_format) + { + case rsx::surface_depth_format::z16: + return{ ::gl::texture::type::ushort, ::gl::texture::format::depth, ::gl::texture::internal_format::depth16 }; + + default: + LOG_ERROR(RSX, "Surface depth buffer: Unsupported surface depth format (0x%x)", depth_format); + case rsx::surface_depth_format::z24s8: + return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth24_stencil8 }; + } +} + +u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format) +{ + switch (format) + { + case rsx::surface_depth_format::z16: return 2; + case rsx::surface_depth_format::z24s8: return 4; + } + throw EXCEPTION("Unknow depth format"); +} + + +void GLGSRender::init_buffers(bool skip_reading) +{ + u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; + + u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; + u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; + + + set_viewport(); + + if (draw_fbo && !m_rtts_dirty) + return; + m_rtts_dirty = false; + + m_rtts.prepare_render_target(nullptr, surface_format, clip_horizontal, clip_vertical, rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]), + get_color_surface_addresses(), get_zeta_surface_address()); + + draw_fbo.recreate(); + + for (int i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (std::get<0>(m_rtts.m_bound_render_targets[i]) != 0) + __glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]); + } + if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0) + __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); + __glcheck draw_fbo.check(); + + + switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + { + case rsx::surface_target::none: break; + + case rsx::surface_target::surface_a: + __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surface_b: + __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]); + break; + + case rsx::surface_target::surfaces_a_b: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); + break; + + case rsx::surface_target::surfaces_a_b_c: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); + break; + + case rsx::surface_target::surfaces_a_b_c_d: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); + break; + } +} + +std::array, 4> GLGSRender::copy_render_targets_to_memory() +{ + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + rsx::surface_info surface = {}; + surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]); + return m_rtts.get_render_targets_data(surface.color_format, clip_w, clip_h); +} + +std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_memory() +{ + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + rsx::surface_info surface = {}; + surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]); + return m_rtts.get_depth_stencil_data(surface.depth_format, clip_w, clip_h); +} + +void GLGSRender::read_buffers() +{ + if (!draw_fbo) + return; + + glDisable(GL_STENCIL_TEST); + + if (rpcs3::state.config.rsx.opengl.read_color_buffers) + { + auto color_format = rsx::internals::surface_color_format_to_gl(m_surface.color_format); + + auto read_color_buffers = [&](int index, int count) + { + u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + for (int i = index; i < index + count; ++i) + { + u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]]; + u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]]; + u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]]; + + if (pitch <= 64) + continue; + + rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); + + bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch); + + //Fall back to slower methods if the image could not be fetched from cache. + if (!success) + { + if (!color_buffer.tile) + { + __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type); + } + else + { + u32 range = pitch * height; + m_gl_texture_cache.remove_in_range(texaddr, range); + + std::unique_ptr buffer(new u8[pitch * height]); + color_buffer.read(buffer.get(), width, height, pitch); + + __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type); + } + } + } + }; + + switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + { + case rsx::surface_target::none: + break; + + case rsx::surface_target::surface_a: + read_color_buffers(0, 1); + break; + + case rsx::surface_target::surface_b: + read_color_buffers(1, 1); + break; + + case rsx::surface_target::surfaces_a_b: + read_color_buffers(0, 2); + break; + + case rsx::surface_target::surfaces_a_b_c: + read_color_buffers(0, 3); + break; + + case rsx::surface_target::surfaces_a_b_c_d: + read_color_buffers(0, 4); + break; + } + } + + if (rpcs3::state.config.rsx.opengl.read_depth_buffer) + { + //TODO: use pitch + u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; + + if (pitch <= 64) + return; + + u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + bool in_cache = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch); + + if (in_cache) + return; + + //Read failed. Fall back to slow s/w path... + + auto depth_format = rsx::internals::surface_depth_format_to_gl(m_surface.depth_format); + int pixel_size = rsx::internals::get_pixel_size(m_surface.depth_format); + gl::buffer pbo_depth; + + __glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size); + __glcheck pbo_depth.map([&](GLubyte* pixels) + { + u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + + if (m_surface.depth_format == rsx::surface_depth_format::z16) + { + u16 *dst = (u16*)pixels; + const be_t* src = vm::ps3::_ptr(depth_address); + for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i) + { + dst[i] = src[i]; + } + } + else + { + u32 *dst = (u32*)pixels; + const be_t* src = vm::ps3::_ptr(depth_address); + for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i) + { + dst[i] = src[i]; + } + } + }, gl::buffer::access::write); + + __glcheck std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type); + } +} + +void GLGSRender::write_buffers() +{ + if (!draw_fbo) + return; + + //TODO: Detect when the data is actually being used by cell and issue download command on-demand (mark as not present?) + //Should also mark cached resources as dirty so that read buffers works out-of-the-box without modification + + if (rpcs3::state.config.rsx.opengl.write_color_buffers) + { + auto color_format = rsx::internals::surface_color_format_to_gl(m_surface.color_format); + + auto write_color_buffers = [&](int index, int count) + { + u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + for (int i = index; i < index + count; ++i) + { + u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]]; + u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]]; + u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]]; + + if (pitch <= 64) + continue; + + rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); + u32 range = pitch * height; + + /**Even tiles are loaded as whole textures during read_buffers from testing. + * Need further evaluation to determine correct behavior. Separate paths for both show no difference, + * but using the GPU to perform the caching is many times faster. + */ + + __glcheck m_gl_texture_cache.save_render_target(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i]))); + } + }; + + switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + { + case rsx::surface_target::none: + break; + + case rsx::surface_target::surface_a: + write_color_buffers(0, 1); + break; + + case rsx::surface_target::surface_b: + write_color_buffers(1, 1); + break; + + case rsx::surface_target::surfaces_a_b: + write_color_buffers(0, 2); + break; + + case rsx::surface_target::surfaces_a_b_c: + write_color_buffers(0, 3); + break; + + case rsx::surface_target::surfaces_a_b_c_d: + write_color_buffers(0, 4); + break; + } + } + + if (rpcs3::state.config.rsx.opengl.write_depth_buffer) + { + //TODO: use pitch + u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; + + if (pitch <= 64) + return; + + auto depth_format = rsx::internals::surface_depth_format_to_gl(m_surface.depth_format); + u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2; + + if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2; + + m_gl_texture_cache.save_render_target(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil))); + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/gl_render_targets.h b/rpcs3/Emu/RSX/GL/gl_render_targets.h new file mode 100644 index 0000000000..e2d24c15be --- /dev/null +++ b/rpcs3/Emu/RSX/GL/gl_render_targets.h @@ -0,0 +1,196 @@ +#pragma once +#include "../Common/surface_store.h" +#include "gl_helpers.h" +#include "stdafx.h" +#include "../RSXThread.h" + +struct color_swizzle +{ + gl::texture::channel a = gl::texture::channel::a; + gl::texture::channel r = gl::texture::channel::r; + gl::texture::channel g = gl::texture::channel::g; + gl::texture::channel b = gl::texture::channel::b; + + color_swizzle() = default; + color_swizzle(gl::texture::channel a, gl::texture::channel r, gl::texture::channel g, gl::texture::channel b) + : a(a), r(r), g(g), b(b) + { + } +}; + +struct color_format +{ + gl::texture::type type; + gl::texture::format format; + bool swap_bytes; + int channel_count; + int channel_size; + color_swizzle swizzle; +}; + +struct depth_format +{ + gl::texture::type type; + gl::texture::format format; + gl::texture::internal_format internal_format; +}; + +namespace rsx +{ + namespace internals + { + color_format surface_color_format_to_gl(rsx::surface_color_format color_format); + depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format); + u8 get_pixel_size(rsx::surface_depth_format format); + + const u32 mr_color_offset[rsx::limits::color_buffers_count] = + { + NV4097_SET_SURFACE_COLOR_AOFFSET, + NV4097_SET_SURFACE_COLOR_BOFFSET, + NV4097_SET_SURFACE_COLOR_COFFSET, + NV4097_SET_SURFACE_COLOR_DOFFSET + }; + + const u32 mr_color_dma[rsx::limits::color_buffers_count] = + { + NV4097_SET_CONTEXT_DMA_COLOR_A, + NV4097_SET_CONTEXT_DMA_COLOR_B, + NV4097_SET_CONTEXT_DMA_COLOR_C, + NV4097_SET_CONTEXT_DMA_COLOR_D + }; + + const u32 mr_color_pitch[rsx::limits::color_buffers_count] = + { + NV4097_SET_SURFACE_PITCH_A, + NV4097_SET_SURFACE_PITCH_B, + NV4097_SET_SURFACE_PITCH_C, + NV4097_SET_SURFACE_PITCH_D + }; + } +} + +struct gl_render_target_traits +{ + using surface_storage_type = std::unique_ptr; + using surface_type = gl::texture*; + using command_list_type = void*; + using download_buffer_object = std::vector; + + static + std::unique_ptr create_new_surface( + u32 address, + rsx::surface_color_format surface_color_format, + size_t width, + size_t height + ) + { + std::unique_ptr result(new gl::texture()); + + auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); + result->recreate(gl::texture::target::texture2D); + + __glcheck result->config() + .size({ (int)width, (int)height }) + .type(format.type) + .format(format.format) + .swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a) + .apply(); + + __glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1); + __glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1); + + return result; + } + + static + std::unique_ptr create_new_surface( + u32 address, + rsx::surface_depth_format surface_depth_format, + size_t width, + size_t height + ) + { + std::unique_ptr result(new gl::texture()); + + auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); + result->recreate(gl::texture::target::texture2D); + + __glcheck result->config() + .size({ (int)width, (int)height }) + .type(format.type) + .format(format.format) + .internal_format(format.internal_format) + .apply(); + + __glcheck result->pixel_pack_settings().aligment(1); + __glcheck result->pixel_unpack_settings().aligment(1); + + return result; + } + + static void prepare_rtt_for_drawing(void *, gl::texture*) {} + static void prepare_rtt_for_sampling(void *, gl::texture*) {} + static void prepare_ds_for_drawing(void *, gl::texture*) {} + static void prepare_ds_for_sampling(void *, gl::texture*) {} + + static + bool rtt_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_color_format surface_color_format, size_t width, size_t height) + { + // TODO: check format + return rtt->width() == width && rtt->height() == height; + } + + static + bool ds_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_depth_format surface_depth_stencil_format, size_t width, size_t height) + { + // TODO: check format + return rtt->width() == width && rtt->height() == height; + } + + // Note : pbo breaks fbo here so use classic texture copy + static std::vector issue_download_command(gl::texture* color_buffer, rsx::surface_color_format color_format, size_t width, size_t height) + { + auto pixel_format = rsx::internals::surface_color_format_to_gl(color_format); + std::vector result(width * height * pixel_format.channel_count * pixel_format.channel_size); + color_buffer->bind(); + glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data()); + return result; + } + + static std::vector issue_depth_download_command(gl::texture* depth_stencil_buffer, rsx::surface_depth_format depth_format, size_t width, size_t height) + { + std::vector result(width * height * 4); + + auto pixel_format = rsx::internals::surface_depth_format_to_gl(depth_format); + depth_stencil_buffer->bind(); + glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data()); + return result; + } + + static std::vector issue_stencil_download_command(gl::texture* depth_stencil_buffer, size_t width, size_t height) + { + std::vector result(width * height * 4); + return result; + } + + static + gsl::span map_downloaded_buffer(const std::vector &buffer) + { + return{ reinterpret_cast(buffer.data()), gsl::narrow(buffer.size()) }; + } + + static + void unmap_downloaded_buffer(const std::vector &) + { + } + + static gl::texture* get(const std::unique_ptr &in) + { + return in.get(); + } +}; + + +struct gl_render_targets : public rsx::surface_store +{ +}; diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.h b/rpcs3/Emu/RSX/GL/gl_texture_cache.h index fa1ea9f8d2..442d85a8a4 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.h +++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.h @@ -10,6 +10,7 @@ #include #include "GLGSRender.h" +#include "gl_render_targets.h" #include "../Common/TextureUtils.h" #include @@ -423,42 +424,48 @@ namespace gl } } - void upload_texture(int index, rsx::texture &tex, rsx::gl::texture &gl_texture) + void upload_texture(int index, rsx::texture &tex, rsx::gl::texture &gl_texture, gl_render_targets &m_rtts) { const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); const u32 range = (u32)get_texture_size(tex); + glActiveTexture(GL_TEXTURE0 + index); + + /** + * Give precedence to rtt data obtained through read/write buffers + */ cached_rtt *rtt = find_cached_rtt(texaddr, range); if (rtt && !rtt->is_dirty) { - if (!rtt->is_depth) - { - u32 real_id = gl_texture.id(); + u32 real_id = gl_texture.id(); - glActiveTexture(GL_TEXTURE0 + index); - gl_texture.set_id(rtt->copy_glid); - gl_texture.bind(); + gl_texture.set_id(rtt->copy_glid); + gl_texture.bind(); - gl_texture.set_id(real_id); - } - else - { - LOG_NOTICE(RSX, "Depth RTT found from 0x%X, Trying to upload width dims: %d x %d, Saved as %d x %d", rtt->data_addr, tex.width(), tex.height(), rtt->current_width, rtt->current_height); - //The texture should have already been loaded through the writeback interface call - //Bind it directly - u32 real_id = gl_texture.id(); + gl_texture.set_id(real_id); + } - glActiveTexture(GL_TEXTURE0 + index); - gl_texture.set_id(rtt->copy_glid); - gl_texture.bind(); - - gl_texture.set_id(real_id); - } + /** + * Check for sampleable rtts from previous render passes + */ + gl::texture *texptr = nullptr; + if (texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) + { + texptr->bind(); return; } - else if (rtt) - LOG_NOTICE(RSX, "RTT texture for address 0x%X is dirty!", texaddr); + + if (texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) + { + texptr->bind(); + return; + } + + /** + * If all the above failed, then its probably a generic texture. + * Search in cache and upload/bind + */ gl_cached_texture *obj = nullptr; @@ -469,7 +476,6 @@ namespace gl { u32 real_id = gl_texture.id(); - glActiveTexture(GL_TEXTURE0 + index); gl_texture.set_id(obj->gl_id); gl_texture.bind(); diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp index 61d1dca3d9..45e3ebc2a8 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp @@ -149,6 +149,9 @@ namespace rsx void texture::init(int index, rsx::texture& tex) { + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + + //TODO: safe init if (!m_id) { create(); @@ -157,12 +160,6 @@ namespace rsx glActiveTexture(GL_TEXTURE0 + index); bind(); - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - //LOG_WARNING(RSX, "texture addr = 0x%x, width = %d, height = %d, max_aniso=%d, mipmap=%d, remap=0x%x, zfunc=0x%x, wraps=0x%x, wrapt=0x%x, wrapr=0x%x, minlod=0x%x, maxlod=0x%x", - // m_offset, m_width, m_height, m_maxaniso, m_mipmap, m_remap, m_zfunc, m_wraps, m_wrapt, m_wrapr, m_minlod, m_maxlod); - - //TODO: safe init - u32 full_format = tex.format(); u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 604cffd106..d82677ad32 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -79,6 +79,7 @@ + @@ -89,6 +90,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 6d782a3eae..d8edb1af32 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -8,6 +8,7 @@ + @@ -19,5 +20,7 @@ + + \ No newline at end of file