diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 0221527a3b..9ccdaf0eeb 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -546,7 +546,6 @@ void GLGSRender::on_init_thread()
     m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000);
     m_vao.element_array_buffer = *m_index_ring_buffer;

-    m_gl_texture_cache.initialize_rtt_cache();

     if (g_cfg_rsx_overlay)
         m_text_printer.init();
@@ -644,7 +643,7 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
     }

     glClear(mask);
-    renderer->write_buffers();
+    //renderer->write_buffers();
 }

 using rsx_method_impl_t = void(*)(u32, GLGSRender*);
@@ -698,7 +697,6 @@ bool GLGSRender::load_program()
     RSXVertexProgram vertex_program = get_current_vertex_program();
     RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);

-    std::array rtt_scaling;
     u32 unnormalized_rtts = 0;

     for (auto &vtx : vertex_program.rsx_vertex_inputs)
@@ -946,6 +944,8 @@ u64 GLGSRender::timestamp() const

 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
-    if (is_writing) return m_gl_texture_cache.mark_as_dirty(address);
-    return false;
+    if (is_writing)
+        return m_gl_texture_cache.mark_as_dirty(address);
+    else
+        return m_gl_texture_cache.flush_section(address);
 }
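The on_access_violation() change above is the heart of the patch: a write into a cached range only invalidates it, while a read now forces pending GPU data to be written back first. A minimal sketch of that contract, with a hypothetical texture_cache_t standing in for gl::texture_cache:

    #include <cstdint>
    using u32 = std::uint32_t;

    struct texture_cache_t
    {
        // unprotect the overlapping sections and tag them dirty
        bool mark_as_dirty(u32 /*address*/) { return true; }
        // wait on the GPU fence, then copy the PBO contents back into guest memory
        bool flush_section(u32 /*address*/) { return true; }
    };

    // true = the fault was serviced by the cache, false = let it propagate
    bool on_access_violation(texture_cache_t& cache, u32 address, bool is_writing)
    {
        return is_writing ? cache.mark_as_dirty(address)
                          : cache.flush_section(address);
    }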
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h
index d2022e8b60..9576bf0997 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -25,7 +25,7 @@ private:
     gl_render_targets m_rtts;

-    gl::gl_texture_cache m_gl_texture_cache;
+    gl::texture_cache m_gl_texture_cache;

     gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count];
diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h
index 403ed392d8..485bb110dc 100644
--- a/rpcs3/Emu/RSX/GL/GLProcTable.h
+++ b/rpcs3/Emu/RSX/GL/GLProcTable.h
@@ -170,6 +170,8 @@
 OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
 OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);

+OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage);
+
 //Texture Buffers
 OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
 OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);
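GetTextureImage is pulled into the proc table because the new cache downloads render targets through a pixel-pack buffer: with GL_PIXEL_PACK_BUFFER bound, glGetTextureImage (OpenGL 4.5, DSA) writes into the PBO and returns without a CPU stall. A sketch of the pattern, assuming a valid GL 4.5 context, a texture `tex_id` and a size `buffer_size`:

    GLuint pbo = 0;
    glGenBuffers(1, &pbo);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
    glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_MAP_READ_BIT);

    // Enqueue the download; with a PBO bound, the last argument is an offset, not a pointer.
    glGetTextureImage(tex_id, 0, GL_RGBA, GL_UNSIGNED_BYTE, buffer_size, nullptr);
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

    // ...later, once the fence reports completion, mapping is cheap:
    glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, UINT64_MAX);
    void* data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, GL_MAP_READ_BIT);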
diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
index ae3925a8b9..c3e88c1bcb 100644
--- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
+++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
@@ -228,7 +228,7 @@ void GLGSRender::read_buffers()
                 rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
                 u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));

-                bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);
+                bool success = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);

                 //Fall back to slower methods if the image could not be fetched from cache.
                 if (!success)
@@ -240,7 +240,7 @@ void GLGSRender::read_buffers()
                     else
                     {
                         u32 range = pitch * height;
-                        m_gl_texture_cache.remove_in_range(texaddr, range);
+                        m_gl_texture_cache.invalidate_range(texaddr, range);

                         std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
                         color_buffer.read(buffer.get(), width, height, pitch);
@@ -287,7 +287,7 @@ void GLGSRender::read_buffers()
             return;

         u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
-        bool in_cache = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);
+        bool in_cache = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);

         if (in_cache)
             return;
@@ -332,9 +332,6 @@ void GLGSRender::write_buffers()
     if (!draw_fbo)
         return;

-    //TODO: Detect when the data is actually being used by cell and issue download command on-demand (mark as not present?)
-    //Should also mark cached resources as dirty so that read buffers works out-of-the-box without modification
-
     if (g_cfg_rsx_write_color_buffers)
     {
         auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color());
@@ -366,7 +363,7 @@ void GLGSRender::write_buffers()
              * but using the GPU to perform the caching is many times faster.
              */

-            __glcheck m_gl_texture_cache.save_render_target(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])));
+            __glcheck m_gl_texture_cache.save_rtt(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])), width, height, pitch, color_format.format, color_format.type);
         }
     };
@@ -405,12 +402,14 @@ void GLGSRender::write_buffers()
         if (pitch <= 64)
             return;

+        u32 width = rsx::method_registers.surface_clip_width();
+        u32 height = rsx::method_registers.surface_clip_height();
+        u32 range = width * height * 2;
         auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt());
         u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());

-        u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2;
         if (rsx::method_registers.surface_depth_fmt() != rsx::surface_depth_format::z16)
             range *= 2;

-        m_gl_texture_cache.save_render_target(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)));
+        m_gl_texture_cache.save_rtt(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)), width, height, pitch, depth_format.format, depth_format.type);
     }
 }
\ No newline at end of file
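The depth write-back above now sizes the buffer from the surface clip dimensions rather than the cached texture, since the two can disagree. The same arithmetic, worked through once with an assumed helper:

    u32 depth_surface_size(u32 width, u32 height, bool is_z16)
    {
        u32 range = width * height * 2; // z16: 2 bytes per pixel
        if (!is_z16)
            range *= 2;                 // z24s8: 4 bytes per pixel
        return range;
    }
    // e.g. a 1280x720 z24s8 surface: 1280 * 720 * 2 * 2 = 3686400 bytes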
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index a372494bdc..6985d199f0 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -14,190 +14,22 @@
 #include "../Common/TextureUtils.h"
 #include 

-namespace gl
+namespace rsx
 {
-    class gl_texture_cache
+    //TODO: Properly move this into rsx shared
+    class buffered_section
     {
-    public:
+    protected:
+        u32 cpu_address_base = 0;
+        u32 cpu_address_range = 0;

-        struct gl_cached_texture
-        {
-            u32 gl_id;
-            u32 w;
-            u32 h;
-            u64 data_addr;
-            u32 block_sz;
-            u32 frame_ctr;
-            u32 protected_block_start;
-            u32 protected_block_sz;
-            u16 mipmap;
-            bool deleted;
-            bool locked;
-        };
+        u32 locked_address_base = 0;
+        u32 locked_address_range = 0;

-        struct invalid_cache_area
-        {
-            u32 block_base;
-            u32 block_sz;
-        };
+        u32 memory_protection = 0;

-        struct cached_rtt
-        {
-            u32 copy_glid;
-            u32 data_addr;
-            u32 block_sz;
-
-            bool is_dirty;
-            bool is_depth;
-            bool valid;
-
-            u32 current_width;
-            u32 current_height;
-
-            bool locked;
-            cached_rtt() : valid(false) {}
-        };
-
-    private:
-        std::vector<gl_cached_texture> texture_cache;
-        std::vector<cached_rtt> rtt_cache;
-        u32 frame_ctr;
-        std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
-        u32 max_tex_address = 0;
-
-        bool lock_memory_region(u32 start, u32 size)
-        {
-            static const u32 memory_page_size = 4096;
-            start = start & ~(memory_page_size - 1);
-            size = (u32)align(size, memory_page_size);
-
-            if (start < texture_cache_range.first)
-                texture_cache_range = std::make_pair(start, texture_cache_range.second);
-
-            if ((start+size) > texture_cache_range.second)
-                texture_cache_range = std::make_pair(texture_cache_range.first, (start+size));
-
-            return vm::page_protect(start, size, 0, 0, vm::page_writable);
-        }
-
-        bool unlock_memory_region(u32 start, u32 size)
-        {
-            static const u32 memory_page_size = 4096;
-            start = start & ~(memory_page_size - 1);
-            size = (u32)align(size, memory_page_size);
-
-            return vm::page_protect(start, size, 0, vm::page_writable, 0);
-        }
-
-        void lock_gl_object(gl_cached_texture &obj)
-        {
-            static const u32 memory_page_size = 4096;
-            obj.protected_block_start = obj.data_addr & ~(memory_page_size - 1);
-            obj.protected_block_sz = (u32)align(obj.block_sz, memory_page_size);
-
-            if (!lock_memory_region(obj.protected_block_start, obj.protected_block_sz))
-                LOG_ERROR(RSX, "lock_gl_object failed!");
-            else
-                obj.locked = true;
-        }
-
-        void unlock_gl_object(gl_cached_texture &obj)
-        {
-            if (!unlock_memory_region(obj.protected_block_start, obj.protected_block_sz))
-                LOG_ERROR(RSX, "unlock_gl_object failed! Will probably crash soon...");
-            else
-                obj.locked = false;
-        }
-
-        gl_cached_texture *find_obj_for_params(u64 texaddr, u32 w, u32 h, u16 mipmap)
-        {
-            for (gl_cached_texture &tex: texture_cache)
-            {
-                if (tex.gl_id && tex.data_addr == texaddr)
-                {
-                    if (w && h && mipmap && (tex.h != h || tex.w != w || tex.mipmap != mipmap))
-                    {
-                        continue;
-                    }
-
-                    tex.frame_ctr = frame_ctr;
-                    return &tex;
-                }
-            }
-
-            return nullptr;
-        }
-
-        gl_cached_texture& create_obj_for_params(u32 gl_id, u64 texaddr, u32 w, u32 h, u16 mipmap)
-        {
-            gl_cached_texture obj = { 0 };
-
-            obj.gl_id = gl_id;
-            obj.data_addr = texaddr;
-            obj.w = w;
-            obj.h = h;
-            obj.mipmap = mipmap;
-            obj.deleted = false;
-            obj.locked = false;
-
-            for (gl_cached_texture &tex : texture_cache)
-            {
-                if (tex.gl_id == 0 || (tex.deleted && (frame_ctr - tex.frame_ctr) > 32768))
-                {
-                    if (tex.gl_id)
-                    {
-                        LOG_NOTICE(RSX, "Reclaiming GL texture %d, cache_size=%d, master_ctr=%d, ctr=%d", tex.gl_id, texture_cache.size(), frame_ctr, tex.frame_ctr);
-                        __glcheck glDeleteTextures(1, &tex.gl_id);
-                        unlock_gl_object(tex);
-                        tex.gl_id = 0;
-                    }
-
-                    tex = obj;
-                    return tex;
-                }
-            }
-
-            texture_cache.push_back(obj);
-            return texture_cache[texture_cache.size()-1];
-        }
-
-        void remove_obj(gl_cached_texture &tex)
-        {
-            if (tex.locked)
-                unlock_gl_object(tex);
-
-            tex.deleted = true;
-        }
-
-        void remove_obj_for_glid(u32 gl_id)
-        {
-            for (gl_cached_texture &tex : texture_cache)
-            {
-                if (tex.gl_id == gl_id)
-                    remove_obj(tex);
-            }
-        }
-
-        void clear_obj_cache()
-        {
-            for (gl_cached_texture &tex : texture_cache)
-            {
-                if (tex.locked)
-                    unlock_gl_object(tex);
-
-                if (tex.gl_id)
-                {
-                    LOG_NOTICE(RSX, "Deleting texture %d", tex.gl_id);
-                    glDeleteTextures(1, &tex.gl_id);
-                }
-
-                tex.deleted = true;
-                tex.gl_id = 0;
-            }
-
-            texture_cache.resize(0);
-            destroy_rtt_cache();
-        }
+        bool locked = false;
+        bool dirty = false;

         bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
         {
@@ -221,11 +53,598 @@
             return false;
         }

-        cached_rtt* find_cached_rtt(u32 base, u32 size)
+    public:
+
+        buffered_section() {}
+        ~buffered_section() {}
+
+        void reset(u32 base, u32 length)
         {
-            for (cached_rtt &rtt : rtt_cache)
+            verify(HERE), locked == false;
+
+            cpu_address_base = base;
+            cpu_address_range = length;
+
+            locked_address_base = (base & ~4095);
+            locked_address_range = align(base + length, 4096) - locked_address_base;
+
+            memory_protection = vm::page_readable|vm::page_writable;
+
+            locked = false;
+        }
+
+        bool protect(u8 flags_set, u8 flags_clear)
+        {
+            if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear))
             {
-                if (region_overlaps(base, base+size, rtt.data_addr, rtt.data_addr+rtt.block_sz))
+                memory_protection &= ~flags_clear;
+                memory_protection |= flags_set;
+
+                locked = memory_protection != (vm::page_readable | vm::page_writable);
+            }
+            else
+                fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base);
+
+            return false;
+        }
+
+        bool unprotect()
+        {
+            u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection;
+
+            if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0))
+            {
+                memory_protection = (vm::page_writable | vm::page_readable);
+                locked = false;
+                return true;
+            }
+            else
+                fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base);
+
+            return false;
+        }
+
+        bool overlaps(std::pair<u32, u32> range)
+        {
+            return region_overlaps(locked_address_base, locked_address_base+locked_address_range, range.first, range.first + range.second);
+        }
+
+        bool overlaps(u32 address)
+        {
+            return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
+        }
+
+        bool is_locked() const
+        {
+            return locked;
+        }
+
+        bool is_dirty() const
+        {
+            return dirty;
+        }
+
+        void set_dirty(bool state)
+        {
+            dirty = state;
+        }
+
+        u32 get_section_base() const
+        {
+            return cpu_address_base;
+        }
+
+        u32 get_section_size() const
+        {
+            return cpu_address_range;
+        }
+
+        bool matches(u32 cpu_address, u32 size) const
+        {
+            return (cpu_address_base == cpu_address && cpu_address_range == size);
+        }
+
+        std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
+        {
+            u32 min = std::min(current_min_max.first, locked_address_base);
+            u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
+
+            return std::make_pair(min, max);
+        }
+    };
+}
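buffered_section can only protect whole 4 KiB pages, so reset() widens the watched window to page granularity while the section's own bounds are kept separately for flushing. The rounding, standalone (assumed helper, equivalent to the base & ~4095 / align() pair above):

    using u32 = unsigned int;
    constexpr u32 page_size = 4096;

    struct lock_range { u32 base, length; };

    lock_range page_align(u32 base, u32 length)
    {
        const u32 lock_base = base & ~(page_size - 1);                            // round base down
        const u32 lock_end  = (base + length + page_size - 1) & ~(page_size - 1); // round end up
        return { lock_base, lock_end - lock_base };
    }
    // page_align(0x1234, 0x100) -> { 0x1000, 0x1000 }: one full page is locked,
    // so unrelated data sharing that page will also fault into the handler.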
+
+namespace gl
+{
+    //TODO: Properly move this into helpers
+    class fence
+    {
+        GLsync m_value = nullptr;
+        GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
+
+    public:
+
+        fence() {}
+        ~fence() {}
+
+        void create()
+        {
+            m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        }
+
+        void destroy()
+        {
+            glDeleteSync(m_value);
+            m_value = nullptr;
+        }
+
+        void reset()
+        {
+            if (m_value != nullptr)
+                destroy();
+
+            create();
+        }
+
+        bool check_signaled()
+        {
+            verify(HERE), m_value != nullptr;
+
+            GLenum err = glClientWaitSync(m_value, flags, 0);
+            flags = 0;
+            return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
+        }
+
+        bool wait_for_signal()
+        {
+            verify(HERE), m_value != nullptr;
+
+            GLenum err = GL_WAIT_FAILED;
+            bool done = false;
+
+            while (!done)
+            {
+                //Check if we are finished, wait time = 1us
+                err = glClientWaitSync(m_value, flags, 1000);
+                flags = 0;
+
+                switch (err)
+                {
+                default:
+                    LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
+                case GL_ALREADY_SIGNALED:
+                case GL_CONDITION_SATISFIED:
+                    done = true;
+                    break;
+                case GL_TIMEOUT_EXPIRED:
+                    continue;
+                }
+            }
+
+            glDeleteSync(m_value);
+            m_value = nullptr;
+
+            return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
+        }
+    };
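Intended use of the fence helper, judging by the call sites in this patch: reset() right after queuing the PBO download, then poll or block before mapping. GL_SYNC_FLUSH_COMMANDS_BIT is passed only on the first wait, so the command stream is flushed exactly once:

    gl::fence f;
    f.reset();                 // glFenceSync right after glGetTextureImage
    // ... CPU-side work ...
    if (!f.check_signaled())   // cheap non-blocking poll
        f.wait_for_signal();   // otherwise spin in 1 us timeslices until done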
+
+
+    //TODO: Unify all cache objects
+    class texture_cache
+    {
+    public:
+
+        class cached_texture_section : public rsx::buffered_section
+        {
+            u32 texture_id = 0;
+            u32 width = 0;
+            u32 height = 0;
+            u16 mipmaps = 0;
+
+        public:
+
+            void create(u32 id, u32 width, u32 height, u32 mipmaps)
+            {
+                verify(HERE), locked == false;
+
+                texture_id = id;
+                this->width = width;
+                this->height = height;
+                this->mipmaps = mipmaps;
+            }
+
+            bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps)
+            {
+                if (rsx_address == cpu_address_base && texture_id != 0)
+                {
+                    if (!width && !height && !mipmaps)
+                        return true;
+
+                    return (width == this->width && height == this->height && mipmaps == this->mipmaps);
+                }
+
+                return false;
+            }
+
+            void destroy()
+            {
+                if (locked)
+                    unprotect();
+
+                glDeleteTextures(1, &texture_id);
+                texture_id = 0;
+            }
+
+            bool is_empty()
+            {
+                return (texture_id == 0);
+            }
+
+            u32 id() const
+            {
+                return texture_id;
+            }
+        };
+
+        class cached_rtt_section : public rsx::buffered_section
+        {
+        private:
+            fence m_fence;
+            u32 pbo_id = 0;
+            u32 pbo_size = 0;
+
+            bool flushed = false;
+            bool is_depth = false;
+
+            u32 flush_count = 0;
+            u32 copy_count = 0;
+
+            u32 current_width = 0;
+            u32 current_height = 0;
+            u32 current_pitch = 0;
+            u32 real_pitch = 0;
+
+            texture::format format = texture::format::rgba;
+            texture::type type = texture::type::ubyte;
+
+            u8 get_pixel_size(texture::format fmt_, texture::type type_)
+            {
+                u8 size = 1;
+                switch (type_)
+                {
+                case texture::type::ubyte:
+                case texture::type::sbyte:
+                    break;
+                case texture::type::ushort:
+                case texture::type::sshort:
+                case texture::type::f16:
+                    size = 2;
+                    break;
+                case texture::type::ushort_5_6_5:
+                case texture::type::ushort_5_6_5_rev:
+                case texture::type::ushort_4_4_4_4:
+                case texture::type::ushort_4_4_4_4_rev:
+                case texture::type::ushort_5_5_5_1:
+                case texture::type::ushort_1_5_5_5_rev:
+                    return 2;
+                case texture::type::uint_8_8_8_8:
+                case texture::type::uint_8_8_8_8_rev:
+                case texture::type::uint_10_10_10_2:
+                case texture::type::uint_2_10_10_10_rev:
+                case texture::type::uint_24_8:
+                    return 4;
+                case texture::type::f32:
+                case texture::type::sint:
+                case texture::type::uint:
+                    size = 4;
+                    break;
+                }
+
+                switch (fmt_)
+                {
+                case texture::format::red:
+                case texture::format::r:
+                    break;
+                case texture::format::rg:
+                    size *= 2;
+                    break;
+                case texture::format::rgb:
+                case texture::format::bgr:
+                    size *= 3;
+                    break;
+                case texture::format::rgba:
+                case texture::format::bgra:
+                    size *= 4;
+                    break;
+
+                //Depth formats..
+                case texture::format::depth:
+                    size = 2;
+                    break;
+                case texture::format::depth_stencil:
+                    size = 4;
+                    break;
+                default:
+                    LOG_ERROR(RSX, "Unsupported rtt format %d", (GLenum)fmt_);
+                    size = 4;
+                }
+
+                return size;
+            }
+
+        public:
+
+            void reset(u32 base, u32 size)
+            {
+                rsx::buffered_section::reset(base, size);
+                flushed = false;
+                flush_count = 0;
+                copy_count = 0;
+            }
+
+            void init_buffer()
+            {
+                glGenBuffers(1, &pbo_id);
+
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
+                glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT);
+
+                pbo_size = locked_address_range;
+            }
+
+            void set_dimensions(u32 width, u32 height, u32 pitch)
+            {
+                current_width = width;
+                current_height = height;
+                current_pitch = pitch;
+
+                real_pitch = width * get_pixel_size(format, type);
+            }
+
+            void set_format(texture::format gl_format, texture::type gl_type)
+            {
+                format = gl_format;
+                type = gl_type;
+
+                real_pitch = current_width * get_pixel_size(format, type);
+            }
+
+            void copy_texture(gl::texture &source)
+            {
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
+                glGetTextureImage(source.id(), 0, (GLenum)format, (GLenum)type, pbo_size, nullptr);
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+                m_fence.reset();
+                copy_count++;
+            }
+
+            void fill_texture(gl::texture &tex)
+            {
+                u32 min_width = std::min((u32)tex.width(), current_width);
+                u32 min_height = std::min((u32)tex.height(), current_height);
+
+                tex.bind();
+                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
+                glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr);
+                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+            }
+
+            template <typename T, int N>
+            void scale_image_impl(T* dst, const T* src, u16 src_width, u16 src_height, u16 padding)
+            {
+                u32 dst_offset = 0;
+                u32 src_offset = 0;
+
+                for (u16 h = 0; h < src_height; ++h)
+                {
+                    for (u16 w = 0; w < src_width; ++w)
+                    {
+                        for (u8 n = 0; n < N; ++n)
+                        {
+                            dst[dst_offset++] = src[src_offset];
+                        }
+
+                        //Fetch next pixel
+                        src_offset++;
+                    }
+
+                    //Pad this row
+                    dst_offset += padding;
+                }
+            }
+
+            template <int N>
+            void scale_image(void *dst, void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
+            {
+                switch (pixel_size)
+                {
+                case 1:
+                    scale_image_impl<u8, N>((u8*)dst, (u8*)src, current_width, current_height, padding);
+                    break;
+                case 2:
+                    scale_image_impl<u16, N>((u16*)dst, (u16*)src, current_width, current_height, padding);
+                    break;
+                case 4:
+                    scale_image_impl<u32, N>((u32*)dst, (u32*)src, current_width, current_height, padding);
+                    break;
+                case 8:
+                    scale_image_impl<u64, N>((u64*)dst, (u64*)src, current_width, current_height, padding);
+                    break;
+                default:
+                    fmt::throw_exception("unsupported rtt format 0x%X" HERE, (u32)format);
+                }
+            }
+
+            void flush()
+            {
+                protect(vm::page_writable, 0);
+                m_fence.wait_for_signal();
+                flushed = true;
+
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
+                void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT);
+                u8 *dst = vm::ps3::_ptr<u8>(cpu_address_base);
+
+                //throw if map failed since we'll segfault anyway
+                verify(HERE), data != nullptr;
+
+                if (real_pitch >= current_pitch)
+                    memcpy(dst, data, cpu_address_range);
+                else
+                {
+                    //Scale this image by repeating pixel data n times
+                    //n = expected_pitch / real_pitch
+                    //Use of fixed argument templates for performance reasons
+
+                    const u16 pixel_size = get_pixel_size(format, type);
+                    const u16 dst_width = current_pitch / pixel_size;
+                    const u16 sample_count = current_pitch / real_pitch;
+                    const u16 padding = dst_width - (current_width * sample_count);
+
+                    switch (sample_count)
+                    {
+                    case 2:
+                        scale_image<2>(dst, data, pixel_size, current_width, current_height, padding);
+                        break;
+                    case 3:
+                        scale_image<3>(dst, data, pixel_size, current_width, current_height, padding);
+                        break;
+                    case 4:
+                        scale_image<4>(dst, data, pixel_size, current_width, current_height, padding);
+                        break;
+                    case 8:
+                        scale_image<8>(dst, data, pixel_size, current_width, current_height, padding);
+                        break;
+                    case 16:
+                        scale_image<16>(dst, data, pixel_size, current_width, current_height, padding);
+                        break;
+                    default:
+                        LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch);
+                        memcpy(dst, data, cpu_address_range);
+                    }
+                }
+
+                glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+                protect(vm::page_readable, vm::page_writable);
+
+                flush_count++;
+            }
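When the guest expects a wider pitch than the tightly-packed GL image provides, flush() widens rows on the CPU: each source pixel is repeated sample_count times and each row is padded up to the destination pitch. The arithmetic for an assumed RGBA8 case:

    // width = 120 px, RGBA8 -> real_pitch = 120 * 4 = 480 bytes,
    // but the guest surface expects current_pitch = 1024 bytes per row
    const u16 pixel_size   = 4;
    const u16 dst_width    = 1024 / pixel_size;               // 256 pixels per output row
    const u16 sample_count = 1024 / 480;                      // = 2 (integer division)
    const u16 padding      = dst_width - 120 * sample_count;  // = 16 unwritten pixels per row
    // which dispatches to scale_image<2>(dst, data, pixel_size, 120, height, padding)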
+
+            void destroy()
+            {
+                if (locked)
+                    unprotect();
+
+                glDeleteBuffers(1, &pbo_id);
+                pbo_id = 0;
+                pbo_size = 0;
+
+                m_fence.destroy();
+            }
+
+            bool is_flushed() const
+            {
+                return flushed;
+            }
+
+            bool can_skip()
+            {
+                //TODO: Better balancing algorithm. Copying buffers is very expensive
+                //TODO: Add a switch to force strict enforcement
+
+                //Always accept the first attempt at caching after creation
+                if (!copy_count)
+                    return false;
+
+                //If surface is flushed often, force buffering
+                if (flush_count)
+                {
+                    //TODO: Pick better values. Using 80% and 20% for now
+                    if (flush_count >= (4 * copy_count / 5))
+                        return false;
+                    else
+                    {
+                        if (flushed) return false; //fence is guaranteed to have been signaled and destroyed
+                        return !m_fence.check_signaled();
+                    }
+                }
+
+                return true;
+            }
+
+            void set_flushed(bool state)
+            {
+                flushed = state;
+            }
+        };
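can_skip() is a cost heuristic: downloading every draw is expensive, so a section that is rarely read back may skip the copy. Note its only call site in this patch is disabled (`if (0)//region->can_skip())`), so every copy currently proceeds. The placeholder 80% threshold, expressed as a standalone predicate (the fence-based early-out is omitted here):

    bool must_copy(u32 flush_count, u32 copy_count)
    {
        if (copy_count == 0) return true;            // always cache the first attempt
        if (flush_count == 0) return false;          // never read back -> free to skip
        return flush_count >= (4 * copy_count / 5);  // flushed >= 80% of the time
    }
    // e.g. a section copied 10 times needs 8+ flushes before every copy is forced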
+
+    private:
+        std::vector<cached_texture_section> m_texture_cache;
+        std::vector<cached_rtt_section> m_rtt_cache;
+
+        std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
+        std::pair<u32, u32> rtt_cache_range = std::make_pair(0xFFFFFFFF, 0);
+
+        std::mutex m_section_mutex;
+
+        cached_texture_section *find_texture(u64 texaddr, u32 w, u32 h, u16 mipmaps)
+        {
+            for (cached_texture_section &tex : m_texture_cache)
+            {
+                if (tex.matches(texaddr, w, h, mipmaps) && !tex.is_dirty())
+                    return &tex;
+            }
+
+            return nullptr;
+        }
+
+        cached_texture_section& create_texture(u32 id, u32 texaddr, u32 texsize, u32 w, u32 h, u16 mipmap)
+        {
+            for (cached_texture_section &tex : m_texture_cache)
+            {
+                if (tex.is_dirty())
+                {
+                    tex.destroy();
+                    tex.reset(texaddr, texsize);
+                    tex.create(id, w, h, mipmap);
+
+                    texture_cache_range = tex.get_min_max(texture_cache_range);
+                    return tex;
+                }
+            }
+
+            cached_texture_section tex;
+            tex.reset(texaddr, texsize);
+            tex.create(id, w, h, mipmap);
+            texture_cache_range = tex.get_min_max(texture_cache_range);
+
+            m_texture_cache.push_back(tex);
+            return m_texture_cache.back();
+        }
+
+        void clear()
+        {
+            for (cached_texture_section &tex : m_texture_cache)
+            {
+                tex.destroy();
+            }
+
+            for (cached_rtt_section &rtt : m_rtt_cache)
+            {
+                rtt.destroy();
+            }
+
+            m_rtt_cache.resize(0);
+            m_texture_cache.resize(0);
+        }
+
+        cached_rtt_section* find_cached_rtt_section(u32 base, u32 size)
+        {
+            for (cached_rtt_section &rtt : m_rtt_cache)
+            {
+                if (rtt.matches(base, size))
                 {
                     return &rtt;
                 }
@@ -234,194 +653,60 @@
             return nullptr;
         }

-        void invalidate_rtts_in_range(u32 base, u32 size)
+        cached_rtt_section *create_locked_view_of_section(u32 base, u32 size)
         {
-            for (cached_rtt &rtt : rtt_cache)
-            {
-                if (!rtt.data_addr || rtt.is_dirty) continue;
-
-                u32 rtt_aligned_base = ((u32)(rtt.data_addr)) & ~(4096 - 1);
-                u32 rtt_block_sz = align(rtt.block_sz, 4096);
-
-                if (region_overlaps(rtt_aligned_base, (rtt_aligned_base + rtt_block_sz), base, base+size))
-                {
-                    rtt.is_dirty = true;
-                    if (rtt.locked)
-                    {
-                        rtt.locked = false;
-                        unlock_memory_region((u32)rtt.data_addr, rtt.block_sz);
-                    }
-                }
-            }
-        }
-
-        void prep_rtt(cached_rtt &rtt, u32 width, u32 height, u32 gl_pixel_format_internal)
-        {
-            int binding = 0;
-            bool is_depth = false;
-
-            if (gl_pixel_format_internal == GL_DEPTH24_STENCIL8 ||
-                gl_pixel_format_internal == GL_DEPTH_COMPONENT24 ||
-                gl_pixel_format_internal == GL_DEPTH_COMPONENT16 ||
-                gl_pixel_format_internal == GL_DEPTH_COMPONENT32)
-            {
-                is_depth = true;
-            }
-
-            glGetIntegerv(GL_TEXTURE_2D_BINDING_EXT, &binding);
-            glBindTexture(GL_TEXTURE_2D, rtt.copy_glid);
-
-            rtt.current_width = width;
-            rtt.current_height = height;
-
-            if (!is_depth)
-            {
-                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-
-                __glcheck glTexImage2D(GL_TEXTURE_2D, 0, gl_pixel_format_internal, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
-            }
-            else
-            {
-                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-
-                u32 ex_format = GL_UNSIGNED_SHORT;
-                u32 in_format = GL_DEPTH_COMPONENT16;
-
-                switch (gl_pixel_format_internal)
-                {
-                case GL_DEPTH24_STENCIL8:
-                {
-                    ex_format = GL_UNSIGNED_INT_24_8;
-                    in_format = GL_DEPTH_STENCIL;
-                    break;
-                }
-                case GL_DEPTH_COMPONENT16:
-                    break;
-                default:
-                    fmt::throw_exception("Unsupported depth format!" HERE);
-                }
-
-                __glcheck glTexImage2D(GL_TEXTURE_2D, 0, gl_pixel_format_internal, width, height, 0, in_format, ex_format, nullptr);
-            }
-
-            glBindTexture(GL_TEXTURE_2D, binding);
-            rtt.is_depth = is_depth;
-        }
-
-        void save_rtt(u32 base, u32 size, u32 width, u32 height, u32 gl_pixel_format_internal, gl::texture &source)
-        {
-            cached_rtt *region = find_cached_rtt(base, size);
+            cached_rtt_section *region = find_cached_rtt_section(base, size);

             if (!region)
             {
-                for (cached_rtt &rtt : rtt_cache)
+                for (cached_rtt_section &rtt : m_rtt_cache)
                 {
-                    if (rtt.valid && rtt.data_addr == 0)
+                    if (rtt.is_dirty())
                     {
-                        prep_rtt(rtt, width, height, gl_pixel_format_internal);
-
-                        rtt.block_sz = size;
-                        rtt.data_addr = base;
-                        rtt.is_dirty = true;
-
-                        lock_memory_region((u32)rtt.data_addr, rtt.block_sz);
-                        rtt.locked = true;
-
+                        rtt.reset(base, size);
+                        rtt.protect(0, vm::page_readable | vm::page_writable);
                         region = &rtt;
                         break;
                     }
                 }

-                if (!region) fmt::throw_exception("No region created!!" HERE);
-            }
-
-            if (width != region->current_width ||
-                height != region->current_height)
-            {
-                prep_rtt(*region, width, height, gl_pixel_format_internal);
-
-                if (region->locked && region->block_sz != size)
+                if (!region)
                 {
-                    unlock_memory_region((u32)region->data_addr, region->block_sz);
+                    cached_rtt_section section;
+                    section.reset(base, size);
+                    section.set_dirty(true);
+                    section.init_buffer();
+                    section.protect(0, vm::page_readable | vm::page_writable);

-                    region->block_sz = size;
-                    lock_memory_region((u32)region->data_addr, region->block_sz);
-                    region->locked = true;
+                    m_rtt_cache.push_back(section);
+                    region = &m_rtt_cache.back();
                 }
+
+                rtt_cache_range = region->get_min_max(rtt_cache_range);
             }
-
-            __glcheck glCopyImageSubData(source.id(), GL_TEXTURE_2D, 0, 0, 0, 0,
-                region->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0,
-                width, height, 1);
-
-            region->is_dirty = false;
-
-            if (!region->locked)
+            else
             {
-                LOG_WARNING(RSX, "Locking down RTT, was unlocked!");
-                lock_memory_region((u32)region->data_addr, region->block_sz);
-                region->locked = true;
-            }
-        }
+                //This section view already exists
+                if (region->get_section_size() != size)
+                {
+                    region->unprotect();
+                    region->reset(base, size);
+                }

-        void write_rtt(u32 base, u32 size, u32 texaddr)
-        {
-            //Actually download the data, since it seems that cell is writing to it manually
-            fmt::throw_exception("write_rtt" HERE);
-        }
-
-        void destroy_rtt_cache()
-        {
-            for (cached_rtt &rtt : rtt_cache)
-            {
-                rtt.valid = false;
-                rtt.is_dirty = false;
-                rtt.block_sz = 0;
-                rtt.data_addr = 0;
-
-                glDeleteTextures(1, &rtt.copy_glid);
-                rtt.copy_glid = 0;
+                if (!region->is_locked() || region->is_flushed())
+                    region->protect(0, vm::page_readable | vm::page_writable);
             }

-            rtt_cache.resize(0);
+            return region;
         }

     public:

-        gl_texture_cache()
-            : frame_ctr(0)
+        texture_cache() {}
+
+        ~texture_cache()
         {
-        }
-
-        ~gl_texture_cache()
-        {
-            clear_obj_cache();
-        }
-
-        void update_frame_ctr()
-        {
-            frame_ctr++;
-        }
-
-        void initialize_rtt_cache()
-        {
-            if (rtt_cache.size()) fmt::throw_exception("Initialize RTT cache while cache already exists! Leaking objects??" HERE);
-
-            for (int i = 0; i < 64; ++i)
-            {
-                cached_rtt rtt;
-
-                glGenTextures(1, &rtt.copy_glid);
-                rtt.is_dirty = true;
-                rtt.valid = true;
-                rtt.block_sz = 0;
-                rtt.data_addr = 0;
-                rtt.locked = false;
-
-                rtt_cache.push_back(rtt);
-            }
+            clear();
         }

         template <typename RsxTextureType>
@@ -432,21 +717,6 @@ namespace gl
             glActiveTexture(GL_TEXTURE0 + index);

-            /**
-             * Give precedence to rtt data obtained through read/write buffers
-             */
-            cached_rtt *rtt = find_cached_rtt(texaddr, range);
-
-            if (rtt && !rtt->is_dirty)
-            {
-                u32 real_id = gl_texture.id();
-
-                gl_texture.set_id(rtt->copy_glid);
-                gl_texture.bind();
-
-                gl_texture.set_id(real_id);
-            }
-
             /**
              * Check for sampleable rtts from previous render passes
              */
@@ -467,212 +737,185 @@ namespace gl
              * If all the above failed, then its probably a generic texture.
              * Search in cache and upload/bind
              */
-
-            gl_cached_texture *obj = nullptr;
-
-            if (!rtt)
-                obj = find_obj_for_params(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count());
+            cached_texture_section *cached_texture = find_texture(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count());
+            verify(HERE), gl_texture.id() == 0;

-            if (obj && !obj->deleted)
+            if (cached_texture)
             {
-                u32 real_id = gl_texture.id();
+                verify(HERE), cached_texture->is_empty() == false;

-                gl_texture.set_id(obj->gl_id);
+                gl_texture.set_id(cached_texture->id());
                 gl_texture.bind();

-                gl_texture.set_id(real_id);
+                //external gl::texture objects should always be undefined/uninitialized!
+                gl_texture.set_id(0);
+                return;
             }
-            else
+
+            if (!tex.width() || !tex.height())
             {
-                u32 real_id = gl_texture.id();
-
-                if (!obj) gl_texture.set_id(0);
-                else
-                {
-                    //Reuse this GLid
-                    gl_texture.set_id(obj->gl_id);
-
-                    //Empty this slot for another one. A new holder will be created below anyway...
-                    if (obj->locked) unlock_gl_object(*obj);
-                    obj->gl_id = 0;
-                }
-
-                if (!tex.width() || !tex.height())
-                {
-                    LOG_ERROR(RSX, "Texture upload requested but invalid texture dimensions passed");
-                    return;
-                }
-
-                __glcheck gl_texture.init(index, tex);
-                gl_cached_texture &_obj = create_obj_for_params(gl_texture.id(), texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count());
-
-                _obj.block_sz = (u32)get_texture_size(tex);
-                lock_gl_object(_obj);
-
-                gl_texture.set_id(real_id);
+                LOG_ERROR(RSX, "Texture upload requested but invalid texture dimensions passed");
+                return;
             }
+
+            gl_texture.init(index, tex);
+
+            std::lock_guard<std::mutex> lock(m_section_mutex);
+
+            cached_texture_section &cached = create_texture(gl_texture.id(), texaddr, get_texture_size(tex), tex.width(), tex.height(), tex.get_exact_mipmap_count());
+            cached.protect(0, vm::page_writable);
+            cached.set_dirty(false);
+
+            //external gl::texture objects should always be undefined/uninitialized!
+            gl_texture.set_id(0);
         }

-        bool mark_as_dirty(u32 address)
+        void save_rtt(u32 base, u32 size, gl::texture &source, u32 width, u32 height, u32 pitch, texture::format format, texture::type type)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
-                return false;
+            std::lock_guard<std::mutex> lock(m_section_mutex);

-            bool response = false;
+            cached_rtt_section *region = create_locked_view_of_section(base, size);

-            for (gl_cached_texture &tex: texture_cache)
+            //Ignore this if we haven't finished downloading previous draw call
+            //TODO: Separate locking sections vs downloading to pbo unless address faults often
+            if (0)//region->can_skip())
+                return;
+
+            if (!region->matches(base, size))
             {
-                if (!tex.locked) continue;
+                //This memory region overlaps our own region, but does not match it exactly
+                if (region->is_locked())
+                    region->unprotect();

-                if (tex.protected_block_start <= address &&
-                    tex.protected_block_sz >(address - tex.protected_block_start))
-                {
-                    unlock_gl_object(tex);
-
-                    invalidate_rtts_in_range((u32)tex.data_addr, tex.block_sz);
-
-                    tex.deleted = true;
-                    response = true;
-                }
+                region->reset(base, size);
+                region->protect(0, vm::page_readable | vm::page_writable);
             }

-            if (response) return true;
+            region->set_dimensions(width, height, pitch);
+            region->copy_texture(source);
+            region->set_format(format, type);
+            region->set_dirty(false);
+            region->set_flushed(false);

-            for (cached_rtt &rtt: rtt_cache)
-            {
-                if (!rtt.data_addr || rtt.is_dirty) continue;
-
-                u32 rtt_aligned_base = ((u32)(rtt.data_addr)) & ~(4096 - 1);
-                u32 rtt_block_sz = align(rtt.block_sz, 4096);
-
-                if (rtt.locked && (u64)address >= rtt_aligned_base)
-                {
-                    u32 offset = address - rtt_aligned_base;
-                    if (offset >= rtt_block_sz) continue;
-
-                    rtt.is_dirty = true;
-
-                    unlock_memory_region(rtt_aligned_base, rtt_block_sz);
-                    rtt.locked = false;
-
-                    response = true;
-                }
-            }
-
-            return response;
+            verify(HERE), region->is_locked() == true;
         }

-        void save_render_target(u32 texaddr, u32 range, gl::texture &gl_texture)
-        {
-            save_rtt(texaddr, range, gl_texture.width(), gl_texture.height(), (GLenum)gl_texture.get_internal_format(), gl_texture);
-        }
-
-        std::vector<invalid_cache_area> find_and_invalidate_in_range(u32 base, u32 limit)
-        {
-            /**
-             * Sometimes buffers can share physical pages.
-             * Return objects if we really encroach on texture
-             */
-
-            std::vector<invalid_cache_area> result;
-
-            for (gl_cached_texture &obj : texture_cache)
-            {
-                //Check for memory area overlap. unlock page(s) if needed and add this index to array.
-                //Axis separation test
-                const u32 &block_start = obj.protected_block_start;
-                const u32 block_end = block_start + obj.protected_block_sz;
-
-                if (limit < block_start) continue;
-                if (base > block_end) continue;
-
-                u32 min_separation = (limit - base) + obj.protected_block_sz;
-                u32 range_limit = (block_end > limit) ? block_end : limit;
-                u32 range_base = (block_start < base) ? block_start : base;
-
-                u32 actual_separation = (range_limit - range_base);
-
-                if (actual_separation < min_separation)
-                {
-                    const u32 texture_start = (u32)obj.data_addr;
-                    const u32 texture_end = texture_start + obj.block_sz;
-
-                    min_separation = (limit - base) + obj.block_sz;
-                    range_limit = (texture_end > limit) ? texture_end : limit;
-                    range_base = (texture_start < base) ? texture_start : base;
-
-                    actual_separation = (range_limit - range_base);
-
-                    if (actual_separation < min_separation)
-                    {
-                        //Texture area is invalidated!
-                        unlock_gl_object(obj);
-                        obj.deleted = true;
-
-                        continue;
-                    }
-
-                    //Overlap in this case will be at most 1 page...
-                    invalid_cache_area invalid = { 0 };
-                    if (base < obj.data_addr)
-                        invalid.block_base = obj.protected_block_start;
-                    else
-                        invalid.block_base = obj.protected_block_start + obj.protected_block_sz - 4096;
-
-                    invalid.block_sz = 4096;
-                    unlock_memory_region(invalid.block_base, invalid.block_sz);
-                    result.push_back(invalid);
-                }
-            }
-
-            return result;
-        }
-
-        void lock_invalidated_ranges(std::vector<invalid_cache_area> invalid)
-        {
-            for (invalid_cache_area area : invalid)
-            {
-                lock_memory_region(area.block_base, area.block_sz);
-            }
-        }
-
-        void remove_in_range(u32 texaddr, u32 range)
-        {
-            //Seems that the rsx only 'reads' full texture objects..
-            //This simplifies this function to simply check for matches
-            for (gl_cached_texture &cached : texture_cache)
-            {
-                if (cached.data_addr == texaddr &&
-                    cached.block_sz == range)
-                    remove_obj(cached);
-            }
-        }
-
-        bool explicit_writeback(gl::texture &tex, const u32 address, const u32 pitch)
+        bool load_rtt(gl::texture &tex, const u32 address, const u32 pitch)
         {
             const u32 range = tex.height() * pitch;
-            cached_rtt *rtt = find_cached_rtt(address, range);
+            cached_rtt_section *rtt = find_cached_rtt_section(address, range);

-            if (rtt && !rtt->is_dirty)
+            if (rtt && !rtt->is_dirty())
             {
-                u32 min_w = rtt->current_width;
-                u32 min_h = rtt->current_height;
-
-                if ((u32)tex.width() < min_w) min_w = (u32)tex.width();
-                if ((u32)tex.height() < min_h) min_h = (u32)tex.height();
-
-                //TODO: Image reinterpretation e.g read back rgba data as depth texture and vice-versa
-
-                __glcheck glCopyImageSubData(rtt->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0,
-                    tex.id(), GL_TEXTURE_2D, 0, 0, 0, 0,
-                    min_w, min_h, 1);
-
+                rtt->fill_texture(tex);
                 return true;
             }

             //No valid object found in cache
             return false;
         }
+
+        bool mark_as_dirty(u32 address)
+        {
+            bool response = false;
+
+            if (address >= texture_cache_range.first &&
+                address < texture_cache_range.second)
+            {
+                std::lock_guard<std::mutex> lock(m_section_mutex);
+
+                for (cached_texture_section &tex : m_texture_cache)
+                {
+                    if (!tex.is_locked()) continue;
+
+                    if (tex.overlaps(address))
+                    {
+                        tex.unprotect();
+                        tex.set_dirty(true);
+
+                        response = true;
+                    }
+                }
+            }
+
+            if (address >= rtt_cache_range.first &&
+                address < rtt_cache_range.second)
+            {
+                std::lock_guard<std::mutex> lock(m_section_mutex);
+
+                for (cached_rtt_section &rtt : m_rtt_cache)
+                {
+                    if (rtt.is_dirty()) continue;
+
+                    if (rtt.is_locked() && rtt.overlaps(address))
+                    {
+                        rtt.unprotect();
+                        rtt.set_dirty(true);
+
+                        response = true;
+                    }
+                }
+            }
+
+            return response;
+        }
+
+        void invalidate_range(u32 base, u32 size)
+        {
+            std::lock_guard<std::mutex> lock(m_section_mutex);
+            std::pair<u32, u32> range = std::make_pair(base, size);
+
+            if (base < texture_cache_range.second &&
+                (base + size) >= texture_cache_range.first)
+            {
+                for (cached_texture_section &tex : m_texture_cache)
+                {
+                    if (!tex.is_dirty() && tex.overlaps(range))
+                        tex.destroy();
+                }
+            }
+
+            if (base < rtt_cache_range.second &&
+                (base + size) >= rtt_cache_range.first)
+            {
+                for (cached_rtt_section &rtt : m_rtt_cache)
+                {
+                    if (!rtt.is_dirty() && rtt.overlaps(range))
+                    {
+                        rtt.unprotect();
+                        rtt.set_dirty(true);
+                    }
+                }
+            }
+        }
+
+        bool flush_section(u32 address)
+        {
+            if (address < rtt_cache_range.first ||
+                address >= rtt_cache_range.second)
+                return false;
+
+            std::lock_guard<std::mutex> lock(m_section_mutex);
+
+            for (cached_rtt_section &rtt : m_rtt_cache)
+            {
+                if (rtt.is_dirty()) continue;
+
+                if (rtt.is_locked() && rtt.overlaps(address))
+                {
+                    if (rtt.is_flushed())
+                    {
+                        LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size());
+                        continue;
+                    }
+
+                    rtt.flush();
+                    return true;
+                }
+            }
+
+            return false;
+        }
     };
-}
+}
\ No newline at end of file