diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 2322be1176..d7dddfd1cb 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -363,34 +363,34 @@ u8 get_format_block_size_in_texel(int format) LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format); return 1; } -} - -u8 get_format_block_size_in_bytes(rsx::surface_color_format format) -{ - switch (format) - { - case rsx::surface_color_format::b8: - return 1; - case rsx::surface_color_format::g8b8: - case rsx::surface_color_format::r5g6b5: - case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return 2; - case rsx::surface_color_format::a8b8g8r8: - case rsx::surface_color_format::a8r8g8b8: - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x32: - return 4; - case rsx::surface_color_format::w16z16y16x16: - return 8; - case rsx::surface_color_format::w32z32y32x32: - return 16; - default: - fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format); - } +} + +u8 get_format_block_size_in_bytes(rsx::surface_color_format format) +{ + switch (format) + { + case rsx::surface_color_format::b8: + return 1; + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::r5g6b5: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return 2; + case rsx::surface_color_format::a8b8g8r8: + case rsx::surface_color_format::a8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x32: + return 4; + case rsx::surface_color_format::w16z16y16x16: + return 8; + case rsx::surface_color_format::w32z32y32x32: + return 16; + default: + fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format); + } } static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index e2058af742..c736894baa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -41,7 +41,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) void D3D12GSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { ID3D12Resource *surface = nullptr; if (!is_depth) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 0221527a3b..55d6211c49 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -332,7 +332,6 @@ void GLGSRender::begin() std::chrono::time_point now = steady_clock::now(); m_begin_time += (u32)std::chrono::duration_cast(now - then).count(); - m_draw_calls++; } namespace @@ -381,8 +380,6 @@ void GLGSRender::end() m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); } - draw_fbo.bind(); - //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); @@ -473,10 +470,17 @@ void GLGSRender::end() draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count); } + m_attrib_ring_buffer->notify(); + m_index_ring_buffer->notify(); + m_uniform_ring_buffer->notify(); + std::chrono::time_point draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast(draw_end - draw_start).count(); - write_buffers(); + m_draw_calls++; + + //LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls); + synchronize_buffers(); rsx::thread::end(); } @@ -546,10 +550,11 @@ void GLGSRender::on_init_thread() m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000); m_vao.element_array_buffer = *m_index_ring_buffer; - m_gl_texture_cache.initialize_rtt_cache(); if (g_cfg_rsx_overlay) m_text_printer.init(); + + m_gl_texture_cache.initialize(this); } void GLGSRender::on_exit() @@ -588,11 +593,12 @@ void GLGSRender::on_exit() m_index_ring_buffer->remove(); m_text_printer.close(); + m_gl_texture_cache.close(); return GSRender::on_exit(); } -void nv4097_clear_surface(u32 arg, GLGSRender* renderer) +void GLGSRender::clear_surface(u32 arg) { if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return; @@ -602,9 +608,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) return; } - renderer->init_buffers(true); - renderer->draw_fbo.bind(); - GLbitfield mask = 0; rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt(); @@ -618,6 +621,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) glDepthMask(GL_TRUE); glClearDepth(double(clear_depth) / max_depth_value); mask |= GLenum(gl::buffers::depth); + + gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); + if (ds && !ds->cleared()) + ds->set_cleared(); } if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2)) @@ -644,46 +651,31 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) } glClear(mask); - renderer->write_buffers(); } -using rsx_method_impl_t = void(*)(u32, GLGSRender*); - -static const std::unordered_map g_gl_method_tbl = -{ - { NV4097_CLEAR_SURFACE, nv4097_clear_surface } -}; - bool GLGSRender::do_method(u32 cmd, u32 arg) { - auto found = g_gl_method_tbl.find(cmd); - - if (found == g_gl_method_tbl.end()) - { - return false; - } - - found->second(arg, this); - switch (cmd) { case NV4097_CLEAR_SURFACE: { - if (arg & 0x1) - { - gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && !ds->cleared()) - ds->set_cleared(); - } + init_buffers(true); + synchronize_buffers(); + clear_surface(arg); + return true; } + case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: + case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: + flush_draw_buffers = true; + return true; } - return true; + return false; } bool GLGSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple { gl::render_target *surface = nullptr; if (!is_depth) @@ -691,14 +683,21 @@ bool GLGSRender::load_program() else surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - if (!surface) return std::make_tuple(false, 0); + if (!surface) + { + auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); + if (!rsc.surface || rsc.is_depth_surface != is_depth) + return std::make_tuple(false, 0); + + surface = rsc.surface; + } + return std::make_tuple(true, surface->get_native_pitch()); }; RSXVertexProgram vertex_program = get_current_vertex_program(); RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); - std::array rtt_scaling; u32 unnormalized_rtts = 0; for (auto &vtx : vertex_program.rsx_vertex_inputs) @@ -819,17 +818,8 @@ void GLGSRender::flip(int buffer) rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); u32 absolute_address = buffer_region.address + buffer_region.base; - if (0) - { - LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL)); - } - gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address); - /** - * Calling read_buffers will overwrite cached content - */ - __glcheck m_flip_fbo.recreate(); m_flip_fbo.bind(); @@ -877,33 +867,27 @@ void GLGSRender::flip(int buffer) areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); coordi aspect_ratio; - if (1) //enable aspect ratio + + sizei csize(m_frame->client_width(), m_frame->client_height()); + sizei new_size = csize; + + const double aq = (double)buffer_width / buffer_height; + const double rq = (double)new_size.width / new_size.height; + const double q = aq / rq; + + if (q > 1.0) { - sizei csize(m_frame->client_width(), m_frame->client_height()); - sizei new_size = csize; - - const double aq = (double)buffer_width / buffer_height; - const double rq = (double)new_size.width / new_size.height; - const double q = aq / rq; - - if (q > 1.0) - { - new_size.height = int(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = int(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - - aspect_ratio.size = new_size; + new_size.height = int(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; } - else + else if (q < 1.0) { - aspect_ratio.size = { m_frame->client_width(), m_frame->client_height() }; + new_size.width = int(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; } + aspect_ratio.size = new_size; + gl::screen.clear(gl::buffers::color_depth_stencil); __glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical()); @@ -928,6 +912,8 @@ void GLGSRender::flip(int buffer) m_vertex_upload_time = 0; m_textures_upload_time = 0; + m_gl_texture_cache.clear_temporary_surfaces(); + for (auto &tex : m_rtts.invalidated_resources) { tex->remove(); @@ -946,6 +932,48 @@ u64 GLGSRender::timestamp() const bool GLGSRender::on_access_violation(u32 address, bool is_writing) { - if (is_writing) return m_gl_texture_cache.mark_as_dirty(address); - return false; + if (is_writing) + return m_gl_texture_cache.mark_as_dirty(address); + else + return m_gl_texture_cache.flush_section(address); +} + +void GLGSRender::do_local_task() +{ + std::lock_guard lock(queue_guard); + + work_queue.remove_if([](work_item &q) { return q.received; }); + + for (work_item& q: work_queue) + { + std::unique_lock lock(q.guard_mutex); + + //Process this address + q.result = m_gl_texture_cache.flush_section(q.address_to_flush); + q.processed = true; + + //Notify thread waiting on this + lock.unlock(); + q.cv.notify_one(); + } +} + +work_item& GLGSRender::post_flush_request(u32 address) +{ + std::lock_guard lock(queue_guard); + + work_queue.emplace_back(); + work_item &result = work_queue.back(); + result.address_to_flush = address; + return result; +} + +void GLGSRender::synchronize_buffers() +{ + if (flush_draw_buffers) + { + //LOG_WARNING(RSX, "Flushing RTT buffers EID=%d", m_draw_calls); + write_buffers(); + flush_draw_buffers = false; + } } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index d2022e8b60..4b05862475 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -12,6 +12,41 @@ #pragma comment(lib, "opengl32.lib") +struct work_item +{ + std::condition_variable cv; + std::mutex guard_mutex; + + u32 address_to_flush = 0; + bool processed = false; + bool result = false; + bool received = false; +}; + +struct gcm_buffer_info +{ + u32 address = 0; + u32 pitch = 0; + + bool is_depth_surface; + + rsx::surface_color_format color_format; + rsx::surface_depth_format depth_format; + + u16 width; + u16 height; + + gcm_buffer_info() + { + address = 0; + pitch = 0; + } + + gcm_buffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h) + :address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h) + {} +}; + class GLGSRender : public GSRender { private: @@ -25,7 +60,7 @@ private: gl_render_targets m_rtts; - gl::gl_texture_cache m_gl_texture_cache; + gl::texture_cache m_gl_texture_cache; gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; @@ -49,6 +84,14 @@ private: gl::text_writer m_text_printer; + std::mutex queue_guard; + std::list work_queue; + + gcm_buffer_info surface_info[rsx::limits::color_buffers_count]; + gcm_buffer_info depth_surface_info; + + bool flush_draw_buffers = false; + public: gl::fbo draw_fbo; @@ -72,6 +115,8 @@ private: // Return element to draw and in case of indexed draw index type and offset in index buffer std::tuple > > set_vertex_buffer(); + void clear_surface(u32 arg); + public: bool load_program(); void init_buffers(bool skip_reading = false); @@ -79,6 +124,9 @@ public: void write_buffers(); void set_viewport(); + void synchronize_buffers(); + work_item& post_flush_request(u32 address); + protected: void begin() override; void end() override; @@ -89,6 +137,8 @@ protected: void flip(int buffer) override; u64 timestamp() const override; + void do_local_task() override; + bool on_access_violation(u32 address, bool is_writing) override; virtual std::array, 4> copy_render_targets_to_memory() override; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 953a753ca8..06749ec877 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -67,6 +67,106 @@ namespace gl } }; + class fence + { + GLsync m_value = nullptr; + GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; + + public: + + fence() {} + ~fence() {} + + void create() + { + m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + flags = GL_SYNC_FLUSH_COMMANDS_BIT; + } + + void destroy() + { + glDeleteSync(m_value); + m_value = nullptr; + } + + void reset() + { + if (m_value != nullptr) + destroy(); + + create(); + } + + bool is_empty() + { + return (m_value == nullptr); + } + + bool check_signaled() + { + verify(HERE), m_value != nullptr; + + if (flags) + { + GLenum err = glClientWaitSync(m_value, flags, 0); + flags = 0; + return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); + } + else + { + GLint status = GL_UNSIGNALED; + GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + return (status == GL_SIGNALED); + } + } + + bool wait_for_signal() + { + verify(HERE), m_value != nullptr; + + GLenum err = GL_WAIT_FAILED; + bool done = false; + + while (!done) + { + if (flags) + { + err = glClientWaitSync(m_value, flags, 0); + flags = 0; + + switch (err) + { + default: + LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err); + case GL_ALREADY_SIGNALED: + case GL_CONDITION_SATISFIED: + done = true; + break; + case GL_TIMEOUT_EXPIRED: + continue; + } + } + else + { + GLint status = GL_UNSIGNALED; + GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + + if (status == GL_SIGNALED) + break; + } + } + + glDeleteSync(m_value); + m_value = nullptr; + + return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); + } + }; + template class save_binding_state_base { @@ -594,33 +694,7 @@ namespace gl u32 m_limit = 0; void *m_memory_mapping = nullptr; - GLsync m_fence = nullptr; - - void wait_for_sync() - { - verify(HERE), m_fence != nullptr; - - bool done = false; - while (!done) - { - //Check if we are finished, wait time = 1us - GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000); - switch (err) - { - default: - LOG_ERROR(RSX, "err Returned 0x%X", err); - case GL_ALREADY_SIGNALED: - case GL_CONDITION_SATISFIED: - done = true; - break; - case GL_TIMEOUT_EXPIRED: - continue; - } - } - - glDeleteSync(m_fence); - m_fence = nullptr; - } + fence m_fence; public: @@ -628,7 +702,7 @@ namespace gl { if (m_id) { - wait_for_sync(); + m_fence.wait_for_signal(); remove(); } @@ -656,17 +730,15 @@ namespace gl if ((offset + alloc_size) > m_limit) { - //TODO: Measure the stall here - wait_for_sync(); + if (!m_fence.is_empty()) + m_fence.wait_for_signal(); + m_data_loc = 0; offset = 0; } if (!m_data_loc) - { - verify(HERE), m_fence == nullptr; - m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } + m_fence.reset(); //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently m_data_loc = align(offset + alloc_size, 256); @@ -697,6 +769,13 @@ namespace gl { glBindBufferRange((GLenum)current_target(), index, id(), offset, size); } + + //Notification of a draw command + virtual void notify() + { + if (m_fence.is_empty()) + m_fence.reset(); + } }; class legacy_ring_buffer : public ring_buffer @@ -790,6 +869,8 @@ namespace gl m_mapped_bytes = 0; m_mapping_offset = 0; } + + void notify() override {} }; class vao @@ -1019,7 +1100,16 @@ namespace gl compressed_rgb_s3tc_dxt1 = GL_COMPRESSED_RGB_S3TC_DXT1_EXT, compressed_rgba_s3tc_dxt1 = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, compressed_rgba_s3tc_dxt3 = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, - compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT + compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, + + //Sized internal formats, see opengl spec document on glTexImage2D, table 3 + rgba8 = GL_RGBA8, + r5g6b5 = GL_RGB565, + r8 = GL_R8, + rg8 = GL_RG8, + r32f = GL_R32F, + rgba16f = GL_RGBA16F, + rgba32f = GL_RGBA32F }; enum class wrap diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 403ed392d8..3c55e3fca9 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -170,6 +170,8 @@ OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); +OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); + //Texture Buffers OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer); OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT); @@ -183,6 +185,7 @@ OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage); //ARB_sync OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync); OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync); +OPENGL_PROC(PFNGLGETSYNCIVPROC, GetSynciv); OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync); //KHR_debug diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index ae3925a8b9..f2a1591191 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -44,9 +44,10 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma case rsx::surface_color_format::x32: return{ ::gl::texture::type::f32, ::gl::texture::format::red, false, 1, 4 }; - case rsx::surface_color_format::a8b8g8r8: default: LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format); + + case rsx::surface_color_format::a8b8g8r8: return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 }; } } @@ -75,92 +76,46 @@ u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format) fmt::throw_exception("Unknown depth format" HERE); } - -void GLGSRender::init_buffers(bool skip_reading) +::gl::texture::internal_format rsx::internals::sized_internal_format(rsx::surface_color_format color_format) { - u16 clip_horizontal = rsx::method_registers.surface_clip_width(); - u16 clip_vertical = rsx::method_registers.surface_clip_height(); - - set_viewport(); - - if (draw_fbo && !m_rtts_dirty) + switch (color_format) { - return; + case rsx::surface_color_format::r5g6b5: + return ::gl::texture::internal_format::r5g6b5; + + case rsx::surface_color_format::a8r8g8b8: + return ::gl::texture::internal_format::rgba8; + + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + return ::gl::texture::internal_format::rgba8; + + case rsx::surface_color_format::w16z16y16x16: + return ::gl::texture::internal_format::rgba16f; + + case rsx::surface_color_format::w32z32y32x32: + return ::gl::texture::internal_format::rgba32f; + + case rsx::surface_color_format::b8: + return ::gl::texture::internal_format::r8; + + case rsx::surface_color_format::g8b8: + return ::gl::texture::internal_format::rg8; + + case rsx::surface_color_format::x32: + return ::gl::texture::internal_format::r32f; + + case rsx::surface_color_format::a8b8g8r8: + return ::gl::texture::internal_format::rgba8; + + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format); + return ::gl::texture::internal_format::rgba8; } - - m_rtts_dirty = false; - - if (0) - { - LOG_NOTICE(RSX, "render to -> 0x%x", get_color_surface_addresses()[0]); - } - - m_rtts.prepare_render_target(nullptr, rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), clip_horizontal, clip_vertical, - rsx::method_registers.surface_color_target(), - get_color_surface_addresses(), get_zeta_surface_address()); - - draw_fbo.recreate(); - - for (int i = 0; i < rsx::limits::color_buffers_count; ++i) - { - if (std::get<0>(m_rtts.m_bound_render_targets[i])) - { - __glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]); - } - } - - if (std::get<0>(m_rtts.m_bound_depth_stencil)) - { - __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); - } - - if (!draw_fbo.check()) - return; - - //HACK: read_buffer shouldn't be there - switch (rsx::method_registers.surface_color_target()) - { - case rsx::surface_target::none: break; - - case rsx::surface_target::surface_a: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surface_b: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]); - __glcheck draw_fbo.read_buffer(draw_fbo.color[1]); - break; - - case rsx::surface_target::surfaces_a_b: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surfaces_a_b_c: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - } -} - -std::array, 4> GLGSRender::copy_render_targets_to_memory() -{ - int clip_w = rsx::method_registers.surface_clip_width(); - int clip_h = rsx::method_registers.surface_clip_height(); - return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h); -} - -std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_memory() -{ - int clip_w = rsx::method_registers.surface_clip_width(); - int clip_h = rsx::method_registers.surface_clip_height(); - return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h); } namespace @@ -196,6 +151,145 @@ namespace } } +void GLGSRender::init_buffers(bool skip_reading) +{ + if (draw_fbo && !m_rtts_dirty) + { + set_viewport(); + return; + } + + //We are about to change buffers, flush any pending requests for the old buffers + //LOG_WARNING(RSX, "Render targets have changed; checking for sync points (EID=%d)", m_draw_calls); + synchronize_buffers(); + + m_rtts_dirty = false; + + const u16 clip_horizontal = rsx::method_registers.surface_clip_width(); + const u16 clip_vertical = rsx::method_registers.surface_clip_height(); + + const auto pitchs = get_pitchs(); + const auto surface_format = rsx::method_registers.surface_color(); + const auto depth_format = rsx::method_registers.surface_depth_fmt(); + + const auto surface_addresses = get_color_surface_addresses(); + const auto depth_address = get_zeta_surface_address(); + + m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical, + rsx::method_registers.surface_color_target(), + surface_addresses, depth_address); + + draw_fbo.recreate(); + + for (int i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (std::get<0>(m_rtts.m_bound_render_targets[i])) + { + __glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]); + + std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]); + surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical }; + } + else + surface_info[i] = {}; + } + + if (std::get<0>(m_rtts.m_bound_depth_stencil)) + { + __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); + + std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); + depth_surface_info = { depth_address, rsx::method_registers.surface_z_pitch(), true, surface_format, depth_format, clip_horizontal, clip_vertical }; + } + else + depth_surface_info = {}; + + if (!draw_fbo.check()) + return; + + draw_fbo.bind(); + set_viewport(); + + switch (rsx::method_registers.surface_color_target()) + { + case rsx::surface_target::none: break; + + case rsx::surface_target::surface_a: + __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surface_b: + __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]); + __glcheck draw_fbo.read_buffer(draw_fbo.color[1]); + break; + + case rsx::surface_target::surfaces_a_b: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surfaces_a_b_c: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surfaces_a_b_c_d: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + } + + //Mark buffer regions as NO_ACCESS on Cell visible side + if (g_cfg_rsx_write_color_buffers) + { + auto color_format = rsx::internals::surface_color_format_to_gl(surface_format); + + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (!surface_info[i].address || pitchs[i] <= 64) continue; + + const u32 range = surface_info[i].pitch * surface_info[i].height; + m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, + color_format.format, color_format.type, *std::get<1>(m_rtts.m_bound_render_targets[i])); + } + } + + if (g_cfg_rsx_write_depth_buffer) + { + if (depth_surface_info.address && rsx::method_registers.surface_z_pitch() > 64) + { + auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format); + + u32 pitch = depth_surface_info.width * 2; + if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; + + const u32 range = pitch * depth_surface_info.height; + + //TODO: Verify that depth surface pitch variance affects results + if (pitch != depth_surface_info.pitch) + LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch); + + m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, + depth_format_gl.format, depth_format_gl.type, *std::get<1>(m_rtts.m_bound_depth_stencil)); + } + } +} + +std::array, 4> GLGSRender::copy_render_targets_to_memory() +{ + int clip_w = rsx::method_registers.surface_clip_width(); + int clip_h = rsx::method_registers.surface_clip_height(); + return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h); +} + +std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_memory() +{ + int clip_w = rsx::method_registers.surface_clip_width(); + int clip_h = rsx::method_registers.surface_clip_height(); + return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h); +} + void GLGSRender::read_buffers() { if (!draw_fbo) @@ -228,7 +322,7 @@ void GLGSRender::read_buffers() rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch); + bool success = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch); //Fall back to slower methods if the image could not be fetched from cache. if (!success) @@ -240,7 +334,7 @@ void GLGSRender::read_buffers() else { u32 range = pitch * height; - m_gl_texture_cache.remove_in_range(texaddr, range); + m_gl_texture_cache.invalidate_range(texaddr, range); std::unique_ptr buffer(new u8[pitch * height]); color_buffer.read(buffer.get(), width, height, pitch); @@ -287,7 +381,7 @@ void GLGSRender::read_buffers() return; u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - bool in_cache = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch); + bool in_cache = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch); if (in_cache) return; @@ -332,85 +426,36 @@ void GLGSRender::write_buffers() if (!draw_fbo) return; - //TODO: Detect when the data is actually being used by cell and issue download command on-demand (mark as not present?) - //Should also mark cached resources as dirty so that read buffers works out-of-the-box without modification - if (g_cfg_rsx_write_color_buffers) { - auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color()); - auto write_color_buffers = [&](int index, int count) { - u32 width = rsx::method_registers.surface_clip_width(); - u32 height = rsx::method_registers.surface_clip_height(); - - std::array offsets = get_offsets(); - const std::array locations = get_locations(); - const std::array pitchs = get_pitchs(); - for (int i = index; i < index + count; ++i) { - u32 offset = offsets[i]; - u32 location = locations[i]; - u32 pitch = pitchs[i]; - - if (pitch <= 64) + if (surface_info[i].address == 0 || surface_info[i].pitch <= 64) continue; - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - u32 range = pitch * height; - /**Even tiles are loaded as whole textures during read_buffers from testing. * Need further evaluation to determine correct behavior. Separate paths for both show no difference, * but using the GPU to perform the caching is many times faster. */ - __glcheck m_gl_texture_cache.save_render_target(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i]))); + const u32 range = surface_info[i].pitch * surface_info[i].height; + __glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range); } }; - switch (rsx::method_registers.surface_color_target()) - { - case rsx::surface_target::none: - break; - - case rsx::surface_target::surface_a: - write_color_buffers(0, 1); - break; - - case rsx::surface_target::surface_b: - write_color_buffers(1, 1); - break; - - case rsx::surface_target::surfaces_a_b: - write_color_buffers(0, 2); - break; - - case rsx::surface_target::surfaces_a_b_c: - write_color_buffers(0, 3); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - write_color_buffers(0, 4); - break; - } + write_color_buffers(0, 4); } if (g_cfg_rsx_write_depth_buffer) { //TODO: use pitch - u32 pitch = rsx::method_registers.surface_z_pitch(); + if (!depth_surface_info.address || depth_surface_info.pitch <= 64) return; - if (pitch <= 64) - return; + u32 range = depth_surface_info.width * depth_surface_info.height * 2; + if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; - auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt()); - u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2; - - if (rsx::method_registers.surface_depth_fmt() != rsx::surface_depth_format::z16) range *= 2; - - m_gl_texture_cache.save_render_target(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil))); + m_gl_texture_cache.save_rtt(depth_surface_info.address, range); } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index d67e5042c6..04282ae6a6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -4,40 +4,6 @@ #include "stdafx.h" #include "../RSXThread.h" -namespace gl -{ - class render_target : public texture - { - bool is_cleared = false; - u16 native_pitch = 0; - - public: - - render_target() {} - - void set_cleared() - { - is_cleared = true; - } - - bool cleared() const - { - return is_cleared; - } - - // Internal pitch is the actual row length in bytes of the openGL texture - void set_native_pitch(u16 pitch) - { - native_pitch = pitch; - } - - u16 get_native_pitch() const - { - return native_pitch; - } - }; -} - struct color_swizzle { gl::texture::channel a = gl::texture::channel::a; @@ -73,12 +39,111 @@ namespace rsx { namespace internals { + ::gl::texture::internal_format sized_internal_format(rsx::surface_color_format color_format); color_format surface_color_format_to_gl(rsx::surface_color_format color_format); depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format); u8 get_pixel_size(rsx::surface_depth_format format); } } +namespace gl +{ + class render_target : public texture + { + bool is_cleared = false; + + u32 rsx_pitch = 0; + u16 native_pitch = 0; + + u16 surface_height = 0; + u16 surface_width = 0; + u16 surface_pixel_size = 0; + + texture::internal_format compatible_internal_format = texture::internal_format::rgba8; + + public: + + render_target() {} + + void set_cleared() + { + is_cleared = true; + } + + bool cleared() const + { + return is_cleared; + } + + // Internal pitch is the actual row length in bytes of the openGL texture + void set_native_pitch(u16 pitch) + { + native_pitch = pitch; + } + + u16 get_native_pitch() const + { + return native_pitch; + } + + // Rsx pitch + void set_rsx_pitch(u16 pitch) + { + rsx_pitch = pitch; + } + + u16 get_rsx_pitch() const + { + return rsx_pitch; + } + + std::pair get_dimensions() + { + if (!surface_height) surface_height = height(); + if (!surface_width) surface_width = width(); + + return std::make_pair(surface_width, surface_height); + } + + void set_compatible_format(texture::internal_format format) + { + compatible_internal_format = format; + } + + texture::internal_format get_compatible_internal_format() + { + return compatible_internal_format; + } + + // For an address within the texture, extract this sub-section's rect origin + std::tuple get_texture_subresource(u32 offset) + { + if (!offset) + { + return std::make_tuple(true, 0, 0); + } + + if (!surface_height) surface_height = height(); + if (!surface_width) surface_width = width(); + + u32 range = rsx_pitch * surface_height; + if (offset < range) + { + if (!surface_pixel_size) + surface_pixel_size = native_pitch / surface_width; + + u32 pixel_offset = (offset / surface_pixel_size); + u32 y = (pixel_offset / surface_width); + u32 x = (pixel_offset % surface_width); + + return std::make_tuple(true, (u16)x, (u16)y); + } + else + return std::make_tuple(false, 0, 0); + } + }; +} + struct gl_render_target_traits { using surface_storage_type = std::unique_ptr; @@ -97,13 +162,17 @@ struct gl_render_target_traits std::unique_ptr result(new gl::render_target()); auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); + auto internal_fmt = rsx::internals::sized_internal_format(surface_color_format); + result->recreate(gl::texture::target::texture2D); result->set_native_pitch(width * format.channel_count * format.channel_size); + result->set_compatible_format(internal_fmt); __glcheck result->config() .size({ (int)width, (int)height }) .type(format.type) .format(format.format) + .internal_format(internal_fmt) .swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a) .wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border) .apply(); @@ -144,6 +213,7 @@ struct gl_render_target_traits native_pitch *= 2; result->set_native_pitch(native_pitch); + result->set_compatible_format(format.internal_format); return result; } @@ -210,7 +280,138 @@ struct gl_render_target_traits } }; - -struct gl_render_targets : public rsx::surface_store +struct surface_subresource { + gl::render_target *surface = nullptr; + + u16 x = 0; + u16 y = 0; + u16 w = 0; + u16 h = 0; + + bool is_bound = false; + bool is_depth_surface = false; + + surface_subresource() {} + + surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth) + : surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth) + {} +}; + +class gl_render_targets : public rsx::surface_store +{ +private: + bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y) + { + bool is_subslice = false; + u16 x_offset = 0; + u16 y_offset = 0; + + if (surface_address > texaddr) + return false; + + u32 offset = texaddr - surface_address; + if (offset >= 0) + { + std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset); + if (is_subslice) + { + *x = x_offset; + *y = y_offset; + + return true; + } + } + + return false; + } + + bool is_bound(u32 address, bool is_depth) + { + if (is_depth) + { + const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil); + return (bound_depth_address == address); + } + + for (auto &surface: m_bound_render_targets) + { + const u32 bound_address = std::get<0>(surface); + if (bound_address == address) + return true; + } + + return false; + } + + bool fits(gl::render_target *src, std::pair &dims, u16 x_offset, u16 y_offset, u16 width, u16 height) const + { + if ((x_offset + width) > dims.first) return false; + if ((y_offset + height) > dims.second) return false; + + return true; + } + +public: + surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch) + { + gl::render_target *surface = nullptr; + bool is_subslice = false; + u16 x_offset = 0; + u16 y_offset = 0; + + for (auto &tex_info : m_render_targets_storage) + { + u32 this_address = std::get<0>(tex_info); + surface = std::get<1>(tex_info).get(); + + if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset)) + { + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + auto dims = surface->get_dimensions(); + + if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height)) + return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false }; + else + { + if (dims.first >= requested_width && dims.second >= requested_height) + { + LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region"); + return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false }; + } + } + } + } + + //Check depth surfaces for overlap + for (auto &tex_info : m_depth_stencil_storage) + { + u32 this_address = std::get<0>(tex_info); + surface = std::get<1>(tex_info).get(); + + if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset)) + { + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + auto dims = surface->get_dimensions(); + + if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height)) + return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true }; + else + { + if (dims.first >= requested_width && dims.second >= requested_height) + { + LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region"); + return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true }; + } + } + } + } + + return {}; + } }; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index f64644d5b1..5b5e953ad3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -7,7 +7,7 @@ #include "../rsx_utils.h" #include "../Common/TextureUtils.h" -namespace +namespace gl { GLenum get_sized_internal_format(u32 texture_format) { @@ -40,7 +40,6 @@ namespace fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } - std::tuple get_format_type(u32 texture_format) { switch (texture_format) @@ -68,7 +67,10 @@ namespace } fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } +} +namespace +{ bool is_compressed_format(u32 texture_format) { switch (texture_format) @@ -319,10 +321,10 @@ namespace rsx int mip_level = 0; if (dim == rsx::texture_dimension_extended::texture_dimension_1d) { - __glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width); + __glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -335,7 +337,7 @@ namespace rsx { u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; @@ -343,10 +345,10 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_2d) { - __glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, get_sized_internal_format(format), width, height); + __glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -359,7 +361,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; @@ -367,13 +369,13 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap) { - __glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, get_sized_internal_format(format), width, height); + __glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height); // Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap // Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0. // mip_level % mipmap_per_layer will always be equal to mip_level if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -387,7 +389,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); mip_level++; } } @@ -396,10 +398,10 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_3d) { - __glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, get_sized_internal_format(format), width, height, depth); + __glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -412,7 +414,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index e8740f64a7..e12271fa3f 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -64,3 +64,9 @@ namespace rsx }; } } + +namespace gl +{ + GLenum get_sized_internal_format(u32 gcm_format); + std::tuple get_format_type(u32 texture_format); +} diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp new file mode 100644 index 0000000000..f91088864f --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp @@ -0,0 +1,63 @@ +#pragma once + +#include "stdafx.h" + +#include "GLGSRender.h" +#include "GLTextureCache.h" + +namespace gl +{ + bool texture_cache::flush_section(u32 address) + { + if (address < rtt_cache_range.first || + address >= rtt_cache_range.second) + return false; + + bool post_task = false; + + { + std::lock_guard lock(m_section_mutex); + + for (cached_rtt_section &rtt : m_rtt_cache) + { + if (rtt.is_dirty()) continue; + + if (rtt.is_locked() && rtt.overlaps(address)) + { + if (rtt.is_flushed()) + { + LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size()); + continue; + } + + //LOG_WARNING(RSX, "Cell needs GPU data synced here, address=0x%X", address); + + if (std::this_thread::get_id() != m_renderer_thread) + { + post_task = true; + break; + } + + rtt.flush(); + return true; + } + } + } + + if (post_task) + { + //LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address); + work_item &task = m_renderer->post_flush_request(address); + + { + std::unique_lock lock(task.guard_mutex); + task.cv.wait(lock, [&task] { return task.processed; }); + } + + task.received = true; + return task.result; + } + + return false; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index a372494bdc..96ddcfe40e 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -7,225 +7,480 @@ #include #include #include -#include +#include +#include -#include "GLGSRender.h" #include "GLRenderTargets.h" #include "../Common/TextureUtils.h" #include +class GLGSRender; + namespace gl { - class gl_texture_cache + class texture_cache { public: - struct gl_cached_texture + class cached_texture_section : public rsx::buffered_section { - u32 gl_id; - u32 w; - u32 h; - u64 data_addr; - u32 block_sz; - u32 frame_ctr; - u32 protected_block_start; - u32 protected_block_sz; - u16 mipmap; - bool deleted; - bool locked; + u32 texture_id = 0; + u32 width = 0; + u32 height = 0; + u16 mipmaps = 0; + + public: + + void create(u32 id, u32 width, u32 height, u32 mipmaps) + { + verify(HERE), locked == false; + + texture_id = id; + this->width = width; + this->height = height; + this->mipmaps = mipmaps; + } + + bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const + { + if (rsx_address == cpu_address_base && texture_id != 0) + { + if (!width && !height && !mipmaps) + return true; + + return (width == this->width && height == this->height && mipmaps == this->mipmaps); + } + + return false; + } + + void destroy() + { + if (locked) + unprotect(); + + glDeleteTextures(1, &texture_id); + texture_id = 0; + } + + bool is_empty() const + { + return (texture_id == 0); + } + + u32 id() const + { + return texture_id; + } }; - struct invalid_cache_area + class cached_rtt_section : public rsx::buffered_section { - u32 block_base; - u32 block_sz; - }; + private: + fence m_fence; + u32 pbo_id = 0; + u32 pbo_size = 0; - struct cached_rtt - { - u32 copy_glid; - u32 data_addr; - u32 block_sz; + u32 source_texture = 0; - bool is_dirty; - bool is_depth; - bool valid; + bool copied = false; + bool flushed = false; + bool is_depth = false; - u32 current_width; - u32 current_height; + u32 current_width = 0; + u32 current_height = 0; + u32 current_pitch = 0; + u32 real_pitch = 0; - bool locked; - cached_rtt() : valid(false) {} + texture::format format = texture::format::rgba; + texture::type type = texture::type::ubyte; + + u8 get_pixel_size(texture::format fmt_, texture::type type_) + { + u8 size = 1; + switch (type_) + { + case texture::type::ubyte: + case texture::type::sbyte: + break; + case texture::type::ushort: + case texture::type::sshort: + case texture::type::f16: + size = 2; + break; + case texture::type::ushort_5_6_5: + case texture::type::ushort_5_6_5_rev: + case texture::type::ushort_4_4_4_4: + case texture::type::ushort_4_4_4_4_rev: + case texture::type::ushort_5_5_5_1: + case texture::type::ushort_1_5_5_5_rev: + return 2; + case texture::type::uint_8_8_8_8: + case texture::type::uint_8_8_8_8_rev: + case texture::type::uint_10_10_10_2: + case texture::type::uint_2_10_10_10_rev: + case texture::type::uint_24_8: + return 4; + case texture::type::f32: + case texture::type::sint: + case texture::type::uint: + size = 4; + break; + } + + switch (fmt_) + { + case texture::format::red: + case texture::format::r: + break; + case texture::format::rg: + size *= 2; + break; + case texture::format::rgb: + case texture::format::bgr: + size *= 3; + break; + case texture::format::rgba: + case texture::format::bgra: + size *= 4; + break; + + //Depth formats.. + case texture::format::depth: + size = 2; + break; + case texture::format::depth_stencil: + size = 4; + break; + default: + LOG_ERROR(RSX, "Unsupported rtt format %d", (GLenum)fmt_); + size = 4; + } + + return size; + } + + void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) + { + u32 dst_offset = 0; + u32 src_offset = 0; + u32 padding = dst_pitch - (src_pitch * samples); + + for (u16 h = 0; h < src_height; ++h) + { + for (u16 w = 0; w < src_width; ++w) + { + for (u8 n = 0; n < samples; ++n) + { + memcpy(&dst[dst_offset], &src[src_offset], pixel_size); + dst_offset += pixel_size; + } + + src_offset += pixel_size; + } + + dst_offset += padding; + } + } + + template + void scale_image_impl(T* dst, const T* src, u16 src_width, u16 src_height, u16 padding) + { + u32 dst_offset = 0; + u32 src_offset = 0; + + for (u16 h = 0; h < src_height; ++h) + { + for (u16 w = 0; w < src_width; ++w) + { + for (u8 n = 0; n < N; ++n) + { + dst[dst_offset++] = src[src_offset]; + } + + //Fetch next pixel + src_offset++; + } + + //Pad this row + dst_offset += padding; + } + } + + template + void scale_image(void *dst, void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding) + { + switch (pixel_size) + { + case 1: + scale_image_impl((u8*)dst, (u8*)src, current_width, current_height, padding); + break; + case 2: + scale_image_impl((u16*)dst, (u16*)src, current_width, current_height, padding); + break; + case 4: + scale_image_impl((u32*)dst, (u32*)src, current_width, current_height, padding); + break; + case 8: + scale_image_impl((u64*)dst, (u64*)src, current_width, current_height, padding); + break; + default: + fmt::throw_exception("unsupported rtt format 0x%X" HERE, (u32)format); + } + } + + void init_buffer() + { + if (pbo_id) + { + glDeleteBuffers(1, &pbo_id); + pbo_id = 0; + pbo_size = 0; + } + + glGenBuffers(1, &pbo_id); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT); + + pbo_size = locked_address_range; + } + + public: + + void reset(u32 base, u32 size) + { + rsx::buffered_section::reset(base, size); + init_buffer(); + + flushed = false; + copied = false; + + source_texture = 0; + } + + void set_dimensions(u32 width, u32 height, u32 pitch) + { + current_width = width; + current_height = height; + current_pitch = pitch; + + real_pitch = width * get_pixel_size(format, type); + } + + void set_format(texture::format gl_format, texture::type gl_type) + { + format = gl_format; + type = gl_type; + + real_pitch = current_width * get_pixel_size(format, type); + } + + void set_source(gl::texture &source) + { + source_texture = source.id(); + } + + void copy_texture() + { + if (!glIsTexture(source_texture)) + { + LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! (0x%X)", source_texture); + return; + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + glGetTextureImageEXT(source_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + m_fence.reset(); + copied = true; + } + + void fill_texture(gl::texture &tex) + { + if (!copied) + { + //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); + return; + } + + u32 min_width = std::min((u32)tex.width(), current_width); + u32 min_height = std::min((u32)tex.height(), current_height); + + tex.bind(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); + glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + void flush() + { + if (!copied) + { + LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); + copy_texture(); + + if (!copied) + { + LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); + protect(vm::page_readable, 0); + return; + } + } + + protect(vm::page_writable, 0); + m_fence.wait_for_signal(); + flushed = true; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT); + u8 *dst = vm::ps3::_ptr(cpu_address_base); + + //throw if map failed since we'll segfault anyway + verify(HERE), data != nullptr; + + if (real_pitch >= current_pitch) + memcpy(dst, data, cpu_address_range); + else + { + //TODO: Use compression hint from the gcm tile information + //Scale this image by repeating pixel data n times + //n = expected_pitch / real_pitch + //Use of fixed argument templates for performance reasons + + const u16 pixel_size = get_pixel_size(format, type); + const u16 dst_width = current_pitch / pixel_size; + const u16 sample_count = current_pitch / real_pitch; + const u16 padding = dst_width - (current_width * sample_count); + + switch (sample_count) + { + case 2: + scale_image<2>(dst, data, pixel_size, current_width, current_height, padding); + break; + case 3: + scale_image<3>(dst, data, pixel_size, current_width, current_height, padding); + break; + case 4: + scale_image<4>(dst, data, pixel_size, current_width, current_height, padding); + break; + case 8: + scale_image<8>(dst, data, pixel_size, current_width, current_height, padding); + break; + case 16: + scale_image<16>(dst, data, pixel_size, current_width, current_height, padding); + break; + default: + LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch); + scale_image_fallback(dst, static_cast(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count); + } + } + + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + protect(vm::page_readable, vm::page_writable); + } + + void destroy() + { + if (locked) + unprotect(); + + glDeleteBuffers(1, &pbo_id); + pbo_id = 0; + pbo_size = 0; + + m_fence.destroy(); + } + + bool is_flushed() const + { + return flushed; + } + + void set_flushed(bool state) + { + flushed = state; + } + + void set_copied(bool state) + { + copied = state; + } }; private: - std::vector texture_cache; - std::vector rtt_cache; - u32 frame_ctr; - std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); - u32 max_tex_address = 0; + std::vector m_texture_cache; + std::vector m_rtt_cache; + std::vector m_temporary_surfaces; - bool lock_memory_region(u32 start, u32 size) + std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); + std::pair rtt_cache_range = std::make_pair(0xFFFFFFFF, 0); + + std::mutex m_section_mutex; + + GLGSRender *m_renderer; + std::thread::id m_renderer_thread; + + cached_texture_section *find_texture(u64 texaddr, u32 w, u32 h, u16 mipmaps) { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - if (start < texture_cache_range.first) - texture_cache_range = std::make_pair(start, texture_cache_range.second); - - if ((start+size) > texture_cache_range.second) - texture_cache_range = std::make_pair(texture_cache_range.first, (start+size)); - - return vm::page_protect(start, size, 0, 0, vm::page_writable); - } - - bool unlock_memory_region(u32 start, u32 size) - { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - return vm::page_protect(start, size, 0, vm::page_writable, 0); - } - - void lock_gl_object(gl_cached_texture &obj) - { - static const u32 memory_page_size = 4096; - obj.protected_block_start = obj.data_addr & ~(memory_page_size - 1); - obj.protected_block_sz = (u32)align(obj.block_sz, memory_page_size); - - if (!lock_memory_region(obj.protected_block_start, obj.protected_block_sz)) - LOG_ERROR(RSX, "lock_gl_object failed!"); - else - obj.locked = true; - } - - void unlock_gl_object(gl_cached_texture &obj) - { - if (!unlock_memory_region(obj.protected_block_start, obj.protected_block_sz)) - LOG_ERROR(RSX, "unlock_gl_object failed! Will probably crash soon..."); - else - obj.locked = false; - } - - gl_cached_texture *find_obj_for_params(u64 texaddr, u32 w, u32 h, u16 mipmap) - { - for (gl_cached_texture &tex: texture_cache) + for (cached_texture_section &tex : m_texture_cache) { - if (tex.gl_id && tex.data_addr == texaddr) - { - if (w && h && mipmap && (tex.h != h || tex.w != w || tex.mipmap != mipmap)) - { - continue; - } - - tex.frame_ctr = frame_ctr; + if (tex.matches(texaddr, w, h, mipmaps) && !tex.is_dirty()) return &tex; - } } return nullptr; } - gl_cached_texture& create_obj_for_params(u32 gl_id, u64 texaddr, u32 w, u32 h, u16 mipmap) + cached_texture_section& create_texture(u32 id, u32 texaddr, u32 texsize, u32 w, u32 h, u16 mipmap) { - gl_cached_texture obj = { 0 }; - - obj.gl_id = gl_id; - obj.data_addr = texaddr; - obj.w = w; - obj.h = h; - obj.mipmap = mipmap; - obj.deleted = false; - obj.locked = false; - - for (gl_cached_texture &tex : texture_cache) + for (cached_texture_section &tex : m_texture_cache) { - if (tex.gl_id == 0 || (tex.deleted && (frame_ctr - tex.frame_ctr) > 32768)) + if (tex.is_dirty()) { - if (tex.gl_id) - { - LOG_NOTICE(RSX, "Reclaiming GL texture %d, cache_size=%d, master_ctr=%d, ctr=%d", tex.gl_id, texture_cache.size(), frame_ctr, tex.frame_ctr); - __glcheck glDeleteTextures(1, &tex.gl_id); - unlock_gl_object(tex); - tex.gl_id = 0; - } - - tex = obj; + tex.destroy(); + tex.reset(texaddr, texsize); + tex.create(id, w, h, mipmap); + + texture_cache_range = tex.get_min_max(texture_cache_range); return tex; } } - texture_cache.push_back(obj); - return texture_cache[texture_cache.size()-1]; + cached_texture_section tex; + tex.reset(texaddr, texsize); + tex.create(id, w, h, mipmap); + texture_cache_range = tex.get_min_max(texture_cache_range); + + m_texture_cache.push_back(tex); + return m_texture_cache.back(); } - void remove_obj(gl_cached_texture &tex) + void clear() { - if (tex.locked) - unlock_gl_object(tex); - - tex.deleted = true; - } - - void remove_obj_for_glid(u32 gl_id) - { - for (gl_cached_texture &tex : texture_cache) + for (cached_texture_section &tex : m_texture_cache) { - if (tex.gl_id == gl_id) - remove_obj(tex); - } - } - - void clear_obj_cache() - { - for (gl_cached_texture &tex : texture_cache) - { - if (tex.locked) - unlock_gl_object(tex); - - if (tex.gl_id) - { - LOG_NOTICE(RSX, "Deleting texture %d", tex.gl_id); - glDeleteTextures(1, &tex.gl_id); - } - - tex.deleted = true; - tex.gl_id = 0; + tex.destroy(); } - texture_cache.resize(0); - destroy_rtt_cache(); - } - - bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) - { - //Check for memory area overlap. unlock page(s) if needed and add this index to array. - //Axis separation test - const u32 &block_start = base1; - const u32 block_end = limit1; - - if (limit2 < block_start) return false; - if (base2 > block_end) return false; - - u32 min_separation = (limit2 - base2) + (limit1 - base1); - u32 range_limit = (block_end > limit2) ? block_end : limit2; - u32 range_base = (block_start < base2) ? block_start : base2; - - u32 actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - return true; - - return false; - } - - cached_rtt* find_cached_rtt(u32 base, u32 size) - { - for (cached_rtt &rtt : rtt_cache) + for (cached_rtt_section &rtt : m_rtt_cache) { - if (region_overlaps(base, base+size, rtt.data_addr, rtt.data_addr+rtt.block_sz)) + rtt.destroy(); + } + + m_rtt_cache.resize(0); + m_texture_cache.resize(0); + + clear_temporary_surfaces(); + } + + cached_rtt_section* find_cached_rtt_section(u32 base, u32 size) + { + for (cached_rtt_section &rtt : m_rtt_cache) + { + if (rtt.matches(base, size)) { return &rtt; } @@ -234,194 +489,96 @@ namespace gl return nullptr; } - void invalidate_rtts_in_range(u32 base, u32 size) + cached_rtt_section *create_locked_view_of_section(u32 base, u32 size) { - for (cached_rtt &rtt : rtt_cache) - { - if (!rtt.data_addr || rtt.is_dirty) continue; - - u32 rtt_aligned_base = ((u32)(rtt.data_addr)) & ~(4096 - 1); - u32 rtt_block_sz = align(rtt.block_sz, 4096); - - if (region_overlaps(rtt_aligned_base, (rtt_aligned_base + rtt_block_sz), base, base+size)) - { - rtt.is_dirty = true; - if (rtt.locked) - { - rtt.locked = false; - unlock_memory_region((u32)rtt.data_addr, rtt.block_sz); - } - } - } - } - - void prep_rtt(cached_rtt &rtt, u32 width, u32 height, u32 gl_pixel_format_internal) - { - int binding = 0; - bool is_depth = false; - - if (gl_pixel_format_internal == GL_DEPTH24_STENCIL8 || - gl_pixel_format_internal == GL_DEPTH_COMPONENT24 || - gl_pixel_format_internal == GL_DEPTH_COMPONENT16 || - gl_pixel_format_internal == GL_DEPTH_COMPONENT32) - { - is_depth = true; - } - - glGetIntegerv(GL_TEXTURE_2D_BINDING_EXT, &binding); - glBindTexture(GL_TEXTURE_2D, rtt.copy_glid); - - rtt.current_width = width; - rtt.current_height = height; - - if (!is_depth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - __glcheck glTexImage2D(GL_TEXTURE_2D, 0, gl_pixel_format_internal, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - u32 ex_format = GL_UNSIGNED_SHORT; - u32 in_format = GL_DEPTH_COMPONENT16; - - switch (gl_pixel_format_internal) - { - case GL_DEPTH24_STENCIL8: - { - ex_format = GL_UNSIGNED_INT_24_8; - in_format = GL_DEPTH_STENCIL; - break; - } - case GL_DEPTH_COMPONENT16: - break; - default: - fmt::throw_exception("Unsupported depth format!" HERE); - } - - __glcheck glTexImage2D(GL_TEXTURE_2D, 0, gl_pixel_format_internal, width, height, 0, in_format, ex_format, nullptr); - } - - glBindTexture(GL_TEXTURE_2D, binding); - rtt.is_depth = is_depth; - } - - void save_rtt(u32 base, u32 size, u32 width, u32 height, u32 gl_pixel_format_internal, gl::texture &source) - { - cached_rtt *region = find_cached_rtt(base, size); + cached_rtt_section *region = find_cached_rtt_section(base, size); if (!region) { - for (cached_rtt &rtt : rtt_cache) + for (cached_rtt_section &rtt : m_rtt_cache) { - if (rtt.valid && rtt.data_addr == 0) + if (rtt.is_dirty()) { - prep_rtt(rtt, width, height, gl_pixel_format_internal); - - rtt.block_sz = size; - rtt.data_addr = base; - rtt.is_dirty = true; - - lock_memory_region((u32)rtt.data_addr, rtt.block_sz); - rtt.locked = true; - + rtt.reset(base, size); + rtt.protect(0, vm::page_readable | vm::page_writable); region = &rtt; break; } } - if (!region) fmt::throw_exception("No region created!!" HERE); - } - - if (width != region->current_width || - height != region->current_height) - { - prep_rtt(*region, width, height, gl_pixel_format_internal); - - if (region->locked && region->block_sz != size) + if (!region) { - unlock_memory_region((u32)region->data_addr, region->block_sz); + cached_rtt_section section; + section.reset(base, size); + section.set_dirty(true); + section.protect(0, vm::page_readable | vm::page_writable); - region->block_sz = size; - lock_memory_region((u32)region->data_addr, region->block_sz); - region->locked = true; + m_rtt_cache.push_back(section); + region = &m_rtt_cache.back(); } + + rtt_cache_range = region->get_min_max(rtt_cache_range); + } + else + { + //This section view already exists + if (region->get_section_size() != size) + { + region->unprotect(); + region->reset(base, size); + } + + if (!region->is_locked() || region->is_flushed()) + region->protect(0, vm::page_readable | vm::page_writable); } - __glcheck glCopyImageSubData(source.id(), GL_TEXTURE_2D, 0, 0, 0, 0, - region->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0, - width, height, 1); + return region; + } + + u32 create_temporary_subresource(u32 src_id, GLenum sized_internal_fmt, u16 x, u16 y, u16 width, u16 height) + { + u32 dst_id = 0; + + glGenTextures(1, &dst_id); + glBindTexture(GL_TEXTURE_2D, dst_id); + + glTexStorage2D(GL_TEXTURE_2D, 1, sized_internal_fmt, width, height); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + //Empty GL_ERROR + glGetError(); + + glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0, + dst_id, GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); + + m_temporary_surfaces.push_back(dst_id); + + //Check for error + if (GLenum err = glGetError()) + { + LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); + return 0; + } - region->is_dirty = false; - - if (!region->locked) - { - LOG_WARNING(RSX, "Locking down RTT, was unlocked!"); - lock_memory_region((u32)region->data_addr, region->block_sz); - region->locked = true; - } - } - - void write_rtt(u32 base, u32 size, u32 texaddr) - { - //Actually download the data, since it seems that cell is writing to it manually - fmt::throw_exception("write_rtt" HERE); - } - - void destroy_rtt_cache() - { - for (cached_rtt &rtt : rtt_cache) - { - rtt.valid = false; - rtt.is_dirty = false; - rtt.block_sz = 0; - rtt.data_addr = 0; - - glDeleteTextures(1, &rtt.copy_glid); - rtt.copy_glid = 0; - } - - rtt_cache.resize(0); + return dst_id; } public: - gl_texture_cache() - : frame_ctr(0) + texture_cache() {} + + ~texture_cache() {} + + void initialize(GLGSRender *renderer) { - } - - ~gl_texture_cache() - { - clear_obj_cache(); + m_renderer = renderer; + m_renderer_thread = std::this_thread::get_id(); } - void update_frame_ctr() + void close() { - frame_ctr++; - } - - void initialize_rtt_cache() - { - if (rtt_cache.size()) fmt::throw_exception("Initialize RTT cache while cache already exists! Leaking objects??" HERE); - - for (int i = 0; i < 64; ++i) - { - cached_rtt rtt; - - glGenTextures(1, &rtt.copy_glid); - rtt.is_dirty = true; - rtt.valid = true; - rtt.block_sz = 0; - rtt.data_addr = 0; - rtt.locked = false; - - rtt_cache.push_back(rtt); - } + clear(); } template @@ -432,21 +589,6 @@ namespace gl glActiveTexture(GL_TEXTURE0 + index); - /** - * Give precedence to rtt data obtained through read/write buffers - */ - cached_rtt *rtt = find_cached_rtt(texaddr, range); - - if (rtt && !rtt->is_dirty) - { - u32 real_id = gl_texture.id(); - - gl_texture.set_id(rtt->copy_glid); - gl_texture.bind(); - - gl_texture.set_id(real_id); - } - /** * Check for sampleable rtts from previous render passes */ @@ -463,216 +605,258 @@ namespace gl return; } + /** + * Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise + * (Turbo: Super Stunt Squad does this; bypassing the need for a sync object) + * The engine does not read back the texture resource through cell, but specifies a texture location that is + * a bound render target. We can bypass the expensive download in this case + */ + + surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch()); + if (rsc.surface) + { + //Check that this region is not cpu-dirty before doing a copy + //This section is guaranteed to have a locking section *if* this bit has been bypassed before + + bool upload_from_cpu = false; + + for (cached_rtt_section §ion : m_rtt_cache) + { + if (section.overlaps(std::make_pair(texaddr, range)) && section.is_dirty()) + { + LOG_ERROR(RSX, "Cell wrote to render target section we are uploading from!"); + + upload_from_cpu = true; + break; + } + } + + if (!upload_from_cpu) + { + if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d) + { + LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d", + texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); + } + else + { + const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + GLenum src_format = (GLenum)rsc.surface->get_internal_format(); + GLenum dst_format = std::get<0>(get_format_type(format)); + + u32 bound_index = ~0U; + + if (src_format != dst_format) + { + LOG_WARNING(RSX, "Sampling from a section of a render target, but formats might be incompatible (0x%X vs 0x%X)", src_format, dst_format); + } + + if (!rsc.is_bound) + { + if (rsc.w == tex.width() && rsc.h == tex.height()) + rsc.surface->bind(); + else + bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); + } + else + { + LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); + bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); + } + + if (bound_index) + return; + } + } + } + /** * If all the above failed, then its probably a generic texture. * Search in cache and upload/bind */ - - gl_cached_texture *obj = nullptr; - if (!rtt) - obj = find_obj_for_params(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count()); + cached_texture_section *cached_texture = find_texture(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count()); - if (obj && !obj->deleted) + if (cached_texture) { - u32 real_id = gl_texture.id(); + verify(HERE), cached_texture->is_empty() == false; - gl_texture.set_id(obj->gl_id); + gl_texture.set_id(cached_texture->id()); gl_texture.bind(); - gl_texture.set_id(real_id); + //external gl::texture objects should always be undefined/uninitialized! + gl_texture.set_id(0); + return; } - else + + if (!tex.width() || !tex.height()) { - u32 real_id = gl_texture.id(); - - if (!obj) gl_texture.set_id(0); - else - { - //Reuse this GLid - gl_texture.set_id(obj->gl_id); - - //Empty this slot for another one. A new holder will be created below anyway... - if (obj->locked) unlock_gl_object(*obj); - obj->gl_id = 0; - } - - if (!tex.width() || !tex.height()) - { - LOG_ERROR(RSX, "Texture upload requested but invalid texture dimensions passed"); - return; - } - - __glcheck gl_texture.init(index, tex); - gl_cached_texture &_obj = create_obj_for_params(gl_texture.id(), texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count()); - - _obj.block_sz = (u32)get_texture_size(tex); - lock_gl_object(_obj); - - gl_texture.set_id(real_id); + LOG_ERROR(RSX, "Texture upload requested but invalid texture dimensions passed"); + return; } + + gl_texture.init(index, tex); + + std::lock_guard lock(m_section_mutex); + + cached_texture_section &cached = create_texture(gl_texture.id(), texaddr, get_texture_size(tex), tex.width(), tex.height(), tex.get_exact_mipmap_count()); + cached.protect(0, vm::page_writable); + cached.set_dirty(false); + + //external gl::texture objects should always be undefined/uninitialized! + gl_texture.set_id(0); } - bool mark_as_dirty(u32 address) + void save_rtt(u32 base, u32 size) { - if (address < texture_cache_range.first || - address > texture_cache_range.second) - return false; + std::lock_guard lock(m_section_mutex); - bool response = false; + cached_rtt_section *region = find_cached_rtt_section(base, size); - for (gl_cached_texture &tex: texture_cache) + if (!region) { - if (!tex.locked) continue; - - if (tex.protected_block_start <= address && - tex.protected_block_sz >(address - tex.protected_block_start)) - { - unlock_gl_object(tex); - - invalidate_rtts_in_range((u32)tex.data_addr, tex.block_sz); - - tex.deleted = true; - response = true; - } + LOG_ERROR(RSX, "Attempted to download render target that does not exist. Please report to developers"); + return; } - if (response) return true; - - for (cached_rtt &rtt: rtt_cache) + if (!region->is_locked()) { - if (!rtt.data_addr || rtt.is_dirty) continue; + verify(HERE), region->is_dirty(); + LOG_WARNING(RSX, "Cell write to bound render target area"); - u32 rtt_aligned_base = ((u32)(rtt.data_addr)) & ~(4096 - 1); - u32 rtt_block_sz = align(rtt.block_sz, 4096); - - if (rtt.locked && (u64)address >= rtt_aligned_base) - { - u32 offset = address - rtt_aligned_base; - if (offset >= rtt_block_sz) continue; - - rtt.is_dirty = true; - - unlock_memory_region(rtt_aligned_base, rtt_block_sz); - rtt.locked = false; - - response = true; - } + region->protect(0, vm::page_writable | vm::page_readable); + region->set_dirty(false); } - return response; + region->copy_texture(); } - void save_render_target(u32 texaddr, u32 range, gl::texture &gl_texture) + void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, gl::texture &source) { - save_rtt(texaddr, range, gl_texture.width(), gl_texture.height(), (GLenum)gl_texture.get_internal_format(), gl_texture); - } + std::lock_guard lock(m_section_mutex); - std::vector find_and_invalidate_in_range(u32 base, u32 limit) - { - /** - * Sometimes buffers can share physical pages. - * Return objects if we really encroach on texture - */ + cached_rtt_section *region = create_locked_view_of_section(base, size); - std::vector result; - - for (gl_cached_texture &obj : texture_cache) + if (!region->matches(base, size)) { - //Check for memory area overlap. unlock page(s) if needed and add this index to array. - //Axis separation test - const u32 &block_start = obj.protected_block_start; - const u32 block_end = block_start + obj.protected_block_sz; + //This memory region overlaps our own region, but does not match it exactly + if (region->is_locked()) + region->unprotect(); - if (limit < block_start) continue; - if (base > block_end) continue; - - u32 min_separation = (limit - base) + obj.protected_block_sz; - u32 range_limit = (block_end > limit) ? block_end : limit; - u32 range_base = (block_start < base) ? block_start : base; - - u32 actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - { - const u32 texture_start = (u32)obj.data_addr; - const u32 texture_end = texture_start + obj.block_sz; - - min_separation = (limit - base) + obj.block_sz; - range_limit = (texture_end > limit) ? texture_end : limit; - range_base = (texture_start < base) ? texture_start : base; - - actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - { - //Texture area is invalidated! - unlock_gl_object(obj); - obj.deleted = true; - - continue; - } - - //Overlap in this case will be at most 1 page... - invalid_cache_area invalid = { 0 }; - if (base < obj.data_addr) - invalid.block_base = obj.protected_block_start; - else - invalid.block_base = obj.protected_block_start + obj.protected_block_sz - 4096; - - invalid.block_sz = 4096; - unlock_memory_region(invalid.block_base, invalid.block_sz); - result.push_back(invalid); - } + region->reset(base, size); + region->protect(0, vm::page_readable | vm::page_writable); } - return result; + region->set_dimensions(width, height, pitch); + region->set_format(format, type); + region->set_dirty(false); + region->set_flushed(false); + region->set_copied(false); + region->set_source(source); + + verify(HERE), region->is_locked() == true; } - void lock_invalidated_ranges(std::vector invalid) - { - for (invalid_cache_area area : invalid) - { - lock_memory_region(area.block_base, area.block_sz); - } - } - - void remove_in_range(u32 texaddr, u32 range) - { - //Seems that the rsx only 'reads' full texture objects.. - //This simplifies this function to simply check for matches - for (gl_cached_texture &cached : texture_cache) - { - if (cached.data_addr == texaddr && - cached.block_sz == range) - remove_obj(cached); - } - } - - bool explicit_writeback(gl::texture &tex, const u32 address, const u32 pitch) + bool load_rtt(gl::texture &tex, const u32 address, const u32 pitch) { const u32 range = tex.height() * pitch; - cached_rtt *rtt = find_cached_rtt(address, range); + cached_rtt_section *rtt = find_cached_rtt_section(address, range); - if (rtt && !rtt->is_dirty) + if (rtt && !rtt->is_dirty()) { - u32 min_w = rtt->current_width; - u32 min_h = rtt->current_height; - - if ((u32)tex.width() < min_w) min_w = (u32)tex.width(); - if ((u32)tex.height() < min_h) min_h = (u32)tex.height(); - - //TODO: Image reinterpretation e.g read back rgba data as depth texture and vice-versa - - __glcheck glCopyImageSubData(rtt->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0, - tex.id(), GL_TEXTURE_2D, 0, 0, 0, 0, - min_w, min_h, 1); - + rtt->fill_texture(tex); return true; } //No valid object found in cache return false; } + + bool mark_as_dirty(u32 address) + { + bool response = false; + + if (address >= texture_cache_range.first && + address < texture_cache_range.second) + { + std::lock_guard lock(m_section_mutex); + + for (cached_texture_section &tex : m_texture_cache) + { + if (!tex.is_locked()) continue; + + if (tex.overlaps(address)) + { + tex.unprotect(); + tex.set_dirty(true); + + response = true; + } + } + } + + if (address >= rtt_cache_range.first && + address < rtt_cache_range.second) + { + std::lock_guard lock(m_section_mutex); + + for (cached_rtt_section &rtt : m_rtt_cache) + { + if (rtt.is_dirty()) continue; + + if (rtt.is_locked() && rtt.overlaps(address)) + { + rtt.unprotect(); + rtt.set_dirty(true); + + response = true; + } + } + } + + return response; + } + + void invalidate_range(u32 base, u32 size) + { + std::lock_guard lock(m_section_mutex); + std::pair range = std::make_pair(base, size); + + if (base < texture_cache_range.second && + (base + size) >= texture_cache_range.first) + { + for (cached_texture_section &tex : m_texture_cache) + { + if (!tex.is_dirty() && tex.overlaps(range)) + tex.destroy(); + } + } + + if (base < rtt_cache_range.second && + (base + size) >= rtt_cache_range.first) + { + for (cached_rtt_section &rtt : m_rtt_cache) + { + if (!rtt.is_dirty() && rtt.overlaps(range)) + { + rtt.unprotect(); + rtt.set_dirty(true); + } + } + } + } + + bool flush_section(u32 address); + + void clear_temporary_surfaces() + { + for (u32 &id : m_temporary_surfaces) + { + glDeleteTextures(1, &id); + } + + m_temporary_surfaces.clear(); + } }; -} +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index e4d86e41dc..b38b9dd89d 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -391,6 +391,9 @@ namespace rsx // TODO: exit condition while (!Emu.IsStopped()) { + //Execute backend-local tasks first + do_local_task(); + const u32 get = ctrl->get; const u32 put = ctrl->put; @@ -634,6 +637,8 @@ namespace rsx std::vector> thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const { std::vector> result; + result.reserve(rsx::limits::vertex_count); + u32 input_mask = state.vertex_attrib_input_mask(); for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { @@ -835,7 +840,7 @@ namespace rsx return result; } - RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, bool)> get_surface_info) const + RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const { RSXFragmentProgram result = {}; u32 shader_program = rsx::method_registers.shader_program_address(); @@ -883,7 +888,7 @@ namespace rsx bool surface_exists; u16 surface_pitch; - std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, false); + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false); if (surface_exists && surface_pitch) { @@ -892,7 +897,7 @@ namespace rsx } else { - std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, true); + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true); if (surface_exists) { u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index e63fa3e2d1..4627828a66 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -219,7 +219,7 @@ namespace rsx * get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth * returns whether surface is a render target and surface pitch in native format */ - RSXFragmentProgram get_current_fragment_program(std::function(u32, bool)> get_surface_info) const; + RSXFragmentProgram get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const; public: double fps_limit = 59.94; @@ -239,6 +239,11 @@ namespace rsx virtual void on_task() override; virtual void on_exit() override; + + /** + * Execute a backend local task queue + */ + virtual void do_local_task() {} public: virtual std::string get_name() const override; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b7fcf1972c..50378ce3c0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -981,7 +981,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { vk::render_target *surface = nullptr; if (!is_depth) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 39bb3b09aa..611116aabc 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -6,143 +6,132 @@ namespace vk { - struct cached_texture_object + class cached_texture_section : public rsx::buffered_section { - u32 native_rsx_address; - u32 native_rsx_size; - u16 width; u16 height; u16 depth; u16 mipmaps; + std::unique_ptr uploaded_image_view; std::unique_ptr uploaded_texture; - u64 protected_rgn_start; - u64 protected_rgn_end; - - bool exists = false; - bool locked = false; - bool dirty = true; + public: + + cached_texture_section() {} + + void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image_view *view, vk::image *image) + { + width = w; + height = h; + this->depth = depth; + this->mipmaps = mipmaps; + + uploaded_image_view.reset(view); + uploaded_texture.reset(image); + } + + bool matches(u32 rsx_address, u32 rsx_size) const + { + return rsx::buffered_section::matches(rsx_address, rsx_size); + } + + bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const + { + if (rsx_address == cpu_address_base) + { + if (!width && !height && !mipmaps) + return true; + + return (width == this->width && height == this->height && mipmaps == this->mipmaps); + } + + return false; + } + + bool exists() const + { + return (uploaded_texture.get() != nullptr); + } + + u16 get_width() const + { + return width; + } + + u16 get_height() const + { + return height; + } + + std::unique_ptr& get_view() + { + return uploaded_image_view; + } + + std::unique_ptr& get_texture() + { + return uploaded_texture; + } }; class texture_cache { private: - std::vector m_cache; + std::vector m_cache; std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); std::vector > m_temporary_image_view; std::vector> m_dirty_textures; - bool lock_memory_region(u32 start, u32 size) + cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - return vm::page_protect(start, size, 0, 0, vm::page_writable); - } - - bool unlock_memory_region(u32 start, u32 size) - { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - return vm::page_protect(start, size, 0, vm::page_writable, 0); - } - - bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) - { - //Check for memory area overlap. unlock page(s) if needed and add this index to array. - //Axis separation test - const u32 &block_start = base1; - const u32 block_end = limit1; - - if (limit2 < block_start) return false; - if (base2 > block_end) return false; - - u32 min_separation = (limit2 - base2) + (limit1 - base1); - u32 range_limit = (block_end > limit2) ? block_end : limit2; - u32 range_base = (block_start < base2) ? block_start : base2; - - u32 actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - return true; - - return false; - } - - cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) - { - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (!tex.dirty && tex.exists && - tex.native_rsx_address == rsx_address && - tex.native_rsx_size == rsx_size) + if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) { if (!confirm_dimensions) return tex; - if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps) + if (tex.matches(rsx_address, width, height, mipmaps)) return tex; else { LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters."); - LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height); + LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height()); } } } - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (tex.dirty) + if (tex.is_dirty()) { - if (tex.exists) + if (tex.exists()) { - m_dirty_textures.push_back(std::move(tex.uploaded_texture)); - tex.exists = false; + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); } return tex; } } - m_cache.push_back(cached_texture_object()); + m_cache.push_back(cached_texture_section()); return m_cache[m_cache.size() - 1]; } - void lock_object(cached_texture_object &obj) - { - static const u32 memory_page_size = 4096; - obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1); - obj.protected_rgn_end = (u32)align(obj.native_rsx_size, memory_page_size); - obj.protected_rgn_end += obj.protected_rgn_start; - - lock_memory_region(static_cast(obj.protected_rgn_start), static_cast(obj.native_rsx_size)); - - if (obj.protected_rgn_start < texture_cache_range.first) - texture_cache_range = std::make_pair(obj.protected_rgn_start, texture_cache_range.second); - - if (obj.protected_rgn_end > texture_cache_range.second) - texture_cache_range = std::make_pair(texture_cache_range.first, obj.protected_rgn_end); - } - - void unlock_object(cached_texture_object &obj) - { - unlock_memory_region(static_cast(obj.protected_rgn_start), static_cast(obj.native_rsx_size)); - } - void purge_cache() { - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (tex.exists) - m_dirty_textures.push_back(std::move(tex.uploaded_texture)); + if (tex.exists()) + { + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); + } - if (tex.locked) - unlock_object(tex); + if (tex.is_locked()) + tex.unprotect(); } m_temporary_image_view.clear(); @@ -196,12 +185,6 @@ namespace vk return m_temporary_image_view.back().get(); } - cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.get_exact_mipmap_count()); - if (cto.exists && !cto.dirty) - { - return cto.uploaded_image_view.get(); - } - u32 raw_format = tex.format(); u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); @@ -213,6 +196,7 @@ namespace vk u16 height = 0; u16 depth = 0; u8 layer = 0; + switch (tex.get_extended_texture_dimension()) { case rsx::texture_dimension_extended::texture_dimension_1d: @@ -245,6 +229,12 @@ namespace vk break; } + cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, tex.get_exact_mipmap_count()); + if (region.exists() && !region.is_dirty()) + { + return region.get_view().get(); + } + bool is_cubemap = tex.get_extended_texture_dimension() == rsx::texture_dimension_extended::texture_dimension_cubemap; VkImageSubresourceRange subresource_range = vk::get_image_subresource_range(0, 0, is_cubemap ? 6 : 1, tex.get_exact_mipmap_count(), VK_IMAGE_ASPECT_COLOR_BIT); @@ -255,33 +245,29 @@ namespace vk return nullptr; } - cto.uploaded_texture = std::make_unique(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, vk_format, tex.width(), height, depth, tex.get_exact_mipmap_count(), layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0); - change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - cto.uploaded_image_view = std::make_unique(*vk::get_current_renderer(), cto.uploaded_texture->value, image_view_type, vk_format, + vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, image_view_type, vk_format, mapping, subresource_range); - copy_mipmaped_image_using_buffer(cmd, cto.uploaded_texture->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(), + copy_mipmaped_image_using_buffer(cmd, image->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(), upload_heap, upload_buffer); - change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); - cto.exists = true; - cto.dirty = false; - cto.native_rsx_address = texaddr; - cto.native_rsx_size = range; - cto.width = tex.width(); - cto.height = tex.height(); - cto.mipmaps = tex.get_exact_mipmap_count(); - - lock_object(cto); + region.reset(texaddr, range); + region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image); + region.protect(0, vm::page_writable); + region.set_dirty(false); - return cto.uploaded_image_view.get(); + texture_cache_range = region.get_min_max(texture_cache_range); + return view; } bool invalidate_address(u32 rsx_address) @@ -290,23 +276,22 @@ namespace vk rsx_address > texture_cache_range.second) return false; - for (cached_texture_object &tex : m_cache) + bool response = false; + + for (auto &tex : m_cache) { - if (tex.dirty) continue; + if (tex.is_dirty()) continue; - if (rsx_address >= tex.protected_rgn_start && - rsx_address < tex.protected_rgn_end) + if (tex.overlaps(rsx_address)) { - unlock_object(tex); + tex.set_dirty(true); + tex.unprotect(); - tex.native_rsx_address = 0; - tex.dirty = true; - - return true; + response = true; } } - return false; + return response; } void flush() diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 6fc12d683f..f7bdc32720 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -1,5 +1,6 @@ #pragma once #include +#include "Emu/Memory/vm.h" namespace rsx { @@ -64,4 +65,140 @@ namespace rsx program_info get(raw_program raw_program_, decompile_language lang); void clear(); }; + + class buffered_section + { + protected: + u32 cpu_address_base = 0; + u32 cpu_address_range = 0; + + u32 locked_address_base = 0; + u32 locked_address_range = 0; + + u32 memory_protection = 0; + + bool locked = false; + bool dirty = false; + + bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) + { + //Check for memory area overlap. unlock page(s) if needed and add this index to array. + //Axis separation test + const u32 &block_start = base1; + const u32 block_end = limit1; + + if (limit2 < block_start) return false; + if (base2 > block_end) return false; + + u32 min_separation = (limit2 - base2) + (limit1 - base1); + u32 range_limit = (block_end > limit2) ? block_end : limit2; + u32 range_base = (block_start < base2) ? block_start : base2; + + u32 actual_separation = (range_limit - range_base); + + if (actual_separation < min_separation) + return true; + + return false; + } + + public: + + buffered_section() {} + ~buffered_section() {} + + void reset(u32 base, u32 length) + { + verify(HERE), locked == false; + + cpu_address_base = base; + cpu_address_range = length; + + locked_address_base = (base & ~4095); + locked_address_range = align(base + length, 4096) - locked_address_base; + + memory_protection = vm::page_readable | vm::page_writable; + + locked = false; + } + + bool protect(u8 flags_set, u8 flags_clear) + { + if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear)) + { + memory_protection &= ~flags_clear; + memory_protection |= flags_set; + + locked = memory_protection != (vm::page_readable | vm::page_writable); + } + else + fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base); + + return false; + } + + bool unprotect() + { + u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection; + + if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0)) + { + memory_protection = (vm::page_writable | vm::page_readable); + locked = false; + return true; + } + else + fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base); + + return false; + } + + bool overlaps(std::pair range) + { + return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); + } + + bool overlaps(u32 address) + { + return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); + } + + bool is_locked() const + { + return locked; + } + + bool is_dirty() const + { + return dirty; + } + + void set_dirty(bool state) + { + dirty = state; + } + + u32 get_section_base() const + { + return cpu_address_base; + } + + u32 get_section_size() const + { + return cpu_address_range; + } + + bool matches(u32 cpu_address, u32 size) const + { + return (cpu_address_base == cpu_address && cpu_address_range == size); + } + + std::pair get_min_max(std::pair current_min_max) + { + u32 min = std::min(current_min_max.first, locked_address_base); + u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range); + + return std::make_pair(min, max); + } + }; } diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 55fc2d85f1..c50c613aed 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -109,6 +109,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 932ef00516..35bae19d35 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -10,6 +10,7 @@ +