diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 2322be1176..d7dddfd1cb 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -363,34 +363,34 @@ u8 get_format_block_size_in_texel(int format) LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format); return 1; } -} - -u8 get_format_block_size_in_bytes(rsx::surface_color_format format) -{ - switch (format) - { - case rsx::surface_color_format::b8: - return 1; - case rsx::surface_color_format::g8b8: - case rsx::surface_color_format::r5g6b5: - case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return 2; - case rsx::surface_color_format::a8b8g8r8: - case rsx::surface_color_format::a8r8g8b8: - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x32: - return 4; - case rsx::surface_color_format::w16z16y16x16: - return 8; - case rsx::surface_color_format::w32z32y32x32: - return 16; - default: - fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format); - } +} + +u8 get_format_block_size_in_bytes(rsx::surface_color_format format) +{ + switch (format) + { + case rsx::surface_color_format::b8: + return 1; + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::r5g6b5: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return 2; + case rsx::surface_color_format::a8b8g8r8: + case rsx::surface_color_format::a8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x32: + return 4; + case rsx::surface_color_format::w16z16y16x16: + return 8; + case rsx::surface_color_format::w32z32y32x32: + return 16; + default: + fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format); + } } static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index e2058af742..c736894baa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -41,7 +41,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) void D3D12GSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { ID3D12Resource *surface = nullptr; if (!is_depth) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 9ccdaf0eeb..0de8c586b6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -194,6 +194,12 @@ void GLGSRender::begin() if (!draw_fbo.check()) return; + if (surface_clear_flags) + { + clear_surface(surface_clear_flags); + surface_clear_flags = 0; + } + std::chrono::time_point then = steady_clock::now(); bool color_mask_b = rsx::method_registers.color_mask_b(); @@ -332,7 +338,6 @@ void GLGSRender::begin() std::chrono::time_point now = steady_clock::now(); m_begin_time += 
(u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count(); - m_draw_calls++; } namespace @@ -381,8 +386,6 @@ void GLGSRender::end() m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); } - draw_fbo.bind(); - //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); @@ -473,10 +476,17 @@ void GLGSRender::end() draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count); } + m_attrib_ring_buffer->notify(); + m_index_ring_buffer->notify(); + m_uniform_ring_buffer->notify(); + std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count(); - write_buffers(); + m_draw_calls++; + + //LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls); + synchronize_buffers(); rsx::thread::end(); } @@ -549,6 +559,8 @@ void GLGSRender::on_init_thread() if (g_cfg_rsx_overlay) m_text_printer.init(); + + m_gl_texture_cache.initialize(this); } void GLGSRender::on_exit() @@ -587,11 +599,12 @@ void GLGSRender::on_exit() m_index_ring_buffer->remove(); m_text_printer.close(); + m_gl_texture_cache.close(); return GSRender::on_exit(); } -void nv4097_clear_surface(u32 arg, GLGSRender* renderer) +void GLGSRender::clear_surface(u32 arg) { if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return; @@ -601,9 +614,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) return; } - renderer->init_buffers(true); - renderer->draw_fbo.bind(); - GLbitfield mask = 0; rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt(); @@ -617,6 +627,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) glDepthMask(GL_TRUE); glClearDepth(double(clear_depth) / max_depth_value); mask |= GLenum(gl::buffers::depth); + + gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); + if (ds && !ds->cleared()) + ds->set_cleared(); } if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2)) @@ -643,46 +657,28 @@ } glClear(mask); - //renderer->write_buffers(); } -using rsx_method_impl_t = void(*)(u32, GLGSRender*); - -static const std::unordered_map<u32, rsx_method_impl_t> g_gl_method_tbl = -{ - { NV4097_CLEAR_SURFACE, nv4097_clear_surface } -}; - bool GLGSRender::do_method(u32 cmd, u32 arg) { - auto found = g_gl_method_tbl.find(cmd); - - if (found == g_gl_method_tbl.end()) - { - return false; - } - - found->second(arg, this); - switch (cmd) { case NV4097_CLEAR_SURFACE: - { - if (arg & 0x1) - { - gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && !ds->cleared()) - ds->set_cleared(); - } - } + init_buffers(true); + surface_clear_flags |= arg; + return true; + case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: + case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: + flush_draw_buffers = true; + return true; } - return true; + return false; } bool GLGSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16> + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16> { gl::render_target *surface = nullptr; if (!is_depth) @@ -690,7 +686,15 @@ bool GLGSRender::load_program() else surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - if (!surface) return std::make_tuple(false, 0); + if (!surface) + { + auto rsc = 
m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); + if (!rsc.surface || rsc.is_depth_surface != is_depth) + return std::make_tuple(false, 0); + + surface = rsc.surface; + } + return std::make_tuple(true, surface->get_native_pitch()); }; @@ -817,17 +821,8 @@ void GLGSRender::flip(int buffer) rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); u32 absolute_address = buffer_region.address + buffer_region.base; - if (0) - { - LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL)); - } - gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address); - /** - * Calling read_buffers will overwrite cached content - */ - __glcheck m_flip_fbo.recreate(); m_flip_fbo.bind(); @@ -875,33 +870,27 @@ void GLGSRender::flip(int buffer) areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); coordi aspect_ratio; - if (1) //enable aspect ratio + + sizei csize(m_frame->client_width(), m_frame->client_height()); + sizei new_size = csize; + + const double aq = (double)buffer_width / buffer_height; + const double rq = (double)new_size.width / new_size.height; + const double q = aq / rq; + + if (q > 1.0) { - sizei csize(m_frame->client_width(), m_frame->client_height()); - sizei new_size = csize; - - const double aq = (double)buffer_width / buffer_height; - const double rq = (double)new_size.width / new_size.height; - const double q = aq / rq; - - if (q > 1.0) - { - new_size.height = int(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = int(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - - aspect_ratio.size = new_size; + new_size.height = int(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; } - else + else if (q < 1.0) { - aspect_ratio.size = { m_frame->client_width(), m_frame->client_height() }; + new_size.width = int(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; } + aspect_ratio.size = new_size; + gl::screen.clear(gl::buffers::color_depth_stencil); __glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical()); @@ -926,6 +915,8 @@ m_vertex_upload_time = 0; m_textures_upload_time = 0; + m_gl_texture_cache.clear_temporary_surfaces(); + for (auto &tex : m_rtts.invalidated_resources) { tex->remove(); @@ -949,3 +940,43 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) else return m_gl_texture_cache.flush_section(address); } + +void GLGSRender::do_local_task() +{ + std::lock_guard<std::mutex> lock(queue_guard); + + for (work_item& q: work_queue) + { + std::unique_lock<std::mutex> lock(q.guard_mutex); + + //Process this address + q.result = m_gl_texture_cache.flush_section(q.address_to_flush); + q.processed = true; + + //Notify thread waiting on this + lock.unlock(); + q.cv.notify_one(); + } + + work_queue.clear(); +} + +work_item& GLGSRender::post_flush_request(u32 address) +{ + std::lock_guard<std::mutex> lock(queue_guard); + + work_queue.emplace_back(); + work_item &result = work_queue.back(); + result.address_to_flush = address; + return result; +} + +void GLGSRender::synchronize_buffers() +{ + if (flush_draw_buffers) + { + //LOG_WARNING(RSX, "Flushing RTT buffers EID=%d", m_draw_calls); + write_buffers(); + flush_draw_buffers = false; + } +} diff --git 
a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 9576bf0997..ba879f64fd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -12,6 +12,40 @@ #pragma comment(lib, "opengl32.lib") +struct work_item +{ + std::condition_variable cv; + std::mutex guard_mutex; + + u32 address_to_flush; + bool processed = false; + bool result = false; +}; + +struct gcm_buffer_info +{ + u32 address = 0; + u32 pitch = 0; + + bool is_depth_surface; + + rsx::surface_color_format color_format; + rsx::surface_depth_format depth_format; + + u16 width; + u16 height; + + gcm_buffer_info() + { + address = 0; + pitch = 0; + } + + gcm_buffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h) + :address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h) + {} +}; + class GLGSRender : public GSRender { private: @@ -49,6 +83,15 @@ private: gl::text_writer m_text_printer; + std::mutex queue_guard; + std::list<work_item> work_queue; + + gcm_buffer_info surface_info[rsx::limits::color_buffers_count]; + gcm_buffer_info depth_surface_info; + + u32 surface_clear_flags = 0; + bool flush_draw_buffers = false; + public: gl::fbo draw_fbo; @@ -72,6 +115,8 @@ private: // Return element to draw and in case of indexed draw index type and offset in index buffer std::tuple > > set_vertex_buffer(); + void clear_surface(u32 arg); + public: bool load_program(); void init_buffers(bool skip_reading = false); @@ -79,6 +124,9 @@ public: void write_buffers(); void set_viewport(); + void synchronize_buffers(); + work_item& post_flush_request(u32 address); + protected: void begin() override; void end() override; @@ -89,6 +137,8 @@ protected: void flip(int buffer) override; u64 timestamp() const override; + void do_local_task() override; + bool on_access_violation(u32 address, bool is_writing) override; virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 953a753ca8..2baac4a05c 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -67,6 +67,105 @@ namespace gl } }; + class fence + { + GLsync m_value = nullptr; + GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; + + public: + + fence() {} + ~fence() {} + + void create() + { + m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + + void destroy() + { + glDeleteSync(m_value); + m_value = nullptr; + } + + void reset() + { + if (m_value != nullptr) + destroy(); + + create(); + } + + bool is_empty() + { + return (m_value == nullptr); + } + + bool check_signaled() + { + verify(HERE), m_value != nullptr; + + if (flags) + { + GLenum err = glClientWaitSync(m_value, flags, 0); + flags = 0; + return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); + } + else + { + GLint status = GL_UNSIGNALED; + GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + return (status == GL_SIGNALED); + } + } + + bool wait_for_signal() + { + verify(HERE), m_value != nullptr; + + GLenum err = GL_WAIT_FAILED; + bool done = false; + + while (!done) + { + if (flags) + { + err = glClientWaitSync(m_value, flags, 1000); + flags = 0; + + switch (err) + { + default: + LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err); + case GL_ALREADY_SIGNALED: + case GL_CONDITION_SATISFIED: + done = true; + break; + case GL_TIMEOUT_EXPIRED: + continue; + } + } + else + { + GLint status = GL_UNSIGNALED; 
+ GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + + if (status == GL_SIGNALED) + break; + } + } + + glDeleteSync(m_value); + m_value = nullptr; + + return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); + } + }; + template class save_binding_state_base { @@ -594,33 +693,7 @@ namespace gl u32 m_limit = 0; void *m_memory_mapping = nullptr; - GLsync m_fence = nullptr; - - void wait_for_sync() - { - verify(HERE), m_fence != nullptr; - - bool done = false; - while (!done) - { - //Check if we are finished, wait time = 1us - GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000); - switch (err) - { - default: - LOG_ERROR(RSX, "err Returned 0x%X", err); - case GL_ALREADY_SIGNALED: - case GL_CONDITION_SATISFIED: - done = true; - break; - case GL_TIMEOUT_EXPIRED: - continue; - } - } - - glDeleteSync(m_fence); - m_fence = nullptr; - } + fence m_fence; public: @@ -628,7 +701,7 @@ namespace gl { if (m_id) { - wait_for_sync(); + m_fence.wait_for_signal(); remove(); } @@ -656,17 +729,15 @@ namespace gl if ((offset + alloc_size) > m_limit) { - //TODO: Measure the stall here - wait_for_sync(); + if (!m_fence.is_empty()) + m_fence.wait_for_signal(); + m_data_loc = 0; offset = 0; } if (!m_data_loc) - { - verify(HERE), m_fence == nullptr; - m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } + m_fence.reset(); //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently m_data_loc = align(offset + alloc_size, 256); @@ -697,6 +768,13 @@ namespace gl { glBindBufferRange((GLenum)current_target(), index, id(), offset, size); } + + //Notification of a draw command + virtual void notify() + { + if (m_fence.is_empty()) + m_fence.reset(); + } }; class legacy_ring_buffer : public ring_buffer @@ -790,6 +868,8 @@ namespace gl m_mapped_bytes = 0; m_mapping_offset = 0; } + + void notify() override {} }; class vao @@ -1019,7 +1099,16 @@ namespace gl compressed_rgb_s3tc_dxt1 = GL_COMPRESSED_RGB_S3TC_DXT1_EXT, compressed_rgba_s3tc_dxt1 = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, compressed_rgba_s3tc_dxt3 = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, - compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT + compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, + + //Sized internal formats, see opengl spec document on glTexImage2D, table 3 + rgba8 = GL_RGBA8, + r5g6b5 = GL_RGB565, + r8 = GL_R8, + rg8 = GL_RG8, + r32f = GL_R32F, + rgba16f = GL_RGBA16F, + rgba32f = GL_RGBA32F }; enum class wrap diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 485bb110dc..e85ff6f534 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -185,6 +185,7 @@ OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage); //ARB_sync OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync); OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync); +OPENGL_PROC(PFNGLGETSYNCIVPROC, GetSynciv); OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync); //KHR_debug diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index c3e88c1bcb..7220a5be89 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -44,9 +44,10 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma case rsx::surface_color_format::x32: return{ ::gl::texture::type::f32, ::gl::texture::format::red, false, 1, 4 }; - case rsx::surface_color_format::a8b8g8r8: default: LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format); + + 
case rsx::surface_color_format::a8b8g8r8: return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 }; } } @@ -75,92 +76,46 @@ u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format) fmt::throw_exception("Unknown depth format" HERE); } - -void GLGSRender::init_buffers(bool skip_reading) +::gl::texture::internal_format rsx::internals::sized_internal_format(rsx::surface_color_format color_format) { - u16 clip_horizontal = rsx::method_registers.surface_clip_width(); - u16 clip_vertical = rsx::method_registers.surface_clip_height(); - - set_viewport(); - - if (draw_fbo && !m_rtts_dirty) + switch (color_format) { - return; + case rsx::surface_color_format::r5g6b5: + return ::gl::texture::internal_format::r5g6b5; + + case rsx::surface_color_format::a8r8g8b8: + return ::gl::texture::internal_format::rgba8; + + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + return ::gl::texture::internal_format::rgba8; + + case rsx::surface_color_format::w16z16y16x16: + return ::gl::texture::internal_format::rgba16f; + + case rsx::surface_color_format::w32z32y32x32: + return ::gl::texture::internal_format::rgba32f; + + case rsx::surface_color_format::b8: + return ::gl::texture::internal_format::r8; + + case rsx::surface_color_format::g8b8: + return ::gl::texture::internal_format::rg8; + + case rsx::surface_color_format::x32: + return ::gl::texture::internal_format::r32f; + + case rsx::surface_color_format::a8b8g8r8: + return ::gl::texture::internal_format::rgba8; + + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format); + return ::gl::texture::internal_format::rgba8; } - - m_rtts_dirty = false; - - if (0) - { - LOG_NOTICE(RSX, "render to -> 0x%x", get_color_surface_addresses()[0]); - } - - m_rtts.prepare_render_target(nullptr, rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), clip_horizontal, clip_vertical, - rsx::method_registers.surface_color_target(), - get_color_surface_addresses(), get_zeta_surface_address()); - - draw_fbo.recreate(); - - for (int i = 0; i < rsx::limits::color_buffers_count; ++i) - { - if (std::get<0>(m_rtts.m_bound_render_targets[i])) - { - __glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]); - } - } - - if (std::get<0>(m_rtts.m_bound_depth_stencil)) - { - __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); - } - - if (!draw_fbo.check()) - return; - - //HACK: read_buffer shouldn't be there - switch (rsx::method_registers.surface_color_target()) - { - case rsx::surface_target::none: break; - - case rsx::surface_target::surface_a: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surface_b: - __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]); - __glcheck draw_fbo.read_buffer(draw_fbo.color[1]); - break; - - case rsx::surface_target::surfaces_a_b: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surfaces_a_b_c: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); - __glcheck 
draw_fbo.read_buffer(draw_fbo.color[0]); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); - __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); - break; - } -} - -std::array, 4> GLGSRender::copy_render_targets_to_memory() -{ - int clip_w = rsx::method_registers.surface_clip_width(); - int clip_h = rsx::method_registers.surface_clip_height(); - return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h); -} - -std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_memory() -{ - int clip_w = rsx::method_registers.surface_clip_width(); - int clip_h = rsx::method_registers.surface_clip_height(); - return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h); } namespace @@ -196,6 +151,162 @@ namespace } } +void GLGSRender::init_buffers(bool skip_reading) +{ + //NOTE 1: Sometimes, we process clear before sync flushing rsx buffers. Leads to downloading of blank data + //Clearing of surfaces is deferred to handle this + //NOTE 2: It is possible for a game to do: + //1. Bind buffer 1 + //2. Clear + //3. Bind buffer 2 without touching 1 + //4. Clear + //5. Bind buffer 1 + //6. Draw without clear + + if (draw_fbo && !m_rtts_dirty) + { + set_viewport(); + return; + } + + //We are about to change buffers, flush any pending requests for the old buffers + //LOG_WARNING(RSX, "Render targets have changed; checking for sync points (EID=%d)", m_draw_calls); + synchronize_buffers(); + + //If the old buffers were dirty, clear them before we bind new buffers + if (surface_clear_flags) + { + clear_surface(surface_clear_flags); + surface_clear_flags = 0; + } + + m_rtts_dirty = false; + + const u16 clip_horizontal = rsx::method_registers.surface_clip_width(); + const u16 clip_vertical = rsx::method_registers.surface_clip_height(); + + const auto pitchs = get_pitchs(); + const auto surface_format = rsx::method_registers.surface_color(); + const auto depth_format = rsx::method_registers.surface_depth_fmt(); + + const auto surface_addresses = get_color_surface_addresses(); + const auto depth_address = get_zeta_surface_address(); + + m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical, + rsx::method_registers.surface_color_target(), + surface_addresses, depth_address); + + draw_fbo.recreate(); + + for (int i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (std::get<0>(m_rtts.m_bound_render_targets[i])) + { + __glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]); + + std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]); + surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical }; + } + else + surface_info[i] = {}; + } + + if (std::get<0>(m_rtts.m_bound_depth_stencil)) + { + __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); + + std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); + depth_surface_info = { depth_address, rsx::method_registers.surface_z_pitch(), true, surface_format, depth_format, clip_horizontal, clip_vertical }; + } + else + depth_surface_info = {}; + + if (!draw_fbo.check()) + return; + + draw_fbo.bind(); + set_viewport(); + + switch (rsx::method_registers.surface_color_target()) + { + case rsx::surface_target::none: break; + + case rsx::surface_target::surface_a: + __glcheck 
draw_fbo.draw_buffer(draw_fbo.color[0]); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surface_b: + __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]); + __glcheck draw_fbo.read_buffer(draw_fbo.color[1]); + break; + + case rsx::surface_target::surfaces_a_b: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surfaces_a_b_c: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + + case rsx::surface_target::surfaces_a_b_c_d: + __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] }); + __glcheck draw_fbo.read_buffer(draw_fbo.color[0]); + break; + } + + //Mark buffer regions as NO_ACCESS on Cell visible side + if (g_cfg_rsx_write_color_buffers) + { + auto color_format = rsx::internals::surface_color_format_to_gl(surface_format); + + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (!surface_info[i].address || pitchs[i] <= 64) continue; + + const u32 range = surface_info[i].pitch * surface_info[i].height; + m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, + color_format.format, color_format.type, *std::get<1>(m_rtts.m_bound_render_targets[i])); + } + } + + if (g_cfg_rsx_write_depth_buffer) + { + if (depth_surface_info.address && rsx::method_registers.surface_z_pitch() > 64) + { + auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format); + + u32 pitch = depth_surface_info.width * 2; + if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; + + const u32 range = pitch * depth_surface_info.height; + + //TODO: Verify that depth surface pitch variance affects results + if (pitch != depth_surface_info.pitch) + LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch); + + m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, + depth_format_gl.format, depth_format_gl.type, *std::get<1>(m_rtts.m_bound_depth_stencil)); + } + } +} + +std::array, 4> GLGSRender::copy_render_targets_to_memory() +{ + int clip_w = rsx::method_registers.surface_clip_width(); + int clip_h = rsx::method_registers.surface_clip_height(); + return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h); +} + +std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_memory() +{ + int clip_w = rsx::method_registers.surface_clip_width(); + int clip_h = rsx::method_registers.surface_clip_height(); + return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h); +} + void GLGSRender::read_buffers() { if (!draw_fbo) @@ -334,82 +445,34 @@ void GLGSRender::write_buffers() if (g_cfg_rsx_write_color_buffers) { - auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color()); - auto write_color_buffers = [&](int index, int count) { - u32 width = rsx::method_registers.surface_clip_width(); - u32 height = rsx::method_registers.surface_clip_height(); - - std::array offsets = get_offsets(); - const std::array locations = get_locations(); - const std::array pitchs = get_pitchs(); - for (int i = index; i < index + count; ++i) { - u32 offset = 
offsets[i]; - u32 location = locations[i]; - u32 pitch = pitchs[i]; - - if (pitch <= 64) + if (surface_info[i].address == 0 || surface_info[i].pitch <= 64) continue; - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - u32 range = pitch * height; - /**Even tiles are loaded as whole textures during read_buffers from testing. * Need further evaluation to determine correct behavior. Separate paths for both show no difference, * but using the GPU to perform the caching is many times faster. */ - __glcheck m_gl_texture_cache.save_rtt(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])), width, height, pitch, color_format.format, color_format.type); + const u32 range = surface_info[i].pitch * surface_info[i].height; + __glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range); } }; - switch (rsx::method_registers.surface_color_target()) - { - case rsx::surface_target::none: - break; - - case rsx::surface_target::surface_a: - write_color_buffers(0, 1); - break; - - case rsx::surface_target::surface_b: - write_color_buffers(1, 1); - break; - - case rsx::surface_target::surfaces_a_b: - write_color_buffers(0, 2); - break; - - case rsx::surface_target::surfaces_a_b_c: - write_color_buffers(0, 3); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - write_color_buffers(0, 4); - break; - } + write_color_buffers(0, 4); } if (g_cfg_rsx_write_depth_buffer) { //TODO: use pitch - u32 pitch = rsx::method_registers.surface_z_pitch(); + if (!depth_surface_info.address || depth_surface_info.pitch <= 64) return; - if (pitch <= 64) - return; + u32 range = depth_surface_info.width * depth_surface_info.height * 2; + if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; - u32 width = rsx::method_registers.surface_clip_width(); - u32 height = rsx::method_registers.surface_clip_height(); - u32 range = width * height * 2; - auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt()); - u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - - if (rsx::method_registers.surface_depth_fmt() != rsx::surface_depth_format::z16) range *= 2; - - m_gl_texture_cache.save_rtt(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)), width, height, pitch, depth_format.format, depth_format.type); + m_gl_texture_cache.save_rtt(depth_surface_info.address, range); } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index d67e5042c6..04282ae6a6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -4,40 +4,6 @@ #include "stdafx.h" #include "../RSXThread.h" -namespace gl -{ - class render_target : public texture - { - bool is_cleared = false; - u16 native_pitch = 0; - - public: - - render_target() {} - - void set_cleared() - { - is_cleared = true; - } - - bool cleared() const - { - return is_cleared; - } - - // Internal pitch is the actual row length in bytes of the openGL texture - void set_native_pitch(u16 pitch) - { - native_pitch = pitch; - } - - u16 get_native_pitch() const - { - return native_pitch; - } - }; -} - struct color_swizzle { gl::texture::channel a = gl::texture::channel::a; @@ -73,12 +39,111 @@ namespace rsx { namespace internals { + ::gl::texture::internal_format sized_internal_format(rsx::surface_color_format color_format); color_format 
surface_color_format_to_gl(rsx::surface_color_format color_format); depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format); u8 get_pixel_size(rsx::surface_depth_format format); } } +namespace gl +{ + class render_target : public texture + { + bool is_cleared = false; + + u32 rsx_pitch = 0; + u16 native_pitch = 0; + + u16 surface_height = 0; + u16 surface_width = 0; + u16 surface_pixel_size = 0; + + texture::internal_format compatible_internal_format = texture::internal_format::rgba8; + + public: + + render_target() {} + + void set_cleared() + { + is_cleared = true; + } + + bool cleared() const + { + return is_cleared; + } + + // Internal pitch is the actual row length in bytes of the openGL texture + void set_native_pitch(u16 pitch) + { + native_pitch = pitch; + } + + u16 get_native_pitch() const + { + return native_pitch; + } + + // Rsx pitch + void set_rsx_pitch(u16 pitch) + { + rsx_pitch = pitch; + } + + u16 get_rsx_pitch() const + { + return rsx_pitch; + } + + std::pair get_dimensions() + { + if (!surface_height) surface_height = height(); + if (!surface_width) surface_width = width(); + + return std::make_pair(surface_width, surface_height); + } + + void set_compatible_format(texture::internal_format format) + { + compatible_internal_format = format; + } + + texture::internal_format get_compatible_internal_format() + { + return compatible_internal_format; + } + + // For an address within the texture, extract this sub-section's rect origin + std::tuple get_texture_subresource(u32 offset) + { + if (!offset) + { + return std::make_tuple(true, 0, 0); + } + + if (!surface_height) surface_height = height(); + if (!surface_width) surface_width = width(); + + u32 range = rsx_pitch * surface_height; + if (offset < range) + { + if (!surface_pixel_size) + surface_pixel_size = native_pitch / surface_width; + + u32 pixel_offset = (offset / surface_pixel_size); + u32 y = (pixel_offset / surface_width); + u32 x = (pixel_offset % surface_width); + + return std::make_tuple(true, (u16)x, (u16)y); + } + else + return std::make_tuple(false, 0, 0); + } + }; +} + struct gl_render_target_traits { using surface_storage_type = std::unique_ptr; @@ -97,13 +162,17 @@ struct gl_render_target_traits std::unique_ptr result(new gl::render_target()); auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); + auto internal_fmt = rsx::internals::sized_internal_format(surface_color_format); + result->recreate(gl::texture::target::texture2D); result->set_native_pitch(width * format.channel_count * format.channel_size); + result->set_compatible_format(internal_fmt); __glcheck result->config() .size({ (int)width, (int)height }) .type(format.type) .format(format.format) + .internal_format(internal_fmt) .swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a) .wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border) .apply(); @@ -144,6 +213,7 @@ struct gl_render_target_traits native_pitch *= 2; result->set_native_pitch(native_pitch); + result->set_compatible_format(format.internal_format); return result; } @@ -210,7 +280,138 @@ struct gl_render_target_traits } }; - -struct gl_render_targets : public rsx::surface_store +struct surface_subresource { + gl::render_target *surface = nullptr; + + u16 x = 0; + u16 y = 0; + u16 w = 0; + u16 h = 0; + + bool is_bound = false; + bool is_depth_surface = false; + + surface_subresource() {} + + surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 
H, bool _Bound, bool _Depth) + : surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth) + {} +}; + +class gl_render_targets : public rsx::surface_store +{ +private: + bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y) + { + bool is_subslice = false; + u16 x_offset = 0; + u16 y_offset = 0; + + if (surface_address > texaddr) + return false; + + u32 offset = texaddr - surface_address; + if (offset >= 0) + { + std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset); + if (is_subslice) + { + *x = x_offset; + *y = y_offset; + + return true; + } + } + + return false; + } + + bool is_bound(u32 address, bool is_depth) + { + if (is_depth) + { + const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil); + return (bound_depth_address == address); + } + + for (auto &surface: m_bound_render_targets) + { + const u32 bound_address = std::get<0>(surface); + if (bound_address == address) + return true; + } + + return false; + } + + bool fits(gl::render_target *src, std::pair &dims, u16 x_offset, u16 y_offset, u16 width, u16 height) const + { + if ((x_offset + width) > dims.first) return false; + if ((y_offset + height) > dims.second) return false; + + return true; + } + +public: + surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch) + { + gl::render_target *surface = nullptr; + bool is_subslice = false; + u16 x_offset = 0; + u16 y_offset = 0; + + for (auto &tex_info : m_render_targets_storage) + { + u32 this_address = std::get<0>(tex_info); + surface = std::get<1>(tex_info).get(); + + if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset)) + { + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + auto dims = surface->get_dimensions(); + + if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height)) + return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false }; + else + { + if (dims.first >= requested_width && dims.second >= requested_height) + { + LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region"); + return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false }; + } + } + } + } + + //Check depth surfaces for overlap + for (auto &tex_info : m_depth_stencil_storage) + { + u32 this_address = std::get<0>(tex_info); + surface = std::get<1>(tex_info).get(); + + if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset)) + { + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + auto dims = surface->get_dimensions(); + + if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height)) + return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true }; + else + { + if (dims.first >= requested_width && dims.second >= requested_height) + { + LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region"); + return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true }; + } + } + } + } + + return {}; + } }; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index f64644d5b1..5b5e953ad3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -7,7 +7,7 @@ #include "../rsx_utils.h" #include "../Common/TextureUtils.h" -namespace +namespace gl { GLenum 
get_sized_internal_format(u32 texture_format) { @@ -40,7 +40,6 @@ namespace fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } - std::tuple get_format_type(u32 texture_format) { switch (texture_format) @@ -68,7 +67,10 @@ namespace } fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } +} +namespace +{ bool is_compressed_format(u32 texture_format) { switch (texture_format) @@ -319,10 +321,10 @@ namespace rsx int mip_level = 0; if (dim == rsx::texture_dimension_extended::texture_dimension_1d) { - __glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width); + __glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -335,7 +337,7 @@ namespace rsx { u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; @@ -343,10 +345,10 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_2d) { - __glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, get_sized_internal_format(format), width, height); + __glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -359,7 +361,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; @@ -367,13 +369,13 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap) { - __glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, get_sized_internal_format(format), width, height); + __glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height); // Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap // Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0. 
// mip_level % mipmap_per_layer will always be equal to mip_level if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -387,7 +389,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); mip_level++; } } @@ -396,10 +398,10 @@ namespace rsx if (dim == rsx::texture_dimension_extended::texture_dimension_3d) { - __glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, get_sized_internal_format(format), width, height, depth); + __glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth); if (!is_compressed_format(format)) { - const auto &format_type = get_format_type(format); + const auto &format_type = ::gl::get_format_type(format); for (const rsx_subresource_layout &layout : input_layouts) { __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); @@ -412,7 +414,7 @@ namespace rsx { u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data()); + __glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); } } return; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index e8740f64a7..e12271fa3f 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -64,3 +64,9 @@ namespace rsx }; } } + +namespace gl +{ + GLenum get_sized_internal_format(u32 gcm_format); + std::tuple get_format_type(u32 texture_format); +} diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp new file mode 100644 index 0000000000..39ec892e08 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp @@ -0,0 +1,63 @@ +#pragma once + +#include "stdafx.h" + +#include "GLGSRender.h" +#include "GLTextureCache.h" + +namespace gl +{ + bool texture_cache::flush_section(u32 address) + { + if (address < rtt_cache_range.first || + address >= rtt_cache_range.second) + return false; + + bool post_task = false; + + { + std::lock_guard lock(m_section_mutex); + + for (cached_rtt_section &rtt : m_rtt_cache) + { + if (rtt.is_dirty()) continue; + + if (rtt.is_locked() && rtt.overlaps(address)) + { + if (rtt.is_flushed()) + { + LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size()); + continue; + } + + //LOG_WARNING(RSX, "Cell needs GPU data synced here, address=0x%X", address); + + if (std::this_thread::get_id() != m_renderer_thread) + { + post_task = true; + break; + } + + rtt.flush(); + return true; + } + } + } + + if (post_task) + { + //LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address); + work_item &task = m_renderer->post_flush_request(address); + + { + std::unique_lock lock(task.guard_mutex); + task.cv.wait(lock, [&task] { return task.processed; }); + } + + verify(HERE), task.result == true; + return task.result; + } + + return false; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 6985d199f0..bd608f6078 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -9,227 +9,14 @@ #include #include -#include "GLGSRender.h" #include "GLRenderTargets.h" #include "../Common/TextureUtils.h" #include -namespace rsx -{ - //TODO: Properly move this into rsx shared - class buffered_section - { - protected: - u32 cpu_address_base = 0; - u32 cpu_address_range = 0; - - u32 locked_address_base = 0; - u32 locked_address_range = 0; - - u32 memory_protection = 0; - - bool locked = false; - bool dirty = false; - - bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) - { - //Check for memory area overlap. unlock page(s) if needed and add this index to array. - //Axis separation test - const u32 &block_start = base1; - const u32 block_end = limit1; - - if (limit2 < block_start) return false; - if (base2 > block_end) return false; - - u32 min_separation = (limit2 - base2) + (limit1 - base1); - u32 range_limit = (block_end > limit2) ? block_end : limit2; - u32 range_base = (block_start < base2) ? 
block_start : base2; - - u32 actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - return true; - - return false; - } - - public: - - buffered_section() {} - ~buffered_section() {} - - void reset(u32 base, u32 length) - { - verify(HERE), locked == false; - - cpu_address_base = base; - cpu_address_range = length; - - locked_address_base = (base & ~4095); - locked_address_range = align(base + length, 4096) - locked_address_base; - - memory_protection = vm::page_readable|vm::page_writable; - - locked = false; - } - - bool protect(u8 flags_set, u8 flags_clear) - { - if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear)) - { - memory_protection &= ~flags_clear; - memory_protection |= flags_set; - - locked = memory_protection != (vm::page_readable | vm::page_writable); - } - else - fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base); - - return false; - } - - bool unprotect() - { - u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection; - - if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0)) - { - memory_protection = (vm::page_writable | vm::page_readable); - locked = false; - return true; - } - else - fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base); - - return false; - } - - bool overlaps(std::pair range) - { - return region_overlaps(locked_address_base, locked_address_base+locked_address_range, range.first, range.first + range.second); - } - - bool overlaps(u32 address) - { - return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); - } - - bool is_locked() const - { - return locked; - } - - bool is_dirty() const - { - return dirty; - } - - void set_dirty(bool state) - { - dirty = state; - } - - u32 get_section_base() const - { - return cpu_address_base; - } - - u32 get_section_size() const - { - return cpu_address_range; - } - - bool matches(u32 cpu_address, u32 size) const - { - return (cpu_address_base == cpu_address && cpu_address_range == size); - } - - std::pair get_min_max(std::pair current_min_max) - { - u32 min = std::min(current_min_max.first, locked_address_base); - u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range); - - return std::make_pair(min, max); - } - }; -} +class GLGSRender; namespace gl { - //TODO: Properly move this into helpers - class fence - { - GLsync m_value = nullptr; - GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; - - public: - - fence() {} - ~fence() {} - - void create() - { - m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } - - void destroy() - { - glDeleteSync(m_value); - m_value = nullptr; - } - - void reset() - { - if (m_value != nullptr) - destroy(); - - create(); - } - - bool check_signaled() - { - verify(HERE), m_value != nullptr; - - GLenum err = glClientWaitSync(m_value, flags, 0); - flags = 0; - return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); - } - - bool wait_for_signal() - { - verify(HERE), m_value != nullptr; - - GLenum err = GL_WAIT_FAILED; - bool done = false; - - while (!done) - { - //Check if we are finished, wait time = 1us - err = glClientWaitSync(m_value, flags, 1000); - flags = 0; - - switch (err) - { - default: - LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err); - case GL_ALREADY_SIGNALED: - case GL_CONDITION_SATISFIED: - done = true; - break; - case GL_TIMEOUT_EXPIRED: - continue; - } - } - - glDeleteSync(m_value); - m_value = nullptr; - - 
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); - } - }; - - - //TODO: Unify all cache objects class texture_cache { public: @@ -253,7 +40,7 @@ namespace gl this->mipmaps = mipmaps; } - bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) + bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const { if (rsx_address == cpu_address_base && texture_id != 0) { @@ -275,7 +62,7 @@ namespace gl texture_id = 0; } - bool is_empty() + bool is_empty() const { return (texture_id == 0); } @@ -293,12 +80,12 @@ namespace gl u32 pbo_id = 0; u32 pbo_size = 0; + u32 source_texture = 0; + + bool copied = false; bool flushed = false; bool is_depth = false; - u32 flush_count = 0; - u32 copy_count = 0; - u32 current_width = 0; u32 current_height = 0; u32 current_pitch = 0; @@ -372,62 +159,27 @@ namespace gl return size; } - public: - - void reset(u32 base, u32 size) + void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) { - rsx::buffered_section::reset(base, size); - flushed = false; - flush_count = 0; - copy_count = 0; - } + u32 dst_offset = 0; + u32 src_offset = 0; + u32 padding = dst_pitch - (src_pitch * samples); - void init_buffer() - { - glGenBuffers(1, &pbo_id); + for (u16 h = 0; h < src_height; ++h) + { + for (u16 w = 0; w < src_width; ++w) + { + for (u8 n = 0; n < samples; ++n) + { + memcpy(&dst[dst_offset], &src[src_offset], pixel_size); + dst_offset += pixel_size; + } - glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT); + src_offset += pixel_size; + } - pbo_size = locked_address_range; - } - - void set_dimensions(u32 width, u32 height, u32 pitch) - { - current_width = width; - current_height = height; - current_pitch = pitch; - - real_pitch = width * get_pixel_size(format, type); - } - - void set_format(texture::format gl_format, texture::type gl_type) - { - format = gl_format; - type = gl_type; - - real_pitch = current_width * get_pixel_size(format, type); - } - - void copy_texture(gl::texture &source) - { - glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - glGetTextureImage(source.id(), 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - m_fence.reset(); - copy_count++; - } - - void fill_texture(gl::texture &tex) - { - u32 min_width = std::min((u32)tex.width(), current_width); - u32 min_height = std::min((u32)tex.height(), current_height); - - tex.bind(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); - glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + dst_offset += padding; + } } template @@ -476,8 +228,98 @@ namespace gl } } + public: + + void reset(u32 base, u32 size) + { + rsx::buffered_section::reset(base, size); + + flushed = false; + copied = false; + + source_texture = 0; + } + + void init_buffer() + { + glGenBuffers(1, &pbo_id); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT); + + pbo_size = locked_address_range; + } + + void set_dimensions(u32 width, u32 height, u32 pitch) + { + current_width = width; + current_height = height; + current_pitch = pitch; + + real_pitch = width * get_pixel_size(format, type); + } + + void set_format(texture::format gl_format, texture::type gl_type) + { + format = gl_format; + type = gl_type; + + real_pitch = 
current_width * get_pixel_size(format, type); + } + + void set_source(gl::texture &source) + { + source_texture = source.id(); + } + + void copy_texture() + { + if (!glIsTexture(source_texture)) + { + LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! (0x%X)", source_texture); + return; + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + glGetTextureImage(source_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + m_fence.reset(); + copied = true; + } + + void fill_texture(gl::texture &tex) + { + if (!copied) + { + //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); + return; + } + + u32 min_width = std::min((u32)tex.width(), current_width); + u32 min_height = std::min((u32)tex.height(), current_height); + + tex.bind(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); + glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + void flush() { + if (!copied) + { + LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); + copy_texture(); + + if (!copied) + { + LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); + protect(vm::page_readable, 0); + return; + } + } + protect(vm::page_writable, 0); m_fence.wait_for_signal(); flushed = true; @@ -493,6 +335,7 @@ namespace gl memcpy(dst, data, cpu_address_range); else { + //TODO: Use compression hint from the gcm tile information //Scale this image by repeating pixel data n times //n = expected_pitch / real_pitch //Use of fixed argument templates for performance reasons @@ -521,15 +364,13 @@ namespace gl break; default: LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch); - memcpy(dst, data, cpu_address_range); + scale_image_fallback(dst, static_cast(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count); } } glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); protect(vm::page_readable, vm::page_writable); - - flush_count++; } void destroy() @@ -549,46 +390,30 @@ namespace gl return flushed; } - bool can_skip() - { - //TODO: Better balancing algorithm. Copying buffers is very expensive - //TODO: Add a switch to force strict enforcement - - //Always accept the first attempt at caching after creation - if (!copy_count) - return false; - - //If surface is flushed often, force buffering - if (flush_count) - { - //TODO: Pick better values. 
Using 80% and 20% for now - if (flush_count >= (4 * copy_count / 5)) - return false; - else - { - if (flushed) return false; //fence is guaranteed to have been signaled and destroyed - return !m_fence.check_signaled(); - } - } - - return true; - } - void set_flushed(bool state) { flushed = state; } + + void set_copied(bool state) + { + copied = state; + } }; private: std::vector m_texture_cache; std::vector m_rtt_cache; + std::vector m_temporary_surfaces; std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); std::pair rtt_cache_range = std::make_pair(0xFFFFFFFF, 0); std::mutex m_section_mutex; + GLGSRender *m_renderer; + std::thread::id m_renderer_thread; + cached_texture_section *find_texture(u64 texaddr, u32 w, u32 h, u16 mipmaps) { for (cached_texture_section &tex : m_texture_cache) @@ -638,6 +463,8 @@ namespace gl m_rtt_cache.resize(0); m_texture_cache.resize(0); + + clear_temporary_surfaces(); } cached_rtt_section* find_cached_rtt_section(u32 base, u32 size) @@ -700,11 +527,48 @@ namespace gl return region; } + u32 create_temporary_subresource(u32 src_id, GLenum sized_internal_fmt, u16 x, u16 y, u16 width, u16 height) + { + u32 dst_id = 0; + + glGenTextures(1, &dst_id); + glBindTexture(GL_TEXTURE_2D, dst_id); + + glTexStorage2D(GL_TEXTURE_2D, 1, sized_internal_fmt, width, height); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + //Empty GL_ERROR + glGetError(); + + glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0, + dst_id, GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); + + m_temporary_surfaces.push_back(dst_id); + + //Check for error + if (GLenum err = glGetError()) + { + LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); + return 0; + } + + return dst_id; + } + public: texture_cache() {} - ~texture_cache() + ~texture_cache() {} + + void initialize(GLGSRender *renderer) + { + m_renderer = renderer; + m_renderer_thread = std::this_thread::get_id(); + } + + void close() { clear(); } @@ -733,13 +597,78 @@ namespace gl return; } + /** + * Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise + * (Turbo: Super Stunt Squad does this; bypassing the need for a sync object) + * The engine does not read back the texture resource through cell, but specifies a texture location that is + * a bound render target. We can bypass the expensive download in this case + */ + + surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch()); + if (rsc.surface) + { + //Check that this region is not cpu-dirty before doing a copy + //This section is guaranteed to have a locking section *if* this bit has been bypassed before + + bool upload_from_cpu = false; + + for (cached_rtt_section §ion : m_rtt_cache) + { + if (section.overlaps(std::make_pair(texaddr, range)) && section.is_dirty()) + { + LOG_ERROR(RSX, "Cell wrote to render target section we are uploading from!"); + + upload_from_cpu = true; + break; + } + } + + if (!upload_from_cpu) + { + if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d) + { + LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! 
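// get_surface_subresource_if_applicable() (implemented in the surface store, not in this hunk)
// returns the x/y/w/h window that the sampled address occupies inside a tracked render target.
// Below is a minimal sketch of how such a window can be derived for a linear surface from the
// byte offset, the surface pitch and the bytes per pixel. surface_desc, subresource_window and
// locate_subresource are illustrative names, not code from this patch.
#include <cstdint>

namespace sketch
{
	using u16 = std::uint16_t;
	using u32 = std::uint32_t;

	struct surface_desc
	{
		u32 base_address;
		u32 pitch; // bytes per row of the render target
		u32 bpp;   // bytes per pixel
		u16 width;
		u16 height;
	};

	struct subresource_window
	{
		bool valid;
		u16 x, y, w, h;
	};

	inline subresource_window locate_subresource(const surface_desc &s, u32 texaddr, u16 tex_width, u16 tex_height)
	{
		const u32 offset = texaddr - s.base_address; // caller has already checked texaddr >= base_address
		const u16 y = static_cast<u16>(offset / s.pitch);
		const u16 x = static_cast<u16>((offset % s.pitch) / s.bpp);

		if (x + tex_width > s.width || y + tex_height > s.height)
			return { false, 0, 0, 0, 0 };            // requested window spills outside the surface

		return { true, x, y, tex_width, tex_height };
	}
}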
addr=0x%x, Type=%d, dims=%dx%d", + texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); + } + else + { + const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + GLenum src_format = (GLenum)rsc.surface->get_internal_format(); + GLenum dst_format = std::get<0>(get_format_type(format)); + + u32 bound_index = ~0U; + + if (src_format != dst_format) + { + LOG_WARNING(RSX, "Sampling from a section of a render target, but formats might be incompatible (0x%X vs 0x%X)", src_format, dst_format); + } + + if (!rsc.is_bound) + { + if (rsc.w == tex.width() && rsc.h == tex.height()) + rsc.surface->bind(); + else + bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); + } + else + { + LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); + bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); + } + + if (bound_index) + return; + } + } + } + /** * If all the above failed, then its probably a generic texture. * Search in cache and upload/bind */ cached_texture_section *cached_texture = find_texture(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count()); - verify(HERE), gl_texture.id() == 0; if (cached_texture) { @@ -771,17 +700,28 @@ namespace gl gl_texture.set_id(0); } - void save_rtt(u32 base, u32 size, gl::texture &source, u32 width, u32 height, u32 pitch, texture::format format, texture::type type) + void save_rtt(u32 base, u32 size) + { + std::lock_guard lock(m_section_mutex); + + cached_rtt_section *region = find_cached_rtt_section(base, size); + + if (!region) + { + LOG_ERROR(RSX, "Attempted to download render target that does not exist. 
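// glCopyImageSubData (used by create_temporary_subresource above) requires the source and
// destination to have compatible internal formats, which is why the format mismatch above is
// only logged. A fallback this patch does NOT implement, but which handles mismatched color
// formats, is a framebuffer blit; the sketch below shows the idea. The OpenGL.h include and
// the function name are assumptions.
#include "OpenGL.h" // stand-in for the project's GL loader header

GLuint copy_subresource_via_blit(GLuint src_id, GLenum dst_sized_format, GLint x, GLint y, GLsizei w, GLsizei h)
{
	GLuint dst_id = 0;
	GLuint fbos[2] = {};

	glGenTextures(1, &dst_id);
	glBindTexture(GL_TEXTURE_2D, dst_id);
	glTexStorage2D(GL_TEXTURE_2D, 1, dst_sized_format, w, h);

	glGenFramebuffers(2, fbos);
	glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
	glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_id, 0);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
	glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_id, 0);

	// The blit converts between the two color formats; glCopyImageSubData would not.
	glBlitFramebuffer(x, y, x + w, y + h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_NEAREST);

	glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
	glDeleteFramebuffers(2, fbos);

	return dst_id;
}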
Please report to developers"); + return; + } + + verify(HERE), region->is_locked(); + region->copy_texture(); + } + + void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, gl::texture &source) { std::lock_guard lock(m_section_mutex); cached_rtt_section *region = create_locked_view_of_section(base, size); - //Ignore this if we haven't finished downloading previous draw call - //TODO: Separate locking sections vs downloading to pbo unless address faults often - if (0)//region->can_skip()) - return; - if (!region->matches(base, size)) { //This memory region overlaps our own region, but does not match it exactly @@ -793,10 +733,11 @@ namespace gl } region->set_dimensions(width, height, pitch); - region->copy_texture(source); region->set_format(format, type); region->set_dirty(false); region->set_flushed(false); + region->set_copied(false); + region->set_source(source); verify(HERE), region->is_locked() == true; } @@ -890,32 +831,16 @@ namespace gl } } - bool flush_section(u32 address) + bool flush_section(u32 address); + + void clear_temporary_surfaces() { - if (address < rtt_cache_range.first || - address >= rtt_cache_range.second) - return false; - - std::lock_guard lock(m_section_mutex); - - for (cached_rtt_section &rtt : m_rtt_cache) + for (u32 &id : m_temporary_surfaces) { - if (rtt.is_dirty()) continue; - - if (rtt.is_locked() && rtt.overlaps(address)) - { - if (rtt.is_flushed()) - { - LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size()); - continue; - } - - rtt.flush(); - return true; - } + glDeleteTextures(1, &id); } - return false; + m_temporary_surfaces.clear(); } }; } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 461bcbd1fb..b38b9dd89d 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -391,6 +391,9 @@ namespace rsx // TODO: exit condition while (!Emu.IsStopped()) { + //Execute backend-local tasks first + do_local_task(); + const u32 get = ctrl->get; const u32 put = ctrl->put; @@ -837,7 +840,7 @@ namespace rsx return result; } - RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, bool)> get_surface_info) const + RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const { RSXFragmentProgram result = {}; u32 shader_program = rsx::method_registers.shader_program_address(); @@ -885,7 +888,7 @@ namespace rsx bool surface_exists; u16 surface_pitch; - std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, false); + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false); if (surface_exists && surface_pitch) { @@ -894,7 +897,7 @@ namespace rsx } else { - std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, true); + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true); if (surface_exists) { u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index e63fa3e2d1..4627828a66 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -219,7 +219,7 @@ namespace rsx * get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth * returns whether surface is a render target and surface pitch in native format */ - RSXFragmentProgram 
get_current_fragment_program(std::function(u32, bool)> get_surface_info) const; + RSXFragmentProgram get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const; public: double fps_limit = 59.94; @@ -239,6 +239,11 @@ namespace rsx virtual void on_task() override; virtual void on_exit() override; + + /** + * Execute a backend local task queue + */ + virtual void do_local_task() {} public: virtual std::string get_name() const override; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b7fcf1972c..50378ce3c0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -981,7 +981,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::load_program() { - auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { vk::render_target *surface = nullptr; if (!is_depth) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 39bb3b09aa..3a378d349a 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -6,143 +6,132 @@ namespace vk { - struct cached_texture_object + class cached_texture_section : public rsx::buffered_section { - u32 native_rsx_address; - u32 native_rsx_size; - u16 width; u16 height; u16 depth; u16 mipmaps; + std::unique_ptr uploaded_image_view; std::unique_ptr uploaded_texture; - u64 protected_rgn_start; - u64 protected_rgn_end; - - bool exists = false; - bool locked = false; - bool dirty = true; + public: + + cached_texture_section() {} + + void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image_view *view, vk::image *image) + { + width = w; + height = h; + this->depth = depth; + this->mipmaps = mipmaps; + + uploaded_image_view.reset(view); + uploaded_texture.reset(image); + } + + bool matches(u32 rsx_address, u32 rsx_size) const + { + return rsx::buffered_section::matches(rsx_address, rsx_size); + } + + bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const + { + if (rsx_address == cpu_address_base) + { + if (!width && !height && !mipmaps) + return true; + + return (width == this->width && height == this->height && mipmaps == this->mipmaps); + } + + return false; + } + + bool exists() const + { + return (uploaded_texture.get() != nullptr); + } + + u16 get_width() const + { + return width; + } + + u16 get_height() const + { + return height; + } + + std::unique_ptr& get_view() + { + return uploaded_image_view; + } + + std::unique_ptr& get_texture() + { + return uploaded_texture; + } }; class texture_cache { private: - std::vector m_cache; + std::vector m_cache; std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); std::vector > m_temporary_image_view; std::vector> m_dirty_textures; - bool lock_memory_region(u32 start, u32 size) + cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - return vm::page_protect(start, size, 0, 0, vm::page_writable); - } - - bool unlock_memory_region(u32 start, u32 size) - { - static const u32 memory_page_size = 4096; - start = start & ~(memory_page_size - 1); - size = (u32)align(size, memory_page_size); - - return vm::page_protect(start, size, 0, vm::page_writable, 0); - } - - bool region_overlaps(u32 
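// do_local_task() above gives a backend a hook that runs on the RSX thread every FIFO
// iteration. A typical use, sketched below with illustrative names, is marshalling work that
// must touch API objects owned by the renderer thread (for example the texture-cache
// flush_section() declared earlier) when the request originates from a page fault raised on
// another thread. This is not the actual GLGSRender implementation, only one way such a
// queue can look.
#include <cstdint>
#include <mutex>
#include <vector>

class example_backend
{
	std::mutex m_task_lock;
	std::vector<std::uint32_t> m_pending_flushes; // guest addresses waiting to be flushed

public:
	// May be called from a PPU/SPU thread inside the access-violation handler.
	void request_flush(std::uint32_t address)
	{
		std::lock_guard<std::mutex> lock(m_task_lock);
		m_pending_flushes.push_back(address);
	}

	// Runs on the renderer thread, called from the FIFO loop via do_local_task().
	void do_local_task()
	{
		std::vector<std::uint32_t> work;
		{
			std::lock_guard<std::mutex> lock(m_task_lock);
			work.swap(m_pending_flushes);
		}

		for (const std::uint32_t address : work)
		{
			// GL/Vulkan objects can only be used safely on the thread that owns them,
			// which is why the flush is deferred to this point, e.g.:
			// m_texture_cache.flush_section(address); // hypothetical member call
			(void)address;
		}
	}
};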
base1, u32 limit1, u32 base2, u32 limit2) - { - //Check for memory area overlap. unlock page(s) if needed and add this index to array. - //Axis separation test - const u32 &block_start = base1; - const u32 block_end = limit1; - - if (limit2 < block_start) return false; - if (base2 > block_end) return false; - - u32 min_separation = (limit2 - base2) + (limit1 - base1); - u32 range_limit = (block_end > limit2) ? block_end : limit2; - u32 range_base = (block_start < base2) ? block_start : base2; - - u32 actual_separation = (range_limit - range_base); - - if (actual_separation < min_separation) - return true; - - return false; - } - - cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) - { - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (!tex.dirty && tex.exists && - tex.native_rsx_address == rsx_address && - tex.native_rsx_size == rsx_size) + if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) { if (!confirm_dimensions) return tex; - if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps) + if (tex.matches(rsx_address, width, height, mipmaps)) return tex; else { LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters."); - LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height); + LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height()); } } } - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (tex.dirty) + if (tex.is_dirty()) { - if (tex.exists) + if (tex.exists()) { - m_dirty_textures.push_back(std::move(tex.uploaded_texture)); - tex.exists = false; + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); } return tex; } } - m_cache.push_back(cached_texture_object()); + m_cache.push_back(cached_texture_section()); return m_cache[m_cache.size() - 1]; } - void lock_object(cached_texture_object &obj) - { - static const u32 memory_page_size = 4096; - obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1); - obj.protected_rgn_end = (u32)align(obj.native_rsx_size, memory_page_size); - obj.protected_rgn_end += obj.protected_rgn_start; - - lock_memory_region(static_cast(obj.protected_rgn_start), static_cast(obj.native_rsx_size)); - - if (obj.protected_rgn_start < texture_cache_range.first) - texture_cache_range = std::make_pair(obj.protected_rgn_start, texture_cache_range.second); - - if (obj.protected_rgn_end > texture_cache_range.second) - texture_cache_range = std::make_pair(texture_cache_range.first, obj.protected_rgn_end); - } - - void unlock_object(cached_texture_object &obj) - { - unlock_memory_region(static_cast(obj.protected_rgn_start), static_cast(obj.native_rsx_size)); - } - void purge_cache() { - for (cached_texture_object &tex : m_cache) + for (auto &tex : m_cache) { - if (tex.exists) - m_dirty_textures.push_back(std::move(tex.uploaded_texture)); + if (tex.exists()) + { + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); + } - if (tex.locked) - unlock_object(tex); + if (tex.is_locked()) + tex.unprotect(); } m_temporary_image_view.clear(); @@ -196,12 +185,6 @@ namespace vk return m_temporary_image_view.back().get(); } - cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.get_exact_mipmap_count()); - if 
(cto.exists && !cto.dirty) - { - return cto.uploaded_image_view.get(); - } - u32 raw_format = tex.format(); u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); @@ -213,6 +196,7 @@ namespace vk u16 height = 0; u16 depth = 0; u8 layer = 0; + switch (tex.get_extended_texture_dimension()) { case rsx::texture_dimension_extended::texture_dimension_1d: @@ -245,6 +229,12 @@ namespace vk break; } + cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, tex.get_exact_mipmap_count()); + if (region.exists() && !region.is_dirty()) + { + return region.get_view().get(); + } + bool is_cubemap = tex.get_extended_texture_dimension() == rsx::texture_dimension_extended::texture_dimension_cubemap; VkImageSubresourceRange subresource_range = vk::get_image_subresource_range(0, 0, is_cubemap ? 6 : 1, tex.get_exact_mipmap_count(), VK_IMAGE_ASPECT_COLOR_BIT); @@ -255,33 +245,28 @@ namespace vk return nullptr; } - cto.uploaded_texture = std::make_unique(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, vk_format, tex.width(), height, depth, tex.get_exact_mipmap_count(), layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0); - change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - cto.uploaded_image_view = std::make_unique(*vk::get_current_renderer(), cto.uploaded_texture->value, image_view_type, vk_format, + vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, image_view_type, vk_format, mapping, subresource_range); - copy_mipmaped_image_using_buffer(cmd, cto.uploaded_texture->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(), + copy_mipmaped_image_using_buffer(cmd, image->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(), upload_heap, upload_buffer); - change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); - cto.exists = true; - cto.dirty = false; - cto.native_rsx_address = texaddr; - cto.native_rsx_size = range; - cto.width = tex.width(); - cto.height = tex.height(); - cto.mipmaps = tex.get_exact_mipmap_count(); - - lock_object(cto); + region.reset(texaddr, range); + region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image); + region.protect(0, vm::page_writable); + region.set_dirty(false); - return cto.uploaded_image_view.get(); + return view; } bool invalidate_address(u32 rsx_address) @@ -290,23 +275,22 @@ namespace vk rsx_address > texture_cache_range.second) return false; - for (cached_texture_object &tex : m_cache) + bool response = false; + + for (auto &tex : m_cache) { - if (tex.dirty) continue; + if (tex.is_dirty()) continue; - if (rsx_address >= tex.protected_rgn_start && - rsx_address 
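// find_cached_texture() and purge_cache() above move retired images and views into
// m_dirty_textures / m_temporary_image_view instead of destroying them immediately,
// presumably so that resources still referenced by an in-flight command buffer outlive the
// submission that uses them. The same pattern in isolation (deferred_graveyard is an
// illustrative name; when collect() is safe to call depends on the frame's fences):
#include <memory>
#include <utility>
#include <vector>

template <typename Resource>
class deferred_graveyard
{
	std::vector<std::unique_ptr<Resource>> m_retired;

public:
	// Ownership moves here while the handle may still be in use by recorded commands.
	void retire(std::unique_ptr<Resource> res)
	{
		m_retired.push_back(std::move(res));
	}

	// Call only after the GPU work that could reference these resources has completed
	// (e.g. after waiting on the relevant submission fence).
	void collect()
	{
		m_retired.clear(); // unique_ptr destructors release the underlying objects here
	}
};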
< tex.protected_rgn_end) + if (tex.overlaps(rsx_address)) { - unlock_object(tex); + tex.set_dirty(true); + tex.unprotect(); - tex.native_rsx_address = 0; - tex.dirty = true; - - return true; + response = true; } } - return false; + return response; } void flush() diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 6fc12d683f..f7bdc32720 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -1,5 +1,6 @@ #pragma once #include +#include "Emu/Memory/vm.h" namespace rsx { @@ -64,4 +65,140 @@ namespace rsx program_info get(raw_program raw_program_, decompile_language lang); void clear(); }; + + class buffered_section + { + protected: + u32 cpu_address_base = 0; + u32 cpu_address_range = 0; + + u32 locked_address_base = 0; + u32 locked_address_range = 0; + + u32 memory_protection = 0; + + bool locked = false; + bool dirty = false; + + bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) + { + //Check for memory area overlap. unlock page(s) if needed and add this index to array. + //Axis separation test + const u32 &block_start = base1; + const u32 block_end = limit1; + + if (limit2 < block_start) return false; + if (base2 > block_end) return false; + + u32 min_separation = (limit2 - base2) + (limit1 - base1); + u32 range_limit = (block_end > limit2) ? block_end : limit2; + u32 range_base = (block_start < base2) ? block_start : base2; + + u32 actual_separation = (range_limit - range_base); + + if (actual_separation < min_separation) + return true; + + return false; + } + + public: + + buffered_section() {} + ~buffered_section() {} + + void reset(u32 base, u32 length) + { + verify(HERE), locked == false; + + cpu_address_base = base; + cpu_address_range = length; + + locked_address_base = (base & ~4095); + locked_address_range = align(base + length, 4096) - locked_address_base; + + memory_protection = vm::page_readable | vm::page_writable; + + locked = false; + } + + bool protect(u8 flags_set, u8 flags_clear) + { + if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear)) + { + memory_protection &= ~flags_clear; + memory_protection |= flags_set; + + locked = memory_protection != (vm::page_readable | vm::page_writable); + } + else + fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base); + + return false; + } + + bool unprotect() + { + u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection; + + if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0)) + { + memory_protection = (vm::page_writable | vm::page_readable); + locked = false; + return true; + } + else + fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base); + + return false; + } + + bool overlaps(std::pair range) + { + return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); + } + + bool overlaps(u32 address) + { + return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); + } + + bool is_locked() const + { + return locked; + } + + bool is_dirty() const + { + return dirty; + } + + void set_dirty(bool state) + { + dirty = state; + } + + u32 get_section_base() const + { + return cpu_address_base; + } + + u32 get_section_size() const + { + return cpu_address_range; + } + + bool matches(u32 cpu_address, u32 size) const + { + return (cpu_address_base == cpu_address && cpu_address_range == size); + } + + std::pair get_min_max(std::pair current_min_max) + { + u32 
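// region_overlaps() above is a separating-axis formulation: two ranges intersect exactly when
// the sum of their lengths exceeds the length of their combined bounding range. For the
// half-open [base, base + length) ranges the cache passes in, this is equivalent to the usual
// interval test; the standalone snippet below (not code from this patch) cross-checks the two.
#include <algorithm>
#include <cassert>
#include <cstdint>

namespace overlap_check
{
	using u32 = std::uint32_t;

	bool overlaps_simple(u32 base1, u32 limit1, u32 base2, u32 limit2)
	{
		return base1 < limit2 && base2 < limit1;
	}

	bool overlaps_separation(u32 base1, u32 limit1, u32 base2, u32 limit2)
	{
		const u32 min_separation = (limit1 - base1) + (limit2 - base2);
		const u32 union_length = std::max(limit1, limit2) - std::min(base1, base2);
		return union_length < min_separation;
	}
}

int main()
{
	using namespace overlap_check;

	// A locked 4 KiB page at 0x1000 against a few candidate accesses.
	assert(overlaps_simple(0x1000, 0x2000, 0x1ff0, 0x2010) && overlaps_separation(0x1000, 0x2000, 0x1ff0, 0x2010));   // straddles the end
	assert(!overlaps_simple(0x1000, 0x2000, 0x2000, 0x3000) && !overlaps_separation(0x1000, 0x2000, 0x2000, 0x3000)); // touching, no overlap
	assert(!overlaps_simple(0x1000, 0x2000, 0x0800, 0x0fff) && !overlaps_separation(0x1000, 0x2000, 0x0800, 0x0fff)); // disjoint
	return 0;
}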
min = std::min(current_min_max.first, locked_address_base);
+			u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
+
+			return std::make_pair(min, max);
+		}
+	};
 }
diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj
index 55fc2d85f1..c50c613aed 100644
--- a/rpcs3/GLGSRender.vcxproj
+++ b/rpcs3/GLGSRender.vcxproj
@@ -109,6 +109,7 @@
+
diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters
index 932ef00516..35bae19d35 100644
--- a/rpcs3/GLGSRender.vcxproj.filters
+++ b/rpcs3/GLGSRender.vcxproj.filters
@@ -10,6 +10,7 @@
+
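// get_min_max() above lets a cache fold each locked section into one coarse [min, max) pair
// (the texture_cache_range / rtt_cache_range members seen earlier), so invalidation can reject
// unrelated addresses with two compares before walking any section list. A minimal sketch of
// that bookkeeping with illustrative names:
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

namespace range_sketch
{
	using u32 = std::uint32_t;

	struct section_range
	{
		u32 base;
		u32 length;
	};

	class range_tracker
	{
		std::pair<u32, u32> m_range = std::make_pair(0xFFFFFFFFu, 0u); // empty: min above max
		std::vector<section_range> m_sections;

	public:
		void add(u32 base, u32 length)
		{
			m_sections.push_back({ base, length });

			// Same folding get_min_max() performs for a single section.
			m_range.first = std::min(m_range.first, base);
			m_range.second = std::max(m_range.second, base + length);
		}

		bool may_contain(u32 address) const
		{
			// Cheap rejection; only when this passes is the per-section overlap test worth running.
			return address >= m_range.first && address < m_range.second;
		}
	};
}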