diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index d2c3dc3c13..1a306de1a4 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -300,19 +300,22 @@ static void BPWritten(const BPCmd& bp) destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, bpmem.copyTexSrcWH.x + 1, destStride, height, yScale); - bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; - g_texture_cache->CopyRenderTargetToTexture( - destAddr, EFBCopyFormat::XFB, copy_width, height, destStride, is_depth_copy, srcRect, - false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top, - bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients()); + //bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; + //auto one = std::chrono::high_resolution_clock::now(); + //g_texture_cache->CopyRenderTargetToTexture( + // destAddr, EFBCopyFormat::XFB, copy_width, height, destStride, is_depth_copy, srcRect, + // false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top, + // bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients()); + //auto two = std::chrono::high_resolution_clock::now(); + //WARN_LOG(SLIPPI, "timer: %d", (two - one).count()); // This stays in to signal end of a "frame" g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]); - + if (g_ActiveConfig.bImmediateXFB) { // below div two to convert from bytes to pixels - it expects width, not stride - g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks()); + g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks(), s_gammaLUT[PE_copy.gamma], srcRect, bpmem.copyfilter.GetCoefficients(), yScale, bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom); } else { diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index bdd7cd5ec5..523dff0e5b 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -1222,6 +1222,147 @@ void Renderer::UpdateWidescreenHeuristic() m_was_orthographically_anamorphic = ortho_looks_anamorphic; } +void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks, float gamma, const MathUtil::Rectangle& srcRect, const CopyFilterCoefficients::Values& filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom) +{ + if (SConfig::GetInstance().bWii) + m_is_game_widescreen = Config::Get(Config::SYSCONF_WIDESCREEN); + + // suggested_aspect_mode overrides SYSCONF_WIDESCREEN + if (g_ActiveConfig.suggested_aspect_mode == AspectMode::Analog) + m_is_game_widescreen = false; + else if (g_ActiveConfig.suggested_aspect_mode == AspectMode::AnalogWide) + m_is_game_widescreen = true; + + // If widescreen hack is disabled override game's AR if UI is set to 4:3 or 16:9. + if (!g_ActiveConfig.bWidescreenHack) + { + const auto aspect_mode = g_ActiveConfig.aspect_mode; + if (aspect_mode == AspectMode::Analog) + m_is_game_widescreen = false; + else if (aspect_mode == AspectMode::AnalogWide) + m_is_game_widescreen = true; + } + + if (xfb_addr && fb_width && fb_stride && fb_height) + { + // Get the current XFB from texture cache + MathUtil::Rectangle xfb_rect; + const auto* xfb_entry = + g_texture_cache->GetXFBTexture(xfb_addr, fb_width, fb_height, fb_stride, &xfb_rect, gamma, srcRect, filter_coefficients, y_scale, clamp_top, clamp_bottom); + if (xfb_entry && + (!g_ActiveConfig.bSkipPresentingDuplicateXFBs || xfb_entry->id != m_last_xfb_id)) + { + const bool is_duplicate_frame = xfb_entry->id == m_last_xfb_id; + m_last_xfb_id = xfb_entry->id; + + // Since we use the common pipelines here and draw vertices if a batch is currently being + // built by the vertex loader, we end up trampling over its pointer, as we share the buffer + // with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this. + //g_vertex_manager->Flush(); + + // Render any UI elements to the draw list. + { + auto lock = GetImGuiLock(); + +#ifdef IS_PLAYBACK + if (SConfig::GetInstance().m_slippiEnableSeek && g_replayComm->getSettings().rollbackDisplayMethod == "off" && g_playbackStatus->inSlippiPlayback) + OSD::DrawSlippiPlaybackControls(); +#endif + + DrawDebugText(); + OSD::DrawMessages(); + + ImGui::Render(); + } + + // Render the XFB to the screen. + BeginUtilityDrawing(); + if (!IsHeadless()) + { + BindBackbuffer({ {0.0f, 0.0f, 0.0f, 1.0f} }); + + if (!is_duplicate_frame) + UpdateWidescreenHeuristic(); + + UpdateDrawRectangle(); + + // Adjust the source rectangle instead of using an oversized viewport to render the XFB. + auto render_target_rc = GetTargetRectangle(); + auto render_source_rc = xfb_rect; + AdjustRectanglesToFitBounds(&render_target_rc, &render_source_rc, m_backbuffer_width, + m_backbuffer_height); + RenderXFBToScreen(render_target_rc, xfb_entry->texture.get(), render_source_rc); + + DrawImGui(); + + // Present to the window system. + { + std::lock_guard guard(m_swap_mutex); + PresentBackbuffer(); + } + + // Update the window size based on the frame that was just rendered. + // Due to depending on guest state, we need to call this every frame. + SetWindowSize(xfb_rect.GetWidth(), xfb_rect.GetHeight()); + } + + if (!is_duplicate_frame) + { + m_fps_counter.Update(); + + if (IsFrameDumping()) + DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks); + + // Begin new frame + m_frame_count++; + g_stats.ResetFrame(); + } + + g_shader_cache->RetrieveAsyncShaders(); + g_vertex_manager->OnEndFrame(); + BeginImGuiFrame(); + + // We invalidate the pipeline object at the start of the frame. + // This is for the rare case where only a single pipeline configuration is used, + // and hybrid ubershaders have compiled the specialized shader, but without any + // state changes the specialized shader will not take over. + g_vertex_manager->InvalidatePipelineObject(); + + // Flush any outstanding EFB copies to RAM, in case the game is running at an uncapped frame + // rate and not waiting for vblank. Otherwise, we'd end up with a huge list of pending copies. + g_texture_cache->FlushEFBCopies(); + + if (!is_duplicate_frame) + { + // Remove stale EFB/XFB copies. + g_texture_cache->Cleanup(m_frame_count); + Core::Callback_FramePresented(); + } + + // Handle any config changes, this gets propogated to the backend. + CheckForConfigChanges(); + g_Config.iSaveTargetId = 0; + + EndUtilityDrawing(); + } + else + { + Flush(); + } + + // Update our last xfb values + m_last_xfb_addr = xfb_addr; + m_last_xfb_ticks = ticks; + m_last_xfb_width = fb_width; + m_last_xfb_stride = fb_stride; + m_last_xfb_height = fb_height; + } + else + { + Flush(); + } +} + void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks) { if (SConfig::GetInstance().bWii) diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 03f756d55a..3d6a6f4da6 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -220,6 +220,7 @@ public: // Finish up the current frame, print some stats void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks); + void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks, float gamma, const MathUtil::Rectangle& srcRect, const CopyFilterCoefficients::Values& filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom); void UpdateWidescreenHeuristic(); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 5a298273d3..d8cf88b045 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1695,7 +1695,7 @@ static void GetDisplayRectForXFBEntry(TextureCacheBase::TCacheEntry* entry, u32 TextureCacheBase::TCacheEntry* TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, - MathUtil::Rectangle* display_rect) + MathUtil::Rectangle* display_rect) { const u8* src_data = Memory::GetPointer(address); if (!src_data) @@ -1724,9 +1724,9 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, // Create a new VRAM texture, and fill it with the data from guest RAM. entry = AllocateCacheEntry(TextureConfig(width, height, 1, 1, 1, AbstractTextureFormat::RGBA8, - AbstractTextureFlag_RenderTarget)); + AbstractTextureFlag_RenderTarget)); entry->SetGeneralParameters(address, total_size, - TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true); + TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true); entry->SetDimensions(width, height, 1); entry->SetHashes(hash, hash); entry->SetXfbCopy(stride); @@ -1735,8 +1735,8 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, entry->may_have_overlapping_textures = false; entry->frameCount = FRAMECOUNT_INVALID; if (!g_ActiveConfig.UseGPUTextureDecoding() || - !DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height, - width, height, stride, texMem, entry->format.tlutfmt)) + !DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height, + width, height, stride, texMem, entry->format.tlutfmt)) { const u32 decoded_size = width * height * sizeof(u32); CheckTempSize(decoded_size); @@ -1758,7 +1758,213 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, // While this isn't really an xfb copy, we can treat it as such for dumping purposes static int xfb_count = 0; entry->texture->Save( - fmt::format("{}xfb_loaded_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++), 0); + fmt::format("{}xfb_loaded_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++), 0); + } + + GetDisplayRectForXFBEntry(entry, width, height, display_rect); + return entry; +} + +TextureCacheBase::TCacheEntry* +TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, + MathUtil::Rectangle* display_rect, float gamma, const MathUtil::Rectangle& src_rect, const CopyFilterCoefficients::Values& copy_filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom) +{ + auto filter_coefficients = GetVRAMCopyFilterCoefficients(copy_filter_coefficients); + const u8* src_data = Memory::GetPointer(address); + if (!src_data) + { + ERROR_LOG(VIDEO, "Trying to load XFB texture from invalid address 0x%8x", address); + return nullptr; + } + + // Compute total texture size. XFB textures aren't tiled, so this is simple. + const u32 total_size = height * stride; + const u64 hash = Common::GetHash64(src_data, total_size, 0); + + TCacheEntry* entry = nullptr; + const TextureConfig config(width, height, 1, g_framebuffer_manager->GetEFBLayers(), + 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget); + entry = AllocateCacheEntry(config); + + auto baseFormat = TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat::XFB); + u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat); + const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat); + + // Round up source height to multiple of block size + u32 actualHeight = Common::AlignUp(height, blockH); + const u32 actualWidth = Common::AlignUp(width, blockW); + + u32 num_blocks_y = actualHeight / blockH; + const u32 num_blocks_x = actualWidth / blockW; + + // RGBA takes two cache lines per block; all others take one + const u32 bytes_per_block = baseFormat == TextureFormat::RGBA8 ? 64 : 32; + + const u32 bytes_per_row = num_blocks_x * bytes_per_block; + const u32 covered_range = num_blocks_y * stride; + + if (entry) + { + entry->SetGeneralParameters(address, 0, TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat::XFB), true); + entry->SetDimensions(width, height, 1); + entry->frameCount = FRAMECOUNT_INVALID; + entry->should_force_safe_hashing = true; + entry->SetXfbCopy(stride); + entry->may_have_overlapping_textures = false; + entry->is_custom_tex = false; + + + /////////////////////// + // Flush EFB pokes first, as they're expected to be included. + g_framebuffer_manager->FlushEFBPokes(); + + // Get the pipeline which we will be using. If the compilation failed, this will be null. + const AbstractPipeline* copy_pipeline = + g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( + EFBCopyFormat::XFB, false, false, false, + NeedsCopyFilterInShader(filter_coefficients))); + if (!copy_pipeline) + { + WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); + return entry; + } + + const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle( + scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer()); + AbstractTexture* src_texture = g_framebuffer_manager->ResolveEFBColorTexture(framebuffer_rect); + + src_texture->FinishedRendering(); + g_renderer->BeginUtilityDrawing(); + + const bool linear_filter = g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; + + // Fill uniform buffer. + struct Uniforms + { + float src_left, src_top, src_width, src_height; + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + Uniforms uniforms; + const float rcp_efb_width = 1.0f / static_cast(g_framebuffer_manager->GetEFBWidth()); + const float rcp_efb_height = 1.0f / static_cast(g_framebuffer_manager->GetEFBHeight()); + uniforms.src_left = framebuffer_rect.left * rcp_efb_width; + uniforms.src_top = framebuffer_rect.top * rcp_efb_height; + uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; + uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; + uniforms.filter_coefficients[0] = filter_coefficients.upper; + uniforms.filter_coefficients[1] = filter_coefficients.middle; + uniforms.filter_coefficients[2] = filter_coefficients.lower; + uniforms.gamma_rcp = 1.0f / gamma; + uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f; + uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f; + uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT; + uniforms.padding = 0; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + // Use the copy pipeline to render the VRAM copy. + g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect()); + g_renderer->SetPipeline(copy_pipeline); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + entry->texture->FinishedRendering(); + + //////////////////////////////////////////////////////////////////////////// + + if (g_ActiveConfig.bDumpXFBTarget) + { + static int xfb_count = 0; + entry->texture->Save( + fmt::format("{}xfb_copy_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++), + 0); + } + u8* dst = Memory::GetPointer(address); + UninitializeXFBMemory(dst, stride, bytes_per_row, num_blocks_y); + + // Invalidate all textures, if they are either fully overwritten by our efb copy, or if they + // have a different stride than our efb copy. Partly overwritten textures with the same stride + // as our efb copy are marked to check them for partial texture updates. + // TODO: The logic to detect overlapping strided efb copies is not 100% accurate. + bool strided_efb_copy = stride != bytes_per_row; + auto iter = FindOverlappingTextures(address, covered_range); + while (iter.first != iter.second) + { + TCacheEntry* overlapping_entry = iter.first->second; + + if (overlapping_entry->addr == address && overlapping_entry->is_xfb_copy) + { + for (auto& reference : overlapping_entry->references) + { + reference->reference_changed = true; + } + } + + if (overlapping_entry->OverlapsMemoryRange(address, covered_range)) + { + u32 overlap_range = std::min(overlapping_entry->addr + overlapping_entry->size_in_bytes, + address + covered_range) - + std::max(overlapping_entry->addr, address); + if (overlapping_entry->memory_stride != stride || + (!strided_efb_copy && overlapping_entry->size_in_bytes == overlap_range) || + (strided_efb_copy && overlapping_entry->size_in_bytes == overlap_range && + overlapping_entry->addr == address)) + { + // Pending EFB copies which are completely covered by this new copy can simply be tossed, + // instead of having to flush them later on, since this copy will write over everything. + iter.first = InvalidateTexture(iter.first, true); + continue; + } + + // We don't want to change the may_have_overlapping_textures flag on XFB container entries + // because otherwise they can't be re-used/updated, leaking textures for several frames. + if (!overlapping_entry->is_xfb_container) + overlapping_entry->may_have_overlapping_textures = true; + + // Do not load textures by hash, if they were at least partly overwritten by an efb copy. + // In this case, comparing the hash is not enough to check, if two textures are identical. + if (overlapping_entry->textures_by_hash_iter != textures_by_hash.end()) + { + textures_by_hash.erase(overlapping_entry->textures_by_hash_iter); + overlapping_entry->textures_by_hash_iter = textures_by_hash.end(); + } + } + ++iter.first; + } + + if (OpcodeDecoder::g_record_fifo_data) + { + // Mark the memory behind this efb copy as dynamicly generated for the Fifo log + for (u32 i = 0; i < num_blocks_y; i++) + { + FifoRecorder::GetInstance().UseMemory(address, bytes_per_row, MemoryUpdate::TEXTURE_MAP, + true); + address += stride; + } + } + + // Even if the copy is deferred, still compute the hash. This way if the copy is used as a texture + // in a subsequent draw before it is flushed, it will have the same hash. + if (entry) + { + const u64 entry_hash = entry->CalculateHash(); + entry->SetHashes(entry_hash, entry_hash); + textures_by_address.emplace(address, entry); + } + } + + if (entry && entry->is_xfb_container) + { + StitchXFBCopy(entry); + entry->texture->FinishedRendering(); } GetDisplayRectForXFBEntry(entry, width, height, display_rect); @@ -2633,7 +2839,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop const EFBCopyFilterCoefficients& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. - g_framebuffer_manager->FlushEFBPokes(); + //g_framebuffer_manager->FlushEFBPokes(); // Get the pipeline which we will be using. If the compilation failed, this will be null. const AbstractPipeline* copy_pipeline = diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index e8ecf5b6fe..bb7bd838f0 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -222,6 +222,8 @@ public: u32 tmem_address_odd = 0); TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, MathUtil::Rectangle* display_rect); + TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, + MathUtil::Rectangle* display_rect, float gamma, const MathUtil::Rectangle& src_rect, const CopyFilterCoefficients::Values& copy_filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom); virtual void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,