move a lot of stuff around and hope it does something

This commit is contained in:
R2DLiu 2020-08-16 01:55:32 -04:00
commit 1198fd6f87
5 changed files with 367 additions and 14 deletions

View file

@ -300,19 +300,22 @@ static void BPWritten(const BPCmd& bp)
destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom,
bpmem.copyTexSrcWH.x + 1, destStride, height, yScale);
bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
g_texture_cache->CopyRenderTargetToTexture(
destAddr, EFBCopyFormat::XFB, copy_width, height, destStride, is_depth_copy, srcRect,
false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
//bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
//auto one = std::chrono::high_resolution_clock::now();
//g_texture_cache->CopyRenderTargetToTexture(
// destAddr, EFBCopyFormat::XFB, copy_width, height, destStride, is_depth_copy, srcRect,
// false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
// bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
//auto two = std::chrono::high_resolution_clock::now();
//WARN_LOG(SLIPPI, "timer: %d", (two - one).count());
// This stays in to signal end of a "frame"
g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
if (g_ActiveConfig.bImmediateXFB)
{
// below div two to convert from bytes to pixels - it expects width, not stride
g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks());
g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks(), s_gammaLUT[PE_copy.gamma], srcRect, bpmem.copyfilter.GetCoefficients(), yScale, bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom);
}
else
{

View file

@ -1222,6 +1222,147 @@ void Renderer::UpdateWidescreenHeuristic()
m_was_orthographically_anamorphic = ortho_looks_anamorphic;
}
// Presents the XFB located at xfb_addr and finishes the current frame.
//
// This overload takes, in addition to the XFB location/size and tick count, the EFB copy
// parameters (gamma, srcRect, filter_coefficients, y_scale, clamp_top, clamp_bottom). They are
// not interpreted here; they are forwarded unchanged to TextureCacheBase::GetXFBTexture.
//
// xfb_addr, fb_width, fb_stride, fb_height: describe the XFB to present. If any of them is
//   zero, nothing is presented and only Flush() is called.
// ticks: passed to DumpCurrentFrame() when frame dumping, and stored in m_last_xfb_ticks.
//
// NOTE(review): the body looks like a copy of the five-argument Swap() with only the
// GetXFBTexture call changed - confirm against that overload before editing one of them.
void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks, float gamma, const MathUtil::Rectangle<int>& srcRect, const CopyFilterCoefficients::Values& filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom)
{
// On Wii, start from the SYSCONF widescreen setting.
if (SConfig::GetInstance().bWii)
m_is_game_widescreen = Config::Get(Config::SYSCONF_WIDESCREEN);
// suggested_aspect_mode overrides SYSCONF_WIDESCREEN
if (g_ActiveConfig.suggested_aspect_mode == AspectMode::Analog)
m_is_game_widescreen = false;
else if (g_ActiveConfig.suggested_aspect_mode == AspectMode::AnalogWide)
m_is_game_widescreen = true;
// If widescreen hack is disabled override game's AR if UI is set to 4:3 or 16:9.
if (!g_ActiveConfig.bWidescreenHack)
{
const auto aspect_mode = g_ActiveConfig.aspect_mode;
if (aspect_mode == AspectMode::Analog)
m_is_game_widescreen = false;
else if (aspect_mode == AspectMode::AnalogWide)
m_is_game_widescreen = true;
}
// Only attempt to present when the XFB address and all dimensions are non-zero.
if (xfb_addr && fb_width && fb_stride && fb_height)
{
// Get the current XFB from texture cache
// The EFB copy parameters received by this overload are passed straight through.
MathUtil::Rectangle<int> xfb_rect;
const auto* xfb_entry =
g_texture_cache->GetXFBTexture(xfb_addr, fb_width, fb_height, fb_stride, &xfb_rect, gamma, srcRect, filter_coefficients, y_scale, clamp_top, clamp_bottom);
// Present when an entry was returned and it is either a new XFB (id differs from the last
// presented one) or skipping of duplicate XFBs is disabled.
if (xfb_entry &&
(!g_ActiveConfig.bSkipPresentingDuplicateXFBs || xfb_entry->id != m_last_xfb_id))
{
// Can only be true here when bSkipPresentingDuplicateXFBs is off (see the condition above).
const bool is_duplicate_frame = xfb_entry->id == m_last_xfb_id;
m_last_xfb_id = xfb_entry->id;
// Since we use the common pipelines here and draw vertices if a batch is currently being
// built by the vertex loader, we end up trampling over its pointer, as we share the buffer
// with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this.
// NOTE(review): the flush described above is commented out in this overload - confirm that
// skipping it is intentional and safe.
//g_vertex_manager->Flush();
// Render any UI elements to the draw list.
{
auto lock = GetImGuiLock();
#ifdef IS_PLAYBACK
// Playback builds only: draw the Slippi playback controls when seeking is enabled, the
// rollback display method is "off", and a Slippi playback is in progress.
if (SConfig::GetInstance().m_slippiEnableSeek && g_replayComm->getSettings().rollbackDisplayMethod == "off" && g_playbackStatus->inSlippiPlayback)
OSD::DrawSlippiPlaybackControls();
#endif
DrawDebugText();
OSD::DrawMessages();
ImGui::Render();
}
// Render the XFB to the screen.
BeginUtilityDrawing();
// Headless renderers skip drawing to and presenting the backbuffer.
if (!IsHeadless())
{
BindBackbuffer({ {0.0f, 0.0f, 0.0f, 1.0f} });
if (!is_duplicate_frame)
UpdateWidescreenHeuristic();
UpdateDrawRectangle();
// Adjust the source rectangle instead of using an oversized viewport to render the XFB.
auto render_target_rc = GetTargetRectangle();
auto render_source_rc = xfb_rect;
AdjustRectanglesToFitBounds(&render_target_rc, &render_source_rc, m_backbuffer_width,
m_backbuffer_height);
RenderXFBToScreen(render_target_rc, xfb_entry->texture.get(), render_source_rc);
DrawImGui();
// Present to the window system.
{
std::lock_guard<std::mutex> guard(m_swap_mutex);
PresentBackbuffer();
}
// Update the window size based on the frame that was just rendered.
// Due to depending on guest state, we need to call this every frame.
SetWindowSize(xfb_rect.GetWidth(), xfb_rect.GetHeight());
}
// Per-frame bookkeeping is only done for new (non-duplicate) frames.
if (!is_duplicate_frame)
{
m_fps_counter.Update();
if (IsFrameDumping())
DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks);
// Begin new frame
m_frame_count++;
g_stats.ResetFrame();
}
g_shader_cache->RetrieveAsyncShaders();
g_vertex_manager->OnEndFrame();
BeginImGuiFrame();
// We invalidate the pipeline object at the start of the frame.
// This is for the rare case where only a single pipeline configuration is used,
// and hybrid ubershaders have compiled the specialized shader, but without any
// state changes the specialized shader will not take over.
g_vertex_manager->InvalidatePipelineObject();
// Flush any outstanding EFB copies to RAM, in case the game is running at an uncapped frame
// rate and not waiting for vblank. Otherwise, we'd end up with a huge list of pending copies.
g_texture_cache->FlushEFBCopies();
if (!is_duplicate_frame)
{
// Remove stale EFB/XFB copies.
g_texture_cache->Cleanup(m_frame_count);
Core::Callback_FramePresented();
}
// Handle any config changes, this gets propagated to the backend.
CheckForConfigChanges();
g_Config.iSaveTargetId = 0;
EndUtilityDrawing();
}
else
{
// No XFB entry, or a duplicate that is being skipped: nothing is presented.
Flush();
}
// Update our last xfb values
// This happens whether or not the XFB was actually presented above.
m_last_xfb_addr = xfb_addr;
m_last_xfb_ticks = ticks;
m_last_xfb_width = fb_width;
m_last_xfb_stride = fb_stride;
m_last_xfb_height = fb_height;
}
else
{
// Zero address or dimensions: nothing to present.
Flush();
}
}
void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks)
{
if (SConfig::GetInstance().bWii)

View file

@ -220,6 +220,7 @@ public:
// Finish up the current frame, print some stats
void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks);
// Overload of Swap that additionally receives the EFB copy parameters (gamma, source rectangle,
// copy filter coefficients, y scale and top/bottom clamp flags).
// NOTE(review): presumably these are only forwarded to the texture cache's XFB lookup - confirm
// against the definition.
void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks, float gamma, const MathUtil::Rectangle<int>& srcRect, const CopyFilterCoefficients::Values& filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom);
void UpdateWidescreenHeuristic();

View file

@ -1695,7 +1695,7 @@ static void GetDisplayRectForXFBEntry(TextureCacheBase::TCacheEntry* entry, u32
TextureCacheBase::TCacheEntry*
TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect)
MathUtil::Rectangle<int>* display_rect)
{
const u8* src_data = Memory::GetPointer(address);
if (!src_data)
@ -1724,9 +1724,9 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
// Create a new VRAM texture, and fill it with the data from guest RAM.
entry = AllocateCacheEntry(TextureConfig(width, height, 1, 1, 1, AbstractTextureFormat::RGBA8,
AbstractTextureFlag_RenderTarget));
AbstractTextureFlag_RenderTarget));
entry->SetGeneralParameters(address, total_size,
TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true);
TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true);
entry->SetDimensions(width, height, 1);
entry->SetHashes(hash, hash);
entry->SetXfbCopy(stride);
@ -1735,8 +1735,8 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
entry->may_have_overlapping_textures = false;
entry->frameCount = FRAMECOUNT_INVALID;
if (!g_ActiveConfig.UseGPUTextureDecoding() ||
!DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height,
width, height, stride, texMem, entry->format.tlutfmt))
!DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height,
width, height, stride, texMem, entry->format.tlutfmt))
{
const u32 decoded_size = width * height * sizeof(u32);
CheckTempSize(decoded_size);
@ -1758,7 +1758,213 @@ TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
// While this isn't really an xfb copy, we can treat it as such for dumping purposes
static int xfb_count = 0;
entry->texture->Save(
fmt::format("{}xfb_loaded_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++), 0);
fmt::format("{}xfb_loaded_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++), 0);
}
GetDisplayRectForXFBEntry(entry, width, height, display_rect);
return entry;
}
TextureCacheBase::TCacheEntry*
TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect, float gamma, const MathUtil::Rectangle<int>& src_rect, const CopyFilterCoefficients::Values& copy_filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom)
{
auto filter_coefficients = GetVRAMCopyFilterCoefficients(copy_filter_coefficients);
const u8* src_data = Memory::GetPointer(address);
if (!src_data)
{
ERROR_LOG(VIDEO, "Trying to load XFB texture from invalid address 0x%8x", address);
return nullptr;
}
// Compute total texture size. XFB textures aren't tiled, so this is simple.
const u32 total_size = height * stride;
const u64 hash = Common::GetHash64(src_data, total_size, 0);
TCacheEntry* entry = nullptr;
const TextureConfig config(width, height, 1, g_framebuffer_manager->GetEFBLayers(),
1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget);
entry = AllocateCacheEntry(config);
auto baseFormat = TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat::XFB);
u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat);
const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);
// Round up source height to multiple of block size
u32 actualHeight = Common::AlignUp(height, blockH);
const u32 actualWidth = Common::AlignUp(width, blockW);
u32 num_blocks_y = actualHeight / blockH;
const u32 num_blocks_x = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = baseFormat == TextureFormat::RGBA8 ? 64 : 32;
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * stride;
if (entry)
{
entry->SetGeneralParameters(address, 0, TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat::XFB), true);
entry->SetDimensions(width, height, 1);
entry->frameCount = FRAMECOUNT_INVALID;
entry->should_force_safe_hashing = true;
entry->SetXfbCopy(stride);
entry->may_have_overlapping_textures = false;
entry->is_custom_tex = false;
///////////////////////
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
EFBCopyFormat::XFB, false, false, false,
NeedsCopyFilterInShader(filter_coefficients)));
if (!copy_pipeline)
{
WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
return entry;
}
const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect);
const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle(
scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer());
AbstractTexture* src_texture = g_framebuffer_manager->ResolveEFBColorTexture(framebuffer_rect);
src_texture->FinishedRendering();
g_renderer->BeginUtilityDrawing();
const bool linear_filter = g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;
// Fill uniform buffer.
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float pixel_height;
u32 padding;
};
Uniforms uniforms;
const float rcp_efb_width = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBWidth());
const float rcp_efb_height = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBHeight());
uniforms.src_left = framebuffer_rect.left * rcp_efb_width;
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.gamma_rcp = 1.0f / gamma;
uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f;
uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT;
uniforms.padding = 0;
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
// Use the copy pipeline to render the VRAM copy.
g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get());
g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect());
g_renderer->SetPipeline(copy_pipeline);
g_renderer->SetTexture(0, src_texture);
g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() :
RenderState::GetPointSamplerState());
g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing();
entry->texture->FinishedRendering();
////////////////////////////////////////////////////////////////////////////
if (g_ActiveConfig.bDumpXFBTarget)
{
static int xfb_count = 0;
entry->texture->Save(
fmt::format("{}xfb_copy_{}.png", File::GetUserPath(D_DUMPTEXTURES_IDX), xfb_count++),
0);
}
u8* dst = Memory::GetPointer(address);
UninitializeXFBMemory(dst, stride, bytes_per_row, num_blocks_y);
// Invalidate all textures, if they are either fully overwritten by our efb copy, or if they
// have a different stride than our efb copy. Partly overwritten textures with the same stride
// as our efb copy are marked to check them for partial texture updates.
// TODO: The logic to detect overlapping strided efb copies is not 100% accurate.
bool strided_efb_copy = stride != bytes_per_row;
auto iter = FindOverlappingTextures(address, covered_range);
while (iter.first != iter.second)
{
TCacheEntry* overlapping_entry = iter.first->second;
if (overlapping_entry->addr == address && overlapping_entry->is_xfb_copy)
{
for (auto& reference : overlapping_entry->references)
{
reference->reference_changed = true;
}
}
if (overlapping_entry->OverlapsMemoryRange(address, covered_range))
{
u32 overlap_range = std::min(overlapping_entry->addr + overlapping_entry->size_in_bytes,
address + covered_range) -
std::max(overlapping_entry->addr, address);
if (overlapping_entry->memory_stride != stride ||
(!strided_efb_copy && overlapping_entry->size_in_bytes == overlap_range) ||
(strided_efb_copy && overlapping_entry->size_in_bytes == overlap_range &&
overlapping_entry->addr == address))
{
// Pending EFB copies which are completely covered by this new copy can simply be tossed,
// instead of having to flush them later on, since this copy will write over everything.
iter.first = InvalidateTexture(iter.first, true);
continue;
}
// We don't want to change the may_have_overlapping_textures flag on XFB container entries
// because otherwise they can't be re-used/updated, leaking textures for several frames.
if (!overlapping_entry->is_xfb_container)
overlapping_entry->may_have_overlapping_textures = true;
// Do not load textures by hash, if they were at least partly overwritten by an efb copy.
// In this case, comparing the hash is not enough to check, if two textures are identical.
if (overlapping_entry->textures_by_hash_iter != textures_by_hash.end())
{
textures_by_hash.erase(overlapping_entry->textures_by_hash_iter);
overlapping_entry->textures_by_hash_iter = textures_by_hash.end();
}
}
++iter.first;
}
if (OpcodeDecoder::g_record_fifo_data)
{
// Mark the memory behind this efb copy as dynamically generated for the Fifo log
for (u32 i = 0; i < num_blocks_y; i++)
{
FifoRecorder::GetInstance().UseMemory(address, bytes_per_row, MemoryUpdate::TEXTURE_MAP,
true);
address += stride;
}
}
// Even if the copy is deferred, still compute the hash. This way if the copy is used as a texture
// in a subsequent draw before it is flushed, it will have the same hash.
if (entry)
{
const u64 entry_hash = entry->CalculateHash();
entry->SetHashes(entry_hash, entry_hash);
textures_by_address.emplace(address, entry);
}
}
if (entry && entry->is_xfb_container)
{
StitchXFBCopy(entry);
entry->texture->FinishedRendering();
}
GetDisplayRectForXFBEntry(entry, width, height, display_rect);
@ -2633,7 +2839,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
const EFBCopyFilterCoefficients& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
//g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =

View file

@ -222,6 +222,8 @@ public:
u32 tmem_address_odd = 0);
TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect);
// Overload of GetXFBTexture that additionally takes EFB copy parameters (gamma, source
// rectangle, copy filter coefficients, y scale and top/bottom clamp flags).
// NOTE(review): the definition appears to draw the EFB region src_rect into a newly allocated
// cache entry instead of decoding the XFB from guest RAM - confirm against the implementation.
TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect, float gamma, const MathUtil::Rectangle<int>& src_rect, const CopyFilterCoefficients::Values& copy_filter_coefficients, float y_scale, bool clamp_top, bool clamp_bottom);
virtual void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,