Extend TMEM cache implementation

Now works with games that deliberately avoid invalidating TMEM because they know textures are too large to fit: * Sonic Riders * Metal Arms: Glitch in the System * Godzilla: Destroy All Monsters Melee * NHL Slapshot * Tak and the Power of Juju * Night at the Museum: Battle of the Smithsonian * 428: Fūsa Sareta Shibuya de
2025-07-05 00:21:39 +00:00 · 2019-09-06 21:22:47 +12:00 · 2019-09-06 21:22:47 +12:00 · 88bd10cd30
commit 88bd10cd30
parent eee302c040
8 changed files with 301 additions and 32 deletions
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@ -43,6 +43,7 @@
 #include "VideoCommon/SamplerCommon.h"
 #include "VideoCommon/ShaderCache.h"
 #include "VideoCommon/Statistics.h"
+#include "VideoCommon/TMEM.h"
 #include "VideoCommon/TextureConversionShader.h"
 #include "VideoCommon/TextureConverterShaderGen.h"
 #include "VideoCommon/TextureDecoder.h"
@ -57,8 +58,6 @@ static const int TEXTURE_POOL_KILL_THRESHOLD = 3;

 std::unique_ptr<TextureCacheBase> g_texture_cache;

-std::bitset<8> TextureCacheBase::valid_bind_points;
-
 TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex,
                                           std::unique_ptr<AbstractFramebuffer> fb)
    : texture(std::move(tex)), framebuffer(std::move(fb))
@ -95,7 +94,7 @@ TextureCacheBase::TextureCacheBase()

  Common::SetHash64Function();

-  InvalidateAllBindPoints();
+  TMEM::InvalidateAll();
 }

 TextureCacheBase::~TextureCacheBase()
@ -123,7 +122,7 @@ bool TextureCacheBase::Initialize()
 void TextureCacheBase::Invalidate()
 {
  FlushEFBCopies();
-  InvalidateAllBindPoints();
+  TMEM::InvalidateAll();

  bound_textures.fill(nullptr);
  for (auto& tex : textures_by_address)
@ -1026,12 +1025,12 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
  g_renderer->SetSamplerState(index, state);
 }

-void TextureCacheBase::BindTextures()
+void TextureCacheBase::BindTextures(BitSet32 used_textures)
 {
  for (u32 i = 0; i < bound_textures.size(); i++)
  {
    const TCacheEntry* tentry = bound_textures[i];
-    if (IsValidBindPoint(i) && tentry)
+    if (used_textures[i] && tentry)
    {
      g_renderer->SetTexture(i, tentry->texture.get());
      PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
@ -1040,6 +1039,8 @@ void TextureCacheBase::BindTextures()
      SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips);
    }
  }
+
+  TMEM::FinalizeBinds(used_textures);
 }

 class ArbitraryMipmapDetector
@ -1190,9 +1191,22 @@ private:
 TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
 {
  // if this stage was not invalidated by changes to texture registers, keep the current texture
-  if (IsValidBindPoint(stage) && bound_textures[stage])
+  if (TMEM::IsValid(stage) && bound_textures[stage])
  {
-    return bound_textures[stage];
+    TCacheEntry* entry = bound_textures[stage];
+    // If the TMEM configuration is such that this texture is more or less guaranteed to still
+    // be in TMEM, then we know we can reuse the old entry without even hashing the memory
+    if (TMEM::IsCached(stage))
+    {
+      return entry;
+    }
+
+    // Otherwise, hash the backing memory and check it's unchanged.
+    // FIXME: this doesn't correctly handle textures from tmem.
+    if (!entry->tmem_only && entry->base_hash == entry->CalculateHash())
+    {
+      return entry;
+    }
  }

  TextureInfo texture_info = TextureInfo::FromStage(stage);
@ -1207,7 +1221,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)

  // We need to keep track of invalided textures until they have actually been replaced or
  // re-loaded
-  valid_bind_points.set(stage);
+  TMEM::Bind(stage, entry->NumBlocksX(), entry->NumBlocksY(), entry->GetNumLevels() > 1,
+             entry->format == TextureFormat::RGBA8);

  return entry;
 }
@ -1510,7 +1525,7 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur
  const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : texture_info.GetLevelCount();

  // We can decode on the GPU if it is a supported format and the flag is enabled.
-  // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
+  // Currently we don't decode RGBA8 textures from TMEM, as that would require copying from both
  // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since
  // there's no conversion between formats. In the future this could be extended with a separate
  // shader, however.
@ -2537,10 +2552,10 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe

  for (size_t i = 0; i < bound_textures.size(); ++i)
  {
-    // If the entry is currently bound and not invalidated, keep it, but mark it as invalidated.
-    // This way it can still be used via tmem cache emulation, but nothing else.
+    // If the entry is currently bound and tmem has it recorded as cached, keep it, but mark it as
+    // invalidated. This way it can still be used via tmem cache emulation, but nothing else.
    // Spyro: A Hero's Tail is known for using such overwritten textures.
-    if (bound_textures[i] == entry && IsValidBindPoint(static_cast<u32>(i)))
+    if (bound_textures[i] == entry && TMEM::IsCached(static_cast<u32>(i)))
    {
      bound_textures[i]->tmem_only = true;
      return ++iter;
@ -2815,18 +2830,21 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con
 }

 u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
+{
+  // RGBA takes two cache lines per block; all others take one
+  const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
+
+  return NumBlocksX() * bytes_per_block;
+}
+
+u32 TextureCacheBase::TCacheEntry::NumBlocksX() const
 {
  const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);

  // Round up source height to multiple of block size
  const u32 actualWidth = Common::AlignUp(native_width, blockW);

-  const u32 numBlocksX = actualWidth / blockW;
-
-  // RGBA takes two cache lines per block; all others take one
-  const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
-
-  return numBlocksX * bytes_per_block;
+  return actualWidth / blockW;
 }

 u32 TextureCacheBase::TCacheEntry::NumBlocksY() const
@ -2883,6 +2901,8 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
 {
  const u32 bytes_per_row = BytesPerRow();
  const u32 hash_sample_size = HashSampleSize();
+
+  // FIXME: textures from tmem won't get the correct hash.
  u8* ptr = Memory::GetPointer(addr);
  if (memory_stride == bytes_per_row)
  {