mirror of
				https://github.com/dolphin-emu/dolphin.git
				synced 2025-10-25 01:19:19 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			290 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			290 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2019 Dolphin Emulator Project
 | |
| // Licensed under GPLv2+
 | |
| // Refer to the license.txt file included.
 | |
| 
 | |
| #include "VideoCommon/TMEM.h"
 | |
| 
 | |
| #include <array>
 | |
| 
 | |
| #include "Common/ChunkFile.h"
 | |
| #include "VideoCommon/BPMemory.h"
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // TMEM emulation tracks which textures should be cached in TMEM on a real console.
 | |
| // There are two good reasons to do this:
 | |
| //
 | |
| // 1. Some games deliberately avoid invalidating a texture, overwrite it with an EFB copy,
 | |
| //    and then expect the original texture to still be found in TMEM for another draw call.
 | |
| //    Spyro: A Hero's Tail is known for using such overwritten textures.
 | |
| //    However, other games like:
 | |
| //      * Sonic Riders
 | |
| //      * Metal Arms: Glitch in the System
 | |
| //      * Godzilla: Destroy All Monsters Melee
 | |
| //      * NHL Slapshot
 | |
| //      * Tak and the Power of Juju
 | |
| //      * Night at the Museum: Battle of the Smithsonian
 | |
| //      * 428: Fūsa Sareta Shibuya de
 | |
| //    are known to (accidentally or deliberately) avoid invalidating and then expect the pattern
 | |
| //    of the draw and the fact that the whole texture doesn't fit in TMEM to self-invalidate the
 | |
| //    texture. These are usually full-screen efb copies.
 | |
| //    So we must track the size of the textures as a heuristic to see if they will self-invalidate
 | |
| //    or not.
 | |
| //
 | |
| // 2. It actually improves Dolphin's performance in safer texture hashing modes, by reducing the
 | |
| //    number of times a texture needs to be hashed when reused in subsequent draws.
 | |
| //
 | |
| // As a side-effect, TMEM emulation also tracks if the texture unit configuration has changed at
 | |
| // all, which Dolphin's TextureCache takes advantage of.
 | |
| //
 | |
| ////////////////////////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // Checking if a texture fits in TMEM or not is complicated by the fact that Flipper's TMEM is quite
 | |
| // configurable.
 | |
| // Each of the eight texture units has two banks (even and odd) that can be pointed at any offset
 | |
| // and set to any size. It is completely valid to have overlapping banks, and performance can be
 | |
| // improved by overlapping the caches of texture units that are drawing the same textures.
 | |
| //
 | |
| // For trilinear textures, the even/odd banks contain the even/odd LODs of the texture. TMEM has two
 | |
| // banks of 512KB each, covering the upper and lower halves of TMEM's address space. The two banks
 | |
| // be accessed simultaneously, allowing a trilinear texture sample to be completed at the same cost
 | |
| // as a bilinear sample, assuming the even and odd banks are mapped onto different banks.
 | |
| //
 | |
| // 32bit textures are actually stored as two 16bit textures in separate banks, allowing a bilinear
 | |
| // sample of a 32bit texture at the same cost as a 16bit bilinear/trilinear sample. A trilinear
 | |
| // sample of a 32bit texture costs more.
 | |
| //
 | |
| // TODO: I'm not sure if it's valid for a texture unit's even and odd banks to overlap. There might
 | |
| //       actually be a hard requirement for even and odd banks to live in different banks of TMEM.
 | |
| //
 | |
| // Note: This is still very much a heuristic.
 | |
| //       Actually knowing if a texture is partially or fully cached within TMEM would require
 | |
| //       extensive software rasterization, or sampler feedback from a hardware backend.
 | |
| //
 | |
| ////////////////////////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| namespace TMEM
 | |
| {
 | |
| struct TextureUnitState
 | |
| {
 | |
|   enum class State
 | |
|   {
 | |
|     // Cache is invalid. Configuration has changed
 | |
|     INVALID,
 | |
| 
 | |
|     // Valid, but not cached due to either being too big, or overlapping with another texture unit
 | |
|     VALID,
 | |
| 
 | |
|     // Texture unit has cached all of the previous draw
 | |
|     CACHED,
 | |
|   };
 | |
| 
 | |
|   struct BankConfig
 | |
|   {
 | |
|     u32 width = 0;
 | |
|     u32 height = 0;
 | |
|     u32 base = 0;
 | |
|     u32 size = 0;
 | |
|     bool Overlaps(const BankConfig& other) const;
 | |
|   };
 | |
| 
 | |
|   BankConfig even = {};
 | |
|   BankConfig odd = {};
 | |
|   State state = State::INVALID;
 | |
| 
 | |
|   bool Overlaps(const TextureUnitState& other) const;
 | |
| };
 | |
| 
 | |
| static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config);
 | |
| 
 | |
| static std::array<TextureUnitState, 8> s_unit;
 | |
| 
 | |
| // On TMEM configuration changed:
 | |
| // 1. invalidate stage.
 | |
| 
 | |
| void ConfigurationChanged(TexUnitAddress bp_addr, u32 config)
 | |
| {
 | |
|   TextureUnitState& unit_state = s_unit[bp_addr.GetUnitID()];
 | |
| 
 | |
|   // If anything has changed, we can't assume existing state is still valid.
 | |
|   unit_state.state = TextureUnitState::State::INVALID;
 | |
| 
 | |
|   // Note: BPStructs has already filtered out NOP changes before calling us
 | |
|   switch (bp_addr.Reg)
 | |
|   {
 | |
|   case TexUnitAddress::Register::SETIMAGE1:
 | |
|   {
 | |
|     // Image Type and Even bank's Cache Height, Cache Width, TMEM Offset
 | |
|     TexImage1 even = {.hex = config};
 | |
|     unit_state.even = {even.cache_width, even.cache_height, even.tmem_even << 5, 0};
 | |
|     break;
 | |
|   }
 | |
|   case TexUnitAddress::Register::SETIMAGE2:
 | |
|   {
 | |
|     // Odd bank's Cache Height, Cache Width, TMEM Offset
 | |
|     TexImage2 odd = {.hex = config};
 | |
|     unit_state.odd = {odd.cache_width, odd.cache_height, odd.tmem_odd << 5, 0};
 | |
|     break;
 | |
|   }
 | |
|   default:
 | |
|     // Something else has changed
 | |
|     return;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void InvalidateAll()
 | |
| {
 | |
|   for (auto& unit : s_unit)
 | |
|   {
 | |
|     unit.state = TextureUnitState::State::INVALID;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // On invalidate cache:
 | |
| // 1. invalidate all texture units.
 | |
| 
 | |
| void Invalidate([[maybe_unused]] u32 param)
 | |
| {
 | |
|   // The exact arguments of Invalidate commands is currently unknown.
 | |
|   // It appears to contain the TMEM address and a size.
 | |
| 
 | |
|   // For simplicity, we will just invalidate everything
 | |
|   InvalidateAll();
 | |
| }
 | |
| 
 | |
| // On bind:
 | |
| // 1. use mipmapping/32bit status to calculate final sizes
 | |
| // 2. if texture size is small enough to fit in region mark as cached.
 | |
| //    otherwise, mark as valid
 | |
| 
 | |
| void Bind(u32 unit, int width, int height, bool is_mipmapped, bool is_32_bit)
 | |
| {
 | |
|   TextureUnitState& unit_state = s_unit[unit];
 | |
| 
 | |
|   // All textures use the even bank.
 | |
|   // It holds the level 0 mipmap (and other even mipmap LODs, if mipmapping is enabled)
 | |
|   unit_state.even.size = CalculateUnitSize(unit_state.even);
 | |
| 
 | |
|   bool fits = (width * height * 32U) <= unit_state.even.size;
 | |
| 
 | |
|   if (is_mipmapped || is_32_bit)
 | |
|   {
 | |
|     // And the odd bank is enabled when either mipmapping is enabled or the texture is 32 bit
 | |
|     // It holds the Alpha and Red channels of 32 bit textures or the odd layers of a mipmapped
 | |
|     // texture
 | |
|     unit_state.odd.size = CalculateUnitSize(unit_state.odd);
 | |
| 
 | |
|     fits = fits && (width * height * 32U) <= unit_state.odd.size;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     unit_state.odd.size = 0;
 | |
|   }
 | |
| 
 | |
|   if (is_mipmapped)
 | |
|   {
 | |
|     // TODO: This is what games appear to expect from hardware. But seems odd, as it doesn't line up
 | |
|     //       with how much extra memory is required for mipmapping, just 33% more.
 | |
|     //       Hardware testing is required to see exactly what gets used.
 | |
| 
 | |
|     // When mipmapping is enabled, the even bank is doubled in size
 | |
|     // The extended region holds the remaining even mipmap layers
 | |
|     unit_state.even.size *= 2;
 | |
| 
 | |
|     if (is_32_bit)
 | |
|     {
 | |
|       // When a 32bit texture is mipmapped, the odd bank is also doubled in size
 | |
|       unit_state.odd.size *= 2;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   unit_state.state = fits ? TextureUnitState::State::CACHED : TextureUnitState::State::VALID;
 | |
| }
 | |
| 
 | |
| static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config)
 | |
| {
 | |
|   u32 width = bank_config.width;
 | |
|   u32 height = bank_config.height;
 | |
| 
 | |
|   // These are the only cache sizes supported by the sdk
 | |
|   if (width == height)
 | |
|   {
 | |
|     switch (width)
 | |
|     {
 | |
|     case 3:  // 32KB
 | |
|       return 32 * 1024;
 | |
|     case 4:  // 128KB
 | |
|       return 128 * 1024;
 | |
|     case 5:  // 512KB
 | |
|       return 512 * 1024;
 | |
|     default:
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // However, the registers allow a much larger amount of configurablity.
 | |
|   // Maybe other sizes are broken?
 | |
|   // Until hardware tests are done, this is a guess at the size algorithm
 | |
| 
 | |
|   return 512 * (1 << width) * (1 << height);
 | |
| }
 | |
| 
 | |
| bool TextureUnitState::BankConfig::Overlaps(const BankConfig& other) const
 | |
| {
 | |
|   if (size == 0 || other.size == 0)
 | |
|     return false;
 | |
|   return (base <= other.base && (base + size) > other.base) ||
 | |
|          (other.base <= base && (other.base + other.size) > base);
 | |
| }
 | |
| 
 | |
| bool TextureUnitState::Overlaps(const TextureUnitState& other) const
 | |
| {
 | |
|   if (state == TextureUnitState::State::INVALID || other.state == TextureUnitState::State::INVALID)
 | |
|     return false;
 | |
|   return even.Overlaps(other.even) || even.Overlaps(other.odd) || odd.Overlaps(other.even) ||
 | |
|          odd.Overlaps(other.odd);
 | |
| }
 | |
| 
 | |
| // Scans though active texture units checks for overlaps.
 | |
| void FinalizeBinds(BitSet32 used_textures)
 | |
| {
 | |
|   for (u32 i : used_textures)
 | |
|   {
 | |
|     if (s_unit[i].even.Overlaps(s_unit[i].odd))
 | |
|     {  // Self-overlap
 | |
|       s_unit[i].state = TextureUnitState::State::VALID;
 | |
|     }
 | |
|     for (size_t j = 0; j < s_unit.size(); j++)
 | |
|     {
 | |
|       if (j != i && s_unit[i].Overlaps(s_unit[j]))
 | |
|       {
 | |
|         // There is an overlap, downgrade both from CACHED
 | |
|         // (for there to be an overlap, both must have started as valid or cached)
 | |
|         s_unit[i].state = TextureUnitState::State::VALID;
 | |
|         s_unit[j].state = TextureUnitState::State::VALID;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool IsCached(u32 unit)
 | |
| {
 | |
|   return s_unit[unit].state == TextureUnitState::State::CACHED;
 | |
| }
 | |
| 
 | |
| bool IsValid(u32 unit)
 | |
| {
 | |
|   return s_unit[unit].state != TextureUnitState::State::INVALID;
 | |
| }
 | |
| 
 | |
| void Init()
 | |
| {
 | |
|   s_unit.fill({});
 | |
| }
 | |
| 
 | |
| void DoState(PointerWrap& p)
 | |
| {
 | |
|   p.DoArray(s_unit);
 | |
| }
 | |
| 
 | |
| }  // namespace TMEM
 |