diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index 1e41dc63c2..ed4c13e2c6 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -211,7 +211,7 @@ void CachedInterpreter::Jit(u32 address) b->codeSize = (u32)(GetCodePtr() - b->checkedEntry); b->originalSize = code_block.m_num_instructions; - m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, b->checkedEntry); + m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } void CachedInterpreter::ClearCache() diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index f7b75223e2..568bfeca55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -590,7 +590,8 @@ void Jit64::Jit(u32 em_address) } JitBlock* b = blocks.AllocateBlock(em_address); - blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC)); + DoJit(em_address, &code_buffer, b, nextPC); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index dac66a2e4b..9674a2e6fe 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -508,7 +508,8 @@ void JitIL::Jit(u32 em_address) } JitBlock* b = blocks.AllocateBlock(em_address); - blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC)); + DoJit(em_address, &code_buffer, b, nextPC); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git 
a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1dfdabe0c7..7ccdb00614 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -399,7 +399,7 @@ void JitArm64::Jit(u32) JitBlock* b = blocks.AllocateBlock(em_address); - const u8* BlockPtr = DoJit(em_address, &code_buffer, b, nextPC); - blocks.FinalizeBlock(*b, jo.enableBlocklink, BlockPtr); + DoJit(em_address, &code_buffer, b, nextPC); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index c2ac9fd07a..f756328804 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -36,6 +36,12 @@ static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata) JitInterface::ClearCache(); } +bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const +{ + return physical_addresses.lower_bound(address) != + physical_addresses.lower_bound(address + length); +} + JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit} { } @@ -64,13 +70,13 @@ void JitBaseBlockCache::Clear() #endif m_jit.js.fifoWriteAddresses.clear(); m_jit.js.pairedQuantizeAddresses.clear(); - for (auto& e : start_block_map) + for (auto& e : block_map) { DestroyBlock(e.second); } - start_block_map.clear(); - links_to.clear(); block_map.clear(); + links_to.clear(); + block_range_map.clear(); valid_block.ClearAll(); @@ -95,14 +101,14 @@ JitBlock** JitBaseBlockCache::GetFastBlockMap() void JitBaseBlockCache::RunOnBlocks(std::function f) { - for (const auto& e : start_block_map) + for (const auto& e : block_map) f(e.second); } JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) { u32 physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; - JitBlock& b = 
start_block_map.emplace(physicalAddress, JitBlock())->second; + JitBlock& b = block_map.emplace(physicalAddress, JitBlock())->second; b.effectiveAddress = em_address; b.physicalAddress = physicalAddress; b.msrBits = MSR & JIT_CACHE_MSR_MASK; @@ -111,30 +117,21 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) return &b; } -void JitBaseBlockCache::FreeBlock(JitBlock* block) -{ - auto iter = start_block_map.equal_range(block->physicalAddress); - while (iter.first != iter.second) - { - if (&iter.first->second == block) - iter.first = start_block_map.erase(iter.first); - else - iter.first++; - } -} - -void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) +void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, + const std::set& physical_addresses) { size_t index = FastLookupIndexForAddress(block.effectiveAddress); fast_block_map[index] = &block; block.fast_block_map_index = index; - u32 pAddr = block.physicalAddress; + block.physical_addresses = physical_addresses; - for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) - valid_block.Set(addr); - - block_map.emplace(std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr), &block); + u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + for (u32 addr : physical_addresses) + { + valid_block.Set(addr / 32); + block_range_map[addr & range_mask].insert(&block); + } if (block_link) { @@ -162,7 +159,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) translated_addr = translated.address; } - auto iter = start_block_map.equal_range(translated_addr); + auto iter = block_map.equal_range(translated_addr); for (; iter.first != iter.second; iter.first++) { JitBlock& b = iter.first->second; @@ -186,7 +183,7 @@ const u8* JitBaseBlockCache::Dispatch() return block->normalEntry; } -void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced) +void JitBaseBlockCache::InvalidateICache(u32 
address, u32 length, bool forced) { auto translated = PowerPC::JitCache_TranslateAddress(address); if (!translated.valid) @@ -203,19 +200,10 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for valid_block.Clear(pAddr / 32); } - // destroy JIT blocks - // !! this works correctly under assumption that any two overlapping blocks end at the same - // address if (destroy_block) { - auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); - while (it != block_map.end() && it->first.second < pAddr + length) - { - JitBlock* block = it->second; - DestroyBlock(*block); - FreeBlock(block); - it = block_map.erase(it); - } + // destroy JIT blocks + ErasePhysicalRange(pAddr, length); // If the code was actually modified, we need to clear the relevant entries from the // FIFO write address cache, so we don't end up with FIFO checks in places they shouldn't @@ -232,6 +220,59 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for } } +// Destroys all JIT blocks occupying at least one address in [address, address + length) +// and removes them from block_map and block_range_map. +void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length) +{ + // Iterate over all macro blocks which overlap the given range. + u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + auto start = block_range_map.lower_bound(address & range_mask); + auto end = block_range_map.lower_bound(address + length); + while (start != end) + { + // Iterate over all blocks in the macro block. + auto iter = start->second.begin(); + while (iter != start->second.end()) + { + JitBlock* block = *iter; + if (block->OverlapsPhysicalRange(address, length)) + { + // If the block overlaps, also remove all other occupied slots in the other macro blocks. + // This will leak empty macro blocks, but they may be reused or cleared later on. + for (u32 addr : block->physical_addresses) + if ((addr & range_mask) != start->first) + block_range_map[addr & range_mask].erase(block); + + // And remove the block. 
+ DestroyBlock(*block); + // block_map is a multimap, so erasing by key would also drop other blocks that share + // this start address without destroying them; erase only the entry holding this block. + auto candidates = block_map.equal_range(block->physicalAddress); + while (candidates.first != candidates.second) + { + if (&candidates.first->second == block) + { + block_map.erase(candidates.first); + break; + } + ++candidates.first; + } + iter = start->second.erase(iter); + } + else + { + iter++; + } + } + + // If the macro block is empty, drop it. + if (start->second.empty()) + start = block_range_map.erase(start); + else + start++; + } +} + u32* JitBaseBlockCache::GetBlockBitSet() const { return valid_block.m_valid_block.get(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index c5aaa8d1e8..85ae3bfbb8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "Common/CommonTypes.h" @@ -24,6 +25,8 @@ class JitBase; // address. struct JitBlock { + bool OverlapsPhysicalRange(u32 address, u32 length) const; + // A special entry point for block linking; usually used to check the // downcount. const u8* checkedEntry; @@ -35,8 +38,8 @@ struct JitBlock // The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK. u32 msrBits; // The physical address of the code represented by this block. - // Various maps in the cache are indexed by this (start_block_map, - // block_map, and valid_block in particular). This is useful because of + // Various maps in the cache are indexed by this (block_map + // and valid_block in particular). This is useful because of // of the way the instruction cache works on PowerPC. u32 physicalAddress; // The number of bytes of JIT'ed code contained in this block. Mostly @@ -57,6 +60,9 @@ struct JitBlock }; std::vector linkData; + // This set stores all physical addresses of all occupied instructions. + std::set physical_addresses; + // we don't really need to save start and stop // TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;) u64 ticStart; // for profiling - time. 
@@ -124,8 +130,7 @@ public: void RunOnBlocks(std::function f); JitBlock* AllocateBlock(u32 em_address); - void FreeBlock(JitBlock* block); - void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr); + void FinalizeBlock(JitBlock& block, bool block_link, const std::set& physical_addresses); // Look for the block in the slow but accurate way. // This function shall be used if FastLookupIndexForAddress() failed. @@ -138,7 +143,8 @@ public: // assembly version.) const u8* Dispatch(); - void InvalidateICache(u32 address, const u32 length, bool forced); + void InvalidateICache(u32 address, u32 length, bool forced); + void ErasePhysicalRange(u32 address, u32 length); u32* GetBlockBitSet() const; @@ -163,20 +169,21 @@ private: // It is used to query all blocks which links to an address. std::multimap links_to; // destination_PC -> number - // Map indexed by the physical memory location. - // It is used to invalidate blocks based on memory location. - std::multimap, JitBlock*> block_map; // (end_addr, start_addr) -> block - // Map indexed by the physical address of the entry point. // This is used to query the block based on the current PC in a slow way. - // TODO: This is redundant with block_map. - std::multimap start_block_map; // start_addr -> block + std::multimap block_map; // start_addr -> block + + // Range of overlapping code indexed by a masked physical address. + // This is used for invalidation of memory regions. The range is grouped + // in macro blocks of 0x100 bytes each. + static constexpr u32 BLOCK_RANGE_MAP_ELEMENTS = 0x100; + std::map> block_range_map; // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. ValidBlockBitSet valid_block; // This array is indexed with the masked PC and likely holds the correct block id. - // This is used as a fast cache of start_block_map used in the assembly dispatcher. 
+ // This is used as a fast cache of block_map used in the assembly dispatcher. std::array fast_block_map; // start_addr & mask -> number }; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 3d989412c3..2fbb94fd85 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -384,7 +384,7 @@ TryReadInstResult TryReadInstruction(u32 address) auto tlb_addr = TranslateAddress(address); if (!tlb_addr.Success()) { - return TryReadInstResult{false, false, 0}; + return TryReadInstResult{false, false, 0, 0}; } else { @@ -403,7 +403,7 @@ TryReadInstResult TryReadInstruction(u32 address) { hex = PowerPC::ppcState.iCache.ReadInstruction(address); } - return TryReadInstResult{true, from_bat, hex}; + return TryReadInstResult{true, from_bat, hex, address}; } u32 HostRead_Instruction(const u32 address) diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 11db99c2e6..6b1942559c 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -646,6 +646,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 block->m_memory_exception = false; block->m_num_instructions = 0; block->m_gqr_used = BitSet8(0); + block->m_physical_addresses.clear(); CodeOp* code = buffer->codebuffer; @@ -653,7 +654,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 u32 return_address = 0; u32 numFollows = 0; u32 num_inst = 0; - bool prev_inst_from_bat = true; for (u32 i = 0; i < blockSize; ++i) { @@ -666,16 +666,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 } UGeckoInstruction inst = result.hex; - // Slight hack: the JIT block cache currently assumes all blocks end at the same place, - // but broken blocks due to page faults break this assumption. Avoid this by just ending - // all virtual memory instruction blocks at page boundaries. 
- // FIXME: improve the JIT block cache so we don't need to do this. - if ((!result.from_bat || !prev_inst_from_bat) && i > 0 && (address & 0xfff) == 0) - { - break; - } - prev_inst_from_bat = result.from_bat; - num_inst++; memset(&code[i], 0, sizeof(CodeOp)); GekkoOPInfo* opinfo = GetOpInfo(inst); @@ -687,6 +677,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 code[i].branchToIndex = -1; code[i].skip = false; block->m_stats->numCycles += opinfo->numCycles; + block->m_physical_addresses.insert(result.physical_address); SetInstructionStats(block, &code[i], opinfo, i); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 42625757b4..02ebc42c18 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -157,6 +158,9 @@ struct CodeBlock // Which GPRs this block reads from before defining, if any. BitSet32 m_gpr_inputs; + + // Which memory locations are occupied by this block. + std::set m_physical_addresses; }; class PPCAnalyzer diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 2d8476e0c2..7bac1d97a2 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -232,6 +232,7 @@ struct TryReadInstResult bool valid; bool from_bat; u32 hex; + u32 physical_address; }; TryReadInstResult TryReadInstruction(const u32 address);