Optimize opcode decoding using a simple list and binary search

This commit is contained in:
gdkchan 2019-04-13 19:25:29 -03:00
parent ae9e285947
commit 5bf433f4b0
2 changed files with 303 additions and 187 deletions

View file

@ -1,33 +1,97 @@
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Decoders
{
class Block
{
public int Index { get; set; }
public ulong Address { get; set; }
public ulong EndAddress { get; set; }
public Block Next { get; set; }
public Block Branch { get; set; }
public List<OpCode> OpCodes { get; }
public List<OpCode> OpCodes { get; }
public List<OpCodeSsy> SsyOpCodes { get; }
public Block(ulong address)
{
Address = address;
OpCodes = new List<OpCode>();
OpCodes = new List<OpCode>();
SsyOpCodes = new List<OpCodeSsy>();
}
//Splits this block in two at the address of rightBlock.
//Every opcode whose address is greater than or equal to rightBlock.Address is
//moved into rightBlock, which also takes over this block's EndAddress, Next and
//Branch links. This block is then truncated so that it ends where rightBlock
//starts and falls through into it (Next = rightBlock, Branch = null).
//Throws ArgumentNullException when rightBlock is null, and ArgumentException
//when no opcode of this block lies at or after rightBlock.Address.
public void Split(Block rightBlock)
{
    if (rightBlock == null)
    {
        throw new ArgumentNullException(nameof(rightBlock));
    }

    //A block without opcodes can't be split. Report it with the same exception
    //used for any other invalid split address, rather than letting the list
    //indexer below fail with an unrelated out of range error.
    if (OpCodes.Count == 0)
    {
        throw new ArgumentException("Can't split at right block address.");
    }

    int splitIndex = BinarySearch(OpCodes, rightBlock.Address);

    //The search returns the last probed index, which may be the opcode right
    //before the split address when there is no exact match. Skip past it.
    if (OpCodes[splitIndex].Address < rightBlock.Address)
    {
        splitIndex++;
    }

    int splitCount = OpCodes.Count - splitIndex;

    if (splitCount <= 0)
    {
        throw new ArgumentException("Can't split at right block address.");
    }

    //The right half inherits the old successors and end address.
    rightBlock.EndAddress = EndAddress;
    rightBlock.Next       = Next;
    rightBlock.Branch     = Branch;

    rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));

    //UpdateSsyOpCodes is declared outside of this view; presumably it rebuilds
    //SsyOpCodes from the current OpCodes list - TODO confirm.
    rightBlock.UpdateSsyOpCodes();

    //The left half (this block) now ends where the right half begins.
    EndAddress = rightBlock.Address;
    Next       = rightBlock;
    Branch     = null;

    OpCodes.RemoveRange(splitIndex, splitCount);

    UpdateSsyOpCodes();
}
//Binary search over a list of opcodes sorted by ascending address.
//Returns the index of the opcode whose address equals the requested one.
//When there is no exact match, the index of the last opcode that was probed
//is returned instead (0 for an empty list), so the caller has to check the
//address found at that index.
private static int BinarySearch(List<OpCode> opCodes, ulong address)
{
    int probe = 0;

    for (int low = 0, high = opCodes.Count - 1; low <= high; )
    {
        probe = low + ((high - low) >> 1);

        OpCode candidate = opCodes[probe];

        if (address == candidate.Address)
        {
            return probe;
        }

        if (address < candidate.Address)
        {
            high = probe - 1;
        }
        else
        {
            low = probe + 1;
        }
    }

    return probe;
}
public OpCode GetLastOp()
{
if (OpCodes.Count > 0)
if (OpCodes.Count != 0)
{
return OpCodes[OpCodes.Count - 1];
}

View file

@ -25,89 +25,108 @@ namespace Ryujinx.Graphics.Shader.Decoders
public static Block[] Decode(IGalMemory memory, ulong address)
{
Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
Dictionary<ulong, Block> visitedEnd = new Dictionary<ulong, Block>();
List<Block> blocks = new List<Block>();
Queue<Block> blocks = new Queue<Block>();
Queue<Block> workQueue = new Queue<Block>();
Block Enqueue(ulong addr)
Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
Block GetBlock(ulong blkAddress)
{
if (!visited.TryGetValue(addr, out Block output))
if (!visited.TryGetValue(blkAddress, out Block block))
{
output = new Block(addr);
block = new Block(blkAddress);
blocks.Enqueue(output);
workQueue.Enqueue(block);
visited.Add(addr, output);
visited.Add(blkAddress, block);
}
return output;
return block;
}
ulong start = address + HeaderSize;
ulong startAddress = address + HeaderSize;
Block entry = Enqueue(start);
GetBlock(startAddress);
while (blocks.TryDequeue(out Block current))
while (workQueue.TryDequeue(out Block currBlock))
{
FillBlock(memory, current, start);
//Set child blocks. "Branch" is the block the branch instruction
//points to (when taken), "Next" is the block at the next address,
//executed when the branch is not taken. For Unconditional Branches
//or end of shader, Next is null.
if (current.OpCodes.Count > 0)
//Check if the current block is inside another block.
if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
{
foreach (OpCodeSsy ssyOp in current.SsyOpCodes)
Block nBlock = blocks[nBlkIndex];
if (nBlock.Address == currBlock.Address)
{
Enqueue(ssyOp.GetAbsoluteAddress());
throw new InvalidOperationException("Found duplicate block address on the list.");
}
OpCode lastOp = current.GetLastOp();
nBlock.Split(currBlock);
blocks.Insert(nBlkIndex + 1, currBlock);
continue;
}
//If we have a block after the current one, set the limit address.
ulong limitAddress = ulong.MaxValue;
if (nBlkIndex != blocks.Count)
{
Block nBlock = blocks[nBlkIndex];
int nextIndex = nBlkIndex + 1;
if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
{
limitAddress = blocks[nextIndex].Address;
}
else if (nBlock.Address > currBlock.Address)
{
limitAddress = blocks[nBlkIndex].Address;
}
}
FillBlock(memory, currBlock, limitAddress, startAddress);
if (currBlock.OpCodes.Count != 0)
{
foreach (OpCodeSsy ssyOp in currBlock.SsyOpCodes)
{
GetBlock(ssyOp.GetAbsoluteAddress());
}
//Set child blocks. "Branch" is the block the branch instruction
//points to (when taken), "Next" is the block at the next address,
//executed when the branch is not taken. For Unconditional Branches
//or end of program, Next is null.
OpCode lastOp = currBlock.GetLastOp();
if (lastOp is OpCodeBranch op)
{
current.Branch = Enqueue(op.GetAbsoluteAddress());
currBlock.Branch = GetBlock(op.GetAbsoluteAddress());
}
if (!IsUnconditionalBranch(lastOp))
{
current.Next = Enqueue(current.EndAddress);
currBlock.Next = GetBlock(currBlock.EndAddress);
}
}
//If we have on the graph two blocks with the same end address,
//then we need to split the bigger block and have two small blocks,
//the end address of the bigger "Current" block should then be == to
//the address of the "Smaller" block.
while (visitedEnd.TryGetValue(current.EndAddress, out Block smaller))
//Insert the new block on the list (sorted by address).
if (blocks.Count != 0)
{
if (current.Address > smaller.Address)
{
Block temp = smaller;
Block nBlock = blocks[nBlkIndex];
smaller = current;
current = temp;
}
current.EndAddress = smaller.Address;
current.Next = smaller;
current.Branch = null;
current.OpCodes.RemoveRange(
current.OpCodes.Count - smaller.OpCodes.Count,
smaller.OpCodes.Count);
current.UpdateSsyOpCodes();
smaller.UpdateSsyOpCodes();
visitedEnd[smaller.EndAddress] = smaller;
blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
}
else
{
blocks.Add(currBlock);
}
visitedEnd.Add(current.EndAddress, current);
}
foreach (Block ssyBlock in visited.Values.Where(x => x.SsyOpCodes.Count != 0))
foreach (Block ssyBlock in blocks.Where(x => x.SsyOpCodes.Count != 0))
{
for (int ssyIndex = 0; ssyIndex < ssyBlock.SsyOpCodes.Count; ssyIndex++)
{
@ -115,157 +134,61 @@ namespace Ryujinx.Graphics.Shader.Decoders
}
}
Block[] cfg = new Block[visited.Count];
int index = 0;
foreach (Block block in visited.Values.OrderBy(x => x.Address - entry.Address))
{
block.Index = index;
cfg[index++] = block;
}
return cfg;
return blocks.ToArray();
}
private struct PathBlockState
private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
{
public Block Block { get; }
index = 0;
private enum RestoreType
int left = 0;
int right = blocks.Count - 1;
while (left <= right)
{
None,
PopSsy,
PushSync
}
int size = right - left;
private RestoreType _restoreType;
int middle = left + (size >> 1);
private ulong _restoreValue;
Block block = blocks[middle];
public bool ReturningFromVisit => _restoreType != RestoreType.None;
index = middle;
public PathBlockState(Block block)
{
Block = block;
_restoreType = RestoreType.None;
_restoreValue = 0;
}
public PathBlockState(int oldSsyStackSize)
{
Block = null;
_restoreType = RestoreType.PopSsy;
_restoreValue = (ulong)oldSsyStackSize;
}
public PathBlockState(ulong syncAddress)
{
Block = null;
_restoreType = RestoreType.PushSync;
_restoreValue = syncAddress;
}
public void RestoreStackState(Stack<ulong> ssyStack)
{
if (_restoreType == RestoreType.PushSync)
if (address >= block.Address && address < block.EndAddress)
{
ssyStack.Push(_restoreValue);
return true;
}
else if (_restoreType == RestoreType.PopSsy)
if (address < block.Address)
{
while (ssyStack.Count > (uint)_restoreValue)
{
ssyStack.Pop();
}
right = middle - 1;
}
else
{
left = middle + 1;
}
}
return false;
}
//Walks the control flow graph starting at ssyBlock to find the sync opcodes
//that pair with the SSY opcode at ssyBlock.SsyOpCodes[ssyIndex], and links
//them through op.Targets and ssyOp.Syncs.
//"blocks" maps a block start address to its block, and is used to continue the
//walk at the target of a sync that belongs to a nested SSY.
private static void PropagateSsy(Dictionary<ulong, Block> blocks, Block ssyBlock, int ssyIndex)
{
OpCodeSsy ssyOp = ssyBlock.SsyOpCodes[ssyIndex];
//LIFO work list, so the graph is walked depth first.
Stack<PathBlockState> pending = new Stack<PathBlockState>();
HashSet<Block> visited = new HashSet<Block>();
//Addresses pushed by the SSY opcodes found along the path being walked.
Stack<ulong> ssyStack = new Stack<ulong>();
//Block entries are queued at most once; restore markers (null Block) always are.
void Push(PathBlockState pbs)
{
if (pbs.Block == null || visited.Add(pbs.Block))
{
pending.Push(pbs);
}
}
Push(new PathBlockState(ssyBlock));
while (pending.TryPop(out PathBlockState pbs))
{
//Restore markers only undo a change made to ssyStack, there's no block to visit.
if (pbs.ReturningFromVisit)
{
pbs.RestoreStackState(ssyStack);
continue;
}
Block current = pbs.Block;
int ssyOpCodesCount = current.SsyOpCodes.Count;
if (ssyOpCodesCount != 0)
{
//Queued before the successors, so it's popped after them (LIFO) and trims
//ssyStack back to the size it had before this block pushed its addresses.
Push(new PathBlockState(ssyStack.Count));
for (int index = ssyIndex; index < ssyOpCodesCount; index++)
{
ssyStack.Push(current.SsyOpCodes[index].GetAbsoluteAddress());
}
}
//The start index only applies to the first block; later blocks push all of their SSY opcodes.
ssyIndex = 0;
if (current.Next != null)
{
Push(new PathBlockState(current.Next));
}
//Queued after Next, so the branch target is visited before the fall through block.
if (current.Branch != null)
{
Push(new PathBlockState(current.Branch));
}
else if (current.GetLastOp() is OpCodeSync op)
{
//NOTE(review): Stack<T>.Pop throws InvalidOperationException on an empty stack;
//this relies on every sync reached here having an SSY address pushed before it.
ulong syncAddress = ssyStack.Pop();
if (ssyStack.Count == 0)
{
//The popped address was the only one on the stack, so this sync is recorded
//as belonging to ssyOp. The address is pushed back so that other paths can
//still see it.
ssyStack.Push(syncAddress);
op.Targets.Add(ssyOp, op.Targets.Count);
//Local() is declared outside of this view.
ssyOp.Syncs.TryAdd(op, Local());
}
else
{
//The sync belongs to a nested SSY. Keep walking from its target block, and
//queue a marker (popped after that block) that pushes the address back.
Push(new PathBlockState(syncAddress));
Push(new PathBlockState(blocks[syncAddress]));
}
}
}
}
private static void FillBlock(IGalMemory memory, Block block, ulong start)
private static void FillBlock(
IGalMemory memory,
Block block,
ulong limitAddress,
ulong startAddress)
{
ulong address = block.Address;
do
{
if (address >= limitAddress)
{
break;
}
//Ignore scheduling instructions, which are written every 32 bytes.
if (((address - start) & 0x1f) == 0)
if (((address - startAddress) & 0x1f) == 0)
{
address += 8;
@ -350,5 +273,134 @@ namespace Ryujinx.Graphics.Shader.Decoders
return (OpActivator)mthd.CreateDelegate(typeof(OpActivator));
}
//Entry of the work stack used by PropagateSsy. An entry is either a block that
//still has to be visited (Block is set), or a marker with a null Block that
//describes how to restore the SSY address stack once it is popped.
private struct PathBlockState
{
    //Kind of change that has to be applied to the SSY stack when the entry is popped.
    private enum RestoreType
    {
        None,
        PopSsy,
        PushSync
    }

    private RestoreType _restoreType;
    private ulong       _restoreValue;

    public Block Block { get; }

    //True for restore markers, false for entries that carry a block to visit.
    public bool ReturningFromVisit
    {
        get { return _restoreType != RestoreType.None; }
    }

    private PathBlockState(Block block, RestoreType restoreType, ulong restoreValue)
    {
        Block         = block;
        _restoreType  = restoreType;
        _restoreValue = restoreValue;
    }

    //Entry for a block that has to be visited.
    public PathBlockState(Block block) : this(block, RestoreType.None, 0)
    {
    }

    //Marker that trims the SSY stack back to the given size.
    public PathBlockState(int oldSsyStackSize) : this(null, RestoreType.PopSsy, (ulong)oldSsyStackSize)
    {
    }

    //Marker that pushes the given sync address back on the SSY stack.
    public PathBlockState(ulong syncAddress) : this(null, RestoreType.PushSync, syncAddress)
    {
    }

    //Applies the change described by this marker to ssyStack.
    //Does nothing for entries that carry a block.
    public void RestoreStackState(Stack<ulong> ssyStack)
    {
        switch (_restoreType)
        {
            case RestoreType.PushSync:
            {
                ssyStack.Push(_restoreValue);
                break;
            }

            case RestoreType.PopSsy:
            {
                uint targetCount = (uint)_restoreValue;

                while (ssyStack.Count > targetCount)
                {
                    ssyStack.Pop();
                }

                break;
            }
        }
    }
}
//Walks the control flow graph starting at ssyBlock to find the sync opcodes
//that pair with the SSY opcode at ssyBlock.SsyOpCodes[ssyIndex], and links
//them through op.Targets and ssyOp.Syncs.
//"blocks" maps a block start address to its block, and is used to continue the
//walk at the target of a sync that belongs to a nested SSY.
private static void PropagateSsy(Dictionary<ulong, Block> blocks, Block ssyBlock, int ssyIndex)
{
OpCodeSsy ssyOp = ssyBlock.SsyOpCodes[ssyIndex];
//Despite the name this is a stack (LIFO), so the graph is walked depth first.
Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
HashSet<Block> visited = new HashSet<Block>();
//Addresses pushed by the SSY opcodes found along the path being walked.
Stack<ulong> ssyStack = new Stack<ulong>();
//Block entries are queued at most once; restore markers (null Block) always are.
void Push(PathBlockState pbs)
{
if (pbs.Block == null || visited.Add(pbs.Block))
{
workQueue.Push(pbs);
}
}
Push(new PathBlockState(ssyBlock));
while (workQueue.TryPop(out PathBlockState pbs))
{
//Restore markers only undo a change made to ssyStack, there's no block to visit.
if (pbs.ReturningFromVisit)
{
pbs.RestoreStackState(ssyStack);
continue;
}
Block current = pbs.Block;
int ssyOpCodesCount = current.SsyOpCodes.Count;
if (ssyOpCodesCount != 0)
{
//Queued before the successors, so it's popped after them (LIFO) and trims
//ssyStack back to the size it had before this block pushed its addresses.
Push(new PathBlockState(ssyStack.Count));
for (int index = ssyIndex; index < ssyOpCodesCount; index++)
{
ssyStack.Push(current.SsyOpCodes[index].GetAbsoluteAddress());
}
}
//The start index only applies to the first block; later blocks push all of their SSY opcodes.
ssyIndex = 0;
if (current.Next != null)
{
Push(new PathBlockState(current.Next));
}
//Queued after Next, so the branch target is visited before the fall through block.
if (current.Branch != null)
{
Push(new PathBlockState(current.Branch));
}
else if (current.GetLastOp() is OpCodeSync op)
{
//NOTE(review): Stack<T>.Pop throws InvalidOperationException on an empty stack;
//this relies on every sync reached here having an SSY address pushed before it.
ulong syncAddress = ssyStack.Pop();
if (ssyStack.Count == 0)
{
//The popped address was the only one on the stack, so this sync is recorded
//as belonging to ssyOp. The address is pushed back so that other paths can
//still see it.
ssyStack.Push(syncAddress);
op.Targets.Add(ssyOp, op.Targets.Count);
//Local() is declared outside of this view.
ssyOp.Syncs.TryAdd(op, Local());
}
else
{
//The sync belongs to a nested SSY. Keep walking from its target block, and
//queue a marker (popped after that block) that pushes the address back.
Push(new PathBlockState(syncAddress));
Push(new PathBlockState(blocks[syncAddress]));
}
}
}
}
}
}