Merge branch 'original_master' into scissor_test_fixes_2

# Conflicts:
#	Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs
#	Ryujinx/Config.jsonc
This commit is contained in:
Andy Adshead 2019-03-01 18:11:56 +00:00
commit 771578c241
115 changed files with 5489 additions and 2120 deletions

View file

@ -14,6 +14,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Mono.Posix.NETStandard" Version="1.0.0" />
<PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
</ItemGroup>

View file

@ -32,8 +32,6 @@ namespace ChocolArm64
{
translator.ExecuteSubroutine(this, entrypoint);
memory.RemoveMonitor(ThreadState.Core);
WorkFinished?.Invoke(this, EventArgs.Empty);
});
}

View file

@ -8,18 +8,9 @@ namespace ChocolArm64.Decoders
public OpCodeSimdCvt64(Inst inst, long position, int opCode) : base(inst, position, opCode)
{
//TODO:
//Und of Fixed Point variants.
int scale = (opCode >> 10) & 0x3f;
int sf = (opCode >> 31) & 0x1;
/*if (Type != SF && !(Type == 2 && SF == 1))
{
Emitter = AInstEmit.Und;
return;
}*/
FBits = 64 - scale;
RegisterSize = sf != 0

View file

@ -1,14 +0,0 @@
using System;
namespace ChocolArm64.Events
{
public class MemoryAccessEventArgs : EventArgs
{
public long Position { get; private set; }
public MemoryAccessEventArgs(long position)
{
Position = position;
}
}
}

View file

@ -1,13 +0,0 @@
using System;
namespace ChocolArm64.Exceptions
{
public class VmmPageFaultException : Exception
{
private const string ExMsg = "Tried to access unmapped address 0x{0:x16}!";
public VmmPageFaultException() { }
public VmmPageFaultException(long position) : base(string.Format(ExMsg, position)) { }
}
}

View file

@ -11,21 +11,56 @@ namespace ChocolArm64.Instructions
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
EmitBfmLoadRn(context);
if (op.Pos < op.Shift)
{
//BFI.
context.EmitLdintzr(op.Rn);
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~op.WMask & op.TMask);
int shift = op.GetBitsCount() - op.Shift;
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
int width = op.Pos + 1;
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~op.TMask);
long mask = (long)(ulong.MaxValue >> (64 - width));
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
context.EmitLdc_I(mask);
context.EmitStintzr(op.Rd);
context.Emit(OpCodes.And);
context.EmitLsl(shift);
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~(mask << shift));
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
context.EmitStintzr(op.Rd);
}
else
{
//BFXIL.
context.EmitLdintzr(op.Rn);
context.EmitLsr(op.Shift);
int width = op.Pos - op.Shift + 1;
long mask = (long)(ulong.MaxValue >> (64 - width));
context.EmitLdc_I(mask);
context.Emit(OpCodes.And);
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~mask);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
context.EmitStintzr(op.Rd);
}
}
public static void Sbfm(ILEmitterCtx context)

View file

@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState();
EmitCall(context, op.Imm);
}
@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions
{
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
context.HasIndirectJump = true;
context.EmitStoreState();
context.EmitLdintzr(op.Rn);

View file

@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions
}
context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr));
context.EmitStoreState();
//If x is true, then this is a branch with link and exchange.
//In this case we need to swap the mode between Arm <-> Thumb.

View file

@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
context.EmitStoreState();
context.TranslateAhead(imm);
context.EmitLdc_I8(imm);
@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall())
{
context.HasSlowCall = true;
context.EmitStoreState();
context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx);
@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm);
context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
context.Emit(OpCodes.Dup);
context.EmitSttmp();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
context.Emit(OpCodes.Ret);
}
else
@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitLdc_I4(isJump
? (int)CallType.VirtualJump
: (int)CallType.VirtualCall);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);

View file

@ -31,8 +31,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMem64 op = (OpCodeMem64)context.CurrOp;
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
EmitLoadAddress(context);
if (signed && op.Extend64)
@ -69,7 +67,6 @@ namespace ChocolArm64.Instructions
return;
}
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdc_I8(op.Imm);
if (op.Signed)
@ -116,13 +113,10 @@ namespace ChocolArm64.Instructions
}
}
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
EmitLoadAddress(context);
EmitReadAndStore(op.Rt);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);
@ -137,8 +131,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMem64 op = (OpCodeMem64)context.CurrOp;
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
EmitLoadAddress(context);
if (op is IOpCodeSimd64)
@ -159,8 +151,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp;
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
EmitLoadAddress(context);
if (op is IOpCodeSimd64)
@ -174,7 +164,6 @@ namespace ChocolArm64.Instructions
EmitWriteCall(context, op.Size);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);

View file

@ -64,9 +64,7 @@ namespace ChocolArm64.Instructions
{
if ((mask & 1) != 0)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I4(offset);
context.Emit(OpCodes.Add);
@ -129,9 +127,7 @@ namespace ChocolArm64.Instructions
{
if ((mask & 1) != 0)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I4(offset);
context.Emit(OpCodes.Add);
@ -198,8 +194,6 @@ namespace ChocolArm64.Instructions
context.EmitSttmp();
}
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
if (op.Index)
{
context.EmitLdtmp();

View file

@ -23,7 +23,9 @@ namespace ChocolArm64.Instructions
public static void Clrex(ILEmitterCtx context)
{
EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.ClearExclusiveAddress));
}
public static void Dmb(ILEmitterCtx context) => EmitBarrier(context);
@ -37,12 +39,12 @@ namespace ChocolArm64.Instructions
private static void EmitLdr(ILEmitterCtx context, AccessType accType)
{
EmitLoad(context, accType, false);
EmitLoad(context, accType, pair: false);
}
private static void EmitLdp(ILEmitterCtx context, AccessType accType)
{
EmitLoad(context, accType, true);
EmitLoad(context, accType, pair: true);
}
private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
@ -57,32 +59,121 @@ namespace ChocolArm64.Instructions
EmitBarrier(context);
}
if (exclusive)
{
EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn);
}
context.EmitLdint(op.Rn);
context.EmitSttmp();
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
if (exclusive)
{
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
EmitReadZxCall(context, op.Size);
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.SetExclusiveAddress));
}
context.EmitStintzr(op.Rt);
void WriteExclusiveValue(string propName)
{
context.Emit(OpCodes.Dup);
if (op.Size < 3)
{
context.Emit(OpCodes.Conv_U8);
}
context.EmitSttmp2();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp2();
context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName);
}
if (pair)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);
//Exclusive loads should be atomic. For pairwise loads, we need to
//read all the data at once. For a 32-bits pairwise load, we do a
//simple 64-bits load, for a 128-bits load, we need to call a special
//method to read 128-bits atomically.
if (op.Size == 2)
{
context.EmitLdtmp();
context.Emit(OpCodes.Add);
EmitReadZxCall(context, 3);
context.Emit(OpCodes.Dup);
//Mask low half.
context.Emit(OpCodes.Conv_U4);
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
//Shift high half.
context.EmitLsr(32);
context.Emit(OpCodes.Conv_U4);
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
}
context.EmitStintzr(op.Rt2);
}
else if (op.Size == 3)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicReadInt128));
context.Emit(OpCodes.Dup);
//Load low part of the vector.
context.EmitLdc_I4(0);
context.EmitLdc_I4(3);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
//Load high part of the vector.
context.EmitLdc_I4(1);
context.EmitLdc_I4(3);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
}
context.EmitStintzr(op.Rt2);
}
else
{
throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
}
}
else
{
//8, 16, 32 or 64-bits (non-pairwise) load.
context.EmitLdtmp();
EmitReadZxCall(context, op.Size);
context.EmitStintzr(op.Rt2);
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
}
}
@ -99,12 +190,12 @@ namespace ChocolArm64.Instructions
private static void EmitStr(ILEmitterCtx context, AccessType accType)
{
EmitStore(context, accType, false);
EmitStore(context, accType, pair: false);
}
private static void EmitStp(ILEmitterCtx context, AccessType accType)
{
EmitStore(context, accType, true);
EmitStore(context, accType, pair: true);
}
private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
@ -119,66 +210,132 @@ namespace ChocolArm64.Instructions
EmitBarrier(context);
}
ILLabel lblEx = new ILLabel();
ILLabel lblEnd = new ILLabel();
if (exclusive)
{
EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn);
ILLabel lblEx = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdint(op.Rn);
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.CheckExclusiveAddress));
context.Emit(OpCodes.Brtrue_S, lblEx);
context.EmitLdc_I8(1);
//Address check failed, set error right away and do not store anything.
context.EmitLdc_I4(1);
context.EmitStintzr(op.Rs);
context.Emit(OpCodes.Br_S, lblEnd);
}
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblEx);
//Address check passsed.
context.MarkLabel(lblEx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdintzr(op.Rt);
EmitWriteCall(context, op.Size);
if (pair)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(1 << op.Size);
context.Emit(OpCodes.Add);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdintzr(op.Rt2);
context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueLow));
EmitWriteCall(context, op.Size);
}
void EmitCast()
{
//The input should be always int64.
switch (op.Size)
{
case 0: context.Emit(OpCodes.Conv_U1); break;
case 1: context.Emit(OpCodes.Conv_U2); break;
case 2: context.Emit(OpCodes.Conv_U4); break;
}
}
EmitCast();
if (pair)
{
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueHigh));
EmitCast();
context.EmitLdintzr(op.Rt);
EmitCast();
context.EmitLdintzr(op.Rt2);
EmitCast();
switch (op.Size)
{
case 2: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchange2xInt32)); break;
case 3: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt128)); break;
default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
}
}
else
{
context.EmitLdintzr(op.Rt);
EmitCast();
switch (op.Size)
{
case 0: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeByte)); break;
case 1: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt16)); break;
case 2: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt32)); break;
case 3: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt64)); break;
default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
}
}
//The value returned is a bool, true if the values compared
//were equal and the new value was written, false otherwise.
//We need to invert this result, as on ARM 1 indicates failure,
//and 0 success on those instructions.
context.EmitLdc_I4(1);
context.Emit(OpCodes.Xor);
context.Emit(OpCodes.Dup);
context.Emit(OpCodes.Conv_U8);
if (exclusive)
{
context.EmitLdc_I8(0);
context.EmitStintzr(op.Rs);
EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
//Only clear the exclusive monitor if the store was successful (Rs = false).
context.Emit(OpCodes.Brtrue_S, lblEnd);
Clrex(context);
context.MarkLabel(lblEnd);
}
context.MarkLabel(lblEnd);
}
private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));
if (rn != -1)
else
{
context.EmitLdint(rn);
}
void EmitWriteCall(int rt, long offset)
{
context.EmitLdint(op.Rn);
context.EmitCall(typeof(MemoryManager), name);
if (offset != 0)
{
context.EmitLdc_I8(offset);
context.Emit(OpCodes.Add);
}
context.EmitLdintzr(rt);
InstEmitMemoryHelper.EmitWriteCall(context, op.Size);
}
EmitWriteCall(op.Rt, 0);
if (pair)
{
EmitWriteCall(op.Rt2, 1 << op.Size);
}
}
}
private static void EmitBarrier(ILEmitterCtx context)

View file

@ -1,13 +1,20 @@
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class InstEmitMemoryHelper
{
private static int _tempIntAddress = ILEmitterCtx.GetIntTempIndex();
private static int _tempIntValue = ILEmitterCtx.GetIntTempIndex();
private static int _tempIntPtAddr = ILEmitterCtx.GetIntTempIndex();
private static int _tempVecValue = ILEmitterCtx.GetVecTempIndex();
private enum Extension
{
Zx,
@ -32,9 +39,10 @@ namespace ChocolArm64.Instructions
private static void EmitReadCall(ILEmitterCtx context, Extension ext, int size)
{
bool isSimd = GetIsSimd(context);
//Save the address into a temp.
context.EmitStint(_tempIntAddress);
string name = null;
bool isSimd = IsSimd(context);
if (size < 0 || size > (isSimd ? 4 : 3))
{
@ -43,28 +51,27 @@ namespace ChocolArm64.Instructions
if (isSimd)
{
switch (size)
if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2)
{
case 0: name = nameof(MemoryManager.ReadVector8); break;
case 1: name = nameof(MemoryManager.ReadVector16); break;
case 2: name = nameof(MemoryManager.ReadVector32); break;
case 3: name = nameof(MemoryManager.ReadVector64); break;
case 4: name = nameof(MemoryManager.ReadVector128); break;
EmitReadVectorFallback(context, size);
}
else
{
EmitReadVector(context, size);
}
}
else
{
switch (size)
if (context.Tier == TranslationTier.Tier0)
{
case 0: name = nameof(MemoryManager.ReadByte); break;
case 1: name = nameof(MemoryManager.ReadUInt16); break;
case 2: name = nameof(MemoryManager.ReadUInt32); break;
case 3: name = nameof(MemoryManager.ReadUInt64); break;
EmitReadIntFallback(context, size);
}
else
{
EmitReadInt(context, size);
}
}
context.EmitCall(typeof(MemoryManager), name);
if (!isSimd)
{
if (ext == Extension.Sx32 ||
@ -89,50 +96,379 @@ namespace ChocolArm64.Instructions
public static void EmitWriteCall(ILEmitterCtx context, int size)
{
bool isSimd = GetIsSimd(context);
bool isSimd = IsSimd(context);
string name = null;
//Save the value into a temp.
if (isSimd)
{
context.EmitStvec(_tempVecValue);
}
else
{
context.EmitStint(_tempIntValue);
}
//Save the address into a temp.
context.EmitStint(_tempIntAddress);
if (size < 0 || size > (isSimd ? 4 : 3))
{
throw new ArgumentOutOfRangeException(nameof(size));
}
if (size < 3 && !isSimd)
{
context.Emit(OpCodes.Conv_I4);
}
if (isSimd)
{
switch (size)
if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2)
{
case 0: name = nameof(MemoryManager.WriteVector8); break;
case 1: name = nameof(MemoryManager.WriteVector16); break;
case 2: name = nameof(MemoryManager.WriteVector32); break;
case 3: name = nameof(MemoryManager.WriteVector64); break;
case 4: name = nameof(MemoryManager.WriteVector128); break;
EmitWriteVectorFallback(context, size);
}
else
{
EmitWriteVector(context, size);
}
}
else
{
switch (size)
if (context.Tier == TranslationTier.Tier0)
{
case 0: name = nameof(MemoryManager.WriteByte); break;
case 1: name = nameof(MemoryManager.WriteUInt16); break;
case 2: name = nameof(MemoryManager.WriteUInt32); break;
case 3: name = nameof(MemoryManager.WriteUInt64); break;
EmitWriteIntFallback(context, size);
}
else
{
EmitWriteInt(context, size);
}
}
context.EmitCall(typeof(MemoryManager), name);
}
private static bool GetIsSimd(ILEmitterCtx context)
private static bool IsSimd(ILEmitterCtx context)
{
return context.CurrOp is IOpCodeSimd64 &&
!(context.CurrOp is OpCodeSimdMemMs64 ||
context.CurrOp is OpCodeSimdMemSs64);
}
private static void EmitReadInt(ILEmitterCtx context, int size)
{
EmitAddressCheck(context, size);
ILLabel lblFastPath = new ILLabel();
ILLabel lblSlowPath = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(OpCodes.Brfalse_S, lblFastPath);
context.MarkLabel(lblSlowPath);
EmitReadIntFallback(context, size);
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblFastPath);
EmitPtPointerLoad(context, lblSlowPath);
switch (size)
{
case 0: context.Emit(OpCodes.Ldind_U1); break;
case 1: context.Emit(OpCodes.Ldind_U2); break;
case 2: context.Emit(OpCodes.Ldind_U4); break;
case 3: context.Emit(OpCodes.Ldind_I8); break;
}
context.MarkLabel(lblEnd);
}
private static void EmitReadVector(ILEmitterCtx context, int size)
{
EmitAddressCheck(context, size);
ILLabel lblFastPath = new ILLabel();
ILLabel lblSlowPath = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(OpCodes.Brfalse_S, lblFastPath);
context.MarkLabel(lblSlowPath);
EmitReadVectorFallback(context, size);
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblFastPath);
EmitPtPointerLoad(context, lblSlowPath);
switch (size)
{
case 2: context.EmitCall(typeof(Sse), nameof(Sse.LoadScalarVector128)); break;
case 3:
{
Type[] types = new Type[] { typeof(double*) };
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.LoadScalarVector128), types));
break;
}
case 4: context.EmitCall(typeof(Sse), nameof(Sse.LoadAlignedVector128)); break;
throw new InvalidOperationException($"Invalid vector load size of {1 << size} bytes.");
}
context.MarkLabel(lblEnd);
}
private static void EmitWriteInt(ILEmitterCtx context, int size)
{
EmitAddressCheck(context, size);
ILLabel lblFastPath = new ILLabel();
ILLabel lblSlowPath = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(OpCodes.Brfalse_S, lblFastPath);
context.MarkLabel(lblSlowPath);
EmitWriteIntFallback(context, size);
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblFastPath);
EmitPtPointerLoad(context, lblSlowPath);
context.EmitLdint(_tempIntValue);
if (size < 3)
{
context.Emit(OpCodes.Conv_U4);
}
switch (size)
{
case 0: context.Emit(OpCodes.Stind_I1); break;
case 1: context.Emit(OpCodes.Stind_I2); break;
case 2: context.Emit(OpCodes.Stind_I4); break;
case 3: context.Emit(OpCodes.Stind_I8); break;
}
context.MarkLabel(lblEnd);
}
private static void EmitWriteVector(ILEmitterCtx context, int size)
{
EmitAddressCheck(context, size);
ILLabel lblFastPath = new ILLabel();
ILLabel lblSlowPath = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(OpCodes.Brfalse_S, lblFastPath);
context.MarkLabel(lblSlowPath);
EmitWriteVectorFallback(context, size);
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblFastPath);
EmitPtPointerLoad(context, lblSlowPath);
context.EmitLdvec(_tempVecValue);
switch (size)
{
case 2: context.EmitCall(typeof(Sse), nameof(Sse.StoreScalar)); break;
case 3: context.EmitCall(typeof(Sse2), nameof(Sse2.StoreScalar)); break;
case 4: context.EmitCall(typeof(Sse), nameof(Sse.StoreAligned)); break;
default: throw new InvalidOperationException($"Invalid vector store size of {1 << size} bytes.");
}
context.MarkLabel(lblEnd);
}
private static void EmitAddressCheck(ILEmitterCtx context, int size)
{
long addressCheckMask = ~(context.Memory.AddressSpaceSize - 1);
addressCheckMask |= (1u << size) - 1;
context.EmitLdint(_tempIntAddress);
context.EmitLdc_I(addressCheckMask);
context.Emit(OpCodes.And);
}
private static void EmitPtPointerLoad(ILEmitterCtx context, ILLabel lblFallbackPath)
{
context.EmitLdc_I8(context.Memory.PageTable.ToInt64());
context.Emit(OpCodes.Conv_I);
int bit = MemoryManager.PageBits;
do
{
context.EmitLdint(_tempIntAddress);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U8);
}
context.EmitLsr(bit);
bit += context.Memory.PtLevelBits;
if (bit < context.Memory.AddressSpaceBits)
{
context.EmitLdc_I8(context.Memory.PtLevelMask);
context.Emit(OpCodes.And);
}
context.EmitLdc_I8(IntPtr.Size);
context.Emit(OpCodes.Mul);
context.Emit(OpCodes.Conv_I);
context.Emit(OpCodes.Add);
context.Emit(OpCodes.Ldind_I);
}
while (bit < context.Memory.AddressSpaceBits);
if (!context.Memory.HasWriteWatchSupport)
{
context.Emit(OpCodes.Conv_U8);
context.EmitStint(_tempIntPtAddr);
context.EmitLdint(_tempIntPtAddr);
context.EmitLdc_I8(MemoryManager.PteFlagsMask);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Brtrue, lblFallbackPath);
context.EmitLdint(_tempIntPtAddr);
context.Emit(OpCodes.Conv_I);
}
context.EmitLdint(_tempIntAddress);
context.EmitLdc_I(MemoryManager.PageMask);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Conv_I);
context.Emit(OpCodes.Add);
}
private static void EmitReadIntFallback(ILEmitterCtx context, int size)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(_tempIntAddress);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U8);
}
string fallbackMethodName = null;
switch (size)
{
case 0: fallbackMethodName = nameof(MemoryManager.ReadByte); break;
case 1: fallbackMethodName = nameof(MemoryManager.ReadUInt16); break;
case 2: fallbackMethodName = nameof(MemoryManager.ReadUInt32); break;
case 3: fallbackMethodName = nameof(MemoryManager.ReadUInt64); break;
}
context.EmitCall(typeof(MemoryManager), fallbackMethodName);
}
private static void EmitReadVectorFallback(ILEmitterCtx context, int size)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(_tempIntAddress);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U8);
}
string fallbackMethodName = null;
switch (size)
{
case 0: fallbackMethodName = nameof(MemoryManager.ReadVector8); break;
case 1: fallbackMethodName = nameof(MemoryManager.ReadVector16); break;
case 2: fallbackMethodName = nameof(MemoryManager.ReadVector32); break;
case 3: fallbackMethodName = nameof(MemoryManager.ReadVector64); break;
case 4: fallbackMethodName = nameof(MemoryManager.ReadVector128); break;
}
context.EmitCall(typeof(MemoryManager), fallbackMethodName);
}
private static void EmitWriteIntFallback(ILEmitterCtx context, int size)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(_tempIntAddress);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U8);
}
context.EmitLdint(_tempIntValue);
if (size < 3)
{
context.Emit(OpCodes.Conv_U4);
}
string fallbackMethodName = null;
switch (size)
{
case 0: fallbackMethodName = nameof(MemoryManager.WriteByte); break;
case 1: fallbackMethodName = nameof(MemoryManager.WriteUInt16); break;
case 2: fallbackMethodName = nameof(MemoryManager.WriteUInt32); break;
case 3: fallbackMethodName = nameof(MemoryManager.WriteUInt64); break;
}
context.EmitCall(typeof(MemoryManager), fallbackMethodName);
}
private static void EmitWriteVectorFallback(ILEmitterCtx context, int size)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(_tempIntAddress);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U8);
}
context.EmitLdvec(_tempVecValue);
string fallbackMethodName = null;
switch (size)
{
case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break;
case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break;
case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break;
case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break;
case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128); break;
}
context.EmitCall(typeof(MemoryManager), fallbackMethodName);
}
}
}

View file

@ -194,8 +194,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitStvec(op.Rd);
@ -209,14 +208,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -252,8 +250,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitStvec(op.Rd);
@ -270,14 +267,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -321,11 +317,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -374,11 +370,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -445,12 +441,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -536,14 +532,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Ra);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Ra);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), typesMulAdd));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -718,14 +714,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -751,18 +747,14 @@ namespace ChocolArm64.Instructions
Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
context.EmitStvec(op.Rd);
@ -776,21 +768,17 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -841,14 +829,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -874,17 +862,13 @@ namespace ChocolArm64.Instructions
Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
context.EmitStvec(op.Rd);
@ -899,21 +883,17 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -950,14 +930,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Ra);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Ra);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -1020,13 +1000,11 @@ namespace ChocolArm64.Instructions
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
context.EmitStvec(op.Rd);
@ -1041,17 +1019,15 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -1125,11 +1101,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -1175,11 +1151,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -1242,8 +1218,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar));
}
@ -1262,8 +1237,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal));
}
@ -1310,13 +1284,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(2d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -1367,13 +1341,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(2d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -1579,8 +1553,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar));
}
@ -1599,8 +1572,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt));
}
@ -1654,14 +1626,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(3d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@ -1719,14 +1691,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(3d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
else
@ -1864,11 +1836,11 @@ namespace ChocolArm64.Instructions
VectorHelper.EmitCall(context, namesSzv[op.Size]);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -1953,14 +1925,14 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -1969,7 +1941,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -1999,9 +1971,8 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2010,7 +1981,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2027,12 +1998,12 @@ namespace ChocolArm64.Instructions
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
@ -2046,10 +2017,9 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2083,23 +2053,21 @@ namespace ChocolArm64.Instructions
context.EmitStvectmp();
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
context.Emit(OpCodes.Dup);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAddSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2128,12 +2096,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2169,12 +2137,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2219,16 +2187,15 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2239,7 +2206,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2279,16 +2246,15 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2299,7 +2265,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2426,20 +2392,19 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd));
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2478,14 +2443,14 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2494,7 +2459,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2519,9 +2484,8 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2530,7 +2494,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2632,14 +2596,14 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2648,7 +2612,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2697,9 +2661,8 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2708,7 +2671,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2725,12 +2688,12 @@ namespace ChocolArm64.Instructions
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
@ -2744,10 +2707,9 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2774,16 +2736,15 @@ namespace ChocolArm64.Instructions
{
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAvgSub));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2812,12 +2773,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2853,12 +2814,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -2903,16 +2864,15 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2923,7 +2883,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -2963,16 +2923,15 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -2983,7 +2942,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -3052,12 +3011,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -3106,14 +3065,14 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -3122,7 +3081,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -3147,9 +3106,8 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -3158,7 +3116,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{

View file

@ -382,7 +382,7 @@ namespace ChocolArm64.Instructions
ILLabel lblNaN = new ILLabel();
ILLabel lblEnd = new ILLabel();
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
@ -393,7 +393,7 @@ namespace ChocolArm64.Instructions
}
else
{
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rm);
}
context.Emit(OpCodes.Dup);
@ -656,12 +656,12 @@ namespace ChocolArm64.Instructions
if (!isLeOrLt)
{
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
}
if (op is OpCodeSimdReg64 binOp)
{
EmitLdvecWithCastToDouble(context, binOp.Rm);
context.EmitLdvec(binOp.Rm);
}
else
{
@ -670,12 +670,12 @@ namespace ChocolArm64.Instructions
if (isLeOrLt)
{
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
}
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
if (scalar)
{

View file

@ -23,7 +23,7 @@ namespace ChocolArm64.Instructions
//Double -> Single.
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
@ -42,7 +42,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
else
{
@ -91,7 +91,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
else
{
@ -154,7 +154,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
context.Emit(OpCodes.Dup);
@ -244,7 +244,7 @@ namespace ChocolArm64.Instructions
public static void Fcvtzs_Gp_Fixed(ILEmitterCtx context)
{
EmitFcvtzs_Gp_Fix(context);
EmitFcvtzs_Gp_Fixed(context);
}
public static void Fcvtzs_S(ILEmitterCtx context)
@ -264,7 +264,7 @@ namespace ChocolArm64.Instructions
public static void Fcvtzu_Gp_Fixed(ILEmitterCtx context)
{
EmitFcvtzu_Gp_Fix(context);
EmitFcvtzu_Gp_Fixed(context);
}
public static void Fcvtzu_S(ILEmitterCtx context)
@ -293,6 +293,24 @@ namespace ChocolArm64.Instructions
EmitScalarSetF(context, op.Rd, op.Size);
}
public static void Scvtf_Gp_Fixed(ILEmitterCtx context)
{
OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
context.EmitLdintzr(op.Rn);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_I4);
}
EmitFloatCast(context, op.Size);
EmitI2fFBitsMul(context, op.Size, op.FBits);
EmitScalarSetF(context, op.Rd, op.Size);
}
public static void Scvtf_S(ILEmitterCtx context)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
@ -314,7 +332,7 @@ namespace ChocolArm64.Instructions
{
Type[] typesCvt = new Type[] { typeof(Vector128<int>) };
EmitLdvecWithSignedCast(context, op.Rn, 2);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
@ -349,6 +367,26 @@ namespace ChocolArm64.Instructions
EmitScalarSetF(context, op.Rd, op.Size);
}
public static void Ucvtf_Gp_Fixed(ILEmitterCtx context)
{
OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
context.EmitLdintzr(op.Rn);
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
context.Emit(OpCodes.Conv_U4);
}
context.Emit(OpCodes.Conv_R_Un);
EmitFloatCast(context, op.Size);
EmitI2fFBitsMul(context, op.Size, op.FBits);
EmitScalarSetF(context, op.Rd, op.Size);
}
public static void Ucvtf_S(ILEmitterCtx context)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
@ -367,32 +405,6 @@ namespace ChocolArm64.Instructions
EmitVectorCvtf(context, signed: false);
}
private static int GetFBits(ILEmitterCtx context)
{
if (context.CurrOp is OpCodeSimdShImm64 op)
{
return GetImmShr(op);
}
return 0;
}
private static void EmitFloatCast(ILEmitterCtx context, int size)
{
if (size == 0)
{
context.Emit(OpCodes.Conv_R4);
}
else if (size == 1)
{
context.Emit(OpCodes.Conv_R8);
}
else
{
throw new ArgumentOutOfRangeException(nameof(size));
}
}
private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
@ -476,17 +488,17 @@ namespace ChocolArm64.Instructions
context.EmitStintzr(op.Rd);
}
private static void EmitFcvtzs_Gp_Fix(ILEmitterCtx context)
private static void EmitFcvtzs_Gp_Fixed(ILEmitterCtx context)
{
EmitFcvtz__Gp_Fix(context, true);
EmitFcvtz__Gp_Fixed(context, true);
}
private static void EmitFcvtzu_Gp_Fix(ILEmitterCtx context)
private static void EmitFcvtzu_Gp_Fixed(ILEmitterCtx context)
{
EmitFcvtz__Gp_Fix(context, false);
EmitFcvtz__Gp_Fixed(context, false);
}
private static void EmitFcvtz__Gp_Fix(ILEmitterCtx context, bool signed)
private static void EmitFcvtz__Gp_Fixed(ILEmitterCtx context, bool signed)
{
OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
@ -530,9 +542,7 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Conv_R_Un);
}
context.Emit(sizeF == 0
? OpCodes.Conv_R4
: OpCodes.Conv_R8);
EmitFloatCast(context, sizeF);
EmitI2fFBitsMul(context, sizeF, fBits);
@ -644,6 +654,32 @@ namespace ChocolArm64.Instructions
}
}
private static int GetFBits(ILEmitterCtx context)
{
if (context.CurrOp is OpCodeSimdShImm64 op)
{
return GetImmShr(op);
}
return 0;
}
private static void EmitFloatCast(ILEmitterCtx context, int size)
{
if (size == 0)
{
context.Emit(OpCodes.Conv_R4);
}
else if (size == 1)
{
context.Emit(OpCodes.Conv_R8);
}
else
{
throw new ArgumentOutOfRangeException(nameof(size));
}
}
private static void EmitScalarFcvts(ILEmitterCtx context, int size, int fBits)
{
if (size < 0 || size > 1)

View file

@ -86,13 +86,13 @@ namespace ChocolArm64.Instructions
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
Type baseType = VectorIntTypesPerSizeLog2[op.Size];
if (op is OpCodeSimdReg64 binOp)
{
EmitLdvecWithSignedCast(context, binOp.Rm, op.Size);
context.EmitLdvec(binOp.Rm);
context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType }));
}
@ -101,7 +101,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(type.GetMethod(name, new Type[] { baseType }));
}
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -109,80 +109,6 @@ namespace ChocolArm64.Instructions
}
}
public static void EmitLdvecWithSignedCast(ILEmitterCtx context, int reg, int size)
{
context.EmitLdvec(reg);
switch (size)
{
case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToSByte)); break;
case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt16)); break;
case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt32)); break;
case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt64)); break;
default: throw new ArgumentOutOfRangeException(nameof(size));
}
}
public static void EmitLdvecWithCastToDouble(ILEmitterCtx context, int reg)
{
context.EmitLdvec(reg);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble));
}
public static void EmitStvecWithCastFromDouble(ILEmitterCtx context, int reg)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle));
context.EmitStvec(reg);
}
public static void EmitLdvecWithUnsignedCast(ILEmitterCtx context, int reg, int size)
{
context.EmitLdvec(reg);
switch (size)
{
case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToByte)); break;
case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt16)); break;
case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt32)); break;
case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt64)); break;
default: throw new ArgumentOutOfRangeException(nameof(size));
}
}
public static void EmitStvecWithSignedCast(ILEmitterCtx context, int reg, int size)
{
switch (size)
{
case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSByteToSingle)); break;
case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16ToSingle)); break;
case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32ToSingle)); break;
case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64ToSingle)); break;
default: throw new ArgumentOutOfRangeException(nameof(size));
}
context.EmitStvec(reg);
}
public static void EmitStvecWithUnsignedCast(ILEmitterCtx context, int reg, int size)
{
switch (size)
{
case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorByteToSingle)); break;
case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt16ToSingle)); break;
case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt32ToSingle)); break;
case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt64ToSingle)); break;
default: throw new ArgumentOutOfRangeException(nameof(size));
}
context.EmitStvec(reg);
}
public static void EmitScalarSseOrSse2OpF(ILEmitterCtx context, string name)
{
EmitSseOrSse2OpF(context, name, true);
@ -199,17 +125,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
void Ldvec(int reg)
{
context.EmitLdvec(reg);
if (sizeF == 1)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble));
}
}
Ldvec(op.Rn);
context.EmitLdvec(op.Rn);
Type type;
Type baseType;
@ -227,7 +143,7 @@ namespace ChocolArm64.Instructions
if (op is OpCodeSimdReg64 binOp)
{
Ldvec(binOp.Rm);
context.EmitLdvec(binOp.Rm);
context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType }));
}
@ -236,11 +152,6 @@ namespace ChocolArm64.Instructions
context.EmitCall(type.GetMethod(name, new Type[] { baseType }));
}
if (sizeF == 1)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle));
}
context.EmitStvec(op.Rd);
if (scalar)
@ -1014,12 +925,12 @@ namespace ChocolArm64.Instructions
{
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
@ -1033,7 +944,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitStvecWithCastFromDouble(context, op.Rd);
context.EmitStvec(op.Rd);
}
}
@ -1277,13 +1188,9 @@ namespace ChocolArm64.Instructions
}
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
public static void EmitSatQ(
ILEmitterCtx context,
int sizeDst,
bool signedSrc,
bool signedDst)
public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst)
{
if (sizeDst > 2)
if ((uint)sizeDst > 2)
{
throw new ArgumentOutOfRangeException(nameof(sizeDst));
}

View file

@ -32,12 +32,12 @@ namespace ChocolArm64.Instructions
Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -83,16 +83,16 @@ namespace ChocolArm64.Instructions
string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rd);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -141,20 +141,20 @@ namespace ChocolArm64.Instructions
Type[] typesXorAnd = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
context.EmitLdvec(op.Rd);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -199,14 +199,14 @@ namespace ChocolArm64.Instructions
Type[] typesSav = new Type[] { typeof(byte) };
Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(byte.MaxValue);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -228,8 +228,8 @@ namespace ChocolArm64.Instructions
Type[] typesSav = new Type[] { typeof(byte) };
Type[] typesAndNotOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(byte.MaxValue);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@ -237,7 +237,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -305,7 +305,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
EmitLdvecWithSignedCast(context, op.Rn, 0); // value
context.EmitLdvec(op.Rn); // value
context.EmitLdc_I8(14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0); // maskE1
context.EmitLdc_I8(06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0); // maskE0
@ -314,7 +314,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
EmitStvecWithSignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -336,7 +336,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
context.EmitLdvec(op.Rn); // value
if (op.Size == 0)
{
@ -353,7 +353,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -375,7 +375,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
context.EmitLdvec(op.Rn); // value
if (op.Size == 0)
{
@ -397,7 +397,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{

View file

@ -45,7 +45,6 @@ namespace ChocolArm64.Instructions
if (isLoad)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);
@ -62,7 +61,6 @@ namespace ChocolArm64.Instructions
}
else
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);
@ -90,7 +88,6 @@ namespace ChocolArm64.Instructions
void EmitMemAddress()
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);

View file

@ -59,7 +59,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
else
{
@ -108,7 +108,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
else
{
@ -138,7 +138,7 @@ namespace ChocolArm64.Instructions
Type[] typesShs = new Type[] { typeof(Vector128<byte>), typeof(byte) };
Type[] typesOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
context.EmitLdvec(op.Rn);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -150,7 +150,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(op.Imm4);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs));
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs));
@ -164,7 +164,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
context.EmitStvec(op.Rd);
}
else
{
@ -318,12 +318,26 @@ namespace ChocolArm64.Instructions
public static void Movi_V(ILEmitterCtx context)
{
EmitVectorImmUnaryOp(context, () => { });
if (Optimizations.UseSse2)
{
EmitMoviMvni(context, not: false);
}
else
{
EmitVectorImmUnaryOp(context, () => { });
}
}
public static void Mvni_V(ILEmitterCtx context)
{
EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
if (Optimizations.UseSse2)
{
EmitMoviMvni(context, not: true);
}
else
{
EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
}
}
public static void Smov_S(ILEmitterCtx context)
@ -418,7 +432,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
EmitLdvecWithSignedCast(context, op.Rn, 0); // value
context.EmitLdvec(op.Rn); // value
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask
context.Emit(OpCodes.Dup); // mask
@ -480,6 +494,38 @@ namespace ChocolArm64.Instructions
}
}
private static void EmitMoviMvni(ILEmitterCtx context, bool not)
{
OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
long imm = op.Imm;
if (not)
{
imm = ~imm;
}
if (op.Size < 3)
{
context.EmitLdc_I4((int)imm);
}
else
{
context.EmitLdc_I8(imm);
}
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
private static void EmitVectorTranspose(ILEmitterCtx context, int part)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@ -492,7 +538,7 @@ namespace ChocolArm64.Instructions
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
context.EmitLdvec(op.Rn); // value
if (op.Size < 3)
{
@ -504,7 +550,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
context.EmitLdvec(op.Rm); // value
if (op.Size < 3)
{
@ -518,7 +564,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
else
{
@ -560,7 +606,7 @@ namespace ChocolArm64.Instructions
if (op.RegisterSize == RegisterSize.Simd128)
{
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
context.EmitLdvec(op.Rn); // value
if (op.Size < 3)
{
@ -572,7 +618,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
context.EmitLdvec(op.Rm); // value
if (op.Size < 3)
{
@ -586,12 +632,12 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
else
{
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value
@ -609,7 +655,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
}
else
@ -648,8 +694,8 @@ namespace ChocolArm64.Instructions
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
if (op.RegisterSize == RegisterSize.Simd128)
{
@ -663,7 +709,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
}
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
}
else
{

View file

@ -42,12 +42,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -82,7 +82,7 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
@ -92,7 +92,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -280,7 +280,7 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
@ -298,7 +298,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -329,8 +329,8 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
EmitLdvecWithSignedCast(context, op.Rd, op.Size);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
@ -349,7 +349,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -405,7 +405,7 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
@ -415,7 +415,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -437,12 +437,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -470,15 +470,15 @@ namespace ChocolArm64.Instructions
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithSignedCast(context, op.Rd, op.Size);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -610,7 +610,7 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
@ -628,7 +628,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -658,8 +658,8 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
@ -678,7 +678,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -734,7 +734,7 @@ namespace ChocolArm64.Instructions
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
@ -744,7 +744,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
context.EmitStvec(op.Rd);
}
else
{
@ -765,12 +765,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -797,15 +797,15 @@ namespace ChocolArm64.Instructions
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{

View file

@ -102,7 +102,6 @@ namespace ChocolArm64.Instructions
//DC ZVA
for (int offs = 0; offs < (4 << CpuThreadState.DczSizeLog2); offs += 8)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdintzr(op.Rt);
context.EmitLdc_I(offs);

View file

@ -565,203 +565,5 @@ namespace ChocolArm64.Instructions
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, sbyte>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorSingleToInt16(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, short>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorSingleToInt32(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, int>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorSingleToInt64(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, long>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> VectorSingleToByte(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, byte>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, ushort>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<uint> VectorSingleToUInt32(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, uint>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, ulong>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, double>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<sbyte, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt16ToSingle(Vector128<short> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<short, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt32ToSingle(Vector128<int> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<int, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt64ToSingle(Vector128<long> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<long, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorByteToSingle(Vector128<byte> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<byte, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<ushort, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<uint, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<ulong, float>(vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<double, float>(vector);
}
throw new PlatformNotSupportedException();
}
}
}

View file

@ -0,0 +1,151 @@
using System;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
static class CompareExchange128
{
private struct Int128
{
public ulong Low { get; }
public ulong High { get; }
public Int128(ulong low, ulong high)
{
Low = low;
High = high;
}
}
private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired);
private delegate int GetCpuId();
private static InterlockedCompareExchange _interlockedCompareExchange;
static CompareExchange128()
{
if (RuntimeInformation.OSArchitecture != Architecture.X64 || !IsCmpxchg16bSupported())
{
throw new PlatformNotSupportedException();
}
byte[] interlockedCompareExchange128Code;
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
interlockedCompareExchange128Code = new byte[]
{
0x53, // push rbx
0x49, 0x8b, 0x00, // mov rax, [r8]
0x49, 0x8b, 0x19, // mov rbx, [r9]
0x49, 0x89, 0xca, // mov r10, rcx
0x49, 0x89, 0xd3, // mov r11, rdx
0x49, 0x8b, 0x49, 0x08, // mov rcx, [r9+8]
0x49, 0x8b, 0x50, 0x08, // mov rdx, [r8+8]
0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11]
0x49, 0x89, 0x02, // mov [r10], rax
0x4c, 0x89, 0xd0, // mov rax, r10
0x49, 0x89, 0x52, 0x08, // mov [r10+8], rdx
0x5b, // pop rbx
0xc3 // ret
};
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
interlockedCompareExchange128Code = new byte[]
{
0x53, // push rbx
0x49, 0x89, 0xd1, // mov r9, rdx
0x48, 0x89, 0xcb, // mov rbx, rcx
0x48, 0x89, 0xf0, // mov rax, rsi
0x4c, 0x89, 0xca, // mov rdx, r9
0x4c, 0x89, 0xc1, // mov rcx, r8
0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi]
0x5b, // pop rbx
0xc3 // ret
};
}
else
{
throw new PlatformNotSupportedException();
}
IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code);
_interlockedCompareExchange = Marshal.GetDelegateForFunctionPointer<InterlockedCompareExchange>(funcPtr);
}
private static bool IsCmpxchg16bSupported()
{
byte[] getCpuIdCode = new byte[]
{
0x53, // push rbx
0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1
0x0f, 0xa2, // cpuid
0x89, 0xc8, // mov eax, ecx
0x5b, // pop rbx
0xc3 // ret
};
IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode);
GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer<GetCpuId>(funcPtr);
int cpuId = getCpuId();
MemoryManagement.Free(funcPtr);
return (cpuId & (1 << 13)) != 0;
}
private static IntPtr MapCodeAsExecutable(byte[] code)
{
ulong codeLength = (ulong)code.Length;
IntPtr funcPtr = MemoryManagement.Allocate(codeLength);
unsafe
{
fixed (byte* codePtr = code)
{
byte* dest = (byte*)funcPtr;
long size = (long)codeLength;
Buffer.MemoryCopy(codePtr, dest, size, size);
}
}
MemoryManagement.Reprotect(funcPtr, codeLength, MemoryProtection.Execute);
return funcPtr;
}
public static bool InterlockedCompareExchange128(
IntPtr address,
ulong expectedLow,
ulong expectedHigh,
ulong desiredLow,
ulong desiredHigh)
{
Int128 expected = new Int128(expectedLow, expectedHigh);
Int128 desired = new Int128(desiredLow, desiredHigh);
Int128 old = _interlockedCompareExchange(address, expected, desired);
return old.Low == expected.Low && old.High == expected.High;
}
public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high)
{
Int128 zero = new Int128(0, 0);
Int128 old = _interlockedCompareExchange(address, zero, zero);
low = old.Low;
high = old.High;
}
}
}

View file

@ -0,0 +1,114 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
public static class MemoryManagement
{
public static bool HasWriteWatchSupport => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
public static IntPtr Allocate(ulong size)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
return MemoryManagementWindows.Allocate(sizeNint);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryManagementUnix.Allocate(size);
}
else
{
throw new PlatformNotSupportedException();
}
}
public static IntPtr AllocateWriteTracked(ulong size)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
return MemoryManagementWindows.AllocateWriteTracked(sizeNint);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryManagementUnix.Allocate(size);
}
else
{
throw new PlatformNotSupportedException();
}
}
public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
{
bool result;
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
result = MemoryManagementWindows.Reprotect(address, sizeNint, permission);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
result = MemoryManagementUnix.Reprotect(address, size, permission);
}
else
{
throw new PlatformNotSupportedException();
}
if (!result)
{
throw new MemoryProtectionException(permission);
}
}
public static bool Free(IntPtr address)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
return MemoryManagementWindows.Free(address);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryManagementUnix.Free(address);
}
else
{
throw new PlatformNotSupportedException();
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool GetModifiedPages(
IntPtr address,
IntPtr size,
IntPtr[] addresses,
out ulong count)
{
//This is only supported on windows, but returning
//false (failed) is also valid for platforms without
//write tracking support on the OS.
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
return MemoryManagementWindows.GetModifiedPages(address, size, addresses, out count);
}
else
{
count = 0;
return false;
}
}
}
}

View file

@ -0,0 +1,70 @@
using Mono.Unix.Native;
using System;
namespace ChocolArm64.Memory
{
static class MemoryManagementUnix
{
public static IntPtr Allocate(ulong size)
{
ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
unsafe
{
ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
*((ulong*)ptr - 1) = size;
}
return ptr;
}
public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
{
MmapProts prot = GetProtection(protection);
return Syscall.mprotect(address, size, prot) == 0;
}
private static MmapProts GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
{
case Memory.MemoryProtection.None: return MmapProts.PROT_NONE;
case Memory.MemoryProtection.Read: return MmapProts.PROT_READ;
case Memory.MemoryProtection.ReadAndWrite: return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
case Memory.MemoryProtection.Execute: return MmapProts.PROT_EXEC;
default: throw new ArgumentException($"Invalid permission \"{protection}\".");
}
}
public static bool Free(IntPtr address)
{
ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
ulong size;
unsafe
{
size = *((ulong*)address - 1);
address = new IntPtr(address.ToInt64() - (long)pageSize);
}
return Syscall.munmap(address, size + pageSize) == 0;
}
}
}

View file

@ -0,0 +1,155 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
static class MemoryManagementWindows
{
[Flags]
private enum AllocationType : uint
{
Commit = 0x1000,
Reserve = 0x2000,
Decommit = 0x4000,
Release = 0x8000,
Reset = 0x80000,
Physical = 0x400000,
TopDown = 0x100000,
WriteWatch = 0x200000,
LargePages = 0x20000000
}
[Flags]
private enum MemoryProtection : uint
{
NoAccess = 0x01,
ReadOnly = 0x02,
ReadWrite = 0x04,
WriteCopy = 0x08,
Execute = 0x10,
ExecuteRead = 0x20,
ExecuteReadWrite = 0x40,
ExecuteWriteCopy = 0x80,
GuardModifierflag = 0x100,
NoCacheModifierflag = 0x200,
WriteCombineModifierflag = 0x400
}
private enum WriteWatchFlags : uint
{
None = 0,
Reset = 1
}
[DllImport("kernel32.dll")]
private static extern IntPtr VirtualAlloc(
IntPtr lpAddress,
IntPtr dwSize,
AllocationType flAllocationType,
MemoryProtection flProtect);
[DllImport("kernel32.dll")]
private static extern bool VirtualProtect(
IntPtr lpAddress,
IntPtr dwSize,
MemoryProtection flNewProtect,
out MemoryProtection lpflOldProtect);
[DllImport("kernel32.dll")]
private static extern bool VirtualFree(
IntPtr lpAddress,
IntPtr dwSize,
AllocationType dwFreeType);
[DllImport("kernel32.dll")]
private static extern int GetWriteWatch(
WriteWatchFlags dwFlags,
IntPtr lpBaseAddress,
IntPtr dwRegionSize,
IntPtr[] lpAddresses,
ref ulong lpdwCount,
out uint lpdwGranularity);
public static IntPtr Allocate(IntPtr size)
{
const AllocationType flags =
AllocationType.Reserve |
AllocationType.Commit;
IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
return ptr;
}
public static IntPtr AllocateWriteTracked(IntPtr size)
{
const AllocationType flags =
AllocationType.Reserve |
AllocationType.Commit |
AllocationType.WriteWatch;
IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
return ptr;
}
public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
{
MemoryProtection prot = GetProtection(protection);
return VirtualProtect(address, size, prot, out _);
}
private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
{
case Memory.MemoryProtection.None: return MemoryProtection.NoAccess;
case Memory.MemoryProtection.Read: return MemoryProtection.ReadOnly;
case Memory.MemoryProtection.ReadAndWrite: return MemoryProtection.ReadWrite;
case Memory.MemoryProtection.ReadAndExecute: return MemoryProtection.ExecuteRead;
case Memory.MemoryProtection.Execute: return MemoryProtection.Execute;
default: throw new ArgumentException($"Invalid permission \"{protection}\".");
}
}
public static bool Free(IntPtr address)
{
return VirtualFree(address, IntPtr.Zero, AllocationType.Release);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool GetModifiedPages(
IntPtr address,
IntPtr size,
IntPtr[] addresses,
out ulong count)
{
ulong pagesCount = (ulong)addresses.Length;
int result = GetWriteWatch(
WriteWatchFlags.Reset,
address,
size,
addresses,
ref pagesCount,
out uint granularity);
count = pagesCount;
return result == 0;
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,16 @@
using System;
namespace ChocolArm64.Memory
{
[Flags]
public enum MemoryProtection
{
None = 0,
Read = 1 << 0,
Write = 1 << 1,
Execute = 1 << 2,
ReadAndWrite = Read | Write,
ReadAndExecute = Read | Execute
}
}

View file

@ -0,0 +1,10 @@
using System;
namespace ChocolArm64.Memory
{
class MemoryProtectionException : Exception
{
public MemoryProtectionException(MemoryProtection protection) :
base($"Failed to set memory protection to \"{protection}\".") { }
}
}

View file

@ -310,12 +310,12 @@ namespace ChocolArm64
SetA64("x00111100x101000000000xxxxxxxxxx", InstEmit.Fcvtps_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x101001000000xxxxxxxxxx", InstEmit.Fcvtpu_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x111000000000xxxxxxxxxx", InstEmit.Fcvtzs_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x011000xxxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("010111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzs_S, typeof(OpCodeSimd64));
SetA64("0>0011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimd64));
SetA64("0x0011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimdShImm64));
SetA64("x00111100x111001000000xxxxxxxxxx", InstEmit.Fcvtzu_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x011001xxxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("011111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzu_S, typeof(OpCodeSimd64));
SetA64("0>1011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimd64));
SetA64("0x1011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimdShImm64));
@ -434,6 +434,7 @@ namespace ChocolArm64
SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64));
SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64));
SetA64("x00111100x100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64));
SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S, typeof(OpCodeSimd64));
SetA64("0>0011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64));
SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64));
@ -542,6 +543,7 @@ namespace ChocolArm64
SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64));
SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64));
SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstEmit.Ucvtf_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64));
SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64));

View file

@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
public static class Optimizations
{
internal static bool FastFP = true;
public static bool AssumeStrictAbiCompliance { get; set; }
private static bool _useAllSseIfAvailable = true;
public static bool FastFP { get; set; } = true;
private static bool _useSseIfAvailable = true;
private static bool _useSse2IfAvailable = true;
private static bool _useSse3IfAvailable = true;
private static bool _useSsse3IfAvailable = true;
private static bool _useSse41IfAvailable = true;
private static bool _useSse42IfAvailable = true;
private const bool UseAllSseIfAvailable = true;
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
}
public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
}

View file

@ -37,7 +37,6 @@ namespace ChocolArm64.State
public int ElrHyp;
public bool Running { get; set; }
public int Core { get; set; }
private bool _interrupted;
@ -85,6 +84,16 @@ namespace ChocolArm64.State
internal Translator CurrentTranslator;
private ulong _exclusiveAddress;
internal ulong ExclusiveValueLow { get; set; }
internal ulong ExclusiveValueHigh { get; set; }
public CpuThreadState()
{
ClearExclusiveAddress();
}
static CpuThreadState()
{
_hostTickFreq = 1.0 / Stopwatch.Frequency;
@ -94,6 +103,26 @@ namespace ChocolArm64.State
_tickCounter.Start();
}
internal void SetExclusiveAddress(ulong address)
{
_exclusiveAddress = GetMaskedExclusiveAddress(address);
}
internal bool CheckExclusiveAddress(ulong address)
{
return GetMaskedExclusiveAddress(address) == _exclusiveAddress;
}
internal void ClearExclusiveAddress()
{
_exclusiveAddress = ulong.MaxValue;
}
private ulong GetMaskedExclusiveAddress(ulong address)
{
return address & ~((4UL << ErgSizeLog2) - 1);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool Synchronize(int bbWeight)
{

View file

@ -0,0 +1,9 @@
namespace ChocolArm64.Translation
{
enum CallType
{
Call,
VirtualCall,
VirtualJump
}
}

View file

@ -4,13 +4,13 @@ namespace ChocolArm64.Translation
{
class ILBlock : IILEmit
{
public long IntInputs { get; private set; }
public long IntOutputs { get; private set; }
public long IntAwOutputs { get; private set; }
public long IntInputs { get; private set; }
public long IntOutputs { get; private set; }
private long _intAwOutputs;
public long VecInputs { get; private set; }
public long VecOutputs { get; private set; }
public long VecAwOutputs { get; private set; }
public long VecInputs { get; private set; }
public long VecOutputs { get; private set; }
private long _vecAwOutputs;
public bool HasStateStore { get; private set; }
@ -34,25 +34,25 @@ namespace ChocolArm64.Translation
//opcodes emitted by each ARM instruction.
//We can only consider the new outputs for doing input elimination
//after all the CIL opcodes used by the instruction being emitted.
IntAwOutputs = IntOutputs;
VecAwOutputs = VecOutputs;
_intAwOutputs = IntOutputs;
_vecAwOutputs = VecOutputs;
}
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
{
switch (ld.IoType)
switch (ld.VarType)
{
case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break;
case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break;
case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break;
case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break;
case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break;
}
}
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
{
switch (st.IoType)
switch (st.VarType)
{
case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
case IoType.Int: IntOutputs |= 1L << st.Index; break;
case IoType.Vector: VecOutputs |= 1L << st.Index; break;
case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
case VarType.Int: IntOutputs |= 1L << st.Index; break;
case VarType.Vector: VecOutputs |= 1L << st.Index; break;
}
}
else if (emitter is ILOpCodeStoreState)

View file

@ -1,5 +1,6 @@
using ChocolArm64.Decoders;
using ChocolArm64.Instructions;
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
@ -10,6 +11,8 @@ namespace ChocolArm64.Translation
{
class ILEmitterCtx
{
public MemoryManager Memory { get; }
private TranslatorCache _cache;
private TranslatorQueue _queue;
@ -28,6 +31,10 @@ namespace ChocolArm64.Translation
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
public bool HasIndirectJump { get; set; }
public bool HasSlowCall { get; set; }
private Dictionary<Block, ILBlock> _visitedBlocks;
private Queue<Block> _branchTargets;
@ -43,18 +50,34 @@ namespace ChocolArm64.Translation
//values needed by some functions, since IL doesn't have a swap instruction.
//You can use any value here as long it doesn't conflict with the indices
//for the other registers. Any value >= 64 or < 0 will do.
private const int IntTmpIndex = -1;
private const int RorTmpIndex = -2;
private const int CmpOptTmp1Index = -3;
private const int CmpOptTmp2Index = -4;
private const int VecTmp1Index = -5;
private const int VecTmp2Index = -6;
private const int ReservedLocalsCount = 64;
public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph)
private const int RorTmpIndex = ReservedLocalsCount + 0;
private const int CmpOptTmp1Index = ReservedLocalsCount + 1;
private const int CmpOptTmp2Index = ReservedLocalsCount + 2;
private const int IntGpTmp1Index = ReservedLocalsCount + 3;
private const int IntGpTmp2Index = ReservedLocalsCount + 4;
private const int UserIntTempStart = ReservedLocalsCount + 5;
//Vectors are part of another "set" of locals.
private const int VecGpTmp1Index = ReservedLocalsCount + 0;
private const int VecGpTmp2Index = ReservedLocalsCount + 1;
private const int UserVecTempStart = ReservedLocalsCount + 2;
private static int _userIntTempCount;
private static int _userVecTempCount;
public ILEmitterCtx(
MemoryManager memory,
TranslatorCache cache,
TranslatorQueue queue,
TranslationTier tier,
Block graph)
{
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
_currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
Memory = memory ?? throw new ArgumentNullException(nameof(memory));
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
_currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
Tier = tier;
@ -72,7 +95,22 @@ namespace ChocolArm64.Translation
ResetBlockState();
AdvanceOpCode();
if (AdvanceOpCode())
{
EmitSynchronization();
_ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
}
}
public static int GetIntTempIndex()
{
return UserIntTempStart + _userIntTempCount++;
}
public static int GetVecTempIndex()
{
return UserVecTempStart + _userVecTempCount++;
}
public ILBlock[] GetILBlocks()
@ -98,10 +136,18 @@ namespace ChocolArm64.Translation
return;
}
if (_opcIndex == 0)
int opcIndex = _opcIndex;
if (opcIndex == 0)
{
MarkLabel(GetLabel(_currBlock.Position));
}
bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
if (isLastOp && CurrBlock.Branch != null &&
(ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
{
EmitSynchronization();
}
@ -132,7 +178,7 @@ namespace ChocolArm64.Translation
//of the next instruction to be executed (in the case that the condition
//is false, and the branch was not taken, as all basic blocks should end with
//some kind of branch).
if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null)
if (isLastOp && CurrBlock.Next == null)
{
EmitStoreState();
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
@ -144,7 +190,7 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILBarrier());
}
private Condition GetInverseCond(Condition cond)
private static Condition GetInverseCond(Condition cond)
{
//Bit 0 of all conditions is basically a negation bit, so
//inverting this bit has the effect of inverting the condition.
@ -256,32 +302,43 @@ namespace ChocolArm64.Translation
return;
}
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
_queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
}
public bool TryOptEmitSubroutineCall()
{
//Calls should always have a next block, unless
//we're translating a single basic block.
if (_currBlock.Next == null)
{
return false;
}
if (CurrOp.Emitter != InstEmit.Bl)
if (!(CurrOp is IOpCodeBImm op))
{
return false;
}
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine))
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{
return false;
}
//It's not worth to call a Tier0 method, because
//it contains slow code, rather than the entire function.
if (sub.Tier == TranslationTier.Tier0)
{
return false;
}
EmitStoreState(sub);
for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
{
EmitLdarg(index);
}
EmitCall(subroutine.Method);
EmitCall(sub.Method);
return true;
}
@ -292,8 +349,8 @@ namespace ChocolArm64.Translation
InstEmitAluHelper.EmitAluLoadOpers(this);
Stloc(CmpOptTmp2Index, IoType.Int);
Stloc(CmpOptTmp1Index, IoType.Int);
Stloc(CmpOptTmp2Index, VarType.Int);
Stloc(CmpOptTmp1Index, VarType.Int);
}
private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>()
@ -317,8 +374,8 @@ namespace ChocolArm64.Translation
{
if (_optOpLastCompare.Emitter == InstEmit.Subs)
{
Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
Emit(_branchOps[cond], target);
@ -340,7 +397,7 @@ namespace ChocolArm64.Translation
//Such invalid values can't be encoded on the immediate encodings.
if (_optOpLastCompare is IOpCodeAluImm64 op)
{
Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
{
@ -462,14 +519,14 @@ namespace ChocolArm64.Translation
{
if (amount > 0)
{
Stloc(RorTmpIndex, IoType.Int);
Ldloc(RorTmpIndex, IoType.Int);
Stloc(RorTmpIndex, VarType.Int);
Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(amount);
Emit(OpCodes.Shr_Un);
Ldloc(RorTmpIndex, IoType.Int);
Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(CurrOp.GetBitsCount() - amount);
@ -517,7 +574,7 @@ namespace ChocolArm64.Translation
public void EmitLdarg(int index)
{
_ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg));
_ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
}
public void EmitLdintzr(int index)
@ -559,22 +616,30 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
}
public void EmitLdtmp() => EmitLdint(IntTmpIndex);
public void EmitSttmp() => EmitStint(IntTmpIndex);
private void EmitStoreState(TranslatedSub callSub)
{
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
}
public void EmitLdvectmp() => EmitLdvec(VecTmp1Index);
public void EmitStvectmp() => EmitStvec(VecTmp1Index);
public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
public void EmitSttmp() => EmitStint(IntGpTmp1Index);
public void EmitLdvectmp2() => EmitLdvec(VecTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecTmp2Index);
public void EmitLdtmp2() => EmitLdint(IntGpTmp2Index);
public void EmitSttmp2() => EmitStint(IntGpTmp2Index);
public void EmitLdint(int index) => Ldloc(index, IoType.Int);
public void EmitStint(int index) => Stloc(index, IoType.Int);
public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index);
public void EmitStvectmp() => EmitStvec(VecGpTmp1Index);
public void EmitLdvec(int index) => Ldloc(index, IoType.Vector);
public void EmitStvec(int index) => Stloc(index, IoType.Vector);
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
public void EmitLdflg(int index) => Ldloc(index, IoType.Flag);
public void EmitLdint(int index) => Ldloc(index, VarType.Int);
public void EmitStint(int index) => Stloc(index, VarType.Int);
public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
public void EmitStvec(int index) => Stloc(index, VarType.Vector);
public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
public void EmitStflg(int index)
{
//Set this only if any of the NZCV flag bits were modified.
@ -587,52 +652,32 @@ namespace ChocolArm64.Translation
_optOpLastFlagSet = CurrOp;
}
Stloc(index, IoType.Flag);
Stloc(index, VarType.Flag);
}
private void Ldloc(int index, IoType ioType)
private void Ldloc(int index, VarType varType)
{
_ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize));
_ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
}
private void Ldloc(int index, IoType ioType, RegisterSize registerSize)
private void Ldloc(int index, VarType varType, RegisterSize registerSize)
{
_ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize));
_ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
}
private void Stloc(int index, IoType ioType)
private void Stloc(int index, VarType varType)
{
_ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize));
_ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
}
public void EmitCallPropGet(Type objType, string propName)
{
if (objType == null)
{
throw new ArgumentNullException(nameof(objType));
}
if (propName == null)
{
throw new ArgumentNullException(nameof(propName));
}
EmitCall(objType.GetMethod($"get_{propName}"));
EmitCall(objType, $"get_{propName}");
}
public void EmitCallPropSet(Type objType, string propName)
{
if (objType == null)
{
throw new ArgumentNullException(nameof(objType));
}
if (propName == null)
{
throw new ArgumentNullException(nameof(propName));
}
EmitCall(objType.GetMethod($"set_{propName}"));
EmitCall(objType, $"set_{propName}");
}
public void EmitCall(Type objType, string mthdName)
@ -650,6 +695,16 @@ namespace ChocolArm64.Translation
EmitCall(objType.GetMethod(mthdName));
}
public void EmitCallPrivatePropGet(Type objType, string propName)
{
EmitPrivateCall(objType, $"get_{propName}");
}
public void EmitCallPrivatePropSet(Type objType, string propName)
{
EmitPrivateCall(objType, $"set_{propName}");
}
public void EmitPrivateCall(Type objType, string mthdName)
{
if (objType == null)

View file

@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
{
private bool _hasLabel;
private Label _lbl;
private Label _label;
public void Emit(ILMethodBuilder context)
{
@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
{
if (!_hasLabel)
{
_lbl = context.Generator.DefineLabel();
_label = context.Generator.DefineLabel();
_hasLabel = true;
}
return _lbl;
return _label;
}
}
}

View file

@ -8,7 +8,10 @@ namespace ChocolArm64.Translation
{
class ILMethodBuilder
{
public LocalAlloc LocalAlloc { get; private set; }
private const int RegsCount = 32;
private const int RegsMask = RegsCount - 1;
public RegisterUsage RegUsage { get; private set; }
public ILGenerator Generator { get; private set; }
@ -18,29 +21,47 @@ namespace ChocolArm64.Translation
private string _subName;
public bool IsAarch64 { get; }
public bool IsSubComplete { get; }
private int _localsCount;
public ILMethodBuilder(ILBlock[] ilBlocks, string subName)
public ILMethodBuilder(
ILBlock[] ilBlocks,
string subName,
bool isAarch64,
bool isSubComplete = false)
{
_ilBlocks = ilBlocks;
_subName = subName;
_ilBlocks = ilBlocks;
_subName = subName;
IsAarch64 = isAarch64;
IsSubComplete = isSubComplete;
}
public TranslatedSub GetSubroutine(TranslationTier tier)
public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{
LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
RegUsage = new RegisterUsage();
RegUsage.BuildUses(_ilBlocks[0]);
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
Generator = method.GetILGenerator();
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
TranslatedSub subroutine = new TranslatedSub(method, tier);
TranslatedSub subroutine = new TranslatedSub(
method,
intNiRegsMask,
vecNiRegsMask,
tier,
isWorthOptimizing);
_locals = new Dictionary<Register, int>();
_localsCount = 0;
new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
Generator = method.GetILGenerator();
foreach (ILBlock ilBlock in _ilBlocks)
{
@ -80,13 +101,13 @@ namespace ChocolArm64.Translation
public static Register GetRegFromBit(int bit, RegisterType baseType)
{
if (bit < 32)
if (bit < RegsCount)
{
return new Register(bit, baseType);
}
else if (baseType == RegisterType.Int)
{
return new Register(bit & 0x1f, RegisterType.Flag);
return new Register(bit & RegsMask, RegisterType.Flag);
}
else
{
@ -96,7 +117,7 @@ namespace ChocolArm64.Translation
public static bool IsRegIndex(int index)
{
return (uint)index < 32;
return (uint)index < RegsCount;
}
}
}

View file

@ -4,16 +4,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCode : IILEmit
{
private OpCode _ilOp;
public OpCode ILOp { get; }
public ILOpCode(OpCode ilOp)
{
_ilOp = ilOp;
ILOp = ilOp;
}
public void Emit(ILMethodBuilder context)
{
context.Generator.Emit(_ilOp);
context.Generator.Emit(ILOp);
}
}
}

View file

@ -4,18 +4,18 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeBranch : IILEmit
{
private OpCode _ilOp;
private ILLabel _label;
public OpCode ILOp { get; }
public ILLabel Label { get; }
public ILOpCodeBranch(OpCode ilOp, ILLabel label)
{
_ilOp = ilOp;
_label = label;
ILOp = ilOp;
Label = label;
}
public void Emit(ILMethodBuilder context)
{
context.Generator.Emit(_ilOp, _label.GetLabel(context));
context.Generator.Emit(ILOp, Label.GetLabel(context));
}
}
}

View file

@ -5,9 +5,9 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeCall : IILEmit
{
public MethodInfo Info { get; private set; }
public MethodInfo Info { get; }
public bool IsVirtual { get; private set; }
public bool IsVirtual { get; }
public ILOpCodeCall(MethodInfo info, bool isVirtual)
{

View file

@ -16,6 +16,8 @@ namespace ChocolArm64.Translation
private ImmVal _value;
public long Value => _value.I8;
private enum ConstType
{
Int32,

View file

@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoad : IILEmit
{
public int Index { get; private set; }
public int Index { get; }
public IoType IoType { get; private set; }
public VarType VarType { get; }
public RegisterSize RegisterSize { get; private set; }
public RegisterSize RegisterSize { get; }
public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0)
public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
IoType = ioType;
VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
switch (IoType)
switch (VarType)
{
case IoType.Arg: context.Generator.EmitLdarg(Index); break;
case VarType.Arg: context.Generator.EmitLdarg(Index); break;
case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
}
}

View file

@ -5,7 +5,7 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoadField : IILEmit
{
public FieldInfo Info { get; private set; }
public FieldInfo Info { get; }
public ILOpCodeLoadField(FieldInfo info)
{

View file

@ -7,15 +7,24 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
public ILOpCodeLoadState(ILBlock block)
private bool _isSubEntry;
public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
{
_block = block;
_block = block;
_isSubEntry = isSubEntry;
}
public void Emit(ILMethodBuilder context)
{
long intInputs = context.LocalAlloc.GetIntInputs(_block);
long vecInputs = context.LocalAlloc.GetVecInputs(_block);
long intInputs = context.RegUsage.GetIntInputs(_block);
long vecInputs = context.RegUsage.GetVecInputs(_block);
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
{
intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
}
LoadLocals(context, intInputs, RegisterType.Int);
LoadLocals(context, vecInputs, RegisterType.Vector);

View file

@ -2,16 +2,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLog : IILEmit
{
private string _text;
public string Text { get; }
public ILOpCodeLog(string text)
{
_text = text;
Text = text;
}
public void Emit(ILMethodBuilder context)
{
context.Generator.EmitWriteLine(_text);
context.Generator.EmitWriteLine(Text);
}
}
}

View file

@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeStore : IILEmit
{
public int Index { get; private set; }
public int Index { get; }
public IoType IoType { get; private set; }
public VarType VarType { get; }
public RegisterSize RegisterSize { get; private set; }
public RegisterSize RegisterSize { get; }
public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0)
public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
IoType = ioType;
VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
switch (IoType)
switch (VarType)
{
case IoType.Arg: context.Generator.EmitStarg(Index); break;
case VarType.Arg: context.Generator.EmitStarg(Index); break;
case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break;
case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break;
case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
}
}

View file

@ -7,15 +7,33 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
public ILOpCodeStoreState(ILBlock block)
private TranslatedSub _callSub;
public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
{
_block = block;
_block = block;
_callSub = callSub;
}
public void Emit(ILMethodBuilder context)
{
long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
long intOutputs = context.RegUsage.GetIntOutputs(_block);
long vecOutputs = context.RegUsage.GetVecOutputs(_block);
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
{
intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
}
if (_callSub != null)
{
//Those register are assigned on the callee function, without
//reading it's value first. We don't need to write them because
//they are not going to be read on the callee.
intOutputs &= ~_callSub.IntNiRegsMask;
vecOutputs &= ~_callSub.VecNiRegsMask;
}
StoreLocals(context, intOutputs, RegisterType.Int);
StoreLocals(context, vecOutputs, RegisterType.Vector);

View file

@ -3,8 +3,13 @@ using System.Collections.Generic;
namespace ChocolArm64.Translation
{
class LocalAlloc
class RegisterUsage
{
public const long CallerSavedIntRegistersMask = 0x7fL << 9;
public const long PStateNzcvFlagsMask = 0xfL << 60;
public const long CallerSavedVecRegistersMask = 0xffffL << 16;
private class PathIo
{
private Dictionary<ILBlock, long> _allInputs;
@ -18,31 +23,30 @@ namespace ChocolArm64.Translation
_cmnOutputs = new Dictionary<ILBlock, long>();
}
public PathIo(ILBlock root, long inputs, long outputs) : this()
public void Set(ILBlock entry, long inputs, long outputs)
{
Set(root, inputs, outputs);
}
public void Set(ILBlock root, long inputs, long outputs)
{
if (!_allInputs.TryAdd(root, inputs))
if (!_allInputs.TryAdd(entry, inputs))
{
_allInputs[root] |= inputs;
_allInputs[entry] |= inputs;
}
if (!_cmnOutputs.TryAdd(root, outputs))
if (!_cmnOutputs.TryAdd(entry, outputs))
{
_cmnOutputs[root] &= outputs;
_cmnOutputs[entry] &= outputs;
}
_allOutputs |= outputs;
}
public long GetInputs(ILBlock root)
public long GetInputs(ILBlock entry)
{
if (_allInputs.TryGetValue(root, out long inputs))
if (_allInputs.TryGetValue(entry, out long inputs))
{
return inputs | (_allOutputs & ~_cmnOutputs[root]);
//We also need to read the registers that may not be written
//by all paths that can reach a exit point, to ensure that
//the local variable will not remain uninitialized depending
//on the flow path taken.
return inputs | (_allOutputs & ~_cmnOutputs[entry]);
}
return 0;
@ -57,15 +61,38 @@ namespace ChocolArm64.Translation
private Dictionary<ILBlock, PathIo> _intPaths;
private Dictionary<ILBlock, PathIo> _vecPaths;
private struct BlockIo
private struct BlockIo : IEquatable<BlockIo>
{
public ILBlock Block;
public ILBlock Entry;
public ILBlock Block { get; }
public ILBlock Entry { get; }
public long IntInputs;
public long VecInputs;
public long IntOutputs;
public long VecOutputs;
public long IntInputs { get; set; }
public long VecInputs { get; set; }
public long IntOutputs { get; set; }
public long VecOutputs { get; set; }
public BlockIo(ILBlock block, ILBlock entry)
{
Block = block;
Entry = entry;
IntInputs = IntOutputs = 0;
VecInputs = VecOutputs = 0;
}
public BlockIo(
ILBlock block,
ILBlock entry,
long intInputs,
long vecInputs,
long intOutputs,
long vecOutputs) : this(block, entry)
{
IntInputs = intInputs;
VecInputs = vecInputs;
IntOutputs = intOutputs;
VecOutputs = vecOutputs;
}
public override bool Equals(object obj)
{
@ -74,6 +101,11 @@ namespace ChocolArm64.Translation
return false;
}
return Equals(other);
}
public bool Equals(BlockIo other)
{
return other.Block == Block &&
other.Entry == Entry &&
other.IntInputs == IntInputs &&
@ -98,25 +130,13 @@ namespace ChocolArm64.Translation
}
}
private const int MaxOptGraphLength = 40;
public LocalAlloc(ILBlock[] graph, ILBlock entry)
public RegisterUsage()
{
_intPaths = new Dictionary<ILBlock, PathIo>();
_vecPaths = new Dictionary<ILBlock, PathIo>();
if (graph.Length > 1 &&
graph.Length < MaxOptGraphLength)
{
InitializeOptimal(graph, entry);
}
else
{
InitializeFast(graph);
}
}
private void InitializeOptimal(ILBlock[] graph, ILBlock entry)
public void BuildUses(ILBlock entry)
{
//This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register
@ -124,7 +144,7 @@ namespace ChocolArm64.Translation
//When a block can be reached by more than one path, then the
//output from all paths needs to be set for this block, and
//only outputs present in all of the parent blocks can be considered
//when doing input elimination. Each block chain have a entry, that's where
//when doing input elimination. Each block chain has a entry, that's where
//the code starts executing. They are present on the subroutine start point,
//and on call return points too (address written to X30 by BL).
HashSet<BlockIo> visited = new HashSet<BlockIo>();
@ -133,19 +153,13 @@ namespace ChocolArm64.Translation
void Enqueue(BlockIo block)
{
if (!visited.Contains(block))
if (visited.Add(block))
{
unvisited.Enqueue(block);
visited.Add(block);
}
}
Enqueue(new BlockIo()
{
Block = entry,
Entry = entry
});
Enqueue(new BlockIo(entry, entry));
while (unvisited.Count > 0)
{
@ -177,19 +191,21 @@ namespace ChocolArm64.Translation
void EnqueueFromCurrent(ILBlock block, bool retTarget)
{
BlockIo blockIo = new BlockIo() { Block = block };
BlockIo blockIo;
if (retTarget)
{
blockIo.Entry = block;
blockIo = new BlockIo(block, block);
}
else
{
blockIo.Entry = current.Entry;
blockIo.IntInputs = current.IntInputs;
blockIo.VecInputs = current.VecInputs;
blockIo.IntOutputs = current.IntOutputs;
blockIo.VecOutputs = current.VecOutputs;
blockIo = new BlockIo(
block,
current.Entry,
current.IntInputs,
current.VecInputs,
current.IntOutputs,
current.VecOutputs);
}
Enqueue(blockIo);
@ -207,54 +223,63 @@ namespace ChocolArm64.Translation
}
}
private void InitializeFast(ILBlock[] graph)
{
//This is WAY faster than InitializeOptimal, but results in
//unneeded loads and stores, so the resulting code will be slower.
long intInputs = 0, intOutputs = 0;
long vecInputs = 0, vecOutputs = 0;
public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
foreach (ILBlock block in graph)
{
intInputs |= block.IntInputs;
intOutputs |= block.IntOutputs;
vecInputs |= block.VecInputs;
vecOutputs |= block.VecOutputs;
}
//It's possible that not all code paths writes to those output registers,
//in those cases if we attempt to write an output registers that was
//not written, we will be just writing zero and messing up the old register value.
//So we just need to ensure that all outputs are loaded.
if (graph.Length > 1)
{
intInputs |= intOutputs;
vecInputs |= vecOutputs;
}
foreach (ILBlock block in graph)
{
_intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
_vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
}
}
public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
private long GetInputsImpl(ILBlock root, IEnumerable<PathIo> values)
private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
long inputs = 0;
foreach (PathIo path in values)
{
inputs |= path.GetInputs(root);
inputs |= path.GetInputs(entry);
}
return inputs;
}
public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values);
public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values);
private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
//Returns a mask with registers that are written to
//before being read. Only those registers that are
//written in all paths, and is not read before being
//written to on those paths, should be set on the mask.
long mask = -1L;
foreach (PathIo path in values)
{
mask &= path.GetOutputs() & ~path.GetInputs(entry);
}
return mask;
}
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
{
//TODO: ARM32 support.
if (isAarch64)
{
mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
}
return mask;
}
public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
{
//TODO: ARM32 support.
if (isAarch64)
{
mask &= ~CallerSavedVecRegistersMask;
}
return mask;
}
}
}

View file

@ -10,21 +10,41 @@ namespace ChocolArm64.Translation
class TranslatedSub
{
//This is the minimum amount of calls needed for the method
//to be retranslated with higher quality code. It's only worth
//doing that for hot code.
private const int MinCallCountForOpt = 30;
public ArmSubroutine Delegate { get; private set; }
public static int StateArgIdx { get; private set; }
public static int MemoryArgIdx { get; private set; }
public static int StateArgIdx { get; }
public static int MemoryArgIdx { get; }
public static Type[] FixedArgTypes { get; private set; }
public static Type[] FixedArgTypes { get; }
public DynamicMethod Method { get; private set; }
public DynamicMethod Method { get; }
public TranslationTier Tier { get; private set; }
public TranslationTier Tier { get; }
public TranslatedSub(DynamicMethod method, TranslationTier tier)
public long IntNiRegsMask { get; }
public long VecNiRegsMask { get; }
private bool _isWorthOptimizing;
private int _callCount;
public TranslatedSub(
DynamicMethod method,
long intNiRegsMask,
long vecNiRegsMask,
TranslationTier tier,
bool isWorthOptimizing)
{
Method = method ?? throw new ArgumentNullException(nameof(method));;
Tier = tier;
Method = method ?? throw new ArgumentNullException(nameof(method));;
IntNiRegsMask = intNiRegsMask;
VecNiRegsMask = vecNiRegsMask;
_isWorthOptimizing = isWorthOptimizing;
Tier = tier;
}
static TranslatedSub()
@ -61,5 +81,24 @@ namespace ChocolArm64.Translation
{
return Delegate(threadState, memory);
}
public bool IsWorthOptimizing()
{
if (!_isWorthOptimizing)
{
return false;
}
if (_callCount++ < MinCallCountForOpt)
{
return false;
}
//Only return true once, so that it is
//added to the queue only once.
_isWorthOptimizing = false;
return true;
}
}
}

View file

@ -63,48 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
position = subroutine.Execute(state, _memory);
position = sub.Execute(state, _memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
}
}
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
if (sub.Tier == TranslationTier.Tier0)
if (sub.IsWorthOptimizing())
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
}
return sub.Delegate;
}
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
{
subroutine = TranslateLowCq(position, state.GetExecutionMode());
}
return subroutine;
}
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
}
else
{
TranslateHighCq(item.Position, item.Mode);
TranslateHighCq(item.Position, item.Mode, item.IsComplete);
}
}
else
@ -138,30 +126,36 @@ namespace ChocolArm64.Translation
{
Block block = Decoder.DecodeBasicBlock(_memory, position, mode);
ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier0, block);
string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
}
private void TranslateHighCq(long position, ExecutionMode mode)
private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier1, graph);
ILBlock[] ilBlocks = context.GetILBlocks();
string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
isComplete &= !context.HasIndirectJump;
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0;
@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
ilOpCount += ilBlock.Count;
}
ForceAheadOfTimeCompilation(subroutine);
_cache.AddOrUpdate(position, subroutine, ilOpCount);
ForceAheadOfTimeCompilation(subroutine);
return subroutine;
}
private string GetSubroutineName(long position)

View file

@ -1,3 +1,4 @@
using ChocolArm64.State;
using System.Collections.Concurrent;
using System.Threading;
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{
class TranslatorQueue
{
//This is the maximum number of functions to be translated that the queue can hold.
//The value may need some tuning to find the sweet spot.
private const int MaxQueueSize = 1024;
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent;
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false);
}
public void Enqueue(TranslatorQueueItem item)
public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
if (queue.Count >= MaxQueueSize)
{
queue.TryPop(out _);
}
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
queue.Push(item);

View file

@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
public TranslationTier Tier { get; }
public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
public bool IsComplete { get; }
public TranslatorQueueItem(
long position,
ExecutionMode mode,
TranslationTier tier,
bool isComplete = false)
{
Position = position;
Mode = mode;
Tier = tier;
Position = position;
Mode = mode;
Tier = tier;
IsComplete = isComplete;
}
}
}

View file

@ -1,6 +1,6 @@
namespace ChocolArm64.Translation
{
enum IoType
enum VarType
{
Arg,
Flag,

View file

@ -34,6 +34,11 @@ namespace Ryujinx.Common
return value & -(long)size;
}
public static int DivRoundUp(int value, int dividend)
{
return (value + dividend - 1) / dividend;
}
public static ulong DivRoundUp(ulong value, uint dividend)
{
return (value + dividend - 1) / dividend;
@ -44,6 +49,24 @@ namespace Ryujinx.Common
return (value + dividend - 1) / dividend;
}
public static int Pow2RoundUp(int value)
{
value--;
value |= (value >> 1);
value |= (value >> 2);
value |= (value >> 4);
value |= (value >> 8);
value |= (value >> 16);
return ++value;
}
public static int Pow2RoundDown(int value)
{
return IsPowerOfTwo32(value) ? value : Pow2RoundUp(value) >> 1;
}
public static bool IsPowerOfTwo32(int value)
{
return value != 0 && (value & (value - 1)) == 0;
@ -85,6 +108,18 @@ namespace Ryujinx.Common
return (ulong)count;
}
public static int CountTrailingZeros32(int value)
{
int count = 0;
while (((value >> count) & 1) == 0)
{
count++;
}
return count;
}
public static long ReverseBits64(long value)
{
return (long)ReverseBits64((ulong)value);

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics
{
public enum DepthCompareFunc
{
Never = 0,
Less = 1,
Equal = 2,
LEqual = 3,
Greater = 4,
NotEqual = 5,
GEqual = 6,
Always = 7
}
}

View file

@ -6,9 +6,15 @@ namespace Ryujinx.Graphics.Gal
{
public int Width;
public int Height;
// FIXME: separate layer and depth
public int Depth;
public int LayerCount;
public int TileWidth;
public int GobBlockHeight;
public int GobBlockDepth;
public int Pitch;
public int MaxMipmapLevel;
public GalImageFormat Format;
public GalMemoryLayout Layout;
@ -16,34 +22,45 @@ namespace Ryujinx.Graphics.Gal
public GalTextureSource YSource;
public GalTextureSource ZSource;
public GalTextureSource WSource;
public GalTextureTarget TextureTarget;
public GalImage(
int Width,
int Height,
int Depth,
int LayerCount,
int TileWidth,
int GobBlockHeight,
int GobBlockDepth,
GalMemoryLayout Layout,
GalImageFormat Format,
GalTextureSource XSource = GalTextureSource.Red,
GalTextureSource YSource = GalTextureSource.Green,
GalTextureSource ZSource = GalTextureSource.Blue,
GalTextureSource WSource = GalTextureSource.Alpha)
GalTextureTarget TextureTarget,
int MaxMipmapLevel = 1,
GalTextureSource XSource = GalTextureSource.Red,
GalTextureSource YSource = GalTextureSource.Green,
GalTextureSource ZSource = GalTextureSource.Blue,
GalTextureSource WSource = GalTextureSource.Alpha)
{
this.Width = Width;
this.Height = Height;
this.LayerCount = LayerCount;
this.Depth = Depth;
this.TileWidth = TileWidth;
this.GobBlockHeight = GobBlockHeight;
this.GobBlockDepth = GobBlockDepth;
this.Layout = Layout;
this.Format = Format;
this.MaxMipmapLevel = MaxMipmapLevel;
this.XSource = XSource;
this.YSource = YSource;
this.ZSource = ZSource;
this.WSource = WSource;
this.TextureTarget = TextureTarget;
Pitch = ImageUtils.GetPitch(Format, Width);
}
public bool SizeMatches(GalImage Image)
public bool SizeMatches(GalImage Image, bool IgnoreLayer = false)
{
if (ImageUtils.GetBytesPerPixel(Format) !=
ImageUtils.GetBytesPerPixel(Image.Format))
@ -57,7 +74,14 @@ namespace Ryujinx.Graphics.Gal
return false;
}
return Height == Image.Height;
bool Result = Height == Image.Height && Depth == Image.Depth;
if (!IgnoreLayer)
{
Result = Result && LayerCount == Image.LayerCount;
}
return Result;
}
}
}

View file

@ -12,6 +12,9 @@ namespace Ryujinx.Graphics.Gal
public GalColorF BorderColor { get; private set; }
public bool DepthCompare { get; private set; }
public DepthCompareFunc DepthCompareFunc { get; private set; }
public GalTextureSampler(
GalTextureWrap AddressU,
GalTextureWrap AddressV,
@ -19,7 +22,9 @@ namespace Ryujinx.Graphics.Gal
GalTextureFilter MinFilter,
GalTextureFilter MagFilter,
GalTextureMipFilter MipFilter,
GalColorF BorderColor)
GalColorF BorderColor,
bool DepthCompare,
DepthCompareFunc DepthCompareFunc)
{
this.AddressU = AddressU;
this.AddressV = AddressV;
@ -28,6 +33,9 @@ namespace Ryujinx.Graphics.Gal
this.MagFilter = MagFilter;
this.MipFilter = MipFilter;
this.BorderColor = BorderColor;
this.DepthCompare = DepthCompare;
this.DepthCompareFunc = DepthCompareFunc;
}
}
}

View file

@ -0,0 +1,15 @@
namespace Ryujinx.Graphics.Gal
{
public enum GalTextureTarget
{
OneD = 0,
TwoD = 1,
ThreeD = 2,
CubeMap = 3,
OneDArray = 4,
TwoDArray = 5,
OneDBuffer = 6,
TwoDNoMipMap = 7,
CubeArray = 8,
}
}

View file

@ -25,16 +25,20 @@ namespace Ryujinx.Graphics.Gal
void Render();
void Copy(
long SrcKey,
long DstKey,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1);
GalImage SrcImage,
GalImage DstImage,
long SrcKey,
long DstKey,
int SrcLayer,
int DstLayer,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1);
void Reinterpret(long Key, GalImage NewImage);
}

View file

@ -13,6 +13,6 @@ namespace Ryujinx.Graphics.Gal
void Bind(long Key, int Index, GalImage Image);
void SetSampler(GalTextureSampler Sampler);
void SetSampler(GalImage Image, GalTextureSampler Sampler);
}
}

View file

@ -8,6 +8,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
public int Width => Image.Width;
public int Height => Image.Height;
public int Depth => Image.Depth;
public GalImageFormat Format => Image.Format;

View file

@ -189,6 +189,31 @@ namespace Ryujinx.Graphics.Gal.OpenGL
throw new NotImplementedException($"{Format & GalImageFormat.FormatMask} {Format & GalImageFormat.TypeMask}");
}
public static All GetDepthCompareFunc(DepthCompareFunc DepthCompareFunc)
{
switch (DepthCompareFunc)
{
case DepthCompareFunc.LEqual:
return All.Lequal;
case DepthCompareFunc.GEqual:
return All.Gequal;
case DepthCompareFunc.Less:
return All.Less;
case DepthCompareFunc.Greater:
return All.Greater;
case DepthCompareFunc.Equal:
return All.Equal;
case DepthCompareFunc.NotEqual:
return All.Notequal;
case DepthCompareFunc.Always:
return All.Always;
case DepthCompareFunc.Never:
return All.Never;
default:
throw new ArgumentException(nameof(DepthCompareFunc) + " \"" + DepthCompareFunc + "\" is not valid!");
}
}
public static InternalFormat GetCompressedImageFormat(GalImageFormat Format)
{
switch (Format)

View file

@ -4,17 +4,21 @@ using System;
namespace Ryujinx.Graphics.Gal.OpenGL
{
public static class OGLExtension
static class OGLExtension
{
// Private lazy backing variables
private static Lazy<bool> s_EnhancedLayouts = new Lazy<bool>(() => HasExtension("GL_ARB_enhanced_layouts"));
private static Lazy<bool> s_TextureMirrorClamp = new Lazy<bool>(() => HasExtension("GL_EXT_texture_mirror_clamp"));
private static Lazy<bool> s_ViewportArray = new Lazy<bool>(() => HasExtension("GL_ARB_viewport_array"));
private static Lazy<bool> s_NvidiaDriver = new Lazy<bool>(() => IsNvidiaDriver());
// Public accessors
public static bool EnhancedLayouts => s_EnhancedLayouts.Value;
public static bool TextureMirrorClamp => s_TextureMirrorClamp.Value;
public static bool ViewportArray => s_ViewportArray.Value;
public static bool NvidiaDrvier => s_NvidiaDriver.Value;
private static bool HasExtension(string Name)
{
@ -32,6 +36,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
return false;
}
private static bool IsNvidiaDriver()
{
return GL.GetString(StringName.Vendor).Equals("NVIDIA Corporation");
}
public static class Required
{

View file

@ -386,16 +386,20 @@ namespace Ryujinx.Graphics.Gal.OpenGL
}
public void Copy(
long SrcKey,
long DstKey,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1)
GalImage SrcImage,
GalImage DstImage,
long SrcKey,
long DstKey,
int SrcLayer,
int DstLayer,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1)
{
if (Texture.TryGetImageHandler(SrcKey, out ImageHandler SrcTex) &&
Texture.TryGetImageHandler(DstKey, out ImageHandler DstTex))
@ -422,8 +426,24 @@ namespace Ryujinx.Graphics.Gal.OpenGL
FramebufferAttachment Attachment = GetAttachment(SrcTex);
GL.FramebufferTexture(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0);
GL.FramebufferTexture(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0);
if (ImageUtils.IsArray(SrcImage.TextureTarget) && SrcLayer > 0)
{
GL.FramebufferTextureLayer(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0, SrcLayer);
}
else
{
GL.FramebufferTexture(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0);
}
if (ImageUtils.IsArray(DstImage.TextureTarget) && DstLayer > 0)
{
GL.FramebufferTextureLayer(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0, DstLayer);
}
else
{
GL.FramebufferTexture(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0);
}
BlitFramebufferFilter Filter = BlitFramebufferFilter.Nearest;
@ -449,7 +469,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL
if (NewImage.Format == OldImage.Format &&
NewImage.Width == OldImage.Width &&
NewImage.Height == OldImage.Height)
NewImage.Height == OldImage.Height &&
NewImage.Depth == OldImage.Depth &&
NewImage.LayerCount == OldImage.LayerCount &&
NewImage.TextureTarget == OldImage.TextureTarget)
{
return;
}
@ -474,9 +497,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
(_, PixelFormat Format, PixelType Type) = OGLEnumConverter.GetImageFormat(CachedImage.Format);
GL.BindTexture(TextureTarget.Texture2D, CachedImage.Handle);
TextureTarget Target = ImageUtils.GetTextureTarget(NewImage.TextureTarget);
GL.GetTexImage(TextureTarget.Texture2D, 0, Format, Type, IntPtr.Zero);
GL.BindTexture(Target, CachedImage.Handle);
GL.GetTexImage(Target, 0, Format, Type, IntPtr.Zero);
GL.BindBuffer(BufferTarget.PixelPackBuffer, 0);
GL.BindBuffer(BufferTarget.PixelUnpackBuffer, CopyPBO);

View file

@ -53,7 +53,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
{
GlslProgram Program;
GlslDecompiler Decompiler = new GlslDecompiler(OGLLimit.MaxUboSize);
GlslDecompiler Decompiler = new GlslDecompiler(OGLLimit.MaxUboSize, OGLExtension.NvidiaDrvier);
int ShaderDumpIndex = ShaderDumper.DumpIndex;

View file

@ -38,7 +38,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL
{
int Handle = GL.GenTexture();
GL.BindTexture(TextureTarget.Texture2D, Handle);
TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget);
GL.BindTexture(Target, Handle);
const int Level = 0; //TODO: Support mipmap textures.
const int Border = 0;
@ -54,23 +56,70 @@ namespace Ryujinx.Graphics.Gal.OpenGL
PixelFormat Format,
PixelType Type) = OGLEnumConverter.GetImageFormat(Image.Format);
GL.TexImage2D(
TextureTarget.Texture2D,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Format,
Type,
IntPtr.Zero);
switch (Target)
{
case TextureTarget.Texture1D:
GL.TexImage1D(
Target,
Level,
InternalFmt,
Image.Width,
Border,
Format,
Type,
IntPtr.Zero);
break;
case TextureTarget.Texture2D:
GL.TexImage2D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Format,
Type,
IntPtr.Zero);
break;
case TextureTarget.Texture3D:
GL.TexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.Depth,
Border,
Format,
Type,
IntPtr.Zero);
break;
case TextureTarget.Texture2DArray:
GL.TexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.LayerCount,
Border,
Format,
Type,
IntPtr.Zero);
break;
default:
throw new NotImplementedException($"Unsupported texture target type: {Target}");
}
}
public void Create(long Key, byte[] Data, GalImage Image)
{
int Handle = GL.GenTexture();
GL.BindTexture(TextureTarget.Texture2D, Handle);
TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget);
GL.BindTexture(Target, Handle);
const int Level = 0; //TODO: Support mipmap textures.
const int Border = 0;
@ -81,15 +130,56 @@ namespace Ryujinx.Graphics.Gal.OpenGL
{
InternalFormat InternalFmt = OGLEnumConverter.GetCompressedImageFormat(Image.Format);
GL.CompressedTexImage2D(
TextureTarget.Texture2D,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Data.Length,
Data);
switch (Target)
{
case TextureTarget.Texture1D:
GL.CompressedTexImage1D(
Target,
Level,
InternalFmt,
Image.Width,
Border,
Data.Length,
Data);
break;
case TextureTarget.Texture2D:
GL.CompressedTexImage2D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Data.Length,
Data);
break;
case TextureTarget.Texture3D:
GL.CompressedTexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.Depth,
Border,
Data.Length,
Data);
break;
case TextureTarget.Texture2DArray:
GL.CompressedTexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.LayerCount,
Border,
Data.Length,
Data);
break;
default:
throw new NotImplementedException($"Unsupported texture target type: {Target}");
}
}
else
{
@ -98,13 +188,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL
{
int TextureBlockWidth = ImageUtils.GetBlockWidth(Image.Format);
int TextureBlockHeight = ImageUtils.GetBlockHeight(Image.Format);
int TextureBlockDepth = ImageUtils.GetBlockDepth(Image.Format);
Data = ASTCDecoder.DecodeToRGBA8888(
Data,
TextureBlockWidth,
TextureBlockHeight, 1,
TextureBlockHeight,
TextureBlockDepth,
Image.Width,
Image.Height, 1);
Image.Height,
Image.Depth);
Image.Format = GalImageFormat.RGBA8 | (Image.Format & GalImageFormat.TypeMask);
}
@ -113,16 +206,80 @@ namespace Ryujinx.Graphics.Gal.OpenGL
PixelFormat Format,
PixelType Type) = OGLEnumConverter.GetImageFormat(Image.Format);
GL.TexImage2D(
TextureTarget.Texture2D,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Format,
Type,
Data);
switch (Target)
{
case TextureTarget.Texture1D:
GL.TexImage1D(
Target,
Level,
InternalFmt,
Image.Width,
Border,
Format,
Type,
Data);
break;
case TextureTarget.Texture2D:
GL.TexImage2D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Format,
Type,
Data);
break;
case TextureTarget.Texture3D:
GL.TexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.Depth,
Border,
Format,
Type,
Data);
break;
case TextureTarget.Texture2DArray:
GL.TexImage3D(
Target,
Level,
InternalFmt,
Image.Width,
Image.Height,
Image.LayerCount,
Border,
Format,
Type,
Data);
break;
case TextureTarget.TextureCubeMap:
Span<byte> Array = new Span<byte>(Data);
int FaceSize = ImageUtils.GetSize(Image) / 6;
for (int Face = 0; Face < 6; Face++)
{
GL.TexImage2D(
TextureTarget.TextureCubeMapPositiveX + Face,
Level,
InternalFmt,
Image.Width,
Image.Height,
Border,
Format,
Type,
Array.Slice(Face * FaceSize, FaceSize).ToArray());
}
break;
default:
throw new NotImplementedException($"Unsupported texture target type: {Target}");
}
}
}
@ -165,7 +322,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL
{
GL.ActiveTexture(TextureUnit.Texture0 + Index);
GL.BindTexture(TextureTarget.Texture2D, CachedImage.Handle);
TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget);
GL.BindTexture(Target, CachedImage.Handle);
int[] SwizzleRgba = new int[]
{
@ -175,23 +334,27 @@ namespace Ryujinx.Graphics.Gal.OpenGL
(int)OGLEnumConverter.GetTextureSwizzle(Image.WSource)
};
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureSwizzleRgba, SwizzleRgba);
GL.TexParameter(Target, TextureParameterName.TextureSwizzleRgba, SwizzleRgba);
}
}
public void SetSampler(GalTextureSampler Sampler)
public void SetSampler(GalImage Image, GalTextureSampler Sampler)
{
int WrapS = (int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressU);
int WrapT = (int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressV);
int WrapR = (int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressP);
int MinFilter = (int)OGLEnumConverter.GetTextureMinFilter(Sampler.MinFilter, Sampler.MipFilter);
int MagFilter = (int)OGLEnumConverter.GetTextureMagFilter(Sampler.MagFilter);
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapS, WrapS);
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, WrapT);
TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget);
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMinFilter, MinFilter);
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMagFilter, MagFilter);
GL.TexParameter(Target, TextureParameterName.TextureWrapS, WrapS);
GL.TexParameter(Target, TextureParameterName.TextureWrapT, WrapT);
GL.TexParameter(Target, TextureParameterName.TextureWrapR, WrapR);
GL.TexParameter(Target, TextureParameterName.TextureMinFilter, MinFilter);
GL.TexParameter(Target, TextureParameterName.TextureMagFilter, MagFilter);
float[] Color = new float[]
{
@ -201,7 +364,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL
Sampler.BorderColor.Alpha
};
GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureBorderColor, Color);
GL.TexParameter(Target, TextureParameterName.TextureBorderColor, Color);
if (Sampler.DepthCompare)
{
GL.TexParameter(Target, TextureParameterName.TextureCompareMode, (int)All.CompareRToTexture);
GL.TexParameter(Target, TextureParameterName.TextureCompareFunc, (int)OGLEnumConverter.GetDepthCompareFunc(Sampler.DepthCompareFunc));
}
else
{
GL.TexParameter(Target, TextureParameterName.TextureCompareMode, (int)All.None);
GL.TexParameter(Target, TextureParameterName.TextureCompareFunc, (int)All.Never);
}
}
}
}

View file

@ -1,3 +1,5 @@
using Ryujinx.Graphics.Gal.OpenGL;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
@ -224,6 +226,7 @@ namespace Ryujinx.Graphics.Gal.Shader
if (Op.Inst == ShaderIrInst.Texq ||
Op.Inst == ShaderIrInst.Texs ||
Op.Inst == ShaderIrInst.Tld4 ||
Op.Inst == ShaderIrInst.Txlf)
{
int Handle = ((ShaderIrOperImm)Op.OperandC).Value;
@ -232,7 +235,25 @@ namespace Ryujinx.Graphics.Gal.Shader
string Name = StagePrefix + TextureName + Index;
m_Textures.TryAdd(Handle, new ShaderDeclInfo(Name, Handle));
GalTextureTarget TextureTarget;
TextureInstructionSuffix TextureInstructionSuffix;
// TODO: Non 2D texture type for TEXQ?
if (Op.Inst == ShaderIrInst.Texq)
{
TextureTarget = GalTextureTarget.TwoD;
TextureInstructionSuffix = TextureInstructionSuffix.None;
}
else
{
ShaderIrMetaTex Meta = ((ShaderIrMetaTex)Op.MetaData);
TextureTarget = Meta.TextureTarget;
TextureInstructionSuffix = Meta.TextureInstructionSuffix;
}
m_Textures.TryAdd(Handle, new ShaderDeclInfo(Name, Handle, false, 0, 1, TextureTarget, TextureInstructionSuffix));
}
else if (Op.Inst == ShaderIrInst.Texb)
{
@ -257,9 +278,10 @@ namespace Ryujinx.Graphics.Gal.Shader
if (HandleSrc != null && HandleSrc is ShaderIrOperCbuf Cbuf)
{
ShaderIrMetaTex Meta = ((ShaderIrMetaTex)Op.MetaData);
string Name = StagePrefix + TextureName + "_cb" + Cbuf.Index + "_" + Cbuf.Pos;
m_CbTextures.Add(Op, new ShaderDeclInfo(Name, Cbuf.Pos, true, Cbuf.Index));
m_CbTextures.Add(Op, new ShaderDeclInfo(Name, Cbuf.Pos, true, Cbuf.Index, 1, Meta.TextureTarget, Meta.TextureInstructionSuffix));
}
else
{

View file

@ -1,3 +1,5 @@
using OpenTK.Graphics.OpenGL;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Globalization;
@ -33,7 +35,9 @@ namespace Ryujinx.Graphics.Gal.Shader
public int MaxUboSize { get; }
public GlslDecompiler(int MaxUboSize)
private bool IsNvidiaDriver;
public GlslDecompiler(int MaxUboSize, bool IsNvidiaDriver)
{
InstsExpr = new Dictionary<ShaderIrInst, GetInstExpr>()
{
@ -103,6 +107,7 @@ namespace Ryujinx.Graphics.Gal.Shader
{ ShaderIrInst.Texb, GetTexbExpr },
{ ShaderIrInst.Texq, GetTexqExpr },
{ ShaderIrInst.Texs, GetTexsExpr },
{ ShaderIrInst.Tld4, GetTld4Expr },
{ ShaderIrInst.Trunc, GetTruncExpr },
{ ShaderIrInst.Txlf, GetTxlfExpr },
{ ShaderIrInst.Utof, GetUtofExpr },
@ -110,6 +115,7 @@ namespace Ryujinx.Graphics.Gal.Shader
};
this.MaxUboSize = MaxUboSize / 16;
this.IsNvidiaDriver = IsNvidiaDriver;
}
public GlslProgram Decompile(
@ -219,14 +225,70 @@ namespace Ryujinx.Graphics.Gal.Shader
}
}
private string GetSamplerType(TextureTarget TextureTarget, bool HasShadow)
{
string Result;
switch (TextureTarget)
{
case TextureTarget.Texture1D:
Result = "sampler1D";
break;
case TextureTarget.Texture2D:
Result = "sampler2D";
break;
case TextureTarget.Texture3D:
Result = "sampler3D";
break;
case TextureTarget.TextureCubeMap:
Result = "samplerCube";
break;
case TextureTarget.TextureRectangle:
Result = "sampler2DRect";
break;
case TextureTarget.Texture1DArray:
Result = "sampler1DArray";
break;
case TextureTarget.Texture2DArray:
Result = "sampler2DArray";
break;
case TextureTarget.TextureCubeMapArray:
Result = "samplerCubeArray";
break;
case TextureTarget.TextureBuffer:
Result = "samplerBuffer";
break;
case TextureTarget.Texture2DMultisample:
Result = "sampler2DMS";
break;
case TextureTarget.Texture2DMultisampleArray:
Result = "sampler2DMSArray";
break;
default:
throw new NotSupportedException();
}
if (HasShadow)
Result += "Shadow";
return Result;
}
private void PrintDeclTextures()
{
foreach (ShaderDeclInfo DeclInfo in IterateCbTextures())
{
SB.AppendLine("uniform sampler2D " + DeclInfo.Name + ";");
TextureTarget Target = ImageUtils.GetTextureTarget(DeclInfo.TextureTarget);
SB.AppendLine($"// {DeclInfo.TextureSuffix}");
SB.AppendLine("uniform " + GetSamplerType(Target, (DeclInfo.TextureSuffix & TextureInstructionSuffix.DC) != 0) + " " + DeclInfo.Name + ";");
}
PrintDecls(Decl.Textures, "uniform sampler2D");
foreach (ShaderDeclInfo DeclInfo in Decl.Textures.Values.OrderBy(DeclKeySelector))
{
TextureTarget Target = ImageUtils.GetTextureTarget(DeclInfo.TextureTarget);
SB.AppendLine($"// {DeclInfo.TextureSuffix}");
SB.AppendLine("uniform " + GetSamplerType(Target, (DeclInfo.TextureSuffix & TextureInstructionSuffix.DC) != 0) + " " + DeclInfo.Name + ";");
}
}
private IEnumerable<ShaderDeclInfo> IterateCbTextures()
@ -778,6 +840,7 @@ namespace Ryujinx.Graphics.Gal.Shader
case ShaderIrInst.Ipa:
case ShaderIrInst.Texq:
case ShaderIrInst.Texs:
case ShaderIrInst.Tld4:
case ShaderIrInst.Txlf:
return false;
}
@ -1124,7 +1187,7 @@ namespace Ryujinx.Graphics.Gal.Shader
string Ch = "rgba".Substring(Meta.Elem, 1);
return "texture(" + DeclInfo.Name + ", " + Coords + ")." + Ch;
return GetTextureOperation(Op, DeclInfo.Name, Coords, Ch);
}
private string GetTexqExpr(ShaderIrOp Op)
@ -1157,20 +1220,50 @@ namespace Ryujinx.Graphics.Gal.Shader
string Ch = "rgba".Substring(Meta.Elem, 1);
return "texture(" + Sampler + ", " + Coords + ")." + Ch;
return GetTextureOperation(Op, Sampler, Coords, Ch);
}
private string GetTxlfExpr(ShaderIrOp Op)
private string GetTld4Expr(ShaderIrOp Op)
{
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
string Sampler = GetTexSamplerName(Op);
string Coords = GetTexSamplerCoords(Op);
string Ch = "rgba".Substring(Meta.Elem, 1);
return GetTextureGatherOperation(Op, Sampler, Coords, Ch);
}
// TODO: support AOFFI on non nvidia drivers
private string GetTxlfExpr(ShaderIrOp Op)
{
// TODO: Support all suffixes
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix;
string Sampler = GetTexSamplerName(Op);
string Coords = GetITexSamplerCoords(Op);
string Ch = "rgba".Substring(Meta.Elem, 1);
return "texelFetch(" + Sampler + ", " + Coords + ", 0)." + Ch;
string Lod = "0";
if (Meta.LevelOfDetail != null)
{
Lod = GetOperExpr(Op, Meta.LevelOfDetail);
}
if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, GetOperExpr(Op, Meta.Offset));
return "texelFetchOffset(" + Sampler + ", " + Coords + ", " + Lod + ", " + Offset + ")." + Ch;
}
return "texelFetch(" + Sampler + ", " + Coords + ", " + Lod + ")." + Ch;
}
private string GetTruncExpr(ShaderIrOp Op) => GetUnaryCall(Op, "trunc");
@ -1246,14 +1339,205 @@ namespace Ryujinx.Graphics.Gal.Shader
private string GetTexSamplerCoords(ShaderIrOp Op)
{
return "vec2(" + GetOperExpr(Op, Op.OperandA) + ", " +
GetOperExpr(Op, Op.OperandB) + ")";
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
bool HasDepth = (Meta.TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0;
int Coords = ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget);
bool IsArray = ImageUtils.IsArray(Meta.TextureTarget);
string GetLastArgument(ShaderIrNode Node)
{
string Result = GetOperExpr(Op, Node);
// array index is actually an integer so we need to pass it correctly
if (IsArray)
{
Result = "float(floatBitsToInt(" + Result + "))";
}
return Result;
}
string LastArgument;
string DepthArgument = "";
int VecSize = Coords;
if (HasDepth && Op.Inst != ShaderIrInst.Tld4)
{
VecSize++;
DepthArgument = $", {GetOperExpr(Op, Meta.DepthCompare)}";
}
switch (Coords)
{
case 1:
if (HasDepth)
{
return $"vec3({GetOperExpr(Op, Meta.Coordinates[0])}, 0.0{DepthArgument})";
}
return GetOperExpr(Op, Meta.Coordinates[0]);
case 2:
LastArgument = GetLastArgument(Meta.Coordinates[1]);
return $"vec{VecSize}({GetOperExpr(Op, Meta.Coordinates[0])}, {LastArgument}{DepthArgument})";
case 3:
LastArgument = GetLastArgument(Meta.Coordinates[2]);
return $"vec{VecSize}({GetOperExpr(Op, Meta.Coordinates[0])}, {GetOperExpr(Op, Meta.Coordinates[1])}, {LastArgument}{DepthArgument})";
case 4:
LastArgument = GetLastArgument(Meta.Coordinates[3]);
return $"vec4({GetOperExpr(Op, Meta.Coordinates[0])}, {GetOperExpr(Op, Meta.Coordinates[1])}, {GetOperExpr(Op, Meta.Coordinates[2])}, {LastArgument}){DepthArgument}";
default:
throw new InvalidOperationException();
}
}
private string GetTextureOffset(ShaderIrMetaTex Meta, string Oper, int Shift = 4, int Mask = 0xF)
{
string GetOffset(string Operation, int Index)
{
return $"({Operation} >> {Index * Shift}) & 0x{Mask:x}";
}
int Coords = ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget);
if (ImageUtils.IsArray(Meta.TextureTarget))
Coords -= 1;
switch (Coords)
{
case 1:
return GetOffset(Oper, 0);
case 2:
return "ivec2(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ")";
case 3:
return "ivec3(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ", " + GetOffset(Oper, 2) + ")";
case 4:
return "ivec4(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ", " + GetOffset(Oper, 2) + ", " + GetOffset(Oper, 3) + ")";
default:
throw new InvalidOperationException();
}
}
// TODO: support AOFFI on non nvidia drivers
private string GetTextureGatherOperation(ShaderIrOp Op, string Sampler, string Coords, string Ch)
{
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix;
string ChString = "." + Ch;
string Comp = Meta.Component.ToString();
if ((Suffix & TextureInstructionSuffix.DC) != 0)
{
Comp = GetOperExpr(Op, Meta.DepthCompare);
}
if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))", 8, 0x3F);
if ((Suffix & TextureInstructionSuffix.DC) != 0)
{
return "textureGatherOffset(" + Sampler + ", " + Coords + ", " + Comp + ", " + Offset + ")" + ChString;
}
return "textureGatherOffset(" + Sampler + ", " + Coords + ", " + Offset + ", " + Comp + ")" + ChString;
}
// TODO: Support PTP
else if ((Suffix & TextureInstructionSuffix.PTP) != 0)
{
throw new NotImplementedException();
}
return "textureGather(" + Sampler + ", " + Coords + ", " + Comp + ")" + ChString;
}
// TODO: support AOFFI on non nvidia drivers
private string GetTextureOperation(ShaderIrOp Op, string Sampler, string Coords, string Ch)
{
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix;
string ChString = "." + Ch;
if ((Suffix & TextureInstructionSuffix.DC) != 0)
{
ChString = "";
}
// TODO: Support LBA and LLA
if ((Suffix & TextureInstructionSuffix.LZ) != 0)
{
if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))");
return "textureLodOffset(" + Sampler + ", " + Coords + ", 0.0, " + Offset + ")" + ChString;
}
return "textureLod(" + Sampler + ", " + Coords + ", 0.0)" + ChString;
}
else if ((Suffix & TextureInstructionSuffix.LB) != 0)
{
if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))");
return "textureOffset(" + Sampler + ", " + Coords + ", " + Offset + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString;
}
return "texture(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString;
}
else if ((Suffix & TextureInstructionSuffix.LL) != 0)
{
if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))");
return "textureLodOffset(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ", " + Offset + ")" + ChString;
}
return "textureLod(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString;
}
else if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver)
{
string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))");
return "textureOffset(" + Sampler + ", " + Coords + ", " + Offset + ")" + ChString;
}
else
{
return "texture(" + Sampler + ", " + Coords + ")" + ChString;
}
throw new NotImplementedException($"Texture Suffix {Meta.TextureInstructionSuffix} is not implemented");
}
private string GetITexSamplerCoords(ShaderIrOp Op)
{
return "ivec2(" + GetOperExpr(Op, Op.OperandA) + ", " +
GetOperExpr(Op, Op.OperandB) + ")";
ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData;
switch (ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget))
{
case 1:
return GetOperExpr(Op, Meta.Coordinates[0]);
case 2:
return "ivec2(" + GetOperExpr(Op, Meta.Coordinates[0]) + ", " + GetOperExpr(Op, Meta.Coordinates[1]) + ")";
case 3:
return "ivec3(" + GetOperExpr(Op, Meta.Coordinates[0]) + ", " + GetOperExpr(Op, Meta.Coordinates[1]) + ", " + GetOperExpr(Op, Meta.Coordinates[2]) + ")";
default:
throw new InvalidOperationException();
}
}
private string GetOperExpr(ShaderIrOp Op, ShaderIrNode Oper)
@ -1292,22 +1576,6 @@ namespace Ryujinx.Graphics.Gal.Shader
}
break;
}
case ShaderIrOperImm Imm:
{
//For integer immediates being used as float,
//it's better (for readability) to just return the float value.
if (DstType == OperType.F32)
{
float Value = BitConverter.Int32BitsToSingle(Imm.Value);
if (!float.IsNaN(Value) && !float.IsInfinity(Value))
{
return GetFloatConst(Value);
}
}
break;
}
}
switch (DstType)

View file

@ -1,3 +1,4 @@
using Ryujinx.Graphics.Texture;
using System;
using static Ryujinx.Graphics.Gal.Shader.ShaderDecodeHelper;
@ -29,6 +30,75 @@ namespace Ryujinx.Graphics.Gal.Shader
{ RGB_, RG_A, R_BA, _GBA, RGBA, ____, ____, ____ }
};
private static GalTextureTarget TexToTextureTarget(int TexType, bool IsArray)
{
switch (TexType)
{
case 0:
return IsArray ? GalTextureTarget.OneDArray : GalTextureTarget.OneD;
case 2:
return IsArray ? GalTextureTarget.TwoDArray : GalTextureTarget.TwoD;
case 4:
if (IsArray)
throw new InvalidOperationException($"ARRAY bit set on a TEX with 3D texture!");
return GalTextureTarget.ThreeD;
case 6:
return IsArray ? GalTextureTarget.CubeArray : GalTextureTarget.CubeMap;
default:
throw new InvalidOperationException();
}
}
private static GalTextureTarget TexsToTextureTarget(int TexType)
{
switch (TexType)
{
case 0:
return GalTextureTarget.OneD;
case 2:
case 4:
case 6:
case 8:
case 0xa:
case 0xc:
return GalTextureTarget.TwoD;
case 0xe:
case 0x10:
case 0x12:
return GalTextureTarget.TwoDArray;
case 0x14:
case 0x16:
return GalTextureTarget.ThreeD;
case 0x18:
case 0x1a:
return GalTextureTarget.CubeMap;
default:
throw new InvalidOperationException();
}
}
public static GalTextureTarget TldsToTextureTarget(int TexType)
{
switch (TexType)
{
case 0:
case 2:
return GalTextureTarget.OneD;
case 4:
case 8:
case 0xa:
case 0xc:
case 0x18:
return GalTextureTarget.TwoD;
case 0x10:
return GalTextureTarget.TwoDArray;
case 0xe:
return GalTextureTarget.ThreeD;
default:
throw new InvalidOperationException();
}
}
public static void Ld_A(ShaderIrBlock Block, long OpCode, int Position)
{
ShaderIrNode[] Opers = OpCode.Abuf20();
@ -132,43 +202,166 @@ namespace Ryujinx.Graphics.Gal.Shader
public static void Tex(ShaderIrBlock Block, long OpCode, int Position)
{
EmitTex(Block, OpCode, GprHandle: false);
TextureInstructionSuffix Suffix;
int RawSuffix = OpCode.Read(0x34, 0x38);
switch (RawSuffix)
{
case 0:
Suffix = TextureInstructionSuffix.None;
break;
case 0x8:
Suffix = TextureInstructionSuffix.LZ;
break;
case 0x10:
Suffix = TextureInstructionSuffix.LB;
break;
case 0x18:
Suffix = TextureInstructionSuffix.LL;
break;
case 0x30:
Suffix = TextureInstructionSuffix.LBA;
break;
case 0x38:
Suffix = TextureInstructionSuffix.LLA;
break;
default:
throw new InvalidOperationException($"Invalid Suffix for TEX instruction {RawSuffix}");
}
bool IsOffset = OpCode.Read(0x36);
if (IsOffset)
Suffix |= TextureInstructionSuffix.AOffI;
EmitTex(Block, OpCode, Suffix, GprHandle: false);
}
public static void Tex_B(ShaderIrBlock Block, long OpCode, int Position)
{
EmitTex(Block, OpCode, GprHandle: true);
TextureInstructionSuffix Suffix;
int RawSuffix = OpCode.Read(0x24, 0xe);
switch (RawSuffix)
{
case 0:
Suffix = TextureInstructionSuffix.None;
break;
case 0x2:
Suffix = TextureInstructionSuffix.LZ;
break;
case 0x4:
Suffix = TextureInstructionSuffix.LB;
break;
case 0x6:
Suffix = TextureInstructionSuffix.LL;
break;
case 0xc:
Suffix = TextureInstructionSuffix.LBA;
break;
case 0xe:
Suffix = TextureInstructionSuffix.LLA;
break;
default:
throw new InvalidOperationException($"Invalid Suffix for TEX.B instruction {RawSuffix}");
}
bool IsOffset = OpCode.Read(0x23);
if (IsOffset)
Suffix |= TextureInstructionSuffix.AOffI;
EmitTex(Block, OpCode, Suffix, GprHandle: true);
}
private static void EmitTex(ShaderIrBlock Block, long OpCode, bool GprHandle)
private static void EmitTex(ShaderIrBlock Block, long OpCode, TextureInstructionSuffix TextureInstructionSuffix, bool GprHandle)
{
//TODO: Support other formats.
ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[2];
bool IsArray = OpCode.HasArray();
for (int Index = 0; Index < Coords.Length; Index++)
GalTextureTarget TextureTarget = TexToTextureTarget(OpCode.Read(28, 6), IsArray);
bool HasDepthCompare = OpCode.Read(0x32);
if (HasDepthCompare)
{
TextureInstructionSuffix |= TextureInstructionSuffix.DC;
}
ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureTarget)];
int IndexExtraCoord = 0;
if (IsArray)
{
IndexExtraCoord++;
Coords[Coords.Length - 1] = OpCode.Gpr8();
}
for (int Index = 0; Index < Coords.Length - IndexExtraCoord; Index++)
{
ShaderIrOperGpr CoordReg = OpCode.Gpr8();
CoordReg.Index += Index;
CoordReg.Index += IndexExtraCoord;
if (!CoordReg.IsValidRegister)
{
CoordReg.Index = ShaderIrOperGpr.ZRIndex;
}
Coords[Index] = ShaderIrOperGpr.MakeTemporary(Index);
Block.AddNode(new ShaderIrAsg(Coords[Index], CoordReg));
Coords[Index] = CoordReg;
}
int ChMask = OpCode.Read(31, 0xf);
ShaderIrOperGpr LevelOfDetail = null;
ShaderIrOperGpr Offset = null;
ShaderIrOperGpr DepthCompare = null;
// TODO: determine first argument when TEX.B is used
int OperBIndex = GprHandle ? 1 : 0;
if ((TextureInstructionSuffix & TextureInstructionSuffix.LL) != 0 ||
(TextureInstructionSuffix & TextureInstructionSuffix.LB) != 0 ||
(TextureInstructionSuffix & TextureInstructionSuffix.LBA) != 0 ||
(TextureInstructionSuffix & TextureInstructionSuffix.LLA) != 0)
{
LevelOfDetail = OpCode.Gpr20();
LevelOfDetail.Index += OperBIndex;
OperBIndex++;
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0)
{
Offset = OpCode.Gpr20();
Offset.Index += OperBIndex;
OperBIndex++;
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0)
{
DepthCompare = OpCode.Gpr20();
DepthCompare.Index += OperBIndex;
OperBIndex++;
}
// ???
ShaderIrNode OperC = GprHandle
? (ShaderIrNode)OpCode.Gpr20()
: (ShaderIrNode)OpCode.Imm13_36();
ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs;
Coords = CoordsRegistersToTempRegisters(Block, Coords);
int RegInc = 0;
for (int Ch = 0; Ch < 4; Ch++)
@ -187,9 +380,14 @@ namespace Ryujinx.Graphics.Gal.Shader
continue;
}
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureTarget, TextureInstructionSuffix, Coords)
{
LevelOfDetail = LevelOfDetail,
Offset = Offset,
DepthCompare = DepthCompare
};
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta);
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords.Length > 1 ? Coords[1] : null, OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
@ -197,17 +395,238 @@ namespace Ryujinx.Graphics.Gal.Shader
public static void Texs(ShaderIrBlock Block, long OpCode, int Position)
{
EmitTexs(Block, OpCode, ShaderIrInst.Texs);
TextureInstructionSuffix Suffix;
int RawSuffix = OpCode.Read(0x34, 0x1e);
switch (RawSuffix)
{
case 0:
case 0x4:
case 0x10:
case 0x16:
Suffix = TextureInstructionSuffix.LZ;
break;
case 0x6:
case 0x1a:
Suffix = TextureInstructionSuffix.LL;
break;
case 0x8:
Suffix = TextureInstructionSuffix.DC;
break;
case 0x2:
case 0xe:
case 0x14:
case 0x18:
Suffix = TextureInstructionSuffix.None;
break;
case 0xa:
Suffix = TextureInstructionSuffix.LL | TextureInstructionSuffix.DC;
break;
case 0xc:
case 0x12:
Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.DC;
break;
default:
throw new InvalidOperationException($"Invalid Suffix for TEXS instruction {RawSuffix}");
}
GalTextureTarget TextureTarget = TexsToTextureTarget(OpCode.Read(52, 0x1e));
EmitTexs(Block, OpCode, ShaderIrInst.Texs, TextureTarget, Suffix);
}
public static void Tlds(ShaderIrBlock Block, long OpCode, int Position)
{
EmitTexs(Block, OpCode, ShaderIrInst.Txlf);
TextureInstructionSuffix Suffix;
int RawSuffix = OpCode.Read(0x34, 0x1e);
switch (RawSuffix)
{
case 0:
case 0x4:
case 0x8:
Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.AOffI;
break;
case 0xc:
Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.MZ;
break;
case 0xe:
case 0x10:
Suffix = TextureInstructionSuffix.LZ;
break;
case 0x2:
case 0xa:
Suffix = TextureInstructionSuffix.LL;
break;
case 0x18:
Suffix = TextureInstructionSuffix.LL | TextureInstructionSuffix.AOffI;
break;
default:
throw new InvalidOperationException($"Invalid Suffix for TLDS instruction {RawSuffix}");
}
GalTextureTarget TextureTarget = TldsToTextureTarget(OpCode.Read(52, 0x1e));
EmitTexs(Block, OpCode, ShaderIrInst.Txlf, TextureTarget, Suffix);
}
private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst)
public static void Tld4(ShaderIrBlock Block, long OpCode, int Position)
{
//TODO: Support other formats.
TextureInstructionSuffix Suffix;
int RawSuffix = OpCode.Read(0x34, 0xc);
switch (RawSuffix)
{
case 0:
Suffix = TextureInstructionSuffix.None;
break;
case 0x4:
Suffix = TextureInstructionSuffix.AOffI;
break;
case 0x8:
Suffix = TextureInstructionSuffix.PTP;
break;
default:
throw new InvalidOperationException($"Invalid Suffix for TLD4 instruction {RawSuffix}");
}
bool IsShadow = OpCode.Read(0x32);
bool IsArray = OpCode.HasArray();
int ChMask = OpCode.Read(31, 0xf);
GalTextureTarget TextureTarget = TexToTextureTarget(OpCode.Read(28, 6), IsArray);
if (IsShadow)
{
Suffix |= TextureInstructionSuffix.DC;
}
EmitTld4(Block, OpCode, TextureTarget, Suffix, ChMask, OpCode.Read(0x38, 0x3), false);
}
public static void Tld4s(ShaderIrBlock Block, long OpCode, int Position)
{
TextureInstructionSuffix Suffix = TextureInstructionSuffix.None;
bool IsOffset = OpCode.Read(0x33);
bool IsShadow = OpCode.Read(0x32);
if (IsOffset)
{
Suffix |= TextureInstructionSuffix.AOffI;
}
if (IsShadow)
{
Suffix |= TextureInstructionSuffix.DC;
}
// TLD4S seems to only support 2D textures with RGBA mask?
EmitTld4(Block, OpCode, GalTextureTarget.TwoD, Suffix, RGBA, OpCode.Read(0x34, 0x3), true);
}
private static void EmitTexs(ShaderIrBlock Block,
long OpCode,
ShaderIrInst Inst,
GalTextureTarget TextureTarget,
TextureInstructionSuffix TextureInstructionSuffix)
{
if (Inst == ShaderIrInst.Txlf && TextureTarget == GalTextureTarget.CubeArray)
{
throw new InvalidOperationException("TLDS instructions cannot use CUBE modifier!");
}
bool IsArray = ImageUtils.IsArray(TextureTarget);
ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureTarget)];
ShaderIrOperGpr OperA = OpCode.Gpr8();
ShaderIrOperGpr OperB = OpCode.Gpr20();
ShaderIrOperGpr SuffixExtra = OpCode.Gpr20();
SuffixExtra.Index += 1;
int CoordStartIndex = 0;
if (IsArray)
{
CoordStartIndex++;
Coords[Coords.Length - 1] = OpCode.Gpr8();
}
switch (Coords.Length - CoordStartIndex)
{
case 1:
Coords[0] = OpCode.Gpr8();
break;
case 2:
Coords[0] = OpCode.Gpr8();
Coords[0].Index += CoordStartIndex;
break;
case 3:
Coords[0] = OpCode.Gpr8();
Coords[0].Index += CoordStartIndex;
Coords[1] = OpCode.Gpr8();
Coords[1].Index += 1 + CoordStartIndex;
break;
default:
throw new NotSupportedException($"{Coords.Length - CoordStartIndex} coords textures aren't supported in TEXS");
}
int OperBIndex = 0;
ShaderIrOperGpr LevelOfDetail = null;
ShaderIrOperGpr Offset = null;
ShaderIrOperGpr DepthCompare = null;
// OperB is always the last value
// Not applicable to 1d textures
if (Coords.Length - CoordStartIndex != 1)
{
Coords[Coords.Length - CoordStartIndex - 1] = OperB;
OperBIndex++;
}
// Encoding of TEXS/TLDS is a bit special and change for 2d textures
// NOTE: OperA seems to hold at best two args.
// On 2D textures, if no suffix need an additional values, Y is stored in OperB, otherwise coords are in OperA and the additional values is in OperB.
if (TextureInstructionSuffix != TextureInstructionSuffix.None && TextureInstructionSuffix != TextureInstructionSuffix.LZ && TextureTarget == GalTextureTarget.TwoD)
{
Coords[Coords.Length - CoordStartIndex - 1] = OpCode.Gpr8();
Coords[Coords.Length - CoordStartIndex - 1].Index += Coords.Length - CoordStartIndex - 1;
OperBIndex--;
}
// TODO: Find what MZ does and what changes about the encoding (Maybe Multisample?)
if ((TextureInstructionSuffix & TextureInstructionSuffix.LL) != 0)
{
LevelOfDetail = OpCode.Gpr20();
LevelOfDetail.Index += OperBIndex;
OperBIndex++;
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0)
{
Offset = OpCode.Gpr20();
Offset.Index += OperBIndex;
OperBIndex++;
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0)
{
DepthCompare = OpCode.Gpr20();
DepthCompare.Index += OperBIndex;
OperBIndex++;
}
int LutIndex;
LutIndex = !OpCode.Gpr0().IsConst ? 1 : 0;
@ -276,12 +695,7 @@ namespace Ryujinx.Graphics.Gal.Shader
}
ShaderIrNode OperC = OpCode.Imm13_36();
ShaderIrOperGpr Coord0 = ShaderIrOperGpr.MakeTemporary(0);
ShaderIrOperGpr Coord1 = ShaderIrOperGpr.MakeTemporary(1);
Block.AddNode(new ShaderIrAsg(Coord0, OpCode.Gpr8()));
Block.AddNode(new ShaderIrAsg(Coord1, OpCode.Gpr20()));
Coords = CoordsRegistersToTempRegisters(Block, Coords);
for (int Ch = 0; Ch < 4; Ch++)
{
@ -290,9 +704,13 @@ namespace Ryujinx.Graphics.Gal.Shader
continue;
}
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coord0, Coord1, OperC, Meta);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureTarget, TextureInstructionSuffix, Coords)
{
LevelOfDetail = LevelOfDetail,
Offset = Offset,
DepthCompare = DepthCompare
};
ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB, OperC, Meta);
ShaderIrOperGpr Dst = GetDst();
@ -303,9 +721,156 @@ namespace Ryujinx.Graphics.Gal.Shader
}
}
private static void EmitTld4(ShaderIrBlock Block, long OpCode, GalTextureTarget TextureType, TextureInstructionSuffix TextureInstructionSuffix, int ChMask, int Component, bool Scalar)
{
ShaderIrOperGpr OperA = OpCode.Gpr8();
ShaderIrOperGpr OperB = OpCode.Gpr20();
ShaderIrOperImm OperC = OpCode.Imm13_36();
ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureType)];
ShaderIrOperGpr Offset = null;
ShaderIrOperGpr DepthCompare = null;
bool IsArray = ImageUtils.IsArray(TextureType);
int OperBIndex = 0;
if (Scalar)
{
int CoordStartIndex = 0;
if (IsArray)
{
CoordStartIndex++;
Coords[Coords.Length - 1] = OperB;
}
switch (Coords.Length - CoordStartIndex)
{
case 1:
Coords[0] = OpCode.Gpr8();
break;
case 2:
Coords[0] = OpCode.Gpr8();
Coords[0].Index += CoordStartIndex;
break;
case 3:
Coords[0] = OpCode.Gpr8();
Coords[0].Index += CoordStartIndex;
Coords[1] = OpCode.Gpr8();
Coords[1].Index += 1 + CoordStartIndex;
break;
default:
throw new NotSupportedException($"{Coords.Length - CoordStartIndex} coords textures aren't supported in TLD4S");
}
if (Coords.Length - CoordStartIndex != 1)
{
Coords[Coords.Length - CoordStartIndex - 1] = OperB;
OperBIndex++;
}
if (TextureInstructionSuffix != TextureInstructionSuffix.None && TextureType == GalTextureTarget.TwoD)
{
Coords[Coords.Length - CoordStartIndex - 1] = OpCode.Gpr8();
Coords[Coords.Length - CoordStartIndex - 1].Index += Coords.Length - CoordStartIndex - 1;
OperBIndex--;
}
}
else
{
int IndexExtraCoord = 0;
if (IsArray)
{
IndexExtraCoord++;
Coords[Coords.Length - 1] = OpCode.Gpr8();
}
for (int Index = 0; Index < Coords.Length - IndexExtraCoord; Index++)
{
Coords[Index] = OpCode.Gpr8();
Coords[Index].Index += Index;
Coords[Index].Index += IndexExtraCoord;
if (Coords[Index].Index > ShaderIrOperGpr.ZRIndex)
{
Coords[Index].Index = ShaderIrOperGpr.ZRIndex;
}
}
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0)
{
Offset = OpCode.Gpr20();
Offset.Index += OperBIndex;
OperBIndex++;
}
if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0)
{
DepthCompare = OpCode.Gpr20();
DepthCompare.Index += OperBIndex;
OperBIndex++;
}
Coords = CoordsRegistersToTempRegisters(Block, Coords);
int RegInc = 0;
for (int Ch = 0; Ch < 4; Ch++)
{
if (!IsChannelUsed(ChMask, Ch))
{
continue;
}
ShaderIrOperGpr Dst = OpCode.Gpr0();
Dst.Index += RegInc++;
if (!Dst.IsValidRegister || Dst.IsConst)
{
continue;
}
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureType, TextureInstructionSuffix, Coords)
{
Component = Component,
Offset = Offset,
DepthCompare = DepthCompare
};
ShaderIrOp Op = new ShaderIrOp(ShaderIrInst.Tld4, OperA, OperB, OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
}
private static bool IsChannelUsed(int ChMask, int Ch)
{
return (ChMask & (1 << Ch)) != 0;
}
private static ShaderIrOperGpr[] CoordsRegistersToTempRegisters(ShaderIrBlock Block, params ShaderIrOperGpr[] Registers)
{
ShaderIrOperGpr[] Res = new ShaderIrOperGpr[Registers.Length];
for (int Index = 0; Index < Res.Length; Index++)
{
Res[Index] = ShaderIrOperGpr.MakeTemporary(Index);
Block.AddNode(new ShaderIrAsg(Res[Index], Registers[Index]));
}
return Res;
}
}
}

View file

@ -19,6 +19,11 @@ namespace Ryujinx.Graphics.Gal.Shader
return ((int)(OpCode >> 20) << 8) >> 8;
}
private static bool HasArray(this long OpCode)
{
return OpCode.Read(0x1c);
}
private static ShaderIrOperAbuf[] Abuf20(this long OpCode)
{
int Abuf = OpCode.Read(20, 0x3ff);

View file

@ -49,6 +49,7 @@ namespace Ryujinx.Graphics.Gal.Shader
Ipa,
Texb,
Texs,
Tld4,
Trunc,
F_End,

View file

@ -1,12 +1,24 @@
using Ryujinx.Graphics.Texture;
namespace Ryujinx.Graphics.Gal.Shader
{
class ShaderIrMetaTex : ShaderIrMeta
{
public int Elem { get; private set; }
public int Elem { get; private set; }
public GalTextureTarget TextureTarget { get; private set; }
public ShaderIrNode[] Coordinates { get; private set; }
public TextureInstructionSuffix TextureInstructionSuffix { get; private set; }
public ShaderIrOperGpr LevelOfDetail;
public ShaderIrOperGpr Offset;
public ShaderIrOperGpr DepthCompare;
public int Component; // for TLD4(S)
public ShaderIrMetaTex(int Elem)
public ShaderIrMetaTex(int Elem, GalTextureTarget TextureTarget, TextureInstructionSuffix TextureInstructionSuffix, params ShaderIrNode[] Coordinates)
{
this.Elem = Elem;
this.Elem = Elem;
this.TextureTarget = TextureTarget;
this.TextureInstructionSuffix = TextureInstructionSuffix;
this.Coordinates = Coordinates;
}
}
}

View file

@ -122,6 +122,8 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("1101111101001x", ShaderDecode.Texq);
Set("1101x00xxxxxxx", ShaderDecode.Texs);
Set("1101101xxxxxxx", ShaderDecode.Tlds);
Set("110010xxxx111x", ShaderDecode.Tld4);
Set("1101111100xxxx", ShaderDecode.Tld4s);
Set("01011111xxxxxx", ShaderDecode.Vmad);
Set("0100111xxxxxxx", ShaderDecode.Xmad_CR);
Set("0011011x00xxxx", ShaderDecode.Xmad_I);

View file

@ -1,3 +1,5 @@
using Ryujinx.Graphics.Texture;
namespace Ryujinx.Graphics.Gal
{
public class ShaderDeclInfo
@ -9,18 +11,27 @@ namespace Ryujinx.Graphics.Gal
public int Cbuf { get; private set; }
public int Size { get; private set; }
public GalTextureTarget TextureTarget { get; private set; }
public TextureInstructionSuffix TextureSuffix { get; private set; }
public ShaderDeclInfo(
string Name,
int Index,
bool IsCb = false,
int Cbuf = 0,
int Size = 1)
int Size = 1,
GalTextureTarget TextureTarget = GalTextureTarget.TwoD,
TextureInstructionSuffix TextureSuffix = TextureInstructionSuffix.None)
{
this.Name = Name;
this.Index = Index;
this.IsCb = IsCb;
this.Cbuf = Cbuf;
this.Size = Size;
this.Name = Name;
this.Index = Index;
this.IsCb = IsCb;
this.Cbuf = Cbuf;
this.Size = Size;
this.TextureTarget = TextureTarget;
this.TextureSuffix = TextureSuffix;
}
internal void Enlarge(int NewSize)

View file

@ -1,6 +1,8 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics
@ -11,6 +13,7 @@ namespace Ryujinx.Graphics
{
None,
Texture,
TextureArrayLayer,
ColorBuffer,
ZetaBuffer
}
@ -20,6 +23,7 @@ namespace Ryujinx.Graphics
private HashSet<long>[] UploadedKeys;
private Dictionary<long, ImageType> ImageTypes;
private Dictionary<long, int> MirroredTextures;
public GpuResourceManager(NvGpu Gpu)
{
@ -33,6 +37,7 @@ namespace Ryujinx.Graphics
}
ImageTypes = new Dictionary<long, ImageType>();
MirroredTextures = new Dictionary<long, int>();
}
public void SendColorBuffer(NvGpuVmm Vmm, long Position, int Attachment, GalImage NewImage)
@ -70,6 +75,32 @@ namespace Ryujinx.Graphics
ImageTypes[Position] = ImageType.Texture;
}
public bool TryGetTextureLayer(long Position, out int LayerIndex)
{
if (MirroredTextures.TryGetValue(Position, out LayerIndex))
{
ImageType Type = ImageTypes[Position];
// FIXME(thog): I'm actually unsure if we should deny all other image type, gpu testing needs to be done here.
if (Type != ImageType.Texture && Type != ImageType.TextureArrayLayer)
{
LayerIndex = -1;
return false;
}
return true;
}
LayerIndex = -1;
return false;
}
public void SetTextureArrayLayer(long Position, int LayerIndex)
{
ImageTypes[Position] = ImageType.TextureArrayLayer;
MirroredTextures[Position] = LayerIndex;
}
private void PrepareSendTexture(NvGpuVmm Vmm, long Position, GalImage NewImage)
{
long Size = ImageUtils.GetSize(NewImage);
@ -102,7 +133,7 @@ namespace Ryujinx.Graphics
private bool TryReuse(NvGpuVmm Vmm, long Position, GalImage NewImage)
{
if (Gpu.Renderer.Texture.TryGetImage(Position, out GalImage CachedImage) && CachedImage.SizeMatches(NewImage))
if (Gpu.Renderer.Texture.TryGetImage(Position, out GalImage CachedImage) && CachedImage.TextureTarget == NewImage.TextureTarget && CachedImage.SizeMatches(NewImage))
{
Gpu.Renderer.RenderTarget.Reinterpret(Position, NewImage);

View file

@ -1,3 +1,4 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
@ -46,6 +47,8 @@ namespace Ryujinx.Graphics.Graphics3d
bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth);
int DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
int DstDepth = ReadRegister(NvGpuEngine2dReg.DstDepth);
int DstLayer = ReadRegister(NvGpuEngine2dReg.DstLayer);
int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch);
int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);
@ -53,6 +56,8 @@ namespace Ryujinx.Graphics.Graphics3d
bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0;
int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth);
int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);
int SrcDepth = ReadRegister(NvGpuEngine2dReg.SrcDepth);
int SrcLayer = ReadRegister(NvGpuEngine2dReg.SrcLayer);
int SrcPitch = ReadRegister(NvGpuEngine2dReg.SrcPitch);
int SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions);
@ -82,26 +87,99 @@ namespace Ryujinx.Graphics.Graphics3d
long SrcKey = Vmm.GetPhysicalAddress(SrcAddress);
long DstKey = Vmm.GetPhysicalAddress(DstAddress);
bool IsSrcLayered = false;
bool IsDstLayered = false;
GalTextureTarget SrcTarget = GalTextureTarget.TwoD;
if (SrcDepth != 0)
{
SrcTarget = GalTextureTarget.TwoDArray;
SrcDepth++;
IsSrcLayered = true;
}
else
{
SrcDepth = 1;
}
GalTextureTarget DstTarget = GalTextureTarget.TwoD;
if (DstDepth != 0)
{
DstTarget = GalTextureTarget.TwoDArray;
DstDepth++;
IsDstLayered = true;
}
else
{
DstDepth = 1;
}
GalImage SrcTexture = new GalImage(
SrcWidth,
SrcHeight, 1,
SrcBlockHeight,
SrcHeight,
1, SrcDepth, 1,
SrcBlockHeight, 1,
SrcLayout,
SrcImgFormat);
SrcImgFormat,
SrcTarget);
GalImage DstTexture = new GalImage(
DstWidth,
DstHeight, 1,
DstBlockHeight,
DstHeight,
1, DstDepth, 1,
DstBlockHeight, 1,
DstLayout,
DstImgFormat);
DstImgFormat,
DstTarget);
SrcTexture.Pitch = SrcPitch;
DstTexture.Pitch = DstPitch;
long GetLayerOffset(GalImage Image, int Layer)
{
int TargetMipLevel = Image.MaxMipmapLevel <= 1 ? 1 : Image.MaxMipmapLevel - 1;
return ImageUtils.GetLayerOffset(Image, TargetMipLevel) * Layer;
}
int SrcLayerIndex = -1;
if (IsSrcLayered && Gpu.ResourceManager.TryGetTextureLayer(SrcKey, out SrcLayerIndex) && SrcLayerIndex != 0)
{
SrcKey = SrcKey - GetLayerOffset(SrcTexture, SrcLayerIndex);
}
int DstLayerIndex = -1;
if (IsDstLayered && Gpu.ResourceManager.TryGetTextureLayer(DstKey, out DstLayerIndex) && DstLayerIndex != 0)
{
DstKey = DstKey - GetLayerOffset(DstTexture, DstLayerIndex);
}
Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcTexture);
Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstTexture);
if (IsSrcLayered && SrcLayerIndex == -1)
{
for (int Layer = 0; Layer < SrcTexture.LayerCount; Layer++)
{
Gpu.ResourceManager.SetTextureArrayLayer(SrcKey + GetLayerOffset(SrcTexture, Layer), Layer);
}
SrcLayerIndex = 0;
}
if (IsDstLayered && DstLayerIndex == -1)
{
for (int Layer = 0; Layer < DstTexture.LayerCount; Layer++)
{
Gpu.ResourceManager.SetTextureArrayLayer(DstKey + GetLayerOffset(DstTexture, Layer), Layer);
}
DstLayerIndex = 0;
}
int SrcBlitX1 = (int)(SrcBlitX >> 32);
int SrcBlitY1 = (int)(SrcBlitY >> 32);
@ -109,8 +187,12 @@ namespace Ryujinx.Graphics.Graphics3d
int SrcBlitY2 = (int)(SrcBlitY + DstBlitH * BlitDvDy >> 32);
Gpu.Renderer.RenderTarget.Copy(
SrcTexture,
DstTexture,
SrcKey,
DstKey,
SrcLayerIndex,
DstLayerIndex,
SrcBlitX1,
SrcBlitY1,
SrcBlitX2,
@ -124,6 +206,8 @@ namespace Ryujinx.Graphics.Graphics3d
//the texture is modified by the guest, however it doesn't
//work when resources that the gpu can write to are copied,
//like framebuffers.
// FIXME: SUPPORT MULTILAYER CORRECTLY HERE (this will cause weird stuffs on the first layer)
ImageUtils.CopyTexture(
Vmm,
SrcTexture,

View file

@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.Graphics3d
DstWidth = 0x86,
DstHeight = 0x87,
DstAddress = 0x88,
DstAddressLow = 0x89,
SrcFormat = 0x8c,
SrcLinear = 0x8d,
SrcBlockDimensions = 0x8e,
@ -20,6 +21,7 @@ namespace Ryujinx.Graphics.Graphics3d
SrcWidth = 0x92,
SrcHeight = 0x93,
SrcAddress = 0x94,
SrcAddressLow = 0x95,
ClipEnable = 0xa4,
CopyOperation = 0xab,
BlitControl = 0x223,

View file

@ -1,4 +1,5 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
@ -71,6 +72,8 @@ namespace Ryujinx.Graphics.Graphics3d
WriteRegister(NvGpuEngine3dReg.FrameBufferSrgb, 1);
WriteRegister(NvGpuEngine3dReg.FrontFace, (int)GalFrontFace.CW);
for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++)
{
WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb + Index * 8, (int)GalBlendEquation.FuncAdd);
@ -195,7 +198,11 @@ namespace Ryujinx.Graphics.Graphics3d
int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10);
int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10);
int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10);
int ArrayMode = ReadRegister(NvGpuEngine3dReg.FrameBufferNArrayMode + FbIndex * 0x10);
int LayerCount = ArrayMode & 0xFFFF;
int LayerStride = ReadRegister(NvGpuEngine3dReg.FrameBufferNLayerStride + FbIndex * 0x10);
int BaseLayer = ReadRegister(NvGpuEngine3dReg.FrameBufferNBaseLayer + FbIndex * 0x10);
int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10);
int GobBlockHeight = 1 << ((BlockDim >> 4) & 7);
@ -215,7 +222,7 @@ namespace Ryujinx.Graphics.Graphics3d
GalImageFormat Format = ImageUtils.ConvertSurface((GalSurfaceFormat)SurfFormat);
GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format);
GalImage Image = new GalImage(Width, Height, 1, 1, 1, GobBlockHeight, 1, Layout, Format, GalTextureTarget.TwoD);
Gpu.ResourceManager.SendColorBuffer(Vmm, Key, FbIndex, Image);
@ -267,7 +274,8 @@ namespace Ryujinx.Graphics.Graphics3d
GalImageFormat Format = ImageUtils.ConvertZeta((GalZetaFormat)ZetaFormat);
GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format);
// TODO: Support non 2D?
GalImage Image = new GalImage(Width, Height, 1, 1, 1, GobBlockHeight, 1, Layout, Format, GalTextureTarget.TwoD);
Gpu.ResourceManager.SendZetaBuffer(Vmm, Key, Image);
}
@ -683,7 +691,7 @@ namespace Ryujinx.Graphics.Graphics3d
}
Gpu.Renderer.Texture.Bind(Key, Index, Image);
Gpu.Renderer.Texture.SetSampler(Sampler);
Gpu.Renderer.Texture.SetSampler(Image, Sampler);
}
}

View file

@ -2,112 +2,115 @@ namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngine3dReg
{
FrameBufferNAddress = 0x200,
FrameBufferNWidth = 0x202,
FrameBufferNHeight = 0x203,
FrameBufferNFormat = 0x204,
FrameBufferNBlockDim = 0x205,
ViewportNScaleX = 0x280,
ViewportNScaleY = 0x281,
ViewportNScaleZ = 0x282,
ViewportNTranslateX = 0x283,
ViewportNTranslateY = 0x284,
ViewportNTranslateZ = 0x285,
ViewportNHoriz = 0x300,
ViewportNVert = 0x301,
DepthRangeNNear = 0x302,
DepthRangeNFar = 0x303,
VertexArrayFirst = 0x35d,
VertexArrayCount = 0x35e,
ClearNColor = 0x360,
ClearDepth = 0x364,
ClearStencil = 0x368,
ScissorEnable = 0x380,
ScissorHorizontal = 0x381,
ScissorVertical = 0x382,
StencilBackFuncRef = 0x3d5,
StencilBackMask = 0x3d6,
StencilBackFuncMask = 0x3d7,
ColorMaskCommon = 0x3e4,
RTSeparateFragData = 0x3eb,
ZetaAddress = 0x3f8,
ZetaFormat = 0x3fa,
ZetaBlockDimensions = 0x3fb,
ZetaLayerStride = 0x3fc,
VertexAttribNFormat = 0x458,
RTControl = 0x487,
ZetaHoriz = 0x48a,
ZetaVert = 0x48b,
ZetaArrayMode = 0x48c,
LinkedTsc = 0x48d,
DepthTestEnable = 0x4b3,
BlendIndependent = 0x4b9,
DepthWriteEnable = 0x4ba,
DepthTestFunction = 0x4c3,
BlendSeparateAlpha = 0x4cf,
BlendEquationRgb = 0x4d0,
BlendFuncSrcRgb = 0x4d1,
BlendFuncDstRgb = 0x4d2,
BlendEquationAlpha = 0x4d3,
BlendFuncSrcAlpha = 0x4d4,
BlendFuncDstAlpha = 0x4d6,
BlendEnable = 0x4d7,
IBlendNEnable = 0x4d8,
StencilEnable = 0x4e0,
StencilFrontOpFail = 0x4e1,
StencilFrontOpZFail = 0x4e2,
StencilFrontOpZPass = 0x4e3,
StencilFrontFuncFunc = 0x4e4,
StencilFrontFuncRef = 0x4e5,
StencilFrontFuncMask = 0x4e6,
StencilFrontMask = 0x4e7,
ScreenYControl = 0x4eb,
VertexArrayElemBase = 0x50d,
VertexArrayInstBase = 0x50e,
ZetaEnable = 0x54e,
TexHeaderPoolOffset = 0x55d,
TexSamplerPoolOffset = 0x557,
StencilTwoSideEnable = 0x565,
StencilBackOpFail = 0x566,
StencilBackOpZFail = 0x567,
StencilBackOpZPass = 0x568,
StencilBackFuncFunc = 0x569,
FrameBufferSrgb = 0x56e,
ShaderAddress = 0x582,
VertexBeginGl = 0x586,
PrimRestartEnable = 0x591,
PrimRestartIndex = 0x592,
IndexArrayAddress = 0x5f2,
IndexArrayEndAddr = 0x5f4,
IndexArrayFormat = 0x5f6,
IndexBatchFirst = 0x5f7,
IndexBatchCount = 0x5f8,
VertexArrayNInstance = 0x620,
CullFaceEnable = 0x646,
FrontFace = 0x647,
CullFace = 0x648,
ColorMaskN = 0x680,
QueryAddress = 0x6c0,
QuerySequence = 0x6c2,
QueryControl = 0x6c3,
VertexArrayNControl = 0x700,
VertexArrayNAddress = 0x701,
VertexArrayNDivisor = 0x703,
IBlendNSeparateAlpha = 0x780,
IBlendNEquationRgb = 0x781,
IBlendNFuncSrcRgb = 0x782,
IBlendNFuncDstRgb = 0x783,
IBlendNEquationAlpha = 0x784,
IBlendNFuncSrcAlpha = 0x785,
IBlendNFuncDstAlpha = 0x786,
VertexArrayNEndAddr = 0x7c0,
ShaderNControl = 0x800,
ShaderNOffset = 0x801,
ShaderNMaxGprs = 0x803,
ShaderNType = 0x804,
ConstBufferSize = 0x8e0,
ConstBufferAddress = 0x8e1,
ConstBufferOffset = 0x8e3,
TextureCbIndex = 0x982
FrameBufferNAddress = 0x200,
FrameBufferNWidth = 0x202,
FrameBufferNHeight = 0x203,
FrameBufferNFormat = 0x204,
FrameBufferNBlockDim = 0x205,
FrameBufferNArrayMode = 0x206,
FrameBufferNLayerStride = 0x207,
FrameBufferNBaseLayer = 0x208,
ViewportNScaleX = 0x280,
ViewportNScaleY = 0x281,
ViewportNScaleZ = 0x282,
ViewportNTranslateX = 0x283,
ViewportNTranslateY = 0x284,
ViewportNTranslateZ = 0x285,
ViewportNHoriz = 0x300,
ViewportNVert = 0x301,
DepthRangeNNear = 0x302,
DepthRangeNFar = 0x303,
VertexArrayFirst = 0x35d,
VertexArrayCount = 0x35e,
ClearNColor = 0x360,
ClearDepth = 0x364,
ClearStencil = 0x368,
ScissorEnable = 0x380,
ScissorHorizontal = 0x381,
ScissorVertical = 0x382,
StencilBackFuncRef = 0x3d5,
StencilBackMask = 0x3d6,
StencilBackFuncMask = 0x3d7,
ColorMaskCommon = 0x3e4,
RTSeparateFragData = 0x3eb,
ZetaAddress = 0x3f8,
ZetaFormat = 0x3fa,
ZetaBlockDimensions = 0x3fb,
ZetaLayerStride = 0x3fc,
VertexAttribNFormat = 0x458,
RTControl = 0x487,
ZetaHoriz = 0x48a,
ZetaVert = 0x48b,
ZetaArrayMode = 0x48c,
LinkedTsc = 0x48d,
DepthTestEnable = 0x4b3,
BlendIndependent = 0x4b9,
DepthWriteEnable = 0x4ba,
DepthTestFunction = 0x4c3,
BlendSeparateAlpha = 0x4cf,
BlendEquationRgb = 0x4d0,
BlendFuncSrcRgb = 0x4d1,
BlendFuncDstRgb = 0x4d2,
BlendEquationAlpha = 0x4d3,
BlendFuncSrcAlpha = 0x4d4,
BlendFuncDstAlpha = 0x4d6,
BlendEnable = 0x4d7,
IBlendNEnable = 0x4d8,
StencilEnable = 0x4e0,
StencilFrontOpFail = 0x4e1,
StencilFrontOpZFail = 0x4e2,
StencilFrontOpZPass = 0x4e3,
StencilFrontFuncFunc = 0x4e4,
StencilFrontFuncRef = 0x4e5,
StencilFrontFuncMask = 0x4e6,
StencilFrontMask = 0x4e7,
ScreenYControl = 0x4eb,
VertexArrayElemBase = 0x50d,
VertexArrayInstBase = 0x50e,
ZetaEnable = 0x54e,
TexHeaderPoolOffset = 0x55d,
TexSamplerPoolOffset = 0x557,
StencilTwoSideEnable = 0x565,
StencilBackOpFail = 0x566,
StencilBackOpZFail = 0x567,
StencilBackOpZPass = 0x568,
StencilBackFuncFunc = 0x569,
FrameBufferSrgb = 0x56e,
ShaderAddress = 0x582,
VertexBeginGl = 0x586,
PrimRestartEnable = 0x591,
PrimRestartIndex = 0x592,
IndexArrayAddress = 0x5f2,
IndexArrayEndAddr = 0x5f4,
IndexArrayFormat = 0x5f6,
IndexBatchFirst = 0x5f7,
IndexBatchCount = 0x5f8,
VertexArrayNInstance = 0x620,
CullFaceEnable = 0x646,
FrontFace = 0x647,
CullFace = 0x648,
ColorMaskN = 0x680,
QueryAddress = 0x6c0,
QuerySequence = 0x6c2,
QueryControl = 0x6c3,
VertexArrayNControl = 0x700,
VertexArrayNAddress = 0x701,
VertexArrayNDivisor = 0x703,
IBlendNSeparateAlpha = 0x780,
IBlendNEquationRgb = 0x781,
IBlendNFuncSrcRgb = 0x782,
IBlendNFuncDstRgb = 0x783,
IBlendNEquationAlpha = 0x784,
IBlendNFuncSrcAlpha = 0x785,
IBlendNFuncDstAlpha = 0x786,
VertexArrayNEndAddr = 0x7c0,
ShaderNControl = 0x800,
ShaderNOffset = 0x801,
ShaderNMaxGprs = 0x803,
ShaderNType = 0x804,
ConstBufferSize = 0x8e0,
ConstBufferAddress = 0x8e1,
ConstBufferOffset = 0x8e3,
TextureCbIndex = 0x982
}
}

View file

@ -1,3 +1,4 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;
@ -125,29 +126,37 @@ namespace Ryujinx.Graphics.Graphics3d
if (SrcLinear)
{
SrcSwizzle = new LinearSwizzle(SrcPitch, SrcCpp);
SrcSwizzle = new LinearSwizzle(SrcPitch, SrcCpp, SrcSizeX, SrcSizeY);
}
else
{
SrcSwizzle = new BlockLinearSwizzle(SrcSizeX, SrcCpp, SrcBlockHeight);
SrcSwizzle = new BlockLinearSwizzle(
SrcSizeX,
SrcSizeY, 1,
SrcBlockHeight, 1,
SrcCpp);
}
ISwizzle DstSwizzle;
if (DstLinear)
{
DstSwizzle = new LinearSwizzle(DstPitch, DstCpp);
DstSwizzle = new LinearSwizzle(DstPitch, DstCpp, SrcSizeX, SrcSizeY);
}
else
{
DstSwizzle = new BlockLinearSwizzle(DstSizeX, DstCpp, DstBlockHeight);
DstSwizzle = new BlockLinearSwizzle(
DstSizeX,
DstSizeY, 1,
DstBlockHeight, 1,
DstCpp);
}
for (int Y = 0; Y < YCount; Y++)
for (int X = 0; X < XCount; X++)
{
int SrcOffset = SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y);
int DstOffset = DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y);
int SrcOffset = SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y, 0);
int DstOffset = DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y, 0);
long Src = SrcPA + (uint)SrcOffset;
long Dst = DstPA + (uint)DstOffset;

View file

@ -1,3 +1,4 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;
@ -119,14 +120,17 @@ namespace Ryujinx.Graphics.Graphics3d
}
else
{
BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight);
BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(
CopyWidth,
CopyHeight, 1,
CopyGobBlockHeight, 1, 1);
int SrcOffset = 0;
for (int Y = CopyStartY; Y < CopyHeight && SrcOffset < CopySize; Y++)
for (int X = CopyStartX; X < CopyWidth && SrcOffset < CopySize; X++)
{
int DstOffset = Swizzle.GetSwizzleOffset(X, Y);
int DstOffset = Swizzle.GetSwizzleOffset(X, Y, 0);
Vmm.WriteByte(CopyAddress + DstOffset, Buffer[SrcOffset++]);
}

View file

@ -72,6 +72,7 @@ namespace Ryujinx.Graphics.Texture
if (BlockZ != 1 || Z != 1)
{
// TODO: Support 3D textures?
throw new ASTCDecoderException("3D compressed textures unsupported!");
}

View file

@ -1,51 +1,178 @@
using Ryujinx.Common;
using System;
namespace Ryujinx.Graphics.Texture
{
class BlockLinearSwizzle : ISwizzle
{
private int BhShift;
private int BppShift;
private const int GobWidth = 64;
private const int GobHeight = 8;
private const int GobSize = GobWidth * GobHeight;
private int TexWidth;
private int TexHeight;
private int TexDepth;
private int TexGobBlockHeight;
private int TexGobBlockDepth;
private int TexBpp;
private int BhMask;
private int BdMask;
private int BhShift;
private int BdShift;
private int BppShift;
private int XShift;
private int GobStride;
public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16)
private int RobSize;
private int SliceSize;
private int BaseOffset;
public BlockLinearSwizzle(
int Width,
int Height,
int Depth,
int GobBlockHeight,
int GobBlockDepth,
int Bpp)
{
BhMask = (BlockHeight * 8) - 1;
TexWidth = Width;
TexHeight = Height;
TexDepth = Depth;
TexGobBlockHeight = GobBlockHeight;
TexGobBlockDepth = GobBlockDepth;
TexBpp = Bpp;
BhShift = CountLsbZeros(BlockHeight * 8);
BppShift = CountLsbZeros(Bpp);
BppShift = BitUtils.CountTrailingZeros32(Bpp);
int WidthInGobs = (int)MathF.Ceiling(Width * Bpp / 64f);
GobStride = 512 * BlockHeight * WidthInGobs;
XShift = CountLsbZeros(512 * BlockHeight);
SetMipLevel(0);
}
private int CountLsbZeros(int Value)
public void SetMipLevel(int Level)
{
int Count = 0;
BaseOffset = GetMipOffset(Level);
while (((Value >> Count) & 1) == 0)
int Width = Math.Max(1, TexWidth >> Level);
int Height = Math.Max(1, TexHeight >> Level);
int Depth = Math.Max(1, TexDepth >> Level);
GobBlockSizes GbSizes = AdjustGobBlockSizes(Height, Depth);
BhMask = GbSizes.Height - 1;
BdMask = GbSizes.Depth - 1;
BhShift = BitUtils.CountTrailingZeros32(GbSizes.Height);
BdShift = BitUtils.CountTrailingZeros32(GbSizes.Depth);
XShift = BitUtils.CountTrailingZeros32(GobSize * GbSizes.Height * GbSizes.Depth);
RobAndSliceSizes GsSizes = GetRobAndSliceSizes(Width, Height, GbSizes);
RobSize = GsSizes.RobSize;
SliceSize = GsSizes.SliceSize;
}
public int GetImageSize(int MipsCount)
{
int Size = GetMipOffset(MipsCount);
Size = (Size + 0x1fff) & ~0x1fff;
return Size;
}
public int GetMipOffset(int Level)
{
int TotalSize = 0;
for (int Index = 0; Index < Level; Index++)
{
Count++;
int Width = Math.Max(1, TexWidth >> Index);
int Height = Math.Max(1, TexHeight >> Index);
int Depth = Math.Max(1, TexDepth >> Index);
GobBlockSizes GbSizes = AdjustGobBlockSizes(Height, Depth);
RobAndSliceSizes RsSizes = GetRobAndSliceSizes(Width, Height, GbSizes);
TotalSize += BitUtils.DivRoundUp(Depth, GbSizes.Depth) * RsSizes.SliceSize;
}
return Count;
return TotalSize;
}
public int GetSwizzleOffset(int X, int Y)
private struct GobBlockSizes
{
public int Height;
public int Depth;
public GobBlockSizes(int GobBlockHeight, int GobBlockDepth)
{
this.Height = GobBlockHeight;
this.Depth = GobBlockDepth;
}
}
private GobBlockSizes AdjustGobBlockSizes(int Height, int Depth)
{
int GobBlockHeight = TexGobBlockHeight;
int GobBlockDepth = TexGobBlockDepth;
int Pow2Height = BitUtils.Pow2RoundUp(Height);
int Pow2Depth = BitUtils.Pow2RoundUp(Depth);
while (GobBlockHeight * GobHeight > Pow2Height && GobBlockHeight > 1)
{
GobBlockHeight >>= 1;
}
while (GobBlockDepth > Pow2Depth && GobBlockDepth > 1)
{
GobBlockDepth >>= 1;
}
return new GobBlockSizes(GobBlockHeight, GobBlockDepth);
}
private struct RobAndSliceSizes
{
public int RobSize;
public int SliceSize;
public RobAndSliceSizes(int RobSize, int SliceSize)
{
this.RobSize = RobSize;
this.SliceSize = SliceSize;
}
}
private RobAndSliceSizes GetRobAndSliceSizes(int Width, int Height, GobBlockSizes GbSizes)
{
int WidthInGobs = BitUtils.DivRoundUp(Width * TexBpp, GobWidth);
int RobSize = GobSize * GbSizes.Height * GbSizes.Depth * WidthInGobs;
int SliceSize = BitUtils.DivRoundUp(Height, GbSizes.Height * GobHeight) * RobSize;
return new RobAndSliceSizes(RobSize, SliceSize);
}
public int GetSwizzleOffset(int X, int Y, int Z)
{
X <<= BppShift;
int Position = (Y >> BhShift) * GobStride;
int YH = Y / GobHeight;
Position += (X >> 6) << XShift;
int Position = (Z >> BdShift) * SliceSize + (YH >> BhShift) * RobSize;
Position += ((Y & BhMask) >> 3) << 9;
Position += (X / GobWidth) << XShift;
Position += (YH & BhMask) * GobSize;
Position += ((Z & BdMask) * GobSize) << BhShift;
Position += ((X & 0x3f) >> 5) << 8;
Position += ((Y & 0x07) >> 1) << 6;
@ -53,7 +180,7 @@ namespace Ryujinx.Graphics.Texture
Position += ((Y & 0x01) >> 0) << 4;
Position += ((X & 0x0f) >> 0) << 0;
return Position;
return BaseOffset + Position;
}
}
}

View file

@ -2,6 +2,12 @@ namespace Ryujinx.Graphics.Texture
{
interface ISwizzle
{
int GetSwizzleOffset(int X, int Y);
int GetSwizzleOffset(int X, int Y, int Z);
void SetMipLevel(int Level);
int GetMipOffset(int Level);
int GetImageSize(int MipsCount);
}
}

View file

@ -1,4 +1,6 @@
using ChocolArm64.Memory;
using OpenTK.Graphics.OpenGL;
using Ryujinx.Common;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using System;
@ -23,14 +25,16 @@ namespace Ryujinx.Graphics.Texture
public int BytesPerPixel { get; private set; }
public int BlockWidth { get; private set; }
public int BlockHeight { get; private set; }
public int BlockDepth { get; private set; }
public TargetBuffer Target { get; private set; }
public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, TargetBuffer Target)
public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, int BlockDepth, TargetBuffer Target)
{
this.BytesPerPixel = BytesPerPixel;
this.BlockWidth = BlockWidth;
this.BlockHeight = BlockHeight;
this.BlockDepth = BlockDepth;
this.Target = Target;
}
}
@ -92,52 +96,52 @@ namespace Ryujinx.Graphics.Texture
private static readonly Dictionary<GalImageFormat, ImageDescriptor> s_ImageTable =
new Dictionary<GalImageFormat, ImageDescriptor>()
{
{ GalImageFormat.RGBA32, new ImageDescriptor(16, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA16, new ImageDescriptor(8, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RG32, new ImageDescriptor(8, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBX8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BGRA8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB10A2, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R32, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA4, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BptcSfloat, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BptcUfloat, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BGR5A1, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB5A1, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB565, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BGR565, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BptcUnorm, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.RG16, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RG8, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R16, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R8, new ImageDescriptor(1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R11G11B10, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BC1, new ImageDescriptor(8, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BC2, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BC3, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BC4, new ImageDescriptor(8, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.BC5, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.Astc2D4x4, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) },
{ GalImageFormat.Astc2D5x5, new ImageDescriptor(16, 5, 5, TargetBuffer.Color) },
{ GalImageFormat.Astc2D6x6, new ImageDescriptor(16, 6, 6, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x8, new ImageDescriptor(16, 8, 8, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x10, new ImageDescriptor(16, 10, 10, TargetBuffer.Color) },
{ GalImageFormat.Astc2D12x12, new ImageDescriptor(16, 12, 12, TargetBuffer.Color) },
{ GalImageFormat.Astc2D5x4, new ImageDescriptor(16, 5, 4, TargetBuffer.Color) },
{ GalImageFormat.Astc2D6x5, new ImageDescriptor(16, 6, 5, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x6, new ImageDescriptor(16, 8, 6, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x8, new ImageDescriptor(16, 10, 8, TargetBuffer.Color) },
{ GalImageFormat.Astc2D12x10, new ImageDescriptor(16, 12, 10, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x5, new ImageDescriptor(16, 8, 5, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x5, new ImageDescriptor(16, 10, 5, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x6, new ImageDescriptor(16, 10, 6, TargetBuffer.Color) },
{ GalImageFormat.RGBA32, new ImageDescriptor(16, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA16, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RG32, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBX8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BGRA8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB10A2, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R32, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGBA4, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BptcSfloat, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BptcUfloat, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BGR5A1, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB5A1, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RGB565, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BGR565, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BptcUnorm, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.RG16, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.RG8, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R16, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R8, new ImageDescriptor(1, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.R11G11B10, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) },
{ GalImageFormat.BC1, new ImageDescriptor(8, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BC2, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BC3, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BC4, new ImageDescriptor(8, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.BC5, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D4x4, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D5x5, new ImageDescriptor(16, 5, 5, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D6x6, new ImageDescriptor(16, 6, 6, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x8, new ImageDescriptor(16, 8, 8, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x10, new ImageDescriptor(16, 10, 10, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D12x12, new ImageDescriptor(16, 12, 12, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D5x4, new ImageDescriptor(16, 5, 4, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D6x5, new ImageDescriptor(16, 6, 5, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x6, new ImageDescriptor(16, 8, 6, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x8, new ImageDescriptor(16, 10, 8, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D12x10, new ImageDescriptor(16, 12, 10, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D8x5, new ImageDescriptor(16, 8, 5, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x5, new ImageDescriptor(16, 10, 5, 1, TargetBuffer.Color) },
{ GalImageFormat.Astc2D10x6, new ImageDescriptor(16, 10, 6, 1, TargetBuffer.Color) },
{ GalImageFormat.D16, new ImageDescriptor(2, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D24, new ImageDescriptor(4, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, TargetBuffer.DepthStencil) },
{ GalImageFormat.D32, new ImageDescriptor(4, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, TargetBuffer.DepthStencil) }
{ GalImageFormat.D16, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D24, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.DepthStencil) },
{ GalImageFormat.D32, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Depth) },
{ GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.DepthStencil) }
};
public static GalImageFormat ConvertTexture(
@ -241,26 +245,37 @@ namespace Ryujinx.Graphics.Texture
ImageDescriptor Desc = GetImageDescriptor(Image.Format);
(int Width, int Height) = GetImageSizeInBlocks(Image);
(int Width, int Height, int Depth) = GetImageSizeInBlocks(Image);
int BytesPerPixel = Desc.BytesPerPixel;
//Note: Each row of the texture needs to be aligned to 4 bytes.
int Pitch = (Width * BytesPerPixel + 3) & ~3;
byte[] Data = new byte[Height * Pitch];
for (int Y = 0; Y < Height; Y++)
int DataLayerSize = Height * Pitch * Depth;
byte[] Data = new byte[DataLayerSize * Image.LayerCount];
int TargetMipLevel = Image.MaxMipmapLevel <= 1 ? 1 : Image.MaxMipmapLevel - 1;
int LayerOffset = ImageUtils.GetLayerOffset(Image, TargetMipLevel);
for (int Layer = 0; Layer < Image.LayerCount; Layer++)
{
int OutOffs = Y * Pitch;
for (int X = 0; X < Width; X++)
for (int Z = 0; Z < Depth; Z++)
{
long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
for (int Y = 0; Y < Height; Y++)
{
int OutOffs = (DataLayerSize * Layer) + Y * Pitch + (Z * Width * Height * BytesPerPixel);
CpuMemory.ReadBytes(Position + Offset, Data, OutOffs, BytesPerPixel);
for (int X = 0; X < Width; X++)
{
long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y, Z);
OutOffs += BytesPerPixel;
CpuMemory.ReadBytes(Position + (LayerOffset * Layer) + Offset, Data, OutOffs, BytesPerPixel);
OutOffs += BytesPerPixel;
}
}
}
}
@ -273,16 +288,17 @@ namespace Ryujinx.Graphics.Texture
ImageDescriptor Desc = GetImageDescriptor(Image.Format);
(int Width, int Height) = ImageUtils.GetImageSizeInBlocks(Image);
(int Width, int Height, int Depth) = ImageUtils.GetImageSizeInBlocks(Image);
int BytesPerPixel = Desc.BytesPerPixel;
int InOffs = 0;
for (int Z = 0; Z < Depth; Z++)
for (int Y = 0; Y < Height; Y++)
for (int X = 0; X < Width; X++)
{
long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y, Z);
Vmm.Memory.WriteBytes(Position + Offset, Data, InOffs, BytesPerPixel);
@ -290,6 +306,7 @@ namespace Ryujinx.Graphics.Texture
}
}
// TODO: Support non 2D
public static bool CopyTexture(
NvGpuVmm Vmm,
GalImage SrcImage,
@ -318,8 +335,8 @@ namespace Ryujinx.Graphics.Texture
for (int Y = 0; Y < Height; Y++)
for (int X = 0; X < Width; X++)
{
long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + X, SrcY + Y);
long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + X, DstY + Y);
long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + X, SrcY + Y, 0);
long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + X, DstY + Y, 0);
byte[] Texel = Vmm.ReadBytes(SrcAddress + SrcOffset, BytesPerPixel);
@ -333,10 +350,41 @@ namespace Ryujinx.Graphics.Texture
{
ImageDescriptor Desc = GetImageDescriptor(Image.Format);
int ComponentCount = GetCoordsCountTextureTarget(Image.TextureTarget);
if (IsArray(Image.TextureTarget))
ComponentCount--;
int Width = DivRoundUp(Image.Width, Desc.BlockWidth);
int Height = DivRoundUp(Image.Height, Desc.BlockHeight);
int Depth = DivRoundUp(Image.Depth, Desc.BlockDepth);
return Desc.BytesPerPixel * Width * Height;
switch (ComponentCount)
{
case 1:
return Desc.BytesPerPixel * Width * Image.LayerCount;
case 2:
return Desc.BytesPerPixel * Width * Height * Image.LayerCount;
case 3:
return Desc.BytesPerPixel * Width * Height * Depth * Image.LayerCount;
default:
throw new InvalidOperationException($"Invalid component count: {ComponentCount}");
}
}
public static int GetGpuSize(GalImage Image, bool forcePitch = false)
{
return TextureHelper.GetSwizzle(Image).GetImageSize(Image.MaxMipmapLevel) * Image.LayerCount;
}
public static int GetLayerOffset(GalImage Image, int MipLevel)
{
if (MipLevel <= 0)
{
MipLevel = 1;
}
return TextureHelper.GetSwizzle(Image).GetMipOffset(MipLevel);
}
public static int GetPitch(GalImageFormat Format, int Width)
@ -360,6 +408,11 @@ namespace Ryujinx.Graphics.Texture
return GetImageDescriptor(Format).BlockHeight;
}
public static int GetBlockDepth(GalImageFormat Format)
{
return GetImageDescriptor(Format).BlockDepth;
}
public static int GetAlignedWidth(GalImage Image)
{
ImageDescriptor Desc = GetImageDescriptor(Image.Format);
@ -378,12 +431,13 @@ namespace Ryujinx.Graphics.Texture
return (Image.Width + AlignMask) & ~AlignMask;
}
public static (int Width, int Height) GetImageSizeInBlocks(GalImage Image)
public static (int Width, int Height, int Depth) GetImageSizeInBlocks(GalImage Image)
{
ImageDescriptor Desc = GetImageDescriptor(Image.Format);
return (DivRoundUp(Image.Width, Desc.BlockWidth),
DivRoundUp(Image.Height, Desc.BlockHeight));
DivRoundUp(Image.Height, Desc.BlockHeight),
DivRoundUp(Image.Depth, Desc.BlockDepth));
}
public static int GetBytesPerPixel(GalImageFormat Format)
@ -443,5 +497,66 @@ namespace Ryujinx.Graphics.Texture
default: throw new NotImplementedException(((int)Type).ToString());
}
}
public static TextureTarget GetTextureTarget(GalTextureTarget GalTextureTarget)
{
switch (GalTextureTarget)
{
case GalTextureTarget.OneD:
return TextureTarget.Texture1D;
case GalTextureTarget.TwoD:
case GalTextureTarget.TwoDNoMipMap:
return TextureTarget.Texture2D;
case GalTextureTarget.ThreeD:
return TextureTarget.Texture3D;
case GalTextureTarget.OneDArray:
return TextureTarget.Texture1DArray;
case GalTextureTarget.OneDBuffer:
return TextureTarget.TextureBuffer;
case GalTextureTarget.TwoDArray:
return TextureTarget.Texture2DArray;
case GalTextureTarget.CubeMap:
return TextureTarget.TextureCubeMap;
case GalTextureTarget.CubeArray:
return TextureTarget.TextureCubeMapArray;
default:
throw new NotSupportedException($"Texture target {GalTextureTarget} currently not supported!");
}
}
public static bool IsArray(GalTextureTarget TextureTarget)
{
switch (TextureTarget)
{
case GalTextureTarget.OneDArray:
case GalTextureTarget.TwoDArray:
case GalTextureTarget.CubeArray:
return true;
default:
return false;
}
}
public static int GetCoordsCountTextureTarget(GalTextureTarget TextureTarget)
{
switch (TextureTarget)
{
case GalTextureTarget.OneD:
return 1;
case GalTextureTarget.OneDArray:
case GalTextureTarget.OneDBuffer:
case GalTextureTarget.TwoD:
case GalTextureTarget.TwoDNoMipMap:
return 2;
case GalTextureTarget.ThreeD:
case GalTextureTarget.TwoDArray:
case GalTextureTarget.CubeMap:
return 3;
case GalTextureTarget.CubeArray:
return 4;
default:
throw new NotImplementedException($"TextureTarget.{TextureTarget} not implemented yet.");
}
}
}
}

View file

@ -1,3 +1,5 @@
using System;
namespace Ryujinx.Graphics.Texture
{
class LinearSwizzle : ISwizzle
@ -5,15 +7,39 @@ namespace Ryujinx.Graphics.Texture
private int Pitch;
private int Bpp;
public LinearSwizzle(int Pitch, int Bpp)
private int SliceSize;
public LinearSwizzle(int Pitch, int Bpp, int Width, int Height)
{
this.Pitch = Pitch;
this.Bpp = Bpp;
this.Pitch = Pitch;
this.Bpp = Bpp;
SliceSize = Width * Height * Bpp;
}
public int GetSwizzleOffset(int X, int Y)
public void SetMipLevel(int Level)
{
return X * Bpp + Y * Pitch;
throw new NotImplementedException();
}
public int GetMipOffset(int Level)
{
if (Level == 1)
return SliceSize;
throw new NotImplementedException();
}
public int GetImageSize(int MipsCount)
{
int Size = GetMipOffset(MipsCount);
Size = (Size + 0x1fff) & ~0x1fff;
return Size;
}
public int GetSwizzleOffset(int X, int Y, int Z)
{
return Z * SliceSize + X * Bpp + Y * Pitch;
}
}
}

View file

@ -12,6 +12,8 @@ namespace Ryujinx.Graphics.Texture
GalImageFormat Format = GetImageFormat(Tic);
GalTextureTarget TextureTarget = (GalTextureTarget)((Tic[4] >> 23) & 0xF);
GalTextureSource XSource = (GalTextureSource)((Tic[0] >> 19) & 7);
GalTextureSource YSource = (GalTextureSource)((Tic[0] >> 22) & 7);
GalTextureSource ZSource = (GalTextureSource)((Tic[0] >> 25) & 7);
@ -19,6 +21,8 @@ namespace Ryujinx.Graphics.Texture
TextureSwizzle Swizzle = (TextureSwizzle)((Tic[2] >> 21) & 7);
int MaxMipmapLevel = (Tic[3] >> 28) & 0xF + 1;
GalMemoryLayout Layout;
if (Swizzle == TextureSwizzle.BlockLinear ||
@ -31,22 +35,61 @@ namespace Ryujinx.Graphics.Texture
Layout = GalMemoryLayout.Pitch;
}
int BlockHeightLog2 = (Tic[3] >> 3) & 7;
int TileWidthLog2 = (Tic[3] >> 10) & 7;
int GobBlockHeightLog2 = (Tic[3] >> 3) & 7;
int GobBlockDepthLog2 = (Tic[3] >> 6) & 7;
int TileWidthLog2 = (Tic[3] >> 10) & 7;
int BlockHeight = 1 << BlockHeightLog2;
int TileWidth = 1 << TileWidthLog2;
int GobBlockHeight = 1 << GobBlockHeightLog2;
int GobBlockDepth = 1 << GobBlockDepthLog2;
int TileWidth = 1 << TileWidthLog2;
int Width = (Tic[4] & 0xffff) + 1;
int Height = (Tic[5] & 0xffff) + 1;
int Width = ((Tic[4] >> 0) & 0xffff) + 1;
int Height = ((Tic[5] >> 0) & 0xffff) + 1;
int Depth = ((Tic[5] >> 16) & 0x3fff) + 1;
int LayoutCount = 1;
// TODO: check this
if (ImageUtils.IsArray(TextureTarget))
{
LayoutCount = Depth;
Depth = 1;
}
if (TextureTarget == GalTextureTarget.OneD)
{
Height = 1;
}
if (TextureTarget == GalTextureTarget.TwoD || TextureTarget == GalTextureTarget.OneD)
{
Depth = 1;
}
else if (TextureTarget == GalTextureTarget.CubeMap)
{
// FIXME: This is a bit hacky but I guess it's fine for now
LayoutCount = 6;
Depth = 1;
}
else if (TextureTarget == GalTextureTarget.CubeArray)
{
// FIXME: This is a really really hacky but I guess it's fine for now
LayoutCount *= 6;
Depth = 1;
}
GalImage Image = new GalImage(
Width,
Height,
Depth,
LayoutCount,
TileWidth,
BlockHeight,
GobBlockHeight,
GobBlockDepth,
Layout,
Format,
TextureTarget,
MaxMipmapLevel,
XSource,
YSource,
ZSource,
@ -68,6 +111,10 @@ namespace Ryujinx.Graphics.Texture
GalTextureWrap AddressV = (GalTextureWrap)((Tsc[0] >> 3) & 7);
GalTextureWrap AddressP = (GalTextureWrap)((Tsc[0] >> 6) & 7);
bool DepthCompare = ((Tsc[0] >> 9) & 1) == 1;
DepthCompareFunc DepthCompareFunc = (DepthCompareFunc)((Tsc[0] >> 10) & 7);
GalTextureFilter MagFilter = (GalTextureFilter) ((Tsc[1] >> 0) & 3);
GalTextureFilter MinFilter = (GalTextureFilter) ((Tsc[1] >> 4) & 3);
GalTextureMipFilter MipFilter = (GalTextureMipFilter)((Tsc[1] >> 6) & 3);
@ -85,7 +132,9 @@ namespace Ryujinx.Graphics.Texture
MinFilter,
MagFilter,
MipFilter,
BorderColor);
BorderColor,
DepthCompare,
DepthCompareFunc);
}
private static GalImageFormat GetImageFormat(int[] Tic)

View file

@ -1,4 +1,5 @@
using ChocolArm64.Memory;
using Ryujinx.Common;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
@ -9,9 +10,13 @@ namespace Ryujinx.Graphics.Texture
public static ISwizzle GetSwizzle(GalImage Image)
{
int BlockWidth = ImageUtils.GetBlockWidth (Image.Format);
int BlockHeight = ImageUtils.GetBlockHeight (Image.Format);
int BlockDepth = ImageUtils.GetBlockDepth (Image.Format);
int BytesPerPixel = ImageUtils.GetBytesPerPixel(Image.Format);
int Width = (Image.Width + (BlockWidth - 1)) / BlockWidth;
int Width = BitUtils.DivRoundUp(Image.Width, BlockWidth);
int Height = BitUtils.DivRoundUp(Image.Height, BlockHeight);
int Depth = BitUtils.DivRoundUp(Image.Depth, BlockDepth);
if (Image.Layout == GalMemoryLayout.BlockLinear)
{
@ -19,11 +24,17 @@ namespace Ryujinx.Graphics.Texture
Width = (Width + AlignMask) & ~AlignMask;
return new BlockLinearSwizzle(Width, BytesPerPixel, Image.GobBlockHeight);
return new BlockLinearSwizzle(
Width,
Height,
Depth,
Image.GobBlockHeight,
Image.GobBlockDepth,
BytesPerPixel);
}
else
{
return new LinearSwizzle(Image.Pitch, BytesPerPixel);
return new LinearSwizzle(Image.Pitch, BytesPerPixel, Width, Height);
}
}

View file

@ -1,4 +1,3 @@
using ChocolArm64.Events;
using ChocolArm64.Memory;
using System.Collections.Concurrent;
@ -19,35 +18,28 @@ namespace Ryujinx.Graphics.Memory
{
_memory = memory;
_memory.ObservedAccess += MemoryAccessHandler;
CachedPages = new ConcurrentDictionary<long, int>[1 << 20];
}
private void MemoryAccessHandler(object sender, MemoryAccessEventArgs e)
{
long pa = _memory.GetPhysicalAddress(e.Position);
CachedPages[pa >> PageBits]?.Clear();
}
public bool IsRegionModified(long position, long size, NvGpuBufferType bufferType)
{
long pa = _memory.GetPhysicalAddress(position);
long va = position;
long addr = pa;
long pa = _memory.GetPhysicalAddress(va);
long endAddr = (addr + size + PageMask) & ~PageMask;
long endAddr = (va + size + PageMask) & ~PageMask;
long addrTruncated = va & ~PageMask;
bool modified = _memory.IsRegionModified(addrTruncated, endAddr - addrTruncated);
int newBuffMask = 1 << (int)bufferType;
_memory.StartObservingRegion(position, size);
long cachedPagesCount = 0;
while (addr < endAddr)
while (va < endAddr)
{
long page = addr >> PageBits;
long page = _memory.GetPhysicalAddress(va) >> PageBits;
ConcurrentDictionary<long, int> dictionary = CachedPages[page];
@ -57,6 +49,10 @@ namespace Ryujinx.Graphics.Memory
CachedPages[page] = dictionary;
}
else if (modified)
{
CachedPages[page].Clear();
}
if (dictionary.TryGetValue(pa, out int currBuffMask))
{
@ -74,10 +70,10 @@ namespace Ryujinx.Graphics.Memory
dictionary[pa] = newBuffMask;
}
addr += PageSize;
va += PageSize;
}
return cachedPagesCount != (endAddr - pa + PageMask) >> PageBits;
return cachedPagesCount != (endAddr - addrTruncated) >> PageBits;
}
}
}

View file

@ -0,0 +1,19 @@
using System;
namespace Ryujinx.Graphics.Texture
{
[Flags]
public enum TextureInstructionSuffix
{
None = 0x00, // No Modifier
LZ = 0x02, // Load LOD Zero
LB = 0x08, // Load Bias
LL = 0x10, // Load LOD
LBA = 0x20, // Load Bias with OperA? Auto?
LLA = 0x40, // Load LOD with OperA? Auto?
DC = 0x80, // Depth Compare
AOffI = 0x100, // Offset
MZ = 0x200, // Multisample Zero?
PTP = 0x400 // ???
}
}

View file

@ -216,10 +216,11 @@ namespace Ryujinx.Graphics.VDec
GalImage Image = new GalImage(
OutputConfig.SurfaceWidth,
OutputConfig.SurfaceHeight, 1,
OutputConfig.GobBlockHeight,
OutputConfig.SurfaceHeight, 1, 1, 1,
OutputConfig.GobBlockHeight, 1,
GalMemoryLayout.BlockLinear,
GalImageFormat.RGBA8 | GalImageFormat.Unorm);
GalImageFormat.RGBA8 | GalImageFormat.Unorm,
GalTextureTarget.TwoD);
ImageUtils.WriteTexture(Vmm, Image, Vmm.GetPhysicalAddress(OutputConfig.SurfaceLumaAddress), Frame.Data);
}

View file

@ -1,5 +1,5 @@
using ChocolArm64.Memory;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.HLE
{
@ -7,13 +7,13 @@ namespace Ryujinx.HLE
{
public const long RamSize = 4L * 1024 * 1024 * 1024;
public IntPtr RamPointer { get; private set; }
public IntPtr RamPointer { get; }
private unsafe byte* _ramPtr;
public unsafe DeviceMemory()
{
RamPointer = Marshal.AllocHGlobal(new IntPtr(RamSize));
RamPointer = MemoryManagement.AllocateWriteTracked(RamSize);
_ramPtr = (byte*)RamPointer;
}
@ -177,7 +177,7 @@ namespace Ryujinx.HLE
protected virtual void Dispose(bool disposing)
{
Marshal.FreeHGlobal(RamPointer);
MemoryManagement.Free(RamPointer);
}
}
}

View file

@ -66,7 +66,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
if (currentProcess.CpuMemory.IsMapped((long)address) &&
currentProcess.CpuMemory.IsMapped((long)address + 3))
{
currentProcess.CpuMemory.WriteInt32ToSharedAddr((long)address, value);
currentProcess.CpuMemory.WriteInt32((long)address, value);
return true;
}

View file

@ -80,12 +80,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
public bool IsPaused { get; private set; }
public Translator Translator { get; private set; }
public MemoryManager CpuMemory { get; private set; }
public Translator Translator { get; private set; }
private SvcHandler _svcHandler;
private Horizon _system;
public HleProcessDebugger Debugger { get; private set; }
public KProcess(Horizon system) : base(system)
@ -93,14 +95,10 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
_processLock = new object();
_threadingLock = new object();
CpuMemory = new MemoryManager(system.Device.Memory.RamPointer);
CpuMemory.InvalidAccess += InvalidAccessHandler;
_system = system;
AddressArbiter = new KAddressArbiter(system);
MemoryManager = new KMemoryManager(system, CpuMemory);
_fullTlsPages = new SortedDictionary<ulong, KTlsPageInfo>();
_freeTlsPages = new SortedDictionary<ulong, KTlsPageInfo>();
@ -110,10 +108,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
_threads = new LinkedList<KThread>();
Translator = new Translator(CpuMemory);
Translator.CpuTrace += CpuTraceHandler;
_svcHandler = new SvcHandler(system.Device, this);
Debugger = new HleProcessDebugger(this);
@ -131,6 +125,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
AddressSpaceType addrSpaceType = (AddressSpaceType)((creationInfo.MmuFlags >> 1) & 7);
InitializeMemoryManager(addrSpaceType, memRegion);
bool aslrEnabled = ((creationInfo.MmuFlags >> 5) & 1) != 0;
ulong codeAddress = creationInfo.CodeAddress;
@ -238,6 +234,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
AddressSpaceType addrSpaceType = (AddressSpaceType)((creationInfo.MmuFlags >> 1) & 7);
InitializeMemoryManager(addrSpaceType, memRegion);
bool aslrEnabled = ((creationInfo.MmuFlags >> 5) & 1) != 0;
ulong codeAddress = creationInfo.CodeAddress;
@ -405,7 +403,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
case AddressSpaceType.Addr36Bits:
case AddressSpaceType.Addr39Bits:
_memoryUsageCapacity = MemoryManager.HeapRegionEnd -
MemoryManager.HeapRegionStart;
MemoryManager.HeapRegionStart;
break;
case AddressSpaceType.Addr32BitsNoMap:
@ -1010,9 +1008,29 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
}
}
private void InvalidAccessHandler(object sender, MemoryAccessEventArgs e)
private void InitializeMemoryManager(AddressSpaceType addrSpaceType, MemoryRegion memRegion)
{
PrintCurrentThreadStackTrace();
int addrSpaceBits;
switch (addrSpaceType)
{
case AddressSpaceType.Addr32Bits: addrSpaceBits = 32; break;
case AddressSpaceType.Addr36Bits: addrSpaceBits = 36; break;
case AddressSpaceType.Addr32BitsNoMap: addrSpaceBits = 32; break;
case AddressSpaceType.Addr39Bits: addrSpaceBits = 39; break;
default: throw new ArgumentException(nameof(addrSpaceType));
}
bool useFlatPageTable = memRegion == MemoryRegion.Application;
CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
MemoryManager = new KMemoryManager(_system, CpuMemory);
Translator = new Translator(CpuMemory);
Translator.CpuTrace += CpuTraceHandler;
}
public void PrintCurrentThreadStackTrace()

View file

@ -1,5 +1,4 @@
using ChocolArm64.Events;
using ChocolArm64.Memory;
using ChocolArm64.State;
using Ryujinx.HLE.HOS.Kernel.Process;
using System;
@ -11,14 +10,12 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
private Switch _device;
private KProcess _process;
private Horizon _system;
private MemoryManager _memory;
public SvcHandler(Switch device, KProcess process)
{
_device = device;
_process = process;
_system = device.System;
_memory = process.CpuMemory;
}
public void SvcCall(object sender, InstExceptionEventArgs e)

View file

@ -93,7 +93,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
private KernelResult SendSyncRequest(ulong messagePtr, ulong size, int handle)
{
byte[] messageData = _memory.ReadBytes((long)messagePtr, (long)size);
byte[] messageData = _process.CpuMemory.ReadBytes((long)messagePtr, (long)size);
KClientSession clientSession = _process.HandleTable.GetObject<KClientSession>(handle);
@ -142,7 +142,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
ipcMessage.Thread.ObjSyncResult = IpcHandler.IpcCall(
_device,
_process,
_memory,
_process.CpuMemory,
ipcMessage.Session,
ipcMessage.Message,
ipcMessage.MessagePtr);

View file

@ -62,11 +62,6 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
attributeMask,
attributeValue);
if (result == KernelResult.Success)
{
_memory.StopObservingRegion((long)position, (long)size);
}
return result;
}
@ -157,14 +152,14 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
{
KMemoryInfo blkInfo = _process.MemoryManager.QueryMemory(position);
_memory.WriteUInt64((long)infoPtr + 0x00, blkInfo.Address);
_memory.WriteUInt64((long)infoPtr + 0x08, blkInfo.Size);
_memory.WriteInt32 ((long)infoPtr + 0x10, (int)blkInfo.State & 0xff);
_memory.WriteInt32 ((long)infoPtr + 0x14, (int)blkInfo.Attribute);
_memory.WriteInt32 ((long)infoPtr + 0x18, (int)blkInfo.Permission);
_memory.WriteInt32 ((long)infoPtr + 0x1c, blkInfo.IpcRefCount);
_memory.WriteInt32 ((long)infoPtr + 0x20, blkInfo.DeviceRefCount);
_memory.WriteInt32 ((long)infoPtr + 0x24, 0);
_process.CpuMemory.WriteUInt64((long)infoPtr + 0x00, blkInfo.Address);
_process.CpuMemory.WriteUInt64((long)infoPtr + 0x08, blkInfo.Size);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x10, (int)blkInfo.State & 0xff);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x14, (int)blkInfo.Attribute);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x18, (int)blkInfo.Permission);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x1c, blkInfo.IpcRefCount);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x20, blkInfo.DeviceRefCount);
_process.CpuMemory.WriteInt32 ((long)infoPtr + 0x24, 0);
return KernelResult.Success;
}

View file

@ -201,7 +201,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
private void OutputDebugString(ulong strPtr, ulong size)
{
string str = MemoryHelper.ReadAsciiString(_memory, (long)strPtr, (long)size);
string str = MemoryHelper.ReadAsciiString(_process.CpuMemory, (long)strPtr, (long)size);
Logger.PrintWarning(LogClass.KernelSvc, str);
}

View file

@ -1,3 +1,4 @@
using ChocolArm64.Memory;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Process;
using Ryujinx.HLE.HOS.Kernel.Threading;
@ -346,79 +347,81 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
return KernelResult.InvalidThread;
}
_memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0);
_memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1);
_memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2);
_memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3);
_memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4);
_memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5);
_memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6);
_memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7);
_memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8);
_memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9);
_memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10);
_memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11);
_memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12);
_memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13);
_memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14);
_memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15);
_memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16);
_memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17);
_memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18);
_memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19);
_memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20);
_memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21);
_memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22);
_memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23);
_memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24);
_memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25);
_memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26);
_memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27);
_memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28);
_memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29);
_memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30);
_memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31);
MemoryManager memory = currentProcess.CpuMemory;
_memory.WriteInt64((long)address + 0x100, thread.LastPc);
memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0);
memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1);
memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2);
memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3);
memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4);
memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5);
memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6);
memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7);
memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8);
memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9);
memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10);
memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11);
memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12);
memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13);
memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14);
memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15);
memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16);
memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17);
memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18);
memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19);
memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20);
memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21);
memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22);
memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23);
memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24);
memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25);
memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26);
memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27);
memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28);
memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29);
memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30);
memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31);
_memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr);
memory.WriteInt64((long)address + 0x100, thread.LastPc);
_memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0);
_memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1);
_memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2);
_memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3);
_memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4);
_memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5);
_memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6);
_memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7);
_memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8);
_memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9);
_memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10);
_memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11);
_memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12);
_memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13);
_memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14);
_memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15);
_memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16);
_memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17);
_memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18);
_memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19);
_memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20);
_memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21);
_memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22);
_memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23);
_memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24);
_memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25);
_memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26);
_memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27);
_memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28);
_memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29);
_memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30);
_memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31);
memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr);
_memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr);
_memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr);
_memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr);
memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0);
memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1);
memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2);
memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3);
memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4);
memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5);
memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6);
memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7);
memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8);
memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9);
memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10);
memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11);
memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12);
memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13);
memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14);
memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15);
memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16);
memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17);
memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18);
memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19);
memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20);
memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21);
memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22);
memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23);
memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24);
memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25);
memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26);
memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27);
memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28);
memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29);
memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30);
memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31);
memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr);
memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr);
memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr);
return KernelResult.Success;
}

Some files were not shown because too many files have changed in this diff Show more