diff --git a/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/ARMeilleure/Decoders/OpCode32SimdImm44.cs index a74c8da56b..8f84c45172 100644 --- a/ARMeilleure/Decoders/OpCode32SimdImm44.cs +++ b/ARMeilleure/Decoders/OpCode32SimdImm44.cs @@ -12,19 +12,28 @@ namespace ARMeilleure.Decoders public int Elems { get; private set; } public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Vd = (opCode >> 12) & 0xf; - Vd |= (opCode >> 18) & 0x10; + Size = (opCode >> 8) & 0x3; - Size = ((opCode >> 8) & 0x3) + 1; + var single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } long imm; imm = ((uint)opCode >> 0) & 0xf; imm |= ((uint)opCode >> 12) & 0xf0; - Immediate = OpCodeSimdHelper.VFPExpandImm(imm, 8 << (Size)); + //OpCodeSimdHelper.VFPExpandImm(imm, 8 << (Size + 1)); + Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm]; - RegisterSize = (Size == 3) ? RegisterSize.Simd64 : RegisterSize.Simd32; + RegisterSize = (!single) ? RegisterSize.Simd64 : RegisterSize.Simd32; Elems = 1; } } diff --git a/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs index 0482679506..1863c7398e 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMemImm.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs @@ -20,6 +20,8 @@ namespace ARMeilleure.Decoders Rn = (opCode >> 16) & 0xf; Size = (opCode >> 8) & 0x3; + Immediate <<= (Size == 1) ? 
1 : 2; + bool u = (opCode & (1 << 23)) != 0; Add = u; diff --git a/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs index 1acac05bf5..3e2a3bf7ff 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMemMult.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs @@ -17,11 +17,11 @@ namespace ARMeilleure.Decoders public bool IsLoad { get; private set; } public bool DoubleWidth { get; private set; } + public bool Add { get; private set; } public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { Rn = (opCode >> 16) & 0xf; - Vd = (opCode >> 12) & 0xf; bool isLoad = (opCode & (1 << 20)) != 0; bool w = (opCode & (1 << 21)) != 0; @@ -30,6 +30,17 @@ namespace ARMeilleure.Decoders DoubleWidth = (opCode & (1 << 8)) != 0; + if (!DoubleWidth) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + Add = u; + RegisterRange = opCode & 0xff; int regsSize = RegisterRange * 4; // double mode is still measured in single register size @@ -39,11 +50,6 @@ namespace ARMeilleure.Decoders Offset -= regsSize; } - if (u == p) - { - Offset += 4; - } - if (w) { PostOffset = u ? 
regsSize : -regsSize; diff --git a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs index 867da92380..3b361b71f4 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs @@ -24,7 +24,7 @@ namespace ARMeilleure.Decoders public bool WBack { get; private set; } public bool RegisterIndex { get; private set; } public int Size { get; private set; } - public int Elems => GetBytesCount() >> Size; + public int Elems => 8 >> Size; public int Regs { get; private set; } public int Increment { get; private set; } public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs index 446fb127f5..88b859b3d0 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMovGp.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs @@ -23,7 +23,7 @@ namespace ARMeilleure.Decoders Opc1 = ((opCode >> 21) & 0x3); Opc2 = ((opCode >> 5) & 0x3); - Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); Rt = (opCode >> 12) & 0xf; } } diff --git a/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/ARMeilleure/Decoders/OpCodeSimdHelper.cs index c5547acd69..58ad6860ec 100644 --- a/ARMeilleure/Decoders/OpCodeSimdHelper.cs +++ b/ARMeilleure/Decoders/OpCodeSimdHelper.cs @@ -41,14 +41,14 @@ namespace ARMeilleure.Decoders case 2: // 2 x 32-bits floating point Immediate. - size = 0 + fpBaseSize; + size = 3; imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; imm |= imm << 32; break; case 3: // 64-bits floating point Immediate. 
- size = 1 + fpBaseSize; + size = 3; imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; break; } diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 5cf0116c1f..e763c0e429 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -730,8 +730,9 @@ namespace ARMeilleure.Decoders SetA32("<<<<11100x11xxxxxxxx10xxx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); SetA32("111100100x0xxxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); SetA32("111100100x0xxxxxxxxx1100xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11101x11010xxxxx10xx01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdReg)); - SetA32("<<<<11101x11010xxxxx10xx11x0xxxx", InstName.Vcmpe,InstEmit32.Vcmpe, typeof(OpCode32SimdReg)); + SetA32("<<<<11101x11010xxxxx10xx01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS)); + SetA32("<<<<11101x11010xxxxx10xx11x0xxxx", InstName.Vcmpe,InstEmit32.Vcmpe, typeof(OpCode32SimdS)); + SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); SetA32("<<<<11101x00xxxxxxxx10xxx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); @@ -762,8 +763,14 @@ namespace ARMeilleure.Decoders SetA32("111101001x10xxxxxxxx1011xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); //inc = 1/2 (itype) - SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + 
SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + + SetA32("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx01xxxxxxxx10xxxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, typeof(OpCode32SimdMemImm)); SetA32("<<<<11100x00xxxxxxxx10xxx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, typeof(OpCode32SimdRegS)); @@ -820,8 +827,13 @@ namespace ARMeilleure.Decoders SetA32("111101001x00xxxxxxxx1011xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemSingle)); SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemPair)); //inc = 1/2 (itype) - SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + + SetA32("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); 
SetA32("<<<<1101xx00xxxxxxxx10xxxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm)); SetA32("<<<<11101x110001xxxx10xx11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs index 4332d5f2d4..44950b9b36 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -26,7 +26,7 @@ namespace ARMeilleure.Instructions private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) { - OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; bool cmpWithZero = (op.RawOpCode & (1 << 16)) != 0; { diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index e9444b508a..9a70b189b5 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -14,6 +14,52 @@ namespace ARMeilleure.Instructions { static partial class InstEmit32 { + private static int FlipVdBits(int vd, bool lowBit) + { + if (lowBit) + { + //move the low bit to the top + return ((vd & 0x1) << 4) | (vd >> 1); + } + else + { + //move the high bit to the bottom + return ((vd & 0xf) << 1) | (vd >> 4); + } + } + + public static void Vcvt_FD(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + int vm = op.Vm; + int vd; + if (op.Size == 3) + { + vd = FlipVdBits(op.Vd, true); + // double to single + Operand fp = ExtractScalar(context, OperandType.FP64, vm); + + Operand res = context.ConvertToFP(OperandType.FP32, fp); + + InsertScalar(context, vd, res); + + //Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + } + else + { + vd = FlipVdBits(op.Vd, false); + // single to double + Operand fp = ExtractScalar(context, OperandType.FP32, vm); + + Operand res = context.ConvertToFP(OperandType.FP64, fp); + + 
InsertScalar(context, vd, res); + + //Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n); + } + } + public static void Vcvt_FI(ArmEmitterContext context) { OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 4b7fc7aacf..458a698618 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -74,12 +74,12 @@ namespace ARMeilleure.Instructions int elems = op.Elems; (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - Operand vec = GetVec(index); + Operand vec = GetVecA32(index); Operand res = vec; - for (int item = 0; item < elems; item++, subIndex++) + for (int item = 0; item < elems; item++) { - res = EmitVectorInsert(context, vec, emit(imm), subIndex, op.Size); + res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size); } context.Copy(vec, res); @@ -153,7 +153,7 @@ namespace ARMeilleure.Instructions OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; - int elems = op.GetBytesCount() >> sizeF + 2; + int elems = op.GetBytesCount() >> (sizeF + 2); (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs index a240a01aea..cb255e12ff 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs @@ -64,16 +64,29 @@ namespace ARMeilleure.Instructions { //write an element from a double simd register Operand address = context.Add(n, Const(offset)); - int index = ((d & 1) << (3 - op.Size)) + op.Index; - if (load) + if (eBytes == 8) { - EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + if (load) + { + EmitDVectorLoad(context, address, d); + } + else + { + EmitDVectorStore(context, address, d); + } } else { - EmitStoreSimd(context, address, d >> 1, index, op.Size); + int index = ((d & 1) << (3 - op.Size)) + op.Index; + if (load) + { + EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + } + else + { + EmitStoreSimd(context, address, d >> 1, index, op.Size); + } } - //TODO: big endian at size == 4 offset += eBytes; d += op.Increment; } @@ -111,14 +124,29 @@ namespace ARMeilleure.Instructions // write an element from a double simd register // add ebytes for each element Operand address = context.Add(n, Const(offset)); - int index = ((d & 1) << (3 - op.Size)) + elem; - if (load) + int index = ((elemD & 1) << (3 - op.Size)) + elem; + if (eBytes == 8) { - EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); - } + if (load) + { + EmitDVectorLoad(context, address, elemD); + } + else + { + EmitDVectorStore(context, address, elemD); + } + } else { - EmitStoreSimd(context, address, d >> 1, index, op.Size); + + if (load) + { + EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD 
>> 1, index, op.Size); + } + else + { + EmitStoreSimd(context, address, elemD >> 1, index, op.Size); + } } offset += eBytes; @@ -146,13 +174,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; - Operand n = GetIntA32(context, op.Rn); + Operand n = context.Copy(GetIntA32(context, op.Rn)); Operand baseAddress = context.Add(n, Const(op.Offset)); - bool writesToPc = (op.RegisterRange & (1 << RegisterAlias.Aarch32Pc)) != 0; - - bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc); + bool writeBack = op.PostOffset != 0; if (writeBack) { @@ -160,6 +186,7 @@ namespace ARMeilleure.Instructions } int range = op.RegisterRange; + int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd; int offset = 0; int size = (op.DoubleWidth) ? DWordSizeLog2 : WordSizeLog2; @@ -179,7 +206,7 @@ namespace ARMeilleure.Instructions { OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; - Operand n = GetIntA32(context, op.Rn); + Operand n = context.Copy(GetIntA32(context, op.Rn)); Operand baseAddress = context.Add(n, Const(op.Offset)); @@ -217,6 +244,52 @@ namespace ARMeilleure.Instructions EmitVLoadOrStore(context, AccessType.Store); } + private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, 
int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand vec = GetVecA32(vecQ); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType) { OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp; @@ -235,26 +308,7 @@ namespace ARMeilleure.Instructions if (size == DWordSizeLog2) { - int vecQ = op.Vd >> 1; - int vecSElem = (op.Vd & 1) << 1; - Operand vec = GetVecA32(vecQ); - - Operand lblBigEndian = Label(); - Operand lblEnd = Label(); - - context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); - - EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2); - EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2); - - context.Branch(lblEnd); - - context.MarkLabel(lblBigEndian); - - EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2); - EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2); - - context.MarkLabel(lblEnd); + EmitDVectorLoad(context, address, op.Vd); } else { @@ -266,24 +320,7 @@ namespace ARMeilleure.Instructions { if (size == DWordSizeLog2) { - int vecQ = op.Vd >> 1; - int vecSElem = (op.Vd & 1) << 1; - Operand lblBigEndian = Label(); - Operand lblEnd = Label(); - - context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); - - EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2); - EmitStoreSimd(context, context.Add(address, Const(4)), 
vecQ, vecSElem | 1, WordSizeLog2); - - context.Branch(lblEnd); - - context.MarkLabel(lblBigEndian); - - EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2); - EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2); - - context.MarkLabel(lblEnd); + EmitDVectorStore(context, address, op.Vd); } else { diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs index 6e5284cb5e..06069cf8fe 100644 --- a/ARMeilleure/Translation/TranslatedFunction.cs +++ b/ARMeilleure/Translation/TranslatedFunction.cs @@ -24,7 +24,7 @@ namespace ARMeilleure.Translation public bool ShouldRejit() { - return false && _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit; + return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit; } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 6531e9f0f7..7c53f5bdec 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation private ConcurrentDictionary _funcs; - private PriorityQueue _backgroundQueue; + private PriorityQueue> _backgroundQueue; private AutoResetEvent _backgroundTranslatorEvent; @@ -32,7 +32,7 @@ namespace ARMeilleure.Translation _funcs = new ConcurrentDictionary(); - _backgroundQueue = new PriorityQueue(2); + _backgroundQueue = new PriorityQueue>(2); _backgroundTranslatorEvent = new AutoResetEvent(false); } @@ -41,11 +41,11 @@ namespace ARMeilleure.Translation { while (_threadCount != 0) { - if (_backgroundQueue.TryDequeue(out ulong address)) + if (_backgroundQueue.TryDequeue(out Tuple request)) { - TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true); + TranslatedFunction func = Translate(request.Item1, request.Item2, highCq: true); - _funcs.AddOrUpdate(address, func, (key, oldFunc) => func); + 
_funcs.AddOrUpdate(request.Item1, func, (key, oldFunc) => func); } else { @@ -111,7 +111,7 @@ namespace ARMeilleure.Translation } else if (isCallTarget && func.ShouldRejit()) { - _backgroundQueue.Enqueue(0, address); + _backgroundQueue.Enqueue(0, new Tuple(address, mode)); _backgroundTranslatorEvent.Set(); } diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs index f987c83c01..751b9a8433 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs @@ -795,6 +795,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process { context.Interrupt += InterruptHandler; context.SupervisorCall += _svcHandler.SvcCall; + context.Break += (object sender, InstExceptionEventArgs e) => _svcHandler.Break64((ulong)e.Id, 0, 0); context.Undefined += UndefinedInstructionHandler; } diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch32.cs b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs index 5209b60da6..fa9e930d46 100644 --- a/Ryujinx.Tests.Unicorn/UnicornAArch32.cs +++ b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs @@ -55,7 +55,7 @@ namespace Ryujinx.Tests.Unicorn public int Fpscr { - get => (int)GetRegister(Arm32Register.FPSCR); + get => (int)GetRegister(Arm32Register.FPSCR) | ((int)GetRegister(Arm32Register.FPSCR_NZCV)); set => SetRegister(Arm32Register.FPSCR, (uint)value); } @@ -87,6 +87,8 @@ namespace Ryujinx.Tests.Unicorn { Interface.Checked(Interface.uc_open(UnicornArch.UC_ARCH_ARM, UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc)); + SetRegister(Arm32Register.C1_C0_2, GetRegister(Arm32Register.C1_C0_2) | 0xf00000); + SetRegister(Arm32Register.FPEXC, 0x40000000); //SetRegister(Arm32Register.FPSCR, 0x00300000); } @@ -172,7 +174,7 @@ namespace Ryujinx.Tests.Unicorn throw new ArgumentOutOfRangeException(nameof(index)); } - return GetVector(QRegisters[index]); + return GetVector((Arm32Register)((int)Arm32Register.D0 + index * 2)); //QRegisters[index]); } public void SetQ(int index, SimdValue value) @@ -182,10 +184,10 @@ 
namespace Ryujinx.Tests.Unicorn throw new ArgumentOutOfRangeException(nameof(index)); } - SetVector(QRegisters[index], value); + SetVector((Arm32Register)((int)Arm32Register.D0 + index * 2), value); } - private uint GetRegister(Arm32Register register) + public uint GetRegister(Arm32Register register) { byte[] data = new byte[4]; @@ -194,27 +196,31 @@ namespace Ryujinx.Tests.Unicorn return (uint)BitConverter.ToInt32(data, 0); } - private void SetRegister(Arm32Register register, uint value) + public void SetRegister(Arm32Register register, uint value) { byte[] data = BitConverter.GetBytes(value); Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); } - private SimdValue GetVector(Arm32Register register) + public SimdValue GetVector(Arm32Register register) { - byte[] data = new byte[16]; + byte[] data = new byte[8]; Interface.Checked(Interface.uc_reg_read(uc, (int)register, data)); + ulong lo = BitConverter.ToUInt64(data, 0); + Interface.Checked(Interface.uc_reg_read(uc, (int)register + 1, data)); + ulong hi = BitConverter.ToUInt64(data, 0); - return new SimdValue(data); + return new SimdValue(lo, hi); } private void SetVector(Arm32Register register, SimdValue value) { - byte[] data = value.ToArray(); - + byte[] data = BitConverter.GetBytes(value.GetUInt64(0)); Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + data = BitConverter.GetBytes(value.GetUInt64(1)); + Interface.Checked(Interface.uc_reg_write(uc, (int)register + 1, data)); } public byte[] MemoryRead(ulong address, ulong size) diff --git a/Ryujinx.Tests/Cpu/CpuTest32.cs b/Ryujinx.Tests/Cpu/CpuTest32.cs index df236ffe86..ac153ec294 100644 --- a/Ryujinx.Tests/Cpu/CpuTest32.cs +++ b/Ryujinx.Tests/Cpu/CpuTest32.cs @@ -5,13 +5,14 @@ using NUnit.Framework; using Ryujinx.Tests.Unicorn; using System; using System.Collections.Generic; +using System.Linq; using System.Runtime.InteropServices; using System.Text; namespace Ryujinx.Tests.Cpu { [TestFixture] - class CpuTest32 + public 
class CpuTest32 { private uint _currAddress; private long _size; @@ -29,6 +30,8 @@ namespace Ryujinx.Tests.Cpu private static bool _unicornAvailable; private UnicornAArch32 _unicornEmu; + private bool usingMemory; + static CpuTest32() { _unicornAvailable = UnicornAArch32.IsAvailable(); @@ -47,9 +50,10 @@ namespace Ryujinx.Tests.Cpu _entryPoint = _currAddress; - _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size)); - _memory = new MemoryManager(_ramPointer); - _memory.Map((long)_currAddress, 0, _size); + _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size * 2)); + _memory = new MemoryManager(_ramPointer, addressSpaceBits: 16, useFlatPageTable: true); + _memory.Map((long)_currAddress, 0, _size*2); + //_memory.Map((long)(_currAddress + _size), _size, _size); _context = new ExecutionContext(); _context.IsAarch32 = true; @@ -60,6 +64,7 @@ namespace Ryujinx.Tests.Cpu { _unicornEmu = new UnicornAArch32(); _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC); + _unicornEmu.MemoryMap((ulong)(_currAddress + _size), (ulong)_size, MemoryPermission.READ | MemoryPermission.WRITE); _unicornEmu.PC = _entryPoint; } } @@ -191,9 +196,14 @@ namespace Ryujinx.Tests.Cpu bool carry = false, bool zero = false, bool negative = false, - int fpscr = 0) + int fpscr = 0, + bool copyFpFlags = false) { Opcode(opcode); + if (copyFpFlags) + { + Opcode(0xeef1fa10); + } Opcode(0xe12fff1e); // BX LR SetContext(r0, r1, r2, r3, sp, v0, v1, v2, v3, v4, v5, v14, v15, overflow, carry, zero, negative, fpscr); ExecuteOpcodes(); @@ -201,6 +211,18 @@ namespace Ryujinx.Tests.Cpu return GetContext(); } + protected void SetWorkingMemory(byte[] data) + { + _memory.WriteBytes(0x2000, data); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite((ulong)(0x2000), data); + } + + usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too. + } + /// Rounding Mode control field. 
public enum RMode { @@ -247,7 +269,10 @@ namespace Ryujinx.Tests.Cpu Idc = 1 << 7, /// Cumulative saturation bit. - Qc = 1 << 27 + Qc = 1 << 27, + + /// NZCV flags + Nzcv = (1 << 28) | (1 << 29) | (1 << 30) | (1 << 31) } [Flags] @@ -331,6 +356,16 @@ namespace Ryujinx.Tests.Cpu Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag)); Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag)); + + if (usingMemory) + { + byte[] meilleureMem = _memory.ReadBytes((long)(0x2000), _size); + byte[] unicornMem = _unicornEmu.MemoryRead((ulong)(0x2000), (ulong)_size); + + for (int i = 0; i < _size; i++) { + Assert.AreEqual(meilleureMem[i], unicornMem[i]); + } + } } private void ManageFpSkips(FpSkips fpSkips) diff --git a/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs new file mode 100644 index 0000000000..aec5a59bba --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs @@ -0,0 +1,224 @@ +//#define AluRs32 + +using NUnit.Framework; + +namespace Ryujinx.Tests.Cpu +{ + [Category("AluRs")] + public sealed class CpuTestAluRs32 : CpuTest32 + { +#if AluRs32 + private const int RndCnt = 50; + private const int RndCntAmount = 50; + private const int RndCntLsb = 2; + + [Test, Pairwise, Description("ADC , , ")] + public void Adc([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0a00000; // ADC R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, 
Description("ADCS , , ")] + public void Adcs([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0b00000; // ADCS R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("ADD , , {, #}")] + public void Add([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + uint opcode = 0xe0800000; // ADD R0, R0, R0, LSL #0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("ADDS , , {, #}")] + public void Adds([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + uint opcode = 0xe0900000; // ADDS R0, R0, R0, LSL #0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 
12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("RSB , , {, #}")] + public void Rsb([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + uint opcode = 0xe0600000; // RSB R0, R0, R0, LSL #0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("RSBS , , {, #}")] + public void Rsbs([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + uint opcode = 0xe0700000; // RSBS R0, R0, R0, LSL #0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("RSC , , ")] + public void Rsc([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 
0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0e00000; // RSC R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("RSCS , , ")] + public void Rscs([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0f00000; // RSCS R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SBC , , ")] + public void Sbc([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0c00000; // SBC R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SBCS , , ")] + public void Sbcs([Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 
0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + uint opcode = 0xe0d00000; // SBCS R0, R0, R0 + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestBf32.cs b/Ryujinx.Tests/Cpu/CpuTestBf32.cs new file mode 100644 index 0000000000..684a404219 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestBf32.cs @@ -0,0 +1,56 @@ +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Ryujinx.Tests.Cpu +{ + [Category("Bfm")] + public sealed class CpuTestBf32 : CpuTest32 + { + private const int RndCnt = 10; + private const int RndCntImmr = 10; + private const int RndCntImms = 10; + + [Test, Pairwise, Description("BFC , #, #")] + public void Bfc([Values(0u, 0xdu)] uint rd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wd, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // don't test unpredictable for now + uint opcode = 0xe7c0001f; // BFC R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("BFI , , #, #")] + public void Bfi([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // don't test unpredictable for now + uint 
opcode = 0xe7c00010; // BFI r0, r0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs new file mode 100644 index 0000000000..840b387f98 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs @@ -0,0 +1,295 @@ +//#define SimdMem32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdMemory32")] + public sealed class CpuTestSimdMemory32 : CpuTest32 + { +#if SimdMem32 + private const int RndCntImm = 2; + + private uint[] LDSTModes = + { + //LD1 + 0b0111, + 0b1010, + 0b0110, + 0b0010, + + //LD2 + 0b1000, + 0b1001, + 0b0011, + + //LD3 + 0b0100, + 0b0101, + + //LD4 + 0b0000, + 0b0001 + }; + + [Test, Combinatorial, Description("VLDn. 
, [ {:}]{ /!/, } (single n element structure)")] + public void Vldn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4a00000; // vld1.8 {d0[0]}, [r0], r0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; //LD1 is 0, LD2 is 1 etc + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Combinatorial, Description("VLDn. , [ {:}]{ /!/, } (multiple n element structures)")] + public void Vldn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4200000; // vld4.8 {d0, d1, d2, d3}, [r0], r0 + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Combinatorial, Description("VSTn. 
, [ {:}]{ /!/, } (single n element structure)")] + public void Vstn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4800000; // vst1.8 {d0[0]}, [r0], r0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; //ST1 is 0, ST2 is 1 etc + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Combinatorial, Description("VSTn. , [ {:}]{ /!/, } (multiple n element structures)")] + public void Vstn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4000000; // vst4.8 {d0, d1, d2, d3}, [r0], r0 + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Combinatorial, Description("VLDM. 
{!}, ")] + public void Vldm([Values(0u, 13u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 2u)] uint mode, + [Values(0x1u, 0x32u)] [Random(2u, 31u, RndCntImm)] uint regs, + [Values] bool single) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xec100a00; // VLDM base encoding; addressing-mode bits, Rn, Vd, the single/double bit (bit 8) and the register count are OR-ed in below + + uint[] vldmModes = { + //note: 3rd 0 leaves a space for "D" + 0b0100, // increment after + 0b0101, // increment after ! + 0b1001 // decrement before ! + }; + + opcode |= ((vldmModes[mode] & 15) << 21); + opcode |= ((rn & 15) << 16); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= ((uint)(single ? 0 : 1) << 8); + + if (!single) regs = (regs << 1); //low bit must be 0 - must be even number of registers. + uint regSize = single ? 1u : 2u; + + if (vd + (regs / regSize) > 32) //can't address further than s31 or d31 + { + regs -= (vd + (regs / regSize)) - 32; + } + + if (regs / regSize > 16) //can't do more than 16 registers at a time + { + regs = 16 * regSize; + } + + opcode |= regs & 0xff; + + SingleOpcode(opcode, r0: 0x2500, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDR. 
, [ {, #{+/-}}]")] + public void Vldr([Values(2u, 3u)] uint size, //fp16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed900a00; // VLDR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= (uint)imm & 0xff; + + SingleOpcode(opcode, r0: 0x2500); //correct + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTR. , [ {, #{+/-}}]")] + public void Vstr([Values(2u, 3u)] uint size, //fp16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed800a00; // VSTR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= (uint)imm & 0xff; + + (V128 vec1, V128 vec2, _, _) = GenerateTestVectors(); + + SingleOpcode(opcode, r0: 0x2500, v0: vec1, v1: vec2); //correct + + CompareAgainstUnicorn(); + } + + private (V128, V128, V128, V128) GenerateTestVectors() + { + return ( + new V128(-12.43f, 1872.23f, 4456.23f, -5622.2f), + new V128(0.0f, float.NaN, float.PositiveInfinity, float.NegativeInfinity), + new V128(1.23e10f, -0.0f, -0.123f, 0.123f), + new V128(float.Epsilon, 3.5f, 925.23f, -104.9f) + ); + } + + private byte[] 
GenerateVectorSequence(int length) + { + int floatLength = length >> 2; + float[] data = new float[floatLength]; + + for (int i=0; i
, #")] + public void Movi_V([Range(0u, 10u)] uint variant, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0x0u)] [Random(1u, 0xffu, RndCntImm)] uint imm, + [Values] bool q) + { + uint[] variants = + { + //I32 + 0b0000_0, + 0b0010_0, + 0b0100_0, + 0b0110_0, + + //I16 + 0b1000_0, + 0b1010_0, + + //dt + 0b1100_0, + 0b1101_0, + 0b1110_0, + 0b1111_0, + + 0b1110_1 + }; + + + uint opcode = 0xf2800010; // vmov.i32 d0, #0 + uint cmodeOp = variants[variant]; + + if (q) vd &= 0x1e; + + opcode |= ((cmodeOp & 1) << 5) | ((cmodeOp & 0x1e) << 7); + opcode |= ((q ? 1u : 0u) << 6); + opcode |= (imm & 0xf) | ((imm & 0x70) << 12) | ((imm & 0x80) << 16); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode); //correct + + CompareAgainstUnicorn(); + } + + [Test, Combinatorial, Description("VMOV.F , #")] + public void Movi_S([Range(2u, 3u)] uint size, //fp16 is not supported for now + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm) + { + uint opcode = 0xeeb00800; // invalid + opcode |= (size & 3) << 8; + opcode |= (imm & 0xf) | ((imm & 0xf0) << 12); + + if (size == 2) + { + opcode |= ((vd & 0x1) << 22); + opcode |= ((vd & 0x1e) << 11); + } + else + { + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + } + + SingleOpcode(opcode); //correct + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMOV , ")] + public void Mov_GP([Values(0u, 1u, 2u, 3u)] uint vn, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Random(RndCntImm)] uint valueRn, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool op) + { + uint opcode = 0xee000a10; // invalid + opcode |= (vn & 1) << 7; + opcode |= (vn & 0x1e) << 15; + opcode |= (rt & 0xf) << 12; + + if (op) opcode |= 1 << 20; + + SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: new V128(valueVn1, valueVn2)); //correct + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git 
a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs new file mode 100644 index 0000000000..7195e526a0 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -0,0 +1,279 @@ +#define SimdReg32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdReg32")] + public sealed class CpuTestSimdReg32 : CpuTest32 { +#if SimdReg32 + + #region "ValueSource (Types)" + private static ulong[] _1B1H1S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, + 0x0000000000000080ul, 0x00000000000000FFul, + 0x0000000000007FFFul, 0x0000000000008000ul, + 0x000000000000FFFFul, 0x000000007FFFFFFFul, + 0x0000000080000000ul, 0x00000000FFFFFFFFul, + 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul, + 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1H1S_() + { + return new ulong[] { 0x0000000000000000ul, 0x0000000000007FFFul, + 0x0000000000008000ul, 0x000000000000FFFFul, + 0x000000007FFFFFFFul, 0x0000000080000000ul, + 0x00000000FFFFFFFFul }; + } + + private static ulong[] _4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S_() + { + return new ulong[] { 
0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static IEnumerable<ulong> _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } + + private static IEnumerable<ulong> _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; 
// -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (rnd1 << 32) | rnd1; + yield return (rnd2 << 32) | rnd2; + } + } + + private static IEnumerable<ulong> _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // 
-QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; + } + } + #endregion + + private const int RndCnt = 20; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + [Test, Pairwise, Description("VADD.f32 V0, V0, V0")] + public void Vadd_f32([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_2S_F_")] [Random(RndCnt)] ulong z, + [ValueSource("_2S_F_")] [Random(RndCnt)] ulong a, + [ValueSource("_2S_F_")] [Random(RndCnt)] ulong b, + [Values] bool q) + { + uint opcode = 0xf2000d00; // VADD.f32 D0, D0, D0 + if (q) + { + rm &= 0x1e; + rn &= 0x1e; + rd &= 0x1e; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + if (q) opcode |= 1 << 6; + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsS); + } + + [Test, Pairwise, Description("VCMP.f Vd, Vm")] + public void Vcmp([Values(2u, 3u)] uint size, + [ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b) + { + uint opcode = 0xeeb40840; + uint rm = 1; + uint rd = 2; + + if (size == 3) + { + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + } else + { + opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + } + opcode |= ((size & 3) << 8); + + 
V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + int fpscr = (int)(TestContext.CurrentContext.Random.NextUInt(0xf) << 28); + + SingleOpcode(opcode, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n, fpscr: fpscr, copyFpFlags: true); + + CompareAgainstUnicorn(); + } +#endif + } +}