Merge from master

This commit is contained in:
Alex Barney 2018-12-04 19:29:09 -06:00
commit 6534604cdf
73 changed files with 3591 additions and 214 deletions

View file

@ -10,8 +10,8 @@ namespace ChocolArm64.Decoders
// Decodes the two fields this opcode class adds on top of its base:
// bits [3:0] go to Nzcv and bits [15:12] go to Cond.
public OpCodeSimdFcond64(Inst inst, long position, int opCode) : base(inst, position, opCode)
{
    // Assigned once; the second identical assignment was a leftover duplicate.
    Nzcv = (opCode >> 0) & 0xf;
    Cond = (Cond)((opCode >> 12) & 0xf);
}
}
}
}

View file

@ -1638,7 +1638,34 @@ namespace ChocolArm64.Instructions
/// <summary>
/// Emits the vector negate operation: every element of Rd becomes (0 - Rn).
/// Uses Sse2.Subtract when SSE2 is enabled, otherwise the generic per-element path.
/// </summary>
public static void Neg_V(ILEmitterCtx context)
{
    // The operation must be emitted exactly once: a leading unconditional emit
    // followed by the branch below would negate twice whenever Rd == Rn.
    if (Optimizations.UseSse2)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

        // Helper names indexed by element size log2 (presumably each pushes an all-zero vector).
        string[] namesSzv = new string[] { nameof(VectorHelper.VectorSByteZero),
                                           nameof(VectorHelper.VectorInt16Zero),
                                           nameof(VectorHelper.VectorInt32Zero),
                                           nameof(VectorHelper.VectorInt64Zero) };

        // Left operand first, then Rn, so Subtract computes (left - Rn).
        VectorHelper.EmitCall(context, namesSzv[op.Size]);

        EmitLdvecWithSignedCast(context, op.Rn, op.Size);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));

        EmitStvecWithSignedCast(context, op.Rd, op.Size);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorUnaryOpSx(context, () => context.Emit(OpCodes.Neg));
    }
}
public static void Raddhn_V(ILEmitterCtx context)

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
@ -29,18 +30,14 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
Type[] types = new Type[]
{
VectorUIntTypesPerSizeLog2[op.Size],
VectorUIntTypesPerSizeLog2[op.Size]
};
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), types));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
EmitStvecWithUnsignedCast(context, op.Rd, 0);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -68,41 +65,34 @@ namespace ChocolArm64.Instructions
/// <summary>Emits the BIF vector operation through the shared BIF/BIT emitter with Rm inverted.</summary>
public static void Bif_V(ILEmitterCtx context)
{
    // Only the renamed helper is called; emitting through both the old and the
    // new helper name would apply the bit-insert twice.
    EmitBifBit(context, notRm: true);
}
/// <summary>Emits the BIT vector operation through the shared BIF/BIT emitter with Rm used as-is.</summary>
public static void Bit_V(ILEmitterCtx context)
{
    // Only the renamed helper is called; emitting through both the old and the
    // new helper name would apply the bit-insert twice.
    EmitBifBit(context, notRm: false);
}
private static void EmitBitBif(ILEmitterCtx context, bool notRm)
private static void EmitBifBit(ILEmitterCtx context, bool notRm)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
if (Optimizations.UseSse2)
{
Type[] types = new Type[]
{
VectorUIntTypesPerSizeLog2[op.Size],
VectorUIntTypesPerSizeLog2[op.Size]
};
Type[] typesXorAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
string name = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
EmitStvecWithUnsignedCast(context, op.Rd, 0);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -111,17 +101,18 @@ namespace ChocolArm64.Instructions
}
else
{
int bytes = op.GetBitsCount() >> 3;
int elems = bytes >> op.Size;
int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
for (int index = 0; index < elems; index++)
{
EmitVectorExtractZx(context, op.Rd, index, op.Size);
EmitVectorExtractZx(context, op.Rn, index, op.Size);
EmitVectorExtractZx(context, op.Rd, index, 3);
context.Emit(OpCodes.Dup);
EmitVectorExtractZx(context, op.Rn, index, 3);
context.Emit(OpCodes.Xor);
EmitVectorExtractZx(context, op.Rm, index, op.Size);
EmitVectorExtractZx(context, op.Rm, index, 3);
if (notRm)
{
@ -130,11 +121,9 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.And);
EmitVectorExtractZx(context, op.Rd, index, op.Size);
context.Emit(OpCodes.Xor);
EmitVectorInsert(context, op.Rd, index, op.Size);
EmitVectorInsert(context, op.Rd, index, 3);
}
if (op.RegisterSize == RegisterSize.Simd64)
@ -150,26 +139,22 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
Type[] types = new Type[]
{
VectorUIntTypesPerSizeLog2[op.Size],
VectorUIntTypesPerSizeLog2[op.Size]
};
Type[] typesXorAnd = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
EmitLdvecWithUnsignedCast(context, op.Rn, 0);
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));
EmitLdvecWithUnsignedCast(context, op.Rd, 0);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
EmitStvecWithUnsignedCast(context, op.Rd, 0);
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -207,16 +192,66 @@ namespace ChocolArm64.Instructions
/// <summary>
/// Emits the vector bitwise NOT: Rd = ~Rn.
/// Uses Sse2.AndNot against an all-ones vector when SSE2 is enabled,
/// otherwise the generic per-element path.
/// </summary>
public static void Not_V(ILEmitterCtx context)
{
    // Emit exactly one implementation; no unconditional emit ahead of the branch.
    if (Optimizations.UseSse2)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        Type[] typesSav    = new Type[] { typeof(byte) };
        Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };

        EmitLdvecWithUnsignedCast(context, op.Rn, 0);

        // Every byte set to 0xFF; AndNot(Rn, ones) == ~Rn & ones == ~Rn.
        context.EmitLdc_I4(byte.MaxValue);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));

        EmitStvecWithUnsignedCast(context, op.Rd, 0);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorUnaryOpZx(context, () => context.Emit(OpCodes.Not));
    }
}
/// <summary>
/// Emits the vector OR-NOT: Rd = Rn | ~Rm.
/// Uses Sse2.AndNot/Sse2.Or when SSE2 is enabled, otherwise the generic
/// per-element path (Not followed by Or).
/// </summary>
public static void Orn_V(ILEmitterCtx context)
{
    if (Optimizations.UseSse2)
    {
        OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

        Type[] typesSav      = new Type[] { typeof(byte) };
        Type[] typesAndNotOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };

        EmitLdvecWithUnsignedCast(context, op.Rn, 0);
        EmitLdvecWithUnsignedCast(context, op.Rm, 0);

        // AndNot(Rm, ones) == ~Rm; the following Or combines it with Rn loaded first.
        context.EmitLdc_I4(byte.MaxValue);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr));
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or),     typesAndNotOr));

        EmitStvecWithUnsignedCast(context, op.Rd, 0);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitVectorBinaryOpZx(context, () =>
        {
            context.Emit(OpCodes.Not);
            context.Emit(OpCodes.Or);
        });
    }
}
public static void Orr_V(ILEmitterCtx context)
@ -263,28 +298,122 @@ namespace ChocolArm64.Instructions
/// <summary>
/// Emits REV16 (vector): swaps the two bytes inside every 16-bit container.
/// Uses a single Ssse3.Shuffle with a constant byte mask when SSSE3 is enabled,
/// otherwise falls back to EmitRev_V with containerSize 1.
/// </summary>
public static void Rev16_V(ILEmitterCtx context)
{
    // Emit exactly one implementation; no unconditional EmitRev_V ahead of the branch.
    if (Optimizations.UseSsse3)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        Type[] typesSve = new Type[] { typeof(long), typeof(long) };
        Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };

        EmitLdvecWithSignedCast(context, op.Rn, 0); // value

        // Shuffle mask: result byte i is taken from source byte (i ^ 1).
        context.EmitLdc_I8(14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0); // maskE1
        context.EmitLdc_I8(06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0); // maskE0

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));

        context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));

        EmitStvecWithSignedCast(context, op.Rd, 0);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitRev_V(context, containerSize: 1);
    }
}
/// <summary>
/// Emits REV32 (vector): reverses the order of the elements (bytes when
/// op.Size == 0, halfwords otherwise) inside every 32-bit container.
/// Uses a single Ssse3.Shuffle with a constant byte mask when SSSE3 is enabled,
/// otherwise falls back to EmitRev_V with containerSize 2.
/// </summary>
public static void Rev32_V(ILEmitterCtx context)
{
    // Emit exactly one implementation; no unconditional EmitRev_V ahead of the branch.
    if (Optimizations.UseSsse3)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        Type[] typesSve = new Type[] { typeof(long), typeof(long) };
        Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };

        EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value

        if (op.Size == 0)
        {
            // Byte elements: full byte reversal within each 32-bit group.
            context.EmitLdc_I8(12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0); // maskE1
            context.EmitLdc_I8(04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0); // maskE0
        }
        else /* if (op.Size == 1) */
        {
            // Halfword elements: swap the two 16-bit halves of each 32-bit group.
            context.EmitLdc_I8(13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0); // maskE1
            context.EmitLdc_I8(05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0); // maskE0
        }

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));

        context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));

        EmitStvecWithSignedCast(context, op.Rd, op.Size);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitRev_V(context, containerSize: 2);
    }
}
/// <summary>
/// Emits REV64 (vector): reverses the order of the elements (bytes, halfwords
/// or words, selected by op.Size) inside every 64-bit container.
/// Uses a single Ssse3.Shuffle with a constant byte mask when SSSE3 is enabled,
/// otherwise falls back to EmitRev_V with containerSize 3.
/// </summary>
public static void Rev64_V(ILEmitterCtx context)
{
    // Emit exactly one implementation; no unconditional EmitRev_V ahead of the branch.
    if (Optimizations.UseSsse3)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        Type[] typesSve = new Type[] { typeof(long), typeof(long) };
        Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };

        EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value

        if (op.Size == 0)
        {
            // Byte elements: full byte reversal within each 64-bit half.
            context.EmitLdc_I8(08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0); // maskE1
            context.EmitLdc_I8(00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0); // maskE0
        }
        else if (op.Size == 1)
        {
            // Halfword elements: reverse the four 16-bit elements of each half.
            context.EmitLdc_I8(09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0); // maskE1
            context.EmitLdc_I8(01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0); // maskE0
        }
        else /* if (op.Size == 2) */
        {
            // Word elements: swap the two 32-bit elements of each half.
            context.EmitLdc_I8(11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0); // maskE1
            context.EmitLdc_I8(03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0); // maskE0
        }

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));

        context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));

        EmitStvecWithSignedCast(context, op.Rd, op.Size);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitRev_V(context, containerSize: 3);
    }
}
private static void EmitRev_V(ILEmitterCtx context, int containerSize)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
if (op.Size >= containerSize)
{
throw new InvalidOperationException();
}
int bytes = op.GetBitsCount() >> 3;
int elems = bytes >> op.Size;

View file

@ -110,6 +110,34 @@ namespace ChocolArm64.Instructions
}
}
/// <summary>
/// Emits SQRSHL (vector): for every element, calls
/// SoftFallback.SignedShlRegSatQ(Rn[i], Rm[i], round: true, op.Size, state)
/// and stores the result into Rd[i].
/// </summary>
public static void Sqrshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        EmitVectorExtractSx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_1);                       // round = true
        context.EmitLdc_I4(op.Size);                          // size
        context.EmitLdarg(TranslatedSub.StateArgIdx);         // state

        SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Sqrshrn_S(ILEmitterCtx context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
@ -130,6 +158,34 @@ namespace ChocolArm64.Instructions
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
}
/// <summary>
/// Emits SQSHL (vector, register): for every element, calls
/// SoftFallback.SignedShlRegSatQ(Rn[i], Rm[i], round: false, op.Size, state)
/// and stores the result into Rd[i].
/// </summary>
public static void Sqshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        EmitVectorExtractSx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_0);                       // round = false
        context.EmitLdc_I4(op.Size);                          // size
        context.EmitLdarg(TranslatedSub.StateArgIdx);         // state

        SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Sqshrn_S(ILEmitterCtx context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
@ -150,6 +206,32 @@ namespace ChocolArm64.Instructions
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
}
/// <summary>
/// Emits SRSHL (vector): for every element, calls
/// SoftFallback.SignedShlReg(Rn[i], Rm[i], round: true, op.Size)
/// and stores the result into Rd[i].
/// </summary>
public static void Srshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        EmitVectorExtractSx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_1);                       // round = true
        context.EmitLdc_I4(op.Size);                          // size

        SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Srshr_S(ILEmitterCtx context)
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
@ -252,7 +334,28 @@ namespace ChocolArm64.Instructions
/// <summary>
/// Emits SSHL (vector): for every element, calls
/// SoftFallback.SignedShlReg(Rn[i], Rm[i], round: false, op.Size)
/// and stores the result into Rd[i].
/// </summary>
public static void Sshl_V(ILEmitterCtx context)
{
    // The shift is emitted only through the SoftFallback loop below. A leading
    // EmitVectorShl call would already overwrite Rd, so the loop would then
    // shift a second time whenever Rd aliases Rn or Rm.
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int bytes = op.GetBitsCount() >> 3;
    int elems = bytes >> op.Size;

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtractSx(context, op.Rn, index, op.Size);
        EmitVectorExtractSx(context, op.Rm, index, op.Size);

        context.Emit(OpCodes.Ldc_I4_0);                       // round = false
        context.EmitLdc_I4(op.Size);                          // size

        SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

        EmitVectorInsert(context, op.Rd, index, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Sshll_V(ILEmitterCtx context)
@ -330,6 +433,34 @@ namespace ChocolArm64.Instructions
}
}
/// <summary>
/// Emits UQRSHL (vector): for every element, calls
/// SoftFallback.UnsignedShlRegSatQ(Rn[i], Rm[i], round: true, op.Size, state)
/// and stores the result into Rd[i].
/// </summary>
public static void Uqrshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        EmitVectorExtractZx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_1);                       // round = true
        context.EmitLdc_I4(op.Size);                          // size
        context.EmitLdarg(TranslatedSub.StateArgIdx);         // state

        SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Uqrshrn_S(ILEmitterCtx context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
@ -340,6 +471,34 @@ namespace ChocolArm64.Instructions
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
}
/// <summary>
/// Emits UQSHL (vector, register): for every element, calls
/// SoftFallback.UnsignedShlRegSatQ(Rn[i], Rm[i], round: false, op.Size, state)
/// and stores the result into Rd[i].
/// </summary>
public static void Uqshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        EmitVectorExtractZx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_0);                       // round = false
        context.EmitLdc_I4(op.Size);                          // size
        context.EmitLdarg(TranslatedSub.StateArgIdx);         // state

        SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Uqshrn_S(ILEmitterCtx context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
@ -350,6 +509,32 @@ namespace ChocolArm64.Instructions
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
}
/// <summary>
/// Emits URSHL (vector): for every element, calls
/// SoftFallback.UnsignedShlReg(Rn[i], Rm[i], round: true, op.Size)
/// and stores the result into Rd[i].
/// </summary>
public static void Urshl_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    // Register width in bytes, divided by the element width in bytes.
    int laneCount = (op.GetBitsCount() >> 3) >> op.Size;

    int lane = 0;

    while (lane < laneCount)
    {
        EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        EmitVectorExtractZx(context, op.Rm, lane, op.Size);

        context.Emit(OpCodes.Ldc_I4_1);                       // round = true
        context.EmitLdc_I4(op.Size);                          // size

        SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

        EmitVectorInsert(context, op.Rd, lane, op.Size);

        lane++;
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Urshr_S(ILEmitterCtx context)
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
@ -450,7 +635,28 @@ namespace ChocolArm64.Instructions
/// <summary>
/// Emits USHL (vector): for every element, calls
/// SoftFallback.UnsignedShlReg(Rn[i], Rm[i], round: false, op.Size)
/// and stores the result into Rd[i].
/// </summary>
public static void Ushl_V(ILEmitterCtx context)
{
    // The shift is emitted only through the SoftFallback loop below. A leading
    // EmitVectorShl call would already overwrite Rd, so the loop would then
    // shift a second time whenever Rd aliases Rn or Rm.
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int bytes = op.GetBitsCount() >> 3;
    int elems = bytes >> op.Size;

    for (int index = 0; index < elems; index++)
    {
        EmitVectorExtractZx(context, op.Rn, index, op.Size);
        EmitVectorExtractZx(context, op.Rm, index, op.Size);

        context.Emit(OpCodes.Ldc_I4_0);                       // round = false
        context.EmitLdc_I4(op.Size);                          // size

        SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

        EmitVectorInsert(context, op.Rd, index, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
public static void Ushll_V(ILEmitterCtx context)
@ -526,69 +732,6 @@ namespace ChocolArm64.Instructions
}
}
// Emits an inline IL implementation of the per-element register shift and
// runs it through the signed or unsigned binary-op helper, chosen by "signed".
private static void EmitVectorShl(ILEmitterCtx context, bool signed)
{
//This instruction shifts the value on vector A by the number of bits
//specified on the signed, lower 8 bits of vector B. If the shift value
//is greater or equal to the data size of each lane, then the result is zero.
//Additionally, negative shifts produces right shifts by the negated shift value.
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
// Element width in bits; shift amounts at or above it take the lblZero path.
int maxShift = 8 << op.Size;
// Presumably invoked by the binary-op helper with (value, shift) on the IL stack - TODO confirm.
Action emit = () =>
{
ILLabel lblShl = new ILLabel();
ILLabel lblZero = new ILLabel();
ILLabel lblEnd = new ILLabel();
// Emits: if (shift >= maxShift) goto lblZero; otherwise apply ilOp and jump to lblEnd.
void EmitShift(OpCode ilOp)
{
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(maxShift);
context.Emit(OpCodes.Bge_S, lblZero);
context.Emit(ilOp);
context.Emit(OpCodes.Br_S, lblEnd);
}
// Keep only the signed low byte of the shift operand.
context.Emit(OpCodes.Conv_I1);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(0);
// Non-negative shift amounts go to the left-shift path.
context.Emit(OpCodes.Bge_S, lblShl);
// Negative amount: negate it and shift right (arithmetic when signed, logical otherwise).
context.Emit(OpCodes.Neg);
EmitShift(signed
? OpCodes.Shr
: OpCodes.Shr_Un);
context.MarkLabel(lblShl);
EmitShift(OpCodes.Shl);
// Out-of-range amount: drop both operands and produce 0.
// NOTE(review): this path also yields 0 for signed right shifts of negative values - confirm that is intended.
context.MarkLabel(lblZero);
context.Emit(OpCodes.Pop);
context.Emit(OpCodes.Pop);
context.EmitLdc_I8(0);
context.MarkLabel(lblEnd);
};
if (signed)
{
EmitVectorBinaryOpSx(context, emit);
}
else
{
EmitVectorBinaryOpZx(context, emit);
}
}
[Flags]
private enum ShrImmFlags
{

View file

@ -16,6 +16,283 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(SoftFallback), mthdName);
}
#region "ShlReg"
/// <summary>
/// Shifts a signed element by a register-supplied amount. Only the low byte of
/// <paramref name="shift"/> is used, interpreted as signed: positive shifts left
/// (0 once the amount reaches the element width), negative shifts right via
/// SignedShrReg, zero returns the value unchanged.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift operand; only its low 8 bits are used.</param>
/// <param name="round">Forwarded to the right-shift helper.</param>
/// <param name="size">Element size log2; the element width is 8 &lt;&lt; size bits.</param>
public static long SignedShlReg(long value, long shift, bool round, int size)
{
    int elementBits = 8 << size;

    int amount = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return SignedShrReg(value, -amount, round, elementBits);
    }

    return amount >= elementBits ? 0L : value << amount;
}
/// <summary>
/// Shifts an unsigned element by a register-supplied amount. Only the low byte of
/// <paramref name="shift"/> is used, interpreted as signed: positive shifts left
/// (0 once the amount reaches the element width), negative shifts right via
/// UnsignedShrReg, zero returns the value unchanged.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift operand; only its low 8 bits are used.</param>
/// <param name="round">Forwarded to the right-shift helper.</param>
/// <param name="size">Element size log2; the element width is 8 &lt;&lt; size bits.</param>
public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
{
    int elementBits = 8 << size;

    int amount = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return UnsignedShrReg(value, -amount, round, elementBits);
    }

    return amount >= elementBits ? 0UL : value << amount;
}
/// <summary>
/// Saturating variant of the signed register shift. Negative amounts shift right
/// via SignedShrReg; positive amounts shift left and saturate (through
/// SignedSignSatQ or SignedSrcSignedDstSatQ) when the result does not fit the
/// element; a zero amount returns the value unchanged.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift operand; only its low 8 bits are used, as a signed byte.</param>
/// <param name="round">Forwarded to the right-shift helper.</param>
/// <param name="size">Element size log2; the element width is 8 &lt;&lt; size bits.</param>
/// <param name="state">Thread state handed to the saturation helpers.</param>
public static long SignedShlRegSatQ(long value, long shift, bool round, int size, CpuThreadState state)
{
    int elementBits = 8 << size;

    int amount = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return SignedShrReg(value, -amount, round, elementBits);
    }

    if (amount >= elementBits)
    {
        // Every significant bit is shifted out: saturate by sign.
        return SignedSignSatQ(value, elementBits, state);
    }

    long shifted = value << amount;

    if (elementBits != 64)
    {
        // The 64-bit intermediate cannot overflow here; clamp it to the element range.
        return SignedSrcSignedDstSatQ(shifted, size, state);
    }

    // 64-bit elements: detect lost bits by shifting back and comparing.
    bool lossless = (shifted >> amount) == value;

    return lossless ? shifted : SignedSignSatQ(value, elementBits, state);
}
/// <summary>
/// Saturating variant of the unsigned register shift. Negative amounts shift right
/// via UnsignedShrReg; positive amounts shift left and saturate (through
/// UnsignedSignSatQ or UnsignedSrcUnsignedDstSatQ) when the result does not fit
/// the element; a zero amount returns the value unchanged.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift operand; only its low 8 bits are used, as a signed byte.</param>
/// <param name="round">Forwarded to the right-shift helper.</param>
/// <param name="size">Element size log2; the element width is 8 &lt;&lt; size bits.</param>
/// <param name="state">Thread state handed to the saturation helpers.</param>
public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size, CpuThreadState state)
{
    int elementBits = 8 << size;

    int amount = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return UnsignedShrReg(value, -amount, round, elementBits);
    }

    if (amount >= elementBits)
    {
        // Every significant bit is shifted out: saturate unless the value is zero.
        return UnsignedSignSatQ(value, elementBits, state);
    }

    ulong shifted = value << amount;

    if (elementBits != 64)
    {
        // The 64-bit intermediate cannot overflow here; clamp it to the element range.
        return UnsignedSrcUnsignedDstSatQ(shifted, size, state);
    }

    // 64-bit elements: detect lost bits by shifting back and comparing.
    bool lossless = (shifted >> amount) == value;

    return lossless ? shifted : UnsignedSignSatQ(value, elementBits, state);
}
/// <summary>
/// Arithmetic right shift of a signed element, optionally rounding by adding
/// half of the last bit shifted out before the shift.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift amount, in [1, 128].</param>
/// <param name="round">True to round, false to truncate.</param>
/// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
{
    bool shiftsOutEverything = shift >= eSize;

    if (!round)
    {
        if (shiftsOutEverything)
        {
            // Only the sign remains.
            return value < 0L ? -1L : 0L;
        }

        return value >> shift;
    }

    if (shiftsOutEverything)
    {
        return 0L;
    }

    long rounded = value + (1L << (shift - 1));

    // For 64-bit elements the addition can wrap from positive to negative; the
    // wrapped sum is then really an unsigned quantity and needs a logical shift.
    bool wrapped = eSize == 64 && (~value & (value ^ rounded)) < 0L;

    return wrapped ? (long)((ulong)rounded >> shift) : rounded >> shift;
}
/// <summary>
/// Logical right shift of an unsigned element, optionally rounding by adding
/// half of the last bit shifted out before the shift.
/// </summary>
/// <param name="value">Element value.</param>
/// <param name="shift">Shift amount, in [1, 128].</param>
/// <param name="round">True to round, false to truncate.</param>
/// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
{
    if (!round)
    {
        return shift >= eSize ? 0UL : value >> shift;
    }

    if (shift > 64)
    {
        return 0UL;
    }

    ulong half = 1UL << (shift - 1);
    ulong sum  = value + half;

    // Only 64-bit elements can carry out of the 64-bit addition.
    bool carried = eSize == 64 && sum < value && sum < half;

    if (shift == 64)
    {
        // A 64-bit shift count cannot be expressed with >>; only the carry survives.
        return carried ? 1UL : 0UL;
    }

    ulong shifted = sum >> shift;

    // Re-insert the carry bit at the position it would occupy after the shift.
    return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
}
/// <summary>
/// Saturates by sign: a positive operand yields the largest and a negative operand
/// the smallest value representable in <paramref name="eSize"/> bits, setting the
/// Fpsr.Qc flag on <paramref name="state"/>. Zero yields 0 and leaves the flag alone.
/// </summary>
private static long SignedSignSatQ(long op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
{
    if (op == 0L)
    {
        return 0L;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    // 1 << (eSize - 1) is the magnitude of the element's minimum value.
    long signBit = 1L << (eSize - 1);

    return op > 0L ? signBit - 1L : -signBit;
}
/// <summary>
/// Saturates an unsigned operand: any non-zero value yields the largest value
/// representable in <paramref name="eSize"/> bits and sets the Fpsr.Qc flag on
/// <paramref name="state"/>. Zero yields 0 and leaves the flag alone.
/// </summary>
private static ulong UnsignedSignSatQ(ulong op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
{
    if (op == 0UL)
    {
        return 0UL;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    // All ones in the low eSize bits.
    return ulong.MaxValue >> (64 - eSize);
}
#endregion
#region "ShrImm64"
public static long SignedShrImm64(long value, long roundConst, int shift)
{
@ -31,7 +308,7 @@ namespace ChocolArm64.Instructions
{
return -1L;
}
else
else /* if (value >= 0L) */
{
return 0L;
}

View file

@ -427,10 +427,12 @@ namespace ChocolArm64
SetA64("01111110101xxxxx101101xxxxxxxxxx", InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg64));
SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg64));
SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg64));
SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstEmit.Sqrshl_V, typeof(OpCodeSimdReg64));
SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstEmit.Sqrshrn_S, typeof(OpCodeSimdShImm64));
SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstEmit.Sqrshrn_V, typeof(OpCodeSimdShImm64));
SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstEmit.Sqrshrun_S, typeof(OpCodeSimdShImm64));
SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstEmit.Sqrshrun_V, typeof(OpCodeSimdShImm64));
SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstEmit.Sqshl_V, typeof(OpCodeSimdReg64));
SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstEmit.Sqshrn_S, typeof(OpCodeSimdShImm64));
SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstEmit.Sqshrn_V, typeof(OpCodeSimdShImm64));
SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstEmit.Sqshrun_S, typeof(OpCodeSimdShImm64));
@ -442,6 +444,7 @@ namespace ChocolArm64
SetA64("01111110<<100001001010xxxxxxxxxx", InstEmit.Sqxtun_S, typeof(OpCodeSimd64));
SetA64("0x101110<<100001001010xxxxxxxxxx", InstEmit.Sqxtun_V, typeof(OpCodeSimd64));
SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstEmit.Srhadd_V, typeof(OpCodeSimdReg64));
SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstEmit.Srshl_V, typeof(OpCodeSimdReg64));
SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstEmit.Srshr_S, typeof(OpCodeSimdShImm64));
SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstEmit.Srshr_V, typeof(OpCodeSimdShImm64));
SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstEmit.Srshr_V, typeof(OpCodeSimdShImm64));
@ -501,8 +504,10 @@ namespace ChocolArm64
SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstEmit.Umull_V, typeof(OpCodeSimdReg64));
SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstEmit.Uqadd_S, typeof(OpCodeSimdReg64));
SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstEmit.Uqadd_V, typeof(OpCodeSimdReg64));
SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstEmit.Uqrshl_V, typeof(OpCodeSimdReg64));
SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstEmit.Uqrshrn_S, typeof(OpCodeSimdShImm64));
SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstEmit.Uqrshrn_V, typeof(OpCodeSimdShImm64));
SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstEmit.Uqshl_V, typeof(OpCodeSimdReg64));
SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstEmit.Uqshrn_S, typeof(OpCodeSimdShImm64));
SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstEmit.Uqshrn_V, typeof(OpCodeSimdShImm64));
SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstEmit.Uqsub_S, typeof(OpCodeSimdReg64));
@ -510,6 +515,7 @@ namespace ChocolArm64
SetA64("01111110<<100001010010xxxxxxxxxx", InstEmit.Uqxtn_S, typeof(OpCodeSimd64));
SetA64("0x101110<<100001010010xxxxxxxxxx", InstEmit.Uqxtn_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstEmit.Urhadd_V, typeof(OpCodeSimdReg64));
SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstEmit.Urshl_V, typeof(OpCodeSimdReg64));
SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstEmit.Urshr_S, typeof(OpCodeSimdShImm64));
SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstEmit.Urshr_V, typeof(OpCodeSimdShImm64));
SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstEmit.Urshr_V, typeof(OpCodeSimdShImm64));

View file

@ -8,11 +8,13 @@ public static class Optimizations
private static bool _useSseIfAvailable = true;
private static bool _useSse2IfAvailable = true;
private static bool _useSsse3IfAvailable = true;
private static bool _useSse41IfAvailable = true;
private static bool _useSse42IfAvailable = true;
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
}

View file

@ -0,0 +1,99 @@
using Ryujinx.Graphics.Memory;
using System.Collections.Generic;
namespace Ryujinx.Graphics
{
    /// <summary>
    /// Decodes a raw command buffer into <see cref="ChCommand"/> entries and
    /// dispatches the resulting method writes to the video decoder or the
    /// video image composer, depending on the active class.
    /// </summary>
    public class CdmaProcessor
    {
        private const int MethSetMethod = 0x10;
        private const int MethSetData   = 0x11;

        private NvGpu Gpu;

        public CdmaProcessor(NvGpu Gpu)
        {
            this.Gpu = Gpu;
        }

        /// <summary>
        /// Decodes <paramref name="CmdBuffer"/> and processes the decoded commands.
        /// Each header word holds a 16-bit value (bits 15:0), a 12-bit method
        /// offset (bits 27:16) and the submission mode (bits 31:28).
        /// </summary>
        public void PushCommands(NvGpuVmm Vmm, int[] CmdBuffer)
        {
            List<ChCommand> Decoded = new List<ChCommand>();

            ChClassId ActiveClass = 0;

            int Cursor = 0;

            while (Cursor < CmdBuffer.Length)
            {
                int Word = CmdBuffer[Cursor++];

                int Payload = (Word >>  0) & 0xffff;
                int Method  = (Word >> 16) & 0xfff;

                ChSubmissionMode Mode = (ChSubmissionMode)((Word >> 28) & 0xf);

                if (Mode == ChSubmissionMode.SetClass)
                {
                    ActiveClass = (ChClassId)(Payload >> 6);
                }
                else if (Mode == ChSubmissionMode.Incrementing)
                {
                    // One command per argument word, each at the next method offset.
                    for (int Offset = 0; Offset < Payload; Offset++)
                    {
                        int Argument = CmdBuffer[Cursor++];

                        Decoded.Add(new ChCommand(ActiveClass, Method + Offset, Argument));
                    }
                }
                else if (Mode == ChSubmissionMode.NonIncrementing)
                {
                    // A single command that carries every argument word.
                    int[] Arguments = new int[Payload];

                    for (int Slot = 0; Slot < Payload; Slot++)
                    {
                        Arguments[Slot] = CmdBuffer[Cursor++];
                    }

                    Decoded.Add(new ChCommand(ActiveClass, Method, Arguments));
                }

                // Other submission modes are ignored.
            }

            ProcessCommands(Vmm, Decoded.ToArray());
        }

        /// <summary>
        /// Walks the decoded commands: MethSetMethod latches the target method
        /// offset, MethSetData forwards the arguments to the engine selected by
        /// the command's class.
        /// </summary>
        private void ProcessCommands(NvGpuVmm Vmm, ChCommand[] Commands)
        {
            int TargetMethod = 0;

            foreach (ChCommand Command in Commands)
            {
                if (Command.MethodOffset == MethSetMethod)
                {
                    TargetMethod = Command.Arguments[0];
                }
                else if (Command.MethodOffset == MethSetData)
                {
                    if (Command.ClassId == ChClassId.NvDec)
                    {
                        Gpu.VideoDecoder.Process(Vmm, TargetMethod, Command.Arguments);
                    }
                    else if (Command.ClassId == ChClassId.GraphicsVic)
                    {
                        Gpu.VideoImageComposer.Process(Vmm, TargetMethod, Command.Arguments);
                    }
                }
            }
        }
    }
}

View file

@ -0,0 +1,20 @@
namespace Ryujinx.Graphics
{
    /// <summary>
    /// Channel class identifiers, listed in ascending numeric order.
    /// </summary>
    enum ChClassId
    {
        Host1x              = 0x01,
        VideoEncodeMpeg     = 0x20,
        VideoEncodeNvEnc    = 0x21,
        VideoStreamingVi    = 0x30,
        VideoStreamingIsp   = 0x32,
        VideoStreamingIspB  = 0x34,
        VideoStreamingViI2c = 0x36,
        GraphicsVic         = 0x5d,
        Graphics3d          = 0x60,
        GraphicsGpu         = 0x61,
        NvJpg               = 0xc0,
        Tsec                = 0xe0,
        TsecB               = 0xe1,
        NvDec               = 0xf0
    }
}

View file

@ -0,0 +1,18 @@
namespace Ryujinx.Graphics
{
    /// <summary>
    /// One decoded channel command: the class it targets, the method offset
    /// and the argument words that came with it.
    /// </summary>
    struct ChCommand
    {
        // Get-only properties: the values are fixed at construction, so the struct
        // cannot be mutated through a copy by accident. The array contents
        // themselves are still writable by whoever holds the reference.
        public ChClassId ClassId      { get; }
        public int       MethodOffset { get; }
        public int[]     Arguments    { get; }

        /// <summary>Creates a command for the given class and method offset.</summary>
        /// <param name="ClassId">Class the command is addressed to.</param>
        /// <param name="MethodOffset">Method offset within that class.</param>
        /// <param name="Arguments">Argument words; the array is stored by reference, not copied.</param>
        public ChCommand(ChClassId ClassId, int MethodOffset, params int[] Arguments)
        {
            this.ClassId      = ClassId;
            this.MethodOffset = MethodOffset;
            this.Arguments    = Arguments;
        }
    }
}

View file

@ -0,0 +1,13 @@
namespace Ryujinx.Graphics
{
    /// <summary>
    /// Submission mode encoded in a command header word.
    /// </summary>
    enum ChSubmissionMode
    {
        SetClass        = 0x0,
        Incrementing    = 0x1,
        NonIncrementing = 0x2,
        Mask            = 0x3,
        Immediate       = 0x4,
        Restart         = 0x5,
        Gather          = 0x6
    }
}

View file

@ -63,15 +63,10 @@ namespace Ryujinx.Graphics
Gpu.Renderer.RenderTarget.BindZeta(Position);
}
public void SendTexture(NvGpuVmm Vmm, long Position, GalImage NewImage, int TexIndex = -1)
public void SendTexture(NvGpuVmm Vmm, long Position, GalImage NewImage)
{
PrepareSendTexture(Vmm, Position, NewImage);
if (TexIndex >= 0)
{
Gpu.Renderer.Texture.Bind(Position, TexIndex, NewImage);
}
ImageTypes[Position] = ImageType.Texture;
}

View file

@ -1,6 +1,6 @@
using Ryujinx.Graphics.Memory;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
interface INvGpuEngine
{

View file

@ -3,7 +3,7 @@ using Ryujinx.Graphics.Memory;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class MacroInterpreter
{

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngine
{

View file

@ -2,7 +2,7 @@ using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class NvGpuEngine2d : INvGpuEngine
{

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngine2dReg
{

View file

@ -5,7 +5,7 @@ using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class NvGpuEngine3d : INvGpuEngine
{
@ -523,7 +523,7 @@ namespace Ryujinx.Graphics
int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);
int TexIndex = 0;
List<(long, GalImage, GalTextureSampler)> UnboundTextures = new List<(long, GalImage, GalTextureSampler)>();
for (int Index = 0; Index < Keys.Length; Index++)
{
@ -542,20 +542,31 @@ namespace Ryujinx.Graphics
int TextureHandle = Vmm.ReadInt32(Position + DeclInfo.Index * 4);
UploadTexture(Vmm, TexIndex, TextureHandle);
TexIndex++;
UnboundTextures.Add(UploadTexture(Vmm, TextureHandle));
}
}
for (int Index = 0; Index < UnboundTextures.Count; Index++)
{
(long Key, GalImage Image, GalTextureSampler Sampler) = UnboundTextures[Index];
if (Key == 0)
{
continue;
}
Gpu.Renderer.Texture.Bind(Key, Index, Image);
Gpu.Renderer.Texture.SetSampler(Sampler);
}
}
private void UploadTexture(NvGpuVmm Vmm, int TexIndex, int TextureHandle)
private (long, GalImage, GalTextureSampler) UploadTexture(NvGpuVmm Vmm, int TextureHandle)
{
if (TextureHandle == 0)
{
//FIXME: Some games like puyo puyo will use handles with the value 0.
//This is a bug, most likely caused by sync issues.
return;
return (0, default(GalImage), default(GalTextureSampler));
}
bool LinkedTsc = ReadRegisterBool(NvGpuEngine3dReg.LinkedTsc);
@ -590,12 +601,12 @@ namespace Ryujinx.Graphics
if (Key == -1)
{
//FIXME: Shouldn't ignore invalid addresses.
return;
return (0, default(GalImage), default(GalTextureSampler));
}
Gpu.ResourceManager.SendTexture(Vmm, Key, Image, TexIndex);
Gpu.ResourceManager.SendTexture(Vmm, Key, Image);
Gpu.Renderer.Texture.SetSampler(Sampler);
return (Key, Image, Sampler);
}
private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngine3dReg
{

View file

@ -2,7 +2,7 @@ using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class NvGpuEngineM2mf : INvGpuEngine
{

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngineM2mfReg
{

View file

@ -2,7 +2,7 @@ using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using System.Collections.Generic;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class NvGpuEngineP2mf : INvGpuEngine
{

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuEngineP2mfReg
{

View file

@ -1,6 +1,6 @@
using Ryujinx.Graphics.Memory;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
class NvGpuFifo
{

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
enum NvGpuFifoMeth
{

View file

@ -1,6 +1,6 @@
using Ryujinx.Graphics.Memory;
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Graphics3d
{
delegate void NvGpuMethod(NvGpuVmm Vmm, GpuMethodCall MethCall);
}

View file

@ -1,4 +1,4 @@
namespace Ryujinx.Graphics
namespace Ryujinx.Graphics.Memory
{
public enum NvGpuBufferType
{

View file

@ -72,6 +72,28 @@ namespace Ryujinx.Graphics.Memory
}
}
//Maps Size bytes of physical memory starting at PA into a free virtual range
//that lies entirely below 4 GiB, and returns the chosen virtual address.
//Returns -1 when no free range is found or the range found does not fit
//in the low 32 bits of the address space.
public long MapLow(long PA, long Size)
{
    lock (PageTable)
    {
        //Search from the first page (PageSize) instead of the default start.
        long VA = GetFreePosition(Size, 1, PageSize);

        bool FitsBelow4GiB =
            VA != -1 &&
            (ulong)VA <= uint.MaxValue &&
            (ulong)(VA + Size) <= uint.MaxValue;

        if (!FitsBelow4GiB)
        {
            return -1;
        }

        long Offset = 0;

        while (Offset < Size)
        {
            SetPte(VA + Offset, PA + Offset);

            Offset += PageSize;
        }

        return VA;
    }
}
public long ReserveFixed(long VA, long Size)
{
lock (PageTable)
@ -122,11 +144,11 @@ namespace Ryujinx.Graphics.Memory
}
}
private long GetFreePosition(long Size, long Align = 1)
private long GetFreePosition(long Size, long Align = 1, long Start = 1L << 32)
{
//Note: Address 0 is not considered valid by the driver,
//when 0 is returned it's considered a mapping error.
long Position = PageSize;
long Position = Start;
long FreeSize = 0;
if (Align < 1)

View file

@ -1,4 +1,8 @@
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Graphics3d;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.VDec;
using Ryujinx.Graphics.Vic;
namespace Ryujinx.Graphics
{
@ -16,6 +20,10 @@ namespace Ryujinx.Graphics
internal NvGpuEngineM2mf EngineM2mf { get; private set; }
internal NvGpuEngineP2mf EngineP2mf { get; private set; }
private CdmaProcessor CdmaProcessor;
internal VideoDecoder VideoDecoder { get; private set; }
internal VideoImageComposer VideoImageComposer { get; private set; }
public NvGpu(IGalRenderer Renderer)
{
this.Renderer = Renderer;
@ -29,6 +37,26 @@ namespace Ryujinx.Graphics
Engine3d = new NvGpuEngine3d(this);
EngineM2mf = new NvGpuEngineM2mf(this);
EngineP2mf = new NvGpuEngineP2mf(this);
CdmaProcessor = new CdmaProcessor(this);
VideoDecoder = new VideoDecoder(this);
VideoImageComposer = new VideoImageComposer(this);
}
//Hands a command buffer over to the CDMA processor.
//The call is made while holding a lock on CdmaProcessor, the same lock
//UninitializeVideoDecoder takes, so the two never run concurrently.
public void PushCommandBuffer(NvGpuVmm Vmm, int[] CmdBuffer)
{
lock (CdmaProcessor)
{
CdmaProcessor.PushCommands(Vmm, CmdBuffer);
}
}
//Releases the FFmpeg decoder state by calling FFmpegWrapper.Uninitialize.
//Runs under the CdmaProcessor lock so it can not overlap with a command
//buffer being processed by PushCommandBuffer.
public void UninitializeVideoDecoder()
{
lock (CdmaProcessor)
{
FFmpegWrapper.Uninitialize();
}
}
}
}

View file

@ -14,6 +14,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="4.0.0.4" />
<PackageReference Include="OpenTK.NetStandard" Version="1.0.4" />
</ItemGroup>

View file

@ -0,0 +1,75 @@
using System.IO;
namespace Ryujinx.Graphics.VDec
{
//Writes values bit by bit (most significant bit first) to a stream.
//Bits are accumulated into a one byte buffer that is written out
//to the underlying stream each time it fills up.
class BitStreamWriter
{
    //Capacity of the staging buffer, in bits.
    private const int BufferSize = 8;

    private Stream BaseStream;

    private int Buffer;
    private int BufferPos;

    public BitStreamWriter(Stream BaseStream)
    {
        this.BaseStream = BaseStream;
    }

    //Writes a single bit, 1 for true and 0 for false.
    public void WriteBit(bool Value)
    {
        int Bit = Value ? 1 : 0;

        WriteBits(Bit, 1);
    }

    //Writes the low ValueSize bits of Value, starting from the most significant one.
    public void WriteBits(int Value, int ValueSize)
    {
        for (int Pending = ValueSize; Pending > 0; )
        {
            //This may flush a full buffer, so it must run before the buffer is touched.
            int Room = GetFreeBufferBits();

            int Chunk = Pending < Room ? Pending : Room;

            //Take the topmost bits that are still pending...
            int Bits = (Value >> (Pending - Chunk)) & ((1 << Chunk) - 1);

            //...and place them right after the bits already on the buffer.
            Buffer |= Bits << (Room - Chunk);

            BufferPos += Chunk;
            Pending   -= Chunk;
        }
    }

    //Returns how many bits can still be stored, flushing first if the buffer is full.
    private int GetFreeBufferBits()
    {
        if (BufferPos == BufferSize)
        {
            Flush();
        }

        return BufferSize - BufferPos;
    }

    //Writes the buffered bits (if any) as one byte, unused low bits are left as zero.
    public void Flush()
    {
        if (BufferPos == 0)
        {
            return;
        }

        BaseStream.WriteByte((byte)Buffer);

        Buffer    = 0;
        BufferPos = 0;
    }
}
}

View file

@ -0,0 +1,17 @@
using System;
namespace Ryujinx.Graphics.VDec
{
//Small helpers shared by the video decoders.
static class DecoderHelper
{
    //Returns a new array holding the contents of Arr0 followed by the contents of Arr1.
    //Neither input array is modified.
    public static byte[] Combine(byte[] Arr0, byte[] Arr1)
    {
        int Split = Arr0.Length;

        byte[] Output = new byte[Split + Arr1.Length];

        Arr0.CopyTo(Output, 0);
        Arr1.CopyTo(Output, Split);

        return Output;
    }
}
}

View file

@ -0,0 +1,168 @@
using FFmpeg.AutoGen;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.VDec
{
//Static wrapper around a single FFmpeg (libavcodec) decoder instance and an
//optional libswscale context used to convert decoded frames to RGBA.
//All state is static, so only one codec can be open at any given time.
unsafe static class FFmpegWrapper
{
    private static AVCodec*        Codec;
    private static AVCodecContext* Context;
    private static AVFrame*        Frame;
    private static SwsContext*     ScalerCtx;

    //Dimensions the current scaler context was created for.
    private static int ScalerWidth;
    private static int ScalerHeight;

    //True between a successful *Initialize call and the next Uninitialize.
    public static bool IsInitialized { get; private set; }

    //Opens a H264 decoder, closing any decoder that was open before.
    public static void H264Initialize()
    {
        EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_H264);
    }

    //Opens a VP9 decoder, closing any decoder that was open before.
    public static void Vp9Initialize()
    {
        EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_VP9);
    }

    private static void EnsureCodecInitialized(AVCodecID CodecId)
    {
        if (IsInitialized)
        {
            Uninitialize();
        }

        Codec   = ffmpeg.avcodec_find_decoder(CodecId);
        Context = ffmpeg.avcodec_alloc_context3(Codec);
        Frame   = ffmpeg.av_frame_alloc();

        ffmpeg.avcodec_open2(Context, Codec, null);

        IsInitialized = true;
    }

    //Sends Data to the decoder as one packet and tries to receive a frame.
    //Returns the result of avcodec_receive_frame.
    //Throws InvalidOperationException when no codec is initialized.
    public static int DecodeFrame(byte[] Data)
    {
        if (!IsInitialized)
        {
            throw new InvalidOperationException("Tried to use uninitialized codec!");
        }

        AVPacket Packet;

        ffmpeg.av_init_packet(&Packet);

        //The packet only borrows the managed array, so it has to stay pinned
        //for as long as the decoder is reading from it.
        fixed (byte* Ptr = Data)
        {
            Packet.data = Ptr;
            Packet.size = Data.Length;

            ffmpeg.avcodec_send_packet(Context, &Packet);
        }

        return ffmpeg.avcodec_receive_frame(Context, Frame);
    }

    //Returns the dimensions and the first three plane pointers of the current frame.
    //The pointers are taken from the native frame, nothing is copied.
    //Throws InvalidOperationException when no codec is initialized.
    public static FFmpegFrame GetFrame()
    {
        if (!IsInitialized)
        {
            throw new InvalidOperationException("Tried to use uninitialized codec!");
        }

        AVFrame ManagedFrame = Marshal.PtrToStructure<AVFrame>((IntPtr)Frame);

        byte*[] Data = ManagedFrame.data.ToArray();

        return new FFmpegFrame()
        {
            Width  = ManagedFrame.width,
            Height = ManagedFrame.height,

            LumaPtr    = Data[0],
            ChromaBPtr = Data[1],
            ChromaRPtr = Data[2]
        };
    }

    //Returns the current frame converted to RGBA (4 bytes per pixel, tightly packed).
    //When the frame has no pixels (nothing decoded yet) Data is an empty array.
    //Throws InvalidOperationException when no codec is initialized.
    public static FFmpegFrame GetFrameRgba()
    {
        if (!IsInitialized)
        {
            throw new InvalidOperationException("Tried to use uninitialized codec!");
        }

        AVFrame ManagedFrame = Marshal.PtrToStructure<AVFrame>((IntPtr)Frame);

        EnsureScalerSetup(ManagedFrame.width, ManagedFrame.height);

        byte[] Dst = new byte[ManagedFrame.width * ManagedFrame.height * 4];

        //EnsureScalerSetup does nothing for zero sized frames, so the scaler
        //context may still be null (or belong to another size) here.
        //sws_scale must not be called in that case, callers check the
        //returned dimensions and ignore empty frames.
        if (Dst.Length != 0 && ScalerCtx != null)
        {
            byte*[] Data = ManagedFrame.data.ToArray();

            int[] LineSizes = ManagedFrame.linesize.ToArray();

            fixed (byte* Ptr = Dst)
            {
                byte*[] DstData = new byte*[] { Ptr };

                int[] DstLineSizes = new int[] { ManagedFrame.width * 4 };

                ffmpeg.sws_scale(ScalerCtx, Data, LineSizes, 0, ManagedFrame.height, DstData, DstLineSizes);
            }
        }

        return new FFmpegFrame()
        {
            Width  = ManagedFrame.width,
            Height = ManagedFrame.height,

            Data = Dst
        };
    }

    //(Re)creates the YUV420P -> RGBA scaler when there is none yet or the
    //frame size changed. Zero sized frames are ignored.
    private static void EnsureScalerSetup(int Width, int Height)
    {
        if (Width == 0 || Height == 0)
        {
            return;
        }

        if (ScalerCtx == null || ScalerWidth != Width || ScalerHeight != Height)
        {
            FreeScaler();

            ScalerCtx = ffmpeg.sws_getContext(
                Width, Height, AVPixelFormat.AV_PIX_FMT_YUV420P,
                Width, Height, AVPixelFormat.AV_PIX_FMT_RGBA, 0, null, null, null);

            ScalerWidth  = Width;
            ScalerHeight = Height;
        }
    }

    //Releases the frame, the codec context and the scaler. Does nothing when
    //no codec is initialized.
    public static void Uninitialize()
    {
        if (IsInitialized)
        {
            ffmpeg.av_frame_unref(Frame);
            ffmpeg.av_free(Frame);

            Frame = null;

            //avcodec_close alone would leave the context allocated by
            //avcodec_alloc_context3 behind, leaking it on every codec switch.
            //avcodec_free_context closes the codec and frees the context.
            AVCodecContext* Ctx = Context;

            ffmpeg.avcodec_free_context(&Ctx);

            Context = null;
            Codec   = null;

            FreeScaler();

            IsInitialized = false;
        }
    }

    private static void FreeScaler()
    {
        if (ScalerCtx != null)
        {
            ffmpeg.sws_freeContext(ScalerCtx);

            ScalerCtx = null;
        }
    }
}
}

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.VDec
{
//Decoded frame as returned by FFmpegWrapper.
//GetFrame fills the dimensions and the three plane pointers,
//GetFrameRgba fills the dimensions and Data. The other members are left at default.
unsafe struct FFmpegFrame
{
public int Width;
public int Height;
//Plane pointers, taken from entries 0, 1 and 2 of the native frame data array.
//NOTE(review): they point into memory owned by the decoder, presumably only
//valid until the next decode or uninitialize - confirm.
public byte* LumaPtr;
public byte* ChromaBPtr;
public byte* ChromaRPtr;
//RGBA pixels, Width * Height * 4 bytes.
public byte[] Data;
}
}

View file

@ -0,0 +1,79 @@
using System.IO;
namespace Ryujinx.Graphics.VDec
{
//Bit stream writer with the descriptors used by the H264 syntax:
//u(n) fixed size values and ue(v)/se(v) Exp-Golomb coded values.
class H264BitStreamWriter : BitStreamWriter
{
    public H264BitStreamWriter(Stream BaseStream) : base(BaseStream) { }

    //Writes Value as an unsigned integer using ValueSize bits.
    public void WriteU(int Value, int ValueSize)
    {
        WriteBits(Value, ValueSize);
    }

    //Writes Value as a signed Exp-Golomb code.
    public void WriteSe(int Value)
    {
        WriteExpGolombCodedInt(Value);
    }

    //Writes Value as an unsigned Exp-Golomb code.
    public void WriteUe(int Value)
    {
        WriteExpGolombCodedUInt((uint)Value);
    }

    //Writes the stop bit and pads the stream with zeros up to the next byte boundary.
    public void End()
    {
        WriteBit(true);

        Flush();
    }

    private void WriteExpGolombCodedInt(int Value)
    {
        int Magnitude = Value < 0 ? -Value : Value;

        //Positive values map to odd code numbers, zero and negative values to even ones.
        int CodeNum = Magnitude << 1;

        if (Value > 0)
        {
            CodeNum -= 1;
        }

        WriteExpGolombCodedUInt((uint)CodeNum);
    }

    private void WriteExpGolombCodedUInt(uint Value)
    {
        //Number of significant bits of (Value + 1).
        int Size = 32 - CountLeadingZeros((int)Value + 1);

        //Prefix: Size - 1 zero bits followed by a one bit.
        WriteBits(1, Size);

        //Suffix: (Value + 1) without its leading one bit.
        uint Suffix = Value - ((1u << (Size - 1)) - 1);

        WriteBits((int)Suffix, Size - 1);
    }

    //Leading zero count of every possible 4 bits value.
    private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

    private static int CountLeadingZeros(int Value)
    {
        if (Value == 0)
        {
            return 32;
        }

        int Count = 0;

        //Walk the nibbles from the top, Value is not zero here so
        //a nibble with at least one bit set is always found.
        for (int Shift = 28; ; Shift -= 4)
        {
            int Zeros = ClzNibbleTbl[(Value >> Shift) & 0b1111];

            Count += Zeros;

            if (Zeros != 4)
            {
                return Count;
            }
        }
    }
}
}

View file

@ -0,0 +1,238 @@
using System.IO;
namespace Ryujinx.Graphics.VDec
{
//Decodes H264 frames with FFmpeg.
//The frame data received has no sequence/picture parameter sets, so they are
//rebuilt from the values on H264ParameterSets and sent before the first frame.
class H264Decoder
{
    private int    Log2MaxPicOrderCntLsbMinus4;
    private bool   DeltaPicOrderAlwaysZeroFlag;
    private bool   FrameMbsOnlyFlag;
    private int    PicWidthInMbs;
    private int    PicHeightInMapUnits;
    private bool   EntropyCodingModeFlag;
    private bool   BottomFieldPicOrderInFramePresentFlag;
    private int    NumRefIdxL0DefaultActiveMinus1;
    private int    NumRefIdxL1DefaultActiveMinus1;
    private bool   DeblockingFilterControlPresentFlag;
    private bool   RedundantPicCntPresentFlag;
    private bool   Transform8x8ModeFlag;
    private bool   MbAdaptiveFrameFieldFlag;
    private bool   Direct8x8InferenceFlag;
    private bool   WeightedPredFlag;
    private bool   ConstrainedIntraPredFlag;
    private bool   FieldPicFlag;
    private bool   BottomFieldFlag;
    private int    Log2MaxFrameNumMinus4;
    private int    ChromaFormatIdc;
    private int    PicOrderCntType;
    private int    PicInitQpMinus26;
    private int    ChromaQpIndexOffset;
    private int    ChromaQpIndexOffset2;
    private int    WeightedBipredIdc;
    private int    FrameNumber;
    private byte[] ScalingMatrix4;
    private byte[] ScalingMatrix8;

    //Unpacks the parameter sets and sends FrameData to the decoder.
    //When the FFmpeg wrapper is not initialized yet, it is initialized for H264
    //and the rebuilt SPS/PPS header is prepended to the frame data.
    public void Decode(H264ParameterSets Params, H264Matrices Matrices, byte[] FrameData)
    {
        Log2MaxPicOrderCntLsbMinus4           = Params.Log2MaxPicOrderCntLsbMinus4;
        DeltaPicOrderAlwaysZeroFlag           = Params.DeltaPicOrderAlwaysZeroFlag;
        FrameMbsOnlyFlag                      = Params.FrameMbsOnlyFlag;
        PicWidthInMbs                         = Params.PicWidthInMbs;
        PicHeightInMapUnits                   = Params.PicHeightInMapUnits;
        EntropyCodingModeFlag                 = Params.EntropyCodingModeFlag;
        BottomFieldPicOrderInFramePresentFlag = Params.BottomFieldPicOrderInFramePresentFlag;
        NumRefIdxL0DefaultActiveMinus1        = Params.NumRefIdxL0DefaultActiveMinus1;
        NumRefIdxL1DefaultActiveMinus1        = Params.NumRefIdxL1DefaultActiveMinus1;
        DeblockingFilterControlPresentFlag    = Params.DeblockingFilterControlPresentFlag;
        RedundantPicCntPresentFlag            = Params.RedundantPicCntPresentFlag;
        Transform8x8ModeFlag                  = Params.Transform8x8ModeFlag;

        //The remaining values are packed into bit fields of the 64 bits Flags word.
        MbAdaptiveFrameFieldFlag = ((Params.Flags >> 0) & 1) != 0;
        Direct8x8InferenceFlag   = ((Params.Flags >> 1) & 1) != 0;
        WeightedPredFlag         = ((Params.Flags >> 2) & 1) != 0;
        ConstrainedIntraPredFlag = ((Params.Flags >> 3) & 1) != 0;
        FieldPicFlag             = ((Params.Flags >> 5) & 1) != 0;
        BottomFieldFlag          = ((Params.Flags >> 6) & 1) != 0;

        Log2MaxFrameNumMinus4 = (int)(Params.Flags >> 8)  & 0xf;
        ChromaFormatIdc       = (int)(Params.Flags >> 12) & 0x3;
        PicOrderCntType       = (int)(Params.Flags >> 14) & 0x3;
        PicInitQpMinus26      = (int)(Params.Flags >> 16) & 0x3f;
        ChromaQpIndexOffset   = (int)(Params.Flags >> 22) & 0x1f;
        ChromaQpIndexOffset2  = (int)(Params.Flags >> 27) & 0x1f;
        WeightedBipredIdc     = (int)(Params.Flags >> 32) & 0x3;
        FrameNumber           = (int)(Params.Flags >> 46) & 0x1ffff;

        //Sign extend the 6 bits and 5 bits signed fields.
        PicInitQpMinus26     = (PicInitQpMinus26     << 26) >> 26;
        ChromaQpIndexOffset  = (ChromaQpIndexOffset  << 27) >> 27;
        ChromaQpIndexOffset2 = (ChromaQpIndexOffset2 << 27) >> 27;

        ScalingMatrix4 = Matrices.ScalingMatrix4;
        ScalingMatrix8 = Matrices.ScalingMatrix8;

        if (FFmpegWrapper.IsInitialized)
        {
            FFmpegWrapper.DecodeFrame(FrameData);
        }
        else
        {
            FFmpegWrapper.H264Initialize();

            FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(EncodeHeader(), FrameData));
        }
    }

    //Builds the SPS and PPS NAL units (each with a 3 bytes start code) from
    //the values unpacked by Decode. Syntax elements that are not available
    //are written with fixed values.
    private byte[] EncodeHeader()
    {
        using (MemoryStream Data = new MemoryStream())
        {
            H264BitStreamWriter Writer = new H264BitStreamWriter(Data);

            //Sequence Parameter Set.
            Writer.WriteU(1, 24); //Start code
            Writer.WriteU(0, 1);  //Forbidden zero bit
            Writer.WriteU(3, 2);  //NAL ref idc
            Writer.WriteU(7, 5);  //NAL unit type (SPS)
            Writer.WriteU(100, 8); //Profile idc
            Writer.WriteU(0, 8);   //Constraint set flags and reserved bits
            Writer.WriteU(31, 8);  //Level idc
            Writer.WriteUe(0);     //SPS id
            Writer.WriteUe(ChromaFormatIdc);

            if (ChromaFormatIdc == 3)
            {
                Writer.WriteBit(false); //Separate colour plane flag
            }

            Writer.WriteUe(0);      //Bit depth luma minus 8
            Writer.WriteUe(0);      //Bit depth chroma minus 8
            Writer.WriteBit(false); //Qpprime Y zero transform bypass flag
            Writer.WriteBit(false); //Scaling matrix present flag

            Writer.WriteUe(Log2MaxFrameNumMinus4);
            Writer.WriteUe(PicOrderCntType);

            if (PicOrderCntType == 0)
            {
                Writer.WriteUe(Log2MaxPicOrderCntLsbMinus4);
            }
            else if (PicOrderCntType == 1)
            {
                Writer.WriteBit(DeltaPicOrderAlwaysZeroFlag);

                Writer.WriteSe(0); //Offset for non ref pic
                Writer.WriteSe(0); //Offset for top to bottom field
                Writer.WriteUe(0); //Num ref frames in pic order cnt cycle
            }

            int PicHeightInMbs = PicHeightInMapUnits / (FrameMbsOnlyFlag ? 1 : 2);

            Writer.WriteUe(16);     //Max num ref frames
            Writer.WriteBit(false); //Gaps in frame num value allowed flag
            Writer.WriteUe(PicWidthInMbs - 1);
            Writer.WriteUe(PicHeightInMbs - 1);
            Writer.WriteBit(FrameMbsOnlyFlag);

            if (!FrameMbsOnlyFlag)
            {
                Writer.WriteBit(MbAdaptiveFrameFieldFlag);
            }

            Writer.WriteBit(Direct8x8InferenceFlag);
            Writer.WriteBit(false); //Frame cropping flag
            Writer.WriteBit(false); //VUI parameter present flag

            Writer.End();

            //Picture Parameter Set.
            Writer.WriteU(1, 24); //Start code
            Writer.WriteU(0, 1);  //Forbidden zero bit
            Writer.WriteU(3, 2);  //NAL ref idc
            Writer.WriteU(8, 5);  //NAL unit type (PPS)

            Writer.WriteUe(0); //PPS id
            Writer.WriteUe(0); //SPS id

            Writer.WriteBit(EntropyCodingModeFlag);

            //This flag tells if the slice headers carry the bottom field order
            //count deltas, so it needs to match the one of the original stream,
            //otherwise the slice headers on the frame data are parsed wrong.
            Writer.WriteBit(BottomFieldPicOrderInFramePresentFlag);

            Writer.WriteUe(0); //Num slice groups minus 1
            Writer.WriteUe(NumRefIdxL0DefaultActiveMinus1);
            Writer.WriteUe(NumRefIdxL1DefaultActiveMinus1);
            Writer.WriteBit(WeightedPredFlag);
            Writer.WriteU(WeightedBipredIdc, 2);
            Writer.WriteSe(PicInitQpMinus26);
            Writer.WriteSe(0); //Pic init qs minus 26
            Writer.WriteSe(ChromaQpIndexOffset);
            Writer.WriteBit(DeblockingFilterControlPresentFlag);
            Writer.WriteBit(ConstrainedIntraPredFlag);
            Writer.WriteBit(RedundantPicCntPresentFlag);
            Writer.WriteBit(Transform8x8ModeFlag);

            Writer.WriteBit(true); //Pic scaling matrix present flag

            for (int Index = 0; Index < 6; Index++)
            {
                Writer.WriteBit(true); //Pic scaling list present flag

                WriteScalingList(Writer, ScalingMatrix4, Index * 16, 16);
            }

            if (Transform8x8ModeFlag)
            {
                for (int Index = 0; Index < 2; Index++)
                {
                    Writer.WriteBit(true); //Pic scaling list present flag

                    WriteScalingList(Writer, ScalingMatrix8, Index * 64, 64);
                }
            }

            Writer.WriteSe(ChromaQpIndexOffset2);

            Writer.End();

            return Data.ToArray();
        }
    }

    //ZigZag LUTs from libavcodec.
    private static readonly byte[] ZigZagDirect = new byte[]
    {
        0,   1,  8, 16,  9,  2,  3, 10,
        17, 24, 32, 25, 18, 11,  4,  5,
        12, 19, 26, 33, 40, 48, 41, 34,
        27, 20, 13,  6,  7, 14, 21, 28,
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63
    };

    private static readonly byte[] ZigZagScan = new byte[]
    {
        0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
        1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
        1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
        3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
    };

    //Writes Count entries of List (starting at Start) as a scaling list.
    //Entries are visited in zig-zag order (4x4 table for 16 entries, 8x8 table
    //otherwise) and each one is written as the signed difference to the
    //previous entry, with 8 as the initial value.
    private static void WriteScalingList(H264BitStreamWriter Writer, byte[] List, int Start, int Count)
    {
        byte[] Scan = Count == 16 ? ZigZagScan : ZigZagDirect;

        int LastScale = 8;

        for (int Index = 0; Index < Count; Index++)
        {
            byte Value = List[Start + Scan[Index]];

            int DeltaScale = Value - LastScale;

            Writer.WriteSe(DeltaScale);

            LastScale = Value;
        }
    }
}
}

View file

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.VDec
{
//Scaling matrices passed to H264Decoder.Decode, which writes them as the
//scaling lists of the picture parameter set.
struct H264Matrices
{
//6 lists of 16 entries each (VideoDecoder reads 6 * 16 bytes for it).
public byte[] ScalingMatrix4;
//2 lists of 64 entries each (VideoDecoder reads 2 * 64 bytes for it).
public byte[] ScalingMatrix8;
}
}

View file

@ -0,0 +1,34 @@
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.VDec
{
//H264 sequence/picture parameters, read by VideoDecoder directly from the
//decoder context in guest memory (at context address + 0x58).
//The hexadecimal suffix on the Reserved/Unknown members matches their offset
//from the context address when every member before Flags takes 4 bytes.
//NOTE(review): that includes the bool members, so the layout relies on
//MemoryHelper.Read marshaling bool as a 4 bytes value - confirm.
[StructLayout(LayoutKind.Sequential, Pack = 4)]
struct H264ParameterSets
{
public int Log2MaxPicOrderCntLsbMinus4;
public bool DeltaPicOrderAlwaysZeroFlag;
public bool FrameMbsOnlyFlag;
public int PicWidthInMbs;
public int PicHeightInMapUnits;
public int Reserved6C;
public bool EntropyCodingModeFlag;
public bool BottomFieldPicOrderInFramePresentFlag;
public int NumRefIdxL0DefaultActiveMinus1;
public int NumRefIdxL1DefaultActiveMinus1;
public bool DeblockingFilterControlPresentFlag;
public bool RedundantPicCntPresentFlag;
public bool Transform8x8ModeFlag;
public int Unknown8C;
public int Unknown90;
public int Reserved94;
public int Unknown98;
public int Reserved9C;
public int ReservedA0;
public int UnknownA4;
public int ReservedA8;
public int UnknownAC;
//Packed bit fields, unpacked by H264Decoder.Decode.
public long Flags;
public int FrameNumber;
public int FrameNumber2;
}
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.VDec
{
//Codec selected with the SetVideoCodec method of the video decoder.
//Only H264 and Vp9 are handled by VideoDecoder.Execute, the other
//values make it throw a NotImplementedException.
enum VideoCodec
{
H264 = 3,
Vp8 = 5,
H265 = 7,
Vp9 = 9
}
}

View file

@ -0,0 +1,280 @@
using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Vic;
using System;
namespace Ryujinx.Graphics.VDec
{
//Handles the methods sent to the video decoder: keeps track of the codec and
//of the addresses set by the guest, decodes a frame when Execute is received
//and copies decoded frames to guest surfaces on request (CopyPlanes).
unsafe class VideoDecoder
{
    private NvGpu Gpu;

    private H264Decoder H264Decoder;
    private Vp9Decoder  Vp9Decoder;

    private VideoCodec CurrentVideoCodec;

    private long DecoderContextAddress;
    private long FrameDataAddress;
    private long VpxCurrLumaAddress;
    private long VpxRef0LumaAddress;
    private long VpxRef1LumaAddress;
    private long VpxRef2LumaAddress;
    private long VpxCurrChromaAddress;
    private long VpxRef0ChromaAddress;
    private long VpxRef1ChromaAddress;
    private long VpxRef2ChromaAddress;
    private long VpxProbTablesAddress;

    public VideoDecoder(NvGpu Gpu)
    {
        this.Gpu = Gpu;

        H264Decoder = new H264Decoder();
        Vp9Decoder  = new Vp9Decoder();
    }

    //Dispatches a method to its handler, unknown method offsets are ignored.
    public void Process(NvGpuVmm Vmm, int MethodOffset, int[] Arguments)
    {
        switch ((VideoDecoderMeth)MethodOffset)
        {
            case VideoDecoderMeth.SetVideoCodec:
                SetVideoCodec(Vmm, Arguments);
                break;

            case VideoDecoderMeth.Execute:
                Execute(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetDecoderCtxAddr:
                SetDecoderCtxAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetFrameDataAddr:
                SetFrameDataAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxCurrLumaAddr:
                SetVpxCurrLumaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef0LumaAddr:
                SetVpxRef0LumaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef1LumaAddr:
                SetVpxRef1LumaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef2LumaAddr:
                SetVpxRef2LumaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxCurrChromaAddr:
                SetVpxCurrChromaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef0ChromaAddr:
                SetVpxRef0ChromaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef1ChromaAddr:
                SetVpxRef1ChromaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxRef2ChromaAddr:
                SetVpxRef2ChromaAddr(Vmm, Arguments);
                break;

            case VideoDecoderMeth.SetVpxProbTablesAddr:
                SetVpxProbTablesAddr(Vmm, Arguments);
                break;
        }
    }

    private void SetVideoCodec(NvGpuVmm Vmm, int[] Arguments)
    {
        CurrentVideoCodec = (VideoCodec)Arguments[0];
    }

    //Decodes one frame with the current codec.
    //Throws NotImplementedException for codecs other than H264 and VP9.
    private void Execute(NvGpuVmm Vmm, int[] Arguments)
    {
        switch (CurrentVideoCodec)
        {
            case VideoCodec.H264: ExecuteH264(Vmm); break;
            case VideoCodec.Vp9:  ExecuteVp9(Vmm);  break;

            default: ThrowUnimplementedCodec(); break;
        }
    }

    //Reads the H264 parameters, scaling matrices and frame data from guest memory and decodes them.
    private void ExecuteH264(NvGpuVmm Vmm)
    {
        int FrameDataSize = Vmm.ReadInt32(DecoderContextAddress + 0x48);

        long ParamsAddress = Vmm.GetPhysicalAddress(DecoderContextAddress + 0x58);

        H264ParameterSets Params = MemoryHelper.Read<H264ParameterSets>(Vmm.Memory, ParamsAddress);

        H264Matrices Matrices = new H264Matrices();

        Matrices.ScalingMatrix4 = Vmm.ReadBytes(DecoderContextAddress + 0x1c0, 6 * 16);
        Matrices.ScalingMatrix8 = Vmm.ReadBytes(DecoderContextAddress + 0x220, 2 * 64);

        byte[] FrameData = Vmm.ReadBytes(FrameDataAddress, FrameDataSize);

        H264Decoder.Decode(Params, Matrices, FrameData);
    }

    //Reads the VP9 frame header, probability tables and frame data from guest memory and decodes them.
    //The physical addresses of the luma surfaces are used as keys to identify the frames.
    private void ExecuteVp9(NvGpuVmm Vmm)
    {
        int FrameDataSize = Vmm.ReadInt32(DecoderContextAddress + 0x30);

        Vp9FrameKeys Keys = new Vp9FrameKeys()
        {
            CurrKey = Vmm.GetPhysicalAddress(VpxCurrLumaAddress),
            Ref0Key = Vmm.GetPhysicalAddress(VpxRef0LumaAddress),
            Ref1Key = Vmm.GetPhysicalAddress(VpxRef1LumaAddress),
            Ref2Key = Vmm.GetPhysicalAddress(VpxRef2LumaAddress)
        };

        long HeaderAddress = Vmm.GetPhysicalAddress(DecoderContextAddress + 0x48);

        Vp9FrameHeader Header = MemoryHelper.Read<Vp9FrameHeader>(Vmm.Memory, HeaderAddress);

        Vp9ProbabilityTables Probs = new Vp9ProbabilityTables()
        {
            SegmentationTreeProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x387, 0x7),
            SegmentationPredProbs = Vmm.ReadBytes(VpxProbTablesAddress + 0x38e, 0x3),
            Tx8x8Probs            = Vmm.ReadBytes(VpxProbTablesAddress + 0x470, 0x2),
            Tx16x16Probs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x472, 0x4),
            Tx32x32Probs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x476, 0x6),
            CoefProbs             = Vmm.ReadBytes(VpxProbTablesAddress + 0x5a0, 0x900),
            SkipProbs             = Vmm.ReadBytes(VpxProbTablesAddress + 0x537, 0x3),
            InterModeProbs        = Vmm.ReadBytes(VpxProbTablesAddress + 0x400, 0x1c),
            InterpFilterProbs     = Vmm.ReadBytes(VpxProbTablesAddress + 0x52a, 0x8),
            IsInterProbs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x41c, 0x4),
            CompModeProbs         = Vmm.ReadBytes(VpxProbTablesAddress + 0x532, 0x5),
            SingleRefProbs        = Vmm.ReadBytes(VpxProbTablesAddress + 0x580, 0xa),
            CompRefProbs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x58a, 0x5),
            YModeProbs0           = Vmm.ReadBytes(VpxProbTablesAddress + 0x480, 0x20),
            YModeProbs1           = Vmm.ReadBytes(VpxProbTablesAddress + 0x47c, 0x4),
            PartitionProbs        = Vmm.ReadBytes(VpxProbTablesAddress + 0x4e0, 0x40),
            MvJointProbs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x53b, 0x3),
            MvSignProbs           = Vmm.ReadBytes(VpxProbTablesAddress + 0x53e, 0x3),
            MvClassProbs          = Vmm.ReadBytes(VpxProbTablesAddress + 0x54c, 0x14),
            MvClass0BitProbs      = Vmm.ReadBytes(VpxProbTablesAddress + 0x540, 0x3),
            MvBitsProbs           = Vmm.ReadBytes(VpxProbTablesAddress + 0x56c, 0x14),
            MvClass0FrProbs       = Vmm.ReadBytes(VpxProbTablesAddress + 0x560, 0xc),
            MvFrProbs             = Vmm.ReadBytes(VpxProbTablesAddress + 0x542, 0x6),
            MvClass0HpProbs       = Vmm.ReadBytes(VpxProbTablesAddress + 0x548, 0x2),
            MvHpProbs             = Vmm.ReadBytes(VpxProbTablesAddress + 0x54a, 0x2)
        };

        byte[] FrameData = Vmm.ReadBytes(FrameDataAddress, FrameDataSize);

        Vp9Decoder.Decode(Keys, Header, Probs, FrameData);
    }

    private void SetDecoderCtxAddr(NvGpuVmm Vmm, int[] Arguments) => DecoderContextAddress = GetAddress(Arguments);

    private void SetFrameDataAddr(NvGpuVmm Vmm, int[] Arguments) => FrameDataAddress = GetAddress(Arguments);

    private void SetVpxCurrLumaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxCurrLumaAddress = GetAddress(Arguments);

    private void SetVpxRef0LumaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef0LumaAddress = GetAddress(Arguments);

    private void SetVpxRef1LumaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef1LumaAddress = GetAddress(Arguments);

    private void SetVpxRef2LumaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef2LumaAddress = GetAddress(Arguments);

    private void SetVpxCurrChromaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxCurrChromaAddress = GetAddress(Arguments);

    private void SetVpxRef0ChromaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef0ChromaAddress = GetAddress(Arguments);

    private void SetVpxRef1ChromaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef1ChromaAddress = GetAddress(Arguments);

    private void SetVpxRef2ChromaAddr(NvGpuVmm Vmm, int[] Arguments) => VpxRef2ChromaAddress = GetAddress(Arguments);

    private void SetVpxProbTablesAddr(NvGpuVmm Vmm, int[] Arguments) => VpxProbTablesAddress = GetAddress(Arguments);

    //The first argument holds the address shifted right by 8 bits.
    private static long GetAddress(int[] Arguments)
    {
        long Address = (uint)Arguments[0];

        return Address << 8;
    }

    //Copies the current decoded frame to the output surface using the requested pixel format.
    //Throws NotImplementedException for pixel formats other than RGBA8 and YUV420P.
    internal void CopyPlanes(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
    {
        SurfacePixelFormat Format = OutputConfig.PixelFormat;

        if (Format == SurfacePixelFormat.RGBA8)
        {
            CopyPlanesRgba8(Vmm, OutputConfig);
        }
        else if (Format == SurfacePixelFormat.YUV420P)
        {
            CopyPlanesYuv420p(Vmm, OutputConfig);
        }
        else
        {
            ThrowUnimplementedPixelFormat(Format);
        }
    }

    private void CopyPlanesRgba8(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
    {
        FFmpegFrame Frame = FFmpegWrapper.GetFrameRgba();

        //Nothing to copy when the decoder has no frame yet.
        if (Frame.Width == 0 && Frame.Height == 0)
        {
            return;
        }

        GalImage Image = new GalImage(
            OutputConfig.SurfaceWidth,
            OutputConfig.SurfaceHeight, 1,
            OutputConfig.GobBlockHeight,
            GalMemoryLayout.BlockLinear,
            GalImageFormat.RGBA8 | GalImageFormat.Unorm);

        long SurfaceAddress = Vmm.GetPhysicalAddress(OutputConfig.SurfaceLumaAddress);

        ImageUtils.WriteTexture(Vmm, Image, SurfaceAddress, Frame.Data);
    }

    private void CopyPlanesYuv420p(NvGpuVmm Vmm, SurfaceOutputConfig OutputConfig)
    {
        FFmpegFrame Frame = FFmpegWrapper.GetFrame();

        //Nothing to copy when the decoder has no frame yet.
        if (Frame.Width == 0 && Frame.Height == 0)
        {
            return;
        }

        int LumaWidth    = Frame.Width;
        int LumaHeight   = Frame.Height;
        int ChromaWidth  = Frame.Width  / 2;
        int ChromaHeight = Frame.Height / 2;

        //Rows on the destination start at multiples of the surface width rounded up to 256.
        int DstStride = (OutputConfig.SurfaceWidth + 0xff) & ~0xff;

        for (int Row = 0; Row < LumaHeight; Row++)
        {
            byte* SrcRow = Frame.LumaPtr + Row * LumaWidth;

            var DstRow = OutputConfig.SurfaceLumaAddress + Row * DstStride;

            for (int Col = 0; Col < LumaWidth; Col++)
            {
                Vmm.WriteByte(DstRow + Col, SrcRow[Col]);
            }
        }

        //Copy chroma data from both channels with interleaving.
        for (int Row = 0; Row < ChromaHeight; Row++)
        {
            byte* SrcRowB = Frame.ChromaBPtr + Row * ChromaWidth;
            byte* SrcRowR = Frame.ChromaRPtr + Row * ChromaWidth;

            var DstRow = OutputConfig.SurfaceChromaUAddress + Row * DstStride;

            for (int Col = 0; Col < ChromaWidth; Col++)
            {
                Vmm.WriteByte(DstRow + Col * 2 + 0, SrcRowB[Col]);
                Vmm.WriteByte(DstRow + Col * 2 + 1, SrcRowR[Col]);
            }
        }
    }

    private void ThrowUnimplementedCodec()
    {
        throw new NotImplementedException("Codec \"" + CurrentVideoCodec + "\" is not supported!");
    }

    private void ThrowUnimplementedPixelFormat(SurfacePixelFormat PixelFormat)
    {
        throw new NotImplementedException("Pixel format \"" + PixelFormat + "\" is not supported!");
    }
}
}

View file

@ -0,0 +1,19 @@
namespace Ryujinx.Graphics.VDec
{
//Method offsets handled by VideoDecoder.Process.
//Each *Addr method receives the address shifted right by 8 bits as its first argument.
enum VideoDecoderMeth
{
//Selects the codec (a VideoCodec value) used by the next Execute.
SetVideoCodec = 0x80,
//Decodes one frame using the codec and addresses set so far.
Execute = 0xc0,
SetDecoderCtxAddr = 0x101,
SetFrameDataAddr = 0x102,
SetVpxRef0LumaAddr = 0x10c,
SetVpxRef1LumaAddr = 0x10d,
SetVpxRef2LumaAddr = 0x10e,
SetVpxCurrLumaAddr = 0x10f,
SetVpxRef0ChromaAddr = 0x11d,
SetVpxRef1ChromaAddr = 0x11e,
SetVpxRef2ChromaAddr = 0x11f,
SetVpxCurrChromaAddr = 0x120,
SetVpxProbTablesAddr = 0x170
}
}

View file

@ -0,0 +1,879 @@
using System.Collections.Generic;
using System.IO;
namespace Ryujinx.Graphics.VDec
{
class Vp9Decoder
{
private const int DiffUpdateProbability = 252;
private const int FrameSyncCode = 0x498342;
private static readonly int[] MapLut = new int[]
{
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78,
79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93,
94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122,
123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137,
138, 139, 140, 141, 142, 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152,
153, 154, 155, 156, 157, 11, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13,
182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211,
212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226,
227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19
};
private byte[] DefaultTx8x8Probs = new byte[] { 100, 66 };
private byte[] DefaultTx16x16Probs = new byte[] { 20, 152, 15, 101 };
private byte[] DefaultTx32x32Probs = new byte[] { 3, 136, 37, 5, 52, 13 };
private byte[] DefaultCoefProbs = new byte[]
{
195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0,
17, 82, 140, 0, 8, 66, 114, 0, 2, 44, 76, 0, 1, 19, 32, 0,
40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, 7, 75, 127, 0,
3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0,
15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0,
102, 148, 228, 0, 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0,
2, 39, 75, 0, 1, 15, 29, 0, 156, 57, 233, 0, 119, 57, 212, 0,
58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, 3, 12, 31, 0,
191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0,
8, 93, 157, 0, 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0,
41, 151, 213, 0, 27, 123, 193, 0, 3, 82, 144, 0, 1, 58, 105, 0,
1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, 23, 126, 198, 0,
4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0,
114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0,
1, 33, 65, 0, 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0,
61, 49, 166, 0, 28, 36, 114, 0, 12, 25, 76, 0, 3, 16, 42, 0,
214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0,
49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0,
89, 163, 230, 0, 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0,
2, 42, 81, 0, 1, 17, 33, 0, 108, 167, 237, 0, 55, 133, 222, 0,
15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, 1, 19, 38, 0,
124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0,
1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0,
66, 58, 182, 0, 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0,
229, 99, 249, 0, 143, 111, 235, 0, 46, 109, 192, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, 94, 146, 224, 0,
25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0,
83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0,
1, 41, 79, 0, 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0,
10, 104, 178, 0, 2, 73, 133, 0, 1, 44, 85, 0, 1, 22, 47, 0,
127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, 3, 61, 124, 0,
1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0,
69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0,
125, 34, 187, 0, 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 37, 109, 153, 0, 51, 102, 147, 0,
23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, 1, 19, 29, 0,
31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0,
1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0,
2, 93, 148, 0, 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0,
29, 176, 217, 0, 12, 145, 201, 0, 3, 101, 156, 0, 1, 69, 111, 0,
1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, 25, 154, 215, 0,
6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0,
202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0,
14, 117, 177, 0, 5, 90, 141, 0, 2, 61, 95, 0, 1, 37, 57, 0,
33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, 1, 60, 104, 0,
1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0,
1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0,
32, 186, 224, 0, 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0,
1, 31, 55, 0, 1, 12, 22, 0, 57, 192, 227, 0, 20, 143, 204, 0,
3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, 1, 19, 32, 0,
212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0,
49, 107, 178, 0, 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0,
71, 172, 217, 0, 44, 141, 209, 0, 15, 102, 173, 0, 6, 76, 133, 0,
2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, 31, 148, 216, 0,
8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0,
65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0,
1, 38, 69, 0, 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0,
7, 107, 177, 0, 2, 70, 124, 0, 1, 42, 73, 0, 1, 18, 34, 0,
225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0,
29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0,
75, 183, 239, 0, 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0,
1, 44, 76, 0, 1, 17, 28, 0, 73, 185, 240, 0, 27, 159, 222, 0,
2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, 1, 17, 29, 0,
62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0,
1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0,
4, 113, 180, 0, 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0,
7, 27, 153, 0, 5, 30, 95, 0, 1, 16, 30, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, 57, 75, 124, 0,
27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0,
43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0,
1, 38, 60, 0, 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0,
2, 75, 117, 0, 1, 50, 81, 0, 1, 31, 51, 0, 1, 14, 23, 0,
18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, 1, 51, 86, 0,
1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0,
1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0,
19, 55, 240, 0, 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 41, 166, 207, 0, 104, 153, 199, 0,
31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, 1, 36, 52, 0,
35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0,
1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0,
1, 86, 142, 0, 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0,
20, 190, 215, 0, 4, 135, 192, 0, 1, 84, 139, 0, 1, 53, 91, 0,
1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, 2, 137, 192, 0,
1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0,
211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0,
46, 102, 164, 0, 15, 80, 128, 0, 2, 49, 76, 0, 1, 18, 28, 0,
71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, 3, 69, 109, 0,
1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0,
4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0,
47, 199, 217, 0, 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0,
1, 36, 62, 0, 1, 15, 26, 0, 26, 219, 229, 0, 5, 155, 207, 0,
1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, 1, 16, 28, 0,
233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0,
63, 142, 204, 0, 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0,
85, 181, 230, 0, 32, 146, 209, 0, 7, 100, 164, 0, 3, 71, 121, 0,
1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, 20, 148, 207, 0,
2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0,
40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0,
1, 39, 66, 0, 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0,
1, 98, 160, 0, 1, 67, 117, 0, 1, 41, 74, 0, 1, 17, 31, 0,
17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0,
26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0,
50, 127, 154, 0, 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0,
1, 35, 54, 0, 1, 13, 20, 0, 40, 142, 167, 0, 17, 110, 157, 0,
2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, 1, 11, 17, 0,
30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0,
1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0,
1, 83, 128, 0, 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0,
36, 41, 235, 0, 29, 36, 193, 0, 10, 27, 111, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, 177, 162, 215, 0,
110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0,
85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0,
1, 38, 65, 0, 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0,
2, 86, 140, 0, 1, 56, 97, 0, 1, 36, 61, 0, 1, 16, 27, 0,
55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, 1, 57, 99, 0,
1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0,
1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0,
181, 21, 201, 0, 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 47, 106, 172, 0, 95, 104, 173, 0,
42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, 1, 17, 23, 0,
62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0,
2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0,
3, 93, 146, 0, 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0,
49, 186, 223, 0, 17, 148, 204, 0, 1, 96, 142, 0, 1, 53, 83, 0,
1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, 2, 136, 180, 0,
1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0,
197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0,
176, 177, 234, 0, 104, 158, 220, 0, 66, 128, 186, 0, 55, 90, 137, 0,
111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, 2, 65, 125, 0,
1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0,
3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0,
84, 220, 246, 0, 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0,
1, 55, 77, 0, 1, 60, 79, 0, 43, 243, 240, 0, 8, 180, 217, 0,
1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, 1, 16, 6, 0
};
private byte[] DefaultSkipProbs = new byte[] { 192, 128, 64 };
private byte[] DefaultInterModeProbs = new byte[]
{
2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, 66, 0,
8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0
};
private byte[] DefaultInterpFilterProbs = new byte[]
{
235, 162, 36, 255, 34, 3, 149, 144
};
private byte[] DefaultIsInterProbs = new byte[] { 9, 102, 187, 225 };
private byte[] DefaultCompModeProbs = new byte[] { 239, 183, 119, 96, 41 };
private byte[] DefaultSingleRefProbs = new byte[]
{
33, 16, 77, 74, 142, 142, 172, 170, 238, 247
};
private byte[] DefaultCompRefProbs = new byte[] { 50, 126, 123, 221, 226 };
private byte[] DefaultYModeProbs0 = new byte[]
{
65, 32, 18, 144, 162, 194, 41, 51, 132, 68, 18, 165, 217, 196, 45, 40,
173, 80, 19, 176, 240, 193, 64, 35, 221, 135, 38, 194, 248, 121, 96, 85
};
private byte[] DefaultYModeProbs1 = new byte[] { 98, 78, 46, 29 };
private byte[] DefaultPartitionProbs = new byte[]
{
199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0
};
private byte[] DefaultMvJointProbs = new byte[] { 32, 64, 96 };
private byte[] DefaultMvSignProbs = new byte[] { 128, 128 };
private byte[] DefaultMvClassProbs = new byte[]
{
224, 144, 192, 168, 192, 176, 192, 198, 198, 245, 216, 128, 176, 160, 176, 176,
192, 198, 198, 208
};
private byte[] DefaultMvClass0BitProbs = new byte[] { 216, 208 };
private byte[] DefaultMvBitsProbs = new byte[]
{
136, 140, 148, 160, 176, 192, 224, 234, 234, 240, 136, 140, 148, 160, 176, 192,
224, 234, 234, 240
};
private byte[] DefaultMvClass0FrProbs = new byte[]
{
128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64
};
private byte[] DefaultMvFrProbs = new byte[] { 64, 96, 64, 64, 96, 64 };
private byte[] DefaultMvClass0HpProbs = new byte[] { 160, 160 };
private byte[] DefaultMvHpProbs = new byte[] { 128, 128 };
//Loop filter deltas taken from the header of the previously decoded frame.
//Decode compares them with the next header to choose which deltas to write.
private sbyte[] LoopFilterRefDeltas;
private sbyte[] LoopFilterModeDeltas;
//Frame slot indices ordered by use: most recently used first, least recently used last.
private LinkedList<int> FrameSlotByLastUse;
//Maps a frame key to the list node of the slot that was assigned to that frame.
private Dictionary<long, LinkedListNode<int>> CachedRefFrames;
//Creates a decoder with zeroed loop filter deltas, an empty reference
//frame cache and the 8 frame slots queued for use.
public Vp9Decoder()
{
    LoopFilterRefDeltas  = new sbyte[4];
    LoopFilterModeDeltas = new sbyte[2];

    FrameSlotByLastUse = new LinkedList<int>();

    //Each slot is pushed to the front, so slot 7 ends up first and slot 0 last.
    int NextSlot = 0;

    while (NextSlot < 8)
    {
        FrameSlotByLastUse.AddFirst(NextSlot);

        NextSlot++;
    }

    CachedRefFrames = new Dictionary<long, LinkedListNode<int>>();
}
//Rebuilds the VP9 compressed and uncompressed frame headers from the values in
//Header and Probs, places them in front of FrameData and hands the result to
//FFmpegWrapper.DecodeFrame (initializing FFmpegWrapper first when needed).
//Keys holds the keys of the current frame and of the 3 reference frames, which
//are used to pick the frame slots written to the header.
//The loop filter deltas of Header are remembered for the next call.
public void Decode(
Vp9FrameKeys Keys,
Vp9FrameHeader Header,
Vp9ProbabilityTables Probs,
byte[] FrameData)
{
//Bits 0 to 5 of the header flags.
//NOTE(review): LastIsKeyFrame, FrameSizeChanged and LastShowFrame are not used below.
bool IsKeyFrame = ((Header.Flags >> 0) & 1) != 0;
bool LastIsKeyFrame = ((Header.Flags >> 1) & 1) != 0;
bool FrameSizeChanged = ((Header.Flags >> 2) & 1) != 0;
bool ErrorResilientMode = ((Header.Flags >> 3) & 1) != 0;
bool LastShowFrame = ((Header.Flags >> 4) & 1) != 0;
bool IsFrameIntra = ((Header.Flags >> 5) & 1) != 0;
//Intra frames are written as hidden frames, every other frame as shown.
bool ShowFrame = !IsFrameIntra;
//Write compressed header.
//It is built first because its length is stored at the end of the uncompressed header.
byte[] CompressedHeaderData;
using (MemoryStream CompressedHeader = new MemoryStream())
{
VpxRangeEncoder Writer = new VpxRangeEncoder(CompressedHeader);
//Transform mode: values 3 and 4 share the 2-bit code 3 and are told apart by an extra bit.
if (!Header.Lossless)
{
if ((uint)Header.TxMode >= 3)
{
Writer.Write(3, 2);
Writer.Write(Header.TxMode == 4);
}
else
{
Writer.Write(Header.TxMode, 2);
}
}
//Transform size probabilities are only written for transform mode 4.
if (Header.TxMode == 4)
{
WriteProbabilityUpdate(Writer, Probs.Tx8x8Probs, DefaultTx8x8Probs);
WriteProbabilityUpdate(Writer, Probs.Tx16x16Probs, DefaultTx16x16Probs);
WriteProbabilityUpdate(Writer, Probs.Tx32x32Probs, DefaultTx32x32Probs);
}
WriteCoefProbabilityUpdate(Writer, Header.TxMode, Probs.CoefProbs, DefaultCoefProbs);
WriteProbabilityUpdate(Writer, Probs.SkipProbs, DefaultSkipProbs);
//Everything below only exists in the compressed header of non-intra frames.
if (!IsFrameIntra)
{
WriteProbabilityUpdateAligned4(Writer, Probs.InterModeProbs, DefaultInterModeProbs);
if (Header.RawInterpolationFilter == 4)
{
WriteProbabilityUpdate(Writer, Probs.InterpFilterProbs, DefaultInterpFilterProbs);
}
WriteProbabilityUpdate(Writer, Probs.IsInterProbs, DefaultIsInterProbs);
//The prediction mode is only written when the sign bias of reference 1
//differs from the one of reference 2 or 3.
if ((Header.RefFrameSignBias[1] & 1) != (Header.RefFrameSignBias[2] & 1) ||
(Header.RefFrameSignBias[1] & 1) != (Header.RefFrameSignBias[3] & 1))
{
if ((uint)Header.CompPredMode >= 1)
{
Writer.Write(1, 1);
Writer.Write(Header.CompPredMode == 2);
}
else
{
Writer.Write(0, 1);
}
}
if (Header.CompPredMode == 2)
{
WriteProbabilityUpdate(Writer, Probs.CompModeProbs, DefaultCompModeProbs);
}
if (Header.CompPredMode != 1)
{
WriteProbabilityUpdate(Writer, Probs.SingleRefProbs, DefaultSingleRefProbs);
}
if (Header.CompPredMode != 0)
{
WriteProbabilityUpdate(Writer, Probs.CompRefProbs, DefaultCompRefProbs);
}
//Y mode probabilities: 4 groups of 9 values, where the first 8 values of
//each group come from YModeProbs0 and the 9th comes from YModeProbs1.
for (int Index = 0; Index < 4; Index++)
{
int i = Index * 8;
int j = Index;
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 0], DefaultYModeProbs0[i + 0]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 1], DefaultYModeProbs0[i + 1]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 2], DefaultYModeProbs0[i + 2]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 3], DefaultYModeProbs0[i + 3]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 4], DefaultYModeProbs0[i + 4]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 5], DefaultYModeProbs0[i + 5]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 6], DefaultYModeProbs0[i + 6]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs0[i + 7], DefaultYModeProbs0[i + 7]);
WriteProbabilityUpdate(Writer, Probs.YModeProbs1[j + 0], DefaultYModeProbs1[j + 0]);
}
WriteProbabilityUpdateAligned4(Writer, Probs.PartitionProbs, DefaultPartitionProbs);
//Motion vector probabilities: joints first, then per component (2 of them)
//the sign, classes, class 0 bit and bits probabilities.
for (int i = 0; i < 3; i++)
{
WriteMvProbabilityUpdate(Writer, Probs.MvJointProbs[i], DefaultMvJointProbs[i]);
}
for (int i = 0; i < 2; i++)
{
WriteMvProbabilityUpdate(Writer, Probs.MvSignProbs[i], DefaultMvSignProbs[i]);
for (int j = 0; j < 10; j++)
{
int Index = i * 10 + j;
WriteMvProbabilityUpdate(Writer, Probs.MvClassProbs[Index], DefaultMvClassProbs[Index]);
}
WriteMvProbabilityUpdate(Writer, Probs.MvClass0BitProbs[i], DefaultMvClass0BitProbs[i]);
for (int j = 0; j < 10; j++)
{
int Index = i * 10 + j;
WriteMvProbabilityUpdate(Writer, Probs.MvBitsProbs[Index], DefaultMvBitsProbs[Index]);
}
}
//Fractional motion vector probabilities, again per component.
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 2; j++)
{
for (int k = 0; k < 3; k++)
{
int Index = i * 2 * 3 + j * 3 + k;
WriteMvProbabilityUpdate(Writer, Probs.MvClass0FrProbs[Index], DefaultMvClass0FrProbs[Index]);
}
}
for (int j = 0; j < 3; j++)
{
int Index = i * 3 + j;
WriteMvProbabilityUpdate(Writer, Probs.MvFrProbs[Index], DefaultMvFrProbs[Index]);
}
}
if (Header.AllowHighPrecisionMv)
{
for (int Index = 0; Index < 2; Index++)
{
WriteMvProbabilityUpdate(Writer, Probs.MvClass0HpProbs[Index], DefaultMvClass0HpProbs[Index]);
WriteMvProbabilityUpdate(Writer, Probs.MvHpProbs[Index], DefaultMvHpProbs[Index]);
}
}
}
Writer.End();
CompressedHeaderData = CompressedHeader.ToArray();
}
//Write uncompressed header.
using (MemoryStream EncodedHeader = new MemoryStream())
{
VpxBitStreamWriter Writer = new VpxBitStreamWriter(EncodedHeader);
Writer.WriteU(2, 2); //Frame marker.
Writer.WriteU(0, 2); //Profile.
Writer.WriteBit(false); //Show existing frame.
Writer.WriteBit(!IsKeyFrame);
Writer.WriteBit(ShowFrame);
Writer.WriteBit(ErrorResilientMode);
if (IsKeyFrame)
{
Writer.WriteU(FrameSyncCode, 24);
Writer.WriteU(0, 3); //Color space.
Writer.WriteU(0, 1); //Color range.
Writer.WriteU(Header.CurrentFrame.Width - 1, 16);
Writer.WriteU(Header.CurrentFrame.Height - 1, 16);
Writer.WriteBit(false); //Render and frame size different.
CachedRefFrames.Clear();
//On key frames, all frame slots are set to the current frame,
//so the value of the selected slot doesn't really matter.
GetNewFrameSlot(Keys.CurrKey);
}
else
{
//The intra flag is only present on frames that are not shown.
if (!ShowFrame)
{
Writer.WriteBit(IsFrameIntra);
}
if (!ErrorResilientMode)
{
Writer.WriteU(0, 2); //Reset frame context.
}
//Only the slot assigned to the current frame is refreshed.
int RefreshFrameFlags = 1 << GetNewFrameSlot(Keys.CurrKey);
if (IsFrameIntra)
{
Writer.WriteU(FrameSyncCode, 24);
Writer.WriteU(RefreshFrameFlags, 8);
Writer.WriteU(Header.CurrentFrame.Width - 1, 16);
Writer.WriteU(Header.CurrentFrame.Height - 1, 16);
Writer.WriteBit(false); //Render and frame size different.
}
else
{
Writer.WriteU(RefreshFrameFlags, 8);
int[] RefFrameIndex = new int[]
{
GetFrameSlot(Keys.Ref0Key),
GetFrameSlot(Keys.Ref1Key),
GetFrameSlot(Keys.Ref2Key)
};
byte[] RefFrameSignBias = Header.RefFrameSignBias;
//Sign bias entry 0 is skipped, entries 1 to 3 belong to the 3 references.
for (int Index = 1; Index < 4; Index++)
{
Writer.WriteU(RefFrameIndex[Index - 1], 3);
Writer.WriteU(RefFrameSignBias[Index], 1);
}
Writer.WriteBit(true); //Frame size with refs.
Writer.WriteBit(false); //Render and frame size different.
Writer.WriteBit(Header.AllowHighPrecisionMv);
//Filter value 4 is signalled by the flag alone, other values follow as 2 bits.
Writer.WriteBit(Header.RawInterpolationFilter == 4);
if (Header.RawInterpolationFilter != 4)
{
Writer.WriteU(Header.RawInterpolationFilter, 2);
}
}
}
if (!ErrorResilientMode)
{
Writer.WriteBit(false); //Refresh frame context.
Writer.WriteBit(true); //Frame parallel decoding mode.
}
Writer.WriteU(0, 2); //Frame context index.
Writer.WriteU(Header.LoopFilterLevel, 6);
Writer.WriteU(Header.LoopFilterSharpness, 3);
Writer.WriteBit(Header.LoopFilterDeltaEnabled);
if (Header.LoopFilterDeltaEnabled)
{
//Only the deltas that differ from the ones remembered from the previous call are written.
bool[] UpdateLoopFilterRefDeltas = new bool[4];
bool[] UpdateLoopFilterModeDeltas = new bool[2];
bool LoopFilterDeltaUpdate = false;
for (int Index = 0; Index < Header.LoopFilterRefDeltas.Length; Index++)
{
sbyte Old = LoopFilterRefDeltas[Index];
sbyte New = Header.LoopFilterRefDeltas[Index];
LoopFilterDeltaUpdate |= (UpdateLoopFilterRefDeltas[Index] = Old != New);
}
for (int Index = 0; Index < Header.LoopFilterModeDeltas.Length; Index++)
{
sbyte Old = LoopFilterModeDeltas[Index];
sbyte New = Header.LoopFilterModeDeltas[Index];
LoopFilterDeltaUpdate |= (UpdateLoopFilterModeDeltas[Index] = Old != New);
}
Writer.WriteBit(LoopFilterDeltaUpdate);
if (LoopFilterDeltaUpdate)
{
for (int Index = 0; Index < Header.LoopFilterRefDeltas.Length; Index++)
{
Writer.WriteBit(UpdateLoopFilterRefDeltas[Index]);
if (UpdateLoopFilterRefDeltas[Index])
{
Writer.WriteS(Header.LoopFilterRefDeltas[Index], 6);
}
}
for (int Index = 0; Index < Header.LoopFilterModeDeltas.Length; Index++)
{
Writer.WriteBit(UpdateLoopFilterModeDeltas[Index]);
if (UpdateLoopFilterModeDeltas[Index])
{
Writer.WriteS(Header.LoopFilterModeDeltas[Index], 6);
}
}
}
}
Writer.WriteU(Header.BaseQIndex, 8);
Writer.WriteDeltaQ(Header.DeltaQYDc);
Writer.WriteDeltaQ(Header.DeltaQUvDc);
Writer.WriteDeltaQ(Header.DeltaQUvAc);
Writer.WriteBit(false); //Segmentation enabled (TODO).
//The tile column count is written as a run of 1 bits, one per step above the minimum.
int MinTileColsLog2 = CalcMinLog2TileCols(Header.CurrentFrame.Width);
int MaxTileColsLog2 = CalcMaxLog2TileCols(Header.CurrentFrame.Width);
int TileColsLog2Diff = Header.TileColsLog2 - MinTileColsLog2;
int TileColsLog2IncMask = (1 << TileColsLog2Diff) - 1;
//If it's less than the maximum, we need to add an extra 0 on the bitstream
//to indicate that it should stop reading.
if (Header.TileColsLog2 < MaxTileColsLog2)
{
Writer.WriteU(TileColsLog2IncMask << 1, TileColsLog2Diff + 1);
}
else
{
Writer.WriteU(TileColsLog2IncMask, TileColsLog2Diff);
}
//Tile rows: one bit for "non zero", then one bit for "greater than 1".
bool TileRowsLog2IsNonZero = Header.TileRowsLog2 != 0;
Writer.WriteBit(TileRowsLog2IsNonZero);
if (TileRowsLog2IsNonZero)
{
Writer.WriteBit(Header.TileRowsLog2 > 1);
}
//Length of the compressed header, which is appended right after the flush.
Writer.WriteU(CompressedHeaderData.Length, 16);
Writer.Flush();
EncodedHeader.Write(CompressedHeaderData, 0, CompressedHeaderData.Length);
if (!FFmpegWrapper.IsInitialized)
{
FFmpegWrapper.Vp9Initialize();
}
FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(EncodedHeader.ToArray(), FrameData));
}
//NOTE(review): the deltas are remembered even when LoopFilterDeltaEnabled is false
//and nothing was written for them - confirm this matches what the decoder tracks.
LoopFilterRefDeltas = Header.LoopFilterRefDeltas;
LoopFilterModeDeltas = Header.LoopFilterModeDeltas;
}
//Takes the least recently used frame slot, makes it the most recently used one,
//associates it with Key and returns the slot index.
private int GetNewFrameSlot(long Key)
{
    LinkedListNode<int> Recycled = FrameSlotByLastUse.Last;

    //Move the node from the back of the list to the front.
    FrameSlotByLastUse.RemoveLast();
    FrameSlotByLastUse.AddFirst(Recycled);

    CachedRefFrames[Key] = Recycled;

    int Slot = Recycled.Value;

    return Slot;
}
//Returns the slot assigned to the frame identified by Key and marks that slot
//as the most recently used one. Returns 0 when no slot is cached for Key.
private int GetFrameSlot(long Key)
{
    if (!CachedRefFrames.TryGetValue(Key, out LinkedListNode<int> Cached))
    {
        //The reference frame was lost.
        //It is unclear what should be done in this case, slot 0 is used.
        return 0;
    }

    FrameSlotByLastUse.Remove(Cached);
    FrameSlotByLastUse.AddFirst(Cached);

    return Cached.Value;
}
//Writes one probability update for every element of New,
//each compared against the element of Old at the same position.
private void WriteProbabilityUpdate(VpxRangeEncoder Writer, byte[] New, byte[] Old)
{
    int Position = 0;

    foreach (byte NewProbability in New)
    {
        WriteProbabilityUpdate(Writer, NewProbability, Old[Position]);

        Position++;
    }
}
//Writes the coefficient probability updates, one block at a time.
//Each block starts with a flag telling whether any of its probabilities changed,
//followed by the per-probability updates when it did.
//Blocks are written up to and including the block whose index equals TxMode,
//or all 4 blocks when TxMode matches none of them.
private void WriteCoefProbabilityUpdate(VpxRangeEncoder Writer, int TxMode, byte[] New, byte[] Old)
{
    //Every entry holds 3 probabilities plus 1 byte added for alignment,
    //the alignment byte takes no part in the updates.
    const int EntryBytes      = 4;
    const int EntriesPerBlock = 2 * 2 * 6 * 6;
    const int BlockBytes      = EntriesPerBlock * EntryBytes;

    bool BlockChanged(int BlockStart)
    {
        int BlockEnd = BlockStart + BlockBytes;

        for (int Pos = BlockStart; Pos < BlockEnd; Pos += EntryBytes)
        {
            if (New[Pos + 0] != Old[Pos + 0] ||
                New[Pos + 1] != Old[Pos + 1] ||
                New[Pos + 2] != Old[Pos + 2])
            {
                return true;
            }
        }

        return false;
    }

    int LastBlock = TxMode >= 0 && TxMode < 4 ? TxMode : 3;

    for (int Block = 0; Block <= LastBlock; Block++)
    {
        int BlockStart = Block * BlockBytes;

        bool Changed = BlockChanged(BlockStart);

        Writer.Write(Changed);

        if (!Changed)
        {
            continue;
        }

        for (int Entry = 0; Entry < EntriesPerBlock; Entry++)
        {
            //The two innermost dimensions of the table, 6 values each.
            int k = (Entry / 6) % 6;
            int l = Entry % 6;

            //With k equal to 0, only the first 3 values of l are written.
            if (k == 0 && l >= 3)
            {
                continue;
            }

            int Pos = BlockStart + Entry * EntryBytes;

            WriteProbabilityUpdate(Writer, New[Pos + 0], Old[Pos + 0]);
            WriteProbabilityUpdate(Writer, New[Pos + 1], Old[Pos + 1]);
            WriteProbabilityUpdate(Writer, New[Pos + 2], Old[Pos + 2]);
        }
    }
}
//Writes probability updates for tables stored in groups of 4 bytes,
//where only the first 3 bytes of each group are probabilities.
private void WriteProbabilityUpdateAligned4(VpxRangeEncoder Writer, byte[] New, byte[] Old)
{
    int GroupStart = 0;

    while (GroupStart < New.Length)
    {
        for (int Lane = 0; Lane < 3; Lane++)
        {
            int Position = GroupStart + Lane;

            WriteProbabilityUpdate(Writer, New[Position], Old[Position]);
        }

        GroupStart += 4;
    }
}
//Writes the "updated" flag for a single probability and,
//when New differs from Old, the encoded difference between them.
private void WriteProbabilityUpdate(VpxRangeEncoder Writer, byte New, byte Old)
{
    if (New == Old)
    {
        Writer.Write(false, DiffUpdateProbability);

        return;
    }

    Writer.Write(true, DiffUpdateProbability);

    WriteProbabilityDelta(Writer, New, Old);
}
//Remaps the change from Old to New into a single index and writes that index.
private void WriteProbabilityDelta(VpxRangeEncoder Writer, int New, int Old)
{
    EncodeTermSubExp(Writer, RemapProbability(New, Old));
}
//Converts the change from probability Old to probability New into the value that is
//written to the stream, which is looked up on MapLut.
private int RemapProbability(int New, int Old)
{
    int NewMinus1 = New - 1;
    int OldMinus1 = Old - 1;

    //For old values on the upper half, both values are mirrored before recentering.
    int Recentered = OldMinus1 * 2 <= 0xff
        ? RecenterNonNeg(NewMinus1, OldMinus1)
        : RecenterNonNeg(0xff - 1 - NewMinus1, 0xff - 1 - OldMinus1);

    return MapLut[Recentered - 1];
}
//Maps New to a non negative value relative to Old:
//values above twice Old are returned unchanged, values from Old upwards map to
//even numbers, and values below Old map to odd numbers.
private int RecenterNonNeg(int New, int Old)
{
    if (New > Old * 2)
    {
        return New;
    }

    int Distance = New - Old;

    return Distance >= 0
        ? Distance * 2
        : -Distance * 2 - 1;
}
//Writes Value using a variable length code: a chain of "not less than" flags selects
//one of the ranges [0, 16), [16, 32), [32, 64) or 64 and above, followed by the
//offset of Value inside the selected range.
private void EncodeTermSubExp(VpxRangeEncoder Writer, int Value)
{
    if (WriteLessThan(Writer, Value, 16))
    {
        Writer.Write(Value, 4);

        return;
    }

    if (WriteLessThan(Writer, Value, 32))
    {
        Writer.Write(Value - 16, 4);

        return;
    }

    if (WriteLessThan(Writer, Value, 64))
    {
        Writer.Write(Value - 32, 5);

        return;
    }

    const int Bits      = 8;
    const int Threshold = (1 << Bits) - 191;

    int Rest = Value - 64;

    if (Rest < Threshold)
    {
        //Small values fit on 7 bits as they are.
        Writer.Write(Rest, Bits - 1);
    }
    else
    {
        //Larger values are halved above the threshold, the lost bit is written afterwards.
        int Excess = Rest - Threshold;

        Writer.Write(Excess / 2 + Threshold, Bits - 1);
        Writer.Write(Excess & 1, 1);
    }
}
//Writes a bit that is set when Value is NOT less than Test,
//and returns whether Value is less than Test.
private bool WriteLessThan(VpxRangeEncoder Writer, int Value, int Test)
{
    Writer.Write(Value >= Test);

    return Value < Test;
}
//Writes the "updated" flag for a motion vector probability and, when New differs
//from Old, the upper 7 bits of New.
private void WriteMvProbabilityUpdate(VpxRangeEncoder Writer, byte New, byte Old)
{
    if (New != Old)
    {
        Writer.Write(true, DiffUpdateProbability);
        Writer.Write(New >> 1, 7);
    }
    else
    {
        Writer.Write(false, DiffUpdateProbability);
    }
}
//Returns the smallest log2 of the tile column count for the given frame width:
//the first value for which 64 << value is not below the number of 64 pixel columns.
private static int CalcMinLog2TileCols(int FrameWidth)
{
    int Sb64Cols = (FrameWidth + 63) / 64;

    int Log2;

    for (Log2 = 0; (64 << Log2) < Sb64Cols; Log2++)
    {
        //Nothing to do, the loop only searches for the value.
    }

    return Log2;
}
//Returns the largest log2 of the tile column count for the given frame width:
//the last value for which the number of 64 pixel columns, shifted right
//by that value plus one, is still below 4.
private static int CalcMaxLog2TileCols(int FrameWidth)
{
    int Sb64Cols = (FrameWidth + 63) / 64;

    int Log2 = 0;

    while ((Sb64Cols >> (Log2 + 1)) >= 4)
    {
        Log2++;
    }

    return Log2;
}
}
}

View file

@ -0,0 +1,79 @@
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.VDec
{
//Dimensions of a frame, laid out sequentially as four 16-bit values (2-byte packing).
[StructLayout(LayoutKind.Sequential, Pack = 2)]
struct Vp9FrameDimensions
{
public short Width;
public short Height;
//NOTE(review): the meaning of the next two fields is unconfirmed, the names are a guess.
public short SubsamplingX; //?
public short SubsamplingY; //?
}
//Frame header values laid out sequentially with 1-byte packing.
//The field order and the marshalling attributes define the binary layout,
//so neither should be changed without changing the producer of the data.
[StructLayout(LayoutKind.Sequential, Pack = 1)]
struct Vp9FrameHeader
{
//Dimensions of the 3 reference frames, followed by the ones of the current frame.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)]
public Vp9FrameDimensions[] RefFrames;
public Vp9FrameDimensions CurrentFrame;
//Bit flags. Vp9Decoder.Decode reads bit 0 as key frame, bit 1 as last frame is key frame,
//bit 2 as frame size changed, bit 3 as error resilient mode, bit 4 as last frame shown
//and bit 5 as intra frame.
public int Flags;
//Sign bias per reference, Vp9Decoder.Decode only reads entries 1 to 3.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
public byte[] RefFrameSignBias;
public byte LoopFilterLevel;
public byte LoopFilterSharpness;
//Base quantizer index and the quantizer deltas (signed).
public byte BaseQIndex;
public sbyte DeltaQYDc;
public sbyte DeltaQUvDc;
public sbyte DeltaQUvAc;
[MarshalAs(UnmanagedType.I1)]
public bool Lossless;
//Transform mode. Vp9Decoder.Decode writes transform size probabilities only for value 4.
public byte TxMode;
[MarshalAs(UnmanagedType.I1)]
public bool AllowHighPrecisionMv;
//Interpolation filter. Vp9Decoder.Decode writes interpolation filter probabilities only for value 4.
public byte RawInterpolationFilter;
//Prediction mode. Vp9Decoder.Decode distinguishes the values 0, 1 and 2.
public byte CompPredMode;
public byte FixCompRef;
public byte VarCompRef0;
public byte VarCompRef1;
public byte TileColsLog2;
public byte TileRowsLog2;
//Segmentation values. Vp9Decoder.Decode does not read them and always writes segmentation as disabled.
[MarshalAs(UnmanagedType.I1)]
public bool SegmentationEnabled;
[MarshalAs(UnmanagedType.I1)]
public bool SegmentationUpdate;
[MarshalAs(UnmanagedType.I1)]
public bool SegmentationTemporalUpdate;
[MarshalAs(UnmanagedType.I1)]
public bool SegmentationAbsOrDeltaUpdate;
//8 * 4 entries each.
//NOTE(review): presumably 8 segments with 4 features per segment - confirm.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4, ArraySubType = UnmanagedType.I1)]
public bool[] FeatureEnabled;
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4)]
public short[] FeatureData;
//Loop filter deltas: 4 reference deltas and 2 mode deltas.
[MarshalAs(UnmanagedType.I1)]
public bool LoopFilterDeltaEnabled;
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
public sbyte[] LoopFilterRefDeltas;
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)]
public sbyte[] LoopFilterModeDeltas;
}
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.VDec
{
//Keys identifying the current frame and its 3 reference frames.
//Vp9Decoder uses them to look up the frame slot assigned to each frame.
struct Vp9FrameKeys
{
public long CurrKey;
public long Ref0Key;
public long Ref1Key;
public long Ref2Key;
}
}

View file

@ -0,0 +1,31 @@
namespace Ryujinx.Graphics.VDec
{
//Probability tables of a frame, one byte per probability.
//Vp9Decoder.Decode compares most of them against its default tables
//to produce the probability updates of the compressed header.
struct Vp9ProbabilityTables
{
//Segmentation tables, not read by Vp9Decoder.Decode.
public byte[] SegmentationTreeProbs;
public byte[] SegmentationPredProbs;
//Transform size tables.
public byte[] Tx8x8Probs;
public byte[] Tx16x16Probs;
public byte[] Tx32x32Probs;
//Coefficient table, stored as groups of 3 probabilities plus 1 alignment byte.
public byte[] CoefProbs;
public byte[] SkipProbs;
//Stored as groups of 3 probabilities plus 1 alignment byte.
public byte[] InterModeProbs;
public byte[] InterpFilterProbs;
public byte[] IsInterProbs;
public byte[] CompModeProbs;
public byte[] SingleRefProbs;
public byte[] CompRefProbs;
//Y mode tables: YModeProbs0 has 8 values per group, YModeProbs1 has the 9th value of each group.
public byte[] YModeProbs0;
public byte[] YModeProbs1;
//Stored as groups of 3 probabilities plus 1 alignment byte.
public byte[] PartitionProbs;
//Motion vector tables.
public byte[] MvJointProbs;
public byte[] MvSignProbs;
public byte[] MvClassProbs;
public byte[] MvClass0BitProbs;
public byte[] MvBitsProbs;
public byte[] MvClass0FrProbs;
public byte[] MvFrProbs;
public byte[] MvClass0HpProbs;
public byte[] MvHpProbs;
}
}

View file

@ -0,0 +1,38 @@
using System.IO;
namespace Ryujinx.Graphics.VDec
{
//Bit stream writer with helpers for the value encodings used on VPx frame headers.
class VpxBitStreamWriter : BitStreamWriter
{
    public VpxBitStreamWriter(Stream BaseStream) : base(BaseStream) { }

    //Writes Value as an unsigned value with ValueSize bits.
    public void WriteU(int Value, int ValueSize)
    {
        WriteBits(Value, ValueSize);
    }

    //Writes Value as a signed value: the absolute value on ValueSize bits with the
    //sign (1 for negative) on one extra bit below it, ValueSize + 1 bits in total.
    //NOTE(review): assumes WriteBits emits the most significant bit first, which
    //puts the magnitude before the sign on the stream - confirm on BitStreamWriter.
    public void WriteS(int Value, int ValueSize)
    {
        bool Sign = Value < 0;

        if (Sign)
        {
            Value = -Value;
        }

        WriteBits((Value << 1) | (Sign ? 1 : 0), ValueSize + 1);
    }

    //Writes a quantizer delta: a flag telling whether the delta is non zero,
    //followed by the delta itself as a signed value with a 4 bits magnitude.
    public void WriteDeltaQ(int Value)
    {
        bool DeltaCoded = Value != 0;

        WriteBit(DeltaCoded);

        if (DeltaCoded)
        {
            //The delta is signed, so it needs the magnitude and the sign bit.
            //Writing it as a plain 4 bits value would drop the sign bit
            //(and store negative values as two's complement), leaving every
            //following header field one bit off for the decoder.
            WriteS(Value, 4);
        }
    }
}
}

View file

@ -0,0 +1,134 @@
using System.IO;
namespace Ryujinx.Graphics.VDec
{
//Boolean range encoder that writes its output to a stream.
//The stream must support seeking and reading, since bytes that were already
//written are read back and modified when a carry needs to be propagated.
class VpxRangeEncoder
{
    private const int HalfProbability = 128;

    //For each range value below 256, the number of left shifts needed
    //to bring it back to 128 or more. The entry for 0 is 0.
    private static readonly int[] NormLut = BuildNormLut();

    private Stream BaseStream;

    private uint LowValue;
    private uint Range;
    private int  Count;

    public VpxRangeEncoder(Stream BaseStream)
    {
        this.BaseStream = BaseStream;

        Range = 0xff;
        Count = -24;

        //The stream always starts with a single false bit.
        Write(false);
    }

    private static int[] BuildNormLut()
    {
        int[] Lut = new int[256];

        for (int Value = 1; Value < Lut.Length; Value++)
        {
            int Shift = 0;

            while ((Value << Shift) < 0x80)
            {
                Shift++;
            }

            Lut[Value] = Shift;
        }

        return Lut;
    }

    //Writes the 8 bits of Value, most significant bit first.
    public void WriteByte(byte Value)
    {
        Write(Value, 8);
    }

    //Writes the lowest ValueSize bits of Value, most significant bit first.
    public void Write(int Value, int ValueSize)
    {
        int Bit = ValueSize;

        while (--Bit >= 0)
        {
            Write(((Value >> Bit) & 1) != 0);
        }
    }

    //Writes a bit where both values are equally likely.
    public void Write(bool Bit)
    {
        Write(Bit, HalfProbability);
    }

    //Writes a bit, where Probability (out of 256) is the likelihood of the bit being false.
    public void Write(bool Bit, int Probability)
    {
        uint Split = 1 + (((Range - 1) * (uint)Probability) >> 8);

        uint NewRange;

        if (Bit)
        {
            //A true bit takes the upper part of the range.
            LowValue += Split;
            NewRange  = Range - Split;
        }
        else
        {
            NewRange = Split;
        }

        int Shift = NormLut[NewRange];

        NewRange <<= Shift;
        Count     += Shift;

        if (Count >= 0)
        {
            //A full byte is ready to be written.
            int Offset = Shift - Count;

            bool HasCarry = ((LowValue << (Offset - 1)) >> 31) != 0;

            if (HasCarry)
            {
                PropagateCarry();
            }

            BaseStream.WriteByte((byte)(LowValue >> (24 - Offset)));

            LowValue = (LowValue << Offset) & 0xffffff;

            Shift  = Count;
            Count -= 8;
        }

        LowValue <<= Shift;

        Range = NewRange;
    }

    //Adds 1 to the last written byte, turning every trailing 0xff byte into 0
    //and incrementing the first byte before them. The stream position is restored.
    private void PropagateCarry()
    {
        long ResumePosition = BaseStream.Position;

        BaseStream.Seek(-1, SeekOrigin.Current);

        while (BaseStream.Position >= 0 && PeekByte() == 0xff)
        {
            BaseStream.WriteByte(0);

            BaseStream.Seek(-2, SeekOrigin.Current);
        }

        BaseStream.WriteByte((byte)(PeekByte() + 1));

        BaseStream.Seek(ResumePosition, SeekOrigin.Begin);
    }

    //Reads the byte at the current position without moving the position.
    private byte PeekByte()
    {
        byte Current = (byte)BaseStream.ReadByte();

        BaseStream.Seek(-1, SeekOrigin.Current);

        return Current;
    }

    //Writes 32 false bits, pushing the pending bits out to the stream.
    public void End()
    {
        int Remaining = 32;

        while (Remaining-- > 0)
        {
            Write(false);
        }
    }
}
}

View file

@ -0,0 +1,69 @@
using Ryujinx.Graphics.Memory;
using System;
namespace Ryujinx.Graphics.Vic
{
//Reads consecutive bit fields from GPU virtual memory.
//Memory is fetched 64 bits at a time, and fields are taken from each
//64 bits word starting at its least significant bit.
class StructUnpacker
{
    private NvGpuVmm Vmm;

    private long Position;

    private ulong Buffer;
    private int   BuffPos;

    public StructUnpacker(NvGpuVmm Vmm, long Position)
    {
        this.Vmm      = Vmm;
        this.Position = Position;

        //Start with an exhausted buffer, so the first read fetches from memory.
        BuffPos = 64;
    }

    //Reads the next Bits bits (at most 32) and returns them as an integer.
    //Throws ArgumentOutOfRangeException when Bits is outside of that range.
    //When the field spans two 64 bits words, the bits taken from the first
    //word end up on the upper part of the returned value.
    public int Read(int Bits)
    {
        if ((uint)Bits > 32)
        {
            throw new ArgumentOutOfRangeException(nameof(Bits));
        }

        int Value = 0;

        for (int Remaining = Bits; Remaining > 0;)
        {
            RefillBufferIfNeeded();

            int Available = 64 - BuffPos;

            int Chunk = Remaining > Available ? Available : Remaining;

            int ChunkBits = (int)(Buffer >> BuffPos) & (int)(0xffffffff >> (32 - Chunk));

            Value = (Value << Chunk) | ChunkBits;

            BuffPos   += Chunk;
            Remaining -= Chunk;
        }

        return Value;
    }

    //Fetches the next 64 bits from memory once all bits of the buffer were consumed.
    private void RefillBufferIfNeeded()
    {
        if (BuffPos < 64)
        {
            return;
        }

        Buffer = Vmm.ReadUInt64(Position);

        Position += 8;

        BuffPos = 0;
    }
}
}

View file

@ -0,0 +1,33 @@
namespace Ryujinx.Graphics.Vic
{
//Describes an output surface: its pixel format, its dimensions,
//its GOB block height and the addresses of its planes.
struct SurfaceOutputConfig
{
    public SurfacePixelFormat PixelFormat;

    public int SurfaceWidth;
    public int SurfaceHeight;
    public int GobBlockHeight;

    public long SurfaceLumaAddress;
    public long SurfaceChromaUAddress;
    public long SurfaceChromaVAddress;

    //Creates a configuration holding the given values.
    public SurfaceOutputConfig(
        SurfacePixelFormat PixelFormat,
        int                SurfaceWidth,
        int                SurfaceHeight,
        int                GobBlockHeight,
        long               OutputSurfaceLumaAddress,
        long               OutputSurfaceChromaUAddress,
        long               OutputSurfaceChromaVAddress)
    {
        SurfaceLumaAddress    = OutputSurfaceLumaAddress;
        SurfaceChromaUAddress = OutputSurfaceChromaUAddress;
        SurfaceChromaVAddress = OutputSurfaceChromaVAddress;

        this.GobBlockHeight = GobBlockHeight;
        this.SurfaceHeight  = SurfaceHeight;
        this.SurfaceWidth   = SurfaceWidth;
        this.PixelFormat    = PixelFormat;
    }
}
}

View file

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Vic
{
// Pixel format identifiers for VIC output surfaces.
// NOTE(review): presumably these are the raw values of the 7-bit pixel format field
// of the VIC configuration structure - confirm against the hardware documentation.
enum SurfacePixelFormat
{
// NOTE(review): presumably RGBA with 8 bits per channel - confirm.
RGBA8 = 0x1f,
// NOTE(review): presumably planar YUV 4:2:0 - confirm.
YUV420P = 0x44
}
}

View file

@ -0,0 +1,107 @@
using Ryujinx.Graphics.Memory;
namespace Ryujinx.Graphics.Vic
{
    /// <summary>
    /// Handles the methods sent to the video image composer: address setters only record their
    /// argument, while Execute decodes the output surface configuration and hands it to the
    /// video decoder so that the planes are copied.
    /// </summary>
    class VideoImageComposer
    {
        private NvGpu Gpu;

        private long ConfigStructAddress;
        private long OutputSurfaceLumaAddress;
        private long OutputSurfaceChromaUAddress;
        private long OutputSurfaceChromaVAddress;

        public VideoImageComposer(NvGpu Gpu)
        {
            this.Gpu = Gpu;
        }

        /// <summary>
        /// Dispatches a method call. Methods without a handler are silently ignored.
        /// </summary>
        /// <param name="Vmm">Memory manager used to read the configuration structure</param>
        /// <param name="MethodOffset">Method identifier, see <see cref="VideoImageComposerMeth"/></param>
        /// <param name="Arguments">Method arguments; only the first one is used by the setters</param>
        public void Process(NvGpuVmm Vmm, int MethodOffset, int[] Arguments)
        {
            switch ((VideoImageComposerMeth)MethodOffset)
            {
                case VideoImageComposerMeth.Execute:
                    Execute(Vmm);
                    break;

                case VideoImageComposerMeth.SetConfigStructOffset:
                    ConfigStructAddress = GetAddress(Arguments);
                    break;

                case VideoImageComposerMeth.SetOutputSurfaceLumaOffset:
                    OutputSurfaceLumaAddress = GetAddress(Arguments);
                    break;

                case VideoImageComposerMeth.SetOutputSurfaceChromaUOffset:
                    OutputSurfaceChromaUAddress = GetAddress(Arguments);
                    break;

                case VideoImageComposerMeth.SetOutputSurfaceChromaVOffset:
                    OutputSurfaceChromaVAddress = GetAddress(Arguments);
                    break;
            }
        }

        // Decodes the surface description found 0x20 bytes into the configuration structure
        // and requests the plane copy using the currently recorded output addresses.
        private void Execute(NvGpuVmm Vmm)
        {
            StructUnpacker Fields = new StructUnpacker(Vmm, ConfigStructAddress + 0x20);

            SurfacePixelFormat Format = (SurfacePixelFormat)Fields.Read(7);

            // The following fields are not used, but they must still be consumed
            // so that the fields after them are read from the right bit position.
            Fields.Read(2); // Chroma horizontal location.
            Fields.Read(2); // Chroma vertical location.
            Fields.Read(4); // Block linear kind.

            int HeightLog2 = Fields.Read(4);

            Fields.Read(3);  // Reserved.
            Fields.Read(10); // Reserved.

            // Dimensions are stored minus one.
            int Width  = Fields.Read(14) + 1;
            int Height = Fields.Read(14) + 1;

            SurfaceOutputConfig Config = new SurfaceOutputConfig(
                Format,
                Width,
                Height,
                1 << HeightLog2,
                OutputSurfaceLumaAddress,
                OutputSurfaceChromaUAddress,
                OutputSurfaceChromaVAddress);

            Gpu.VideoDecoder.CopyPlanes(Vmm, Config);
        }

        // Converts the first argument, taken as an unsigned 32-bit value, into an address
        // by shifting it left by 8 bits.
        private static long GetAddress(int[] Arguments)
        {
            long Offset = (uint)Arguments[0];

            return Offset << 8;
        }
    }
}

View file

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Vic
{
// Method identifiers understood by the video image composer; the numeric value is the
// method offset that gets cast to this enum when a method is processed.
enum VideoImageComposerMeth
{
// Triggers processing with the currently configured addresses.
Execute = 0xc0,
// NOTE(review): declared but apparently not handled by the composer yet - verify.
SetControlParams = 0x1c1,
// The methods below each carry an address in their first argument.
SetConfigStructOffset = 0x1c2,
SetOutputSurfaceLumaOffset = 0x1c8,
SetOutputSurfaceChromaUOffset = 0x1c9,
SetOutputSurfaceChromaVOffset = 0x1ca
}
}

View file

@ -12,10 +12,11 @@ namespace Ryujinx.HLE.HOS.Services.Mm
public IRequest()
{
_commands = new Dictionary<int, ServiceProcessRequest>
_commands = new Dictionary<int, ServiceProcessRequest>()
{
{ 1, InitializeOld },
{ 4, Initialize },
{ 5, Finalize },
{ 6, SetAndWait },
{ 7, Get }
};
@ -40,6 +41,15 @@ namespace Ryujinx.HLE.HOS.Services.Mm
return 0;
}
// Service command handler (registered as command 5): calls UninitializeVideoDecoder on the
// device's GPU, logs that the command is stubbed and returns 0.
public long Finalize(ServiceCtx context)
{
context.Device.Gpu.UninitializeVideoDecoder();
Logger.PrintStub(LogClass.ServiceMm, "Stubbed.");
return 0;
}
public long SetAndWait(ServiceCtx context)
{
Logger.PrintStub(LogClass.ServiceMm, "Stubbed.");

View file

@ -21,13 +21,15 @@ namespace Ryujinx.HLE.HOS.Services.Nv
public override IReadOnlyDictionary<int, ServiceProcessRequest> Commands => _commands;
private static Dictionary<string, IoctlProcessor> _ioctlProcessors =
new Dictionary<string, IoctlProcessor>
{
{ "/dev/nvhost-as-gpu", ProcessIoctlNvGpuAS },
{ "/dev/nvhost-ctrl", ProcessIoctlNvHostCtrl },
{ "/dev/nvhost-ctrl-gpu", ProcessIoctlNvGpuGpu },
{ "/dev/nvhost-gpu", ProcessIoctlNvHostGpu },
{ "/dev/nvmap", ProcessIoctlNvMap }
new Dictionary<string, IoctlProcessor>()
{
{ "/dev/nvhost-as-gpu", ProcessIoctlNvGpuAS },
{ "/dev/nvhost-ctrl", ProcessIoctlNvHostCtrl },
{ "/dev/nvhost-ctrl-gpu", ProcessIoctlNvGpuGpu },
{ "/dev/nvhost-gpu", ProcessIoctlNvHostChannel },
{ "/dev/nvhost-nvdec", ProcessIoctlNvHostChannel },
{ "/dev/nvhost-vic", ProcessIoctlNvHostChannel },
{ "/dev/nvmap", ProcessIoctlNvMap }
};
public static GlobalStateTable Fds { get; private set; }
@ -36,7 +38,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv
public INvDrvServices(Horizon system)
{
_commands = new Dictionary<int, ServiceProcessRequest>
_commands = new Dictionary<int, ServiceProcessRequest>()
{
{ 0, Open },
{ 1, Ioctl },
@ -166,9 +168,9 @@ namespace Ryujinx.HLE.HOS.Services.Nv
return ProcessIoctl(context, cmd, NvGpuGpuIoctl.ProcessIoctl);
}
private static int ProcessIoctlNvHostGpu(ServiceCtx context, int cmd)
private static int ProcessIoctlNvHostChannel(ServiceCtx context, int cmd)
{
return ProcessIoctl(context, cmd, NvHostChannelIoctl.ProcessIoctlGpu);
return ProcessIoctl(context, cmd, NvHostChannelIoctl.ProcessIoctl);
}
private static int ProcessIoctlNvMap(ServiceCtx context, int cmd)

View file

@ -1,7 +0,0 @@
namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
enum NvChannelName
{
Gpu
}
}

View file

@ -0,0 +1,12 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
    /// <summary>
    /// Command buffer descriptor of a channel submit: identifies the memory object holding
    /// the commands, the byte offset of the commands inside it, and their length in 32-bit words.
    /// </summary>
    /// <remarks>
    /// The declared size is 0xc bytes (three 32-bit fields). StructLayoutAttribute.Size must not
    /// be smaller than the total size of the members, and 0xc is also the stride used when the
    /// descriptors are read from the submit arguments.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential, Size = 0xc, Pack = 4)]
    struct NvHostChannelCmdBuf
    {
        public int MemoryId;
        public int Offset;
        public int WordsCount;
    }
}

View file

@ -0,0 +1,11 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
// Argument block of the channel "get parameter" style ioctls: two 32-bit fields, 8 bytes in total.
// The handlers read it from the input buffer, set Value and write it to the output buffer.
[StructLayout(LayoutKind.Sequential, Size = 8, Pack = 4)]
struct NvHostChannelGetParamArg
{
// NOTE(review): presumably selects which parameter is being queried - confirm.
public int Param;
// Result slot filled in by the handler.
public int Value;
}
}

View file

@ -3,6 +3,7 @@ using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.HLE.HOS.Kernel;
using Ryujinx.HLE.HOS.Services.Nv.NvGpuAS;
using Ryujinx.HLE.HOS.Services.Nv.NvMap;
using System;
using System.Collections.Concurrent;
@ -10,37 +11,25 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
class NvHostChannelIoctl
{
private class ChannelsPerProcess
{
public ConcurrentDictionary<NvChannelName, NvChannel> Channels { get; private set; }
public ChannelsPerProcess()
{
Channels = new ConcurrentDictionary<NvChannelName, NvChannel>();
Channels.TryAdd(NvChannelName.Gpu, new NvChannel());
}
}
private static ConcurrentDictionary<KProcess, ChannelsPerProcess> _channels;
private static ConcurrentDictionary<KProcess, NvChannel> _channels;
static NvHostChannelIoctl()
{
_channels = new ConcurrentDictionary<KProcess, ChannelsPerProcess>();
_channels = new ConcurrentDictionary<KProcess, NvChannel>();
}
public static int ProcessIoctlGpu(ServiceCtx context, int cmd)
{
return ProcessIoctl(context, NvChannelName.Gpu, cmd);
}
public static int ProcessIoctl(ServiceCtx context, NvChannelName channel, int cmd)
public static int ProcessIoctl(ServiceCtx context, int cmd)
{
switch (cmd & 0xffff)
{
case 0x0001: return Submit (context);
case 0x0002: return GetSyncpoint (context);
case 0x0003: return GetWaitBase (context);
case 0x0009: return MapBuffer (context);
case 0x000a: return UnmapBuffer (context);
case 0x4714: return SetUserData (context);
case 0x4801: return SetNvMap (context);
case 0x4803: return SetTimeout (context, channel);
case 0x4803: return SetTimeout (context);
case 0x4808: return SubmitGpfifo (context);
case 0x4809: return AllocObjCtx (context);
case 0x480b: return ZcullBind (context);
@ -53,6 +42,138 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
throw new NotImplementedException(cmd.ToString("x8"));
}
// Handles the channel Submit ioctl: reads the submit header from the input buffer, then for each
// command buffer descriptor copies the referenced words out of guest memory and pushes them to the GPU.
// Returns NvResult.InvalidInput when a descriptor names an unknown memory handle or a negative
// word count, and NvResult.Success otherwise.
private static int Submit(ServiceCtx context)
{
    long inputPosition = context.Request.GetBufferType0x21().Position;

    NvHostChannelSubmit args = MemoryHelper.Read<NvHostChannelSubmit>(context.Memory, inputPosition);

    NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;

    for (int index = 0; index < args.CmdBufsCount; index++)
    {
        // The descriptors (0xc bytes each) start right after the 0x10 bytes submit header.
        long cmdBufOffset = inputPosition + 0x10 + index * 0xc;

        NvHostChannelCmdBuf cmdBuf = MemoryHelper.Read<NvHostChannelCmdBuf>(context.Memory, cmdBufOffset);

        NvMapHandle map = NvMapIoctl.GetNvMap(context, cmdBuf.MemoryId);

        // The handle comes from the guest; reject unknown handles like the buffer map/unmap handlers do
        // instead of failing with a null reference.
        if (map == null)
        {
            Logger.PrintWarning(LogClass.ServiceNv, $"Invalid handle 0x{cmdBuf.MemoryId:x8}!");

            return NvResult.InvalidInput;
        }

        // A negative count would make the array allocation below throw.
        if (cmdBuf.WordsCount < 0)
        {
            Logger.PrintWarning(LogClass.ServiceNv, $"Invalid words count 0x{cmdBuf.WordsCount:x8}!");

            return NvResult.InvalidInput;
        }

        int[] cmdBufData = new int[cmdBuf.WordsCount];

        for (int offset = 0; offset < cmdBufData.Length; offset++)
        {
            cmdBufData[offset] = context.Memory.ReadInt32(map.Address + cmdBuf.Offset + offset * 4);
        }

        context.Device.Gpu.PushCommandBuffer(vmm, cmdBufData);
    }

    //TODO: Relocation, waitchecks, etc.

    return NvResult.Success;
}
// Handles the GetSyncpoint ioctl. The argument block is read from the input buffer, its Value
// is forced to 0 and the block is written to the output buffer. Always returns NvResult.Success.
private static int GetSyncpoint(ServiceCtx context)
{
    //TODO

    long srcPosition = context.Request.GetBufferType0x21().Position;
    long dstPosition = context.Request.GetBufferType0x22().Position;

    NvHostChannelGetParamArg param = MemoryHelper.Read<NvHostChannelGetParamArg>(context.Memory, srcPosition);

    param.Value = 0;

    MemoryHelper.Write(context.Memory, dstPosition, param);

    return NvResult.Success;
}
// Handles the GetWaitBase ioctl. The argument block is read from the input buffer, its Value
// is forced to 0 and the block is written to the output buffer. Always returns NvResult.Success.
private static int GetWaitBase(ServiceCtx context)
{
    long srcPosition = context.Request.GetBufferType0x21().Position;
    long dstPosition = context.Request.GetBufferType0x22().Position;

    NvHostChannelGetParamArg param = MemoryHelper.Read<NvHostChannelGetParamArg>(context.Memory, srcPosition);

    param.Value = 0;

    MemoryHelper.Write(context.Memory, dstPosition, param);

    return NvResult.Success;
}
// Handles the MapBuffer ioctl. Each entry (8 bytes, starting 0xc bytes into the arguments) begins
// with a memory handle. The referenced memory is mapped once, on first use, and the 32-bit mapped
// address is written to the second word of the matching entry in the output buffer.
// Returns NvResult.InvalidInput as soon as an unknown handle is found; entries handled before
// that point stay mapped.
private static int MapBuffer(ServiceCtx context)
{
    long srcPosition = context.Request.GetBufferType0x21().Position;
    long dstPosition = context.Request.GetBufferType0x22().Position;

    NvHostChannelMapBuffer header = MemoryHelper.Read<NvHostChannelMapBuffer>(context.Memory, srcPosition);

    NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;

    for (int entry = 0; entry < header.NumEntries; entry++)
    {
        // Offset of this entry, relative to the start of the argument block.
        long entryOffset = 0xc + (long)(entry * 8);

        int handle = context.Memory.ReadInt32(srcPosition + entryOffset);

        NvMapHandle map = NvMapIoctl.GetNvMap(context, handle);

        if (map == null)
        {
            Logger.PrintWarning(LogClass.ServiceNv, $"Invalid handle 0x{handle:x8}!");

            return NvResult.InvalidInput;
        }

        lock (map)
        {
            // A zero address means that the memory was not mapped yet.
            if (map.DmaMapAddress == 0)
            {
                map.DmaMapAddress = vmm.MapLow(map.Address, map.Size);
            }

            context.Memory.WriteInt32(dstPosition + entryOffset + 4, (int)map.DmaMapAddress);
        }
    }

    return NvResult.Success;
}
// Handles the UnmapBuffer ioctl. Each entry (8 bytes, starting 0xc bytes into the arguments) begins
// with a memory handle. Memory that currently has a mapping is freed and its mapped address is reset
// to zero; entries without a mapping are left alone.
// Returns NvResult.InvalidInput as soon as an unknown handle is found.
private static int UnmapBuffer(ServiceCtx context)
{
    long srcPosition = context.Request.GetBufferType0x21().Position;

    NvHostChannelMapBuffer header = MemoryHelper.Read<NvHostChannelMapBuffer>(context.Memory, srcPosition);

    NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;

    for (int entry = 0; entry < header.NumEntries; entry++)
    {
        // Offset of this entry, relative to the start of the argument block.
        long entryOffset = 0xc + (long)(entry * 8);

        int handle = context.Memory.ReadInt32(srcPosition + entryOffset);

        NvMapHandle map = NvMapIoctl.GetNvMap(context, handle);

        if (map == null)
        {
            Logger.PrintWarning(LogClass.ServiceNv, $"Invalid handle 0x{handle:x8}!");

            return NvResult.InvalidInput;
        }

        lock (map)
        {
            // A zero address means that there is nothing to release.
            if (map.DmaMapAddress != 0)
            {
                vmm.Free(map.DmaMapAddress, map.Size);

                map.DmaMapAddress = 0;
            }
        }
    }

    return NvResult.Success;
}
private static int SetUserData(ServiceCtx context)
{
long inputPosition = context.Request.GetBufferType0x21().Position;
@ -73,11 +194,11 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
return NvResult.Success;
}
private static int SetTimeout(ServiceCtx context, NvChannelName channel)
private static int SetTimeout(ServiceCtx context)
{
long inputPosition = context.Request.GetBufferType0x21().Position;
GetChannel(context, channel).Timeout = context.Memory.ReadInt32(inputPosition);
GetChannel(context).Timeout = context.Memory.ReadInt32(inputPosition);
return NvResult.Success;
}
@ -89,7 +210,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
NvHostChannelSubmitGpfifo args = MemoryHelper.Read<NvHostChannelSubmitGpfifo>(context.Memory, inputPosition);
NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;
NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;;
for (int index = 0; index < args.NumEntries; index++)
{
@ -163,7 +284,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
NvHostChannelSubmitGpfifo args = MemoryHelper.Read<NvHostChannelSubmitGpfifo>(context.Memory, inputPosition);
NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;
NvGpuVmm vmm = NvGpuASIoctl.GetASCtx(context).Vmm;;
for (int index = 0; index < args.NumEntries; index++)
{
@ -185,14 +306,9 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
context.Device.Gpu.Pusher.Push(vmm, gpfifo);
}
public static NvChannel GetChannel(ServiceCtx context, NvChannelName channel)
public static NvChannel GetChannel(ServiceCtx context)
{
ChannelsPerProcess cpp = _channels.GetOrAdd(context.Process, (key) =>
{
return new ChannelsPerProcess();
});
return cpp.Channels[channel];
return _channels.GetOrAdd(context.Process, (key) => new NvChannel());
}
public static void UnloadProcess(KProcess process)

View file

@ -0,0 +1,12 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
// Header of the arguments of the buffer map/unmap ioctls, declared as 0xc bytes.
// The per-buffer entries are read by the handlers right after it, at offset 0xc.
[StructLayout(LayoutKind.Sequential, Size = 0xc, Pack = 4)]
struct NvHostChannelMapBuffer
{
// Number of entries that follow the header.
public int NumEntries;
public int DataAddress; //Ignored by the driver.
// NOTE(review): assumes bool is marshaled as 4 bytes here so that the header totals 0xc bytes - confirm.
public bool AttachHostChDas;
}
}

View file

@ -0,0 +1,13 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.HOS.Services.Nv.NvHostChannel
{
    /// <summary>
    /// Header of the channel submit arguments: the number of command buffers, relocations,
    /// syncpoint increments and waitchecks carried by the submission.
    /// </summary>
    /// <remarks>
    /// The declared size is 0x10 bytes (four 32-bit fields). StructLayoutAttribute.Size must not
    /// be smaller than the total size of the members, and 0x10 is also the offset at which the
    /// command buffer descriptors are read when the submission is processed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)]
    struct NvHostChannelSubmit
    {
        public int CmdBufsCount;
        public int RelocsCount;
        public int SyncptIncrsCount;
        public int WaitchecksCount;
    }
}

View file

@ -11,6 +11,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvMap
public int Kind;
public long Address;
public bool Allocated;
public long DmaMapAddress;
private long _dupes;

View file

@ -420,6 +420,36 @@ namespace Ryujinx.Tests.Cpu
};
}
// Value source with the base encodings of the 64-bit vector shift-by-register instructions.
// Register and size fields are zero here; the test ORs them in afterwards.
private static uint[] _ShlReg_V_8B_4H_2S_()
{
    uint[] baseOpcodes =
    {
        // Signed variants.
        0x0E205C00u, // SQRSHL V0.8B, V0.8B, V0.8B
        0x0E204C00u, // SQSHL V0.8B, V0.8B, V0.8B
        0x0E205400u, // SRSHL V0.8B, V0.8B, V0.8B
        0x0E204400u, // SSHL V0.8B, V0.8B, V0.8B

        // Unsigned variants.
        0x2E205C00u, // UQRSHL V0.8B, V0.8B, V0.8B
        0x2E204C00u, // UQSHL V0.8B, V0.8B, V0.8B
        0x2E205400u, // URSHL V0.8B, V0.8B, V0.8B
        0x2E204400u  // USHL V0.8B, V0.8B, V0.8B
    };

    return baseOpcodes;
}
// Value source with the base encodings of the 128-bit vector shift-by-register instructions.
// Register and size fields are zero here; the test ORs them in afterwards.
private static uint[] _ShlReg_V_16B_8H_4S_2D_()
{
    uint[] baseOpcodes =
    {
        // Signed variants.
        0x4E205C00u, // SQRSHL V0.16B, V0.16B, V0.16B
        0x4E204C00u, // SQSHL V0.16B, V0.16B, V0.16B
        0x4E205400u, // SRSHL V0.16B, V0.16B, V0.16B
        0x4E204400u, // SSHL V0.16B, V0.16B, V0.16B

        // Unsigned variants.
        0x6E205C00u, // UQRSHL V0.16B, V0.16B, V0.16B
        0x6E204C00u, // UQSHL V0.16B, V0.16B, V0.16B
        0x6E205400u, // URSHL V0.16B, V0.16B, V0.16B
        0x6E204400u  // USHL V0.16B, V0.16B, V0.16B
    };

    return baseOpcodes;
}
private static uint[] _U_Max_Min_P_V_()
{
return new uint[]
@ -2602,6 +2632,50 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
// Runs every 64-bit vector shift-by-register opcode with the given registers, element size and
// input values, then compares the resulting state against Unicorn using the Fpsr.Qc mask.
[Test, Pairwise]
public void ShlReg_V_8B_4H_2S([ValueSource("_ShlReg_V_8B_4H_2S_")] uint opcodes,
                              [Values(0u)] uint rd,
                              [Values(1u, 0u)] uint rn,
                              [Values(2u, 0u)] uint rm,
                              [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
                              [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
                              [ValueSource("_8B4H2S_")] [Random(0ul, 255ul, RndCnt)] ulong b,
                              [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S>
{
    // Rm lives at bits 16-20, Rn at bits 5-9, Rd at bits 0-4 and the element size at bits 22-23.
    uint registerFields = ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
    uint sizeField      = (size & 3) << 22;

    opcodes |= registerFields | sizeField;

    // The destination is pre-filled on both halves, while the sources only use the lower half.
    SingleOpcode(opcodes, v0: MakeVectorE0E1(z, z), v1: MakeVectorE0(a), v2: MakeVectorE0(b));

    CompareAgainstUnicorn(fpsrMask: Fpsr.Qc);
}
// Runs every 128-bit vector shift-by-register opcode with the given registers, element size and
// input values, then compares the resulting state against Unicorn using the Fpsr.Qc mask.
[Test, Pairwise]
public void ShlReg_V_16B_8H_4S_2D([ValueSource("_ShlReg_V_16B_8H_4S_2D_")] uint opcodes,
                                  [Values(0u)] uint rd,
                                  [Values(1u, 0u)] uint rn,
                                  [Values(2u, 0u)] uint rm,
                                  [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z,
                                  [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a,
                                  [ValueSource("_8B4H2S1D_")] [Random(0ul, 255ul, RndCnt)] ulong b,
                                  [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D>
{
    // Rm lives at bits 16-20, Rn at bits 5-9, Rd at bits 0-4 and the element size at bits 22-23.
    uint registerFields = ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
    uint sizeField      = (size & 3) << 22;

    opcodes |= registerFields | sizeField;

    // Every vector carries the same value on both of its halves.
    SingleOpcode(opcodes, v0: MakeVectorE0E1(z, z), v1: MakeVectorE0E1(a, a), v2: MakeVectorE0E1(b, b));

    CompareAgainstUnicorn(fpsrMask: Fpsr.Qc);
}
[Test, Pairwise, Description("SSUBL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
public void Ssubl_V_8B8H_4H4S_2S2D([Values(0u)] uint rd,
[Values(1u, 0u)] uint rn,