diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index dc8cfc0879..bf030314ec 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -374,10 +374,12 @@ namespace ChocolArm64 SetA64("01011110000xxxxx010100xxxxxxxxxx", AInstEmit.Sha256h2_V, typeof(AOpCodeSimdReg)); SetA64("0101111000101000001010xxxxxxxxxx", AInstEmit.Sha256su0_V, typeof(AOpCodeSimd)); SetA64("01011110000xxxxx011000xxxxxxxxxx", AInstEmit.Sha256su1_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", AInstEmit.Shadd_V, typeof(AOpCodeSimdReg)); SetA64("010111110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_S, typeof(AOpCodeSimdShImm)); SetA64("0x0011110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_V, typeof(AOpCodeSimdShImm)); SetA64("0x101110<<100001001110xxxxxxxxxx", AInstEmit.Shll_V, typeof(AOpCodeSimd)); SetA64("0x00111100>>>xxx100001xxxxxxxxxx", AInstEmit.Shrn_V, typeof(AOpCodeSimdShImm)); + SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", AInstEmit.Shsub_V, typeof(AOpCodeSimdReg)); SetA64("0x1011110>>>>xxx010101xxxxxxxxxx", AInstEmit.Sli_V, typeof(AOpCodeSimdShImm)); SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", AInstEmit.Smax_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", AInstEmit.Smaxp_V, typeof(AOpCodeSimdReg)); @@ -407,6 +409,7 @@ namespace ChocolArm64 SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V, typeof(AOpCodeSimd)); SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd)); + SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Srhadd_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg)); @@ -449,6 +452,7 @@ namespace ChocolArm64 SetA64("011111100x100001110110xxxxxxxxxx", AInstEmit.Ucvtf_S, typeof(AOpCodeSimd)); SetA64("0x1011100x100001110110xxxxxxxxxx", AInstEmit.Ucvtf_V, typeof(AOpCodeSimd)); SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", AInstEmit.Uhadd_V, typeof(AOpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", AInstEmit.Uhsub_V, typeof(AOpCodeSimdReg)); SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", AInstEmit.Umax_V, typeof(AOpCodeSimdReg)); SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", AInstEmit.Umaxp_V, typeof(AOpCodeSimdReg)); SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Umin_V, typeof(AOpCodeSimdReg)); @@ -461,6 +465,7 @@ namespace ChocolArm64 SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_V, typeof(AOpCodeSimdReg)); SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, typeof(AOpCodeSimd)); + SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Urhadd_V, typeof(AOpCodeSimdReg)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm)); SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 92da9ff9cd..1e4002a0e6 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -1042,6 +1042,28 @@ namespace ChocolArm64.Instruction EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Add)); } + public static void Shadd_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpSx(Context, () => + { + Context.Emit(OpCodes.Add); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Shr); + }); + } + + public static void Shsub_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpSx(Context, () => + { + Context.Emit(OpCodes.Sub); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Shr); + }); + } + public static void Smax_V(AILEmitterCtx Context) { Type[] Types = new Type[] { typeof(long), typeof(long) }; @@ -1181,6 +1203,20 @@ namespace ChocolArm64.Instruction EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } + public static void Srhadd_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpSx(Context, () => + { + Context.Emit(OpCodes.Add); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Add); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Shr); + }); + } + public static void Ssubw_V(AILEmitterCtx Context) { EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub)); @@ -1303,28 +1339,20 @@ namespace ChocolArm64.Instruction { Context.Emit(OpCodes.Add); - Context.EmitLdc_I4(1); - + Context.Emit(OpCodes.Ldc_I4_1); Context.Emit(OpCodes.Shr_Un); }); } - public static void Umin_V(AILEmitterCtx Context) + public static void Uhsub_V(AILEmitterCtx Context) { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; + EmitVectorBinaryOpZx(Context, () => + { + Context.Emit(OpCodes.Sub); - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); - - EmitVectorBinaryOpZx(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Uminp_V(AILEmitterCtx Context) - { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; - - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); - - EmitVectorPairwiseOpZx(Context, () => Context.EmitCall(MthdInfo)); + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Shr_Un); + }); } public static void Umax_V(AILEmitterCtx Context) @@ -1345,6 +1373,24 @@ namespace ChocolArm64.Instruction EmitVectorPairwiseOpZx(Context, () => Context.EmitCall(MthdInfo)); } + public static void Umin_V(AILEmitterCtx Context) + { + Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); + + EmitVectorBinaryOpZx(Context, () => Context.EmitCall(MthdInfo)); + } + + public static void Uminp_V(AILEmitterCtx Context) + { + Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); + + EmitVectorPairwiseOpZx(Context, () => Context.EmitCall(MthdInfo)); + } + public static void Umull_V(AILEmitterCtx Context) { EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul)); @@ -1380,6 +1426,20 @@ namespace ChocolArm64.Instruction EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } + public static void Urhadd_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpZx(Context, () => + { + Context.Emit(OpCodes.Add); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Add); + + Context.Emit(OpCodes.Ldc_I4_1); + Context.Emit(OpCodes.Shr_Un); + }); + } + public static void Usqadd_S(AILEmitterCtx Context) { EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate); diff --git a/Ryujinx.Graphics/Gal/GalPipelineState.cs b/Ryujinx.Graphics/Gal/GalPipelineState.cs index d1ffbe76de..7c66951415 100644 --- a/Ryujinx.Graphics/Gal/GalPipelineState.cs +++ b/Ryujinx.Graphics/Gal/GalPipelineState.cs @@ -7,6 +7,8 @@ public bool Enabled; public int Stride; public long VboKey; + public bool Instanced; + public int Divisor; public GalVertexAttrib[] Attribs; } @@ -22,6 +24,8 @@ public float FlipX; public float FlipY; + public int Instance; + public GalFrontFace FrontFace; public bool CullFaceEnabled; diff --git a/Ryujinx.Graphics/Gal/GalTextureFormat.cs b/Ryujinx.Graphics/Gal/GalTextureFormat.cs index d376ea78a1..009d2b826e 100644 --- a/Ryujinx.Graphics/Gal/GalTextureFormat.cs +++ b/Ryujinx.Graphics/Gal/GalTextureFormat.cs @@ -4,6 +4,7 @@ namespace Ryujinx.Graphics.Gal { R32G32B32A32 = 0x1, R16G16B16A16 = 0x3, + R32G32 = 0x4, A8B8G8R8 = 0x8, A2B10G10R10 = 0x9, R32 = 0xf, diff --git a/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs b/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs index bce1981a47..108d3d9b1d 100644 --- a/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs +++ b/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Gal void Set(byte[] Data, int Width, int Height); + void SetMap(int[] Map); + void SetTransform(bool FlipX, bool FlipY, int Top, int Left, int Right, int Bottom); void SetWindowSize(int Width, int Height); diff --git a/Ryujinx.Graphics/Gal/IGalShader.cs b/Ryujinx.Graphics/Gal/IGalShader.cs index e906e6cdcb..4b951fa611 100644 --- a/Ryujinx.Graphics/Gal/IGalShader.cs +++ b/Ryujinx.Graphics/Gal/IGalShader.cs @@ -11,8 +11,6 @@ namespace Ryujinx.Graphics.Gal IEnumerable GetConstBufferUsage(long Key); IEnumerable GetTextureUsage(long Key); - void EnsureTextureBinding(string UniformName, int Value); - void Bind(long Key); void Unbind(GalShaderType Type); diff --git a/Ryujinx.Graphics/Gal/ImageFormatConverter.cs b/Ryujinx.Graphics/Gal/ImageFormatConverter.cs index fd513aaeaa..8684ba9618 100644 --- a/Ryujinx.Graphics/Gal/ImageFormatConverter.cs +++ b/Ryujinx.Graphics/Gal/ImageFormatConverter.cs @@ -148,6 +148,9 @@ namespace Ryujinx.Graphics.Gal case GalFrameBufferFormat.RG8Unorm: return GalImageFormat.R8G8_UNORM; case GalFrameBufferFormat.BGRA8Unorm: return GalImageFormat.A8B8G8R8_UNORM_PACK32; case GalFrameBufferFormat.BGRA8Srgb: return GalImageFormat.A8B8G8R8_SRGB_PACK32; + case GalFrameBufferFormat.RG32Float: return GalImageFormat.R32G32_SFLOAT; + case GalFrameBufferFormat.RG32Sint: return GalImageFormat.R32G32_SINT; + case GalFrameBufferFormat.RG32Uint: return GalImageFormat.R32G32_UINT; } throw new NotImplementedException(Format.ToString()); @@ -176,6 +179,9 @@ namespace Ryujinx.Graphics.Gal case GalImageFormat.R16G16B16A16_SFLOAT: case GalImageFormat.R16G16B16A16_SINT: case GalImageFormat.R16G16B16A16_UINT: + case GalImageFormat.R32G32_SFLOAT: + case GalImageFormat.R32G32_SINT: + case GalImageFormat.R32G32_UINT: case GalImageFormat.A8B8G8R8_SNORM_PACK32: case GalImageFormat.A8B8G8R8_UNORM_PACK32: case GalImageFormat.A8B8G8R8_SINT_PACK32: diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 9cedfe8cd6..959d0e3292 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -135,6 +135,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL case GalImageFormat.R16G16B16A16_SFLOAT: return (PixelInternalFormat.Rgba16f, PixelFormat.Rgba, PixelType.HalfFloat); case GalImageFormat.R16G16B16A16_SINT: return (PixelInternalFormat.Rgba16i, PixelFormat.RgbaInteger, PixelType.Short); case GalImageFormat.R16G16B16A16_UINT: return (PixelInternalFormat.Rgba16ui, PixelFormat.RgbaInteger, PixelType.UnsignedShort); + case GalImageFormat.R32G32_SFLOAT: return (PixelInternalFormat.Rg32f, PixelFormat.Rg, PixelType.Float); + case GalImageFormat.R32G32_SINT: return (PixelInternalFormat.Rg32i, PixelFormat.RgInteger, PixelType.Int); + case GalImageFormat.R32G32_UINT: return (PixelInternalFormat.Rg32ui, PixelFormat.RgInteger, PixelType.UnsignedInt); case GalImageFormat.A8B8G8R8_SNORM_PACK32: return (PixelInternalFormat.Rgba8Snorm, PixelFormat.Rgba, PixelType.Byte); case GalImageFormat.A8B8G8R8_UNORM_PACK32: return (PixelInternalFormat.Rgba8, PixelFormat.Rgba, PixelType.UnsignedByte); case GalImageFormat.A8B8G8R8_SINT_PACK32: return (PixelInternalFormat.Rgba8i, PixelFormat.RgbaInteger, PixelType.Byte); @@ -216,16 +219,31 @@ namespace Ryujinx.Graphics.Gal.OpenGL { switch (Wrap) { - case GalTextureWrap.Repeat: return TextureWrapMode.Repeat; - case GalTextureWrap.MirroredRepeat: return TextureWrapMode.MirroredRepeat; - case GalTextureWrap.ClampToEdge: return TextureWrapMode.ClampToEdge; - case GalTextureWrap.ClampToBorder: return TextureWrapMode.ClampToBorder; - case GalTextureWrap.Clamp: return TextureWrapMode.Clamp; + case GalTextureWrap.Repeat: return TextureWrapMode.Repeat; + case GalTextureWrap.MirroredRepeat: return TextureWrapMode.MirroredRepeat; + case GalTextureWrap.ClampToEdge: return TextureWrapMode.ClampToEdge; + case GalTextureWrap.ClampToBorder: return TextureWrapMode.ClampToBorder; + case GalTextureWrap.Clamp: return TextureWrapMode.Clamp; + } - //TODO: Those needs extensions (and are currently wrong). - case GalTextureWrap.MirrorClampToEdge: return TextureWrapMode.ClampToEdge; - case GalTextureWrap.MirrorClampToBorder: return TextureWrapMode.ClampToBorder; - case GalTextureWrap.MirrorClamp: return TextureWrapMode.Clamp; + if (OGLExtension.HasTextureMirrorClamp()) + { + switch (Wrap) + { + case GalTextureWrap.MirrorClampToEdge: return (TextureWrapMode)ExtTextureMirrorClamp.MirrorClampToEdgeExt; + case GalTextureWrap.MirrorClampToBorder: return (TextureWrapMode)ExtTextureMirrorClamp.MirrorClampToBorderExt; + case GalTextureWrap.MirrorClamp: return (TextureWrapMode)ExtTextureMirrorClamp.MirrorClampExt; + } + } + else + { + //Fallback to non-mirrored clamps + switch (Wrap) + { + case GalTextureWrap.MirrorClampToEdge: return TextureWrapMode.ClampToEdge; + case GalTextureWrap.MirrorClampToBorder: return TextureWrapMode.ClampToBorder; + case GalTextureWrap.MirrorClamp: return TextureWrapMode.Clamp; + } } throw new ArgumentException(nameof(Wrap)); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs index 69fce6d31d..5ad422980c 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs @@ -8,6 +8,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL private static bool EnhancedLayouts; + private static bool TextureMirrorClamp; + public static bool HasEnhancedLayouts() { EnsureInitialized(); @@ -15,6 +17,13 @@ namespace Ryujinx.Graphics.Gal.OpenGL return EnhancedLayouts; } + public static bool HasTextureMirrorClamp() + { + EnsureInitialized(); + + return TextureMirrorClamp; + } + private static void EnsureInitialized() { if (Initialized) @@ -23,6 +32,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL } EnhancedLayouts = HasExtension("GL_ARB_enhanced_layouts"); + + TextureMirrorClamp = HasExtension("GL_EXT_texture_mirror_clamp"); } private static bool HasExtension(string Name) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs index e0f12e4eca..12239c4f06 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs @@ -21,18 +21,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } - private static readonly DrawBuffersEnum[] DrawBuffers = new DrawBuffersEnum[] - { - DrawBuffersEnum.ColorAttachment0, - DrawBuffersEnum.ColorAttachment1, - DrawBuffersEnum.ColorAttachment2, - DrawBuffersEnum.ColorAttachment3, - DrawBuffersEnum.ColorAttachment4, - DrawBuffersEnum.ColorAttachment5, - DrawBuffersEnum.ColorAttachment6, - DrawBuffersEnum.ColorAttachment7, - }; - private const int NativeWidth = 1280; private const int NativeHeight = 720; @@ -194,6 +182,25 @@ namespace Ryujinx.Graphics.Gal.OpenGL ReadTex = RawTex; } + public void SetMap(int[] Map) + { + if (Map != null && Map.Length > 0) + { + DrawBuffersEnum[] Mode = new DrawBuffersEnum[Map.Length]; + + for (int i = 0; i < Map.Length; i++) + { + Mode[i] = DrawBuffersEnum.ColorAttachment0 + Map[i]; + } + + GL.DrawBuffers(Mode.Length, Mode); + } + else + { + GL.DrawBuffer(DrawBufferMode.ColorAttachment0); + } + } + public void SetTransform(bool FlipX, bool FlipY, int Top, int Left, int Right, int Bottom) { this.FlipX = FlipX; @@ -421,8 +428,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL } GL.BindFramebuffer(FramebufferTarget.DrawFramebuffer, DummyFrameBuffer); - - GL.DrawBuffers(8, DrawBuffers); } private void Attach(ref int OldHandle, int NewHandle, FramebufferAttachment FbAttachment) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs index 5828921d44..051b105048 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs @@ -126,9 +126,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL BindVertexLayout(New); - if (New.FlipX != Old.FlipX || New.FlipY != Old.FlipY) + if (New.FlipX != Old.FlipX || New.FlipY != Old.FlipY || New.Instance != Old.Instance) { - Shader.SetFlip(New.FlipX, New.FlipY); + Shader.SetExtraData(New.FlipX, New.FlipY, New.Instance); } //Note: Uncomment SetFrontFace and SetCullFace when flipping issues are solved @@ -290,8 +290,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL private void BindConstBuffers(GalPipelineState New) { - //Index 0 is reserved - int FreeBinding = 1; + int FreeBinding = OGLShader.ReservedCbufCount; void BindIfNotNull(OGLShaderStage Stage) { @@ -385,6 +384,15 @@ namespace Ryujinx.Graphics.Gal.OpenGL { GL.VertexAttribPointer(Attrib.Index, Size, Type, Normalize, Binding.Stride, Offset); } + + if (Binding.Instanced && Binding.Divisor != 0) + { + GL.VertexAttribDivisor(Attrib.Index, 1); + } + else + { + GL.VertexAttribDivisor(Attrib.Index, 0); + } } } } diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index 9c7b8668fe..73d37b8791 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -9,6 +9,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL { class OGLShader : IGalShader { + public const int ReservedCbufCount = 1; + + private const int ExtraDataSize = 4; + public OGLShaderProgram Current; private ConcurrentDictionary Stages; @@ -96,16 +100,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL return Enumerable.Empty(); } - public void EnsureTextureBinding(string UniformName, int Value) - { - BindProgram(); - - int Location = GL.GetUniformLocation(CurrentProgramHandle, UniformName); - - GL.Uniform1(Location, Value); - } - - public unsafe void SetFlip(float X, float Y) + public unsafe void SetExtraData(float FlipX, float FlipY, int Instance) { BindProgram(); @@ -113,14 +108,15 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.BindBuffer(BufferTarget.UniformBuffer, ExtraUboHandle); - float* Data = stackalloc float[4]; - Data[0] = X; - Data[1] = Y; + float* Data = stackalloc float[ExtraDataSize]; + Data[0] = FlipX; + Data[1] = FlipY; + Data[2] = BitConverter.Int32BitsToSingle(Instance); //Invalidate buffer - GL.BufferData(BufferTarget.UniformBuffer, 4 * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw); + GL.BufferData(BufferTarget.UniformBuffer, ExtraDataSize * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw); - GL.BufferSubData(BufferTarget.UniformBuffer, IntPtr.Zero, 4 * sizeof(float), (IntPtr)Data); + GL.BufferSubData(BufferTarget.UniformBuffer, IntPtr.Zero, ExtraDataSize * sizeof(float), (IntPtr)Data); } public void Bind(long Key) @@ -188,6 +184,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL CheckProgramLink(Handle); BindUniformBlocks(Handle); + BindTextureLocations(Handle); Programs.Add(Current, Handle); } @@ -205,7 +202,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.BindBuffer(BufferTarget.UniformBuffer, ExtraUboHandle); - GL.BufferData(BufferTarget.UniformBuffer, 4 * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw); + GL.BufferData(BufferTarget.UniformBuffer, ExtraDataSize * sizeof(float), IntPtr.Zero, BufferUsageHint.StreamDraw); GL.BindBufferBase(BufferRangeTarget.UniformBuffer, 0, ExtraUboHandle); } @@ -227,8 +224,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.UniformBlockBinding(ProgramHandle, ExtraBlockindex, 0); - //First index is reserved - int FreeBinding = 1; + int FreeBinding = ReservedCbufCount; void BindUniformBlocksIfNotNull(OGLShaderStage Stage) { @@ -258,6 +254,34 @@ namespace Ryujinx.Graphics.Gal.OpenGL BindUniformBlocksIfNotNull(Current.Fragment); } + private void BindTextureLocations(int ProgramHandle) + { + int Index = 0; + + void BindTexturesIfNotNull(OGLShaderStage Stage) + { + if (Stage != null) + { + foreach (ShaderDeclInfo Decl in Stage.TextureUsage) + { + int Location = GL.GetUniformLocation(ProgramHandle, Decl.Name); + + GL.Uniform1(Location, Index); + + Index++; + } + } + } + + GL.UseProgram(ProgramHandle); + + BindTexturesIfNotNull(Current.Vertex); + BindTexturesIfNotNull(Current.TessControl); + BindTexturesIfNotNull(Current.TessEvaluation); + BindTexturesIfNotNull(Current.Geometry); + BindTexturesIfNotNull(Current.Fragment); + } + private static void CheckProgramLink(int Handle) { int Status = 0; diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs index 56745bc150..c837632ec2 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs @@ -16,7 +16,6 @@ namespace Ryujinx.Graphics.Gal.Shader public const int VertexIdAttr = 0x2fc; public const int FaceAttr = 0x3fc; - public const int MaxFrameBufferAttachments = 8; public const int MaxUboSize = 1024; public const int GlPositionVec4Index = 7; @@ -42,10 +41,15 @@ namespace Ryujinx.Graphics.Gal.Shader public const string ExtraUniformBlockName = "Extra"; public const string FlipUniformName = "flip"; + public const string InstanceUniformName = "instance"; - public const string ProgramName = "program"; - public const string ProgramAName = ProgramName + "_a"; - public const string ProgramBName = ProgramName + "_b"; + public const string BasicBlockName = "bb"; + public const string BasicBlockAName = BasicBlockName + "_a"; + public const string BasicBlockBName = BasicBlockName + "_b"; + + public const int SsyStackSize = 16; + public const string SsyStackName = "ssy_stack"; + public const string SsyCursorName = "ssy_cursor"; private string[] StagePrefixes = new string[] { "vp", "tcp", "tep", "gp", "fp" }; @@ -94,16 +98,33 @@ namespace Ryujinx.Graphics.Gal.Shader m_Preds = new Dictionary(); } - public GlslDecl(ShaderIrBlock[] Blocks, GalShaderType ShaderType) : this(ShaderType) + public GlslDecl(ShaderIrBlock[] Blocks, GalShaderType ShaderType, ShaderHeader Header) + : this(ShaderType) { StagePrefix = StagePrefixes[(int)ShaderType] + "_"; if (ShaderType == GalShaderType.Fragment) { - //Note: Replace 1 with MaxFrameBufferAttachments when attachments start to work - for (int Index = 0; Index < 1; Index++) + int Index = 0; + + for (int Attachment = 0; Attachment < 8; Attachment++) { - m_Gprs.Add(Index * 4, new ShaderDeclInfo(FragmentOutputName + Index, Index * 4, false, 0, 4)); + for (int Component = 0; Component < 4; Component++) + { + if (Header.OmapTargets[Attachment].ComponentEnabled(Component)) + { + m_Gprs.TryAdd(Index, new ShaderDeclInfo(GetGprName(Index), Index)); + + Index++; + } + } + } + + if (Header.OmapDepth) + { + Index = Header.DepthRegister; + + m_Gprs.TryAdd(Index, new ShaderDeclInfo(GetGprName(Index), Index)); } } @@ -153,6 +174,11 @@ namespace Ryujinx.Graphics.Gal.Shader return Combined; } + public static string GetGprName(int Index) + { + return GprName + Index; + } + private static void Merge( Dictionary C, Dictionary A, @@ -316,9 +342,9 @@ namespace Ryujinx.Graphics.Gal.Shader case ShaderIrOperGpr Gpr: { - if (!Gpr.IsConst && !HasName(m_Gprs, Gpr.Index)) + if (!Gpr.IsConst) { - string Name = GprName + Gpr.Index; + string Name = GetGprName(Gpr.Index); m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index)); } diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index 726379846d..104fd72354 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -120,8 +120,8 @@ namespace Ryujinx.Graphics.Gal.Shader Blocks = ShaderDecoder.Decode(Memory, VpAPosition); BlocksB = ShaderDecoder.Decode(Memory, VpBPosition); - GlslDecl DeclVpA = new GlslDecl(Blocks, ShaderType); - GlslDecl DeclVpB = new GlslDecl(BlocksB, ShaderType); + GlslDecl DeclVpA = new GlslDecl(Blocks, ShaderType, Header); + GlslDecl DeclVpB = new GlslDecl(BlocksB, ShaderType, HeaderB); Decl = GlslDecl.Merge(DeclVpA, DeclVpB); @@ -136,7 +136,7 @@ namespace Ryujinx.Graphics.Gal.Shader Blocks = ShaderDecoder.Decode(Memory, Position); BlocksB = null; - Decl = new GlslDecl(Blocks, ShaderType); + Decl = new GlslDecl(Blocks, ShaderType, Header); return Decompile(); } @@ -155,18 +155,19 @@ namespace Ryujinx.Graphics.Gal.Shader PrintDeclOutAttributes(); PrintDeclGprs(); PrintDeclPreds(); + PrintDeclSsy(); if (BlocksB != null) { - PrintBlockScope(Blocks[0], null, null, "void " + GlslDecl.ProgramAName + "()", IdentationStr); + PrintBlockScope(Blocks, GlslDecl.BasicBlockAName); SB.AppendLine(); - PrintBlockScope(BlocksB[0], null, null, "void " + GlslDecl.ProgramBName + "()", IdentationStr); + PrintBlockScope(BlocksB, GlslDecl.BasicBlockBName); } else { - PrintBlockScope(Blocks[0], null, null, "void " + GlslDecl.ProgramName + "()", IdentationStr); + PrintBlockScope(Blocks, GlslDecl.BasicBlockName); } SB.AppendLine(); @@ -241,10 +242,15 @@ namespace Ryujinx.Graphics.Gal.Shader { if (Decl.ShaderType == GalShaderType.Vertex) { - SB.AppendLine("layout (std140) uniform " + GlslDecl.ExtraUniformBlockName + "{"); + //Memory layout here is [flip_x, flip_y, instance, unused] + //It's using 4 bytes, not 8 + + SB.AppendLine("layout (std140) uniform " + GlslDecl.ExtraUniformBlockName + " {"); SB.AppendLine(IdentationStr + "vec2 " + GlslDecl.FlipUniformName + ";"); + SB.AppendLine(IdentationStr + "int " + GlslDecl.InstanceUniformName + ";"); + SB.AppendLine("};"); } @@ -304,7 +310,17 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclOutAttributes() { - if (Decl.ShaderType != GalShaderType.Fragment) + if (Decl.ShaderType == GalShaderType.Fragment) + { + for (int Attachment = 0; Attachment < 8; Attachment++) + { + if (Header.OmapTargets[Attachment].Enabled) + { + SB.AppendLine("layout (location = " + Attachment + ") out vec4 " + GlslDecl.FragmentOutputName + Attachment + ";"); + } + } + } + else { SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") out vec4 " + GlslDecl.PositionOutAttrName + ";"); } @@ -342,6 +358,13 @@ namespace Ryujinx.Graphics.Gal.Shader PrintDecls(Decl.Preds, "bool"); } + private void PrintDeclSsy() + { + SB.AppendLine("uint " + GlslDecl.SsyCursorName + ";"); + + SB.AppendLine("uint " + GlslDecl.SsyStackName + "[" + GlslDecl.SsyStackSize + "];" + Environment.NewLine); + } + private void PrintDecls(IReadOnlyDictionary Dict, string CustomType = null, string Suffix = "") { foreach (ShaderDeclInfo DeclInfo in Dict.Values.OrderBy(DeclKeySelector)) @@ -417,14 +440,16 @@ namespace Ryujinx.Graphics.Gal.Shader } } + SB.AppendLine(IdentationStr + "uint pc;"); + if (BlocksB != null) { - SB.AppendLine(IdentationStr + GlslDecl.ProgramAName + "();"); - SB.AppendLine(IdentationStr + GlslDecl.ProgramBName + "();"); + PrintProgram(Blocks, GlslDecl.BasicBlockAName); + PrintProgram(BlocksB, GlslDecl.BasicBlockBName); } else { - SB.AppendLine(IdentationStr + GlslDecl.ProgramName + "();"); + PrintProgram(Blocks, GlslDecl.BasicBlockName); } if (Decl.ShaderType != GalShaderType.Geometry) @@ -432,9 +457,62 @@ namespace Ryujinx.Graphics.Gal.Shader PrintAttrToOutput(); } + if (Decl.ShaderType == GalShaderType.Fragment) + { + if (Header.OmapDepth) + { + SB.AppendLine(IdentationStr + "gl_FragDepth = " + GlslDecl.GetGprName(Header.DepthRegister) + ";"); + } + + int GprIndex = 0; + + for (int Attachment = 0; Attachment < 8; Attachment++) + { + string Output = GlslDecl.FragmentOutputName + Attachment; + + OmapTarget Target = Header.OmapTargets[Attachment]; + + for (int Component = 0; Component < 4; Component++) + { + if (Target.ComponentEnabled(Component)) + { + SB.AppendLine(IdentationStr + Output + "[" + Component + "] = " + GlslDecl.GetGprName(GprIndex) + ";"); + + GprIndex++; + } + } + } + } + SB.AppendLine("}"); } + private void PrintProgram(ShaderIrBlock[] Blocks, string Name) + { + const string Ident1 = IdentationStr; + const string Ident2 = Ident1 + IdentationStr; + const string Ident3 = Ident2 + IdentationStr; + const string Ident4 = Ident3 + IdentationStr; + + SB.AppendLine(Ident1 + "pc = " + GetBlockPosition(Blocks[0]) + ";"); + SB.AppendLine(Ident1 + "do {"); + SB.AppendLine(Ident2 + "switch (pc) {"); + + foreach (ShaderIrBlock Block in Blocks) + { + string FunctionName = Block.Position.ToString("x8"); + + SB.AppendLine(Ident3 + "case 0x" + FunctionName + ": pc = " + Name + "_" + FunctionName + "(); break;"); + } + + SB.AppendLine(Ident3 + "default:"); + SB.AppendLine(Ident4 + "pc = 0;"); + SB.AppendLine(Ident4 + "break;"); + + SB.AppendLine(Ident2 + "}"); + SB.AppendLine(Ident1 + "} while (pc != 0);"); + } + private void PrintAttrToOutput(string Identation = IdentationStr) { foreach (KeyValuePair KV in Decl.OutAttributes) @@ -468,193 +546,145 @@ namespace Ryujinx.Graphics.Gal.Shader } } - private void PrintBlockScope( - ShaderIrBlock Block, - ShaderIrBlock EndBlock, - ShaderIrBlock LoopBlock, - string ScopeName, - string Identation, - bool IsDoWhile = false) + private void PrintBlockScope(ShaderIrBlock[] Blocks, string Name) { - string UpIdent = Identation.Substring(0, Identation.Length - IdentationStr.Length); + foreach (ShaderIrBlock Block in Blocks) + { + SB.AppendLine("uint " + Name + "_" + Block.Position.ToString("x8") + "() {"); - if (IsDoWhile) - { - SB.AppendLine(UpIdent + "do {"); - } - else - { - SB.AppendLine(UpIdent + ScopeName + " {"); - } + PrintNodes(Block, Block.GetNodes()); - while (Block != null && Block != EndBlock) - { - ShaderIrNode[] Nodes = Block.GetNodes(); - - Block = PrintNodes(Block, EndBlock, LoopBlock, Identation, Nodes); - } - - if (IsDoWhile) - { - SB.AppendLine(UpIdent + "} " + ScopeName + ";"); - } - else - { - SB.AppendLine(UpIdent + "}"); + SB.AppendLine("}" + Environment.NewLine); } } - private ShaderIrBlock PrintNodes( - ShaderIrBlock Block, - ShaderIrBlock EndBlock, - ShaderIrBlock LoopBlock, - string Identation, - params ShaderIrNode[] Nodes) + private void PrintNode(ShaderIrBlock Block, ShaderIrNode Node, string Identation) { - /* - * Notes about control flow and if-else/loop generation: - * The code assumes that the program has sane control flow, - * that is, there's no jumps to a location after another jump or - * jump target (except for the end of an if-else block), and backwards - * jumps to a location before the last loop dominator. - * Such cases needs to be transformed on a step before the GLSL code - * generation to ensure that we have sane graphs to work with. - * TODO: Such transformation is not yet implemented. - */ - string NewIdent = Identation + IdentationStr; - - ShaderIrBlock LoopTail = GetLoopTailBlock(Block); - - if (LoopTail != null && LoopBlock != Block) + if (Node is ShaderIrCond Cond) { - //Shoock! kuma shock! We have a loop here! - //The entire sequence needs to be inside a do-while block. - ShaderIrBlock LoopEnd = GetDownBlock(LoopTail); + string IfExpr = GetSrcExpr(Cond.Pred, true); - PrintBlockScope(Block, LoopEnd, Block, "while (false)", NewIdent, IsDoWhile: true); + if (Cond.Not) + { + IfExpr = "!(" + IfExpr + ")"; + } - return LoopEnd; + SB.AppendLine(Identation + "if (" + IfExpr + ") {"); + + if (Cond.Child is ShaderIrOp Op && Op.Inst == ShaderIrInst.Bra) + { + SB.AppendLine(Identation + IdentationStr + "return " + GetBlockPosition(Block.Branch) + ";"); + } + else + { + PrintNode(Block, Cond.Child, Identation + IdentationStr); + } + + SB.AppendLine(Identation + "}"); } - - foreach (ShaderIrNode Node in Nodes) + else if (Node is ShaderIrAsg Asg) { - if (Node is ShaderIrCond Cond) + if (IsValidOutOper(Asg.Dst)) { - string IfExpr = GetSrcExpr(Cond.Pred, true); + string Expr = GetSrcExpr(Asg.Src, true); - if (Cond.Not) - { - IfExpr = "!(" + IfExpr + ")"; - } + Expr = GetExprWithCast(Asg.Dst, Asg.Src, Expr); - if (Cond.Child is ShaderIrOp Op && Op.Inst == ShaderIrInst.Bra) - { - //Branch is a loop branch and would result in infinite recursion. - if (Block.Branch.Position <= Block.Position) - { - SB.AppendLine(Identation + "if (" + IfExpr + ") {"); - - SB.AppendLine(Identation + IdentationStr + "continue;"); - - SB.AppendLine(Identation + "}"); - - continue; - } - - string SubScopeName = "if (!" + IfExpr + ")"; - - PrintBlockScope(Block.Next, Block.Branch, LoopBlock, SubScopeName, NewIdent); - - ShaderIrBlock IfElseEnd = GetUpBlock(Block.Branch).Branch; - - if (IfElseEnd?.Position > Block.Branch.Position) - { - PrintBlockScope(Block.Branch, IfElseEnd, LoopBlock, "else", NewIdent); - - return IfElseEnd; - } - - return Block.Branch; - } - else - { - SB.AppendLine(Identation + "if (" + IfExpr + ") {"); - - PrintNodes(Block, EndBlock, LoopBlock, NewIdent, Cond.Child); - - SB.AppendLine(Identation + "}"); - } + SB.AppendLine(Identation + GetDstOperName(Asg.Dst) + " = " + Expr + ";"); } - else if (Node is ShaderIrAsg Asg) + } + else if (Node is ShaderIrOp Op) + { + switch (Op.Inst) { - if (IsValidOutOper(Asg.Dst)) + case ShaderIrInst.Bra: { - string Expr = GetSrcExpr(Asg.Src, true); + SB.AppendLine(Identation + "return " + GetBlockPosition(Block.Branch) + ";"); - Expr = GetExprWithCast(Asg.Dst, Asg.Src, Expr); + break; + } - SB.AppendLine(Identation + GetDstOperName(Asg.Dst) + " = " + Expr + ";"); - } - } - else if (Node is ShaderIrOp Op) - { - if (Op.Inst == ShaderIrInst.Bra) - { - if (Block.Branch.Position <= Block.Position) - { - SB.AppendLine(Identation + "continue;"); - } - } - else if (Op.Inst == ShaderIrInst.Emit) + case ShaderIrInst.Emit: { PrintAttrToOutput(Identation); SB.AppendLine(Identation + "EmitVertex();"); + + break; } - else + + case ShaderIrInst.Ssy: { + string StackIndex = GlslDecl.SsyStackName + "[" + GlslDecl.SsyCursorName + "]"; + + int TargetPosition = (Op.OperandA as ShaderIrOperImm).Value; + + string Target = "0x" + TargetPosition.ToString("x8") + "u"; + + SB.AppendLine(Identation + StackIndex + " = " + Target + ";"); + + SB.AppendLine(Identation + GlslDecl.SsyCursorName + "++;"); + + break; + } + + case ShaderIrInst.Sync: + { + SB.AppendLine(Identation + GlslDecl.SsyCursorName + "--;"); + + string Target = GlslDecl.SsyStackName + "[" + GlslDecl.SsyCursorName + "]"; + + SB.AppendLine(Identation + "return " + Target + ";"); + + break; + } + + default: SB.AppendLine(Identation + GetSrcExpr(Op, true) + ";"); - } - } - else if (Node is ShaderIrCmnt Cmnt) - { - SB.AppendLine(Identation + "// " + Cmnt.Comment); - } - else - { - throw new InvalidOperationException(); + break; } } - - return Block.Next; - } - - private ShaderIrBlock GetUpBlock(ShaderIrBlock Block) - { - return Blocks.FirstOrDefault(x => x.EndPosition == Block.Position); - } - - private ShaderIrBlock GetDownBlock(ShaderIrBlock Block) - { - return Blocks.FirstOrDefault(x => x.Position == Block.EndPosition); - } - - private ShaderIrBlock GetLoopTailBlock(ShaderIrBlock LoopHead) - { - ShaderIrBlock Tail = null; - - foreach (ShaderIrBlock Block in LoopHead.Sources) + else if (Node is ShaderIrCmnt Cmnt) { - if (Block.Position >= LoopHead.Position) - { - if (Tail == null || Tail.Position < Block.Position) - { - Tail = Block; - } - } + SB.AppendLine(Identation + "// " + Cmnt.Comment); + } + else + { + throw new InvalidOperationException(); + } + } + + private void PrintNodes(ShaderIrBlock Block, ShaderIrNode[] Nodes) + { + foreach (ShaderIrNode Node in Nodes) + { + PrintNode(Block, Node, IdentationStr); } - return Tail; + if (Nodes.Length > 0) + { + ShaderIrNode Last = Nodes[Nodes.Length - 1]; + + bool UnconditionalFlowChange = false; + + if (Last is ShaderIrOp Op) + { + switch (Op.Inst) + { + case ShaderIrInst.Bra: + case ShaderIrInst.Exit: + case ShaderIrInst.Kil: + case ShaderIrInst.Sync: + UnconditionalFlowChange = true; + break; + } + } + + if (!UnconditionalFlowChange) + { + SB.AppendLine(IdentationStr + "return " + GetBlockPosition(Block.Next) + ";"); + } + } } private bool IsValidOutOper(ShaderIrNode Node) @@ -779,7 +809,7 @@ namespace Ryujinx.Graphics.Gal.Shader switch (Abuf.Offs) { case GlslDecl.VertexIdAttr: return "gl_VertexID"; - case GlslDecl.InstanceIdAttr: return "gl_InstanceID"; + case GlslDecl.InstanceIdAttr: return GlslDecl.InstanceUniformName; } } else if (Decl.ShaderType == GalShaderType.TessEvaluation) @@ -964,7 +994,7 @@ namespace Ryujinx.Graphics.Gal.Shader private string GetCnumExpr(ShaderIrOp Op) => GetUnaryCall(Op, "!isnan"); - private string GetExitExpr(ShaderIrOp Op) => "return"; + private string GetExitExpr(ShaderIrOp Op) => "return 0u"; private string GetFcosExpr(ShaderIrOp Op) => GetUnaryCall(Op, "cos"); @@ -1309,5 +1339,17 @@ namespace Ryujinx.Graphics.Gal.Shader throw new ArgumentException(nameof(Node)); } + + private static string GetBlockPosition(ShaderIrBlock Block) + { + if (Block != null) + { + return "0x" + Block.Position.ToString("x8") + "u"; + } + else + { + return "0u"; + } + } } } diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecode.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecode.cs index ef0fd78bd3..73625f65fe 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecode.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecode.cs @@ -1,4 +1,4 @@ namespace Ryujinx.Graphics.Gal.Shader { - delegate void ShaderDecodeFunc(ShaderIrBlock Block, long OpCode); + delegate void ShaderDecodeFunc(ShaderIrBlock Block, long OpCode, long Position); } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs index b60da7c1c3..5eb761dab6 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs @@ -6,32 +6,32 @@ namespace Ryujinx.Graphics.Gal.Shader { static partial class ShaderDecode { - public static void Bfe_C(ShaderIrBlock Block, long OpCode) + public static void Bfe_C(ShaderIrBlock Block, long OpCode, long Position) { EmitBfe(Block, OpCode, ShaderOper.CR); } - public static void Bfe_I(ShaderIrBlock Block, long OpCode) + public static void Bfe_I(ShaderIrBlock Block, long OpCode, long Position) { EmitBfe(Block, OpCode, ShaderOper.Imm); } - public static void Bfe_R(ShaderIrBlock Block, long OpCode) + public static void Bfe_R(ShaderIrBlock Block, long OpCode, long Position) { EmitBfe(Block, OpCode, ShaderOper.RR); } - public static void Fadd_C(ShaderIrBlock Block, long OpCode) + public static void Fadd_C(ShaderIrBlock Block, long OpCode, long Position) { EmitFadd(Block, OpCode, ShaderOper.CR); } - public static void Fadd_I(ShaderIrBlock Block, long OpCode) + public static void Fadd_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFadd(Block, OpCode, ShaderOper.Immf); } - public static void Fadd_I32(ShaderIrBlock Block, long OpCode) + public static void Fadd_I32(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperA = GetOperGpr8 (OpCode); ShaderIrNode OperB = GetOperImmf32_20(OpCode); @@ -49,47 +49,47 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode)); } - public static void Fadd_R(ShaderIrBlock Block, long OpCode) + public static void Fadd_R(ShaderIrBlock Block, long OpCode, long Position) { EmitFadd(Block, OpCode, ShaderOper.RR); } - public static void Ffma_CR(ShaderIrBlock Block, long OpCode) + public static void Ffma_CR(ShaderIrBlock Block, long OpCode, long Position) { EmitFfma(Block, OpCode, ShaderOper.CR); } - public static void Ffma_I(ShaderIrBlock Block, long OpCode) + public static void Ffma_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFfma(Block, OpCode, ShaderOper.Immf); } - public static void Ffma_RC(ShaderIrBlock Block, long OpCode) + public static void Ffma_RC(ShaderIrBlock Block, long OpCode, long Position) { EmitFfma(Block, OpCode, ShaderOper.RC); } - public static void Ffma_RR(ShaderIrBlock Block, long OpCode) + public static void Ffma_RR(ShaderIrBlock Block, long OpCode, long Position) { EmitFfma(Block, OpCode, ShaderOper.RR); } - public static void Fmnmx_C(ShaderIrBlock Block, long OpCode) + public static void Fmnmx_C(ShaderIrBlock Block, long OpCode, long Position) { EmitFmnmx(Block, OpCode, ShaderOper.CR); } - public static void Fmnmx_I(ShaderIrBlock Block, long OpCode) + public static void Fmnmx_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFmnmx(Block, OpCode, ShaderOper.Immf); } - public static void Fmnmx_R(ShaderIrBlock Block, long OpCode) + public static void Fmnmx_R(ShaderIrBlock Block, long OpCode, long Position) { EmitFmnmx(Block, OpCode, ShaderOper.RR); } - public static void Fmul_I32(ShaderIrBlock Block, long OpCode) + public static void Fmul_I32(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperA = GetOperGpr8 (OpCode); ShaderIrNode OperB = GetOperImmf32_20(OpCode); @@ -99,62 +99,62 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode)); } - public static void Fmul_C(ShaderIrBlock Block, long OpCode) + public static void Fmul_C(ShaderIrBlock Block, long OpCode, long Position) { EmitFmul(Block, OpCode, ShaderOper.CR); } - public static void Fmul_I(ShaderIrBlock Block, long OpCode) + public static void Fmul_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFmul(Block, OpCode, ShaderOper.Immf); } - public static void Fmul_R(ShaderIrBlock Block, long OpCode) + public static void Fmul_R(ShaderIrBlock Block, long OpCode, long Position) { EmitFmul(Block, OpCode, ShaderOper.RR); } - public static void Fset_C(ShaderIrBlock Block, long OpCode) + public static void Fset_C(ShaderIrBlock Block, long OpCode, long Position) { EmitFset(Block, OpCode, ShaderOper.CR); } - public static void Fset_I(ShaderIrBlock Block, long OpCode) + public static void Fset_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFset(Block, OpCode, ShaderOper.Immf); } - public static void Fset_R(ShaderIrBlock Block, long OpCode) + public static void Fset_R(ShaderIrBlock Block, long OpCode, long Position) { EmitFset(Block, OpCode, ShaderOper.RR); } - public static void Fsetp_C(ShaderIrBlock Block, long OpCode) + public static void Fsetp_C(ShaderIrBlock Block, long OpCode, long Position) { EmitFsetp(Block, OpCode, ShaderOper.CR); } - public static void Fsetp_I(ShaderIrBlock Block, long OpCode) + public static void Fsetp_I(ShaderIrBlock Block, long OpCode, long Position) { EmitFsetp(Block, OpCode, ShaderOper.Immf); } - public static void Fsetp_R(ShaderIrBlock Block, long OpCode) + public static void Fsetp_R(ShaderIrBlock Block, long OpCode, long Position) { EmitFsetp(Block, OpCode, ShaderOper.RR); } - public static void Iadd_C(ShaderIrBlock Block, long OpCode) + public static void Iadd_C(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd(Block, OpCode, ShaderOper.CR); } - public static void Iadd_I(ShaderIrBlock Block, long OpCode) + public static void Iadd_I(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd(Block, OpCode, ShaderOper.Imm); } - public static void Iadd_I32(ShaderIrBlock Block, long OpCode) + public static void Iadd_I32(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperA = GetOperGpr8 (OpCode); ShaderIrNode OperB = GetOperImm32_20(OpCode); @@ -168,42 +168,42 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode)); } - public static void Iadd_R(ShaderIrBlock Block, long OpCode) + public static void Iadd_R(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd(Block, OpCode, ShaderOper.RR); } - public static void Iadd3_C(ShaderIrBlock Block, long OpCode) + public static void Iadd3_C(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd3(Block, OpCode, ShaderOper.CR); } - public static void Iadd3_I(ShaderIrBlock Block, long OpCode) + public static void Iadd3_I(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd3(Block, OpCode, ShaderOper.Imm); } - public static void Iadd3_R(ShaderIrBlock Block, long OpCode) + public static void Iadd3_R(ShaderIrBlock Block, long OpCode, long Position) { EmitIadd3(Block, OpCode, ShaderOper.RR); } - public static void Imnmx_C(ShaderIrBlock Block, long OpCode) + public static void Imnmx_C(ShaderIrBlock Block, long OpCode, long Position) { EmitImnmx(Block, OpCode, ShaderOper.CR); } - public static void Imnmx_I(ShaderIrBlock Block, long OpCode) + public static void Imnmx_I(ShaderIrBlock Block, long OpCode, long Position) { EmitImnmx(Block, OpCode, ShaderOper.Imm); } - public static void Imnmx_R(ShaderIrBlock Block, long OpCode) + public static void Imnmx_R(ShaderIrBlock Block, long OpCode, long Position) { EmitImnmx(Block, OpCode, ShaderOper.RR); } - public static void Ipa(ShaderIrBlock Block, long OpCode) + public static void Ipa(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperA = GetOperAbuf28(OpCode); ShaderIrNode OperB = GetOperGpr20 (OpCode); @@ -213,52 +213,52 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode)); } - public static void Iscadd_C(ShaderIrBlock Block, long OpCode) + public static void Iscadd_C(ShaderIrBlock Block, long OpCode, long Position) { EmitIscadd(Block, OpCode, ShaderOper.CR); } - public static void Iscadd_I(ShaderIrBlock Block, long OpCode) + public static void Iscadd_I(ShaderIrBlock Block, long OpCode, long Position) { EmitIscadd(Block, OpCode, ShaderOper.Imm); } - public static void Iscadd_R(ShaderIrBlock Block, long OpCode) + public static void Iscadd_R(ShaderIrBlock Block, long OpCode, long Position) { EmitIscadd(Block, OpCode, ShaderOper.RR); } - public static void Iset_C(ShaderIrBlock Block, long OpCode) + public static void Iset_C(ShaderIrBlock Block, long OpCode, long Position) { EmitIset(Block, OpCode, ShaderOper.CR); } - public static void Iset_I(ShaderIrBlock Block, long OpCode) + public static void Iset_I(ShaderIrBlock Block, long OpCode, long Position) { EmitIset(Block, OpCode, ShaderOper.Imm); } - public static void Iset_R(ShaderIrBlock Block, long OpCode) + public static void Iset_R(ShaderIrBlock Block, long OpCode, long Position) { EmitIset(Block, OpCode, ShaderOper.RR); } - public static void Isetp_C(ShaderIrBlock Block, long OpCode) + public static void Isetp_C(ShaderIrBlock Block, long OpCode, long Position) { EmitIsetp(Block, OpCode, ShaderOper.CR); } - public static void Isetp_I(ShaderIrBlock Block, long OpCode) + public static void Isetp_I(ShaderIrBlock Block, long OpCode, long Position) { EmitIsetp(Block, OpCode, ShaderOper.Imm); } - public static void Isetp_R(ShaderIrBlock Block, long OpCode) + public static void Isetp_R(ShaderIrBlock Block, long OpCode, long Position) { EmitIsetp(Block, OpCode, ShaderOper.RR); } - public static void Lop_I32(ShaderIrBlock Block, long OpCode) + public static void Lop_I32(ShaderIrBlock Block, long OpCode, long Position) { int SubOp = (int)(OpCode >> 53) & 3; @@ -292,22 +292,22 @@ namespace Ryujinx.Graphics.Gal.Shader } } - public static void Lop_C(ShaderIrBlock Block, long OpCode) + public static void Lop_C(ShaderIrBlock Block, long OpCode, long Position) { EmitLop(Block, OpCode, ShaderOper.CR); } - public static void Lop_I(ShaderIrBlock Block, long OpCode) + public static void Lop_I(ShaderIrBlock Block, long OpCode, long Position) { EmitLop(Block, OpCode, ShaderOper.Imm); } - public static void Lop_R(ShaderIrBlock Block, long OpCode) + public static void Lop_R(ShaderIrBlock Block, long OpCode, long Position) { EmitLop(Block, OpCode, ShaderOper.RR); } - public static void Mufu(ShaderIrBlock Block, long OpCode) + public static void Mufu(ShaderIrBlock Block, long OpCode, long Position) { int SubOp = (int)(OpCode >> 20) & 0xf; @@ -336,7 +336,7 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode)); } - public static void Psetp(ShaderIrBlock Block, long OpCode) + public static void Psetp(ShaderIrBlock Block, long OpCode, long Position) { bool NegA = ((OpCode >> 15) & 1) != 0; bool NegB = ((OpCode >> 32) & 1) != 0; @@ -390,47 +390,47 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(P0Node, Op), OpCode)); } - public static void Rro_C(ShaderIrBlock Block, long OpCode) + public static void Rro_C(ShaderIrBlock Block, long OpCode, long Position) { EmitRro(Block, OpCode, ShaderOper.CR); } - public static void Rro_I(ShaderIrBlock Block, long OpCode) + public static void Rro_I(ShaderIrBlock Block, long OpCode, long Position) { EmitRro(Block, OpCode, ShaderOper.Immf); } - public static void Rro_R(ShaderIrBlock Block, long OpCode) + public static void Rro_R(ShaderIrBlock Block, long OpCode, long Position) { EmitRro(Block, OpCode, ShaderOper.RR); } - public static void Shl_C(ShaderIrBlock Block, long OpCode) + public static void Shl_C(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.CR, ShaderIrInst.Lsl); } - public static void Shl_I(ShaderIrBlock Block, long OpCode) + public static void Shl_I(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.Imm, ShaderIrInst.Lsl); } - public static void Shl_R(ShaderIrBlock Block, long OpCode) + public static void Shl_R(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.RR, ShaderIrInst.Lsl); } - public static void Shr_C(ShaderIrBlock Block, long OpCode) + public static void Shr_C(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.CR, GetShrInst(OpCode)); } - public static void Shr_I(ShaderIrBlock Block, long OpCode) + public static void Shr_I(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.Imm, GetShrInst(OpCode)); } - public static void Shr_R(ShaderIrBlock Block, long OpCode) + public static void Shr_R(ShaderIrBlock Block, long OpCode, long Position) { EmitAluBinary(Block, OpCode, ShaderOper.RR, GetShrInst(OpCode)); } @@ -442,7 +442,7 @@ namespace Ryujinx.Graphics.Gal.Shader return Signed ? ShaderIrInst.Asr : ShaderIrInst.Lsr; } - public static void Vmad(ShaderIrBlock Block, long OpCode) + public static void Vmad(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperA = GetOperGpr8(OpCode); @@ -477,22 +477,22 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Final), OpCode)); } - public static void Xmad_CR(ShaderIrBlock Block, long OpCode) + public static void Xmad_CR(ShaderIrBlock Block, long OpCode, long Position) { EmitXmad(Block, OpCode, ShaderOper.CR); } - public static void Xmad_I(ShaderIrBlock Block, long OpCode) + public static void Xmad_I(ShaderIrBlock Block, long OpCode, long Position) { EmitXmad(Block, OpCode, ShaderOper.Imm); } - public static void Xmad_RC(ShaderIrBlock Block, long OpCode) + public static void Xmad_RC(ShaderIrBlock Block, long OpCode, long Position) { EmitXmad(Block, OpCode, ShaderOper.RC); } - public static void Xmad_RR(ShaderIrBlock Block, long OpCode) + public static void Xmad_RR(ShaderIrBlock Block, long OpCode, long Position) { EmitXmad(Block, OpCode, ShaderOper.RR); } diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeFlow.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeFlow.cs index 8d0925a321..2c699a1b82 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeFlow.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeFlow.cs @@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Gal.Shader { static partial class ShaderDecode { - public static void Bra(ShaderIrBlock Block, long OpCode) + public static void Bra(ShaderIrBlock Block, long OpCode, long Position) { if ((OpCode & 0x20) != 0) { @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Bra, Imm), OpCode)); } - public static void Exit(ShaderIrBlock Block, long OpCode) + public static void Exit(ShaderIrBlock Block, long OpCode, long Position) { int CCode = (int)OpCode & 0x1f; @@ -34,9 +34,34 @@ namespace Ryujinx.Graphics.Gal.Shader } - public static void Kil(ShaderIrBlock Block, long OpCode) + public static void Kil(ShaderIrBlock Block, long OpCode, long Position) { Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Kil), OpCode)); } + + public static void Ssy(ShaderIrBlock Block, long OpCode, long Position) + { + if ((OpCode & 0x20) != 0) + { + //This reads the target offset from the constant buffer. + //Almost impossible to support with GLSL. + throw new NotImplementedException(); + } + + int Offset = ((int)(OpCode >> 20) << 8) >> 8; + + int Target = (int)(Position + Offset); + + ShaderIrOperImm Imm = new ShaderIrOperImm(Target); + + Block.AddNode(new ShaderIrOp(ShaderIrInst.Ssy, Imm)); + } + + public static void Sync(ShaderIrBlock Block, long OpCode, long Position) + { + //TODO: Implement Sync condition codes + + Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Sync), OpCode)); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs index a183b0c69a..2ae58bf89d 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs @@ -31,7 +31,7 @@ namespace Ryujinx.Graphics.Gal.Shader { RGB_, RG_A, R_BA, _GBA, RGBA, ____, ____, ____ } }; - public static void Ld_A(ShaderIrBlock Block, long OpCode) + public static void Ld_A(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode[] Opers = GetOperAbuf20(OpCode); @@ -50,7 +50,7 @@ namespace Ryujinx.Graphics.Gal.Shader } } - public static void Ld_C(ShaderIrBlock Block, long OpCode) + public static void Ld_C(ShaderIrBlock Block, long OpCode, long Position) { int CbufPos = (int)(OpCode >> 22) & 0x3fff; int CbufIndex = (int)(OpCode >> 36) & 0x1f; @@ -97,7 +97,7 @@ namespace Ryujinx.Graphics.Gal.Shader } } - public static void St_A(ShaderIrBlock Block, long OpCode) + public static void St_A(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode[] Opers = GetOperAbuf20(OpCode); @@ -113,7 +113,7 @@ namespace Ryujinx.Graphics.Gal.Shader } } - public static void Texq(ShaderIrBlock Block, long OpCode) + public static void Texq(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrNode OperD = GetOperGpr0(OpCode); ShaderIrNode OperA = GetOperGpr8(OpCode); @@ -132,12 +132,12 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(OperA, Op1), OpCode)); //Is this right? } - public static void Tex(ShaderIrBlock Block, long OpCode) + public static void Tex(ShaderIrBlock Block, long OpCode, long Position) { EmitTex(Block, OpCode, GprHandle: false); } - public static void Tex_B(ShaderIrBlock Block, long OpCode) + public static void Tex_B(ShaderIrBlock Block, long OpCode, long Position) { EmitTex(Block, OpCode, GprHandle: true); } @@ -202,12 +202,12 @@ namespace Ryujinx.Graphics.Gal.Shader } } - public static void Texs(ShaderIrBlock Block, long OpCode) + public static void Texs(ShaderIrBlock Block, long OpCode, long Position) { EmitTexs(Block, OpCode, ShaderIrInst.Texs); } - public static void Tlds(ShaderIrBlock Block, long OpCode) + public static void Tlds(ShaderIrBlock Block, long OpCode, long Position) { EmitTexs(Block, OpCode, ShaderIrInst.Txlf); } diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs index c6b71fb01a..aef92c5a90 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs @@ -25,67 +25,67 @@ namespace Ryujinx.Graphics.Gal.Shader F64 = 3 } - public static void F2f_C(ShaderIrBlock Block, long OpCode) + public static void F2f_C(ShaderIrBlock Block, long OpCode, long Position) { EmitF2f(Block, OpCode, ShaderOper.CR); } - public static void F2f_I(ShaderIrBlock Block, long OpCode) + public static void F2f_I(ShaderIrBlock Block, long OpCode, long Position) { EmitF2f(Block, OpCode, ShaderOper.Immf); } - public static void F2f_R(ShaderIrBlock Block, long OpCode) + public static void F2f_R(ShaderIrBlock Block, long OpCode, long Position) { EmitF2f(Block, OpCode, ShaderOper.RR); } - public static void F2i_C(ShaderIrBlock Block, long OpCode) + public static void F2i_C(ShaderIrBlock Block, long OpCode, long Position) { EmitF2i(Block, OpCode, ShaderOper.CR); } - public static void F2i_I(ShaderIrBlock Block, long OpCode) + public static void F2i_I(ShaderIrBlock Block, long OpCode, long Position) { EmitF2i(Block, OpCode, ShaderOper.Immf); } - public static void F2i_R(ShaderIrBlock Block, long OpCode) + public static void F2i_R(ShaderIrBlock Block, long OpCode, long Position) { EmitF2i(Block, OpCode, ShaderOper.RR); } - public static void I2f_C(ShaderIrBlock Block, long OpCode) + public static void I2f_C(ShaderIrBlock Block, long OpCode, long Position) { EmitI2f(Block, OpCode, ShaderOper.CR); } - public static void I2f_I(ShaderIrBlock Block, long OpCode) + public static void I2f_I(ShaderIrBlock Block, long OpCode, long Position) { EmitI2f(Block, OpCode, ShaderOper.Imm); } - public static void I2f_R(ShaderIrBlock Block, long OpCode) + public static void I2f_R(ShaderIrBlock Block, long OpCode, long Position) { EmitI2f(Block, OpCode, ShaderOper.RR); } - public static void I2i_C(ShaderIrBlock Block, long OpCode) + public static void I2i_C(ShaderIrBlock Block, long OpCode, long Position) { EmitI2i(Block, OpCode, ShaderOper.CR); } - public static void I2i_I(ShaderIrBlock Block, long OpCode) + public static void I2i_I(ShaderIrBlock Block, long OpCode, long Position) { EmitI2i(Block, OpCode, ShaderOper.Imm); } - public static void I2i_R(ShaderIrBlock Block, long OpCode) + public static void I2i_R(ShaderIrBlock Block, long OpCode, long Position) { EmitI2i(Block, OpCode, ShaderOper.RR); } - public static void Isberd(ShaderIrBlock Block, long OpCode) + public static void Isberd(ShaderIrBlock Block, long OpCode, long Position) { //This instruction seems to be used to translate from an address to a vertex index in a GS //Stub it as such @@ -95,50 +95,50 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), GetOperGpr8(OpCode)), OpCode)); } - public static void Mov_C(ShaderIrBlock Block, long OpCode) + public static void Mov_C(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrOperCbuf Cbuf = GetOperCbuf34(OpCode); Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Cbuf), OpCode)); } - public static void Mov_I(ShaderIrBlock Block, long OpCode) + public static void Mov_I(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrOperImm Imm = GetOperImm19_20(OpCode); Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Imm), OpCode)); } - public static void Mov_I32(ShaderIrBlock Block, long OpCode) + public static void Mov_I32(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrOperImm Imm = GetOperImm32_20(OpCode); Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Imm), OpCode)); } - public static void Mov_R(ShaderIrBlock Block, long OpCode) + public static void Mov_R(ShaderIrBlock Block, long OpCode, long Position) { ShaderIrOperGpr Gpr = GetOperGpr20(OpCode); Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Gpr), OpCode)); } - public static void Sel_C(ShaderIrBlock Block, long OpCode) + public static void Sel_C(ShaderIrBlock Block, long OpCode, long Position) { EmitSel(Block, OpCode, ShaderOper.CR); } - public static void Sel_I(ShaderIrBlock Block, long OpCode) + public static void Sel_I(ShaderIrBlock Block, long OpCode, long Position) { EmitSel(Block, OpCode, ShaderOper.Imm); } - public static void Sel_R(ShaderIrBlock Block, long OpCode) + public static void Sel_R(ShaderIrBlock Block, long OpCode, long Position) { EmitSel(Block, OpCode, ShaderOper.RR); } - public static void Mov_S(ShaderIrBlock Block, long OpCode) + public static void Mov_S(ShaderIrBlock Block, long OpCode, long Position) { Block.AddNode(new ShaderIrCmnt("Stubbed.")); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs index f1be005f9c..4300c32e32 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs @@ -4,7 +4,7 @@ namespace Ryujinx.Graphics.Gal.Shader { static partial class ShaderDecode { - public static void Out_R(ShaderIrBlock Block, long OpCode) + public static void Out_R(ShaderIrBlock Block, long OpCode, long Position) { //TODO: Those registers have to be used for something ShaderIrOperGpr Gpr0 = GetOperGpr0(OpCode); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs index 98f371b573..81d8f31268 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs @@ -50,17 +50,29 @@ namespace Ryujinx.Graphics.Gal.Shader { ShaderIrNode LastNode = Current.GetLastNode(); - ShaderIrOp Op = GetInnermostOp(LastNode); + ShaderIrOp InnerOp = GetInnermostOp(LastNode); - if (Op?.Inst == ShaderIrInst.Bra) + if (InnerOp?.Inst == ShaderIrInst.Bra) { - int Offset = ((ShaderIrOperImm)Op.OperandA).Value; + int Offset = ((ShaderIrOperImm)InnerOp.OperandA).Value; long Target = Current.EndPosition + Offset; Current.Branch = Enqueue(Target, Current); } + foreach (ShaderIrNode Node in Current.Nodes) + { + if (Node is ShaderIrOp CurrOp && CurrOp.Inst == ShaderIrInst.Ssy) + { + int Offset = ((ShaderIrOperImm)CurrOp.OperandA).Value; + + long Target = Offset; + + Current.Branch = Enqueue(Target, Current); + } + } + if (NodeHasNext(LastNode)) { Current.Next = Enqueue(Current.EndPosition); @@ -157,7 +169,7 @@ namespace Ryujinx.Graphics.Gal.Shader { int Offset = ((int)(OpCode >> 20) << 8) >> 8; - long Target = Position + Offset; + long Target = Position + Offset - Beginning; DbgOpCode += " (0x" + Target.ToString("x16") + ")"; } @@ -170,7 +182,7 @@ namespace Ryujinx.Graphics.Gal.Shader continue; } - Decode(Block, OpCode); + Decode(Block, OpCode, Position); } while (!IsFlowChange(Block.GetLastNode())); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs b/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs index 8e5057ed9e..eca90fc3aa 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs @@ -1,5 +1,30 @@ -namespace Ryujinx.Graphics.Gal.Shader +using System; + +namespace Ryujinx.Graphics.Gal.Shader { + struct OmapTarget + { + public bool Red; + public bool Green; + public bool Blue; + public bool Alpha; + + public bool Enabled => Red || Green || Blue || Alpha; + + public bool ComponentEnabled(int Component) + { + switch (Component) + { + case 0: return Red; + case 1: return Green; + case 2: return Blue; + case 3: return Alpha; + } + + throw new ArgumentException(nameof(Component)); + } + } + class ShaderHeader { public const int PointList = 1; @@ -30,6 +55,10 @@ public int StoreReqStart { get; private set; } public int StoreReqEnd { get; private set; } + public OmapTarget[] OmapTargets { get; private set; } + public bool OmapSampleMask { get; private set; } + public bool OmapDepth { get; private set; } + public ShaderHeader(IGalMemory Memory, long Position) { uint CommonWord0 = (uint)Memory.ReadInt32(Position + 0); @@ -61,6 +90,50 @@ MaxOutputVertexCount = ReadBits(CommonWord4, 0, 12); StoreReqStart = ReadBits(CommonWord4, 12, 8); StoreReqEnd = ReadBits(CommonWord4, 24, 8); + + //Type 2 (fragment?) reading + uint Type2OmapTarget = (uint)Memory.ReadInt32(Position + 72); + uint Type2Omap = (uint)Memory.ReadInt32(Position + 76); + + OmapTargets = new OmapTarget[8]; + + for (int i = 0; i < OmapTargets.Length; i++) + { + int Offset = i * 4; + + OmapTargets[i] = new OmapTarget + { + Red = ReadBits(Type2OmapTarget, Offset + 0, 1) != 0, + Green = ReadBits(Type2OmapTarget, Offset + 1, 1) != 0, + Blue = ReadBits(Type2OmapTarget, Offset + 2, 1) != 0, + Alpha = ReadBits(Type2OmapTarget, Offset + 3, 1) != 0 + }; + } + + OmapSampleMask = ReadBits(Type2Omap, 0, 1) != 0; + OmapDepth = ReadBits(Type2Omap, 1, 1) != 0; + } + + public int DepthRegister + { + get + { + int Count = 0; + + for (int Index = 0; Index < OmapTargets.Length; Index++) + { + for (int Component = 0; Component < 4; Component++) + { + if (OmapTargets[Index].ComponentEnabled(Component)) + { + Count++; + } + } + } + + // Depth register is always two registers after the last color output + return Count + 1; + } } private static int ReadBits(uint Word, int Offset, int BitWidth) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs index d197835a7a..35dea61216 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs @@ -84,6 +84,8 @@ namespace Ryujinx.Graphics.Gal.Shader Bra, Exit, Kil, + Ssy, + Sync, Emit, Cut diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs index 95b8e467d2..1e76eab169 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs @@ -112,7 +112,9 @@ namespace Ryujinx.Graphics.Gal.Shader Set("0100110000101x", ShaderDecode.Shr_C); Set("0011100x00101x", ShaderDecode.Shr_I); Set("0101110000101x", ShaderDecode.Shr_R); + Set("1110001010010x", ShaderDecode.Ssy); Set("1110111111110x", ShaderDecode.St_A); + Set("1111000011111x", ShaderDecode.Sync); Set("110000xxxx111x", ShaderDecode.Tex); Set("1101111010111x", ShaderDecode.Tex_B); Set("1101111101001x", ShaderDecode.Texq); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 1d0834ddb5..2010e43bd2 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -27,6 +27,8 @@ namespace Ryujinx.HLE.Gpu.Engines private List[] UploadedKeys; + private int CurrentInstance = 0; + public NvGpuEngine3d(NvGpu Gpu) { this.Gpu = Gpu; @@ -102,10 +104,15 @@ namespace Ryujinx.HLE.Gpu.Engines SetAlphaBlending(State); SetPrimitiveRestart(State); - //Enabling multiple framebuffer attachments cause graphics reggresions - SetFrameBuffer(Vmm, 0); + for (int FbIndex = 0; FbIndex < 8; FbIndex++) + { + SetFrameBuffer(Vmm, 0); + } + SetZeta(Vmm); + SetRenderTargets(); + long[] Keys = UploadShaders(Vmm); Gpu.Renderer.Shader.BindProgram(); @@ -415,6 +422,33 @@ namespace Ryujinx.HLE.Gpu.Engines } } + private void SetRenderTargets() + { + bool SeparateFragData = (ReadRegister(NvGpuEngine3dReg.RTSeparateFragData) & 1) != 0; + + if (SeparateFragData) + { + uint Control = (uint)(ReadRegister(NvGpuEngine3dReg.RTControl)); + + uint Count = Control & 0xf; + + int[] Map = new int[Count]; + + for (int i = 0; i < Count; i++) + { + int Shift = 4 + i * 3; + + Map[i] = (int)((Control >> Shift) & 7); + } + + Gpu.Renderer.FrameBuffer.SetMap(Map); + } + else + { + Gpu.Renderer.FrameBuffer.SetMap(null); + } + } + private void UploadTextures(NvGpuVmm Vmm, GalPipelineState State, long[] Keys) { long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); @@ -442,8 +476,6 @@ namespace Ryujinx.HLE.Gpu.Engines UploadTexture(Vmm, TexIndex, TextureHandle); - Gpu.Renderer.Shader.EnsureTextureBinding(DeclInfo.Name, TexIndex); - TexIndex++; } } @@ -624,10 +656,25 @@ namespace Ryujinx.HLE.Gpu.Engines long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4); long VertexEndPos = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2); - long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + int VertexDivisor = ReadRegister(NvGpuEngine3dReg.VertexArrayNDivisor + Index * 4); + + bool Instanced = (ReadRegister(NvGpuEngine3dReg.VertexArrayNInstance + Index) & 1) != 0; int Stride = Control & 0xfff; + if (Instanced && VertexDivisor != 0) + { + VertexPosition += Stride * (CurrentInstance / VertexDivisor); + } + + if (VertexPosition > VertexEndPos) + { + //Instance is invalid, ignore the draw call + continue; + } + + long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + long VbSize = (VertexEndPos - VertexPosition) + 1; bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize); @@ -639,10 +686,12 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, DataAddress); } - State.VertexBindings[Index].Enabled = true; - State.VertexBindings[Index].Stride = Stride; - State.VertexBindings[Index].VboKey = VboKey; - State.VertexBindings[Index].Attribs = Attribs[Index].ToArray(); + State.VertexBindings[Index].Enabled = true; + State.VertexBindings[Index].Stride = Stride; + State.VertexBindings[Index].VboKey = VboKey; + State.VertexBindings[Index].Instanced = Instanced; + State.VertexBindings[Index].Divisor = VertexDivisor; + State.VertexBindings[Index].Attribs = Attribs[Index].ToArray(); } } @@ -653,6 +702,25 @@ namespace Ryujinx.HLE.Gpu.Engines GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); + bool InstanceNext = ((PrimCtrl >> 26) & 1) != 0; + bool InstanceCont = ((PrimCtrl >> 27) & 1) != 0; + + if (InstanceNext && InstanceCont) + { + throw new InvalidOperationException("GPU tried to increase and reset instance count at the same time"); + } + + if (InstanceNext) + { + CurrentInstance++; + } + else if (!InstanceCont) + { + CurrentInstance = 0; + } + + State.Instance = CurrentInstance; + Gpu.Renderer.Pipeline.Bind(State); if (IndexCount != 0) @@ -803,4 +871,4 @@ namespace Ryujinx.HLE.Gpu.Engines return Vmm.IsRegionModified(Key, Size, Type); } } -} \ No newline at end of file +} diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs index b03aef0241..ace324e91d 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs @@ -22,11 +22,13 @@ namespace Ryujinx.HLE.Gpu.Engines StencilBackFuncRef = 0x3d5, StencilBackMask = 0x3d6, StencilBackFuncMask = 0x3d7, + RTSeparateFragData = 0x3eb, ZetaAddress = 0x3f8, ZetaFormat = 0x3fa, ZetaBlockDimensions = 0x3fb, ZetaLayerStride = 0x3fc, VertexAttribNFormat = 0x458, + RTControl = 0x487, ZetaHoriz = 0x48a, ZetaVert = 0x48b, ZetaArrayMode = 0x48c, @@ -51,6 +53,7 @@ namespace Ryujinx.HLE.Gpu.Engines StencilFrontFuncMask = 0x4e6, StencilFrontMask = 0x4e7, VertexArrayElemBase = 0x50d, + VertexArrayInstBase = 0x50e, ZetaEnable = 0x54e, TexHeaderPoolOffset = 0x55d, TexSamplerPoolOffset = 0x557, @@ -68,6 +71,7 @@ namespace Ryujinx.HLE.Gpu.Engines IndexArrayFormat = 0x5f6, IndexBatchFirst = 0x5f7, IndexBatchCount = 0x5f8, + VertexArrayNInstance = 0x620, CullFaceEnable = 0x646, FrontFace = 0x647, CullFace = 0x648, diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 2958be81f3..98da852eee 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -45,6 +45,9 @@ namespace Ryujinx.HLE.Gpu.Texture case GalImageFormat.R16G16B16A16_UINT: case GalImageFormat.R16G16B16A16_UNORM: case GalImageFormat.D32_SFLOAT_S8_UINT: + case GalImageFormat.R32G32_SFLOAT: + case GalImageFormat.R32G32_SINT: + case GalImageFormat.R32G32_UINT: return Image.Width * Image.Height * 8; case GalImageFormat.A8B8G8R8_SINT_PACK32: diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 95be40d9b7..65fa7e6ebe 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -12,6 +12,7 @@ namespace Ryujinx.HLE.Gpu.Texture { case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture); case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); + case GalTextureFormat.R32G32: return Read8Bpp (Memory, Texture); case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); case GalTextureFormat.A2B10G10R10: return Read4Bpp (Memory, Texture); case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); diff --git a/Ryujinx.HLE/HOS/Ipc/IpcMessage.cs b/Ryujinx.HLE/HOS/Ipc/IpcMessage.cs index c8153fdb4b..02900444aa 100644 --- a/Ryujinx.HLE/HOS/Ipc/IpcMessage.cs +++ b/Ryujinx.HLE/HOS/Ipc/IpcMessage.cs @@ -174,39 +174,39 @@ namespace Ryujinx.HLE.HOS.Ipc return 0; } - public (long Position, long Size) GetBufferType0x21() + public (long Position, long Size) GetBufferType0x21(int Index = 0) { - if (PtrBuff.Count != 0 && - PtrBuff[0].Position != 0 && - PtrBuff[0].Size != 0) + if (PtrBuff.Count > Index && + PtrBuff[Index].Position != 0 && + PtrBuff[Index].Size != 0) { - return (PtrBuff[0].Position, PtrBuff[0].Size); + return (PtrBuff[Index].Position, PtrBuff[Index].Size); } - if (SendBuff.Count != 0 && - SendBuff[0].Position != 0 && - SendBuff[0].Size != 0) + if (SendBuff.Count > Index && + SendBuff[Index].Position != 0 && + SendBuff[Index].Size != 0) { - return (SendBuff[0].Position, SendBuff[0].Size); + return (SendBuff[Index].Position, SendBuff[Index].Size); } return (0, 0); } - public (long Position, long Size) GetBufferType0x22() + public (long Position, long Size) GetBufferType0x22(int Index = 0) { - if (RecvListBuff.Count != 0 && - RecvListBuff[0].Position != 0 && - RecvListBuff[0].Size != 0) + if (RecvListBuff.Count > Index && + RecvListBuff[Index].Position != 0 && + RecvListBuff[Index].Size != 0) { - return (RecvListBuff[0].Position, RecvListBuff[0].Size); + return (RecvListBuff[Index].Position, RecvListBuff[Index].Size); } - if (ReceiveBuff.Count != 0 && - ReceiveBuff[0].Position != 0 && - ReceiveBuff[0].Size != 0) + if (ReceiveBuff.Count > Index && + ReceiveBuff[Index].Position != 0 && + ReceiveBuff[Index].Size != 0) { - return (ReceiveBuff[0].Position, ReceiveBuff[0].Size); + return (ReceiveBuff[Index].Position, ReceiveBuff[Index].Size); } return (0, 0); diff --git a/Ryujinx.HLE/HOS/Services/Bsd/IClient.cs b/Ryujinx.HLE/HOS/Services/Bsd/IClient.cs index b5a457db56..e2cd0dcdb7 100644 --- a/Ryujinx.HLE/HOS/Services/Bsd/IClient.cs +++ b/Ryujinx.HLE/HOS/Services/Bsd/IClient.cs @@ -32,6 +32,8 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { 14, Connect }, { 18, Listen }, { 21, SetSockOpt }, + { 24, Write }, + { 25, Read }, { 26, Close } }; } @@ -122,15 +124,15 @@ namespace Ryujinx.HLE.HOS.Services.Bsd int SocketId = Context.RequestData.ReadInt32(); int SocketFlags = Context.RequestData.ReadInt32(); - byte[] ReceivedBuffer = new byte[Context.Request.ReceiveBuff[0].Size]; + (long ReceivePosition, long ReceiveLength) = Context.Request.GetBufferType0x22(); + + byte[] ReceivedBuffer = new byte[ReceiveLength]; try { int BytesRead = Sockets[SocketId].Handle.Receive(ReceivedBuffer); - //Logging.Debug("Received Buffer:" + Environment.NewLine + Logging.HexDump(ReceivedBuffer)); - - Context.Memory.WriteBytes(Context.Request.ReceiveBuff[0].Position, ReceivedBuffer); + Context.Memory.WriteBytes(ReceivePosition, ReceivedBuffer); Context.ResponseData.Write(BytesRead); Context.ResponseData.Write(0); @@ -150,13 +152,12 @@ namespace Ryujinx.HLE.HOS.Services.Bsd int SocketId = Context.RequestData.ReadInt32(); int SocketFlags = Context.RequestData.ReadInt32(); - byte[] SentBuffer = Context.Memory.ReadBytes(Context.Request.SendBuff[0].Position, - Context.Request.SendBuff[0].Size); + (long SentPosition, long SentSize) = Context.Request.GetBufferType0x21(); + + byte[] SentBuffer = Context.Memory.ReadBytes(SentPosition, SentSize); try { - //Logging.Debug("Sent Buffer:" + Environment.NewLine + Logging.HexDump(SentBuffer)); - int BytesSent = Sockets[SocketId].Handle.Send(SentBuffer); Context.ResponseData.Write(BytesSent); @@ -180,8 +181,9 @@ namespace Ryujinx.HLE.HOS.Services.Bsd byte[] SentBuffer = Context.Memory.ReadBytes(Context.Request.SendBuff[0].Position, Context.Request.SendBuff[0].Size); - byte[] AddressBuffer = Context.Memory.ReadBytes(Context.Request.SendBuff[1].Position, - Context.Request.SendBuff[1].Size); + (long AddressPosition, long AddressSize) = Context.Request.GetBufferType0x21(Index: 1); + + byte[] AddressBuffer = Context.Memory.ReadBytes(AddressPosition, AddressSize); if (!Sockets[SocketId].Handle.Connected) { @@ -200,8 +202,6 @@ namespace Ryujinx.HLE.HOS.Services.Bsd try { - //Logging.Debug("Sent Buffer:" + Environment.NewLine + Logging.HexDump(SentBuffer)); - int BytesSent = Sockets[SocketId].Handle.Send(SentBuffer); Context.ResponseData.Write(BytesSent); @@ -221,7 +221,7 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { int SocketId = Context.RequestData.ReadInt32(); - long AddrBufferPtr = Context.Request.ReceiveBuff[0].Position; + (long AddrBufferPosition, long AddrBuffSize) = Context.Request.GetBufferType0x22(); Socket HandleAccept = null; @@ -246,7 +246,7 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { IpAddress = ((IPEndPoint)Sockets[SocketId].Handle.LocalEndPoint).Address, RemoteEP = ((IPEndPoint)Sockets[SocketId].Handle.LocalEndPoint), - Handle = HandleAccept + Handle = HandleAccept }; Sockets.Add(NewBsdSocket); @@ -265,7 +265,7 @@ namespace Ryujinx.HLE.HOS.Services.Bsd Writer.Write(IpAddress); - Context.Memory.WriteBytes(AddrBufferPtr, MS.ToArray()); + Context.Memory.WriteBytes(AddrBufferPosition, MS.ToArray()); Context.ResponseData.Write(Sockets.Count - 1); Context.ResponseData.Write(0); @@ -286,8 +286,9 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { int SocketId = Context.RequestData.ReadInt32(); - byte[] AddressBuffer = Context.Memory.ReadBytes(Context.Request.SendBuff[0].Position, - Context.Request.SendBuff[0].Size); + (long AddressPosition, long AddressSize) = Context.Request.GetBufferType0x21(); + + byte[] AddressBuffer = Context.Memory.ReadBytes(AddressPosition, AddressSize); try { @@ -310,8 +311,9 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { int SocketId = Context.RequestData.ReadInt32(); - byte[] AddressBuffer = Context.Memory.ReadBytes(Context.Request.SendBuff[0].Position, - Context.Request.SendBuff[0].Size); + (long AddressPosition, long AddressSize) = Context.Request.GetBufferType0x21(); + + byte[] AddressBuffer = Context.Memory.ReadBytes(AddressPosition, AddressSize); try { @@ -359,8 +361,8 @@ namespace Ryujinx.HLE.HOS.Services.Bsd { int SocketId = Context.RequestData.ReadInt32(); - SocketOptionLevel SocketLevel = (SocketOptionLevel)Context.RequestData.ReadInt32(); - SocketOptionName SocketOptionName = (SocketOptionName)Context.RequestData.ReadInt32(); + SocketOptionLevel SocketLevel = (SocketOptionLevel)Context.RequestData.ReadInt32(); + SocketOptionName SocketOptionName = (SocketOptionName)Context.RequestData.ReadInt32(); byte[] SocketOptionValue = Context.Memory.ReadBytes(Context.Request.PtrBuff[0].Position, Context.Request.PtrBuff[0].Size); @@ -383,6 +385,60 @@ namespace Ryujinx.HLE.HOS.Services.Bsd return 0; } + //(u32 socket, buffer message) -> (i32 ret, u32 bsd_errno) + public long Write(ServiceCtx Context) + { + int SocketId = Context.RequestData.ReadInt32(); + + (long SentPosition, long SentSize) = Context.Request.GetBufferType0x21(); + + byte[] SentBuffer = Context.Memory.ReadBytes(SentPosition, SentSize); + + try + { + //Logging.Debug("Wrote Buffer:" + Environment.NewLine + Logging.HexDump(SentBuffer)); + + int BytesSent = Sockets[SocketId].Handle.Send(SentBuffer); + + Context.ResponseData.Write(BytesSent); + Context.ResponseData.Write(0); + } + catch (SocketException Ex) + { + Context.ResponseData.Write(-1); + Context.ResponseData.Write(Ex.ErrorCode - 10000); + } + + return 0; + } + + //(u32 socket) -> (i32 ret, u32 bsd_errno, buffer message) + public long Read(ServiceCtx Context) + { + int SocketId = Context.RequestData.ReadInt32(); + + (long ReceivePosition, long ReceiveLength) = Context.Request.GetBufferType0x22(); + + byte[] ReceivedBuffer = new byte[ReceiveLength]; + + try + { + int BytesRead = Sockets[SocketId].Handle.Receive(ReceivedBuffer); + + Context.Memory.WriteBytes(ReceivePosition, ReceivedBuffer); + + Context.ResponseData.Write(BytesRead); + Context.ResponseData.Write(0); + } + catch (SocketException Ex) + { + Context.ResponseData.Write(-1); + Context.ResponseData.Write(Ex.ErrorCode - 10000); + } + + return 0; + } + //(u32 socket) -> (i32 ret, u32 bsd_errno) public long Close(ServiceCtx Context) { @@ -413,7 +469,7 @@ namespace Ryujinx.HLE.HOS.Services.Bsd int Size = Reader.ReadByte(); int Family = Reader.ReadByte(); - int Port = EndianSwap.Swap16(Reader.ReadInt16()); + int Port = EndianSwap.Swap16(Reader.ReadUInt16()); string IpAddress = Reader.ReadByte().ToString() + "." + Reader.ReadByte().ToString() + "." + @@ -421,8 +477,7 @@ namespace Ryujinx.HLE.HOS.Services.Bsd Reader.ReadByte().ToString(); Sockets[SocketId].IpAddress = IPAddress.Parse(IpAddress); - - Sockets[SocketId].RemoteEP = new IPEndPoint(Sockets[SocketId].IpAddress, Port); + Sockets[SocketId].RemoteEP = new IPEndPoint(Sockets[SocketId].IpAddress, Port); } } diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASCtx.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASCtx.cs index d69ec719c9..7b6a8676b0 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASCtx.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASCtx.cs @@ -187,7 +187,10 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvGpuAS { Left = Middle + 1; - LtRg = Rg; + if ((ulong)Position > Rg.Start) + { + LtRg = Rg; + } } } diff --git a/Ryujinx.HLE/Utilities/EndianSwap.cs b/Ryujinx.HLE/Utilities/EndianSwap.cs index d773516d35..5d0c8a845e 100644 --- a/Ryujinx.HLE/Utilities/EndianSwap.cs +++ b/Ryujinx.HLE/Utilities/EndianSwap.cs @@ -2,7 +2,7 @@ { static class EndianSwap { - public static short Swap16(short Value) => (short)(((Value >> 8) & 0xff) | (Value << 8)); + public static ushort Swap16(ushort Value) => (ushort)(((Value >> 8) & 0xff) | (Value << 8)); public static int Swap32(int Value) { diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index e6a0237987..4ac05f1b83 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -119,22 +119,42 @@ namespace Ryujinx.Tests.Cpu protected static Vector128 MakeVectorE0(double E0) { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse.StaticCast(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(E0))); } protected static Vector128 MakeVectorE0E1(double E0, double E1) { - return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(E1), - BitConverter.DoubleToInt64Bits(E0))); + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return Sse.StaticCast( + Sse2.SetVector128(BitConverter.DoubleToInt64Bits(E1), BitConverter.DoubleToInt64Bits(E0))); } protected static Vector128 MakeVectorE1(double E1) { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(E1), 0)); } protected static double VectorExtractDouble(Vector128 Vector, byte Index) { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + long Value = Sse41.Extract(Sse.StaticCast(Vector), Index); return BitConverter.Int64BitsToDouble(Value); @@ -142,26 +162,51 @@ namespace Ryujinx.Tests.Cpu protected static Vector128 MakeVectorE0(ulong E0) { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse.StaticCast(Sse2.SetVector128(0, E0)); } protected static Vector128 MakeVectorE0E1(ulong E0, ulong E1) { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse.StaticCast(Sse2.SetVector128(E1, E0)); } protected static Vector128 MakeVectorE1(ulong E1) { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse.StaticCast(Sse2.SetVector128(E1, 0)); } protected static ulong GetVectorE0(Vector128 Vector) { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse41.Extract(Sse.StaticCast(Vector), (byte)0); } protected static ulong GetVectorE1(Vector128 Vector) { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + return Sse41.Extract(Sse.StaticCast(Vector), (byte)1); } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 68e2d721d5..d1832ce89b 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -1245,11 +1245,11 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("SHA256SU0 .4S, .4S")] // 1250 tests. + [Test, Pairwise, Description("SHA256SU0 .4S, .4S")] public void Sha256su0_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, - [Random(5)] ulong Z0, [Random(5)] ulong Z1, - [Random(5)] ulong A0, [Random(5)] ulong A1) + [Random(RndCnt * 2)] ulong Z0, [Random(RndCnt * 2)] ulong Z1, + [Random(RndCnt * 2)] ulong A0, [Random(RndCnt * 2)] ulong A1) { uint Opcode = 0x5E282800; // SHA256SU0 V0.4S, V0.4S Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs index e46937339b..cce0db6364 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs @@ -10,7 +10,7 @@ namespace Ryujinx.Tests.Cpu { public class CpuTestSimdCrypto : CpuTest { - [Test, Explicit, Description("AESD .16B, .16B")] + [Test, Description("AESD .16B, .16B")] public void Aesd_V([Values(0u)] uint Rd, [Values(1u)] uint Rn, [Values(0x7B5B546573745665ul)] ulong ValueH, @@ -39,7 +39,7 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("AESE .16B, .16B")] + [Test, Description("AESE .16B, .16B")] public void Aese_V([Values(0u)] uint Rd, [Values(1u)] uint Rn, [Values(0x7B5B546573745665ul)] ulong ValueH, @@ -68,7 +68,7 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("AESIMC .16B, .16B")] + [Test, Description("AESIMC .16B, .16B")] public void Aesimc_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(0x8DCAB9DC035006BCul)] ulong ValueH, @@ -100,7 +100,7 @@ namespace Ryujinx.Tests.Cpu } } - [Test, Explicit, Description("AESMC .16B, .16B")] + [Test, Description("AESMC .16B, .16B")] public void Aesmc_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(0x627A6F6644B109C8ul)] ulong ValueH, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 9aa9385688..2ca91b37d9 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -1690,8 +1690,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> { uint Opcode = 0x0E201000; // SADDW V0.8H, V0.8H, V0.8B @@ -1721,8 +1721,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> { uint Opcode = 0x4E201000; // SADDW2 V0.8H, V0.8H, V0.16B @@ -1747,13 +1747,13 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("SHA256H , , .4S")] // 2916 tests. + [Test, Pairwise, Description("SHA256H , , .4S")] public void Sha256h_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, - [Random(3)] ulong Z0, [Random(3)] ulong Z1, - [Random(3)] ulong A0, [Random(3)] ulong A1, - [Random(3)] ulong B0, [Random(3)] ulong B1) + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, + [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) { uint Opcode = 0x5E004000; // SHA256H Q0, Q0, V0.4S Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); @@ -1784,13 +1784,13 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("SHA256H2 , , .4S")] // 2916 tests. + [Test, Pairwise, Description("SHA256H2 , , .4S")] public void Sha256h2_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, - [Random(3)] ulong Z0, [Random(3)] ulong Z1, - [Random(3)] ulong A0, [Random(3)] ulong A1, - [Random(3)] ulong B0, [Random(3)] ulong B1) + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, + [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) { uint Opcode = 0x5E005000; // SHA256H2 Q0, Q0, V0.4S Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); @@ -1821,13 +1821,13 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Explicit, Description("SHA256SU1 .4S, .4S, .4S")] // 2916 tests. + [Test, Pairwise, Description("SHA256SU1 .4S, .4S, .4S")] public void Sha256su1_V([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, - [Random(3)] ulong Z0, [Random(3)] ulong Z1, - [Random(3)] ulong A0, [Random(3)] ulong A1, - [Random(3)] ulong B0, [Random(3)] ulong B1) + [Random(RndCnt / 2)] ulong Z0, [Random(RndCnt / 2)] ulong Z1, + [Random(RndCnt / 2)] ulong A0, [Random(RndCnt / 2)] ulong A1, + [Random(RndCnt / 2)] ulong B0, [Random(RndCnt / 2)] ulong B1) { uint Opcode = 0x5E006000; // SHA256SU1 V0.4S, V0.4S, V0.4S Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); @@ -1858,6 +1858,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("SHADD ., ., .")] + public void Shadd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E200400; // SHADD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Shadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SHADD ., ., .")] + public void Shadd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x4E200400; // SHADD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Shadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SHSUB ., ., .")] + public void Shsub_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E202400; // SHSUB V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Shsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SHSUB ., ., .")] + public void Shsub_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x4E202400; // SHSUB V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Shsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("SQADD , , ")] public void Sqadd_S_B_H_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -2278,13 +2402,75 @@ namespace Ryujinx.Tests.Cpu Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32())); } + [Test, Pairwise, Description("SRHADD ., ., .")] + public void Srhadd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E201400; // SRHADD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Srhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SRHADD ., ., .")] + public void Srhadd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x4E201400; // SRHADD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Srhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("SSUBW{2} ., ., .")] public void Ssubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> { uint Opcode = 0x0E203000; // SSUBW V0.8H, V0.8H, V0.8B @@ -2314,8 +2500,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> { uint Opcode = 0x4E203000; // SSUBW2 V0.8H, V0.8H, V0.16B @@ -2870,8 +3056,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> { uint Opcode = 0x2E201000; // UADDW V0.8H, V0.8H, V0.8B @@ -2901,8 +3087,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> { uint Opcode = 0x6E201000; // UADDW2 V0.8H, V0.8H, V0.16B @@ -2927,6 +3113,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("UHADD ., ., .")] + public void Uhadd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x2E200400; // UHADD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Uhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UHADD ., ., .")] + public void Uhadd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x6E200400; // UHADD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UHSUB ., ., .")] + public void Uhsub_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x2E202400; // UHSUB V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Uhsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UHSUB ., ., .")] + public void Uhsub_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x6E202400; // UHSUB V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uhsub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("UQADD , , ")] public void Uqadd_S_B_H_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -3137,13 +3447,75 @@ namespace Ryujinx.Tests.Cpu Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32())); } + [Test, Pairwise, Description("URHADD ., ., .")] + public void Urhadd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x2E201400; // URHADD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Urhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("URHADD ., ., .")] + public void Urhadd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x6E201400; // URHADD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Urhadd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("USUBW{2} ., ., .")] public void Usubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> { uint Opcode = 0x2E203000; // USUBW V0.8H, V0.8H, V0.8B @@ -3173,8 +3545,8 @@ namespace Ryujinx.Tests.Cpu [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, - [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> { uint Opcode = 0x6E203000; // USUBW2 V0.8H, V0.8H, V0.16B diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index b0eff58808..206d3963b0 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -5251,6 +5251,88 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // shadd_advsimd.html + public static void Shadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + sum = element1 + element2; + + Elem(result, e, esize, sum.SubBigInteger(esize, 1)); + } + + V(d, result); + } + + // shsub_advsimd.html + public static void Shsub_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + BigInteger diff; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + diff = element1 - element2; + + Elem(result, e, esize, diff.SubBigInteger(esize, 1)); + } + + V(d, result); + } + // sqadd_advsimd.html#SQADD_asisdsame_only public static void Sqadd_S(Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -5651,6 +5733,44 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // srhadd_advsimd.html + public static void Srhadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + Elem(result, e, esize, (element1 + element2 + 1).SubBigInteger(esize, 1)); + } + + V(d, result); + } + // ssubw_advsimd.html public static void Ssubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -6143,6 +6263,88 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // uhadd_advsimd.html + public static void Uhadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + sum = element1 + element2; + + Elem(result, e, esize, sum.SubBigInteger(esize, 1)); + } + + V(d, result); + } + + // uhsub_advsimd.html + public static void Uhsub_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + BigInteger diff; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + diff = element1 - element2; + + Elem(result, e, esize, diff.SubBigInteger(esize, 1)); + } + + V(d, result); + } + // uqadd_advsimd.html#UQADD_asisdsame_only public static void Uqadd_S(Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -6339,6 +6541,44 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // urhadd_advsimd.html + public static void Urhadd_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + BigInteger element1; + BigInteger element2; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + Elem(result, e, esize, (element1 + element2 + 1).SubBigInteger(esize, 1)); + } + + V(d, result); + } + // usubw_advsimd.html public static void Usubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) {