From 97ca974213ec9564ed4a9c57e998ca726dbbb64f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 5 Jul 2018 15:47:29 -0300 Subject: [PATCH 01/20] Implement some GPU features (#209) * Implement stencil testing * Implement depth testing * Implement face culling * Implement front face * Comparison functions now take OGL enums too * Fix front facing when flipping was used * Add depth and stencil clear values --- Ryujinx.Graphics/Gal/GalComparisonOp.cs | 16 +- Ryujinx.Graphics/Gal/GalCullFace.cs | 9 + Ryujinx.Graphics/Gal/GalFrontFace.cs | 8 + Ryujinx.Graphics/Gal/GalStencilOp.cs | 14 ++ Ryujinx.Graphics/Gal/IGalRasterizer.cs | 18 ++ .../Gal/OpenGL/OGLEnumConverter.cs | 59 +++++++ Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 53 ++++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 157 ++++++++++++++++-- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs | 18 ++ 9 files changed, 334 insertions(+), 18 deletions(-) create mode 100644 Ryujinx.Graphics/Gal/GalCullFace.cs create mode 100644 Ryujinx.Graphics/Gal/GalFrontFace.cs create mode 100644 Ryujinx.Graphics/Gal/GalStencilOp.cs diff --git a/Ryujinx.Graphics/Gal/GalComparisonOp.cs b/Ryujinx.Graphics/Gal/GalComparisonOp.cs index ddddecebb4..f26a775337 100644 --- a/Ryujinx.Graphics/Gal/GalComparisonOp.cs +++ b/Ryujinx.Graphics/Gal/GalComparisonOp.cs @@ -2,13 +2,13 @@ namespace Ryujinx.Graphics.Gal { public enum GalComparisonOp { - Never = 0x200, - Less = 0x201, - Equal = 0x202, - Lequal = 0x203, - Greater = 0x204, - NotEqual = 0x205, - Gequal = 0x206, - Always = 0x207 + Never = 0x1, + Less = 0x2, + Equal = 0x3, + Lequal = 0x4, + Greater = 0x5, + NotEqual = 0x6, + Gequal = 0x7, + Always = 0x8 } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/GalCullFace.cs b/Ryujinx.Graphics/Gal/GalCullFace.cs new file mode 100644 index 0000000000..4ab3e1742c --- /dev/null +++ b/Ryujinx.Graphics/Gal/GalCullFace.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Gal +{ + public enum GalCullFace + { + Front = 0x404, + Back = 0x405, + 
FrontAndBack = 0x408 + } +} diff --git a/Ryujinx.Graphics/Gal/GalFrontFace.cs b/Ryujinx.Graphics/Gal/GalFrontFace.cs new file mode 100644 index 0000000000..17ad11267b --- /dev/null +++ b/Ryujinx.Graphics/Gal/GalFrontFace.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Gal +{ + public enum GalFrontFace + { + CW = 0x900, + CCW = 0x901 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/GalStencilOp.cs b/Ryujinx.Graphics/Gal/GalStencilOp.cs new file mode 100644 index 0000000000..fc83ca5ea6 --- /dev/null +++ b/Ryujinx.Graphics/Gal/GalStencilOp.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Gal +{ + public enum GalStencilOp + { + Keep = 0x1, + Zero = 0x2, + Replace = 0x3, + Incr = 0x4, + Decr = 0x5, + Invert = 0x6, + IncrWrap = 0x7, + DecrWrap = 0x8 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index e0469382fe..586eae6ba7 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -8,16 +8,34 @@ namespace Ryujinx.Graphics.Gal bool IsIboCached(long Key, long DataSize); + void SetFrontFace(GalFrontFace FrontFace); + void EnableCullFace(); void DisableCullFace(); + void SetCullFace(GalCullFace CullFace); + void EnableDepthTest(); void DisableDepthTest(); void SetDepthFunction(GalComparisonOp Func); + void SetClearDepth(float Depth); + + void EnableStencilTest(); + + void DisableStencilTest(); + + void SetStencilFunction(bool IsFrontFace, GalComparisonOp Func, int Ref, int Mask); + + void SetStencilOp(bool IsFrontFace, GalStencilOp Fail, GalStencilOp ZFail, GalStencilOp ZPass); + + void SetStencilMask(bool IsFrontFace, int Mask); + + void SetClearStencil(int Stencil); + void CreateVbo(long Key, byte[] Buffer); void CreateIbo(long Key, byte[] Buffer); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 349c695e5b..3a81150d6f 100644 --- 
a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -5,17 +5,76 @@ namespace Ryujinx.Graphics.Gal.OpenGL { static class OGLEnumConverter { + public static FrontFaceDirection GetFrontFace(GalFrontFace FrontFace) + { + switch (FrontFace) + { + case GalFrontFace.CW: return FrontFaceDirection.Cw; + case GalFrontFace.CCW: return FrontFaceDirection.Ccw; + } + + throw new ArgumentException(nameof(FrontFace)); + } + + public static CullFaceMode GetCullFace(GalCullFace CullFace) + { + switch (CullFace) + { + case GalCullFace.Front: return CullFaceMode.Front; + case GalCullFace.Back: return CullFaceMode.Back; + case GalCullFace.FrontAndBack: return CullFaceMode.FrontAndBack; + } + + throw new ArgumentException(nameof(CullFace)); + } + + public static StencilOp GetStencilOp(GalStencilOp Op) + { + switch (Op) + { + case GalStencilOp.Keep: return StencilOp.Keep; + case GalStencilOp.Zero: return StencilOp.Zero; + case GalStencilOp.Replace: return StencilOp.Replace; + case GalStencilOp.Incr: return StencilOp.Incr; + case GalStencilOp.Decr: return StencilOp.Decr; + case GalStencilOp.Invert: return StencilOp.Invert; + case GalStencilOp.IncrWrap: return StencilOp.IncrWrap; + case GalStencilOp.DecrWrap: return StencilOp.DecrWrap; + } + + throw new ArgumentException(nameof(Op)); + } + public static DepthFunction GetDepthFunc(GalComparisonOp Func) { + //Looks like the GPU can take it's own values (described in GalComparisonOp) and OpenGL values alike if ((int)Func >= (int)DepthFunction.Never && (int)Func <= (int)DepthFunction.Always) { return (DepthFunction)Func; } + switch (Func) + { + case GalComparisonOp.Never: return DepthFunction.Never; + case GalComparisonOp.Less: return DepthFunction.Less; + case GalComparisonOp.Equal: return DepthFunction.Equal; + case GalComparisonOp.Lequal: return DepthFunction.Lequal; + case GalComparisonOp.Greater: return DepthFunction.Greater; + case GalComparisonOp.NotEqual: return 
DepthFunction.Notequal; + case GalComparisonOp.Gequal: return DepthFunction.Gequal; + case GalComparisonOp.Always: return DepthFunction.Always; + } + throw new ArgumentException(nameof(Func)); } + public static StencilFunction GetStencilFunc(GalComparisonOp Func) + { + //OGL comparison values match, it's just an enum cast + return (StencilFunction)GetDepthFunc(Func); + } + public static DrawElementsType GetDrawElementsType(GalIndexFormat Format) { switch (Format) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index 8bff6bb3e0..b988571172 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -106,6 +106,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL return IboCache.TryGetSize(Key, out long Size) && Size == DataSize; } + public void SetFrontFace(GalFrontFace FrontFace) + { + GL.FrontFace(OGLEnumConverter.GetFrontFace(FrontFace)); + } + public void EnableCullFace() { GL.Enable(EnableCap.CullFace); @@ -116,6 +121,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.Disable(EnableCap.CullFace); } + public void SetCullFace(GalCullFace CullFace) + { + GL.CullFace(OGLEnumConverter.GetCullFace(CullFace)); + } + public void EnableDepthTest() { GL.Enable(EnableCap.DepthTest); @@ -131,6 +141,49 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.DepthFunc(OGLEnumConverter.GetDepthFunc(Func)); } + public void SetClearDepth(float Depth) + { + GL.ClearDepth(Depth); + } + + public void EnableStencilTest() + { + GL.Enable(EnableCap.StencilTest); + } + + public void DisableStencilTest() + { + GL.Disable(EnableCap.StencilTest); + } + + public void SetStencilFunction(bool IsFrontFace, GalComparisonOp Func, int Ref, int Mask) + { + GL.StencilFuncSeparate( + IsFrontFace ? 
StencilFace.Front : StencilFace.Back, + OGLEnumConverter.GetStencilFunc(Func), + Ref, + Mask); + } + + public void SetStencilOp(bool IsFrontFace, GalStencilOp Fail, GalStencilOp ZFail, GalStencilOp ZPass) + { + GL.StencilOpSeparate( + IsFrontFace ? StencilFace.Front : StencilFace.Back, + OGLEnumConverter.GetStencilOp(Fail), + OGLEnumConverter.GetStencilOp(ZFail), + OGLEnumConverter.GetStencilOp(ZPass)); + } + + public void SetStencilMask(bool IsFrontFace, int Mask) + { + GL.StencilMaskSeparate(IsFrontFace ? StencilFace.Front : StencilFace.Back, Mask); + } + + public void SetClearStencil(int Stencil) + { + GL.ClearStencil(Stencil); + } + public void CreateVbo(long Key, byte[] Buffer) { int Handle = GL.GenBuffer(); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index b27f5c142b..e0e769d486 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -79,8 +79,10 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Shader.BindProgram(); + SetFrontFace(); SetCullFace(); SetDepth(); + SetStencil(); SetAlphaBlending(); UploadTextures(Vmm, Keys); @@ -173,14 +175,8 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Shader.Bind(Key); } - int RawSX = ReadRegister(NvGpuEngine3dReg.ViewportScaleX); - int RawSY = ReadRegister(NvGpuEngine3dReg.ViewportScaleY); - - float SX = BitConverter.Int32BitsToSingle(RawSX); - float SY = BitConverter.Int32BitsToSingle(RawSY); - - float SignX = MathF.Sign(SX); - float SignY = MathF.Sign(SY); + float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX); + float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY); Gpu.Renderer.Shader.SetFlip(SignX, SignY); @@ -202,14 +198,145 @@ namespace Ryujinx.HLE.Gpu.Engines throw new ArgumentOutOfRangeException(nameof(Program)); } + private void SetFrontFace() + { + float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX); + float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY); + + GalFrontFace FrontFace = 
(GalFrontFace)ReadRegister(NvGpuEngine3dReg.FrontFace); + + //Flipping breaks facing. Flipping front facing too fixes it + if (SignX != SignY) + { + switch (FrontFace) + { + case GalFrontFace.CW: + FrontFace = GalFrontFace.CCW; + break; + + case GalFrontFace.CCW: + FrontFace = GalFrontFace.CW; + break; + } + } + + Gpu.Renderer.Rasterizer.SetFrontFace(FrontFace); + } + private void SetCullFace() { - //TODO. + bool Enable = (ReadRegister(NvGpuEngine3dReg.CullFaceEnable) & 1) != 0; + + if (Enable) + { + Gpu.Renderer.Rasterizer.EnableCullFace(); + } + else + { + Gpu.Renderer.Rasterizer.DisableCullFace(); + } + + if (!Enable) + { + return; + } + + GalCullFace CullFace = (GalCullFace)ReadRegister(NvGpuEngine3dReg.CullFace); + + Gpu.Renderer.Rasterizer.SetCullFace(CullFace); } private void SetDepth() { - //TODO. + float ClearDepth = ReadRegisterFloat(NvGpuEngine3dReg.ClearDepth); + + Gpu.Renderer.Rasterizer.SetClearDepth(ClearDepth); + + bool Enable = (ReadRegister(NvGpuEngine3dReg.DepthTestEnable) & 1) != 0; + + if (Enable) + { + Gpu.Renderer.Rasterizer.EnableDepthTest(); + } + else + { + Gpu.Renderer.Rasterizer.DisableDepthTest(); + } + + if (!Enable) + { + return; + } + + GalComparisonOp Func = (GalComparisonOp)ReadRegister(NvGpuEngine3dReg.DepthTestFunction); + + Gpu.Renderer.Rasterizer.SetDepthFunction(Func); + } + + private void SetStencil() + { + int ClearStencil = ReadRegister(NvGpuEngine3dReg.ClearStencil); + + Gpu.Renderer.Rasterizer.SetClearStencil(ClearStencil); + + bool Enable = (ReadRegister(NvGpuEngine3dReg.StencilEnable) & 1) != 0; + + if (Enable) + { + Gpu.Renderer.Rasterizer.EnableStencilTest(); + } + else + { + Gpu.Renderer.Rasterizer.DisableStencilTest(); + } + + if (!Enable) + { + return; + } + + void SetFaceStencil( + bool IsFrontFace, + NvGpuEngine3dReg Func, + NvGpuEngine3dReg FuncRef, + NvGpuEngine3dReg FuncMask, + NvGpuEngine3dReg OpFail, + NvGpuEngine3dReg OpZFail, + NvGpuEngine3dReg OpZPass, + NvGpuEngine3dReg Mask) + { + 
Gpu.Renderer.Rasterizer.SetStencilFunction( + IsFrontFace, + (GalComparisonOp)ReadRegister(Func), + ReadRegister(FuncRef), + ReadRegister(FuncMask)); + + Gpu.Renderer.Rasterizer.SetStencilOp( + IsFrontFace, + (GalStencilOp)ReadRegister(OpFail), + (GalStencilOp)ReadRegister(OpZFail), + (GalStencilOp)ReadRegister(OpZPass)); + + Gpu.Renderer.Rasterizer.SetStencilMask(IsFrontFace, ReadRegister(Mask)); + } + + SetFaceStencil(false, + NvGpuEngine3dReg.StencilBackFuncFunc, + NvGpuEngine3dReg.StencilBackFuncRef, + NvGpuEngine3dReg.StencilBackFuncMask, + NvGpuEngine3dReg.StencilBackOpFail, + NvGpuEngine3dReg.StencilBackOpZFail, + NvGpuEngine3dReg.StencilBackOpZPass, + NvGpuEngine3dReg.StencilBackMask); + + SetFaceStencil(true, + NvGpuEngine3dReg.StencilFrontFuncFunc, + NvGpuEngine3dReg.StencilFrontFuncRef, + NvGpuEngine3dReg.StencilFrontFuncMask, + NvGpuEngine3dReg.StencilFrontOpFail, + NvGpuEngine3dReg.StencilFrontOpZFail, + NvGpuEngine3dReg.StencilFrontOpZPass, + NvGpuEngine3dReg.StencilFrontMask); } private void SetAlphaBlending() @@ -549,6 +676,11 @@ namespace Ryujinx.HLE.Gpu.Engines ConstBuffers[Stage][Index].Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize); } + private float GetFlipSign(NvGpuEngine3dReg Reg) + { + return MathF.Sign(ReadRegisterFloat(Reg)); + } + private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg) { return @@ -571,6 +703,11 @@ namespace Ryujinx.HLE.Gpu.Engines return Registers[(int)Reg]; } + private float ReadRegisterFloat(NvGpuEngine3dReg Reg) + { + return BitConverter.Int32BitsToSingle(ReadRegister(Reg)); + } + private void WriteRegister(NvGpuEngine3dReg Reg, int Value) { Registers[(int)Reg] = Value; diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs index 64866ce9aa..9eb2966d9e 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs @@ -14,6 +14,11 @@ namespace Ryujinx.HLE.Gpu.Engines ViewportTranslateZ = 0x285, VertexArrayFirst = 0x35d, 
VertexArrayCount = 0x35e, + ClearDepth = 0x364, + ClearStencil = 0x368, + StencilBackFuncRef = 0x3d5, + StencilBackMask = 0x3d6, + StencilBackFuncMask = 0x3d7, VertexAttribNFormat = 0x458, DepthTestEnable = 0x4b3, IBlendEnable = 0x4b9, @@ -27,9 +32,22 @@ namespace Ryujinx.HLE.Gpu.Engines BlendFuncDstAlpha = 0x4d6, BlendEnableMaster = 0x4d7, IBlendNEnable = 0x4d8, + StencilEnable = 0x4e0, + StencilFrontOpFail = 0x4e1, + StencilFrontOpZFail = 0x4e2, + StencilFrontOpZPass = 0x4e3, + StencilFrontFuncFunc = 0x4e4, + StencilFrontFuncRef = 0x4e5, + StencilFrontFuncMask = 0x4e6, + StencilFrontMask = 0x4e7, VertexArrayElemBase = 0x50d, TexHeaderPoolOffset = 0x55d, TexSamplerPoolOffset = 0x557, + StencilTwoSideEnable = 0x565, + StencilBackOpFail = 0x566, + StencilBackOpZFail = 0x567, + StencilBackOpZPass = 0x568, + StencilBackFuncFunc = 0x569, ShaderAddress = 0x582, VertexBeginGl = 0x586, IndexArrayAddress = 0x5f2, From 0bec547b9dc11cb01c42db7f015cc47a0e649f6b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Jul 2018 23:40:12 -0300 Subject: [PATCH 02/20] Disable front facing and face culling to avoid regression (#226) * Disable tests for framebuffer blitting --- Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs | 29 ++++++++++++++++++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 5 ++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs index 4d91ff97ee..305fa37d8f 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs @@ -239,7 +239,19 @@ namespace Ryujinx.Graphics.Gal.OpenGL { EnsureInitialized(); - bool AlphaBlendEnable = GL.GetInteger(GetPName.Blend) != 0; + //bool CullFaceEnable = GL.IsEnabled(EnableCap.CullFace); + + bool DepthTestEnable = GL.IsEnabled(EnableCap.DepthTest); + + bool StencilTestEnable = GL.IsEnabled(EnableCap.StencilTest); + + bool AlphaBlendEnable = GL.IsEnabled(EnableCap.Blend); + + 
//GL.Disable(EnableCap.CullFace); + + GL.Disable(EnableCap.DepthTest); + + GL.Disable(EnableCap.StencilTest); GL.Disable(EnableCap.Blend); @@ -268,6 +280,21 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.UseProgram(CurrentProgram); + //if (CullFaceEnable) + //{ + // GL.Enable(EnableCap.CullFace); + //} + + if (DepthTestEnable) + { + GL.Enable(EnableCap.DepthTest); + } + + if (StencilTestEnable) + { + GL.Enable(EnableCap.StencilTest); + } + if (AlphaBlendEnable) { GL.Enable(EnableCap.Blend); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index e0e769d486..d46f5089a5 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -79,8 +79,9 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Shader.BindProgram(); - SetFrontFace(); - SetCullFace(); + //Note: Uncomment SetFrontFace SetCullFace when flipping issues are solved + //SetFrontFace(); + //SetCullFace(); SetDepth(); SetStencil(); SetAlphaBlending(); From af1516a1466de474c7f8fb5f564219b9323e1c26 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 8 Jul 2018 16:41:46 +0100 Subject: [PATCH 03/20] ASoftFloat: Fix InvSqrtEstimate for negative values (#233) --- ChocolArm64/Instruction/ASoftFloat.cs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 7bee69baea..1bd7166589 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -50,14 +50,8 @@ namespace ChocolArm64.Instruction long x_exp = (long)((x_bits >> 52) & 0x7FF); ulong scaled = x_bits & ((1ul << 52) - 1); - if (x_exp == 0x7ff) + if (x_exp == 0x7FF && scaled != 0) { - if (scaled == 0) - { - // Infinity -> Zero - return BitConverter.Int64BitsToDouble((long)x_sign); - } - // NaN return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); } @@ -79,6 +73,18 @@ namespace ChocolArm64.Instruction scaled <<= 1; } 
+ if (x_sign != 0) + { + // Negative -> NaN + return BitConverter.Int64BitsToDouble((long)0x7ff8000000000000); + } + + if (x_exp == 0x7ff && scaled == 0) + { + // Infinity -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + if (((ulong)x_exp & 1) == 1) { scaled >>= 45; From dc04b5465fe57231e88411c4771f2d63c9b342c1 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 12:42:10 -0300 Subject: [PATCH 04/20] Improvements to IAudioOutManager (#232) * Improvements to IAudioOutManager * Make implementation private --- Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs | 8 ++ .../OsHle/Services/Aud/IAudioOutManager.cs | 86 ++++++++++++------- 2 files changed, 62 insertions(+), 32 deletions(-) create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs new file mode 100644 index 0000000000..fa201d8cdf --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.HLE.OsHle.Services.Aud +{ + static class AudErr + { + public const int DeviceNotFound = 1; + public const int UnsupportedSampleRate = 3; + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs index 18aedb32cc..54ffa6d901 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs @@ -6,6 +6,8 @@ using Ryujinx.HLE.OsHle.Ipc; using System.Collections.Generic; using System.Text; +using static Ryujinx.HLE.OsHle.ErrorCode; + namespace Ryujinx.HLE.OsHle.Services.Aud { class IAudioOutManager : IpcService @@ -28,36 +30,44 @@ namespace Ryujinx.HLE.OsHle.Services.Aud } public long ListAudioOuts(ServiceCtx Context) - { - ListAudioOutsMethod(Context, Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size); - - return 0; + { + return ListAudioOutsImpl( + Context, + Context.Request.ReceiveBuff[0].Position, + 
Context.Request.ReceiveBuff[0].Size); } public long OpenAudioOut(ServiceCtx Context) { - OpenAudioOutMethod(Context, Context.Request.SendBuff[0].Position, Context.Request.SendBuff[0].Size, - Context.Request.ReceiveBuff[0].Position, Context.Request.ReceiveBuff[0].Size); - - return 0; + return OpenAudioOutImpl( + Context, + Context.Request.SendBuff[0].Position, + Context.Request.SendBuff[0].Size, + Context.Request.ReceiveBuff[0].Position, + Context.Request.ReceiveBuff[0].Size); } - + public long ListAudioOutsAuto(ServiceCtx Context) - { - ListAudioOutsMethod(Context, Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size); + { + (long RecvPosition, long RecvSize) = Context.Request.GetBufferType0x22(); - return 0; + return ListAudioOutsImpl(Context, RecvPosition, RecvSize); } - + public long OpenAudioOutAuto(ServiceCtx Context) { - OpenAudioOutMethod(Context, Context.Request.GetBufferType0x21().Position, Context.Request.GetBufferType0x21().Size, - Context.Request.GetBufferType0x22().Position, Context.Request.GetBufferType0x22().Size); + (long SendPosition, long SendSize) = Context.Request.GetBufferType0x21(); + (long RecvPosition, long RecvSize) = Context.Request.GetBufferType0x22(); - return 0; + return OpenAudioOutImpl( + Context, + SendPosition, + SendSize, + RecvPosition, + RecvSize); } - - public void ListAudioOutsMethod(ServiceCtx Context, long Position, long Size) + + private long ListAudioOutsImpl(ServiceCtx Context, long Position, long Size) { int NameCount = 0; @@ -75,23 +85,29 @@ namespace Ryujinx.HLE.OsHle.Services.Aud } Context.ResponseData.Write(NameCount); + + return 0; } - - public void OpenAudioOutMethod(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize) + + private long OpenAudioOutImpl(ServiceCtx Context, long SendPosition, long SendSize, long ReceivePosition, long ReceiveSize) { - IAalOutput AudioOut = Context.Ns.AudioOut; - string DeviceName = AMemoryHelper.ReadAsciiString( 
Context.Memory, SendPosition, - SendSize - ); - + SendSize); + if (DeviceName == string.Empty) { DeviceName = DefaultAudioOutput; } + if (DeviceName != DefaultAudioOutput) + { + Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid device name!"); + + return MakeError(ErrorModule.Audio, AudErr.DeviceNotFound); + } + byte[] DeviceNameBuffer = Encoding.ASCII.GetBytes(DeviceName + "\0"); if ((ulong)DeviceNameBuffer.Length <= (ulong)ReceiveSize) @@ -101,19 +117,21 @@ namespace Ryujinx.HLE.OsHle.Services.Aud else { Context.Ns.Log.PrintError(LogClass.ServiceAudio, $"Output buffer size {ReceiveSize} too small!"); - } + } int SampleRate = Context.RequestData.ReadInt32(); int Channels = Context.RequestData.ReadInt32(); - Channels = (ushort)(Channels >> 16); - - if (SampleRate == 0) + if (SampleRate != 48000) { - SampleRate = 48000; + Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid sample rate!"); + + return MakeError(ErrorModule.Audio, AudErr.UnsupportedSampleRate); } - if (Channels < 1 || Channels > 2) + Channels = (ushort)Channels; + + if (Channels == 0) { Channels = 2; } @@ -125,7 +143,9 @@ namespace Ryujinx.HLE.OsHle.Services.Aud ReleaseEvent.WaitEvent.Set(); }; - int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format); + IAalOutput AudioOut = Context.Ns.AudioOut; + + int Track = AudioOut.OpenTrack(SampleRate, 2, Callback, out AudioFormat Format); MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track)); @@ -133,6 +153,8 @@ namespace Ryujinx.HLE.OsHle.Services.Aud Context.ResponseData.Write(Channels); Context.ResponseData.Write((int)Format); Context.ResponseData.Write((int)PlaybackState.Stopped); + + return 0; } } } From 6479c3e48479259bca79bee6f1016e8108cc33a8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 8 Jul 2018 13:14:35 -0300 Subject: [PATCH 05/20] Implement GPU primitive restart (#221) --- Ryujinx.Graphics/Gal/IGalRasterizer.cs | 6 +++++ Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 15 ++++++++++++ 
Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 24 ++++++++++++++++++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs | 2 ++ 4 files changed, 47 insertions(+) diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index 586eae6ba7..2598efb610 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -36,6 +36,12 @@ namespace Ryujinx.Graphics.Gal void SetClearStencil(int Stencil); + void EnablePrimitiveRestart(); + + void DisablePrimitiveRestart(); + + void SetPrimitiveRestartIndex(uint Index); + void CreateVbo(long Key, byte[] Buffer); void CreateIbo(long Key, byte[] Buffer); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index b988571172..a4ec7f87cf 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -184,6 +184,21 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.ClearStencil(Stencil); } + public void EnablePrimitiveRestart() + { + GL.Enable(EnableCap.PrimitiveRestart); + } + + public void DisablePrimitiveRestart() + { + GL.Disable(EnableCap.PrimitiveRestart); + } + + public void SetPrimitiveRestartIndex(uint Index) + { + GL.PrimitiveRestartIndex(Index); + } + public void CreateVbo(long Key, byte[] Buffer) { int Handle = GL.GenBuffer(); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index d46f5089a5..10c99494b2 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -85,6 +85,7 @@ namespace Ryujinx.HLE.Gpu.Engines SetDepth(); SetStencil(); SetAlphaBlending(); + SetPrimitiveRestart(); UploadTextures(Vmm, Keys); UploadUniforms(Vmm); @@ -389,6 +390,29 @@ namespace Ryujinx.HLE.Gpu.Engines } } + private void SetPrimitiveRestart() + { + bool Enable = (ReadRegister(NvGpuEngine3dReg.PrimRestartEnable) & 1) != 0; + + if (Enable) + { + Gpu.Renderer.Rasterizer.EnablePrimitiveRestart(); + } + else 
+ { + Gpu.Renderer.Rasterizer.DisablePrimitiveRestart(); + } + + if (!Enable) + { + return; + } + + uint Index = (uint)ReadRegister(NvGpuEngine3dReg.PrimRestartIndex); + + Gpu.Renderer.Rasterizer.SetPrimitiveRestartIndex(Index); + } + private void UploadTextures(NvGpuVmm Vmm, long[] Keys) { long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs index 9eb2966d9e..3de2885ef2 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs @@ -50,6 +50,8 @@ namespace Ryujinx.HLE.Gpu.Engines StencilBackFuncFunc = 0x569, ShaderAddress = 0x582, VertexBeginGl = 0x586, + PrimRestartEnable = 0x591, + PrimRestartIndex = 0x592, IndexArrayAddress = 0x5f2, IndexArrayEndAddr = 0x5f4, IndexArrayFormat = 0x5f6, From 0f8f40486d1b3215c845325744bd545149223805 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 8 Jul 2018 20:54:47 +0100 Subject: [PATCH 06/20] ChocolArm64: More accurate implementation of Frecpe & Frecps (#228) * ChocolArm64: More accurate implementation of Frecpe * ChocolArm64: Handle infinities and zeros in Frecps --- .../Instruction/AInstEmitSimdArithmetic.cs | 100 ++------------- .../Instruction/AInstEmitSimdHelper.cs | 20 +++ ChocolArm64/Instruction/ASoftFloat.cs | 120 ++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 39 +++--- 4 files changed, 170 insertions(+), 109 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index b96b71be46..39331f965c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction public static void Frecpe_S(AILEmitterCtx Context) { - EmitFrecpe(Context, 0, Scalar: true); + EmitScalarUnaryOpF(Context, () => + { + EmitUnarySoftFloatCall(Context, 
nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecpe_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorUnaryOpF(Context, () => { - EmitFrecpe(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(1); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(1); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - - Context.Emit(OpCodes.Div); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecps_S(AILEmitterCtx Context) { - EmitFrecps(Context, 0, Scalar: true); + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frecps_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorBinaryOpF(Context, () => { - EmitFrecps(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(2); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(2); - } - - EmitVectorExtractF(Context, Op.Rn, Index, 
SizeF); - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frinta_S(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 0f6ea42cec..d895ec9c7c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } + public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name) + { + IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + MethodInfo MthdInfo; + + if (SizeF == 0) + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (SizeF == 1) */ + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) }); + } + + Context.EmitCall(MthdInfo); + } + public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) { AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp; diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 1bd7166589..e63c82beea 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction static ASoftFloat() { InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); + RecipEstimateTable = BuildRecipEstimateTable(); } + private static readonly byte[] RecipEstimateTable; private static readonly byte[] InvSqrtEstimateTable; private static byte[] BuildInvSqrtEstimateTable() @@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction return Table; } + private static byte[] BuildRecipEstimateTable() + { + 
byte[] Table = new byte[256]; + for (ulong index = 0; index < 256; index++) + { + ulong a = index | 0x100; + + a = (a << 1) + 1; + ulong b = 0x80000 / a; + b = (b + 1) >> 1; + + Table[index] = (byte)(b & 0xFF); + } + return Table; + } + public static float InvSqrtEstimate(float x) { return (float)InvSqrtEstimate((double)x); @@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction ulong result = x_sign | (result_exp << 52) | fraction; return BitConverter.Int64BitsToDouble((long)result); } + + public static float RecipEstimate(float x) + { + return (float)RecipEstimate((double)x); + } + + public static double RecipEstimate(double x) + { + ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong x_sign = x_bits & 0x8000000000000000; + ulong x_exp = (x_bits >> 52) & 0x7FF; + ulong scaled = x_bits & ((1ul << 52) - 1); + + if (x_exp >= 2045) + { + if (x_exp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + + if (x_exp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + x_exp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong result_exp = (2045 - x_exp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (result_exp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (result_exp == 0x7FF) + { + result_exp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = x_sign | (result_exp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + public static float RecipStep(float op1, float op2) + { + return (float)RecipStep((double)op1, (double)op2); + } + + public static double RecipStep(double op1, 
double op2) + { + op1 = -op1; + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + ulong op1_sign = op1_bits & 0x8000000000000000; + ulong op2_sign = op2_bits & 0x8000000000000000; + ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; + ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; + + bool inf1 = op1_other == 0x7ff0000000000000; + bool inf2 = op2_other == 0x7ff0000000000000; + bool zero1 = op1_other == 0; + bool zero2 = op2_other == 0; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + return 2.0; + } + else if (inf1 || inf2) + { + // Infinity + return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign))); + } + + return 2.0 + op1 * op2; + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 98be2fc5be..2a0f5ed919 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -163,26 +163,18 @@ namespace Ryujinx.Tests.Cpu Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B)); } - [Test, Description("FRECPE D0, D1")] - public void Frecpe_S([Random(100)] double A) + [TestCase(0x00000000u, 0x7F800000u)] + [TestCase(0x80000000u, 0xFF800000u)] + [TestCase(0x00FFF000u, 0x7E000000u)] + [TestCase(0x41200000u, 0x3DCC8000u)] + [TestCase(0xC1200000u, 0xBDCC8000u)] + [TestCase(0x001FFFFFu, 0x7F800000u)] + [TestCase(0x007FF000u, 0x7E800000u)] + public void Frecpe_S(uint A, uint Result) { - AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A)); - - Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A)); - } - - [Test, Description("FRECPE V2.4S, V0.4S")] - public void Frecpe_V([Random(100)] float A) - { - AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A)); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); - 
Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); - }); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(0x5EA1D820, V1: V1); + Assert.AreEqual(Result, GetVectorE0(ThreadState.V0)); } [Test, Description("FRECPS D0, D1, D2")] @@ -202,12 +194,13 @@ namespace Ryujinx.Tests.Cpu V2: Sse.SetAllVector128(A), V0: Sse.SetAllVector128(B)); + float Result = (float)(2 - ((double)A * (double)B)); Assert.Multiple(() => { - Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result)); }); } From 095db47e132a475e25d128e691ebdae101611cc9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 16:55:15 -0300 Subject: [PATCH 07/20] Query multiple pages at once with GetWriteWatch (#222) * Query multiple pages at once with GetWriteWatch * Allow multiple buffer types to share the same page, always use the physical address as cache key * Remove a variable that is no longer needed --- ChocolArm64/Memory/AMemory.cs | 58 ++++++----- ChocolArm64/Memory/AMemoryWin32.cs | 29 +++++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 32 +++--- Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs | 3 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs | 6 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs | 120 ++++++++++++---------- 6 files changed, 145 insertions(+), 103 deletions(-) diff
--git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index c02bf172f9..da5cf00749 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -33,19 +33,25 @@ namespace ChocolArm64.Memory private byte* RamPtr; + private int HostPageSize; + public AMemory() { Manager = new AMemoryMgr(); Monitors = new Dictionary(); + IntPtr Size = (IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - Ram = AMemoryWin32.Allocate((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = AMemoryWin32.Allocate(Size); + + HostPageSize = AMemoryWin32.GetPageSize(Ram, Size); } else { - Ram = Marshal.AllocHGlobal((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = Marshal.AllocHGlobal(Size); } RamPtr = (byte*)Ram; @@ -149,49 +155,53 @@ namespace ChocolArm64.Memory } } - public long GetHostPageSize() + public int GetHostPageSize() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - return AMemoryMgr.PageSize; - } - - IntPtr MemAddress = new IntPtr(RamPtr); - IntPtr MemSize = new IntPtr(AMemoryMgr.RamSize); - - long PageSize = AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: false); - - if (PageSize < 1) - { - throw new InvalidOperationException(); - } - - return PageSize; + return HostPageSize; } - public bool IsRegionModified(long Position, long Size) + public bool[] IsRegionModified(long Position, long Size) { if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return true; + return null; } long EndPos = Position + Size; if ((ulong)EndPos < (ulong)Position) { - return false; + return null; } if ((ulong)EndPos > AMemoryMgr.RamSize) { - return false; + return null; } IntPtr MemAddress = new IntPtr(RamPtr + Position); IntPtr MemSize = new IntPtr(Size); - return AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: true) != 0; + int HostPageMask = HostPageSize - 1; + + Position &= ~HostPageMask; + + Size = EndPos - Position; + + IntPtr[] Addresses = 
new IntPtr[(Size + HostPageMask) / HostPageSize]; + + AMemoryWin32.IsRegionModified(MemAddress, MemSize, Addresses, out int Count); + + bool[] Modified = new bool[Addresses.Length]; + + for (int Index = 0; Index < Count; Index++) + { + long VA = Addresses[Index].ToInt64() - Ram.ToInt64(); + + Modified[(VA - Position) / HostPageSize] = true; + } + + return Modified; } public sbyte ReadSByte(long Position) diff --git a/ChocolArm64/Memory/AMemoryWin32.cs b/ChocolArm64/Memory/AMemoryWin32.cs index d097dc8718..387ca32c2e 100644 --- a/ChocolArm64/Memory/AMemoryWin32.cs +++ b/ChocolArm64/Memory/AMemoryWin32.cs @@ -49,7 +49,7 @@ namespace ChocolArm64.Memory VirtualFree(Address, IntPtr.Zero, MEM_RELEASE); } - public unsafe static long IsRegionModified(IntPtr Address, IntPtr Size, bool Reset) + public unsafe static int GetPageSize(IntPtr Address, IntPtr Size) { IntPtr[] Addresses = new IntPtr[1]; @@ -57,17 +57,36 @@ namespace ChocolArm64.Memory long Granularity; - int Flags = Reset ? WRITE_WATCH_FLAG_RESET : 0; - GetWriteWatch( - Flags, + 0, Address, Size, Addresses, &Count, &Granularity); - return Count != 0 ? 
Granularity : 0; + return (int)Granularity; + } + + public unsafe static void IsRegionModified( + IntPtr Address, + IntPtr Size, + IntPtr[] Addresses, + out int AddrCount) + { + long Count = Addresses.Length; + + long Granularity; + + GetWriteWatch( + WRITE_WATCH_FLAG_RESET, + Address, + Size, + Addresses, + &Count, + &Granularity); + + AddrCount = (int)Count; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 10c99494b2..b9f9cc4974 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -464,19 +464,17 @@ namespace Ryujinx.HLE.Gpu.Engines GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition); - long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; + long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; - long Key = TextureAddress; + Key = Vmm.GetPhysicalAddress(Key); - TextureAddress = Vmm.GetPhysicalAddress(TextureAddress); - - if (IsFrameBufferPosition(TextureAddress)) + if (IsFrameBufferPosition(Key)) { //This texture is a frame buffer texture, //we shouldn't read anything from memory and bind //the frame buffer texture instead, since we're not //really writing anything to memory. 
- Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex); + Gpu.Renderer.FrameBuffer.BindTexture(Key, TexIndex); } else { @@ -544,6 +542,8 @@ namespace Ryujinx.HLE.Gpu.Engines { long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress); + long IboKey = Vmm.GetPhysicalAddress(IndexPosition); + int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat); int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst); int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount); @@ -561,16 +561,16 @@ namespace Ryujinx.HLE.Gpu.Engines { int IbSize = IndexCount * IndexEntrySize; - bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize); + bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize); - if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index)) + if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index)) { byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize); - Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data); + Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -619,20 +619,22 @@ namespace Ryujinx.HLE.Gpu.Engines continue; } + long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + int Stride = Control & 0xfff; long VbSize = (VertexEndPos - VertexPosition) + 1; - bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize); + bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize); - if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex)) + if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex)) { byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize); - Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data); + 
Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); @@ -641,7 +643,7 @@ namespace Ryujinx.HLE.Gpu.Engines { int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase); - Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, VertexBase, PrimType); + Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType); } else { diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs index 7474aa33fa..469cd6cd0c 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs @@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Gpu.Memory { Index, Vertex, - Texture + Texture, + Count } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs index 36f6406a1e..0c81dd1508 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs @@ -274,11 +274,9 @@ namespace Ryujinx.HLE.Gpu.Memory PageTable[L0][L1] = TgtAddr; } - public bool IsRegionModified(long Position, long Size, NvGpuBufferType BufferType) + public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType) { - long PA = GetPhysicalAddress(Position); - - return Cache.IsRegionModified(Memory, BufferType, Position, PA, Size); + return Cache.IsRegionModified(Memory, BufferType, PA, Size); } public byte ReadByte(long Position) diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs index c7108f00ca..ac9bd850e0 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs @@ -11,43 +11,53 @@ namespace Ryujinx.HLE.Gpu.Memory private class CachedPage { - private List<(long Start, long End)> Regions; + 
private struct Range + { + public long Start; + public long End; + + public Range(long Start, long End) + { + this.Start = Start; + this.End = End; + } + } + + private List[] Regions; public LinkedListNode Node { get; set; } - public int Count => Regions.Count; - public int Timestamp { get; private set; } - public long PABase { get; private set; } - - public NvGpuBufferType BufferType { get; private set; } - - public CachedPage(long PABase, NvGpuBufferType BufferType) + public CachedPage() { - this.PABase = PABase; - this.BufferType = BufferType; + Regions = new List[(int)NvGpuBufferType.Count]; - Regions = new List<(long, long)>(); + for (int Index = 0; Index < Regions.Length; Index++) + { + Regions[Index] = new List(); + } } - public bool AddRange(long Start, long End) + public bool AddRange(long Start, long End, NvGpuBufferType BufferType) { - for (int Index = 0; Index < Regions.Count; Index++) - { - (long RgStart, long RgEnd) = Regions[Index]; + List BtRegions = Regions[(int)BufferType]; - if (Start >= RgStart && End <= RgEnd) + for (int Index = 0; Index < BtRegions.Count; Index++) + { + Range Rg = BtRegions[Index]; + + if (Start >= Rg.Start && End <= Rg.End) { return false; } - if (Start <= RgEnd && RgStart <= End) + if (Start <= Rg.End && Rg.Start <= End) { - long MinStart = Math.Min(RgStart, Start); - long MaxEnd = Math.Max(RgEnd, End); + long MinStart = Math.Min(Rg.Start, Start); + long MaxEnd = Math.Max(Rg.End, End); - Regions[Index] = (MinStart, MaxEnd); + BtRegions[Index] = new Range(MinStart, MaxEnd); Timestamp = Environment.TickCount; @@ -55,12 +65,24 @@ namespace Ryujinx.HLE.Gpu.Memory } } - Regions.Add((Start, End)); + BtRegions.Add(new Range(Start, End)); Timestamp = Environment.TickCount; return true; } + + public int GetTotalCount() + { + int Count = 0; + + for (int Index = 0; Index < Regions.Length; Index++) + { + Count += Regions[Index].Count; + } + + return Count; + } } private Dictionary Cache; @@ -76,71 +98,61 @@ namespace 
Ryujinx.HLE.Gpu.Memory SortedCache = new LinkedList(); } - public bool IsRegionModified( - AMemory Memory, - NvGpuBufferType BufferType, - long VA, - long PA, - long Size) + public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size) { + bool[] Modified = Memory.IsRegionModified(PA, Size); + + if (Modified == null) + { + return true; + } + ClearCachedPagesIfNeeded(); long PageSize = Memory.GetHostPageSize(); long Mask = PageSize - 1; - long VAEnd = VA + Size; long PAEnd = PA + Size; bool RegMod = false; - while (VA < VAEnd) - { - long Key = VA & ~Mask; - long PABase = PA & ~Mask; + int Index = 0; + + while (PA < PAEnd) + { + long Key = PA & ~Mask; - long VAPgEnd = Math.Min((VA + PageSize) & ~Mask, VAEnd); long PAPgEnd = Math.Min((PA + PageSize) & ~Mask, PAEnd); bool IsCached = Cache.TryGetValue(Key, out CachedPage Cp); - bool PgReset = false; - - if (!IsCached) + if (IsCached) { - Cp = new CachedPage(PABase, BufferType); + CpCount -= Cp.GetTotalCount(); - Cache.Add(Key, Cp); + SortedCache.Remove(Cp.Node); } else { - CpCount -= Cp.Count; + Cp = new CachedPage(); - SortedCache.Remove(Cp.Node); - - if (Cp.PABase != PABase || - Cp.BufferType != BufferType) - { - PgReset = true; - } + Cache.Add(Key, Cp); } - PgReset |= Memory.IsRegionModified(PA, PAPgEnd - PA) && IsCached; - - if (PgReset) + if (Modified[Index++] && IsCached) { - Cp = new CachedPage(PABase, BufferType); + Cp = new CachedPage(); Cache[Key] = Cp; } Cp.Node = SortedCache.AddLast(Key); - RegMod |= Cp.AddRange(VA, VAPgEnd); + RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType); - CpCount += Cp.Count; + CpCount += Cp.GetTotalCount(); - VA = VAPgEnd; PA = PAPgEnd; } @@ -169,7 +181,7 @@ namespace Ryujinx.HLE.Gpu.Memory Cache.Remove(Key); - CpCount -= Cp.Count; + CpCount -= Cp.GetTotalCount(); TimeDelta = RingDelta(Cp.Timestamp, Timestamp); } From 0a36bfbf921038e8eb7d4294ec8543903c933d90 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 22:48:28 -0300 Subject: [PATCH 08/20] 
Fix ZIP/UZP/TRN instructions when Rd == Rn || Rd == Rm (#239) --- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 95fe594994..d67946a977 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -339,9 +339,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); @@ -363,9 +366,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); @@ -387,9 +393,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, (Index & 1) == 0 ? 
Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); From 791fe70810f0f0f417c74aaff5446551bed78fee Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 22:49:07 -0300 Subject: [PATCH 09/20] Allow sample rate of 0 on OpenAudioOut, fix 5.1 sound output (#240) --- Ryujinx.Audio/OpenAL/OpenALAudioOut.cs | 27 ++++++++++++------- .../OsHle/Services/Aud/IAudioOutManager.cs | 15 ++++++++--- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs index f574b46f38..2860dc2e2d 100644 --- a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs +++ b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs @@ -20,7 +20,7 @@ namespace Ryujinx.Audio.OpenAL public int SourceId { get; private set; } public int SampleRate { get; private set; } - + public ALFormat Format { get; private set; } private ReleaseCallback Callback; @@ -153,7 +153,7 @@ namespace Ryujinx.Audio.OpenAL ShouldCallReleaseCallback = true; } } - + private void SyncQueuedTags() { AL.GetSource(SourceId, ALGetSourcei.BuffersQueued, out int QueuedCount); @@ -249,11 +249,6 @@ namespace Ryujinx.Audio.OpenAL private ALFormat GetALFormat(int Channels, AudioFormat Format) { - if (Channels < 1 || Channels > 2) - { - throw new ArgumentOutOfRangeException(nameof(Channels)); - } - if (Channels == 1) { switch (Format) @@ -262,7 +257,7 @@ namespace Ryujinx.Audio.OpenAL case AudioFormat.PcmInt16: return ALFormat.Mono16; } } - else /* if (Channels == 2) */ + else if (Channels == 2) { switch (Format) { @@ -270,6 +265,18 @@ namespace Ryujinx.Audio.OpenAL case AudioFormat.PcmInt16: return ALFormat.Stereo16; } } + else if (Channels == 6) + { + switch (Format) + { + case AudioFormat.PcmInt8: return ALFormat.Multi51Chn8Ext; + case AudioFormat.PcmInt16: return 
ALFormat.Multi51Chn16Ext; + } + } + else + { + throw new ArgumentOutOfRangeException(nameof(Channels)); + } throw new ArgumentException(nameof(Format)); } @@ -288,7 +295,7 @@ namespace Ryujinx.Audio.OpenAL { return Td.ContainsBuffer(Tag); } - + return false; } @@ -298,7 +305,7 @@ namespace Ryujinx.Audio.OpenAL { return Td.GetReleasedBuffers(MaxCount); } - + return null; } diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs index 54ffa6d901..8c78d1d493 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs @@ -14,6 +14,10 @@ namespace Ryujinx.HLE.OsHle.Services.Aud { private const string DefaultAudioOutput = "DeviceOut"; + private const int DefaultSampleRate = 48000; + + private const int DefaultChannelsCount = 2; + private Dictionary m_Commands; public override IReadOnlyDictionary Commands => m_Commands; @@ -122,7 +126,12 @@ namespace Ryujinx.HLE.OsHle.Services.Aud int SampleRate = Context.RequestData.ReadInt32(); int Channels = Context.RequestData.ReadInt32(); - if (SampleRate != 48000) + if (SampleRate == 0) + { + SampleRate = DefaultSampleRate; + } + + if (SampleRate != DefaultSampleRate) { Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid sample rate!"); @@ -133,7 +142,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud if (Channels == 0) { - Channels = 2; + Channels = DefaultChannelsCount; } KEvent ReleaseEvent = new KEvent(); @@ -145,7 +154,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud IAalOutput AudioOut = Context.Ns.AudioOut; - int Track = AudioOut.OpenTrack(SampleRate, 2, Callback, out AudioFormat Format); + int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format); MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track)); From 1968386808bb48f823b1877b0e5ff8c6e2f8bd49 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 23:01:59 -0300 Subject: [PATCH 10/20] Add locking methods to the 
ogl resource cache (#238) * Add locking methods to the ogl resource cache * Remove some unused arguments * Add the ZF32 texture format --- Ryujinx.Graphics/Gal/GalTextureFormat.cs | 1 + Ryujinx.Graphics/Gal/IGalRasterizer.cs | 7 ++- Ryujinx.Graphics/Gal/IGalTexture.cs | 3 ++ .../Gal/OpenGL/OGLCachedResource.cs | 43 ++++++++++++++++++- .../Gal/OpenGL/OGLEnumConverter.cs | 19 ++++---- Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 16 ++++++- Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs | 10 +++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 20 ++++++++- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 28 ++++++++---- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 1 + 10 files changed, 122 insertions(+), 26 deletions(-) diff --git a/Ryujinx.Graphics/Gal/GalTextureFormat.cs b/Ryujinx.Graphics/Gal/GalTextureFormat.cs index 7d19dc26d4..231d33ec0e 100644 --- a/Ryujinx.Graphics/Gal/GalTextureFormat.cs +++ b/Ryujinx.Graphics/Gal/GalTextureFormat.cs @@ -17,6 +17,7 @@ namespace Ryujinx.Graphics.Gal BC3 = 0x26, BC4 = 0x27, BC5 = 0x28, + ZF32 = 0x2f, Astc2D4x4 = 0x40, Astc2D5x5 = 0x41, Astc2D6x6 = 0x42, diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index 2598efb610..0c5d37e40e 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Gal { public interface IGalRasterizer { + void LockCaches(); + void UnlockCaches(); + void ClearBuffers(GalClearBufferFlags Flags); bool IsVboCached(long Key, long DataSize); @@ -46,9 +49,9 @@ namespace Ryujinx.Graphics.Gal void CreateIbo(long Key, byte[] Buffer); - void SetVertexArray(int VbIndex, int Stride, long VboKey, GalVertexAttrib[] Attribs); + void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs); - void SetIndexArray(long Key, int Size, GalIndexFormat Format); + void SetIndexArray(int Size, GalIndexFormat Format); void DrawArrays(int First, int PrimCount, GalPrimitiveType PrimType); diff --git 
a/Ryujinx.Graphics/Gal/IGalTexture.cs b/Ryujinx.Graphics/Gal/IGalTexture.cs index 6379e73af4..2ab4119904 100644 --- a/Ryujinx.Graphics/Gal/IGalTexture.cs +++ b/Ryujinx.Graphics/Gal/IGalTexture.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Gal { public interface IGalTexture { + void LockCache(); + void UnlockCache(); + void Create(long Key, byte[] Data, GalTexture Texture); bool TryGetCachedTexture(long Key, long DataSize, out GalTexture Texture); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs index 06d76b8bdd..01ebf98202 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs @@ -36,6 +36,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL private DeleteValue DeleteValueCallback; + private Queue DeletePending; + + private bool Locked; + public OGLCachedResource(DeleteValue DeleteValueCallback) { if (DeleteValueCallback == null) @@ -48,11 +52,33 @@ namespace Ryujinx.Graphics.Gal.OpenGL Cache = new Dictionary(); SortedCache = new LinkedList(); + + DeletePending = new Queue(); + } + + public void Lock() + { + Locked = true; + } + + public void Unlock() + { + Locked = false; + + while (DeletePending.TryDequeue(out T Value)) + { + DeleteValueCallback(Value); + } + + ClearCacheIfNeeded(); } public void AddOrUpdate(long Key, T Value, long Size) { - ClearCacheIfNeeded(); + if (!Locked) + { + ClearCacheIfNeeded(); + } LinkedListNode Node = SortedCache.AddLast(Key); @@ -60,7 +86,14 @@ namespace Ryujinx.Graphics.Gal.OpenGL if (Cache.TryGetValue(Key, out CacheBucket Bucket)) { - DeleteValueCallback(Bucket.Value); + if (Locked) + { + DeletePending.Enqueue(Bucket.Value); + } + else + { + DeleteValueCallback(Bucket.Value); + } SortedCache.Remove(Bucket.Node); @@ -78,6 +111,12 @@ namespace Ryujinx.Graphics.Gal.OpenGL { Value = Bucket.Value; + SortedCache.Remove(Bucket.Node); + + LinkedListNode Node = SortedCache.AddLast(Key); + + Cache[Key] = new 
CacheBucket(Value, Bucket.DataSize, Node); + return true; } diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 3a81150d6f..8f189d2b08 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -129,15 +129,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL { switch (Format) { - case GalTextureFormat.R32G32B32A32: return (PixelFormat.Rgba, PixelType.Float); - case GalTextureFormat.R16G16B16A16: return (PixelFormat.Rgba, PixelType.HalfFloat); - case GalTextureFormat.A8B8G8R8: return (PixelFormat.Rgba, PixelType.UnsignedByte); - case GalTextureFormat.R32: return (PixelFormat.Red, PixelType.Float); - case GalTextureFormat.A1B5G5R5: return (PixelFormat.Rgba, PixelType.UnsignedShort5551); - case GalTextureFormat.B5G6R5: return (PixelFormat.Rgb, PixelType.UnsignedShort565); - case GalTextureFormat.G8R8: return (PixelFormat.Rg, PixelType.UnsignedByte); - case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); - case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); + case GalTextureFormat.R32G32B32A32: return (PixelFormat.Rgba, PixelType.Float); + case GalTextureFormat.R16G16B16A16: return (PixelFormat.Rgba, PixelType.HalfFloat); + case GalTextureFormat.A8B8G8R8: return (PixelFormat.Rgba, PixelType.UnsignedByte); + case GalTextureFormat.R32: return (PixelFormat.Red, PixelType.Float); + case GalTextureFormat.A1B5G5R5: return (PixelFormat.Rgba, PixelType.UnsignedShort5551); + case GalTextureFormat.B5G6R5: return (PixelFormat.Rgb, PixelType.UnsignedShort565); + case GalTextureFormat.G8R8: return (PixelFormat.Rg, PixelType.UnsignedByte); + case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); + case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); + case GalTextureFormat.ZF32: return (PixelFormat.DepthComponent, PixelType.Float); } throw new 
NotImplementedException(Format.ToString()); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index a4ec7f87cf..0dc56966b3 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -71,6 +71,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL IndexBuffer = new IbInfo(); } + public void LockCaches() + { + VboCache.Lock(); + IboCache.Lock(); + } + + public void UnlockCaches() + { + VboCache.Unlock(); + IboCache.Unlock(); + } + public void ClearBuffers(GalClearBufferFlags Flags) { ClearBufferMask Mask = ClearBufferMask.ColorBufferBit; @@ -223,7 +235,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.BufferData(BufferTarget.ElementArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw); } - public void SetVertexArray(int VbIndex, int Stride, long VboKey, GalVertexAttrib[] Attribs) + public void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs) { if (!VboCache.TryGetValue(VboKey, out int VboHandle)) { @@ -270,7 +282,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } - public void SetIndexArray(long Key, int Size, GalIndexFormat Format) + public void SetIndexArray(int Size, GalIndexFormat Format) { IndexBuffer.Type = OGLEnumConverter.GetDrawElementsType(Format); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs index c50bdd71b5..5caca6ecde 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs @@ -26,6 +26,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL TextureCache = new OGLCachedResource(DeleteTexture); } + public void LockCache() + { + TextureCache.Lock(); + } + + public void UnlockCache() + { + TextureCache.Unlock(); + } + private static void DeleteTexture(TCE CachedTexture) { GL.DeleteTexture(CachedTexture.Handle); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index b9f9cc4974..2bacd71b36 100644 --- 
a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -73,6 +73,8 @@ namespace Ryujinx.HLE.Gpu.Engines private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) { + LockCaches(); + SetFrameBuffer(Vmm, 0); long[] Keys = UploadShaders(Vmm); @@ -90,6 +92,20 @@ namespace Ryujinx.HLE.Gpu.Engines UploadTextures(Vmm, Keys); UploadUniforms(Vmm); UploadVertexArrays(Vmm); + + UnlockCaches(); + } + + private void LockCaches() + { + Gpu.Renderer.Rasterizer.LockCaches(); + Gpu.Renderer.Texture.LockCache(); + } + + private void UnlockCaches() + { + Gpu.Renderer.Rasterizer.UnlockCaches(); + Gpu.Renderer.Texture.UnlockCache(); } private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) @@ -570,7 +586,7 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -634,7 +650,7 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index ac8f75c5f1..3c633b6928 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -28,15 +28,25 @@ namespace Ryujinx.HLE.Gpu.Texture { switch (Texture.Format) { - case GalTextureFormat.R32G32B32A32: return Texture.Width * Texture.Height * 16; - case GalTextureFormat.R16G16B16A16: return Texture.Width * Texture.Height * 8; - case GalTextureFormat.A8B8G8R8: return Texture.Width * Texture.Height * 4; - case GalTextureFormat.R32: return Texture.Width * Texture.Height * 4; - case GalTextureFormat.A1B5G5R5: 
return Texture.Width * Texture.Height * 2; - case GalTextureFormat.B5G6R5: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.G8R8: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.R16: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.R8: return Texture.Width * Texture.Height; + case GalTextureFormat.R32G32B32A32: + return Texture.Width * Texture.Height * 16; + + case GalTextureFormat.R16G16B16A16: + return Texture.Width * Texture.Height * 8; + + case GalTextureFormat.A8B8G8R8: + case GalTextureFormat.R32: + case GalTextureFormat.ZF32: + return Texture.Width * Texture.Height * 4; + + case GalTextureFormat.A1B5G5R5: + case GalTextureFormat.B5G6R5: + case GalTextureFormat.G8R8: + case GalTextureFormat.R16: + return Texture.Width * Texture.Height * 2; + + case GalTextureFormat.R8: + return Texture.Width * Texture.Height; case GalTextureFormat.BC1: case GalTextureFormat.BC4: diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 48bf1a90fe..24bceffb12 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -25,6 +25,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC3: return Read16Bpt4x4(Memory, Texture); case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); case GalTextureFormat.BC5: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.ZF32: return Read4Bpp (Memory, Texture); case GalTextureFormat.Astc2D4x4: return Read16Bpt4x4(Memory, Texture); } From 09c53fe06fd9dd6ec6a9e585771f73d7e45f7148 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Thu, 12 Jul 2018 00:08:20 +0200 Subject: [PATCH 11/20] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 71dad9ce29..f6bac98c8c 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,8 @@ If you have some homebrew that currently doesn't work within the emulator, you c For help, support, suggestions, or 
if you just want to get in touch with the team; join our Discord server! https://discord.gg/VkQYXAZ +For donation support, please take a look at our Patreon: https://www.patreon.com/ryujinx + **Running** To run this emulator, you need the .NET Core 2.1 (or higher) SDK *and* the OpenAL 11 Core SDK. @@ -92,6 +94,7 @@ Run `dotnet run -c Release -- path\to\homebrew.nro` inside the Ryujinx solution Run `dotnet run -c Release -- path\to\game_exefs_and_romfs_folder` to run official games (they need to be decrypted and extracted first!) **Compatibility** + You can check out the compatibility list within the Wiki. Only a handful of games actually work. **Latest build** From 37071285bcf14ef96b16b55cb6ed7c8c003489e0 Mon Sep 17 00:00:00 2001 From: David <25727384+ogniK5377@users.noreply.github.com> Date: Thu, 12 Jul 2018 11:41:35 +1000 Subject: [PATCH 12/20] NvGetConfig with production/non production swapping (#243) * GetConfig should return 0x30006 in production mode * GetConfig will now check settings only if nv!rmos_set_production_mode is set to "0" * Code formatting, TryGetValue * Slight fixup * dont forget the setting * Implemented non production mode setting grabbing * format issue * style changes --- .../Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs | 59 ++++++++++++++++--- Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs | 17 +++--- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs index a9fd9d3abd..7705a1f78f 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs @@ -2,6 +2,7 @@ using ChocolArm64.Memory; using Ryujinx.HLE.Logging; using System; using System.Collections.Concurrent; +using System.Text; using System.Threading; namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl @@ -10,9 +11,16 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl { private static 
ConcurrentDictionary UserCtxs; + private static bool IsProductionMode = true; + static NvHostCtrlIoctl() { UserCtxs = new ConcurrentDictionary(); + + if (Set.NxSettings.Settings.TryGetValue("nv!rmos_set_production_mode", out object ProductionModeSetting)) + { + IsProductionMode = ((string)ProductionModeSetting) != "0"; // Default value is "" + } } public static int ProcessIoctl(ServiceCtx Context, int Cmd) @@ -71,17 +79,52 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl private static int GetConfig(ServiceCtx Context) { - long InputPosition = Context.Request.GetBufferType0x21().Position; - long OutputPosition = Context.Request.GetBufferType0x22().Position; + if (!IsProductionMode) + { + long InputPosition = Context.Request.GetBufferType0x21().Position; + long OutputPosition = Context.Request.GetBufferType0x22().Position; - string Nv = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0, 0x41); - string Name = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0x41, 0x41); + string Domain = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0, 0x41); + string Name = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0x41, 0x41); - Context.Memory.WriteByte(OutputPosition + 0x82, 0); + if (Set.NxSettings.Settings.TryGetValue($"{Domain}!{Name}", out object NvSetting)) + { + byte[] SettingBuffer = new byte[0x101]; - Context.Ns.Log.PrintStub(LogClass.ServiceNv, "Stubbed."); + if (NvSetting is string StringValue) + { + if (StringValue.Length > 0x100) + { + Context.Ns.Log.PrintError(Logging.LogClass.ServiceNv, $"{Domain}!{Name} String value size is too big!"); + } + else + { + SettingBuffer = Encoding.ASCII.GetBytes(StringValue + "\0"); + } + } - return NvResult.Success; + if (NvSetting is int IntValue) + { + SettingBuffer = BitConverter.GetBytes(IntValue); + } + else if (NvSetting is bool BoolValue) + { + SettingBuffer[0] = BoolValue ? 
(byte)1 : (byte)0; + } + else + { + throw new NotImplementedException(NvSetting.GetType().Name); + } + + Context.Memory.WriteBytes(OutputPosition + 0x82, SettingBuffer); + + Context.Ns.Log.PrintDebug(Logging.LogClass.ServiceNv, $"Got setting {Domain}!{Name}"); + } + + return NvResult.Success; + } + + return NvResult.NotAvailableInProduction; } private static int EventWait(ServiceCtx Context) @@ -352,4 +395,4 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl UserCtxs.TryRemove(Process, out _); } } -} \ No newline at end of file +} diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs index 720f5ccf2c..78ae5ae33b 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs @@ -2,12 +2,13 @@ namespace Ryujinx.HLE.OsHle.Services.Nv { static class NvResult { - public const int Success = 0; - public const int TryAgain = -11; - public const int OutOfMemory = -12; - public const int InvalidInput = -22; - public const int NotSupported = -25; - public const int Restart = -85; - public const int TimedOut = -110; + public const int NotAvailableInProduction = 196614; + public const int Success = 0; + public const int TryAgain = -11; + public const int OutOfMemory = -12; + public const int InvalidInput = -22; + public const int NotSupported = -25; + public const int Restart = -85; + public const int TimedOut = -110; } -} \ No newline at end of file +} From cd18ab29dfacd1f7a3218d4ec73ce664bccc3887 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 12 Jul 2018 14:03:52 -0300 Subject: [PATCH 13/20] Loop GLScreen with custom method (#244) * Loop GLScreen with custom method * Fix deadlocks * Fix screen resizing * Change event to bool * Try to fix quitting error * Set title from main thread * Queue max 1 vsync, fix high FPS after a slowdown --- Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs | 7 ++ Ryujinx.HLE/Switch.cs | 5 ++ Ryujinx/Ui/GLScreen.cs | 110 ++++++++++++++++++++++++--- 
Ryujinx/Ui/Program.cs | 2 +- 4 files changed, 113 insertions(+), 11 deletions(-) diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs index 0bc682a70f..7b999eaed1 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs @@ -1,5 +1,6 @@ using Ryujinx.HLE.Gpu.Memory; using System.Collections.Concurrent; +using System.Threading; namespace Ryujinx.HLE.Gpu.Engines { @@ -18,6 +19,8 @@ namespace Ryujinx.HLE.Gpu.Engines private NvGpuEngine[] SubChannels; + public AutoResetEvent Event { get; private set; } + private struct CachedMacro { public int Position { get; private set; } @@ -60,6 +63,8 @@ namespace Ryujinx.HLE.Gpu.Engines Macros = new CachedMacro[MacrosCount]; Mme = new int[MmeWords]; + + Event = new AutoResetEvent(false); } public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer) @@ -68,6 +73,8 @@ namespace Ryujinx.HLE.Gpu.Engines { BufferQueue.Enqueue((Vmm, PBEntry)); } + + Event.Set(); } public void DispatchCalls() diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs index f7b263cd0f..1946b187ba 100644 --- a/Ryujinx.HLE/Switch.cs +++ b/Ryujinx.HLE/Switch.cs @@ -71,6 +71,11 @@ namespace Ryujinx.HLE Os.LoadProgram(FileName); } + public bool WaitFifo() + { + return Gpu.Fifo.Event.WaitOne(8); + } + public void ProcessFrame() { Gpu.Fifo.DispatchCalls(); diff --git a/Ryujinx/Ui/GLScreen.cs b/Ryujinx/Ui/GLScreen.cs index 7a4e42e9e2..9b5dda4f0c 100644 --- a/Ryujinx/Ui/GLScreen.cs +++ b/Ryujinx/Ui/GLScreen.cs @@ -5,6 +5,9 @@ using Ryujinx.Graphics.Gal; using Ryujinx.HLE; using Ryujinx.HLE.Input; using System; +using System.Threading; + +using Stopwatch = System.Diagnostics.Stopwatch; namespace Ryujinx { @@ -16,6 +19,8 @@ namespace Ryujinx private const float TouchScreenRatioX = (float)TouchScreenWidth / TouchScreenHeight; private const float TouchScreenRatioY = (float)TouchScreenHeight / TouchScreenWidth; + private const int TargetFPS = 60; + private Switch Ns; private IGalRenderer Renderer; 
@@ -24,6 +29,14 @@ namespace Ryujinx private MouseState? Mouse = null; + private Thread RenderThread; + + private bool ResizeEvent; + + private bool TitleEvent; + + private string NewTitle; + public GLScreen(Switch Ns, IGalRenderer Renderer) : base(1280, 720, new GraphicsMode(), "Ryujinx", 0, @@ -36,13 +49,85 @@ namespace Ryujinx Location = new Point( (DisplayDevice.Default.Width / 2) - (Width / 2), (DisplayDevice.Default.Height / 2) - (Height / 2)); + + ResizeEvent = false; + + TitleEvent = false; } - protected override void OnLoad(EventArgs e) + private void RenderLoop() { - VSync = VSyncMode.On; + MakeCurrent(); + + Stopwatch Chrono = new Stopwatch(); + + Chrono.Start(); + + long TicksPerFrame = Stopwatch.Frequency / TargetFPS; + + long Ticks = 0; + + while (Exists && !IsExiting) + { + if (Ns.WaitFifo()) + { + Ns.ProcessFrame(); + } + + Renderer.RunActions(); + + if (ResizeEvent) + { + ResizeEvent = false; + + Renderer.FrameBuffer.SetWindowSize(Width, Height); + } + + Ticks += Chrono.ElapsedTicks; + + Chrono.Restart(); + + if (Ticks >= TicksPerFrame) + { + RenderFrame(); + + //Queue max. 
1 vsync + Ticks = Math.Min(Ticks - TicksPerFrame, TicksPerFrame); + } + } + } + + public void MainLoop() + { + VSync = VSyncMode.Off; + + Visible = true; Renderer.FrameBuffer.SetWindowSize(Width, Height); + + Context.MakeCurrent(null); + + //OpenTK doesn't like sleeps in its thread, to avoid this a renderer thread is created + RenderThread = new Thread(RenderLoop); + + RenderThread.Start(); + + while (Exists && !IsExiting) + { + ProcessEvents(); + + if (!IsExiting) + { + UpdateFrame(); + + if (TitleEvent) + { + TitleEvent = false; + + Title = NewTitle; + } + } + } } private bool IsGamePadButtonPressedFromString(GamePadState GamePad, string Button) @@ -99,7 +184,7 @@ namespace Ryujinx } } - protected override void OnUpdateFrame(FrameEventArgs e) + private new void UpdateFrame() { HidControllerButtons CurrentButton = 0; HidJoystickPosition LeftJoystick; @@ -278,13 +363,9 @@ namespace Ryujinx CurrentButton, LeftJoystick, RightJoystick); - - Ns.ProcessFrame(); - - Renderer.RunActions(); } - protected override void OnRenderFrame(FrameEventArgs e) + private new void RenderFrame() { Renderer.FrameBuffer.Render(); @@ -293,16 +374,25 @@ namespace Ryujinx double HostFps = Ns.Statistics.GetSystemFrameRate(); double GameFps = Ns.Statistics.GetGameFrameRate(); - Title = $"Ryujinx | Host FPS: {HostFps:0.0} | Game FPS: {GameFps:0.0}"; + NewTitle = $"Ryujinx | Host FPS: {HostFps:0.0} | Game FPS: {GameFps:0.0}"; + + TitleEvent = true; SwapBuffers(); Ns.Os.SignalVsync(); } + protected override void OnUnload(EventArgs e) + { + RenderThread.Join(); + + base.OnUnload(e); + } + protected override void OnResize(EventArgs e) { - Renderer.FrameBuffer.SetWindowSize(Width, Height); + ResizeEvent = true; } protected override void OnKeyDown(KeyboardKeyEventArgs e) diff --git a/Ryujinx/Ui/Program.cs b/Ryujinx/Ui/Program.cs index b14897695d..5cacc6228b 100644 --- a/Ryujinx/Ui/Program.cs +++ b/Ryujinx/Ui/Program.cs @@ -67,7 +67,7 @@ namespace Ryujinx Screen.Exit(); }; - Screen.Run(0.0, 60.0); + 
Screen.MainLoop(); } Environment.Exit(0); From b233ae964fcaae900cdefa6ce51b0edb2892dfaf Mon Sep 17 00:00:00 2001 From: Merry Date: Thu, 12 Jul 2018 19:51:02 +0100 Subject: [PATCH 14/20] AInstEmitSimdCvt: Half-precision to single-precision conversion (#235) --- ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 8 ++--- ChocolArm64/Instruction/ASoftFloat.cs | 36 +++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs | 40 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index 98bb972a2d..da584743c3 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -45,10 +45,10 @@ namespace ChocolArm64.Instruction { if (SizeF == 0) { - //TODO: This need the half precision floating point type, - //that is not yet supported on .NET. We should probably - //do our own implementation on the meantime. 
- throw new NotImplementedException(); + EmitVectorExtractZx(Context, Op.Rn, Part + Index, 1); + Context.Emit(OpCodes.Conv_U2); + + Context.EmitCall(typeof(ASoftFloat), nameof(ASoftFloat.ConvertHalfToSingle)); } else /* if (SizeF == 1) */ { diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index e63c82beea..27f4f7fb4f 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -225,5 +225,41 @@ namespace ChocolArm64.Instruction return 2.0 + op1 * op2; } + + public static float ConvertHalfToSingle(ushort x) + { + uint x_sign = (uint)(x >> 15) & 0x0001; + uint x_exp = (uint)(x >> 10) & 0x001F; + uint x_mantissa = (uint)x & 0x03FF; + + if (x_exp == 0 && x_mantissa == 0) + { + // Zero + return BitConverter.Int32BitsToSingle((int)(x_sign << 31)); + } + + if (x_exp == 0x1F) + { + // NaN or Infinity + return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | 0x7F800000 | (x_mantissa << 13))); + } + + int exponent = (int)x_exp - 15; + + if (x_exp == 0) + { + // Denormal + x_mantissa <<= 1; + while ((x_mantissa & 0x0400) == 0) + { + x_mantissa <<= 1; + exponent--; + } + x_mantissa &= 0x03FF; + } + + uint new_exp = (uint)((exponent + 127) & 0xFF) << 23; + return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13))); + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs new file mode 100644 index 0000000000..2d021616c6 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs @@ -0,0 +1,40 @@ +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Tests.Cpu +{ + public class CpuTestSimdCvt : CpuTest + { + [TestCase((ushort)0x0000, 0x00000000u)] // Positive Zero + [TestCase((ushort)0x8000, 0x80000000u)] // Negative Zero + [TestCase((ushort)0x3E00, 0x3FC00000u)] // +1.5 + [TestCase((ushort)0xBE00, 0xBFC00000u)] 
// -1.5 + [TestCase((ushort)0xFFFF, 0xFFFFE000u)] // -QNaN + [TestCase((ushort)0x7C00, 0x7F800000u)] // +Inf + [TestCase((ushort)0x3C00, 0x3F800000u)] // 1.0 + [TestCase((ushort)0x3C01, 0x3F802000u)] // 1.0009765625 + [TestCase((ushort)0xC000, 0xC0000000u)] // -2.0 + [TestCase((ushort)0x7BFF, 0x477FE000u)] // 65504.0 (Largest Normal) + [TestCase((ushort)0x03FF, 0x387FC000u)] // 0.00006097555 (Largest Subnormal) + [TestCase((ushort)0x0001, 0x33800000u)] // 5.96046448e-8 (Smallest Subnormal) + public void Fcvtl_V_f16(ushort Value, uint Result) + { + uint Opcode = 0x0E217801; + Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(Value)); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0); + + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)0), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)1), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)2), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)3), Is.EqualTo(Result)); + }); + } + } +} From 3b00333b0ce21538e5ff361e3e41d89bee586d36 Mon Sep 17 00:00:00 2001 From: greggameplayer <33609333+greggameplayer@users.noreply.github.com> Date: Fri, 13 Jul 2018 02:27:59 +0200 Subject: [PATCH 15/20] Add return of Texture Size and Bytes Per Pixel of ASTC2D 5x5, 6x6, 8x8, 10x10 and 12x12 (#249) * return correct size of ASTC 5x5, 6x6, 8x8, 10x10 and 12x12 * return correct Bytes Per Pixel * Use method in order to get CompressedTextureSize * Add Read16BptCompressedTexture method * add Bpb integer argument --- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 83 +++++++++++++++++++++--- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 53 +++++++++------ 2 files changed, 108 insertions(+), 28 deletions(-) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 3c633b6928..6b9a306355 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ 
b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -51,10 +51,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC1: case GalTextureFormat.BC4: { - int W = (Texture.Width + 3) / 4; - int H = (Texture.Height + 3) / 4; - - return W * H * 8; + return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 8); } case GalTextureFormat.BC7U: @@ -63,16 +60,86 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC5: case GalTextureFormat.Astc2D4x4: { - int W = (Texture.Width + 3) / 4; - int H = (Texture.Height + 3) / 4; - - return W * H * 16; + return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 16); + } + + case GalTextureFormat.Astc2D5x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 5, 5, 16); + } + + case GalTextureFormat.Astc2D6x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 6, 6, 16); + } + + case GalTextureFormat.Astc2D8x8: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 8, 16); + } + + case GalTextureFormat.Astc2D10x10: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 10, 16); + } + + case GalTextureFormat.Astc2D12x12: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 12, 12, 16); + } + + case GalTextureFormat.Astc2D5x4: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 5, 4, 16); + } + + case GalTextureFormat.Astc2D6x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 6, 5, 16); + } + + case GalTextureFormat.Astc2D8x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 6, 16); + } + + case GalTextureFormat.Astc2D10x8: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 8, 16); + } + + case GalTextureFormat.Astc2D12x10: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 12, 10, 16); + } + + case GalTextureFormat.Astc2D8x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 5, 16); + } + + case GalTextureFormat.Astc2D10x5: + { + return 
CompressedTextureSize(Texture.Width, Texture.Height, 10, 5, 16); + } + + case GalTextureFormat.Astc2D10x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 6, 16); } } throw new NotImplementedException(Texture.Format.ToString()); } + public static int CompressedTextureSize(int TextureWidth, int TextureHeight, int BlockWidth, int BlockHeight, int Bpb) + { + int W = (TextureWidth + (BlockWidth - 1)) / BlockWidth; + int H = (TextureHeight + (BlockHeight - 1)) / BlockHeight; + + return W * H * Bpb; + } + public static (AMemory Memory, long Position) GetMemoryAndPosition( IAMemory Memory, long Position) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 24bceffb12..8bd4dbcbaa 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -10,23 +10,36 @@ namespace Ryujinx.HLE.Gpu.Texture { switch (Texture.Format) { - case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture); - case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); - case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); - case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); - case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); - case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); - case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture); - case GalTextureFormat.R16: return Read2Bpp (Memory, Texture); - case GalTextureFormat.R8: return Read1Bpp (Memory, Texture); - case GalTextureFormat.BC7U: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture); - case GalTextureFormat.BC2: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC3: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); - case GalTextureFormat.BC5: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.ZF32: return Read4Bpp 
(Memory, Texture); - case GalTextureFormat.Astc2D4x4: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture); + case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); + case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); + case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); + case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); + case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture); + case GalTextureFormat.R16: return Read2Bpp (Memory, Texture); + case GalTextureFormat.R8: return Read1Bpp (Memory, Texture); + case GalTextureFormat.BC7U: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC2: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC3: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC5: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.ZF32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.Astc2D4x4: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.Astc2D5x5: return Read16BptCompressedTexture(Memory, Texture, 5, 5); + case GalTextureFormat.Astc2D6x6: return Read16BptCompressedTexture(Memory, Texture, 6, 6); + case GalTextureFormat.Astc2D8x8: return Read16BptCompressedTexture(Memory, Texture, 8, 8); + case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); + case GalTextureFormat.Astc2D12x12: return Read16BptCompressedTexture(Memory, Texture, 12, 12); + case GalTextureFormat.Astc2D5x4: return Read16BptCompressedTexture(Memory, Texture, 5, 4); + case GalTextureFormat.Astc2D6x5: return Read16BptCompressedTexture(Memory, Texture, 6, 5); 
+ case GalTextureFormat.Astc2D8x6: return Read16BptCompressedTexture(Memory, Texture, 8, 6); + case GalTextureFormat.Astc2D10x8: return Read16BptCompressedTexture(Memory, Texture, 10, 8); + case GalTextureFormat.Astc2D12x10: return Read16BptCompressedTexture(Memory, Texture, 12, 10); + case GalTextureFormat.Astc2D8x5: return Read16BptCompressedTexture(Memory, Texture, 8, 5); + case GalTextureFormat.Astc2D10x5: return Read16BptCompressedTexture(Memory, Texture, 10, 5); + case GalTextureFormat.Astc2D10x6: return Read16BptCompressedTexture(Memory, Texture, 10, 6); } throw new NotImplementedException(Texture.Format.ToString()); @@ -307,10 +320,10 @@ namespace Ryujinx.HLE.Gpu.Texture return Output; } - private unsafe static byte[] Read16Bpt4x4(IAMemory Memory, TextureInfo Texture) + private unsafe static byte[] Read16BptCompressedTexture(IAMemory Memory, TextureInfo Texture, int BlockWidth, int BlockHeight) { - int Width = (Texture.Width + 3) / 4; - int Height = (Texture.Height + 3) / 4; + int Width = (Texture.Width + (BlockWidth - 1)) / BlockWidth; + int Height = (Texture.Height + (BlockHeight - 1)) / BlockHeight; byte[] Output = new byte[Width * Height * 16]; From 37bf02f0572ff5535695ffa9527b1e651d0a1b7d Mon Sep 17 00:00:00 2001 From: Thomas Guillemard Date: Fri, 13 Jul 2018 23:35:19 +0200 Subject: [PATCH 16/20] TimeZone implements cmd 0, 1, 2, 3, 4 and 100 (#250) The implementation of the TimezoneRule isn't matching hardware but doesn't need to be accurate (games are only passing the value) --- .../OsHle/Services/Time/ITimeZoneService.cs | 174 +++++++++++++++--- 1 file changed, 146 insertions(+), 28 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs b/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs index 39454d4335..a2206a126e 100644 --- a/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs +++ b/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs @@ -2,6 +2,7 @@ using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Ipc; using System; 
using System.Collections.Generic; +using System.Text; namespace Ryujinx.HLE.OsHle.Services.Time { @@ -13,20 +14,31 @@ namespace Ryujinx.HLE.OsHle.Services.Time private static readonly DateTime Epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + private TimeZoneInfo TimeZone = TimeZoneInfo.Local; + public ITimeZoneService() { m_Commands = new Dictionary() { - { 0, GetDeviceLocationName }, - { 101, ToCalendarTimeWithMyRule } + { 0, GetDeviceLocationName }, + { 1, SetDeviceLocationName }, + { 2, GetTotalLocationNameCount }, + { 3, LoadLocationNameList }, + { 4, LoadTimeZoneRule }, + { 100, ToCalendarTime }, + { 101, ToCalendarTimeWithMyRule } }; } public long GetDeviceLocationName(ServiceCtx Context) { - Context.Ns.Log.PrintStub(LogClass.ServiceTime, "Stubbed."); + char[] TzName = TimeZone.Id.ToCharArray(); - for (int Index = 0; Index < 0x24; Index++) + Context.ResponseData.Write(TzName); + + int Padding = 0x24 - TzName.Length; + + for (int Index = 0; Index < Padding; Index++) { Context.ResponseData.Write((byte)0); } @@ -34,11 +46,94 @@ namespace Ryujinx.HLE.OsHle.Services.Time return 0; } - public long ToCalendarTimeWithMyRule(ServiceCtx Context) + public long SetDeviceLocationName(ServiceCtx Context) { - long PosixTime = Context.RequestData.ReadInt64(); + byte[] LocationName = Context.RequestData.ReadBytes(0x24); + string TzID = Encoding.ASCII.GetString(LocationName).TrimEnd('\0'); - DateTime CurrentTime = Epoch.AddSeconds(PosixTime).ToLocalTime(); + long ResultCode = 0; + + try + { + TimeZone = TimeZoneInfo.FindSystemTimeZoneById(TzID); + } + catch (TimeZoneNotFoundException e) + { + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + public long GetTotalLocationNameCount(ServiceCtx Context) + { + Context.ResponseData.Write(TimeZoneInfo.GetSystemTimeZones().Count); + + return 0; + } + + public long LoadLocationNameList(ServiceCtx Context) + { + long BufferPosition = Context.Response.SendBuff[0].Position; + long BufferSize = 
Context.Response.SendBuff[0].Size; + + int i = 0; + foreach (TimeZoneInfo info in TimeZoneInfo.GetSystemTimeZones()) + { + byte[] TzData = Encoding.ASCII.GetBytes(info.Id); + + Context.Memory.WriteBytes(BufferPosition + i, TzData); + + int Padding = 0x24 - TzData.Length; + + for (int Index = 0; Index < Padding; Index++) + { + Context.ResponseData.Write((byte)0); + } + + i += 0x24; + } + return 0; + } + + public long LoadTimeZoneRule(ServiceCtx Context) + { + long BufferPosition = Context.Request.ReceiveBuff[0].Position; + long BufferSize = Context.Request.ReceiveBuff[0].Size; + + if (BufferSize != 0x4000) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"TimeZoneRule buffer size is 0x{BufferSize:x} (expected 0x4000)"); + } + + long ResultCode = 0; + + byte[] LocationName = Context.RequestData.ReadBytes(0x24); + string TzID = Encoding.ASCII.GetString(LocationName).TrimEnd('\0'); + + // Check if the Time Zone exists, otherwise error out. + try + { + TimeZoneInfo Info = TimeZoneInfo.FindSystemTimeZoneById(TzID); + byte[] TzData = Encoding.ASCII.GetBytes(Info.Id); + + // FIXME: This is not in ANY cases accurate, but the games don't about the content of the buffer, they only pass it. + // TODO: Reverse the TZif2 conversion in PCV to make this match with real hardware. 
+ Context.Memory.WriteBytes(BufferPosition, TzData); + } + catch (TimeZoneNotFoundException e) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"Timezone not found for string: {TzID} (len: {TzID.Length})"); + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + private long ToCalendarTimeWithTz(ServiceCtx Context, long PosixTime, TimeZoneInfo Info) + { + DateTime CurrentTime = Epoch.AddSeconds(PosixTime); + CurrentTime = TimeZoneInfo.ConvertTimeFromUtc(CurrentTime, Info); Context.ResponseData.Write((ushort)CurrentTime.Year); Context.ResponseData.Write((byte)CurrentTime.Month); @@ -46,31 +141,54 @@ namespace Ryujinx.HLE.OsHle.Services.Time Context.ResponseData.Write((byte)CurrentTime.Hour); Context.ResponseData.Write((byte)CurrentTime.Minute); Context.ResponseData.Write((byte)CurrentTime.Second); - Context.ResponseData.Write((byte)0); - - /* Thanks to TuxSH - struct CalendarAdditionalInfo { - u32 tm_wday; //day of week [0,6] (Sunday = 0) - s32 tm_yday; //day of year [0,365] - struct timezone { - char[8] tz_name; - bool isDaylightSavingTime; - s32 utcOffsetSeconds; - }; - }; - */ + Context.ResponseData.Write((byte)0); //MilliSecond ? Context.ResponseData.Write((int)CurrentTime.DayOfWeek); - Context.ResponseData.Write(CurrentTime.DayOfYear - 1); - - //TODO: Find out the names used. - Context.ResponseData.Write(new byte[8]); - + Context.ResponseData.Write(new byte[8]); //TODO: Find out the names used. Context.ResponseData.Write((byte)(CurrentTime.IsDaylightSavingTime() ? 
1 : 0)); - - Context.ResponseData.Write((int)TimeZoneInfo.Local.GetUtcOffset(CurrentTime).TotalSeconds); + Context.ResponseData.Write((int)Info.GetUtcOffset(CurrentTime).TotalSeconds); return 0; } + + public long ToCalendarTime(ServiceCtx Context) + { + long PosixTime = Context.RequestData.ReadInt64(); + long BufferPosition = Context.Request.SendBuff[0].Position; + long BufferSize = Context.Request.SendBuff[0].Size; + + if (BufferSize != 0x4000) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"TimeZoneRule buffer size is 0x{BufferSize:x} (expected 0x4000)"); + } + + // TODO: Reverse the TZif2 conversion in PCV to make this match with real hardware. + byte[] TzData = Context.Memory.ReadBytes(BufferPosition, 0x24); + string TzID = Encoding.ASCII.GetString(TzData).TrimEnd('\0'); + + long ResultCode = 0; + + // Check if the Time Zone exists, otherwise error out. + try + { + TimeZoneInfo Info = TimeZoneInfo.FindSystemTimeZoneById(TzID); + + ResultCode = ToCalendarTimeWithTz(Context, PosixTime, Info); + } + catch (TimeZoneNotFoundException e) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"Timezone not found for string: {TzID} (len: {TzID.Length})"); + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + public long ToCalendarTimeWithMyRule(ServiceCtx Context) + { + long PosixTime = Context.RequestData.ReadInt64(); + + return ToCalendarTimeWithTz(Context, PosixTime, TimeZone); + } } -} \ No newline at end of file +} From 494f8f0248e7daf3fdfb89a6d90f1598232b6a87 Mon Sep 17 00:00:00 2001 From: Starlet Date: Fri, 13 Jul 2018 16:36:57 -0500 Subject: [PATCH 17/20] Implement CSRNG (Cryptographically Secure Random Bytes) (#216) * Implement CSRNG (Cryptographically Secure Random Bytes) * Compliant with review. 
* Dispose Rng --- Ryujinx.HLE/OsHle/Services/ServiceFactory.cs | 4 ++ .../OsHle/Services/Spl/IRandomInterface.cs | 50 +++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs diff --git a/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs b/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs index 914c84490d..b69fc9f884 100644 --- a/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs +++ b/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs @@ -18,6 +18,7 @@ using Ryujinx.HLE.OsHle.Services.Prepo; using Ryujinx.HLE.OsHle.Services.Set; using Ryujinx.HLE.OsHle.Services.Sfdnsres; using Ryujinx.HLE.OsHle.Services.Sm; +using Ryujinx.HLE.OsHle.Services.Spl; using Ryujinx.HLE.OsHle.Services.Ssl; using Ryujinx.HLE.OsHle.Services.Vi; using System; @@ -66,6 +67,9 @@ namespace Ryujinx.HLE.OsHle.Services case "caps:ss": return new IScreenshotService(); + case "csrng": + return new IRandomInterface(); + case "friend:a": return new IServiceCreator(); diff --git a/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs b/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs new file mode 100644 index 0000000000..489ca52ce6 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs @@ -0,0 +1,50 @@ +using Ryujinx.HLE.OsHle.Ipc; +using System; +using System.Collections.Generic; +using System.Security.Cryptography; + +namespace Ryujinx.HLE.OsHle.Services.Spl +{ + class IRandomInterface : IpcService, IDisposable + { + private Dictionary m_Commands; + + public override IReadOnlyDictionary Commands => m_Commands; + + private RNGCryptoServiceProvider Rng; + + public IRandomInterface() + { + m_Commands = new Dictionary() + { + { 0, GetRandomBytes } + }; + + Rng = new RNGCryptoServiceProvider(); + } + + public long GetRandomBytes(ServiceCtx Context) + { + byte[] RandomBytes = new byte[Context.Request.ReceiveBuff[0].Size]; + + Rng.GetBytes(RandomBytes); + + Context.Memory.WriteBytes(Context.Request.ReceiveBuff[0].Position, RandomBytes); + + 
return 0; + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool Disposing) + { + if (Disposing) + { + Rng.Dispose(); + } + } + } +} \ No newline at end of file From 2f37583ab3b49aa5064a72c8d3b4e8245ebb6b5b Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:08:39 -0300 Subject: [PATCH 18/20] Some small shader related fixes (#258) * Some small shader related fixes * Address PR feedback --- Ryujinx.Graphics/Gal/IGalShader.cs | 2 ++ Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 12 +++++++ Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 31 +++++++++++++------ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 10 +++--- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 28 ++++++++--------- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 2 +- 6 files changed, 57 insertions(+), 28 deletions(-) diff --git a/Ryujinx.Graphics/Gal/IGalShader.cs b/Ryujinx.Graphics/Gal/IGalShader.cs index 06f3fac979..9adaceaf50 100644 --- a/Ryujinx.Graphics/Gal/IGalShader.cs +++ b/Ryujinx.Graphics/Gal/IGalShader.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Gal void Bind(long Key); + void Unbind(GalShaderType Type); + void BindProgram(); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index 3c5c874eaa..c55a758b4a 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -203,6 +203,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } + public void Unbind(GalShaderType Type) + { + switch (Type) + { + case GalShaderType.Vertex: Current.Vertex = null; break; + case GalShaderType.TessControl: Current.TessControl = null; break; + case GalShaderType.TessEvaluation: Current.TessEvaluation = null; break; + case GalShaderType.Geometry: Current.Geometry = null; break; + case GalShaderType.Fragment: Current.Fragment = null; break; + } + } + public void BindProgram() { if (Current.Vertex == null || diff --git 
a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index f3075a504e..575fb72f9a 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -216,7 +216,7 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclOutAttributes() { - if (Decl.ShaderType == GalShaderType.Vertex) + if (Decl.ShaderType != GalShaderType.Fragment) { SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") out vec4 " + GlslDecl.PositionOutAttrName + ";"); } @@ -337,7 +337,10 @@ namespace Ryujinx.Graphics.Gal.Shader if (Decl.ShaderType == GalShaderType.Vertex) { SB.AppendLine(IdentationStr + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";"); + } + if (Decl.ShaderType != GalShaderType.Fragment) + { SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + " = gl_Position;"); SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + ".w = 1;"); } @@ -598,9 +601,6 @@ namespace Ryujinx.Graphics.Gal.Shader { switch (Op.Inst) { - case ShaderIrInst.Frcp: - return true; - case ShaderIrInst.Ipa: case ShaderIrInst.Texq: case ShaderIrInst.Texs: @@ -608,8 +608,7 @@ namespace Ryujinx.Graphics.Gal.Shader return false; } - return Op.OperandB != null || - Op.OperandC != null; + return true; } private string GetName(ShaderIrOperCbuf Cbuf) @@ -711,13 +710,13 @@ namespace Ryujinx.Graphics.Gal.Shader } else { - return Imm.Value.ToString(CultureInfo.InvariantCulture); + return GetIntConst(Imm.Value); } } private string GetValue(ShaderIrOperImmf Immf) { - return Immf.Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Immf.Value); } private string GetName(ShaderIrOperPred Pred) @@ -1047,7 +1046,7 @@ namespace Ryujinx.Graphics.Gal.Shader if (!float.IsNaN(Value) && !float.IsInfinity(Value)) { - return Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Value); } } break; @@ -1064,6 +1063,20 @@ namespace Ryujinx.Graphics.Gal.Shader return Expr; } + 
private static string GetIntConst(int Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + + private static string GetFloatConst(float Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + private static OperType GetDstNodeType(ShaderIrNode Node) { //Special case instructions with the result type different diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 2bacd71b36..5c474ab0bc 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -172,6 +172,8 @@ namespace Ryujinx.HLE.Gpu.Engines for (; Index < 6; Index++) { + GalShaderType Type = GetTypeFromProgram(Index); + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10); int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10); @@ -180,16 +182,16 @@ namespace Ryujinx.HLE.Gpu.Engines if (!Enable) { + Gpu.Renderer.Shader.Unbind(Type); + continue; } long Key = BasePosition + (uint)Offset; - GalShaderType ShaderType = GetTypeFromProgram(Index); + Keys[(int)Type] = Key; - Keys[(int)ShaderType] = Key; - - Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType); + Gpu.Renderer.Shader.Create(Vmm, Key, Type); Gpu.Renderer.Shader.Bind(Key); } diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 6b9a306355..c0749d6a25 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -62,67 +62,67 @@ namespace Ryujinx.HLE.Gpu.Texture { return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 16); } - + case GalTextureFormat.Astc2D5x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 5, 16); } - + case GalTextureFormat.Astc2D6x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 6, 16); } - + case GalTextureFormat.Astc2D8x8: { return 
CompressedTextureSize(Texture.Width, Texture.Height, 8, 8, 16); } - + case GalTextureFormat.Astc2D10x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 10, 16); } - + case GalTextureFormat.Astc2D12x12: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 12, 16); } - + case GalTextureFormat.Astc2D5x4: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 4, 16); } - + case GalTextureFormat.Astc2D6x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 5, 16); } - + case GalTextureFormat.Astc2D8x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 6, 16); } - + case GalTextureFormat.Astc2D10x8: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 8, 16); } - + case GalTextureFormat.Astc2D12x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 10, 16); } - + case GalTextureFormat.Astc2D8x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 5, 16); } - + case GalTextureFormat.Astc2D10x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 5, 16); } - + case GalTextureFormat.Astc2D10x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 6, 16); @@ -139,7 +139,7 @@ namespace Ryujinx.HLE.Gpu.Texture return W * H * Bpb; } - + public static (AMemory Memory, long Position) GetMemoryAndPosition( IAMemory Memory, long Position) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 8bd4dbcbaa..6c08cd6c4d 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -30,7 +30,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.Astc2D5x5: return Read16BptCompressedTexture(Memory, Texture, 5, 5); case GalTextureFormat.Astc2D6x6: return Read16BptCompressedTexture(Memory, Texture, 6, 6); case GalTextureFormat.Astc2D8x8: return Read16BptCompressedTexture(Memory, Texture, 8, 8); - case GalTextureFormat.Astc2D10x10: return 
Read16BptCompressedTexture(Memory, Texture, 10, 10); + case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); case GalTextureFormat.Astc2D12x12: return Read16BptCompressedTexture(Memory, Texture, 12, 12); case GalTextureFormat.Astc2D5x4: return Read16BptCompressedTexture(Memory, Texture, 5, 4); case GalTextureFormat.Astc2D6x5: return Read16BptCompressedTexture(Memory, Texture, 6, 5); From 514218ab98acc1f0ace2e2cc0b8c1091ffccc6ce Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:13:02 -0300 Subject: [PATCH 19/20] Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions, nits (#225) * Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions * Address PR feedback * Address PR feedback * Remove another useless temp var * nit: Alignment * Replace Context.CurrOp.GetBitsCount() with Op.GetBitsCount() * Fix encodings and move flag bit test out of the loop --- ChocolArm64/AOpCodeTable.cs | 20 ++- .../Instruction/AInstEmitSimdArithmetic.cs | 118 ++++----------- ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 6 +- ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 4 +- .../Instruction/AInstEmitSimdHelper.cs | 135 ++++++++++++++++-- .../Instruction/AInstEmitSimdLogical.cs | 4 +- .../Instruction/AInstEmitSimdMemory.cs | 5 +- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 18 +-- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 108 ++++++++++---- 9 files changed, 265 insertions(+), 153 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index fb4763ef8c..0e979aa44f 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -371,16 +371,22 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Smin_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, 
typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V, typeof(AOpCodeSimdShImm)); SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S, typeof(AOpCodeSimd)); SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V, typeof(AOpCodeSimd)); SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd)); + SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V, typeof(AOpCodeSimdShImm)); - SetA64("010111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs)); @@ -419,9 +425,11 @@ namespace ChocolArm64 SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, 
typeof(AOpCodeSimd)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm)); - SetA64("011111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 39331f965c..2fc8f178de 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -65,11 +65,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -97,9 +98,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = 
Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -190,84 +192,6 @@ namespace ChocolArm64.Instruction } } - private static void EmitSaturatingExtNarrow(AILEmitterCtx Context, bool SignedSrc, bool SignedDst, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Elems = (!Scalar ? 8 >> Op.Size : 1); - int ESize = 8 << Op.Size; - - int Part = (!Scalar & (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0); - - int TMaxValue = (SignedDst ? (1 << (ESize - 1)) - 1 : (int)((1L << ESize) - 1L)); - int TMinValue = (SignedDst ? -((1 << (ESize - 1))) : 0); - - Context.EmitLdc_I8(0L); - Context.EmitSttmp(); - - for (int Index = 0; Index < Elems; Index++) - { - AILLabel LblLe = new AILLabel(); - AILLabel LblGeEnd = new AILLabel(); - - EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMaxValue); - Context.Emit(OpCodes.Conv_U8); - - Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMaxValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.Emit(OpCodes.Br_S, LblGeEnd); - - Context.MarkLabel(LblLe); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMinValue); - Context.Emit(OpCodes.Conv_I8); - - Context.Emit(SignedSrc ? 
OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMinValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.MarkLabel(LblGeEnd); - - if (Scalar) - { - EmitVectorZeroLower(Context, Op.Rd); - } - - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); - } - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - Context.EmitLdtmp(); - Context.Emit(OpCodes.Conv_I4); - Context.Emit(OpCodes.Or); - Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - } - public static void Fabd_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => @@ -338,7 +262,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> SizeF + 2; int Half = Elems >> 1; @@ -870,7 +794,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { @@ -1102,6 +1026,15 @@ namespace ChocolArm64.Instruction }); } + public static void Smlsl_V(AILEmitterCtx Context) + { + EmitVectorWidenRnRmTernaryOpSx(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Smull_V(AILEmitterCtx Context) { EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul)); @@ -1109,22 +1042,22 @@ namespace ChocolArm64.Instruction public static void Sqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: true); + EmitScalarSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, 
SignedDst: true, Scalar: false); + EmitVectorSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtun_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sqxtun_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sub_S(AILEmitterCtx Context) @@ -1198,11 +1131,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -1272,12 +1206,12 @@ namespace ChocolArm64.Instruction public static void Uqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpZxZx(Context, () => { }); } public static void Uqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index 68a7ab8808..773d989447 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -363,7 +363,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? 
Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -407,7 +407,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -454,7 +454,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index da584743c3..7b355494dc 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -337,7 +337,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { @@ -426,7 +426,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index d895ec9c7c..1f7a2dad13 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Reflection.Emit; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -417,7 +418,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < 
(Bytes >> SizeF + 2); Index++) { @@ -467,7 +468,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) { @@ -527,9 +528,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Opers.HasFlag(OperFlags.Rd)) { @@ -582,9 +584,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -622,9 +625,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Binary) { @@ -739,11 +743,11 @@ namespace ChocolArm64.Instruction EmitVectorPairwiseOp(Context, Emit, false); } - private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) + public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; @@ -769,6 +773,117 @@ namespace ChocolArm64.Instruction } } + public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, 
Emit, true, true, true); + } + + public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, true); + } + + public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, true); + } + + public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, true, false); + } + + public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, false); + } + + public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, false); + } + + public static void EmitSaturatingNarrowOp( + AILEmitterCtx Context, + Action Emit, + bool SignedSrc, + bool SignedDst, + bool Scalar) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Elems = !Scalar ? 8 >> Op.Size : 1; + int ESize = 8 << Op.Size; + + int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; + + long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (1L << ESize) - 1L; + long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0; + + Context.EmitLdc_I8(0L); + Context.EmitSttmp(); + + for (int Index = 0; Index < Elems; Index++) + { + AILLabel LblLe = new AILLabel(); + AILLabel LblGeEnd = new AILLabel(); + + EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); + + Emit(); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMaxValue); + + Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMaxValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.Emit(OpCodes.Br_S, LblGeEnd); + + Context.MarkLabel(LblLe); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMinValue); + + Context.Emit(SignedSrc ? 
OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMinValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.MarkLabel(LblGeEnd); + + if (Scalar) + { + EmitVectorZeroLower(Context, Op.Rd); + } + + EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + } + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + Context.EmitLdtmp(); + Context.Emit(OpCodes.Conv_I4); + Context.Emit(OpCodes.Or); + Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + } + public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size) { EmitVectorZeroAll(Context, Reg); diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs index 8475a8a474..9f5af96cb4 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs @@ -55,7 +55,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; for (int Index = 0; Index < Elems; Index++) @@ -195,7 +195,7 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs index d98ec012e4..368b014fba 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs @@ -105,13 +105,14 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int 
Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; for (int SElem = 0; SElem < Op.SElems; SElem++) { int Rt = (Op.Rt + SElem) & 0x1f; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitMemAddress(); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index d67946a977..739f01c62b 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -14,9 +14,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { Context.EmitLdintzr(Op.Rn); @@ -42,9 +43,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size); @@ -64,7 +66,7 @@ namespace ChocolArm64.Instruction Context.EmitLdvec(Op.Rd); Context.EmitStvectmp(); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Position = Op.Imm4; @@ -329,7 +331,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; @@ -355,7 +357,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = 
Elems >> 1; @@ -382,7 +384,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 24d35abe4c..6f6b56068e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -27,9 +27,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Shll_V(AILEmitterCtx Context) @@ -45,22 +43,21 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift); + EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op)); } public static void Sli_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - int Shift = Op.Imm - (8 << Op.Size); + int Shift = GetImmShl(Op); - ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; + ulong Mask = Shift != 0 ? 
ulong.MaxValue >> (64 - Shift) : 0; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -84,6 +81,39 @@ namespace ChocolArm64.Instruction } } + public static void Sqrshrn_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + Action Emit = () => + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + + Context.EmitLdc_I4(Shift); + + Context.Emit(OpCodes.Shr); + }; + + EmitVectorSaturatingNarrowOpSxSx(Context, Emit); + } + + public static void Srshr_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst); + } + public static void Sshl_V(AILEmitterCtx Context) { EmitVectorShl(Context, Signed: true); @@ -93,9 +123,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Sshr_S(AILEmitterCtx Context) @@ -115,24 +143,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift); + EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op)); } public static void Ssra_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - Action Emit = () => { Context.Emit(OpCodes.Shr); Context.Emit(OpCodes.Add); }; - 
EmitVectorShImmTernarySx(Context, Emit, Shift); + EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op)); } public static void Ushl_V(AILEmitterCtx Context) @@ -144,9 +168,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Ushr_S(AILEmitterCtx Context) @@ -251,28 +273,51 @@ namespace ChocolArm64.Instruction } } + [Flags] + private enum ShImmFlags + { + None = 0, + + Signed = 1 << 0, + Ternary = 1 << 1, + Rounded = 1 << 2, + + SignedTernary = Signed | Ternary, + SignedRounded = Signed | Rounded + } + private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed); } private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, true, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary); } private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, false); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None); } - private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, bool Ternary, bool Signed) + private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc) + { + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc); + } + + private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for 
(int Index = 0; Index < (Bytes >> Op.Size); Index++) + bool Signed = (Flags & ShImmFlags.Signed) != 0; + bool Ternary = (Flags & ShImmFlags.Ternary) != 0; + bool Rounded = (Flags & ShImmFlags.Rounded) != 0; + + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -281,6 +326,13 @@ namespace ChocolArm64.Instruction EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); + if (Rounded) + { + Context.EmitLdc_I8(Rc); + + Context.Emit(OpCodes.Add); + } + Context.EmitLdc_I4(Imm); Emit(); From fc12fca96237c9aadc78436dc7c77480fa83fa09 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:53:44 -0300 Subject: [PATCH 20/20] Allow using ulong max value as yield (#263) --- Ryujinx.HLE/OsHle/Kernel/SvcThread.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs index b0a7490a39..8702203e4e 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs @@ -87,7 +87,7 @@ namespace Ryujinx.HLE.OsHle.Kernel KThread CurrThread = Process.GetThread(ThreadState.Tpidr); - if (TimeoutNs == 0) + if (TimeoutNs == 0 || TimeoutNs == ulong.MaxValue) { Process.Scheduler.Yield(CurrThread); }