From 9288ffd26d0b0b831f62cfc0281c71662d251884 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Wed, 28 Jun 2023 17:36:30 +0200 Subject: [PATCH] Cpu: Implement VCVT (between floating-point and fixed-point) instruction (#5343) * cpu: Implement VCVT (between floating-point and fixed-point) instruction Rebase, fix and superseed of https://github.com/Ryujinx/Ryujinx/pull/2915 (Since I only have little CPU knowledge, I hope I have done everything good) * Update Ptc.cs * Fix wrong cast * Rename tests * Addresses feedback Co-Authored-By: gdkchan <5624669+gdkchan@users.noreply.github.com> * Remove extra empty line --------- Co-authored-by: gdkchan <5624669+gdkchan@users.noreply.github.com> --- .../Decoders/OpCode32SimdCvtFFixed.cs | 23 ++ src/ARMeilleure/Decoders/OpCodeTable.cs | 341 +++++++++--------- .../Instructions/InstEmitSimdCvt32.cs | 29 ++ src/ARMeilleure/Translation/PTC/Ptc.cs | 2 +- src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs | 90 ++++- 5 files changed, 309 insertions(+), 176 deletions(-) create mode 100644 src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs new file mode 100644 index 000000000..f8564d0e5 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtFFixed : OpCode32Simd + { + public int Fbits { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFFixed(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFFixed(inst, address, opCode, true); + + public OpCode32SimdCvtFFixed(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Opc = (opCode >> 8) & 0x1; + + Size = Opc == 1 ? 0 : 2; + Fbits = 64 - ((opCode >> 16) & 0x3f); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} \ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs index d3fc4ca08..5cfd0bb81 100644 --- a/src/ARMeilleure/Decoders/OpCodeTable.cs +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -883,174 +883,175 @@ namespace ARMeilleure.Decoders SetVfp("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); // ASIMD - SetAsimd("111100111x110000xxx0001101x0xxx0", InstName.Aesd_V, InstEmit32.Aesd_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); - SetAsimd("111100111x110000xxx0001100x0xxx0", InstName.Aese_V, InstEmit32.Aese_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); - SetAsimd("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); - SetAsimd("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); - SetAsimd("111100110x00xxx0xxx01100x1x0xxx0", InstName.Sha256h_V, InstEmit32.Sha256h_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("111100110x01xxx0xxx01100x1x0xxx0", InstName.Sha256h2_V, InstEmit32.Sha256h2_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("111100111x111010xxx0001111x0xxx0", InstName.Sha256su0_V, InstEmit32.Sha256su0_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); - SetAsimd("111100110x10xxx0xxx01100x1x0xxx0", InstName.Sha256su1_V, InstEmit32.Sha256su1_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("1111001x0x<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); - SetAsimd("111101000x10xxxxxxxx000x<>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun, InstEmit32.Vqshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub, InstEmit32.Vqsub, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); - SetAsimd("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("111100111x11xx00xxxx000<>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); - SetAsimd("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); - SetAsimd("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra, InstEmit32.Vrsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); - SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); - SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); - SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding. - SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); - SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); - SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); - SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); - SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); - SetAsimd("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); - SetAsimd("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); - SetAsimd("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1. - SetAsimd("111101000x00xxxxxxxx1010xx<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x10xxxxxxxx000x<>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun, InstEmit32.Vqshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub, InstEmit32.Vqsub, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); + SetAsimd("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11xx00xxxx000<>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); + SetAsimd("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra, InstEmit32.Vrsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding. + SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1. + SetAsimd("111101000x00xxxxxxxx1010xx< + { + var scaledValue = context.Multiply(op1, ConstF(MathF.Pow(2f, fracBits))); + MethodInfo info = unsigned ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)) : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)); + + return context.Call(info, scaledValue); + }); + } + else // S32 or U32 (fixed) to F32 + { + EmitVectorUnaryOpI32(context, (op1) => + { + var floatValue = unsigned ? context.ConvertToFPUI(OperandType.FP32, op1) : context.ConvertToFP(OperandType.FP32, op1); + + return context.Multiply(floatValue, ConstF(1f / MathF.Pow(2f, fracBits))); + }, !unsigned); + } + } + public static void Vcvt_FD(ArmEmitterContext context) { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index 72b60fab0..14d4e471f 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 5292; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs index aa68cded3..e9a8ad590 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs @@ -395,11 +395,11 @@ namespace Ryujinx.Tests.Cpu [Explicit] [Test, Pairwise, Description("VCVT.F.F16 , ")] public void Vcvt_F16_Fx([Values(0u, 1u, 2u, 3u)] uint rd, - [Values(0u, 1u, 2u, 3u)] uint rm, - [ValueSource(nameof(_1D_F_))] ulong d0, - [ValueSource(nameof(_1D_F_))] ulong d1, - [Values] bool top, - [Values] bool sz) + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1, + [Values] bool top, + [Values] bool sz) { uint opcode = 0xeeb20a40; // VCVTB.F32.F16 S0, S0 @@ -426,6 +426,86 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("VCVT.I32.F32 , , #")] + public void Vcvt_V_Fixed_F32_I32([Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u, 3u)] uint vm, + [ValueSource(nameof(_1S_F_))][Random(RndCnt)] ulong s0, + [ValueSource(nameof(_1S_F_))][Random(RndCnt)] ulong s1, + [ValueSource(nameof(_1S_F_))][Random(RndCnt)] ulong s2, + [ValueSource(nameof(_1S_F_))][Random(RndCnt)] ulong s3, + [Random(32u, 63u, 1)] uint fixImm, + [Values] bool unsigned, + [Values] bool q) + { + uint opcode = 0xF2800F10u; // VCVT.U32.F32 D0, D0, #0 + + if (q) + { + opcode |= 1 << 6; + vm <<= 1; + vd <<= 1; + } + + if (unsigned) + { + opcode |= 1 << 24; + } + + opcode |= ((vm & 0x10) << 1); + opcode |= ((vm & 0xf) << 0); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (fixImm & 0x3f) << 16; + + var v0 = new V128((uint)s0, (uint)s1, (uint)s2, (uint)s3); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VCVT.F32.I32 , , #")] + public void Vcvt_V_Fixed_I32_F32([Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u, 3u)] uint vm, + [ValueSource(nameof(_1S_))][Random(RndCnt)] uint s0, + [ValueSource(nameof(_1S_))][Random(RndCnt)] uint s1, + [ValueSource(nameof(_1S_))][Random(RndCnt)] uint s2, + [ValueSource(nameof(_1S_))][Random(RndCnt)] uint s3, + [Range(32u, 63u, 1)] uint fixImm, + [Values] bool unsigned, + [Values] bool q) + { + uint opcode = 0xF2800E10u; // VCVT.F32.U32 D0, D0, #0 + + if (q) + { + opcode |= 1 << 6; + vm <<= 1; + vd <<= 1; + } + + if (unsigned) + { + opcode |= 1 << 24; + } + + opcode |= ((vm & 0x10) << 1); + opcode |= ((vm & 0xf) << 0); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (fixImm & 0x3f) << 16; + + var v0 = new V128(s0, s1, s2, s3); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } #endif } } \ No newline at end of file