From 8d8983049ea23af0600e077b6389e2cd5de74c38 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 8 Aug 2024 17:07:24 -0300 Subject: [PATCH] Implement UQADD16, UQADD8, UQSUB16, UQSUB8, VQRDMULH, VSLI and VSWP Arm32 instructions (#7174) --- src/ARMeilleure/Decoders/OpCodeTable.cs | 8 + src/ARMeilleure/Instructions/InstEmitAlu32.cs | 184 ++++++++++++++++++ .../Instructions/InstEmitSimdArithmetic32.cs | 27 +++ .../Instructions/InstEmitSimdMove32.cs | 20 ++ .../Instructions/InstEmitSimdShift32.cs | 30 +++ src/ARMeilleure/Instructions/InstName.cs | 7 + .../Arm32/Target/Arm64/InstEmitMove.cs | 1 - .../Arm32/Target/Arm64/InstEmitSaturate.cs | 32 +-- src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs | 54 +++++ src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs | 26 +++ src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 33 ++++ src/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs | 36 +++- 12 files changed, 445 insertions(+), 13 deletions(-) diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs index edc004125..859535670 100644 --- a/src/ARMeilleure/Decoders/OpCodeTable.cs +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -822,6 +822,10 @@ namespace ARMeilleure.Decoders SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create); SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create); SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create); + SetA32("<<<<01100110xxxxxxxx11110001xxxx", InstName.Uqadd16, InstEmit32.Uqadd16, OpCode32AluReg.Create); + SetA32("<<<<01100110xxxxxxxx11111001xxxx", InstName.Uqadd8, InstEmit32.Uqadd8, OpCode32AluReg.Create); + SetA32("<<<<01100110xxxxxxxx11110111xxxx", InstName.Uqsub16, InstEmit32.Uqsub16, OpCode32AluReg.Create); + SetA32("<<<<01100110xxxxxxxx11111111xxxx", InstName.Uqsub8, InstEmit32.Uqsub8, OpCode32AluReg.Create); SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create); SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create); SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create); @@ -1007,6 +1011,8 @@ namespace ARMeilleure.Decoders SetAsimd("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn, InstEmit32.Vqmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); SetAsimd("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun, InstEmit32.Vqmovun, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); + SetAsimd("111100110x01xxxxxxxx1011xxx0xxxx", InstName.Vqrdmulh, InstEmit32.Vqrdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x10xxxxxxxx1011xxx0xxxx", InstName.Vqrdmulh, InstEmit32.Vqrdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); @@ -1030,6 +1036,7 @@ namespace ARMeilleure.Decoders SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding. SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x>>>xxxxxxx0101>xx1xxxx", InstName.Vsli, InstEmit32.Vsli_I, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); @@ -1054,6 +1061,7 @@ namespace ARMeilleure.Decoders SetAsimd("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("1111001x1x< + { + EmitSaturateUqadd(context, d, context.Add(n, m), 16); + })); + } + + public static void Uqadd8(ArmEmitterContext context) + { + OpCode32AluReg op = (OpCode32AluReg)context.CurrOp; + + SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) => + { + EmitSaturateUqadd(context, d, context.Add(n, m), 8); + })); + } + + public static void Uqsub16(ArmEmitterContext context) + { + OpCode32AluReg op = (OpCode32AluReg)context.CurrOp; + + SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) => + { + EmitSaturateUqsub(context, d, context.Subtract(n, m), 16); + })); + } + + public static void Uqsub8(ArmEmitterContext context) + { + OpCode32AluReg op = (OpCode32AluReg)context.CurrOp; + + SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) => + { + EmitSaturateUqsub(context, d, context.Subtract(n, m), 8); + })); + } + public static void Usat(ArmEmitterContext context) { OpCode32Sat op = (OpCode32Sat)context.CurrOp; @@ -934,6 +976,148 @@ namespace ARMeilleure.Instructions } } + private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo) + { + Debug.Assert(saturateTo <= 32); + + if (saturateTo == 32) + { + // No saturation possible for this case. + + context.Copy(result, value); + + return; + } + else if (saturateTo == 0) + { + // Result is always zero if we saturate 0 bits. + + context.Copy(result, Const(0)); + + return; + } + + // If the result is 0, the values are equal and we don't need saturation. + Operand lblNoSat = Label(); + context.BranchIfFalse(lblNoSat, context.ShiftRightUI(value, Const((int)saturateTo))); + + // Saturate. + context.Copy(result, Const(uint.MaxValue >> (32 - (int)saturateTo))); + + Operand lblExit = Label(); + context.Branch(lblExit); + + context.MarkLabel(lblNoSat); + + context.Copy(result, value); + + context.MarkLabel(lblExit); + } + + private static void EmitSaturateUqsub(ArmEmitterContext context, Operand result, Operand value, uint saturateTo) + { + Debug.Assert(saturateTo <= 32); + + if (saturateTo == 32) + { + // No saturation possible for this case. + + context.Copy(result, value); + + return; + } + else if (saturateTo == 0) + { + // Result is always zero if we saturate 0 bits. + + context.Copy(result, Const(0)); + + return; + } + + // If the result is 0, the values are equal and we don't need saturation. + Operand lblNoSat = Label(); + context.BranchIf(lblNoSat, value, Const(0), Comparison.GreaterOrEqual); + + // Saturate. + // Assumes that the value can only underflow, since this is only used for unsigned subtraction. + context.Copy(result, Const(0)); + + Operand lblExit = Label(); + context.Branch(lblExit); + + context.MarkLabel(lblNoSat); + + context.Copy(result, value); + + context.MarkLabel(lblExit); + } + + private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction) + { + Operand tempD = context.AllocateLocal(OperandType.I32); + + Operand tempN = context.ZeroExtend16(OperandType.I32, rn); + Operand tempM = context.ZeroExtend16(OperandType.I32, rm); + elementAction(tempD, tempN, tempM); + Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD); + + tempN = context.ShiftRightUI(rn, Const(16)); + tempM = context.ShiftRightUI(rm, Const(16)); + elementAction(tempD, tempN, tempM); + return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16))); + } + + private static Operand EmitSigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction) + { + return Emit8BitPair(context, rn, rm, elementAction, unsigned: false); + } + + private static Operand EmitUnsigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction) + { + return Emit8BitPair(context, rn, rm, elementAction, unsigned: true); + } + + private static Operand Emit8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction, bool unsigned) + { + Operand tempD = context.AllocateLocal(OperandType.I32); + Operand result = default; + + for (int b = 0; b < 4; b++) + { + Operand nByte = b != 0 ? context.ShiftRightUI(rn, Const(b * 8)) : rn; + Operand mByte = b != 0 ? context.ShiftRightUI(rm, Const(b * 8)) : rm; + + if (unsigned) + { + nByte = context.ZeroExtend8(OperandType.I32, nByte); + mByte = context.ZeroExtend8(OperandType.I32, mByte); + } + else + { + nByte = context.SignExtend8(OperandType.I32, nByte); + mByte = context.SignExtend8(OperandType.I32, mByte); + } + + elementAction(tempD, nByte, mByte); + + if (b == 0) + { + result = context.ZeroExtend8(OperandType.I32, tempD); + } + else if (b < 3) + { + result = context.BitwiseOr(result, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, tempD), Const(b * 8))); + } + else + { + result = context.BitwiseOr(result, context.ShiftLeft(tempD, Const(24))); + } + } + + return result; + } + private static void EmitAluStore(ArmEmitterContext context, Operand value) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index dc2646a55..c807fc858 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -1246,6 +1246,33 @@ namespace ARMeilleure.Instructions EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true); } + public static void Vqrdmulh(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + int eSize = 8 << op.Size; + + EmitVectorBinaryOpI32(context, (op1, op2) => + { + if (op.Size == 2) + { + op1 = context.SignExtend32(OperandType.I64, op1); + op2 = context.SignExtend32(OperandType.I64, op2); + } + + Operand res = context.Multiply(op1, op2); + res = context.Add(res, Const(res.Type, 1L << (eSize - 2))); + res = context.ShiftRightSI(res, Const(eSize - 1)); + res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true); + + if (op.Size == 2) + { + res = context.ConvertI64ToI32(res); + } + + return res; + }, signed: true); + } + public static void Vqsub(ArmEmitterContext context) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs index 9fa740997..fb2641f66 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -191,6 +191,26 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(op.Qd), res); } + public static void Vswp(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.Q) + { + Operand temp = context.Copy(GetVecA32(op.Qd)); + + context.Copy(GetVecA32(op.Qd), GetVecA32(op.Qm)); + context.Copy(GetVecA32(op.Qm), temp); + } + else + { + Operand temp = ExtractScalar(context, OperandType.I64, op.Vd); + + InsertScalar(context, op.Vd, ExtractScalar(context, OperandType.I64, op.Vm)); + InsertScalar(context, op.Vm, temp); + } + } + public static void Vtbl(ArmEmitterContext context) { OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs index e40600a47..e9e3b52b9 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -130,6 +130,36 @@ namespace ARMeilleure.Instructions EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); } + public static void Vsli_I(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = op.Shift; + int eSize = 8 << op.Size; + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL; + + Operand res = GetVec(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtractZx(context, op.Qm, op.Im + index, op.Size); + + Operand neShifted = context.ShiftLeft(me, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Qd, op.Id + index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, op.Id + index, op.Size); + } + + context.Copy(GetVec(op.Qd), res); + } + public static void Vsra(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs index 457abbf49..ac85412d1 100644 --- a/src/ARMeilleure/Instructions/InstName.cs +++ b/src/ARMeilleure/Instructions/InstName.cs @@ -571,6 +571,10 @@ namespace ARMeilleure.Instructions Umaal, Umlal, Umull, + Uqadd16, + Uqadd8, + Uqsub16, + Uqsub8, Usat, Usat16, Usub8, @@ -645,6 +649,7 @@ namespace ARMeilleure.Instructions Vqdmulh, Vqmovn, Vqmovun, + Vqrdmulh, Vqrshrn, Vqrshrun, Vqshrn, @@ -666,6 +671,7 @@ namespace ARMeilleure.Instructions Vshll, Vshr, Vshrn, + Vsli, Vst1, Vst2, Vst3, @@ -682,6 +688,7 @@ namespace ARMeilleure.Instructions Vsub, Vsubl, Vsubw, + Vswp, Vtbl, Vtrn, Vtst, diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs index 88850cb33..d57750fc1 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs @@ -1,6 +1,5 @@ using Ryujinx.Cpu.LightningJit.CodeGen; using Ryujinx.Cpu.LightningJit.CodeGen.Arm64; -using System.Diagnostics; namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 { diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitSaturate.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitSaturate.cs index e2354f448..f1b6e395b 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitSaturate.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitSaturate.cs @@ -114,7 +114,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) => { context.Arm64Assembler.Add(d, n, m); - EmitSaturateUnsignedRange(context, d, 16); + EmitSaturateUqadd(context, d, 16); }); } @@ -123,7 +123,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) => { context.Arm64Assembler.Add(d, n, m); - EmitSaturateUnsignedRange(context, d, 8); + EmitSaturateUqadd(context, d, 8); }); } @@ -140,7 +140,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 context.Arm64Assembler.Add(d, n, m); } - EmitSaturateUnsignedRange(context, d, 16); + EmitSaturateUq(context, d, 16, e == 0); }); } @@ -157,25 +157,25 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 context.Arm64Assembler.Sub(d, n, m); } - EmitSaturateUnsignedRange(context, d, 16); + EmitSaturateUq(context, d, 16, e != 0); }); } public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm) { - InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (d, n, m) => + InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) => { context.Arm64Assembler.Sub(d, n, m); - EmitSaturateUnsignedRange(context, d, 16); + EmitSaturateUqsub(context, d, 16); }); } public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm) { - InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (d, n, m) => + InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) => { context.Arm64Assembler.Sub(d, n, m); - EmitSaturateUnsignedRange(context, d, 8); + EmitSaturateUqsub(context, d, 8); }); } @@ -358,7 +358,17 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 } } - private static void EmitSaturateUnsignedRange(CodeGenContext context, Operand value, uint saturateTo) + private static void EmitSaturateUqadd(CodeGenContext context, Operand value, uint saturateTo) + { + EmitSaturateUq(context, value, saturateTo, isSub: false); + } + + private static void EmitSaturateUqsub(CodeGenContext context, Operand value, uint saturateTo) + { + EmitSaturateUq(context, value, saturateTo, isSub: true); + } + + private static void EmitSaturateUq(CodeGenContext context, Operand value, uint saturateTo, bool isSub) { Debug.Assert(saturateTo <= 32); @@ -379,7 +389,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 return; } - context.Arm64Assembler.Lsr(tempRegister.Operand, value, InstEmitCommon.Const(32 - (int)saturateTo)); + context.Arm64Assembler.Lsr(tempRegister.Operand, value, InstEmitCommon.Const((int)saturateTo)); int branchIndex = context.CodeWriter.InstructionPointer; @@ -387,7 +397,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 context.Arm64Assembler.Cbz(tempRegister.Operand, 0); // Saturate. - context.Arm64Assembler.Mov(value, uint.MaxValue >> (32 - (int)saturateTo)); + context.Arm64Assembler.Mov(value, isSub ? 0u : uint.MaxValue >> (32 - (int)saturateTo)); int delta = context.CodeWriter.InstructionPointer - branchIndex; context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5)); diff --git a/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs index 41365c624..132ddfd0e 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs @@ -25,6 +25,24 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] UQAddSub16() + { + return new[] + { + 0xe6600f10u, // UQADD16 R0, R0, R0 + 0xe6600f70u, // UQSUB16 R0, R0, R0 + }; + } + + private static uint[] UQAddSub8() + { + return new[] + { + 0xe6600f90u, // UQADD8 R0, R0, R0 + 0xe6600ff0u, // UQSUB8 R0, R0, R0 + }; + } + private static uint[] SsatUsat() { return new[] @@ -182,6 +200,42 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void U_Q_AddSub_16([ValueSource(nameof(UQAddSub16))] uint opcode, + [Values(0u, 0xdu)] uint rd, + [Values(1u)] uint rm, + [Values(2u)] uint rn, + [Random(RndCnt)] uint w0, + [Random(RndCnt)] uint w1, + [Random(RndCnt)] uint w2) + { + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void U_Q_AddSub_8([ValueSource(nameof(UQAddSub8))] uint opcode, + [Values(0u, 0xdu)] uint rd, + [Values(1u)] uint rm, + [Values(2u)] uint rn, + [Random(RndCnt)] uint w0, + [Random(RndCnt)] uint w1, + [Random(RndCnt)] uint w2) + { + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] public void Uadd8_Sel([Values(0u)] uint rd, [Values(1u)] uint rm, diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs index 6087a6834..f843fd561 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs @@ -327,6 +327,32 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("VSWP D0, D0")] + public void Vswp([Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rm, + [Values] bool q) + { + uint opcode = 0xf3b20000u; // VSWP D0, D0 + + if (q) + { + opcode |= 1u << 6; + + rd &= ~1u; + rm &= ~1u; + } + + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + V128 v0 = new(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } #endif } } diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index 38e08bf89..843273dc2 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -909,6 +909,39 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("VQRDMULH. , , ")] + public void Vqrdmulh_I([Range(0u, 5u)] uint rd, + [Range(0u, 5u)] uint rn, + [Range(0u, 5u)] uint rm, + [ValueSource(nameof(_8B4H2S1D_))] ulong z, + [ValueSource(nameof(_8B4H2S1D_))] ulong a, + [ValueSource(nameof(_8B4H2S1D_))] ulong b, + [Values(1u, 2u)] uint size) // + { + rd >>= 1; + rd <<= 1; + rn >>= 1; + rn <<= 1; + rm >>= 1; + rm <<= 1; + + uint opcode = 0xf3100b40u & ~(3u << 20); // VQRDMULH.S16 Q0, Q0, Q0 + + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + opcode |= (size & 0x3) << 20; + + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] public void Vp_Add_Long_Accumulate([Values(0u, 2u, 4u, 8u)] uint rd, [Values(0u, 2u, 4u, 8u)] uint rm, diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs index 39b50867f..7375f4d55 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs @@ -202,7 +202,7 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("VSHL. {}, , #")] - public void Vshl_Imm([Values(0u)] uint rd, + public void Vshl_Imm([Values(0u, 1u)] uint rd, [Values(2u, 0u)] uint rm, [Values(0u, 1u, 2u, 3u)] uint size, [Random(RndCntShiftImm)] uint shiftImm, @@ -262,6 +262,40 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("VSLI. {}, , #")] + public void Vsli([Values(0u, 1u)] uint rd, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u, 3u)] uint size, + [Random(RndCntShiftImm)] uint shiftImm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q) + { + uint opcode = 0xf3800510u; // VORR.I32 D0, #0x800000 (immediate value changes it into SLI) + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rd <<= 1; + } + + uint imm = 1u << ((int)size + 3); + imm |= shiftImm & (imm - 1); + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] public void Vqshrn_Vqrshrn_Vrshrn_Imm([ValueSource(nameof(_Vqshrn_Vqrshrn_Vrshrn_Imm_))] uint opcode, [Values(0u, 1u)] uint rd,