diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 30dbaacd46..084ab24902 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -2334,6 +2334,22 @@ void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn,
           (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
 }
 
+// Emits an instruction of the AdvSIMD "extract" class (used by EXT).
+// imm4 is the byte index (shifted to bit 11) and op is shifted to bit 22.
+// Rd selects the 64-bit or 128-bit form; single-precision registers are rejected.
+void ARM64FloatEmitter::EmitExtract(u32 imm4, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+  ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "Singles are not supported!");
+
+  const bool quad = IsQuad(Rd);
+
+  // An oversized index would spill into the neighbouring fields or hit a reserved encoding.
+  ASSERT_MSG(DYNA_REC, imm4 < (quad ? 16u : 8u), "Extract index is out of range!");
+
+  Write32((quad << 30) | (23 << 25) | (op << 22) | (DecodeReg(Rm) << 16) | (imm4 << 11) |
+          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
+}
+
 void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8)
 {
   ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Vector is not supported!");
@@ -3540,6 +3556,13 @@ void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
   EmitPermute(size, 0b111, Rd, Rn, Rm);
 }
 
+// Extract
+// Rd = bytes of the Rm:Rn pair (Rn is the low half), starting at byte `index`.
+void ARM64FloatEmitter::EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 index)
+{
+  EmitExtract(index, 0, Rd, Rn, Rm);
+}
+
 // Scalar shift by immediate
 void ARM64FloatEmitter::SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift)
 {
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index bd2c4822d5..09a2633660 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -1247,6 +1247,9 @@ public:
   void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 
+  // Extract
+  void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 index);
+
   // Scalar shift by immediate
   void SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
   void URSHR(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
@@ -1305,6 +1308,7 @@ private:
   void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
   void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void EmitExtract(u32 imm4, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
   void EmitShiftImm(bool Q, bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
   void EmitScalarShiftImm(bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
index 85d22f6183..239a235533 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -33,9 +33,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
   const u8 size = singles ? 32 : 64;
   const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
 
-  const ARM64Reg VA = fpr.R(a, type);
-  const ARM64Reg VB = fpr.R(b, type);
-  const ARM64Reg VD = fpr.RW(d, type);
+  const ARM64Reg VA = reg_encoder(fpr.R(a, type));
+  const ARM64Reg VB = reg_encoder(fpr.R(b, type));
+  const ARM64Reg VD = reg_encoder(fpr.RW(d, type));
 
   switch (inst.SUBOP10)
   {
@@ -43,23 +43,22 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
     m_float_emit.TRN1(size, VD, VA, VB);
     break;
   case 560:  // 01
-    m_float_emit.INS(size, VD, 0, VA, 0);
-    m_float_emit.INS(size, VD, 1, VB, 1);
+    // Only write the lanes of VD that do not already hold the wanted value.
+    if (d != b)
+    {
+      if (d != a)
+        m_float_emit.MOV(VD, VA);
+      if (a != b)
+        m_float_emit.INS(size, VD, 1, VB, 1);
+    }
+    else if (d != a)
+    {
+      m_float_emit.INS(size, VD, 0, VA, 0);
+    }
     break;
   case 592:  // 10
-    if (d != a && d != b)
-    {
-      m_float_emit.INS(size, VD, 0, VA, 1);
-      m_float_emit.INS(size, VD, 1, VB, 0);
-    }
-    else
-    {
-      ARM64Reg V0 = fpr.GetReg();
-      m_float_emit.INS(size, V0, 0, VA, 1);
-      m_float_emit.INS(size, V0, 1, VB, 0);
-      m_float_emit.MOV(reg_encoder(VD), reg_encoder(V0));
-      fpr.Unlock(V0);
-    }
+    // size >> 3 is one element in bytes; a single EXT needs no temporary when VD aliases a source.
+    m_float_emit.EXT(VD, VA, VB, size >> 3);
     break;
   case 624:  // 11
     m_float_emit.TRN2(size, VD, VA, VB);