diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 9f5e830f18..020e72fa0b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -76,23 +76,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, if (flags & BackPatchInfo::FLAG_SIZE_F32) { m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr); - m_float_emit.INS(32, RS, 1, RS, 0); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } else { - if (flags & BackPatchInfo::FLAG_ONLY_LOWER) - { - m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr); - m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); - } - else - { - m_float_emit.LDR(64, Q0, X28, addr); - m_float_emit.REV64(8, D0, D0); - m_float_emit.INS(64, RS, 0, Q0, 0); - } + m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr); + m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } } else if (flags & BackPatchInfo::FLAG_STORE) @@ -142,7 +132,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, handler.addr_reg = addr; handler.gprs = gprs_to_push; handler.fprs = fprs_to_push; - handler.flags = flags & ~BackPatchInfo::FLAG_ONLY_LOWER; + handler.flags = flags; FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start]; auto handler_loc_iter = m_handler_to_loc.find(handler); @@ -199,7 +189,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, { MOVI2R(X30, (u64)&PowerPC::Read_U32); BLR(X30); - m_float_emit.DUP(32, RS, X0); + m_float_emit.INS(32, RS, 0, X0); m_float_emit.FCVTL(64, RS, RS); } else diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 4ee267f55c..099f97b710 100644 --- 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -24,7 +24,7 @@ void JitArm64::fabsx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == b); - ARM64Reg VB = fpr.R(b); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); @@ -37,14 +37,13 @@ void JitArm64::faddsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - m_float_emit.INS(64, VD, 1, VD, 0); } void JitArm64::faddx(UGeckoInstruction inst) @@ -56,8 +55,8 @@ void JitArm64::faddx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == a || d == b); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); @@ -70,17 +69,17 @@ void JitArm64::fmaddsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); ARM64Reg V0 = fpr.GetReg(); 
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.INS(64, VD, 1, VD, 0); + fpr.Unlock(V0); } @@ -93,9 +92,9 @@ void JitArm64::fmaddx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == b || d == c); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); @@ -110,7 +109,7 @@ void JitArm64::fmrx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == b); - ARM64Reg VB = fpr.R(b); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.INS(64, VD, 0, VB, 0); @@ -123,17 +122,17 @@ void JitArm64::fmsubsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.INS(64, VD, 1, VD, 0); + fpr.Unlock(V0); } @@ -146,9 +145,9 @@ void JitArm64::fmsubx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == b || d == c); - ARM64Reg VA = fpr.R(a); - 
ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); @@ -161,14 +160,13 @@ void JitArm64::fmulsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == c, false); + fpr.BindToRegister(d, d == a || d == c, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VC = fpr.R(c); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.INS(64, VD, 1, VD, 0); } void JitArm64::fmulx(UGeckoInstruction inst) @@ -180,8 +178,8 @@ void JitArm64::fmulx(UGeckoInstruction inst) u32 a = inst.FA, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == c); - ARM64Reg VA = fpr.R(a); - ARM64Reg VC = fpr.R(c); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); @@ -196,7 +194,7 @@ void JitArm64::fnabsx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == b); - ARM64Reg VB = fpr.R(b); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); @@ -212,7 +210,7 @@ void JitArm64::fnegx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == b); - ARM64Reg VB = fpr.R(b); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); @@ -225,18 +223,18 @@ void JitArm64::fnmaddsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = 
inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - m_float_emit.INS(64, VD, 1, VD, 0); + fpr.Unlock(V0); } @@ -249,9 +247,9 @@ void JitArm64::fnmaddx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == b || d == c); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FNMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); @@ -264,18 +262,18 @@ void JitArm64::fnmsubsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), 
EncodeRegToDouble(VB)); m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - m_float_emit.INS(64, VD, 1, VD, 0); + fpr.Unlock(V0); } @@ -288,9 +286,9 @@ void JitArm64::fnmsubx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == b || d == c); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VC = fpr.R(c); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); @@ -305,9 +303,9 @@ void JitArm64::fselx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; fpr.BindToRegister(d, d == a || d == b || d == c); - ARM64Reg VD = fpr.R(d); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); + ARM64Reg VD = fpr.R(d, REG_IS_LOADED); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VC = fpr.R(c); m_float_emit.FCMPE(EncodeRegToDouble(VA)); @@ -321,14 +319,13 @@ void JitArm64::fsubsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - m_float_emit.INS(64, VD, 1, VD, 0); } void JitArm64::fsubx(UGeckoInstruction inst) @@ -340,8 +337,8 @@ void JitArm64::fsubx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == a || d == b); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg 
VD = fpr.R(d); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); @@ -353,14 +350,13 @@ void JitArm64::frspx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == b, false); + fpr.BindToRegister(d, d == b, REG_DUP); - ARM64Reg VB = fpr.R(b); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); m_float_emit.FCVTL(64, EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - m_float_emit.INS(64, VD, 1, VD, 0); } void JitArm64::fcmpx(UGeckoInstruction inst) @@ -371,8 +367,8 @@ void JitArm64::fcmpx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB; int crf = inst.CRFD; - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -457,7 +453,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == b); - ARM64Reg VB = fpr.R(b); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); ARM64Reg V0 = fpr.GetReg(); @@ -481,8 +477,8 @@ void JitArm64::fdivx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, d = inst.FD; fpr.BindToRegister(d, d == a || d == b); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.R(d); m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); @@ -495,12 +491,11 @@ void JitArm64::fdivsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_DUP); - ARM64Reg VA = fpr.R(a); - ARM64Reg VB = fpr.R(b); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_IS_LOADED); + 
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.R(d, REG_DUP); m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - m_float_emit.INS(64, VD, 1, VD, 0); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 31bae8de51..5d5f7a750f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -71,19 +71,13 @@ void JitArm64::lfXX(UGeckoInstruction inst) u32 imm_addr = 0; bool is_immediate = false; - bool only_lower = !!(flags & BackPatchInfo::FLAG_SIZE_F64); + RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? REG_LOWER_PAIR : REG_DUP; - fpr.BindToRegister(inst.FD, false, only_lower); + fpr.BindToRegister(inst.FD, false, type); - ARM64Reg VD = fpr.R(inst.FD, only_lower); + ARM64Reg VD = fpr.R(inst.FD, type); ARM64Reg addr_reg = W0; - if (!fpr.IsLower(inst.FD)) - only_lower = false; - - if (only_lower) - flags |= BackPatchInfo::FLAG_ONLY_LOWER; - gpr.Lock(W0, W30); fpr.Lock(Q0); @@ -270,7 +264,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) u32 imm_addr = 0; bool is_immediate = false; - ARM64Reg V0 = fpr.R(inst.FS); + ARM64Reg V0 = fpr.R(inst.FS, REG_IS_LOADED); ARM64Reg addr_reg = W1; gpr.Lock(W0, W1, W30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 8bf46e17d4..c31ec67615 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -58,7 +58,7 @@ void JitArm64::psq_l(UGeckoInstruction inst) if (update) { - gpr.BindToRegister(inst.RA, false); + gpr.BindToRegister(inst.RA, REG_REG); MOV(arm_addr, addr_reg); } @@ -66,8 +66,8 @@ void JitArm64::psq_l(UGeckoInstruction inst) LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(X30); - 
fpr.BindToRegister(inst.RS, false, false); - ARM64Reg VS = fpr.R(inst.RS, false); + fpr.BindToRegister(inst.RS, false, REG_REG); + ARM64Reg VS = fpr.R(inst.RS, REG_REG); m_float_emit.FCVTL(64, VS, D0); if (inst.W) { @@ -97,7 +97,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) fpr.Lock(Q0, Q1); ARM64Reg arm_addr = gpr.R(inst.RA); - ARM64Reg VS = fpr.R(inst.RS, false); + ARM64Reg VS = fpr.R(inst.RS, REG_REG); ARM64Reg scale_reg = W0; ARM64Reg addr_reg = W1; @@ -129,7 +129,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) if (update) { - gpr.BindToRegister(inst.RA, false); + gpr.BindToRegister(inst.RA, REG_REG); MOV(arm_addr, addr_reg); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index ee27a5caf8..cff1f49ce9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -23,10 +23,10 @@ void JitArm64::ps_abs(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == b, false); + fpr.BindToRegister(d, d == b, REG_REG); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FABS(64, VD, VB); } @@ -38,11 +38,11 @@ void JitArm64::ps_add(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FADD(64, VD, VA, VB); } @@ -54,11 +54,11 @@ void JitArm64::ps_div(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, 
REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FDIV(64, VD, VA, VB); } @@ -70,12 +70,12 @@ void JitArm64::ps_madd(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(64, V0, VA, VC); @@ -91,12 +91,12 @@ void JitArm64::ps_madds0(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VC, 0); @@ -113,12 +113,12 @@ void JitArm64::ps_madds1(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); 
ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VC, 1); @@ -135,11 +135,11 @@ void JitArm64::ps_merge00(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.TRN1(64, VD, VA, VB); } @@ -151,11 +151,11 @@ void JitArm64::ps_merge01(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.INS(64, VD, 0, VA, 0); m_float_emit.INS(64, VD, 1, VB, 1); @@ -168,11 +168,11 @@ void JitArm64::ps_merge10(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); if (d != a && d != b) { @@ -196,11 +196,11 @@ void JitArm64::ps_merge11(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.TRN2(64, VD, VA, 
VB); } @@ -216,10 +216,10 @@ void JitArm64::ps_mr(UGeckoInstruction inst) if (d == b) return; - fpr.BindToRegister(d, false, false); + fpr.BindToRegister(d, REG_REG, REG_REG); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.ORR(VD, VB, VB); } @@ -231,11 +231,11 @@ void JitArm64::ps_mul(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == c, false); + fpr.BindToRegister(d, d == a || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FMUL(64, VD, VA, VC); } @@ -247,11 +247,11 @@ void JitArm64::ps_muls0(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == c, false); + fpr.BindToRegister(d, d == a || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VC, 0); @@ -266,11 +266,11 @@ void JitArm64::ps_muls1(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == c, false); + fpr.BindToRegister(d, d == a || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VC, 1); @@ -285,12 +285,12 @@ void JitArm64::ps_msub(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == 
a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(64, V0, VA, VC); @@ -306,10 +306,10 @@ void JitArm64::ps_nabs(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == b, false); + fpr.BindToRegister(d, d == b, REG_REG); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FABS(64, VD, VB); m_float_emit.FNEG(64, VD, VD); @@ -322,10 +322,10 @@ void JitArm64::ps_neg(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == b, false); + fpr.BindToRegister(d, d == b, REG_REG); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FNEG(64, VD, VB); } @@ -337,12 +337,12 @@ void JitArm64::ps_nmadd(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(64, V0, VA, VC); @@ -359,12 +359,12 @@ void JitArm64::ps_nmsub(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d 
== c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.FMUL(64, V0, VA, VC); @@ -381,10 +381,10 @@ void JitArm64::ps_res(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == b, false); + fpr.BindToRegister(d, d == b, REG_REG); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FRSQRTE(64, VD, VB); } @@ -396,12 +396,12 @@ void JitArm64::ps_sel(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); if (d != a && d != b && d != c) { @@ -425,11 +425,11 @@ void JitArm64::ps_sub(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b, false); + fpr.BindToRegister(d, d == a || d == b, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); m_float_emit.FSUB(64, VD, VA, VB); } @@ -441,12 +441,12 @@ void JitArm64::ps_sum0(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - 
fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VB, 1); @@ -471,12 +471,12 @@ void JitArm64::ps_sum1(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c, false); + fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG); - ARM64Reg VA = fpr.R(a, false); - ARM64Reg VB = fpr.R(b, false); - ARM64Reg VC = fpr.R(c, false); - ARM64Reg VD = fpr.R(d, false); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.R(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); m_float_emit.DUP(64, V0, VA, 0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 4f539371ab..6a674bf477 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -307,7 +307,7 @@ void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) } } -ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower) +ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) { OpArg& reg = m_guest_registers[preg]; IncrementAllUsed(); @@ -320,7 +320,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower) break; case REG_LOWER_PAIR: { - if (!only_lower) + if (type == REG_REG) { // Load the high 64bits from the file and insert them in to the high 64bits of the host register ARM64Reg tmp_reg = GetReg(); @@ -331,18 +331,52 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower) // Change it over to a full 128bit register 
reg.LoadToReg(reg.GetReg()); } + else if (type == REG_DUP) + { + // We already only have the lower 64bits + // Don't do anything + } return reg.GetReg(); } break; + case REG_DUP: + { + ARM64Reg host_reg = reg.GetReg(); + if (type == REG_REG) + { + // We are requesting a full 128bit register + // but we are only available in the lower 64bits + // Duplicate to the top and change over + m_float_emit->INS(64, host_reg, 1, host_reg, 0); + reg.LoadToReg(host_reg); + } + else if (type == REG_LOWER_PAIR) + { + // We are only requesting the lower 64bits of a pair + // We've got to be careful in this instance + // Store our current duplicated high bits to the file + // then convert over to a lower reg + if (reg.IsDirty()) + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + reg.LoadLowerReg(host_reg); + } + return host_reg; + } + break; case REG_NOTLOADED: // Register isn't loaded at /all/ { ARM64Reg host_reg = GetReg(); u32 load_size; - if (only_lower) + if (type == REG_LOWER_PAIR) { load_size = 64; reg.LoadLowerReg(host_reg); } + else if (type == REG_DUP) + { + load_size = 64; + reg.LoadDup(host_reg); + } else { load_size = 128; @@ -361,7 +395,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower) return INVALID_REG; } -void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower) +void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, RegType type) { OpArg& reg = m_guest_registers[preg]; @@ -376,12 +410,17 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower) { ARM64Reg host_reg = GetReg(); u32 load_size; - if (only_lower) + if (type == REG_LOWER_PAIR) { // We only want the lower 64bits load_size = 64; reg.LoadLowerReg(host_reg); } + else if (type == REG_DUP) + { + load_size = 64; + reg.LoadDup(host_reg); + } else { // We want the full 128bit register @@ -394,7 +433,8 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower) break; case REG_LOWER_PAIR: { - if (!only_lower) + 
ARM64Reg host_reg = reg.GetReg(); + if (type == REG_REG) { // Okay, we've got the lower reg loaded and we really wanted the full register if (do_load) @@ -402,27 +442,63 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower) // Load the high 64bits from the file and insert them in to the high 64bits of the host register ARM64Reg tmp_reg = GetReg(); m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1])); - m_float_emit->INS(64, reg.GetReg(), 1, tmp_reg, 0); + m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); } // Change it over to a full 128bit register - reg.LoadToReg(reg.GetReg()); + reg.LoadToReg(host_reg); + } + else if (type == REG_DUP) + { + // Register is already the lower pair + // Just convert it over to a dup + reg.LoadDup(host_reg); } } break; case REG_REG: { - if (only_lower) + ARM64Reg host_reg = reg.GetReg(); + if (type == REG_LOWER_PAIR) { // If we only want the lower bits, let's store away the high bits and drop to a lower only register // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. 
- ARM64Reg host_reg = reg.GetReg(); if (was_dirty) m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); reg.LoadLowerReg(host_reg); } + else if (type == REG_DUP) + { + // If we are going from a full 128bit register to a duplicate + // then we can just change over + reg.LoadDup(host_reg); + } + } + break; + case REG_DUP: + { + ARM64Reg host_reg = reg.GetReg(); + if (type == REG_REG) + { + // We are a duplicated register going to a full 128bit register + // Do an insert of our lower 64bits to the higher 64bits + m_float_emit->INS(64, host_reg, 1, host_reg, 0); + + // Change over to the full 128bit register + reg.LoadToReg(host_reg); + } + else if (type == REG_LOWER_PAIR) + { + // We are duplicated changing over to a lower register + // We've got to be careful in this instance and do a store of our lower 64bits + // to the upper 64bits in the PowerPC state + // That way, in case we hit the path of DUP->LOWER->REG, we get the correct bits back + if (was_dirty) + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + reg.LoadLowerReg(host_reg); + } } break; default: @@ -487,6 +563,24 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) if (reg.IsDirty()) m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + if (!maintain_state) + { + UnlockRegister(host_reg); + reg.Flush(); + } + } + else if (reg.GetType() == REG_DUP) + { + ARM64Reg host_reg = reg.GetReg(); + if (reg.IsDirty()) + { + // If the paired registers were at the start of ppcState we could do an STP here. 
+ // Too bad moving them would break savestate compatibility between x86_64 and AArch64 + //m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + } + if (!maintain_state) { UnlockRegister(host_reg); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 084ab8c94a..94edd0c9f8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -20,6 +20,8 @@ enum RegType REG_REG, // Reg type is register REG_IMM, // Reg is really a IMM REG_LOWER_PAIR, // Only the lower pair of a paired register + REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) + REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded }; enum FlushMode @@ -65,6 +67,11 @@ public: m_type = REG_LOWER_PAIR; m_reg = reg; } + void LoadDup(ARM64Reg reg) + { + m_type = REG_DUP; + m_reg = reg; + } void LoadToImm(u32 imm) { m_type = REG_IMM; @@ -266,12 +273,9 @@ public: // Returns a guest register inside of a host register // Will dump an immediate to the host register as well - ARM64Reg R(u32 preg, bool only_lower = true); + ARM64Reg R(u32 preg, RegType type = REG_LOWER_PAIR); - void BindToRegister(u32 preg, bool do_load, bool only_lower = true); - - // Returns if the register is only the lower 64bit register - bool IsLower(u32 preg) const { return m_guest_registers[preg].GetType() == REG_LOWER_PAIR; } + void BindToRegister(u32 preg, bool do_load, RegType type = REG_LOWER_PAIR); BitSet32 GetCallerSavedUsed() override; diff --git a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h index 508b10b45d..d3b6f46c31 100644 --- 
a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h +++ b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h @@ -18,8 +18,7 @@ struct BackPatchInfo FLAG_SIZE_F64 = (1 << 6), FLAG_REVERSE = (1 << 7), FLAG_EXTEND = (1 << 8), - FLAG_ONLY_LOWER = (1 << 9), - FLAG_SIZE_F32I = (1 << 10), + FLAG_SIZE_F32I = (1 << 9), }; static u32 GetFlagSize(u32 flags)