From 2a9d88739c6c9c3e7a9fc5f3443e0cb07c4eefe0 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 2 Feb 2021 21:43:50 +0100 Subject: [PATCH] JitArm64: Skip accurate single/double conversion if store-safe --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 3 ++ .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 2 +- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 4 +++ Source/Core/Core/PowerPC/JitArm64/Jit.h | 12 ++++--- .../JitArm64/JitArm64_FloatingPoint.cpp | 34 ++++++++++++++++--- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 18 +++++----- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 2 ++ Source/Core/Core/PowerPC/PPCAnalyst.cpp | 3 +- Source/Core/Core/PowerPC/PPCAnalyst.h | 3 +- 10 files changed, 62 insertions(+), 21 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index ac8b60afee..ed69b5d4cd 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -982,6 +982,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) js.compilerPC = op.address; js.op = &op; + js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst; js.instructionNumber = i; js.instructionsLeft = (code_block.m_num_instructions - 1) - i; const GekkoOPInfo* opinfo = op.opinfo; @@ -1118,6 +1119,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) CompileInstruction(op); + js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; + if (jo.memcheck && (opinfo->flags & FL_LOADSTORE)) { // If we have a fastmem loadstore, we can omit the exception check and let fastmem handle diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 2ce40f08c8..347f67e2c6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -105,7 +105,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) if (single) { - if (js.op->fprIsStoreSafe[s]) + if (js.fpr_is_store_safe[s]) { RCOpArg Rs = fpr.Use(s, RCMode::Read); RegCache::Realize(Rs); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 9c1b98d2f0..2d838d7ba6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -695,6 +695,7 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) js.compilerPC = op.address; js.op = &op; + js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst; js.instructionNumber = i; js.instructionsLeft = (code_block.m_num_instructions - 1) - i; const GekkoOPInfo* opinfo = op.opinfo; @@ -830,6 +831,9 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) } CompileInstruction(op); + + js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; + if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer) FlushCarry(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 1c60ae0aaf..f8b4b5f146 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -152,11 +152,15 @@ public: void psq_l(UGeckoInstruction inst); void psq_st(UGeckoInstruction inst); - void ConvertDoubleToSingleLower(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg); - void ConvertDoubleToSinglePair(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg); - void ConvertSingleToDoubleLower(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg, + void ConvertDoubleToSingleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg, + Arm64Gen::ARM64Reg src_reg); + void ConvertDoubleToSinglePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg, + Arm64Gen::ARM64Reg src_reg); + void ConvertSingleToDoubleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg, + Arm64Gen::ARM64Reg src_reg, Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG); - void ConvertSingleToDoublePair(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg, + void ConvertSingleToDoublePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg, + Arm64Gen::ARM64Reg src_reg, Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG); private: diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 59e27431cd..8d3553afd7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -389,8 +389,14 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) // instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs // into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN. -void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg) +void JitArm64::ConvertDoubleToSingleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg) { + if (js.fpr_is_store_safe[guest_reg]) + { + m_float_emit.FCVT(32, 64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg)); + return; + } + FlushCarry(); const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; @@ -403,8 +409,14 @@ void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg) ABI_PopRegisters(gpr_saved); } -void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg) +void JitArm64::ConvertDoubleToSinglePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg) { + if (js.fpr_is_store_safe[guest_reg]) + { + m_float_emit.FCVTN(32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg)); + return; + } + FlushCarry(); const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; @@ -421,10 +433,17 @@ void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg) ABI_PopRegisters(gpr_saved); } -void JitArm64::ConvertSingleToDoubleLower(ARM64Reg dest_reg, ARM64Reg src_reg, ARM64Reg scratch_reg) +void JitArm64::ConvertSingleToDoubleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg, + ARM64Reg scratch_reg) { ASSERT(scratch_reg != src_reg); + if (js.fpr_is_store_safe[guest_reg]) + { + m_float_emit.FCVT(64, 32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg)); + return; + } + const bool switch_to_farcode = !IsInFarCode(); FlushCarry(); @@ -476,10 +495,17 @@ void JitArm64::ConvertSingleToDoubleLower(ARM64Reg dest_reg, ARM64Reg src_reg, A } } -void JitArm64::ConvertSingleToDoublePair(ARM64Reg dest_reg, ARM64Reg src_reg, ARM64Reg scratch_reg) +void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg, + ARM64Reg scratch_reg) { ASSERT(scratch_reg != src_reg); + if (js.fpr_is_store_safe[guest_reg]) + { + m_float_emit.FCVTL(64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg)); + return; + } + const bool switch_to_farcode = !IsInFarCode(); FlushCarry(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 3509df1936..068d61d0fb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -258,7 +258,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) if (want_single && !have_single) { const ARM64Reg single_reg = fpr.GetReg(); - ConvertDoubleToSingleLower(single_reg, V0); + ConvertDoubleToSingleLower(inst.FS, single_reg, V0); V0 = single_reg; } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 1363863286..3715c897d2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -469,7 +469,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) // Else convert this register back to doubles. const ARM64Reg tmp_reg = GetReg(); - m_jit->ConvertSingleToDoublePair(host_reg, host_reg, tmp_reg); + m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg); UnlockRegister(tmp_reg); reg.Load(host_reg, RegType::Register); @@ -487,7 +487,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) // Else convert this register back to a double. const ARM64Reg tmp_reg = GetReg(); - m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg); + m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg); UnlockRegister(tmp_reg); reg.Load(host_reg, RegType::LowerPair); @@ -524,7 +524,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) } const ARM64Reg tmp_reg = GetReg(); - m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg); + m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg); UnlockRegister(tmp_reg); reg.Load(host_reg, RegType::Duplicated); @@ -594,7 +594,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty) { // We must *not* change host_reg as this register might still be in use. So it's fine to - // store this register, but it's *not* fine to convert it to double. So for double convertion, + // store this register, but it's *not* fine to convert it to double. So for double conversion, // a temporary register needs to be used. ARM64Reg host_reg = reg.GetReg(); ARM64Reg flush_reg = host_reg; @@ -603,7 +603,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) { case RegType::Single: flush_reg = GetReg(); - m_jit->ConvertSingleToDoublePair(flush_reg, host_reg, flush_reg); + m_jit->ConvertSingleToDoublePair(preg, flush_reg, host_reg, flush_reg); [[fallthrough]]; case RegType::Register: // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit @@ -614,7 +614,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) break; case RegType::DuplicatedSingle: flush_reg = GetReg(); - m_jit->ConvertSingleToDoubleLower(flush_reg, host_reg, flush_reg); + m_jit->ConvertSingleToDoubleLower(preg, flush_reg, host_reg, flush_reg); [[fallthrough]]; case RegType::Duplicated: // Store PSR1 (which is equal to PSR0) in memory. @@ -725,13 +725,13 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) if (type == RegType::Single) { if (dirty) - m_jit->ConvertSingleToDoublePair(host_reg, host_reg, tmp_reg); + m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg); type = RegType::Register; } if (type == RegType::DuplicatedSingle || type == RegType::LowerPairSingle) { if (dirty) - m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg); + m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg); if (type == RegType::DuplicatedSingle) type = RegType::Duplicated; @@ -822,7 +822,7 @@ void Arm64FPRCache::FixSinglePrecision(size_t preg) m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::DuplicatedSingle); break; - case RegType::Register: // PS0 and PS1 needs to be converted + case RegType::Register: // PS0 and PS1 need to be converted m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::Single); break; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 558beec9b9..2f1686b832 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -8,6 +8,7 @@ #include #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" #include "Core/ConfigManager.h" @@ -98,6 +99,7 @@ protected: PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats fpa; PPCAnalyst::CodeOp* op; + BitSet32 fpr_is_store_safe; JitBlock* curBlock; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 0017d636e7..caa9f0c398 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -976,7 +976,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: op.fprIsSingle = fprIsSingle; op.fprIsDuplicated = fprIsDuplicated; - op.fprIsStoreSafe = fprIsStoreSafe; + op.fprIsStoreSafeBeforeInst = fprIsStoreSafe; if (op.fregOut >= 0) { if (op.opinfo->type == OpType::SingleFP) @@ -1036,6 +1036,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: (op.opinfo->type == OpType::SingleFP || op.opinfo->type == OpType::PS); } } + op.fprIsStoreSafeAfterInst = fprIsStoreSafe; if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS) { diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index d1154baee6..740e23848f 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -66,7 +66,8 @@ struct CodeOp // 16B // convert between single and double formats by just using the host machine's instruction for it. // (The reason why we can't always do this is because some games rely on the exact bits of // denormals and SNaNs being preserved as long as no arithmetic operation is performed on them.) - BitSet32 fprIsStoreSafe; + BitSet32 fprIsStoreSafeBeforeInst; + BitSet32 fprIsStoreSafeAfterInst; BitSet32 GetFregsOut() const {