JitArm64: Skip accurate single/double conversion if store-safe
This commit is contained in:
parent
1d106ceaf5
commit
2a9d88739c
|
@ -982,6 +982,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
|
|
||||||
js.compilerPC = op.address;
|
js.compilerPC = op.address;
|
||||||
js.op = &op;
|
js.op = &op;
|
||||||
|
js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst;
|
||||||
js.instructionNumber = i;
|
js.instructionNumber = i;
|
||||||
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
||||||
const GekkoOPInfo* opinfo = op.opinfo;
|
const GekkoOPInfo* opinfo = op.opinfo;
|
||||||
|
@ -1118,6 +1119,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
|
|
||||||
CompileInstruction(op);
|
CompileInstruction(op);
|
||||||
|
|
||||||
|
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||||
|
|
||||||
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
|
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
|
||||||
{
|
{
|
||||||
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
|
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
|
||||||
|
|
|
@ -105,7 +105,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
if (single)
|
if (single)
|
||||||
{
|
{
|
||||||
if (js.op->fprIsStoreSafe[s])
|
if (js.fpr_is_store_safe[s])
|
||||||
{
|
{
|
||||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||||
RegCache::Realize(Rs);
|
RegCache::Realize(Rs);
|
||||||
|
|
|
@ -695,6 +695,7 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
|
|
||||||
js.compilerPC = op.address;
|
js.compilerPC = op.address;
|
||||||
js.op = &op;
|
js.op = &op;
|
||||||
|
js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst;
|
||||||
js.instructionNumber = i;
|
js.instructionNumber = i;
|
||||||
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
||||||
const GekkoOPInfo* opinfo = op.opinfo;
|
const GekkoOPInfo* opinfo = op.opinfo;
|
||||||
|
@ -830,6 +831,9 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
}
|
}
|
||||||
|
|
||||||
CompileInstruction(op);
|
CompileInstruction(op);
|
||||||
|
|
||||||
|
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||||
|
|
||||||
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
|
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
|
||||||
FlushCarry();
|
FlushCarry();
|
||||||
|
|
||||||
|
|
|
@ -152,11 +152,15 @@ public:
|
||||||
void psq_l(UGeckoInstruction inst);
|
void psq_l(UGeckoInstruction inst);
|
||||||
void psq_st(UGeckoInstruction inst);
|
void psq_st(UGeckoInstruction inst);
|
||||||
|
|
||||||
void ConvertDoubleToSingleLower(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg);
|
void ConvertDoubleToSingleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||||
void ConvertDoubleToSinglePair(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg);
|
Arm64Gen::ARM64Reg src_reg);
|
||||||
void ConvertSingleToDoubleLower(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg,
|
void ConvertDoubleToSinglePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||||
|
Arm64Gen::ARM64Reg src_reg);
|
||||||
|
void ConvertSingleToDoubleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||||
|
Arm64Gen::ARM64Reg src_reg,
|
||||||
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
void ConvertSingleToDoublePair(Arm64Gen::ARM64Reg dest_reg, Arm64Gen::ARM64Reg src_reg,
|
void ConvertSingleToDoublePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||||
|
Arm64Gen::ARM64Reg src_reg,
|
||||||
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -389,8 +389,14 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||||
// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
|
// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
|
||||||
// into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN.
|
// into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN.
|
||||||
|
|
||||||
void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg)
|
void JitArm64::ConvertDoubleToSingleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg)
|
||||||
{
|
{
|
||||||
|
if (js.fpr_is_store_safe[guest_reg])
|
||||||
|
{
|
||||||
|
m_float_emit.FCVT(32, 64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
FlushCarry();
|
FlushCarry();
|
||||||
|
|
||||||
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
|
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
|
||||||
|
@ -403,8 +409,14 @@ void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg)
|
||||||
ABI_PopRegisters(gpr_saved);
|
ABI_PopRegisters(gpr_saved);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg)
|
void JitArm64::ConvertDoubleToSinglePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg)
|
||||||
{
|
{
|
||||||
|
if (js.fpr_is_store_safe[guest_reg])
|
||||||
|
{
|
||||||
|
m_float_emit.FCVTN(32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
FlushCarry();
|
FlushCarry();
|
||||||
|
|
||||||
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
|
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
|
||||||
|
@ -421,10 +433,17 @@ void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg)
|
||||||
ABI_PopRegisters(gpr_saved);
|
ABI_PopRegisters(gpr_saved);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ConvertSingleToDoubleLower(ARM64Reg dest_reg, ARM64Reg src_reg, ARM64Reg scratch_reg)
|
void JitArm64::ConvertSingleToDoubleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg,
|
||||||
|
ARM64Reg scratch_reg)
|
||||||
{
|
{
|
||||||
ASSERT(scratch_reg != src_reg);
|
ASSERT(scratch_reg != src_reg);
|
||||||
|
|
||||||
|
if (js.fpr_is_store_safe[guest_reg])
|
||||||
|
{
|
||||||
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const bool switch_to_farcode = !IsInFarCode();
|
const bool switch_to_farcode = !IsInFarCode();
|
||||||
|
|
||||||
FlushCarry();
|
FlushCarry();
|
||||||
|
@ -476,10 +495,17 @@ void JitArm64::ConvertSingleToDoubleLower(ARM64Reg dest_reg, ARM64Reg src_reg, A
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ConvertSingleToDoublePair(ARM64Reg dest_reg, ARM64Reg src_reg, ARM64Reg scratch_reg)
|
void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg,
|
||||||
|
ARM64Reg scratch_reg)
|
||||||
{
|
{
|
||||||
ASSERT(scratch_reg != src_reg);
|
ASSERT(scratch_reg != src_reg);
|
||||||
|
|
||||||
|
if (js.fpr_is_store_safe[guest_reg])
|
||||||
|
{
|
||||||
|
m_float_emit.FCVTL(64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const bool switch_to_farcode = !IsInFarCode();
|
const bool switch_to_farcode = !IsInFarCode();
|
||||||
|
|
||||||
FlushCarry();
|
FlushCarry();
|
||||||
|
|
|
@ -258,7 +258,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
if (want_single && !have_single)
|
if (want_single && !have_single)
|
||||||
{
|
{
|
||||||
const ARM64Reg single_reg = fpr.GetReg();
|
const ARM64Reg single_reg = fpr.GetReg();
|
||||||
ConvertDoubleToSingleLower(single_reg, V0);
|
ConvertDoubleToSingleLower(inst.FS, single_reg, V0);
|
||||||
V0 = single_reg;
|
V0 = single_reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -469,7 +469,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||||
|
|
||||||
// Else convert this register back to doubles.
|
// Else convert this register back to doubles.
|
||||||
const ARM64Reg tmp_reg = GetReg();
|
const ARM64Reg tmp_reg = GetReg();
|
||||||
m_jit->ConvertSingleToDoublePair(host_reg, host_reg, tmp_reg);
|
m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg);
|
||||||
UnlockRegister(tmp_reg);
|
UnlockRegister(tmp_reg);
|
||||||
|
|
||||||
reg.Load(host_reg, RegType::Register);
|
reg.Load(host_reg, RegType::Register);
|
||||||
|
@ -487,7 +487,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||||
|
|
||||||
// Else convert this register back to a double.
|
// Else convert this register back to a double.
|
||||||
const ARM64Reg tmp_reg = GetReg();
|
const ARM64Reg tmp_reg = GetReg();
|
||||||
m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg);
|
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||||
UnlockRegister(tmp_reg);
|
UnlockRegister(tmp_reg);
|
||||||
|
|
||||||
reg.Load(host_reg, RegType::LowerPair);
|
reg.Load(host_reg, RegType::LowerPair);
|
||||||
|
@ -524,7 +524,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||||
}
|
}
|
||||||
|
|
||||||
const ARM64Reg tmp_reg = GetReg();
|
const ARM64Reg tmp_reg = GetReg();
|
||||||
m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg);
|
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||||
UnlockRegister(tmp_reg);
|
UnlockRegister(tmp_reg);
|
||||||
|
|
||||||
reg.Load(host_reg, RegType::Duplicated);
|
reg.Load(host_reg, RegType::Duplicated);
|
||||||
|
@ -594,7 +594,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||||
if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty)
|
if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty)
|
||||||
{
|
{
|
||||||
// We must *not* change host_reg as this register might still be in use. So it's fine to
|
// We must *not* change host_reg as this register might still be in use. So it's fine to
|
||||||
// store this register, but it's *not* fine to convert it to double. So for double convertion,
|
// store this register, but it's *not* fine to convert it to double. So for double conversion,
|
||||||
// a temporary register needs to be used.
|
// a temporary register needs to be used.
|
||||||
ARM64Reg host_reg = reg.GetReg();
|
ARM64Reg host_reg = reg.GetReg();
|
||||||
ARM64Reg flush_reg = host_reg;
|
ARM64Reg flush_reg = host_reg;
|
||||||
|
@ -603,7 +603,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||||
{
|
{
|
||||||
case RegType::Single:
|
case RegType::Single:
|
||||||
flush_reg = GetReg();
|
flush_reg = GetReg();
|
||||||
m_jit->ConvertSingleToDoublePair(flush_reg, host_reg, flush_reg);
|
m_jit->ConvertSingleToDoublePair(preg, flush_reg, host_reg, flush_reg);
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case RegType::Register:
|
case RegType::Register:
|
||||||
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
|
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
|
||||||
|
@ -614,7 +614,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||||
break;
|
break;
|
||||||
case RegType::DuplicatedSingle:
|
case RegType::DuplicatedSingle:
|
||||||
flush_reg = GetReg();
|
flush_reg = GetReg();
|
||||||
m_jit->ConvertSingleToDoubleLower(flush_reg, host_reg, flush_reg);
|
m_jit->ConvertSingleToDoubleLower(preg, flush_reg, host_reg, flush_reg);
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case RegType::Duplicated:
|
case RegType::Duplicated:
|
||||||
// Store PSR1 (which is equal to PSR0) in memory.
|
// Store PSR1 (which is equal to PSR0) in memory.
|
||||||
|
@ -725,13 +725,13 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||||
if (type == RegType::Single)
|
if (type == RegType::Single)
|
||||||
{
|
{
|
||||||
if (dirty)
|
if (dirty)
|
||||||
m_jit->ConvertSingleToDoublePair(host_reg, host_reg, tmp_reg);
|
m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg);
|
||||||
type = RegType::Register;
|
type = RegType::Register;
|
||||||
}
|
}
|
||||||
if (type == RegType::DuplicatedSingle || type == RegType::LowerPairSingle)
|
if (type == RegType::DuplicatedSingle || type == RegType::LowerPairSingle)
|
||||||
{
|
{
|
||||||
if (dirty)
|
if (dirty)
|
||||||
m_jit->ConvertSingleToDoubleLower(host_reg, host_reg, tmp_reg);
|
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||||
|
|
||||||
if (type == RegType::DuplicatedSingle)
|
if (type == RegType::DuplicatedSingle)
|
||||||
type = RegType::Duplicated;
|
type = RegType::Duplicated;
|
||||||
|
@ -822,7 +822,7 @@ void Arm64FPRCache::FixSinglePrecision(size_t preg)
|
||||||
m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||||
reg.Load(host_reg, RegType::DuplicatedSingle);
|
reg.Load(host_reg, RegType::DuplicatedSingle);
|
||||||
break;
|
break;
|
||||||
case RegType::Register: // PS0 and PS1 needs to be converted
|
case RegType::Register: // PS0 and PS1 need to be converted
|
||||||
m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||||
reg.Load(host_reg, RegType::Single);
|
reg.Load(host_reg, RegType::Single);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include "Common/BitSet.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/x64Emitter.h"
|
#include "Common/x64Emitter.h"
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
|
@ -98,6 +99,7 @@ protected:
|
||||||
PPCAnalyst::BlockRegStats gpa;
|
PPCAnalyst::BlockRegStats gpa;
|
||||||
PPCAnalyst::BlockRegStats fpa;
|
PPCAnalyst::BlockRegStats fpa;
|
||||||
PPCAnalyst::CodeOp* op;
|
PPCAnalyst::CodeOp* op;
|
||||||
|
BitSet32 fpr_is_store_safe;
|
||||||
|
|
||||||
JitBlock* curBlock;
|
JitBlock* curBlock;
|
||||||
|
|
||||||
|
|
|
@ -976,7 +976,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
|
|
||||||
op.fprIsSingle = fprIsSingle;
|
op.fprIsSingle = fprIsSingle;
|
||||||
op.fprIsDuplicated = fprIsDuplicated;
|
op.fprIsDuplicated = fprIsDuplicated;
|
||||||
op.fprIsStoreSafe = fprIsStoreSafe;
|
op.fprIsStoreSafeBeforeInst = fprIsStoreSafe;
|
||||||
if (op.fregOut >= 0)
|
if (op.fregOut >= 0)
|
||||||
{
|
{
|
||||||
if (op.opinfo->type == OpType::SingleFP)
|
if (op.opinfo->type == OpType::SingleFP)
|
||||||
|
@ -1036,6 +1036,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
(op.opinfo->type == OpType::SingleFP || op.opinfo->type == OpType::PS);
|
(op.opinfo->type == OpType::SingleFP || op.opinfo->type == OpType::PS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
op.fprIsStoreSafeAfterInst = fprIsStoreSafe;
|
||||||
|
|
||||||
if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS)
|
if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS)
|
||||||
{
|
{
|
||||||
|
|
|
@ -66,7 +66,8 @@ struct CodeOp // 16B
|
||||||
// convert between single and double formats by just using the host machine's instruction for it.
|
// convert between single and double formats by just using the host machine's instruction for it.
|
||||||
// (The reason why we can't always do this is because some games rely on the exact bits of
|
// (The reason why we can't always do this is because some games rely on the exact bits of
|
||||||
// denormals and SNaNs being preserved as long as no arithmetic operation is performed on them.)
|
// denormals and SNaNs being preserved as long as no arithmetic operation is performed on them.)
|
||||||
BitSet32 fprIsStoreSafe;
|
BitSet32 fprIsStoreSafeBeforeInst;
|
||||||
|
BitSet32 fprIsStoreSafeAfterInst;
|
||||||
|
|
||||||
BitSet32 GetFregsOut() const
|
BitSet32 GetFregsOut() const
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue