Merge pull request #7492 from MerryMage/regcache2
JitRegCache: Refactor register cache
This commit is contained in:
commit
61b9ef33ab
|
@ -193,10 +193,14 @@ public:
|
|||
constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
|
||||
constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
|
||||
constexpr BitSet operator~() const { return BitSet(~m_val); }
|
||||
constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); }
|
||||
constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); }
|
||||
constexpr explicit operator bool() const { return m_val != 0; }
|
||||
BitSet& operator|=(BitSet other) { return *this = *this | other; }
|
||||
BitSet& operator&=(BitSet other) { return *this = *this & other; }
|
||||
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
|
||||
BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
|
||||
BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
|
||||
// Warning: Even though on modern CPUs this is a single fast instruction,
|
||||
// Dolphin's official builds do not currently assume POPCNT support on x86,
|
||||
// so slower explicit bit twiddling is generated. Still should generally
|
||||
|
|
|
@ -159,6 +159,7 @@
|
|||
<ClInclude Include="TraversalClient.h" />
|
||||
<ClInclude Include="TraversalProto.h" />
|
||||
<ClInclude Include="UPnP.h" />
|
||||
<ClInclude Include="VariantUtil.h" />
|
||||
<ClInclude Include="Version.h" />
|
||||
<ClInclude Include="WorkQueueThread.h" />
|
||||
<ClInclude Include="x64ABI.h" />
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2018 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <variant>
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <typename... From>
|
||||
struct VariantCastProxy
|
||||
{
|
||||
const std::variant<From...>& v;
|
||||
|
||||
template <typename... To>
|
||||
operator std::variant<To...>() const
|
||||
{
|
||||
return std::visit([](auto&& arg) { return std::variant<To...>{arg}; }, v);
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
template <typename... From>
|
||||
auto VariantCast(const std::variant<From...>& v)
|
||||
{
|
||||
return detail::VariantCastProxy<From...>{v};
|
||||
}
|
|
@ -236,20 +236,20 @@ if(_M_X86)
|
|||
DSP/Jit/x64/DSPJitMultiplier.cpp
|
||||
DSP/Jit/x64/DSPJitTables.cpp
|
||||
DSP/Jit/x64/DSPJitUtil.cpp
|
||||
PowerPC/Jit64/FPURegCache.cpp
|
||||
PowerPC/Jit64/GPRRegCache.cpp
|
||||
PowerPC/Jit64/Jit64_Tables.cpp
|
||||
PowerPC/Jit64/JitAsm.cpp
|
||||
PowerPC/Jit64/Jit_Branch.cpp
|
||||
PowerPC/Jit64/Jit.cpp
|
||||
PowerPC/Jit64/Jit64_Tables.cpp
|
||||
PowerPC/Jit64/Jit_Branch.cpp
|
||||
PowerPC/Jit64/Jit_FloatingPoint.cpp
|
||||
PowerPC/Jit64/Jit_Integer.cpp
|
||||
PowerPC/Jit64/Jit_LoadStore.cpp
|
||||
PowerPC/Jit64/Jit_LoadStoreFloating.cpp
|
||||
PowerPC/Jit64/Jit_LoadStorePaired.cpp
|
||||
PowerPC/Jit64/Jit_Paired.cpp
|
||||
PowerPC/Jit64/JitRegCache.cpp
|
||||
PowerPC/Jit64/Jit_SystemRegisters.cpp
|
||||
PowerPC/Jit64/JitAsm.cpp
|
||||
PowerPC/Jit64/RegCache/FPURegCache.cpp
|
||||
PowerPC/Jit64/RegCache/GPRRegCache.cpp
|
||||
PowerPC/Jit64/RegCache/JitRegCache.cpp
|
||||
PowerPC/Jit64Common/BlockCache.cpp
|
||||
PowerPC/Jit64Common/ConstantPool.cpp
|
||||
PowerPC/Jit64Common/EmuCodeBlock.cpp
|
||||
|
|
|
@ -241,8 +241,8 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="IOS\USB\Bluetooth\WiimoteDevice.cpp" />
|
||||
<ClCompile Include="IOS\USB\Bluetooth\WiimoteHIDAttr.cpp" />
|
||||
<ClCompile Include="IOS\WFS\WFSSRV.cpp" />
|
||||
<ClCompile Include="IOS\WFS\WFSI.cpp" />
|
||||
<ClCompile Include="IOS\WFS\WFSSRV.cpp" />
|
||||
<ClCompile Include="MemTools.cpp" />
|
||||
<ClCompile Include="Movie.cpp" />
|
||||
<ClCompile Include="NetPlayClient.cpp" />
|
||||
|
@ -260,13 +260,8 @@
|
|||
<ClCompile Include="PowerPC\Interpreter\Interpreter_Paired.cpp" />
|
||||
<ClCompile Include="PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" />
|
||||
<ClCompile Include="PowerPC\Interpreter\Interpreter_Tables.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\JitRegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_FloatingPoint.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_Integer.cpp" />
|
||||
|
@ -275,7 +270,12 @@
|
|||
<ClCompile Include="PowerPC\Jit64\Jit_LoadStorePaired.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_Paired.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\Jit_SystemRegisters.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\RegCache\FPURegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\RegCache\GPRRegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64\RegCache\JitRegCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\BlockCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\EmuCodeBlock.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\FarCodeCache.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\Jit64AsmCommon.cpp" />
|
||||
|
@ -284,10 +284,6 @@
|
|||
<ClCompile Include="PowerPC\JitCommon\JitAsmCommon.cpp" />
|
||||
<ClCompile Include="PowerPC\JitCommon\JitBase.cpp" />
|
||||
<ClCompile Include="PowerPC\JitCommon\JitCache.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\CSVSignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\DSYSignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\MEGASignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\SignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\JitInterface.cpp" />
|
||||
<ClCompile Include="PowerPC\MMU.cpp" />
|
||||
<ClCompile Include="PowerPC\PowerPC.cpp" />
|
||||
|
@ -295,6 +291,10 @@
|
|||
<ClCompile Include="PowerPC\PPCCache.cpp" />
|
||||
<ClCompile Include="PowerPC\PPCSymbolDB.cpp" />
|
||||
<ClCompile Include="PowerPC\PPCTables.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\CSVSignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\DSYSignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\MEGASignatureDB.cpp" />
|
||||
<ClCompile Include="PowerPC\SignatureDB\SignatureDB.cpp" />
|
||||
<ClCompile Include="State.cpp" />
|
||||
<ClCompile Include="SysConf.cpp" />
|
||||
<ClCompile Include="TitleDatabase.cpp" />
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#include "Core/HW/ProcessorInterface.h"
|
||||
#include "Core/PatchEngine.h"
|
||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
|
||||
|
@ -756,8 +756,6 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
js.downcountAmount += opinfo->numCycles;
|
||||
js.fastmemLoadStore = nullptr;
|
||||
js.fixupExceptionHandler = false;
|
||||
js.revertGprLoad = -1;
|
||||
js.revertFprLoad = -1;
|
||||
|
||||
if (!SConfig::GetInstance().bEnableDebugging)
|
||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
|
||||
|
@ -800,13 +798,17 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
FixupBranch noCPInt = J_CC(CC_Z, true);
|
||||
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||
WriteExternalExceptionExit();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||
WriteExternalExceptionExit();
|
||||
}
|
||||
SwitchToNearCode();
|
||||
|
||||
SetJumpTarget(noCPInt);
|
||||
SetJumpTarget(noExtIntEnable);
|
||||
}
|
||||
|
@ -824,14 +826,19 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(b1);
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
|
||||
// If a FPU exception occurs, the exception handler will read
|
||||
// from PC. Update PC with the latest value in case that happens.
|
||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
||||
WriteExceptionExit();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
// If a FPU exception occurs, the exception handler will read
|
||||
// from PC. Update PC with the latest value in case that happens.
|
||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
||||
WriteExceptionExit();
|
||||
}
|
||||
SwitchToNearCode();
|
||||
|
||||
js.firstFPInstructionFound = true;
|
||||
|
@ -866,20 +873,8 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
// output, which needs to be bound in the actual instruction compilation.
|
||||
// TODO: make this smarter in the case that we're actually register-starved, i.e.
|
||||
// prioritize the more important registers.
|
||||
for (int reg : op.regsIn)
|
||||
{
|
||||
if (gpr.NumFreeRegisters() < 2)
|
||||
break;
|
||||
if (op.gprInReg[reg] && !gpr.R(reg).IsImm())
|
||||
gpr.BindToRegister(reg, true, false);
|
||||
}
|
||||
for (int reg : op.fregsIn)
|
||||
{
|
||||
if (fpr.NumFreeRegisters() < 2)
|
||||
break;
|
||||
if (op.fprInXmm[reg])
|
||||
fpr.BindToRegister(reg, true, false);
|
||||
}
|
||||
gpr.PreloadRegisters(op.regsIn & op.gprInReg);
|
||||
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm);
|
||||
|
||||
CompileInstruction(op);
|
||||
|
||||
|
@ -908,24 +903,25 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
|
||||
}
|
||||
|
||||
BitSet32 gprToFlush = BitSet32::AllTrue(32);
|
||||
BitSet32 fprToFlush = BitSet32::AllTrue(32);
|
||||
if (js.revertGprLoad >= 0)
|
||||
gprToFlush[js.revertGprLoad] = false;
|
||||
if (js.revertFprLoad >= 0)
|
||||
fprToFlush[js.revertFprLoad] = false;
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState, gprToFlush);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState, fprToFlush);
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
|
||||
gpr.Revert();
|
||||
fpr.Revert();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||
WriteExceptionExit();
|
||||
SwitchToNearCode();
|
||||
}
|
||||
|
||||
gpr.Commit();
|
||||
fpr.Commit();
|
||||
|
||||
// If we have a register that will never be used again, flush it.
|
||||
for (int j : ~op.gprInUse)
|
||||
gpr.StoreFromRegister(j);
|
||||
for (int j : ~op.fprInUse)
|
||||
fpr.StoreFromRegister(j);
|
||||
gpr.Flush(~op.gprInUse);
|
||||
fpr.Flush(~op.fprInUse);
|
||||
|
||||
if (opinfo->flags & FL_LOADSTORE)
|
||||
++js.numLoadStoreInst;
|
||||
|
@ -969,15 +965,8 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const
|
|||
|
||||
BitSet32 Jit64::CallerSavedRegistersInUse() const
|
||||
{
|
||||
BitSet32 result;
|
||||
for (size_t i = 0; i < RegCache::NUM_XREGS; i++)
|
||||
{
|
||||
if (!gpr.IsFreeX(i))
|
||||
result[i] = true;
|
||||
if (!fpr.IsFreeX(i))
|
||||
result[16 + i] = true;
|
||||
}
|
||||
return result & ABI_ALL_CALLER_SAVED;
|
||||
BitSet32 in_use = gpr.RegistersInUse() | (fpr.RegistersInUse() << 16);
|
||||
return in_use & ABI_ALL_CALLER_SAVED;
|
||||
}
|
||||
|
||||
void Jit64::EnableBlockLink()
|
||||
|
|
|
@ -21,10 +21,10 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/FPURegCache.h"
|
||||
#include "Core/PowerPC/Jit64/GPRRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||
|
||||
|
@ -88,10 +88,8 @@ public:
|
|||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void FinalizeCarry(Gen::CCFlags cond);
|
||||
void FinalizeCarry(bool ca);
|
||||
void ComputeRC(const Gen::OpArg& arg, bool needs_test = true, bool needs_sext = true);
|
||||
void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true);
|
||||
|
||||
// Use to extract bytes from a register using the regcache. offset is in bytes.
|
||||
Gen::OpArg ExtractFromReg(int reg, int offset);
|
||||
void AndWithMask(Gen::X64Reg reg, u32 mask);
|
||||
bool CheckMergedBranch(u32 crf) const;
|
||||
void DoMergedBranch();
|
||||
|
|
|
@ -1,324 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace PowerPC;
|
||||
|
||||
RegCache::RegCache(Jit64& jit) : m_jit{jit}
|
||||
{
|
||||
}
|
||||
|
||||
void RegCache::Start()
|
||||
{
|
||||
m_xregs.fill({});
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::DiscardRegContentsIfCached(preg_t preg)
|
||||
{
|
||||
if (m_regs[preg].IsBound())
|
||||
{
|
||||
X64Reg xr = m_regs[preg].Location().GetSimpleReg();
|
||||
m_xregs[xr].SetFlushed();
|
||||
m_regs[preg].SetFlushed();
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::SetEmitter(XEmitter* emitter)
|
||||
{
|
||||
m_emitter = emitter;
|
||||
}
|
||||
|
||||
void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
|
||||
{
|
||||
ASSERT_MSG(
|
||||
DYNA_REC,
|
||||
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
|
||||
"Someone forgot to unlock a X64 reg");
|
||||
|
||||
for (unsigned int i : regsToFlush)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), "Someone forgot to unlock PPC reg %u (X64 reg %i).",
|
||||
i, RX(i));
|
||||
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
break;
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
// We can have a cached value without a host register through speculative constants.
|
||||
// It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
|
||||
// if PPCSTATE is modified externally (e.g. fallback to interpreter).
|
||||
m_regs[i].SetFlushed();
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
StoreFromRegister(i, mode);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::FlushLockX(X64Reg reg)
|
||||
{
|
||||
FlushX(reg);
|
||||
LockX(reg);
|
||||
}
|
||||
|
||||
void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2)
|
||||
{
|
||||
FlushX(reg1);
|
||||
FlushX(reg2);
|
||||
LockX(reg1);
|
||||
LockX(reg2);
|
||||
}
|
||||
|
||||
bool RegCache::SanityCheck() const
|
||||
{
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
{
|
||||
if (m_regs[i].IsLocked())
|
||||
return false;
|
||||
|
||||
Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
|
||||
if (m_xregs[xr].IsLocked())
|
||||
return false;
|
||||
if (m_xregs[xr].Contents() != i)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void RegCache::KillImmediate(preg_t preg, bool doLoad, bool makeDirty)
|
||||
{
|
||||
switch (m_regs[preg].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
if (makeDirty)
|
||||
m_xregs[RX(preg)].MakeDirty();
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
BindToRegister(preg, doLoad, makeDirty);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (!m_regs[i].IsBound())
|
||||
{
|
||||
X64Reg xr = GetFreeXReg();
|
||||
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
|
||||
|
||||
m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
|
||||
|
||||
if (doLoad)
|
||||
{
|
||||
LoadRegister(i, xr);
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC,
|
||||
std::none_of(m_regs.begin(), m_regs.end(),
|
||||
[xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
|
||||
"Xreg %i already bound", xr);
|
||||
|
||||
m_regs[i].SetBoundTo(xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// reg location must be simplereg; memory locations
|
||||
// and immediates are taken care of above.
|
||||
if (makeDirty)
|
||||
m_xregs[RX(i)].MakeDirty();
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
|
||||
}
|
||||
|
||||
void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
|
||||
{
|
||||
bool doStore = false;
|
||||
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
return;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
{
|
||||
X64Reg xr = RX(i);
|
||||
doStore = m_xregs[xr].IsDirty();
|
||||
if (mode == FlushMode::All)
|
||||
m_xregs[xr].SetFlushed();
|
||||
break;
|
||||
}
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
doStore = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (doStore)
|
||||
StoreRegister(i, GetDefaultLocation(i));
|
||||
if (mode == FlushMode::All)
|
||||
m_regs[i].SetFlushed();
|
||||
}
|
||||
|
||||
const OpArg& RegCache::R(preg_t preg) const
|
||||
{
|
||||
return m_regs[preg].Location();
|
||||
}
|
||||
|
||||
X64Reg RegCache::RX(preg_t preg) const
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
|
||||
return m_regs[preg].Location().GetSimpleReg();
|
||||
}
|
||||
|
||||
void RegCache::UnlockAll()
|
||||
{
|
||||
for (auto& reg : m_regs)
|
||||
reg.Unlock();
|
||||
}
|
||||
|
||||
void RegCache::UnlockAllX()
|
||||
{
|
||||
for (auto& xreg : m_xregs)
|
||||
xreg.Unlock();
|
||||
}
|
||||
|
||||
bool RegCache::IsFreeX(size_t xreg) const
|
||||
{
|
||||
return m_xregs[xreg].IsFree();
|
||||
}
|
||||
|
||||
X64Reg RegCache::GetFreeXReg()
|
||||
{
|
||||
size_t aCount;
|
||||
const X64Reg* aOrder = GetAllocationOrder(&aCount);
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xr = aOrder[i];
|
||||
if (m_xregs[xr].IsFree())
|
||||
{
|
||||
return xr;
|
||||
}
|
||||
}
|
||||
|
||||
// Okay, not found; run the register allocator heuristic and figure out which register we should
|
||||
// clobber.
|
||||
float min_score = std::numeric_limits<float>::max();
|
||||
X64Reg best_xreg = INVALID_REG;
|
||||
size_t best_preg = 0;
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xreg = (X64Reg)aOrder[i];
|
||||
preg_t preg = m_xregs[xreg].Contents();
|
||||
if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
|
||||
continue;
|
||||
float score = ScoreRegister(xreg);
|
||||
if (score < min_score)
|
||||
{
|
||||
min_score = score;
|
||||
best_xreg = xreg;
|
||||
best_preg = preg;
|
||||
}
|
||||
}
|
||||
|
||||
if (best_xreg != INVALID_REG)
|
||||
{
|
||||
StoreFromRegister(best_preg);
|
||||
return best_xreg;
|
||||
}
|
||||
|
||||
// Still no dice? Die!
|
||||
ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
|
||||
return INVALID_REG;
|
||||
}
|
||||
|
||||
int RegCache::NumFreeRegisters() const
|
||||
{
|
||||
int count = 0;
|
||||
size_t aCount;
|
||||
const X64Reg* aOrder = GetAllocationOrder(&aCount);
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
if (m_xregs[aOrder[i]].IsFree())
|
||||
count++;
|
||||
return count;
|
||||
}
|
||||
|
||||
void RegCache::FlushX(X64Reg reg)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
|
||||
ASSERT(!m_xregs[reg].IsLocked());
|
||||
if (!m_xregs[reg].IsFree())
|
||||
{
|
||||
StoreFromRegister(m_xregs[reg].Contents());
|
||||
}
|
||||
}
|
||||
|
||||
// Estimate roughly how bad it would be to de-allocate this register. Higher score
|
||||
// means more bad.
|
||||
float RegCache::ScoreRegister(X64Reg xreg) const
|
||||
{
|
||||
preg_t preg = m_xregs[xreg].Contents();
|
||||
float score = 0;
|
||||
|
||||
// If it's not dirty, we don't need a store to write it back to the register file, so
|
||||
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
|
||||
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
|
||||
// to the number of extra stores it causes.
|
||||
if (m_xregs[xreg].IsDirty())
|
||||
score += 2;
|
||||
|
||||
// If the register isn't actually needed in a physical register for a later instruction,
|
||||
// writing it back to the register file isn't quite as bad.
|
||||
if (GetRegUtilization()[preg])
|
||||
{
|
||||
// Don't look too far ahead; we don't want to have quadratic compilation times for
|
||||
// enormous block sizes!
|
||||
// This actually improves register allocation a tiny bit; I'm not sure why.
|
||||
u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
|
||||
// Count how many other registers are going to be used before we need this one again.
|
||||
u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
|
||||
// Totally ad-hoc heuristic to bias based on how many other registers we'll need
|
||||
// before this one gets used again.
|
||||
score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
|
@ -1,230 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cinttypes>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
using preg_t = size_t;
|
||||
|
||||
class PPCCachedReg
|
||||
{
|
||||
public:
|
||||
enum class LocationType
|
||||
{
|
||||
/// Value is currently at its default location
|
||||
Default,
|
||||
/// Value is currently bound to a x64 register
|
||||
Bound,
|
||||
/// Value is known as an immediate and has not been written back to its default location
|
||||
Immediate,
|
||||
/// Value is known as an immediate and is already present at its default location
|
||||
SpeculativeImmediate,
|
||||
};
|
||||
|
||||
PPCCachedReg() = default;
|
||||
|
||||
explicit PPCCachedReg(Gen::OpArg default_location_)
|
||||
: default_location(default_location_), location(default_location_)
|
||||
{
|
||||
}
|
||||
|
||||
const Gen::OpArg& Location() const { return location; }
|
||||
|
||||
LocationType GetLocationType() const
|
||||
{
|
||||
if (!away)
|
||||
{
|
||||
if (location.IsImm())
|
||||
return LocationType::SpeculativeImmediate;
|
||||
|
||||
ASSERT(location == default_location);
|
||||
return LocationType::Default;
|
||||
}
|
||||
|
||||
ASSERT(location.IsImm() || location.IsSimpleReg());
|
||||
return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
|
||||
}
|
||||
|
||||
bool IsAway() const { return away; }
|
||||
bool IsBound() const { return GetLocationType() == LocationType::Bound; }
|
||||
|
||||
void SetBoundTo(Gen::X64Reg xreg)
|
||||
{
|
||||
away = true;
|
||||
location = Gen::R(xreg);
|
||||
}
|
||||
|
||||
void SetFlushed()
|
||||
{
|
||||
away = false;
|
||||
location = default_location;
|
||||
}
|
||||
|
||||
void SetToImm32(u32 imm32, bool dirty = true)
|
||||
{
|
||||
away |= dirty;
|
||||
location = Gen::Imm32(imm32);
|
||||
}
|
||||
|
||||
bool IsLocked() const { return locked; }
|
||||
void Lock() { locked = true; }
|
||||
void Unlock() { locked = false; }
|
||||
|
||||
private:
|
||||
Gen::OpArg default_location{};
|
||||
Gen::OpArg location{};
|
||||
bool away = false; // value not in source register
|
||||
bool locked = false;
|
||||
};
|
||||
|
||||
class X64CachedReg
|
||||
{
|
||||
public:
|
||||
preg_t Contents() const { return ppcReg; }
|
||||
|
||||
void SetBoundTo(preg_t ppcReg_, bool dirty_)
|
||||
{
|
||||
free = false;
|
||||
ppcReg = ppcReg_;
|
||||
dirty = dirty_;
|
||||
}
|
||||
|
||||
void SetFlushed()
|
||||
{
|
||||
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
|
||||
free = true;
|
||||
dirty = false;
|
||||
}
|
||||
|
||||
bool IsFree() const { return free && !locked; }
|
||||
|
||||
bool IsDirty() const { return dirty; }
|
||||
void MakeDirty() { dirty = true; }
|
||||
|
||||
bool IsLocked() const { return locked; }
|
||||
void Lock() { locked = true; }
|
||||
void Unlock() { locked = false; }
|
||||
|
||||
private:
|
||||
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
|
||||
bool free = true;
|
||||
bool dirty = false;
|
||||
bool locked = false;
|
||||
};
|
||||
|
||||
class RegCache
|
||||
{
|
||||
public:
|
||||
enum class FlushMode
|
||||
{
|
||||
All,
|
||||
MaintainState,
|
||||
};
|
||||
|
||||
static constexpr size_t NUM_XREGS = 16;
|
||||
|
||||
explicit RegCache(Jit64& jit);
|
||||
virtual ~RegCache() = default;
|
||||
|
||||
virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
|
||||
|
||||
void Start();
|
||||
|
||||
void DiscardRegContentsIfCached(preg_t preg);
|
||||
void SetEmitter(Gen::XEmitter* emitter);
|
||||
|
||||
void Flush(FlushMode mode = FlushMode::All, BitSet32 regsToFlush = BitSet32::AllTrue(32));
|
||||
|
||||
void FlushLockX(Gen::X64Reg reg);
|
||||
void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2);
|
||||
|
||||
bool SanityCheck() const;
|
||||
void KillImmediate(preg_t preg, bool doLoad, bool makeDirty);
|
||||
|
||||
// TODO - instead of doload, use "read", "write"
|
||||
// read only will not set dirty flag
|
||||
void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
|
||||
void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::All);
|
||||
|
||||
const Gen::OpArg& R(preg_t preg) const;
|
||||
Gen::X64Reg RX(preg_t preg) const;
|
||||
|
||||
// Register locking.
|
||||
|
||||
// these are powerpc reg indices
|
||||
template <typename T>
|
||||
void Lock(T p)
|
||||
{
|
||||
m_regs[p].Lock();
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void Lock(T first, Args... args)
|
||||
{
|
||||
Lock(first);
|
||||
Lock(args...);
|
||||
}
|
||||
|
||||
// these are x64 reg indices
|
||||
template <typename T>
|
||||
void LockX(T x)
|
||||
{
|
||||
if (m_xregs[x].IsLocked())
|
||||
PanicAlert("RegCache: x %i already locked!", x);
|
||||
m_xregs[x].Lock();
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void LockX(T first, Args... args)
|
||||
{
|
||||
LockX(first);
|
||||
LockX(args...);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void UnlockX(T x)
|
||||
{
|
||||
if (!m_xregs[x].IsLocked())
|
||||
PanicAlert("RegCache: x %i already unlocked!", x);
|
||||
m_xregs[x].Unlock();
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
void UnlockX(T first, Args... args)
|
||||
{
|
||||
UnlockX(first);
|
||||
UnlockX(args...);
|
||||
}
|
||||
|
||||
void UnlockAll();
|
||||
void UnlockAllX();
|
||||
|
||||
bool IsFreeX(size_t xreg) const;
|
||||
|
||||
Gen::X64Reg GetFreeXReg();
|
||||
int NumFreeRegisters() const;
|
||||
|
||||
protected:
|
||||
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
|
||||
virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
|
||||
|
||||
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;
|
||||
|
||||
virtual BitSet32 GetRegUtilization() const = 0;
|
||||
virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
|
||||
|
||||
void FlushX(Gen::X64Reg reg);
|
||||
|
||||
float ScoreRegister(Gen::X64Reg xreg) const;
|
||||
|
||||
Jit64& m_jit;
|
||||
std::array<PPCCachedReg, 32> m_regs;
|
||||
std::array<X64CachedReg, NUM_XREGS> m_xregs;
|
||||
Gen::XEmitter* m_emitter = nullptr;
|
||||
};
|
|
@ -8,7 +8,7 @@
|
|||
#include "Core/CoreTiming.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
@ -160,9 +160,13 @@ void Jit64::bcx(UGeckoInstruction inst)
|
|||
else
|
||||
destination = js.compilerPC + SignExt16(inst.BD << 2);
|
||||
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
WriteExit(destination, inst.LK, js.compilerPC + 4);
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
WriteExit(destination, inst.LK, js.compilerPC + 4);
|
||||
}
|
||||
|
||||
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
||||
SetJumpTarget(pConditionDontBranch);
|
||||
|
@ -215,10 +219,14 @@ void Jit64::bcctrx(UGeckoInstruction inst)
|
|||
if (inst.LK_3)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
|
||||
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
|
||||
// Would really like to continue the block here, but it ends. TODO.
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
|
||||
// Would really like to continue the block here, but it ends. TODO.
|
||||
}
|
||||
SetJumpTarget(b);
|
||||
|
||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||
|
@ -269,9 +277,13 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
|||
if (inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
|
||||
|
||||
gpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
fpr.Flush(RegCache::FlushMode::MaintainState);
|
||||
WriteBLRExit();
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
WriteBLRExit();
|
||||
}
|
||||
|
||||
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
||||
SetJumpTarget(pConditionDontBranch);
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
@ -76,7 +76,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
std::vector<FixupBranch> fixups;
|
||||
for (u32 x : inputs)
|
||||
{
|
||||
MOVDDUP(xmm, fpr.R(x));
|
||||
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
||||
RegCache::Realize(Rx);
|
||||
MOVDDUP(xmm, Rx);
|
||||
UCOMISD(xmm, R(xmm));
|
||||
fixups.push_back(J_CC(CC_P));
|
||||
}
|
||||
|
@ -102,8 +104,10 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
BLENDVPD(xmm, MConst(psGeneratedQNaN));
|
||||
for (u32 x : inputs)
|
||||
{
|
||||
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
|
||||
BLENDVPD(xmm, fpr.R(x));
|
||||
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
||||
RegCache::Realize(Rx);
|
||||
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
|
||||
BLENDVPD(xmm, Rx);
|
||||
}
|
||||
FixupBranch done = J(true);
|
||||
SwitchToNearCode();
|
||||
|
@ -112,8 +116,8 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
else
|
||||
{
|
||||
// SSE2 fallback
|
||||
X64Reg tmp = fpr.GetFreeXReg();
|
||||
fpr.FlushLockX(tmp);
|
||||
RCX64Reg tmp = fpr.Scratch();
|
||||
RegCache::Realize(tmp);
|
||||
MOVAPD(clobber, R(xmm));
|
||||
CMPPD(clobber, R(clobber), CMP_UNORD);
|
||||
MOVMSKPD(RSCRATCH, R(clobber));
|
||||
|
@ -125,20 +129,21 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
ANDNPD(clobber, R(xmm));
|
||||
ANDPD(tmp, MConst(psGeneratedQNaN));
|
||||
ORPD(tmp, R(clobber));
|
||||
MOVAPD(xmm, R(tmp));
|
||||
MOVAPD(xmm, tmp);
|
||||
for (u32 x : inputs)
|
||||
{
|
||||
MOVAPD(clobber, fpr.R(x));
|
||||
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
||||
RegCache::Realize(Rx);
|
||||
MOVAPD(clobber, Rx);
|
||||
CMPPD(clobber, R(clobber), CMP_ORD);
|
||||
MOVAPD(tmp, R(clobber));
|
||||
ANDNPD(clobber, fpr.R(x));
|
||||
ANDPD(xmm, R(tmp));
|
||||
ANDNPD(clobber, Rx);
|
||||
ANDPD(xmm, tmp);
|
||||
ORPD(xmm, R(clobber));
|
||||
}
|
||||
FixupBranch done = J(true);
|
||||
SwitchToNearCode();
|
||||
SetJumpTarget(done);
|
||||
fpr.UnlockX(tmp);
|
||||
}
|
||||
}
|
||||
if (xmm_out != xmm)
|
||||
|
@ -172,53 +177,55 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||
bool round_input = single && !js.op->fprIsSingle[inst.FC];
|
||||
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
|
||||
|
||||
const auto fp_tri_op = [&](int d, int a, int b, bool reversible,
|
||||
const auto fp_tri_op = [&](int op1, int op2, bool reversible,
|
||||
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
|
||||
void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) {
|
||||
fpr.Lock(d, a, b);
|
||||
fpr.BindToRegister(d, d == a || d == b || !single);
|
||||
X64Reg dest = preserve_inputs ? XMM1 : fpr.RX(d);
|
||||
RCX64Reg Rd = fpr.Bind(d, !single ? RCMode::ReadWrite : RCMode::Write);
|
||||
RCOpArg Rop1 = fpr.Use(op1, RCMode::Read);
|
||||
RCOpArg Rop2 = fpr.Use(op2, RCMode::Read);
|
||||
RegCache::Realize(Rd, Rop1, Rop2);
|
||||
|
||||
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
|
||||
if (roundRHS)
|
||||
{
|
||||
if (d == a && !preserve_inputs)
|
||||
if (d == op1 && !preserve_inputs)
|
||||
{
|
||||
Force25BitPrecision(XMM0, fpr.R(b), XMM1);
|
||||
(this->*sseOp)(fpr.RX(d), R(XMM0));
|
||||
Force25BitPrecision(XMM0, Rop2, XMM1);
|
||||
(this->*sseOp)(Rd, R(XMM0));
|
||||
}
|
||||
else
|
||||
{
|
||||
Force25BitPrecision(dest, fpr.R(b), XMM0);
|
||||
(this->*sseOp)(dest, fpr.R(a));
|
||||
Force25BitPrecision(dest, Rop2, XMM0);
|
||||
(this->*sseOp)(dest, Rop1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), packed, reversible);
|
||||
avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
|
||||
}
|
||||
|
||||
HandleNaNs(inst, fpr.RX(d), dest);
|
||||
HandleNaNs(inst, Rd, dest);
|
||||
if (single)
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
ForceSinglePrecision(Rd, Rd, packed, true);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
};
|
||||
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 18:
|
||||
fp_tri_op(d, a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
|
||||
fp_tri_op(a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
|
||||
packed ? &XEmitter::DIVPD : &XEmitter::DIVSD);
|
||||
break;
|
||||
case 20:
|
||||
fp_tri_op(d, a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
|
||||
fp_tri_op(a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
|
||||
packed ? &XEmitter::SUBPD : &XEmitter::SUBSD);
|
||||
break;
|
||||
case 21:
|
||||
fp_tri_op(d, a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
|
||||
fp_tri_op(a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
|
||||
packed ? &XEmitter::ADDPD : &XEmitter::ADDSD);
|
||||
break;
|
||||
case 25:
|
||||
fp_tri_op(d, a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
|
||||
fp_tri_op(a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
|
||||
packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input);
|
||||
break;
|
||||
default:
|
||||
|
@ -241,17 +248,32 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
bool packed = inst.OPCD == 4 || (!cpu_info.bAtom && single && js.op->fprIsDuplicated[a] &&
|
||||
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
|
||||
|
||||
fpr.Lock(a, b, c, d);
|
||||
// While we don't know if any games are actually affected (replays seem to work with all the usual
|
||||
// suspects for desyncing), netplay and other applications need absolute perfect determinism, so
|
||||
// be extra careful and don't use FMA, even if in theory it might be okay.
|
||||
// Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
|
||||
// to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
|
||||
// instances on different computers giving identical results.
|
||||
const bool use_fma = cpu_info.bFMA && !Core::WantsDeterminism();
|
||||
|
||||
// For use_fma == true:
|
||||
// Statistics suggests b is a lot less likely to be unbound in practice, so
|
||||
// if we have to pick one of a or b to bind, let's make it b.
|
||||
RCOpArg Ra = fpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rc = fpr.Use(c, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
|
||||
RegCache::Realize(Ra, Rb, Rc, Rd);
|
||||
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 14:
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MOVDDUP(XMM1, Rc);
|
||||
if (round_input)
|
||||
Force25BitPrecision(XMM1, R(XMM1), XMM0);
|
||||
break;
|
||||
case 15:
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
|
||||
if (round_input)
|
||||
Force25BitPrecision(XMM1, R(XMM1), XMM0);
|
||||
break;
|
||||
|
@ -260,38 +282,29 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
X64Reg tmp1 = special ? XMM0 : XMM1;
|
||||
X64Reg tmp2 = special ? XMM1 : XMM0;
|
||||
if (single && round_input)
|
||||
Force25BitPrecision(tmp1, fpr.R(c), tmp2);
|
||||
Force25BitPrecision(tmp1, Rc, tmp2);
|
||||
else
|
||||
MOVAPD(tmp1, fpr.R(c));
|
||||
MOVAPD(tmp1, Rc);
|
||||
break;
|
||||
}
|
||||
|
||||
// While we don't know if any games are actually affected (replays seem to work with all the usual
|
||||
// suspects for desyncing), netplay and other applications need absolute perfect determinism, so
|
||||
// be extra careful and don't use FMA, even if in theory it might be okay.
|
||||
// Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
|
||||
// to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
|
||||
// instances on different computers giving identical results.
|
||||
if (cpu_info.bFMA && !Core::WantsDeterminism())
|
||||
if (use_fma)
|
||||
{
|
||||
// Statistics suggests b is a lot less likely to be unbound in practice, so
|
||||
// if we have to pick one of a or b to bind, let's make it b.
|
||||
fpr.BindToRegister(b, true, false);
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 28: // msub
|
||||
if (packed)
|
||||
VFMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
else
|
||||
VFMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
break;
|
||||
case 14: // madds0
|
||||
case 15: // madds1
|
||||
case 29: // madd
|
||||
if (packed)
|
||||
VFMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
else
|
||||
VFMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
break;
|
||||
// PowerPC and x86 define NMADD/NMSUB differently
|
||||
// x86: D = -A*C (+/-) B
|
||||
|
@ -299,15 +312,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
// so we have to swap them; the ADD/SUB here isn't a typo.
|
||||
case 30: // nmsub
|
||||
if (packed)
|
||||
VFNMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFNMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
else
|
||||
VFNMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFNMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
break;
|
||||
case 31: // nmadd
|
||||
if (packed)
|
||||
VFNMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFNMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
else
|
||||
VFNMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
|
||||
VFNMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -315,15 +328,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
{
|
||||
// We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)), so handle it
|
||||
// separately.
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
MOVAPD(XMM1, Rb);
|
||||
if (packed)
|
||||
{
|
||||
MULPD(XMM0, fpr.R(a));
|
||||
MULPD(XMM0, Ra);
|
||||
SUBPD(XMM1, R(XMM0));
|
||||
}
|
||||
else
|
||||
{
|
||||
MULSD(XMM0, fpr.R(a));
|
||||
MULSD(XMM0, Ra);
|
||||
SUBSD(XMM1, R(XMM0));
|
||||
}
|
||||
}
|
||||
|
@ -331,36 +344,35 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
{
|
||||
if (packed)
|
||||
{
|
||||
MULPD(XMM1, fpr.R(a));
|
||||
MULPD(XMM1, Ra);
|
||||
if (inst.SUBOP5 == 28) // msub
|
||||
SUBPD(XMM1, fpr.R(b));
|
||||
SUBPD(XMM1, Rb);
|
||||
else //(n)madd(s[01])
|
||||
ADDPD(XMM1, fpr.R(b));
|
||||
ADDPD(XMM1, Rb);
|
||||
}
|
||||
else
|
||||
{
|
||||
MULSD(XMM1, fpr.R(a));
|
||||
MULSD(XMM1, Ra);
|
||||
if (inst.SUBOP5 == 28)
|
||||
SUBSD(XMM1, fpr.R(b));
|
||||
SUBSD(XMM1, Rb);
|
||||
else
|
||||
ADDSD(XMM1, fpr.R(b));
|
||||
ADDSD(XMM1, Rb);
|
||||
}
|
||||
if (inst.SUBOP5 == 31) // nmadd
|
||||
XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits));
|
||||
}
|
||||
fpr.BindToRegister(d, !single);
|
||||
|
||||
if (single)
|
||||
{
|
||||
HandleNaNs(inst, fpr.RX(d), XMM1);
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
|
||||
HandleNaNs(inst, Rd, XMM1);
|
||||
ForceSinglePrecision(Rd, Rd, packed, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
HandleNaNs(inst, XMM1, XMM1);
|
||||
MOVSD(fpr.RX(d), R(XMM1));
|
||||
MOVSD(Rd, R(XMM1));
|
||||
}
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::fsign(UGeckoInstruction inst)
|
||||
|
@ -373,29 +385,28 @@ void Jit64::fsign(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
bool packed = inst.OPCD == 4;
|
||||
|
||||
fpr.Lock(b, d);
|
||||
OpArg src = fpr.R(b);
|
||||
fpr.BindToRegister(d, false);
|
||||
RCOpArg src = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(src, Rd);
|
||||
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: // neg
|
||||
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
|
||||
MConst(packed ? psSignBits2 : psSignBits), packed);
|
||||
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
|
||||
packed);
|
||||
break;
|
||||
case 136: // nabs
|
||||
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src,
|
||||
MConst(packed ? psSignBits2 : psSignBits), packed);
|
||||
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
|
||||
packed);
|
||||
break;
|
||||
case 264: // abs
|
||||
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src,
|
||||
MConst(packed ? psAbsMask2 : psAbsMask), packed);
|
||||
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, Rd, src, MConst(packed ? psAbsMask2 : psAbsMask),
|
||||
packed);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("fsign bleh");
|
||||
break;
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::fselx(UGeckoInstruction inst)
|
||||
|
@ -411,35 +422,38 @@ void Jit64::fselx(UGeckoInstruction inst)
|
|||
|
||||
bool packed = inst.OPCD == 4; // ps_sel
|
||||
|
||||
fpr.Lock(a, b, c, d);
|
||||
RCOpArg Ra = fpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rc = fpr.Use(c, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, packed ? RCMode::Write : RCMode::ReadWrite);
|
||||
RegCache::Realize(Ra, Rb, Rc, Rd);
|
||||
|
||||
XORPD(XMM0, R(XMM0));
|
||||
// This condition is very tricky; there's only one right way to handle both the case of
|
||||
// negative/positive zero and NaN properly.
|
||||
// (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE.
|
||||
if (packed)
|
||||
CMPPD(XMM0, fpr.R(a), CMP_NLE);
|
||||
CMPPD(XMM0, Ra, CMP_NLE);
|
||||
else
|
||||
CMPSD(XMM0, fpr.R(a), CMP_NLE);
|
||||
CMPSD(XMM0, Ra, CMP_NLE);
|
||||
|
||||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
BLENDVPD(XMM1, fpr.R(b));
|
||||
MOVAPD(XMM1, Rc);
|
||||
BLENDVPD(XMM1, Rb);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
ANDPD(XMM0, fpr.R(b));
|
||||
ANDNPD(XMM1, fpr.R(c));
|
||||
ANDPD(XMM0, Rb);
|
||||
ANDNPD(XMM1, Rc);
|
||||
ORPD(XMM1, R(XMM0));
|
||||
}
|
||||
|
||||
fpr.BindToRegister(d, !packed);
|
||||
if (packed)
|
||||
MOVAPD(fpr.RX(d), R(XMM1));
|
||||
MOVAPD(Rd, R(XMM1));
|
||||
else
|
||||
MOVSD(fpr.RX(d), R(XMM1));
|
||||
fpr.UnlockAll();
|
||||
MOVSD(Rd, R(XMM1));
|
||||
}
|
||||
|
||||
void Jit64::fmrx(UGeckoInstruction inst)
|
||||
|
@ -454,26 +468,25 @@ void Jit64::fmrx(UGeckoInstruction inst)
|
|||
if (d == b)
|
||||
return;
|
||||
|
||||
fpr.Lock(b, d);
|
||||
|
||||
if (fpr.R(d).IsSimpleReg())
|
||||
RCOpArg Rd = fpr.Use(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
if (Rd.IsSimpleReg())
|
||||
{
|
||||
// We don't need to load d, but if it is loaded, we need to mark it as dirty.
|
||||
fpr.BindToRegister(d);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
// We have to use MOVLPD if b isn't loaded because "MOVSD reg, mem" sets the upper bits (64+)
|
||||
// to zero and we don't want that.
|
||||
if (!fpr.R(b).IsSimpleReg())
|
||||
MOVLPD(fpr.RX(d), fpr.R(b));
|
||||
if (!Rb.IsSimpleReg())
|
||||
MOVLPD(Rd.GetSimpleReg(), Rb);
|
||||
else
|
||||
MOVSD(fpr.R(d), fpr.RX(b));
|
||||
MOVSD(Rd, Rb.GetSimpleReg());
|
||||
}
|
||||
else
|
||||
{
|
||||
fpr.BindToRegister(b, true, false);
|
||||
MOVSD(fpr.R(d), fpr.RX(b));
|
||||
RCOpArg Rb = fpr.Bind(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
MOVSD(Rd, Rb.GetSimpleReg());
|
||||
}
|
||||
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
|
||||
|
@ -500,22 +513,22 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
|
|||
output[3 - (next.CRBB & 3)] |= 1 << dst;
|
||||
}
|
||||
|
||||
fpr.Lock(a, b);
|
||||
fpr.BindToRegister(b, true, false);
|
||||
RCOpArg Ra = upper ? fpr.Bind(a, RCMode::Read) : fpr.Use(a, RCMode::Read);
|
||||
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
|
||||
RegCache::Realize(Ra, Rb);
|
||||
|
||||
if (fprf)
|
||||
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
|
||||
|
||||
if (upper)
|
||||
{
|
||||
fpr.BindToRegister(a, true, false);
|
||||
MOVHLPS(XMM0, fpr.RX(a));
|
||||
MOVHLPS(XMM1, fpr.RX(b));
|
||||
MOVHLPS(XMM0, Ra.GetSimpleReg());
|
||||
MOVHLPS(XMM1, Rb);
|
||||
UCOMISD(XMM1, R(XMM0));
|
||||
}
|
||||
else
|
||||
{
|
||||
UCOMISD(fpr.RX(b), fpr.R(a));
|
||||
UCOMISD(Rb, Ra);
|
||||
}
|
||||
|
||||
FixupBranch pNaN, pLesser, pGreater;
|
||||
|
@ -572,7 +585,6 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
|
|||
}
|
||||
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::fcmpX(UGeckoInstruction inst)
|
||||
|
@ -591,8 +603,10 @@ void Jit64::fctiwx(UGeckoInstruction inst)
|
|||
|
||||
int d = inst.RD;
|
||||
int b = inst.RB;
|
||||
fpr.Lock(d, b);
|
||||
fpr.BindToRegister(d);
|
||||
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rb, Rd);
|
||||
|
||||
// Intel uses 0x80000000 as a generic error code while PowerPC uses clamping:
|
||||
//
|
||||
|
@ -606,7 +620,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
|
|||
// except for -0.0 where they are set to 0xfff80001 (TODO).
|
||||
|
||||
MOVAPD(XMM0, MConst(half_qnan_and_s32_max));
|
||||
MINSD(XMM0, fpr.R(b));
|
||||
MINSD(XMM0, Rb);
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
// fctiwx
|
||||
|
@ -620,8 +634,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
// d[64+] must not be modified
|
||||
MOVSD(fpr.R(d), XMM0);
|
||||
fpr.UnlockAll();
|
||||
MOVSD(Rd, XMM0);
|
||||
}
|
||||
|
||||
void Jit64::frspx(UGeckoInstruction inst)
|
||||
|
@ -633,12 +646,12 @@ void Jit64::frspx(UGeckoInstruction inst)
|
|||
int d = inst.FD;
|
||||
bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
|
||||
|
||||
fpr.Lock(b, d);
|
||||
OpArg src = fpr.R(b);
|
||||
fpr.BindToRegister(d, false);
|
||||
ForceSinglePrecision(fpr.RX(d), src, packed, true);
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rb, Rd);
|
||||
|
||||
ForceSinglePrecision(Rd, Rb, packed, true);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::frsqrtex(UGeckoInstruction inst)
|
||||
|
@ -649,15 +662,15 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(d);
|
||||
MOVAPD(XMM0, fpr.R(b));
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(scratch_guard, Rb, Rd);
|
||||
|
||||
MOVAPD(XMM0, Rb);
|
||||
CALL(asm_routines.frsqrte);
|
||||
MOVSD(fpr.R(d), XMM0);
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
MOVSD(Rd, XMM0);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::fresx(UGeckoInstruction inst)
|
||||
|
@ -668,13 +681,13 @@ void Jit64::fresx(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
MOVAPD(XMM0, fpr.R(b));
|
||||
fpr.BindToRegister(d, false);
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(scratch_guard, Rb, Rd);
|
||||
|
||||
MOVAPD(XMM0, Rb);
|
||||
CALL(asm_routines.fres);
|
||||
MOVDDUP(fpr.RX(d), R(XMM0));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
MOVDDUP(Rd, R(XMM0));
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -18,7 +18,7 @@
|
|||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/CPU.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/JitInterface.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
@ -126,12 +126,14 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
js.op[2].inst.hex == 0x4182fff8)
|
||||
{
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.BindToRegister(a, true, false);
|
||||
gpr.BindToRegister(d, false, true);
|
||||
SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, CallerSavedRegistersInUse(), signExtend);
|
||||
RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Ra, Rd);
|
||||
|
||||
SafeLoadToReg(Rd, Ra, accessSize, offset, CallerSavedRegistersInUse(), signExtend);
|
||||
|
||||
// if it's still 0, we can wait until the next event
|
||||
TEST(32, gpr.R(d), gpr.R(d));
|
||||
TEST(32, Rd, Rd);
|
||||
FixupBranch noIdle = J_CC(CC_NZ);
|
||||
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
|
@ -155,7 +157,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
// Determine whether this instruction updates inst.RA
|
||||
bool update;
|
||||
if (inst.OPCD == 31)
|
||||
update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.R(b).IsImm() || gpr.R(b).Imm32() != 0);
|
||||
update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.IsImm(b) || gpr.Imm32(b) != 0);
|
||||
else
|
||||
update = ((inst.OPCD & 1) != 0) && inst.SIMM_16 != 0;
|
||||
|
||||
|
@ -165,19 +167,20 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
bool storeAddress = false;
|
||||
s32 loadOffset = 0;
|
||||
|
||||
// Prepare result
|
||||
RCX64Reg Rd = jo.memcheck ? gpr.RevertableBind(d, RCMode::Write) : gpr.Bind(d, RCMode::Write);
|
||||
|
||||
// Prepare address operand
|
||||
OpArg opAddress;
|
||||
RCOpArg opAddress;
|
||||
if (!update && !a)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
if (!gpr.R(b).IsImm())
|
||||
gpr.BindToRegister(b, true, false);
|
||||
opAddress = gpr.R(b);
|
||||
opAddress = gpr.BindOrImm(b, RCMode::Read);
|
||||
}
|
||||
else
|
||||
{
|
||||
opAddress = Imm32((u32)(s32)inst.SIMM_16);
|
||||
opAddress = RCOpArg::Imm32((u32)(s32)inst.SIMM_16);
|
||||
}
|
||||
}
|
||||
else if (update && ((a == 0) || (d == a)))
|
||||
|
@ -186,36 +189,40 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (!indexed && gpr.R(a).IsImm() && !jo.memcheck)
|
||||
if (!indexed && gpr.IsImm(a) && !jo.memcheck)
|
||||
{
|
||||
u32 val = gpr.R(a).Imm32() + inst.SIMM_16;
|
||||
opAddress = Imm32(val);
|
||||
u32 val = gpr.Imm32(a) + inst.SIMM_16;
|
||||
opAddress = RCOpArg::Imm32(val);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, val);
|
||||
}
|
||||
else if (indexed && gpr.R(a).IsImm() && gpr.R(b).IsImm() && !jo.memcheck)
|
||||
else if (indexed && gpr.IsImm(a) && gpr.IsImm(b) && !jo.memcheck)
|
||||
{
|
||||
u32 val = gpr.R(a).Imm32() + gpr.R(b).Imm32();
|
||||
opAddress = Imm32(val);
|
||||
u32 val = gpr.Imm32(a) + gpr.Imm32(b);
|
||||
opAddress = RCOpArg::Imm32(val);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, val);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If we're using reg+reg mode and b is an immediate, pretend we're using constant offset mode
|
||||
bool use_constant_offset = !indexed || gpr.R(b).IsImm();
|
||||
const bool use_constant_offset = !indexed || gpr.IsImm(b);
|
||||
|
||||
s32 offset = 0;
|
||||
if (use_constant_offset)
|
||||
offset = indexed ? gpr.R(b).SImm32() : (s32)inst.SIMM_16;
|
||||
offset = indexed ? gpr.SImm32(b) : (s32)inst.SIMM_16;
|
||||
|
||||
RCOpArg Rb = use_constant_offset ? RCOpArg{} : gpr.Use(b, RCMode::Read);
|
||||
|
||||
// Depending on whether we have an immediate and/or update, find the optimum way to calculate
|
||||
// the load address.
|
||||
if ((update || use_constant_offset) && !jo.memcheck)
|
||||
{
|
||||
gpr.BindToRegister(a, true, update);
|
||||
opAddress = gpr.R(a);
|
||||
opAddress = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RegCache::Realize(opAddress, Rb);
|
||||
|
||||
if (!use_constant_offset)
|
||||
ADD(32, opAddress, gpr.R(b));
|
||||
ADD(32, opAddress, Rb);
|
||||
else if (update)
|
||||
ADD(32, opAddress, Imm32((u32)offset));
|
||||
else
|
||||
|
@ -223,51 +230,36 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// In this case we need an extra temporary register.
|
||||
opAddress = R(RSCRATCH2);
|
||||
storeAddress = true;
|
||||
// In this case we need an extra temporary register.
|
||||
opAddress = RCOpArg::R(RSCRATCH2);
|
||||
RCOpArg Ra = gpr.Use(a, RCMode::Read);
|
||||
RegCache::Realize(opAddress, Ra, Rb);
|
||||
|
||||
if (use_constant_offset)
|
||||
MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
|
||||
MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)offset));
|
||||
else
|
||||
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
|
||||
MOV_sum(32, RSCRATCH2, Ra, Rb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gpr.Lock(a, b, d);
|
||||
|
||||
if (update && storeAddress)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
|
||||
// A bit of an evil hack here. We need to retain the original value of this register for the
|
||||
// exception path, but we'd rather not needlessly pass it around if we don't have to, since
|
||||
// the exception path is very rare. So we store the value in the regcache, let the load path
|
||||
// clobber it, then restore the value in the exception path.
|
||||
// TODO: no other load has to do this at the moment, since no other loads go directly to the
|
||||
// target registers, but if that ever changes, we need to do it there too.
|
||||
if (jo.memcheck)
|
||||
{
|
||||
gpr.StoreFromRegister(d);
|
||||
js.revertGprLoad = d;
|
||||
}
|
||||
gpr.BindToRegister(d, false, true);
|
||||
RCX64Reg Ra = (update && storeAddress) ? gpr.Bind(a, RCMode::Write) : RCX64Reg{};
|
||||
RegCache::Realize(opAddress, Ra, Rd);
|
||||
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
// We need to save the (usually scratch) address register for the update.
|
||||
if (update && storeAddress)
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
|
||||
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
|
||||
SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend);
|
||||
|
||||
if (update && storeAddress)
|
||||
MOV(32, gpr.R(a), opAddress);
|
||||
MOV(32, Ra, opAddress);
|
||||
|
||||
// TODO: support no-swap in SafeLoadToReg instead
|
||||
if (byte_reversed)
|
||||
BSWAP(accessSize, gpr.RX(d));
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
BSWAP(accessSize, Rd);
|
||||
}
|
||||
|
||||
void Jit64::dcbx(UGeckoInstruction inst)
|
||||
|
@ -277,10 +269,12 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
|||
|
||||
X64Reg addr = RSCRATCH;
|
||||
X64Reg value = RSCRATCH2;
|
||||
X64Reg tmp = gpr.GetFreeXReg();
|
||||
gpr.FlushLockX(tmp);
|
||||
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
|
||||
RCX64Reg tmp = gpr.Scratch();
|
||||
RegCache::Realize(Ra, Rb, tmp);
|
||||
|
||||
MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));
|
||||
MOV_sum(32, addr, Ra, Rb);
|
||||
|
||||
// Check whether a JIT cache line needs to be invalidated.
|
||||
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
|
||||
|
@ -305,8 +299,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
|||
c = J(true);
|
||||
SwitchToNearCode();
|
||||
SetJumpTarget(c);
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::dcbt(UGeckoInstruction inst)
|
||||
|
@ -338,10 +330,14 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
MOV(32, R(RSCRATCH), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(RSCRATCH), gpr.R(a));
|
||||
AND(32, R(RSCRATCH), Imm32(~31));
|
||||
{
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Ra, Rb);
|
||||
|
||||
MOV_sum(32, RSCRATCH, Ra, Rb);
|
||||
AND(32, R(RSCRATCH), Imm32(~31));
|
||||
}
|
||||
|
||||
if (MSR.DR)
|
||||
{
|
||||
|
@ -407,10 +403,14 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
// If we already know the address of the write
|
||||
if (!a || gpr.R(a).IsImm())
|
||||
if (!a || gpr.IsImm(a))
|
||||
{
|
||||
u32 addr = (a ? gpr.R(a).Imm32() : 0) + offset;
|
||||
bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse());
|
||||
const u32 addr = (a ? gpr.Imm32(a) : 0) + offset;
|
||||
const bool exception = [&] {
|
||||
RCOpArg Rs = gpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse());
|
||||
}();
|
||||
if (update)
|
||||
{
|
||||
if (!jo.memcheck || !exception)
|
||||
|
@ -419,42 +419,35 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
gpr.KillImmediate(a, true, true);
|
||||
RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
|
||||
RegCache::Realize(Ra);
|
||||
MemoryExceptionCheck();
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
ADD(32, Ra, Imm32((u32)offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, true, update);
|
||||
if (gpr.R(s).IsImm())
|
||||
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RCOpArg reg_value;
|
||||
if (!gpr.IsImm(s) && WriteClobbersRegValue(accessSize, /* swap */ true))
|
||||
{
|
||||
SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
RCOpArg Rs = gpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
reg_value = RCOpArg::R(RSCRATCH2);
|
||||
MOV(32, reg_value, Rs);
|
||||
}
|
||||
else
|
||||
{
|
||||
X64Reg reg_value;
|
||||
if (WriteClobbersRegValue(accessSize, /* swap */ true))
|
||||
{
|
||||
MOV(32, R(RSCRATCH2), gpr.R(s));
|
||||
reg_value = RSCRATCH2;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.BindToRegister(s, true, false);
|
||||
reg_value = gpr.RX(s);
|
||||
}
|
||||
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
reg_value = gpr.BindOrImm(s, RCMode::Read);
|
||||
}
|
||||
RegCache::Realize(Ra, reg_value);
|
||||
SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(),
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
|
||||
if (update)
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
ADD(32, Ra, Imm32((u32)offset));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::stXx(UGeckoInstruction inst)
|
||||
|
@ -467,13 +460,6 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
bool byte_reverse = !!(inst.SUBOP10 & 512);
|
||||
FALLBACK_IF(!a || (update && a == s) || (update && jo.memcheck && a == b));
|
||||
|
||||
gpr.Lock(a, b, s);
|
||||
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
|
||||
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
|
||||
|
||||
int accessSize;
|
||||
switch (inst.SUBOP10 & ~32)
|
||||
{
|
||||
|
@ -494,39 +480,28 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
|
||||
if (gpr.R(s).IsImm())
|
||||
const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);
|
||||
|
||||
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rs = does_clobber ? gpr.Use(s, RCMode::Read) : gpr.BindOrImm(s, RCMode::Read);
|
||||
RegCache::Realize(Ra, Rb, Rs);
|
||||
|
||||
MOV_sum(32, RSCRATCH2, Ra, Rb);
|
||||
|
||||
if (!Rs.IsImm() && does_clobber)
|
||||
{
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update)
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse,
|
||||
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
X64Reg reg_value;
|
||||
if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse))
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
reg_value = RSCRATCH;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.BindToRegister(s, true, false);
|
||||
reg_value = gpr.RX(s);
|
||||
}
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update)
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse,
|
||||
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
|
||||
MOV(32, R(RSCRATCH), Rs);
|
||||
Rs = RCOpArg::R(RSCRATCH);
|
||||
}
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update)
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse,
|
||||
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
|
||||
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH2));
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
MOV(32, Ra, R(RSCRATCH2));
|
||||
}
|
||||
|
||||
// A few games use these heavily in video codecs.
|
||||
|
@ -535,18 +510,22 @@ void Jit64::lmw(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreOff);
|
||||
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4,
|
||||
CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
|
||||
gpr.BindToRegister(i, false, true);
|
||||
MOV(32, gpr.R(i), R(RSCRATCH));
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RegCache::Realize(Ra);
|
||||
MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)(s32)inst.SIMM_16));
|
||||
}
|
||||
for (int i = d; i < 32; i++)
|
||||
{
|
||||
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4,
|
||||
CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
|
||||
RCOpArg Ri = gpr.Bind(i, RCMode::Write);
|
||||
RegCache::Realize(Ri);
|
||||
MOV(32, Ri, R(RSCRATCH));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::stmw(UGeckoInstruction inst)
|
||||
|
@ -554,26 +533,27 @@ void Jit64::stmw(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreOff);
|
||||
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
for (int i = d; i < 32; i++)
|
||||
{
|
||||
if (inst.RA)
|
||||
MOV(32, R(RSCRATCH), gpr.R(inst.RA));
|
||||
else
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Ri = gpr.Use(i, RCMode::Read);
|
||||
RegCache::Realize(Ra, Ri);
|
||||
|
||||
if (Ra.IsZero())
|
||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
if (gpr.R(i).IsImm())
|
||||
{
|
||||
SafeWriteRegToReg(gpr.R(i), RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
|
||||
CallerSavedRegistersInUse());
|
||||
}
|
||||
else
|
||||
MOV(32, R(RSCRATCH), Ra);
|
||||
if (!Ri.IsImm())
|
||||
{
|
||||
MOV(32, R(RSCRATCH2), gpr.R(i));
|
||||
SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
|
||||
CallerSavedRegistersInUse());
|
||||
MOV(32, R(RSCRATCH2), Ri);
|
||||
Ri = RCOpArg::R(RSCRATCH2);
|
||||
}
|
||||
SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16,
|
||||
CallerSavedRegistersInUse());
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::eieio(UGeckoInstruction inst)
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
@ -30,25 +30,27 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
|
||||
FALLBACK_IF(!indexed && !a);
|
||||
|
||||
gpr.BindToRegister(a, true, update);
|
||||
|
||||
s32 offset = 0;
|
||||
OpArg addr = gpr.R(a);
|
||||
RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RegCache::Realize(addr);
|
||||
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
addr = R(RSCRATCH2);
|
||||
MOV(32, addr, gpr.R(a));
|
||||
MOV(32, R(RSCRATCH2), addr);
|
||||
addr = RCOpArg::R(RSCRATCH2);
|
||||
}
|
||||
if (indexed)
|
||||
{
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
if (update)
|
||||
{
|
||||
ADD(32, addr, gpr.R(b));
|
||||
ADD(32, addr, Rb);
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = R(RSCRATCH2);
|
||||
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
|
||||
MOV_sum(32, RSCRATCH2, a ? addr.Location() : Imm32(0), Rb);
|
||||
addr = RCOpArg::R(RSCRATCH2);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -59,13 +61,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
offset = (s16)inst.SIMM_16;
|
||||
}
|
||||
|
||||
fpr.Lock(d);
|
||||
if (jo.memcheck && single)
|
||||
{
|
||||
fpr.StoreFromRegister(d);
|
||||
js.revertFprLoad = d;
|
||||
}
|
||||
fpr.BindToRegister(d, !single);
|
||||
RCMode Rd_mode = !single ? RCMode::ReadWrite : RCMode::Write;
|
||||
RCX64Reg Rd = jo.memcheck && single ? fpr.RevertableBind(d, Rd_mode) : fpr.Bind(d, Rd_mode);
|
||||
RegCache::Realize(Rd);
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update && jo.memcheck)
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
|
@ -73,17 +71,19 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
|
||||
if (single)
|
||||
{
|
||||
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
|
||||
ConvertSingleToDouble(Rd, RSCRATCH, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
MOVSD(Rd, R(XMM0));
|
||||
}
|
||||
if (update && jo.memcheck)
|
||||
MOV(32, gpr.R(a), addr);
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
{
|
||||
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
|
||||
RegCache::Realize(Ra);
|
||||
MOV(32, Ra, addr);
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::stfXXX(UGeckoInstruction inst)
|
||||
|
@ -107,26 +107,31 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
{
|
||||
if (js.op->fprIsStoreSafe[s])
|
||||
{
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
CVTSD2SS(XMM0, Rs);
|
||||
}
|
||||
else
|
||||
{
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
RCX64Reg Rs = fpr.Bind(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
ConvertDoubleToSingle(XMM0, Rs);
|
||||
}
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
if (Rs.IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), Rs.GetSimpleReg());
|
||||
else
|
||||
MOV(64, R(RSCRATCH), fpr.R(s));
|
||||
MOV(64, R(RSCRATCH), Rs);
|
||||
}
|
||||
|
||||
if (!indexed && (!a || gpr.R(a).IsImm()))
|
||||
if (!indexed && (!a || gpr.IsImm(a)))
|
||||
{
|
||||
u32 addr = (a ? gpr.R(a).Imm32() : 0) + imm;
|
||||
u32 addr = (a ? gpr.Imm32(a) : 0) + imm;
|
||||
bool exception =
|
||||
WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());
|
||||
|
||||
|
@ -138,33 +143,34 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
gpr.KillImmediate(a, true, true);
|
||||
RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
|
||||
RegCache::Realize(Ra);
|
||||
MemoryExceptionCheck();
|
||||
ADD(32, gpr.R(a), Imm32((u32)imm));
|
||||
ADD(32, Ra, Imm32((u32)imm));
|
||||
}
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
|
||||
s32 offset = 0;
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
|
||||
RegCache::Realize(Ra);
|
||||
if (indexed)
|
||||
{
|
||||
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
MOV_sum(32, RSCRATCH2, a ? Ra.Location() : Imm32(0), Rb);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
|
||||
MOV_sum(32, RSCRATCH2, Ra, Imm32(imm));
|
||||
}
|
||||
else
|
||||
{
|
||||
offset = imm;
|
||||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH2), Ra);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,11 +182,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
|
||||
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH2));
|
||||
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
MOV(32, Ra, R(RSCRATCH2));
|
||||
}
|
||||
|
||||
// This one is a little bit weird; it stores the low 32 bits of a double without converting it
|
||||
|
@ -193,12 +195,16 @@ void Jit64::stfiwx(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Ra, Rb, Rs);
|
||||
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVD_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
MOV_sum(32, RSCRATCH2, Ra, Rb);
|
||||
|
||||
if (Rs.IsSimpleReg())
|
||||
MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg());
|
||||
else
|
||||
MOV(32, R(RSCRATCH), fpr.R(s));
|
||||
MOV(32, R(RSCRATCH), Rs);
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
@ -40,21 +40,22 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
bool gqrIsConstant = it != js.constantGqr.end();
|
||||
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
|
||||
|
||||
gpr.Lock(a, b);
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(scratch_guard, Ra, Rb, Rs);
|
||||
|
||||
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
|
||||
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
|
||||
|
||||
// In memcheck mode, don't update the address until the exception check
|
||||
if (update && !jo.memcheck)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
MOV(32, Ra, R(RSCRATCH_EXTRA));
|
||||
|
||||
if (w)
|
||||
CVTSD2SS(XMM0, fpr.R(s)); // one
|
||||
CVTSD2SS(XMM0, Rs); // one
|
||||
else
|
||||
CVTPD2PS(XMM0, fpr.R(s)); // pair
|
||||
CVTPD2PS(XMM0, Rs); // pair
|
||||
|
||||
if (gqrIsConstant)
|
||||
{
|
||||
|
@ -104,13 +105,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
if (indexed)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
ADD(32, Ra, Rb);
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||
|
@ -135,17 +131,17 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
bool gqrIsConstant = it != js.constantGqr.end();
|
||||
u32 gqrValue = gqrIsConstant ? it->second >> 16 : 0;
|
||||
|
||||
gpr.Lock(a, b);
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
|
||||
RCX64Reg Rs = fpr.Bind(s, RCMode::Write);
|
||||
RegCache::Realize(scratch_guard, Ra, Rb, Rs);
|
||||
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
gpr.BindToRegister(a, true, update);
|
||||
fpr.BindToRegister(s, false, true);
|
||||
|
||||
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
|
||||
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
|
||||
|
||||
// In memcheck mode, don't update the address until the exception check
|
||||
if (update && !jo.memcheck)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
MOV(32, Ra, R(RSCRATCH_EXTRA));
|
||||
|
||||
if (gqrIsConstant)
|
||||
{
|
||||
|
@ -169,15 +165,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
CALLptr(MatR(RSCRATCH));
|
||||
}
|
||||
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
CVTPS2PD(Rs, R(XMM0));
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
if (indexed)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
ADD(32, Ra, Rb);
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include "Common/MsgHandler.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
|
@ -22,8 +22,10 @@ void Jit64::ps_mr(UGeckoInstruction inst)
|
|||
if (d == b)
|
||||
return;
|
||||
|
||||
fpr.BindToRegister(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rb, Rd);
|
||||
MOVAPD(Rd, Rb);
|
||||
}
|
||||
|
||||
void Jit64::ps_sum(UGeckoInstruction inst)
|
||||
|
@ -36,43 +38,46 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
|||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
fpr.Lock(a, b, c, d);
|
||||
OpArg op_a = fpr.R(a);
|
||||
fpr.BindToRegister(d, d == b || d == c);
|
||||
|
||||
RCOpArg Ra = fpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rc = fpr.Use(c, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Ra, Rb, Rc, Rd);
|
||||
|
||||
X64Reg tmp = XMM1;
|
||||
MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
|
||||
ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
|
||||
MOVDDUP(tmp, Ra); // {a.ps0, a.ps0}
|
||||
ADDPD(tmp, Rb); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 10: // ps_sum0: {a.ps0 + b.ps1, c.ps1}
|
||||
UNPCKHPD(tmp, fpr.R(c));
|
||||
UNPCKHPD(tmp, Rc);
|
||||
break;
|
||||
case 11: // ps_sum1: {c.ps0, a.ps0 + b.ps1}
|
||||
if (fpr.R(c).IsSimpleReg())
|
||||
if (Rc.IsSimpleReg())
|
||||
{
|
||||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
BLENDPD(tmp, fpr.R(c), 1);
|
||||
BLENDPD(tmp, Rc, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVAPD(XMM0, fpr.R(c));
|
||||
MOVAPD(XMM0, Rc);
|
||||
SHUFPD(XMM0, R(tmp), 2);
|
||||
tmp = XMM0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVLPD(tmp, fpr.R(c));
|
||||
MOVLPD(tmp, Rc);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_sum WTF!!!");
|
||||
}
|
||||
HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1);
|
||||
ForceSinglePrecision(Rd, Rd);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::ps_muls(UGeckoInstruction inst)
|
||||
|
@ -85,26 +90,29 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
|||
int a = inst.FA;
|
||||
int c = inst.FC;
|
||||
bool round_input = !js.op->fprIsSingle[c];
|
||||
fpr.Lock(a, c, d);
|
||||
|
||||
RCOpArg Ra = fpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rc = fpr.Use(c, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Ra, Rc, Rd);
|
||||
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 12: // ps_muls0
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MOVDDUP(XMM1, Rc);
|
||||
break;
|
||||
case 13: // ps_muls1
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_muls WTF!!!");
|
||||
}
|
||||
if (round_input)
|
||||
Force25BitPrecision(XMM1, R(XMM1), XMM0);
|
||||
MULPD(XMM1, fpr.R(a));
|
||||
fpr.BindToRegister(d, false);
|
||||
HandleNaNs(inst, fpr.RX(d), XMM1);
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
MULPD(XMM1, Ra);
|
||||
HandleNaNs(inst, Rd, XMM1);
|
||||
ForceSinglePrecision(Rd, Rd);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
||||
|
@ -116,27 +124,29 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
|||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(a, b, d);
|
||||
fpr.BindToRegister(d, d == a || d == b);
|
||||
|
||||
RCOpArg Ra = fpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Ra, Rb, Rd);
|
||||
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 528:
|
||||
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, fpr.RX(d), fpr.R(a), fpr.R(b));
|
||||
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, Rd, Ra, Rb);
|
||||
break; // 00
|
||||
case 560:
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 2);
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 2);
|
||||
break; // 01
|
||||
case 592:
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 1);
|
||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 1);
|
||||
break; // 10
|
||||
case 624:
|
||||
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, fpr.RX(d), fpr.R(a), fpr.R(b));
|
||||
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, Rd, Ra, Rb);
|
||||
break; // 11
|
||||
default:
|
||||
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||
|
@ -147,23 +157,21 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(b, true, false);
|
||||
fpr.BindToRegister(d, false);
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(scratch_guard, Rb, Rd);
|
||||
|
||||
MOVSD(XMM0, fpr.R(b));
|
||||
MOVSD(XMM0, Rb);
|
||||
CALL(asm_routines.frsqrte);
|
||||
MOVSD(fpr.R(d), XMM0);
|
||||
MOVSD(Rd, XMM0);
|
||||
|
||||
MOVHLPS(XMM0, fpr.RX(b));
|
||||
MOVHLPS(XMM0, Rb);
|
||||
CALL(asm_routines.frsqrte);
|
||||
MOVLHPS(fpr.RX(d), XMM0);
|
||||
MOVLHPS(Rd, XMM0);
|
||||
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
ForceSinglePrecision(Rd, Rd);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::ps_res(UGeckoInstruction inst)
|
||||
|
@ -174,23 +182,21 @@ void Jit64::ps_res(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(b, true, false);
|
||||
fpr.BindToRegister(d, false);
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(scratch_guard, Rb, Rd);
|
||||
|
||||
MOVSD(XMM0, fpr.R(b));
|
||||
MOVSD(XMM0, Rb);
|
||||
CALL(asm_routines.fres);
|
||||
MOVSD(fpr.R(d), XMM0);
|
||||
MOVSD(Rd, XMM0);
|
||||
|
||||
MOVHLPS(XMM0, fpr.RX(b));
|
||||
MOVHLPS(XMM0, Rb);
|
||||
CALL(asm_routines.fres);
|
||||
MOVLHPS(fpr.RX(d), XMM0);
|
||||
MOVLHPS(Rd, XMM0);
|
||||
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
ForceSinglePrecision(Rd, Rd);
|
||||
SetFPRFIfNeeded(Rd);
|
||||
}
|
||||
|
||||
void Jit64::ps_cmpXX(UGeckoInstruction inst)
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/ProcessorInterface.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
|
@ -219,26 +219,32 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
|||
break;
|
||||
|
||||
case SPR_XER:
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, true, false);
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
{
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Read);
|
||||
RegCache::Realize(Rd);
|
||||
|
||||
MOV(32, R(RSCRATCH), Rd);
|
||||
AND(32, R(RSCRATCH), Imm32(0xff7f));
|
||||
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
|
||||
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
MOV(32, R(RSCRATCH), Rd);
|
||||
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
AND(8, R(RSCRATCH), Imm8(1));
|
||||
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
|
||||
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
MOV(32, R(RSCRATCH), Rd);
|
||||
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
case SPR_HID0:
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
RCOpArg Rd = gpr.Use(d, RCMode::Read);
|
||||
RegCache::Realize(Rd);
|
||||
|
||||
MOV(32, R(RSCRATCH), Rd);
|
||||
BTR(32, R(RSCRATCH), Imm8(31 - 20)); // ICFI
|
||||
MOV(32, PPCSTATE(spr[iIndex]), R(RSCRATCH));
|
||||
FixupBranch dont_reset_icache = J_CC(CC_NC);
|
||||
|
@ -255,13 +261,9 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
// OK, this is easy.
|
||||
if (!gpr.R(d).IsImm())
|
||||
{
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, true, false);
|
||||
}
|
||||
MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
RCOpArg Rd = gpr.BindOrImm(d, RCMode::Read);
|
||||
RegCache::Realize(Rd);
|
||||
MOV(32, PPCSTATE(spr[iIndex]), Rd);
|
||||
}
|
||||
|
||||
void Jit64::mfspr(UGeckoInstruction inst)
|
||||
|
@ -281,22 +283,23 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
// redundant for the JIT.
|
||||
// no register choice
|
||||
|
||||
gpr.FlushLockX(RDX, RAX);
|
||||
gpr.FlushLockX(RCX);
|
||||
RCX64Reg rdx = gpr.Scratch(RDX);
|
||||
RCX64Reg rax = gpr.Scratch(RAX);
|
||||
RCX64Reg rcx = gpr.Scratch(RCX);
|
||||
|
||||
MOV(64, R(RCX), ImmPtr(&CoreTiming::g));
|
||||
MOV(64, rcx, ImmPtr(&CoreTiming::g));
|
||||
|
||||
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
|
||||
// cost of calling out to C for this is actually significant.
|
||||
// Scale downcount by the CPU overclocking factor.
|
||||
CVTSI2SS(XMM0, PPCSTATE(downcount));
|
||||
MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
|
||||
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
|
||||
MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length)));
|
||||
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength -
|
||||
// Scaled_downcount)
|
||||
ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer)));
|
||||
SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
|
||||
MULSS(XMM0, MDisp(rcx, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
|
||||
CVTSS2SI(rdx, R(XMM0)); // RDX is downcount scaled by the overclocking factor
|
||||
MOV(32, rax, MDisp(rcx, offsetof(CoreTiming::Globals, slice_length)));
|
||||
SUB(64, rax, rdx); // cycles since the last CoreTiming::Advance() event is (slicelength -
|
||||
// Scaled_downcount)
|
||||
ADD(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, global_timer)));
|
||||
SUB(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
|
||||
// It might seem convenient to correct the timer for the block position here for even more
|
||||
// accurate
|
||||
// timing, but as of currently, this can break games. If we end up reading a time *after* the
|
||||
|
@ -307,15 +310,15 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
// Revolution,
|
||||
// which won't get past the loading screen.
|
||||
// if (js.downcountAmount)
|
||||
// ADD(64, R(RAX), Imm32(js.downcountAmount));
|
||||
// ADD(64, rax, Imm32(js.downcountAmount));
|
||||
|
||||
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
|
||||
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
|
||||
MUL(64, R(RDX));
|
||||
MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value)));
|
||||
SHR(64, R(RDX), Imm8(3));
|
||||
ADD(64, R(RAX), R(RDX));
|
||||
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
|
||||
MOV(64, rdx, Imm64(0xAAAAAAAAAAAAAAABULL));
|
||||
MUL(64, rdx);
|
||||
MOV(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_value)));
|
||||
SHR(64, rdx, Imm8(3));
|
||||
ADD(64, rax, rdx);
|
||||
MOV(64, PPCSTATE(spr[SPR_TL]), rax);
|
||||
|
||||
if (CanMergeNextInstructions(1))
|
||||
{
|
||||
|
@ -330,40 +333,42 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
{
|
||||
js.downcountAmount++;
|
||||
js.skipInstructions = 1;
|
||||
gpr.Lock(d, n);
|
||||
gpr.BindToRegister(d, false);
|
||||
gpr.BindToRegister(n, false);
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RCX64Reg Rn = gpr.Bind(n, RCMode::Write);
|
||||
RegCache::Realize(Rd, Rn);
|
||||
if (iIndex == SPR_TL)
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
MOV(32, Rd, rax);
|
||||
if (nextIndex == SPR_TL)
|
||||
MOV(32, gpr.R(n), R(RAX));
|
||||
SHR(64, R(RAX), Imm8(32));
|
||||
MOV(32, Rn, rax);
|
||||
SHR(64, rax, Imm8(32));
|
||||
if (iIndex == SPR_TU)
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
MOV(32, Rd, rax);
|
||||
if (nextIndex == SPR_TU)
|
||||
MOV(32, gpr.R(n), R(RAX));
|
||||
MOV(32, Rn, rax);
|
||||
break;
|
||||
}
|
||||
}
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false);
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
if (iIndex == SPR_TU)
|
||||
SHR(64, R(RAX), Imm8(32));
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
SHR(64, rax, Imm8(32));
|
||||
MOV(32, Rd, rax);
|
||||
break;
|
||||
}
|
||||
case SPR_XER:
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false);
|
||||
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
|
||||
{
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
MOVZX(32, 16, Rd, PPCSTATE(xer_stringctrl));
|
||||
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
OR(32, Rd, R(RSCRATCH));
|
||||
|
||||
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
OR(32, Rd, R(RSCRATCH));
|
||||
break;
|
||||
}
|
||||
case SPR_WPAR:
|
||||
case SPR_DEC:
|
||||
case SPR_PMC1:
|
||||
|
@ -372,26 +377,25 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
case SPR_PMC4:
|
||||
FALLBACK_IF(true);
|
||||
default:
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false);
|
||||
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
|
||||
{
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
MOV(32, Rd, PPCSTATE(spr[iIndex]));
|
||||
break;
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::mtmsr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
if (!gpr.R(inst.RS).IsImm())
|
||||
|
||||
{
|
||||
gpr.Lock(inst.RS);
|
||||
gpr.BindToRegister(inst.RS, true, false);
|
||||
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
MOV(32, PPCSTATE(msr), Rs);
|
||||
}
|
||||
MOV(32, PPCSTATE(msr), gpr.R(inst.RS));
|
||||
gpr.UnlockAll();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
|
@ -430,10 +434,9 @@ void Jit64::mfmsr(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
// Privileged?
|
||||
gpr.Lock(inst.RD);
|
||||
gpr.BindToRegister(inst.RD, false, true);
|
||||
MOV(32, gpr.R(inst.RD), PPCSTATE(msr));
|
||||
gpr.UnlockAll();
|
||||
RCX64Reg Rd = gpr.Bind(inst.RD, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
MOV(32, Rd, PPCSTATE(msr));
|
||||
}
|
||||
|
||||
void Jit64::mftb(UGeckoInstruction inst)
|
||||
|
@ -448,13 +451,13 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
int d = inst.RD;
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
CALL(asm_routines.mfcr);
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false, true);
|
||||
MOV(32, gpr.R(d), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
MOV(32, Rd, R(RSCRATCH));
|
||||
}
|
||||
|
||||
void Jit64::mtcrf(UGeckoInstruction inst)
|
||||
|
@ -466,13 +469,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
|
|||
u32 crm = inst.CRM;
|
||||
if (crm != 0)
|
||||
{
|
||||
if (gpr.R(inst.RS).IsImm())
|
||||
if (gpr.IsImm(inst.RS))
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
if ((crm & (0x80 >> i)) != 0)
|
||||
{
|
||||
u8 newcr = (gpr.R(inst.RS).Imm32() >> (28 - (i * 4))) & 0xF;
|
||||
u8 newcr = (gpr.Imm32(inst.RS) >> (28 - (i * 4))) & 0xF;
|
||||
u64 newcrval = PowerPC::PPCCRToInternal(newcr);
|
||||
if ((s64)newcrval == (s32)newcrval)
|
||||
{
|
||||
|
@ -489,13 +492,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
MOV(64, R(RSCRATCH2), ImmPtr(PowerPC::m_crTable.data()));
|
||||
gpr.Lock(inst.RS);
|
||||
gpr.BindToRegister(inst.RS, true, false);
|
||||
RCX64Reg Rs = gpr.Bind(inst.RS, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
if ((crm & (0x80 >> i)) != 0)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(inst.RS));
|
||||
MOV(32, R(RSCRATCH), Rs);
|
||||
if (i != 7)
|
||||
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
|
||||
if (i != 0)
|
||||
|
@ -504,7 +507,6 @@ void Jit64::mtcrf(UGeckoInstruction inst)
|
|||
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -653,11 +655,12 @@ void Jit64::mffsx(UGeckoInstruction inst)
|
|||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
|
||||
int d = inst.FD;
|
||||
fpr.BindToRegister(d, false, true);
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000));
|
||||
OR(64, R(RSCRATCH), R(RSCRATCH2));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
MOVSD(Rd, R(XMM0));
|
||||
}
|
||||
|
||||
// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
|
||||
|
@ -751,10 +754,14 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
int b = inst.FB;
|
||||
if (fpr.R(b).IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), fpr.RX(b));
|
||||
|
||||
RCOpArg Rb = fpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
|
||||
if (Rb.IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), Rb.GetSimpleReg());
|
||||
else
|
||||
MOV(32, R(RSCRATCH), fpr.R(b));
|
||||
MOV(32, R(RSCRATCH), Rb);
|
||||
|
||||
MOV(32, R(RSCRATCH2), PPCSTATE(fpscr));
|
||||
AND(32, R(RSCRATCH), Imm32(mask));
|
||||
|
|
|
@ -0,0 +1,284 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
|
||||
|
||||
using preg_t = size_t;
|
||||
|
||||
class PPCCachedReg
|
||||
{
|
||||
public:
|
||||
enum class LocationType
|
||||
{
|
||||
/// Value is currently at its default location
|
||||
Default,
|
||||
/// Value is currently bound to a x64 register
|
||||
Bound,
|
||||
/// Value is known as an immediate and has not been written back to its default location
|
||||
Immediate,
|
||||
/// Value is known as an immediate and is already present at its default location
|
||||
SpeculativeImmediate,
|
||||
};
|
||||
|
||||
PPCCachedReg() = default;
|
||||
|
||||
explicit PPCCachedReg(Gen::OpArg default_location_)
|
||||
: default_location(default_location_), location(default_location_)
|
||||
{
|
||||
}
|
||||
|
||||
const Gen::OpArg& Location() const { return location; }
|
||||
|
||||
LocationType GetLocationType() const
|
||||
{
|
||||
if (!away)
|
||||
{
|
||||
ASSERT(!revertable);
|
||||
|
||||
if (location.IsImm())
|
||||
return LocationType::SpeculativeImmediate;
|
||||
|
||||
ASSERT(location == default_location);
|
||||
return LocationType::Default;
|
||||
}
|
||||
|
||||
ASSERT(location.IsImm() || location.IsSimpleReg());
|
||||
return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
|
||||
}
|
||||
|
||||
bool IsAway() const { return away; }
|
||||
bool IsBound() const { return GetLocationType() == LocationType::Bound; }
|
||||
|
||||
void SetBoundTo(Gen::X64Reg xreg)
|
||||
{
|
||||
away = true;
|
||||
location = Gen::R(xreg);
|
||||
}
|
||||
|
||||
void SetFlushed()
|
||||
{
|
||||
ASSERT(!revertable);
|
||||
away = false;
|
||||
location = default_location;
|
||||
}
|
||||
|
||||
void SetToImm32(u32 imm32, bool dirty = true)
|
||||
{
|
||||
away |= dirty;
|
||||
location = Gen::Imm32(imm32);
|
||||
}
|
||||
|
||||
bool IsRevertable() const { return revertable; }
|
||||
void SetRevertable()
|
||||
{
|
||||
ASSERT(IsBound());
|
||||
revertable = true;
|
||||
}
|
||||
void SetRevert()
|
||||
{
|
||||
ASSERT(revertable);
|
||||
revertable = false;
|
||||
SetFlushed();
|
||||
}
|
||||
void SetCommit()
|
||||
{
|
||||
ASSERT(revertable);
|
||||
revertable = false;
|
||||
}
|
||||
|
||||
bool IsLocked() const { return locked > 0; }
|
||||
void Lock() { locked++; }
|
||||
void Unlock()
|
||||
{
|
||||
ASSERT(IsLocked());
|
||||
locked--;
|
||||
}
|
||||
|
||||
private:
|
||||
Gen::OpArg default_location{};
|
||||
Gen::OpArg location{};
|
||||
bool away = false; // value not in source register
|
||||
bool revertable = false;
|
||||
size_t locked = 0;
|
||||
};
|
||||
|
||||
class X64CachedReg
|
||||
{
|
||||
public:
|
||||
preg_t Contents() const { return ppcReg; }
|
||||
|
||||
void SetBoundTo(preg_t ppcReg_, bool dirty_)
|
||||
{
|
||||
free = false;
|
||||
ppcReg = ppcReg_;
|
||||
dirty = dirty_;
|
||||
}
|
||||
|
||||
void SetFlushed()
|
||||
{
|
||||
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
|
||||
free = true;
|
||||
dirty = false;
|
||||
}
|
||||
|
||||
bool IsFree() const { return free && !locked; }
|
||||
|
||||
bool IsDirty() const { return dirty; }
|
||||
void MakeDirty() { dirty = true; }
|
||||
|
||||
bool IsLocked() const { return locked > 0; }
|
||||
void Lock() { locked++; }
|
||||
void Unlock()
|
||||
{
|
||||
ASSERT(IsLocked());
|
||||
locked--;
|
||||
}
|
||||
|
||||
private:
|
||||
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
|
||||
bool free = true;
|
||||
bool dirty = false;
|
||||
size_t locked = 0;
|
||||
};
|
||||
|
||||
class RCConstraint
|
||||
{
|
||||
public:
|
||||
bool IsRealized() const { return realized != RealizedLoc::Invalid; }
|
||||
bool IsActive() const
|
||||
{
|
||||
return IsRealized() || write || read || kill_imm || kill_mem || revertable;
|
||||
}
|
||||
|
||||
bool ShouldLoad() const { return read; }
|
||||
bool ShouldDirty() const { return write; }
|
||||
bool ShouldBeRevertable() const { return revertable; }
|
||||
bool ShouldKillImmediate() const { return kill_imm; }
|
||||
bool ShouldKillMemory() const { return kill_mem; }
|
||||
|
||||
enum class RealizedLoc
|
||||
{
|
||||
Invalid,
|
||||
Bound,
|
||||
Imm,
|
||||
Mem,
|
||||
};
|
||||
|
||||
void Realized(RealizedLoc loc)
|
||||
{
|
||||
realized = loc;
|
||||
ASSERT(IsRealized());
|
||||
}
|
||||
|
||||
enum class ConstraintLoc
|
||||
{
|
||||
Bound,
|
||||
BoundOrImm,
|
||||
BoundOrMem,
|
||||
Any,
|
||||
};
|
||||
|
||||
void AddUse(RCMode mode) { AddConstraint(mode, ConstraintLoc::Any, false); }
|
||||
void AddUseNoImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrMem, false); }
|
||||
void AddBindOrImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrImm, false); }
|
||||
void AddBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, false); }
|
||||
void AddRevertableBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, true); }
|
||||
|
||||
private:
|
||||
void AddConstraint(RCMode mode, ConstraintLoc loc, bool should_revertable)
|
||||
{
|
||||
if (IsRealized())
|
||||
{
|
||||
ASSERT(IsCompatible(mode, loc, should_revertable));
|
||||
return;
|
||||
}
|
||||
|
||||
if (should_revertable)
|
||||
revertable = true;
|
||||
|
||||
switch (loc)
|
||||
{
|
||||
case ConstraintLoc::Bound:
|
||||
kill_imm = true;
|
||||
kill_mem = true;
|
||||
break;
|
||||
case ConstraintLoc::BoundOrImm:
|
||||
kill_mem = true;
|
||||
break;
|
||||
case ConstraintLoc::BoundOrMem:
|
||||
kill_imm = true;
|
||||
break;
|
||||
case ConstraintLoc::Any:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case RCMode::Read:
|
||||
read = true;
|
||||
break;
|
||||
case RCMode::Write:
|
||||
write = true;
|
||||
break;
|
||||
case RCMode::ReadWrite:
|
||||
read = true;
|
||||
write = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsCompatible(RCMode mode, ConstraintLoc loc, bool should_revertable) const
|
||||
{
|
||||
if (should_revertable && !revertable)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool is_loc_compatible = [&] {
|
||||
switch (loc)
|
||||
{
|
||||
case ConstraintLoc::Bound:
|
||||
return realized == RealizedLoc::Bound;
|
||||
case ConstraintLoc::BoundOrImm:
|
||||
return realized == RealizedLoc::Bound || realized == RealizedLoc::Imm;
|
||||
case ConstraintLoc::BoundOrMem:
|
||||
return realized == RealizedLoc::Bound || realized == RealizedLoc::Mem;
|
||||
case ConstraintLoc::Any:
|
||||
return true;
|
||||
}
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}();
|
||||
|
||||
const bool is_mode_compatible = [&] {
|
||||
switch (mode)
|
||||
{
|
||||
case RCMode::Read:
|
||||
return read;
|
||||
case RCMode::Write:
|
||||
return write;
|
||||
case RCMode::ReadWrite:
|
||||
return read && write;
|
||||
}
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}();
|
||||
|
||||
return is_loc_compatible && is_mode_compatible;
|
||||
}
|
||||
|
||||
RealizedLoc realized = RealizedLoc::Invalid;
|
||||
bool write = false;
|
||||
bool read = false;
|
||||
bool kill_imm = false;
|
||||
bool kill_mem = false;
|
||||
bool revertable = false;
|
||||
};
|
|
@ -2,7 +2,7 @@
|
|||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/FPURegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
|
||||
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
|
@ -12,9 +12,9 @@ class FPURegCache final : public RegCache
|
|||
{
|
||||
public:
|
||||
explicit FPURegCache(Jit64& jit);
|
||||
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
|
||||
|
||||
protected:
|
||||
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
|
||||
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override;
|
||||
void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;
|
|
@ -2,7 +2,7 @@
|
|||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/GPRRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
|
||||
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
|
||||
class Jit64;
|
||||
|
||||
|
@ -12,10 +12,10 @@ class GPRRegCache final : public RegCache
|
|||
{
|
||||
public:
|
||||
explicit GPRRegCache(Jit64& jit);
|
||||
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
|
||||
void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true);
|
||||
|
||||
protected:
|
||||
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
|
||||
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override;
|
||||
void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;
|
|
@ -0,0 +1,729 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/VariantUtil.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace PowerPC;
|
||||
|
||||
RCOpArg RCOpArg::Imm32(u32 imm)
|
||||
{
|
||||
return RCOpArg{imm};
|
||||
}
|
||||
|
||||
RCOpArg RCOpArg::R(X64Reg xr)
|
||||
{
|
||||
return RCOpArg{xr};
|
||||
}
|
||||
|
||||
RCOpArg::RCOpArg() = default;
|
||||
|
||||
RCOpArg::RCOpArg(u32 imm) : rc(nullptr), contents(imm)
|
||||
{
|
||||
}
|
||||
|
||||
RCOpArg::RCOpArg(X64Reg xr) : rc(nullptr), contents(xr)
|
||||
{
|
||||
}
|
||||
|
||||
RCOpArg::RCOpArg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
|
||||
{
|
||||
rc->Lock(preg);
|
||||
}
|
||||
|
||||
RCOpArg::~RCOpArg()
|
||||
{
|
||||
Unlock();
|
||||
}
|
||||
|
||||
RCOpArg::RCOpArg(RCOpArg&& other) noexcept
|
||||
: rc(std::exchange(other.rc, nullptr)),
|
||||
contents(std::exchange(other.contents, std::monostate{}))
|
||||
{
|
||||
}
|
||||
|
||||
RCOpArg& RCOpArg::operator=(RCOpArg&& other) noexcept
|
||||
{
|
||||
Unlock();
|
||||
rc = std::exchange(other.rc, nullptr);
|
||||
contents = std::exchange(other.contents, std::monostate{});
|
||||
return *this;
|
||||
}
|
||||
|
||||
RCOpArg::RCOpArg(RCX64Reg&& other) noexcept
|
||||
: rc(std::exchange(other.rc, nullptr)),
|
||||
contents(VariantCast(std::exchange(other.contents, std::monostate{})))
|
||||
{
|
||||
}
|
||||
|
||||
RCOpArg& RCOpArg::operator=(RCX64Reg&& other) noexcept
|
||||
{
|
||||
Unlock();
|
||||
rc = std::exchange(other.rc, nullptr);
|
||||
contents = VariantCast(std::exchange(other.contents, std::monostate{}));
|
||||
return *this;
|
||||
}
|
||||
|
||||
void RCOpArg::Realize()
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
rc->Realize(*preg);
|
||||
}
|
||||
}
|
||||
|
||||
OpArg RCOpArg::Location() const
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
ASSERT(rc->IsRealized(*preg));
|
||||
return rc->R(*preg);
|
||||
}
|
||||
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
|
||||
{
|
||||
return Gen::R(*xr);
|
||||
}
|
||||
else if (const u32* imm = std::get_if<u32>(&contents))
|
||||
{
|
||||
return Gen::Imm32(*imm);
|
||||
}
|
||||
ASSERT(false);
|
||||
return {};
|
||||
}
|
||||
|
||||
OpArg RCOpArg::ExtractWithByteOffset(int offset)
|
||||
{
|
||||
if (offset == 0)
|
||||
return Location();
|
||||
|
||||
ASSERT(rc);
|
||||
const preg_t preg = std::get<preg_t>(contents);
|
||||
rc->StoreFromRegister(preg, RegCache::FlushMode::MaintainState);
|
||||
OpArg result = rc->GetDefaultLocation(preg);
|
||||
result.AddMemOffset(offset);
|
||||
return result;
|
||||
}
|
||||
|
||||
void RCOpArg::Unlock()
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
ASSERT(rc);
|
||||
rc->Unlock(*preg);
|
||||
}
|
||||
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
|
||||
{
|
||||
// If rc, we got this from an RCX64Reg.
|
||||
// If !rc, we got this from RCOpArg::R.
|
||||
if (rc)
|
||||
rc->UnlockX(*xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(!rc);
|
||||
}
|
||||
|
||||
rc = nullptr;
|
||||
contents = std::monostate{};
|
||||
}
|
||||
|
||||
bool RCOpArg::IsImm() const
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
return rc->R(*preg).IsImm();
|
||||
}
|
||||
else if (std::holds_alternative<u32>(contents))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
s32 RCOpArg::SImm32() const
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
return rc->R(*preg).SImm32();
|
||||
}
|
||||
else if (const u32* imm = std::get_if<u32>(&contents))
|
||||
{
|
||||
return static_cast<s32>(*imm);
|
||||
}
|
||||
ASSERT(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 RCOpArg::Imm32() const
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
return rc->R(*preg).Imm32();
|
||||
}
|
||||
else if (const u32* imm = std::get_if<u32>(&contents))
|
||||
{
|
||||
return *imm;
|
||||
}
|
||||
ASSERT(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
RCX64Reg::RCX64Reg() = default;
|
||||
|
||||
RCX64Reg::RCX64Reg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
|
||||
{
|
||||
rc->Lock(preg);
|
||||
}
|
||||
|
||||
RCX64Reg::RCX64Reg(RegCache* rc_, X64Reg xr) : rc(rc_), contents(xr)
|
||||
{
|
||||
rc->LockX(xr);
|
||||
}
|
||||
|
||||
RCX64Reg::~RCX64Reg()
|
||||
{
|
||||
Unlock();
|
||||
}
|
||||
|
||||
RCX64Reg::RCX64Reg(RCX64Reg&& other) noexcept
|
||||
: rc(std::exchange(other.rc, nullptr)),
|
||||
contents(std::exchange(other.contents, std::monostate{}))
|
||||
{
|
||||
}
|
||||
|
||||
RCX64Reg& RCX64Reg::operator=(RCX64Reg&& other) noexcept
|
||||
{
|
||||
Unlock();
|
||||
rc = std::exchange(other.rc, nullptr);
|
||||
contents = std::exchange(other.contents, std::monostate{});
|
||||
return *this;
|
||||
}
|
||||
|
||||
void RCX64Reg::Realize()
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
rc->Realize(*preg);
|
||||
}
|
||||
}
|
||||
|
||||
RCX64Reg::operator X64Reg() const &
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
ASSERT(rc->IsRealized(*preg));
|
||||
return rc->RX(*preg);
|
||||
}
|
||||
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
|
||||
{
|
||||
return *xr;
|
||||
}
|
||||
ASSERT(false);
|
||||
return {};
|
||||
}
|
||||
|
||||
RCX64Reg::operator OpArg() const &
|
||||
{
|
||||
return Gen::R(RCX64Reg::operator X64Reg());
|
||||
}
|
||||
|
||||
void RCX64Reg::Unlock()
|
||||
{
|
||||
if (const preg_t* preg = std::get_if<preg_t>(&contents))
|
||||
{
|
||||
ASSERT(rc);
|
||||
rc->Unlock(*preg);
|
||||
}
|
||||
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
|
||||
{
|
||||
ASSERT(rc);
|
||||
rc->UnlockX(*xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(!rc);
|
||||
}
|
||||
|
||||
rc = nullptr;
|
||||
contents = std::monostate{};
|
||||
}
|
||||
|
||||
RCForkGuard::RCForkGuard(RegCache& rc_) : rc(&rc_), m_regs(rc_.m_regs), m_xregs(rc_.m_xregs)
|
||||
{
|
||||
ASSERT(!rc->IsAnyConstraintActive());
|
||||
}
|
||||
|
||||
RCForkGuard::RCForkGuard(RCForkGuard&& other) noexcept
|
||||
: rc(other.rc), m_regs(std::move(other.m_regs)), m_xregs(std::move(other.m_xregs))
|
||||
{
|
||||
other.rc = nullptr;
|
||||
}
|
||||
|
||||
void RCForkGuard::EndFork()
|
||||
{
|
||||
if (!rc)
|
||||
return;
|
||||
|
||||
ASSERT(!rc->IsAnyConstraintActive());
|
||||
rc->m_regs = m_regs;
|
||||
rc->m_xregs = m_xregs;
|
||||
rc = nullptr;
|
||||
}
|
||||
|
||||
RegCache::RegCache(Jit64& jit) : m_jit{jit}
|
||||
{
|
||||
}
|
||||
|
||||
void RegCache::Start()
|
||||
{
|
||||
m_xregs.fill({});
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::SetEmitter(XEmitter* emitter)
|
||||
{
|
||||
m_emitter = emitter;
|
||||
}
|
||||
|
||||
bool RegCache::SanityCheck() const
|
||||
{
|
||||
for (size_t i = 0; i < m_regs.size(); i++)
|
||||
{
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
{
|
||||
if (m_regs[i].IsLocked() || m_regs[i].IsRevertable())
|
||||
return false;
|
||||
|
||||
Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
|
||||
if (m_xregs[xr].IsLocked())
|
||||
return false;
|
||||
if (m_xregs[xr].Contents() != i)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
RCOpArg RegCache::Use(preg_t preg, RCMode mode)
|
||||
{
|
||||
m_constraints[preg].AddUse(mode);
|
||||
return RCOpArg{this, preg};
|
||||
}
|
||||
|
||||
RCOpArg RegCache::UseNoImm(preg_t preg, RCMode mode)
|
||||
{
|
||||
m_constraints[preg].AddUseNoImm(mode);
|
||||
return RCOpArg{this, preg};
|
||||
}
|
||||
|
||||
RCOpArg RegCache::BindOrImm(preg_t preg, RCMode mode)
|
||||
{
|
||||
m_constraints[preg].AddBindOrImm(mode);
|
||||
return RCOpArg{this, preg};
|
||||
}
|
||||
|
||||
RCX64Reg RegCache::Bind(preg_t preg, RCMode mode)
|
||||
{
|
||||
m_constraints[preg].AddBind(mode);
|
||||
return RCX64Reg{this, preg};
|
||||
}
|
||||
|
||||
RCX64Reg RegCache::RevertableBind(preg_t preg, RCMode mode)
|
||||
{
|
||||
m_constraints[preg].AddRevertableBind(mode);
|
||||
return RCX64Reg{this, preg};
|
||||
}
|
||||
|
||||
RCX64Reg RegCache::Scratch()
|
||||
{
|
||||
return Scratch(GetFreeXReg());
|
||||
}
|
||||
|
||||
RCX64Reg RegCache::Scratch(X64Reg xr)
|
||||
{
|
||||
FlushX(xr);
|
||||
return RCX64Reg{this, xr};
|
||||
}
|
||||
|
||||
RCForkGuard RegCache::Fork()
|
||||
{
|
||||
return RCForkGuard{*this};
|
||||
}
|
||||
|
||||
void RegCache::Flush(BitSet32 pregs)
|
||||
{
|
||||
ASSERT_MSG(
|
||||
DYNA_REC,
|
||||
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
|
||||
"Someone forgot to unlock a X64 reg");
|
||||
|
||||
for (preg_t i : pregs)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(),
|
||||
"Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i));
|
||||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
|
||||
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
break;
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
// We can have a cached value without a host register through speculative constants.
|
||||
// It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
|
||||
// if PPCSTATE is modified externally (e.g. fallback to interpreter).
|
||||
m_regs[i].SetFlushed();
|
||||
break;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
StoreFromRegister(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::Revert()
|
||||
{
|
||||
ASSERT(IsAllUnlocked());
|
||||
for (auto& reg : m_regs)
|
||||
{
|
||||
if (reg.IsRevertable())
|
||||
reg.SetRevert();
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::Commit()
|
||||
{
|
||||
ASSERT(IsAllUnlocked());
|
||||
for (auto& reg : m_regs)
|
||||
{
|
||||
if (reg.IsRevertable())
|
||||
reg.SetCommit();
|
||||
}
|
||||
}
|
||||
|
||||
bool RegCache::IsAllUnlocked() const
|
||||
{
|
||||
return std::none_of(m_regs.begin(), m_regs.end(), [](const auto& r) { return r.IsLocked(); }) &&
|
||||
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }) &&
|
||||
!IsAnyConstraintActive();
|
||||
}
|
||||
|
||||
void RegCache::PreloadRegisters(BitSet32 to_preload)
|
||||
{
|
||||
for (preg_t preg : to_preload)
|
||||
{
|
||||
if (NumFreeRegisters() < 2)
|
||||
return;
|
||||
if (!R(preg).IsImm())
|
||||
BindToRegister(preg, true, false);
|
||||
}
|
||||
}
|
||||
|
||||
BitSet32 RegCache::RegistersInUse() const
|
||||
{
|
||||
BitSet32 result;
|
||||
for (size_t i = 0; i < m_xregs.size(); i++)
|
||||
{
|
||||
if (!m_xregs[i].IsFree())
|
||||
result[i] = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void RegCache::FlushX(X64Reg reg)
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
|
||||
ASSERT(!m_xregs[reg].IsLocked());
|
||||
if (!m_xregs[reg].IsFree())
|
||||
{
|
||||
StoreFromRegister(m_xregs[reg].Contents());
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::DiscardRegContentsIfCached(preg_t preg)
|
||||
{
|
||||
if (m_regs[preg].IsBound())
|
||||
{
|
||||
X64Reg xr = m_regs[preg].Location().GetSimpleReg();
|
||||
m_xregs[xr].SetFlushed();
|
||||
m_regs[preg].SetFlushed();
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (!m_regs[i].IsBound())
|
||||
{
|
||||
X64Reg xr = GetFreeXReg();
|
||||
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
|
||||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state");
|
||||
|
||||
m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
|
||||
|
||||
if (doLoad)
|
||||
{
|
||||
LoadRegister(i, xr);
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC,
|
||||
std::none_of(m_regs.begin(), m_regs.end(),
|
||||
[xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
|
||||
"Xreg %i already bound", xr);
|
||||
|
||||
m_regs[i].SetBoundTo(xr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// reg location must be simplereg; memory locations
|
||||
// and immediates are taken care of above.
|
||||
if (makeDirty)
|
||||
m_xregs[RX(i)].MakeDirty();
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
|
||||
}
|
||||
|
||||
void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
|
||||
{
|
||||
// When a transaction is in progress, allowing the store would overwrite the old value.
|
||||
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
|
||||
|
||||
bool doStore = false;
|
||||
|
||||
switch (m_regs[i].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
return;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
{
|
||||
X64Reg xr = RX(i);
|
||||
doStore = m_xregs[xr].IsDirty();
|
||||
if (mode == FlushMode::Full)
|
||||
m_xregs[xr].SetFlushed();
|
||||
break;
|
||||
}
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
doStore = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (doStore)
|
||||
StoreRegister(i, GetDefaultLocation(i));
|
||||
if (mode == FlushMode::Full)
|
||||
m_regs[i].SetFlushed();
|
||||
}
|
||||
|
||||
X64Reg RegCache::GetFreeXReg()
|
||||
{
|
||||
size_t aCount;
|
||||
const X64Reg* aOrder = GetAllocationOrder(&aCount);
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xr = aOrder[i];
|
||||
if (m_xregs[xr].IsFree())
|
||||
{
|
||||
return xr;
|
||||
}
|
||||
}
|
||||
|
||||
// Okay, not found; run the register allocator heuristic and figure out which register we should
|
||||
// clobber.
|
||||
float min_score = std::numeric_limits<float>::max();
|
||||
X64Reg best_xreg = INVALID_REG;
|
||||
size_t best_preg = 0;
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
{
|
||||
X64Reg xreg = (X64Reg)aOrder[i];
|
||||
preg_t preg = m_xregs[xreg].Contents();
|
||||
if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
|
||||
continue;
|
||||
float score = ScoreRegister(xreg);
|
||||
if (score < min_score)
|
||||
{
|
||||
min_score = score;
|
||||
best_xreg = xreg;
|
||||
best_preg = preg;
|
||||
}
|
||||
}
|
||||
|
||||
if (best_xreg != INVALID_REG)
|
||||
{
|
||||
StoreFromRegister(best_preg);
|
||||
return best_xreg;
|
||||
}
|
||||
|
||||
// Still no dice? Die!
|
||||
ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
|
||||
return INVALID_REG;
|
||||
}
|
||||
|
||||
int RegCache::NumFreeRegisters() const
|
||||
{
|
||||
int count = 0;
|
||||
size_t aCount;
|
||||
const X64Reg* aOrder = GetAllocationOrder(&aCount);
|
||||
for (size_t i = 0; i < aCount; i++)
|
||||
if (m_xregs[aOrder[i]].IsFree())
|
||||
count++;
|
||||
return count;
|
||||
}
|
||||
|
||||
// Estimate roughly how bad it would be to de-allocate this register. Higher score
|
||||
// means more bad.
|
||||
float RegCache::ScoreRegister(X64Reg xreg) const
|
||||
{
|
||||
preg_t preg = m_xregs[xreg].Contents();
|
||||
float score = 0;
|
||||
|
||||
// If it's not dirty, we don't need a store to write it back to the register file, so
|
||||
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
|
||||
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
|
||||
// to the number of extra stores it causes.
|
||||
if (m_xregs[xreg].IsDirty())
|
||||
score += 2;
|
||||
|
||||
// If the register isn't actually needed in a physical register for a later instruction,
|
||||
// writing it back to the register file isn't quite as bad.
|
||||
if (GetRegUtilization()[preg])
|
||||
{
|
||||
// Don't look too far ahead; we don't want to have quadratic compilation times for
|
||||
// enormous block sizes!
|
||||
// This actually improves register allocation a tiny bit; I'm not sure why.
|
||||
u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
|
||||
// Count how many other registers are going to be used before we need this one again.
|
||||
u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
|
||||
// Totally ad-hoc heuristic to bias based on how many other registers we'll need
|
||||
// before this one gets used again.
|
||||
score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
const OpArg& RegCache::R(preg_t preg) const
|
||||
{
|
||||
return m_regs[preg].Location();
|
||||
}
|
||||
|
||||
X64Reg RegCache::RX(preg_t preg) const
|
||||
{
|
||||
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
|
||||
return m_regs[preg].Location().GetSimpleReg();
|
||||
}
|
||||
|
||||
void RegCache::Lock(preg_t preg)
|
||||
{
|
||||
m_regs[preg].Lock();
|
||||
}
|
||||
|
||||
void RegCache::Unlock(preg_t preg)
|
||||
{
|
||||
m_regs[preg].Unlock();
|
||||
if (!m_regs[preg].IsLocked())
|
||||
{
|
||||
// Fully unlocked, reset realization state.
|
||||
m_constraints[preg] = {};
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::LockX(X64Reg xr)
|
||||
{
|
||||
m_xregs[xr].Lock();
|
||||
}
|
||||
|
||||
void RegCache::UnlockX(X64Reg xr)
|
||||
{
|
||||
m_xregs[xr].Unlock();
|
||||
}
|
||||
|
||||
bool RegCache::IsRealized(preg_t preg) const
|
||||
{
|
||||
return m_constraints[preg].IsRealized();
|
||||
}
|
||||
|
||||
void RegCache::Realize(preg_t preg)
|
||||
{
|
||||
if (m_constraints[preg].IsRealized())
|
||||
return;
|
||||
|
||||
const bool load = m_constraints[preg].ShouldLoad();
|
||||
const bool dirty = m_constraints[preg].ShouldDirty();
|
||||
const bool kill_imm = m_constraints[preg].ShouldKillImmediate();
|
||||
const bool kill_mem = m_constraints[preg].ShouldKillMemory();
|
||||
|
||||
const auto do_bind = [&] {
|
||||
BindToRegister(preg, load, dirty);
|
||||
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Bound);
|
||||
};
|
||||
|
||||
if (m_constraints[preg].ShouldBeRevertable())
|
||||
{
|
||||
StoreFromRegister(preg, FlushMode::MaintainState);
|
||||
do_bind();
|
||||
m_regs[preg].SetRevertable();
|
||||
return;
|
||||
}
|
||||
|
||||
switch (m_regs[preg].GetLocationType())
|
||||
{
|
||||
case PPCCachedReg::LocationType::Default:
|
||||
if (kill_mem)
|
||||
{
|
||||
do_bind();
|
||||
return;
|
||||
}
|
||||
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
|
||||
return;
|
||||
case PPCCachedReg::LocationType::Bound:
|
||||
do_bind();
|
||||
return;
|
||||
case PPCCachedReg::LocationType::Immediate:
|
||||
case PPCCachedReg::LocationType::SpeculativeImmediate:
|
||||
if (dirty || kill_imm)
|
||||
{
|
||||
do_bind();
|
||||
return;
|
||||
}
|
||||
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool RegCache::IsAnyConstraintActive() const
|
||||
{
|
||||
return std::any_of(m_constraints.begin(), m_constraints.end(),
|
||||
[](const auto& c) { return c.IsActive(); });
|
||||
}
|
|
@ -0,0 +1,222 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cinttypes>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
|
||||
class Jit64;
|
||||
enum class RCMode;
|
||||
|
||||
class RCOpArg;
|
||||
class RCX64Reg;
|
||||
class RegCache;
|
||||
|
||||
using preg_t = size_t;
|
||||
static constexpr size_t NUM_XREGS = 16;
|
||||
|
||||
// Move-only handle to an operand used by the JIT register cache.
// Its contents variant holds one of:
//   - std::monostate : empty/unbacked handle (default constructed)
//   - Gen::X64Reg    : a specific host register
//   - u32            : a 32-bit immediate
//   - preg_t         : a guest (PPC) register index managed by a RegCache
// Handles backed by a RegCache must have Realize() called before Location()
// is queried — presumably this is where the cache binds/locks the underlying
// register (TODO confirm against the .cpp).
class RCOpArg
{
public:
  // Construct a handle wrapping a 32-bit immediate value.
  static RCOpArg Imm32(u32 imm);
  // Construct a handle wrapping a specific host register.
  static RCOpArg R(Gen::X64Reg xr);
  RCOpArg();
  ~RCOpArg();
  RCOpArg(RCOpArg&&) noexcept;
  RCOpArg& operator=(RCOpArg&&) noexcept;

  // An RCX64Reg may be moved into an RCOpArg (but not the reverse).
  RCOpArg(RCX64Reg&&) noexcept;
  RCOpArg& operator=(RCX64Reg&&) noexcept;

  // Non-copyable: each handle represents a unique lock/claim on its operand.
  RCOpArg(const RCOpArg&) = delete;
  RCOpArg& operator=(const RCOpArg&) = delete;

  void Realize();
  Gen::OpArg Location() const;
  // Implicit conversion to Gen::OpArg for direct use with the emitter.
  // Deleted on rvalues so a temporary's location cannot outlive the handle.
  operator Gen::OpArg() const & { return Location(); }
  operator Gen::OpArg() const && = delete;
  bool IsSimpleReg() const { return Location().IsSimpleReg(); }
  bool IsSimpleReg(Gen::X64Reg reg) const { return Location().IsSimpleReg(reg); }
  Gen::X64Reg GetSimpleReg() const { return Location().GetSimpleReg(); }

  // Use to extract bytes from a register using the regcache. offset is in bytes.
  Gen::OpArg ExtractWithByteOffset(int offset);

  // Release this handle's claim early, before destruction.
  void Unlock();

  bool IsImm() const;
  s32 SImm32() const;
  u32 Imm32() const;
  bool IsZero() const { return IsImm() && Imm32() == 0; }

private:
  friend class RegCache;

  explicit RCOpArg(u32 imm);
  explicit RCOpArg(Gen::X64Reg xr);
  // Handle backed by guest register `preg` of cache `rc_`.
  RCOpArg(RegCache* rc_, preg_t preg);

  // Owning cache when contents holds a preg_t; nullptr otherwise.
  RegCache* rc = nullptr;
  std::variant<std::monostate, Gen::X64Reg, u32, preg_t> contents;
};
|
||||
|
||||
// Move-only handle to a host (x64) register claimed from the register cache.
// Its contents variant holds one of:
//   - std::monostate : empty handle (default constructed)
//   - Gen::X64Reg    : a scratch host register
//   - preg_t         : a guest register bound to a host register by `rc`
// Unlike RCOpArg, this handle always resolves to a plain register, so it
// additionally converts to Gen::X64Reg.
class RCX64Reg
{
public:
  RCX64Reg();
  ~RCX64Reg();
  RCX64Reg(RCX64Reg&&) noexcept;
  RCX64Reg& operator=(RCX64Reg&&) noexcept;

  // Non-copyable: each handle represents a unique claim on its register.
  RCX64Reg(const RCX64Reg&) = delete;
  RCX64Reg& operator=(const RCX64Reg&) = delete;

  void Realize();
  // Conversions for use with the emitter; deleted on rvalues so a
  // temporary's register cannot be used after the handle is destroyed.
  operator Gen::OpArg() const &;
  operator Gen::X64Reg() const &;
  operator Gen::OpArg() const && = delete;
  operator Gen::X64Reg() const && = delete;

  // Release this handle's claim early, before destruction.
  void Unlock();

private:
  friend class RegCache;
  friend class RCOpArg;

  // Handle bound to guest register `preg` of cache `rc_`.
  RCX64Reg(RegCache* rc_, preg_t preg);
  // Handle for scratch host register `xr` of cache `rc_`.
  RCX64Reg(RegCache* rc_, Gen::X64Reg xr);

  // Owning cache; nullptr for a default-constructed handle.
  RegCache* rc = nullptr;
  std::variant<std::monostate, Gen::X64Reg, preg_t> contents;
};
|
||||
|
||||
// Scope guard returned by RegCache::Fork(). Holds a snapshot of the cache's
// guest- and host-register state (the same array types RegCache itself
// keeps), which EndFork() uses to end the fork — presumably restoring the
// saved state into `rc`; confirm against the .cpp. The destructor ends the
// fork automatically, RAII-style. Movable but not copyable or move-assignable.
class RCForkGuard
{
public:
  ~RCForkGuard() { EndFork(); }
  RCForkGuard(RCForkGuard&&) noexcept;

  RCForkGuard(const RCForkGuard&) = delete;
  RCForkGuard& operator=(const RCForkGuard&) = delete;
  RCForkGuard& operator=(RCForkGuard&&) = delete;

  // Ends the fork explicitly; also invoked by the destructor.
  void EndFork();

private:
  friend class RegCache;

  // Only RegCache may create a guard (via Fork()).
  explicit RCForkGuard(RegCache& rc_);

  // Cache being guarded.
  RegCache* rc;
  // Snapshot of the cache's per-guest-register and per-host-register state.
  std::array<PPCCachedReg, 32> m_regs;
  std::array<X64CachedReg, NUM_XREGS> m_xregs;
};
|
||||
|
||||
// Abstract base of the Jit64 register cache: maps the 32 guest (PPC)
// registers onto NUM_XREGS host x64 registers, tracking per-register state
// (m_regs/m_xregs) and access constraints (m_constraints). Concrete
// subclasses supply the storage layout and allocation policy through the
// pure-virtual protected interface below.
class RegCache
{
public:
  enum class FlushMode
  {
    Full,           // flush and release register state
    MaintainState,  // store values but keep cache bookkeeping intact
  };

  explicit RegCache(Jit64& jit);
  virtual ~RegCache() = default;

  void Start();
  void SetEmitter(Gen::XEmitter* emitter);
  bool SanityCheck() const;

  // Realize all given handles at once. Accepts only RCOpArg / RCX64Reg,
  // enforced at compile time.
  template <typename... Ts>
  static void Realize(Ts&... rc)
  {
    static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
    (rc.Realize(), ...);
  }

  // Unlock all given handles at once. Accepts only RCOpArg / RCX64Reg.
  template <typename... Ts>
  static void Unlock(Ts&... rc)
  {
    static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
    (rc.Unlock(), ...);
  }

  // True if every listed guest register currently holds an immediate.
  template <typename... Args>
  bool IsImm(Args... pregs) const
  {
    static_assert(sizeof...(pregs) > 0);
    return (R(pregs).IsImm() && ...);
  }
  u32 Imm32(preg_t preg) const { return R(preg).Imm32(); }
  s32 SImm32(preg_t preg) const { return R(preg).SImm32(); }

  // Acquire handles to guest registers for the given access mode.
  RCOpArg Use(preg_t preg, RCMode mode);
  RCOpArg UseNoImm(preg_t preg, RCMode mode);
  RCOpArg BindOrImm(preg_t preg, RCMode mode);
  RCX64Reg Bind(preg_t preg, RCMode mode);
  RCX64Reg RevertableBind(preg_t preg, RCMode mode);
  // Acquire a scratch host register (any free one, or a specific one).
  RCX64Reg Scratch();
  RCX64Reg Scratch(Gen::X64Reg xr);

  // Snapshot cache state; the returned guard ends the fork on destruction.
  RCForkGuard Fork();
  void Flush(BitSet32 pregs = BitSet32::AllTrue(32));
  void Revert();
  void Commit();

  bool IsAllUnlocked() const;

  void PreloadRegisters(BitSet32 pregs);
  BitSet32 RegistersInUse() const;

protected:
  friend class RCOpArg;
  friend class RCX64Reg;
  friend class RCForkGuard;

  // Subclass interface: where a guest register lives by default and how to
  // move it between memory and a host register.
  virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
  virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
  virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;

  // Preference-ordered list of allocatable host registers; *count receives
  // its length.
  virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;

  // Usage statistics used to score eviction candidates.
  virtual BitSet32 GetRegUtilization() const = 0;
  virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;

  void FlushX(Gen::X64Reg reg);
  void DiscardRegContentsIfCached(preg_t preg);
  void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
  void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full);

  Gen::X64Reg GetFreeXReg();

  int NumFreeRegisters() const;
  // Eviction heuristic: higher score presumably means better candidate to
  // spill — confirm against the .cpp.
  float ScoreRegister(Gen::X64Reg xreg) const;

  // Current location of a guest register / its bound host register.
  const Gen::OpArg& R(preg_t preg) const;
  Gen::X64Reg RX(preg_t preg) const;

  void Lock(preg_t preg);
  void Unlock(preg_t preg);
  void LockX(Gen::X64Reg xr);
  void UnlockX(Gen::X64Reg xr);
  bool IsRealized(preg_t preg) const;
  void Realize(preg_t preg);

  bool IsAnyConstraintActive() const;

  Jit64& m_jit;
  std::array<PPCCachedReg, 32> m_regs;              // per-guest-register state
  std::array<X64CachedReg, NUM_XREGS> m_xregs;      // per-host-register state
  std::array<RCConstraint, 32> m_constraints;       // active access constraints
  Gen::XEmitter* m_emitter = nullptr;
};
|
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2018 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
// How a guest register will be accessed when acquired from the register
// cache (see RegCache::Use/Bind and friends).
enum class RCMode
{
  Read,       // value is read only
  Write,      // value is written only
  ReadWrite,  // value is both read and written
};
|
|
@ -70,11 +70,6 @@ protected:
|
|||
// so just fixup that branch instead of testing for a DSI again.
|
||||
bool fixupExceptionHandler;
|
||||
Gen::FixupBranch exceptionHandler;
|
||||
// If these are set, we've stored the old value of a register which will be loaded in
|
||||
// revertLoad,
|
||||
// which lets us revert it on the exception path.
|
||||
int revertGprLoad;
|
||||
int revertFprLoad;
|
||||
|
||||
bool assumeNoPairedQuantize;
|
||||
std::map<u8, u32> constantGqr;
|
||||
|
|
Loading…
Reference in New Issue