Merge pull request #7492 from MerryMage/regcache2

JitRegCache: Refactor register cache
Pierre Bourdon 2018-11-09 04:45:47 +01:00 committed by GitHub
commit 61b9ef33ab
26 changed files with 2475 additions and 1692 deletions


@@ -193,10 +193,14 @@ public:
constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
constexpr BitSet operator~() const { return BitSet(~m_val); }
constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); }
constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); }
constexpr explicit operator bool() const { return m_val != 0; }
BitSet& operator|=(BitSet other) { return *this = *this | other; }
BitSet& operator&=(BitSet other) { return *this = *this & other; }
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
// Warning: Even though on modern CPUs this is a single fast instruction,
// Dolphin's official builds do not currently assume POPCNT support on x86,
// so slower explicit bit twiddling is generated. Still should generally
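
Why the shift operators are being added: the rewritten Jit64::CallerSavedRegistersInUse() later in this commit packs the GPR and XMM in-use masks into one BitSet32 with fpr.RegistersInUse() << 16. A minimal sketch of that use, assuming BitSet32 is Common/BitSet.h's BitSet&lt;u32&gt;:

#include "Common/BitSet.h"

// x64 GPR bits occupy 0..15 and XMM bits occupy 16..31 of a single mask.
BitSet32 PackInUse(BitSet32 gprs, BitSet32 xmms)
{
  return gprs | (xmms << 16);
}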


@@ -159,6 +159,7 @@
<ClInclude Include="TraversalClient.h" />
<ClInclude Include="TraversalProto.h" />
<ClInclude Include="UPnP.h" />
<ClInclude Include="VariantUtil.h" />
<ClInclude Include="Version.h" />
<ClInclude Include="WorkQueueThread.h" />
<ClInclude Include="x64ABI.h" />


@@ -0,0 +1,26 @@
// Copyright 2018 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <variant>
namespace detail
{
template <typename... From>
struct VariantCastProxy
{
const std::variant<From...>& v;
template <typename... To>
operator std::variant<To...>() const
{
return std::visit([](auto&& arg) { return std::variant<To...>{arg}; }, v);
}
};
} // namespace detail
template <typename... From>
auto VariantCast(const std::variant<From...>& v)
{
return detail::VariantCastProxy<From...>{v};
}
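
A usage sketch for VariantCast, with example types of my own choosing; the proxy's conversion operator compiles only when every alternative of the source variant can construct the target variant:

#include <variant>

void Example()
{
  std::variant<int, float> narrow = 1.5f;
  // Widening works because {int, float} is a subset of {int, float, double}.
  std::variant<int, float, double> wide = VariantCast(narrow);
}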


@@ -236,20 +236,20 @@ if(_M_X86)
DSP/Jit/x64/DSPJitMultiplier.cpp
DSP/Jit/x64/DSPJitTables.cpp
DSP/Jit/x64/DSPJitUtil.cpp
PowerPC/Jit64/FPURegCache.cpp
PowerPC/Jit64/GPRRegCache.cpp
PowerPC/Jit64/Jit64_Tables.cpp
PowerPC/Jit64/JitAsm.cpp
PowerPC/Jit64/Jit_Branch.cpp
PowerPC/Jit64/Jit.cpp
PowerPC/Jit64/Jit64_Tables.cpp
PowerPC/Jit64/Jit_Branch.cpp
PowerPC/Jit64/Jit_FloatingPoint.cpp
PowerPC/Jit64/Jit_Integer.cpp
PowerPC/Jit64/Jit_LoadStore.cpp
PowerPC/Jit64/Jit_LoadStoreFloating.cpp
PowerPC/Jit64/Jit_LoadStorePaired.cpp
PowerPC/Jit64/Jit_Paired.cpp
PowerPC/Jit64/JitRegCache.cpp
PowerPC/Jit64/Jit_SystemRegisters.cpp
PowerPC/Jit64/JitAsm.cpp
PowerPC/Jit64/RegCache/FPURegCache.cpp
PowerPC/Jit64/RegCache/GPRRegCache.cpp
PowerPC/Jit64/RegCache/JitRegCache.cpp
PowerPC/Jit64Common/BlockCache.cpp
PowerPC/Jit64Common/ConstantPool.cpp
PowerPC/Jit64Common/EmuCodeBlock.cpp


@@ -241,8 +241,8 @@
</ClCompile>
<ClCompile Include="IOS\USB\Bluetooth\WiimoteDevice.cpp" />
<ClCompile Include="IOS\USB\Bluetooth\WiimoteHIDAttr.cpp" />
<ClCompile Include="IOS\WFS\WFSSRV.cpp" />
<ClCompile Include="IOS\WFS\WFSI.cpp" />
<ClCompile Include="IOS\WFS\WFSSRV.cpp" />
<ClCompile Include="MemTools.cpp" />
<ClCompile Include="Movie.cpp" />
<ClCompile Include="NetPlayClient.cpp" />
@@ -260,13 +260,8 @@
<ClCompile Include="PowerPC\Interpreter\Interpreter_Paired.cpp" />
<ClCompile Include="PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" />
<ClCompile Include="PowerPC\Interpreter\Interpreter_Tables.cpp" />
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
<ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
<ClCompile Include="PowerPC\Jit64\JitRegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit_FloatingPoint.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit_Integer.cpp" />
@@ -275,7 +270,12 @@
<ClCompile Include="PowerPC\Jit64\Jit_LoadStorePaired.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit_Paired.cpp" />
<ClCompile Include="PowerPC\Jit64\Jit_SystemRegisters.cpp" />
<ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
<ClCompile Include="PowerPC\Jit64\RegCache\FPURegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\RegCache\GPRRegCache.cpp" />
<ClCompile Include="PowerPC\Jit64\RegCache\JitRegCache.cpp" />
<ClCompile Include="PowerPC\Jit64Common\BlockCache.cpp" />
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
<ClCompile Include="PowerPC\Jit64Common\EmuCodeBlock.cpp" />
<ClCompile Include="PowerPC\Jit64Common\FarCodeCache.cpp" />
<ClCompile Include="PowerPC\Jit64Common\Jit64AsmCommon.cpp" />
@@ -284,10 +284,6 @@
<ClCompile Include="PowerPC\JitCommon\JitAsmCommon.cpp" />
<ClCompile Include="PowerPC\JitCommon\JitBase.cpp" />
<ClCompile Include="PowerPC\JitCommon\JitCache.cpp" />
<ClCompile Include="PowerPC\SignatureDB\CSVSignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\DSYSignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\MEGASignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\SignatureDB.cpp" />
<ClCompile Include="PowerPC\JitInterface.cpp" />
<ClCompile Include="PowerPC\MMU.cpp" />
<ClCompile Include="PowerPC\PowerPC.cpp" />
@@ -295,6 +291,10 @@
<ClCompile Include="PowerPC\PPCCache.cpp" />
<ClCompile Include="PowerPC\PPCSymbolDB.cpp" />
<ClCompile Include="PowerPC\PPCTables.cpp" />
<ClCompile Include="PowerPC\SignatureDB\CSVSignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\DSYSignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\MEGASignatureDB.cpp" />
<ClCompile Include="PowerPC\SignatureDB\SignatureDB.cpp" />
<ClCompile Include="State.cpp" />
<ClCompile Include="SysConf.cpp" />
<ClCompile Include="TitleDatabase.cpp" />


@@ -27,7 +27,7 @@
#include "Core/HW/ProcessorInterface.h"
#include "Core/PatchEngine.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
@@ -756,8 +756,6 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.downcountAmount += opinfo->numCycles;
js.fastmemLoadStore = nullptr;
js.fixupExceptionHandler = false;
js.revertGprLoad = -1;
js.revertFprLoad = -1;
if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
@@ -800,13 +798,17 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true);
gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExternalExceptionExit();
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExternalExceptionExit();
}
SwitchToNearCode();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
}
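
This hunk shows the shape of the new interface: instead of flushing with FlushMode::MaintainState, the rarely-taken far-code path forks the cache, flushes the fork, and lets the guard's destructor restore the pre-fork state for the fall-through path. A self-contained analogue of the idea, with toy types standing in for the real RCForkGuard (whose implementation lands under RegCache/ in this commit):

#include <cassert>

struct State
{
  int bound = 0;  // stand-in for the bound-register bookkeeping
};

class Cache
{
public:
  class ForkGuard
  {
  public:
    explicit ForkGuard(Cache& cache) : m_cache(cache), m_saved(cache.state) {}
    ~ForkGuard() { m_cache.state = m_saved; }  // undo the forked path's changes
  private:
    Cache& m_cache;
    State m_saved;
  };

  ForkGuard Fork() { return ForkGuard(*this); }
  void Flush() { state.bound = 0; }

  State state;
};

int main()
{
  Cache gpr;
  gpr.state.bound = 3;
  {
    Cache::ForkGuard guard = gpr.Fork();
    gpr.Flush();  // the far-code exit flushes everything...
    assert(gpr.state.bound == 0);
  }
  assert(gpr.state.bound == 3);  // ...while the near-code path keeps its bindings
}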
@@ -824,14 +826,19 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SwitchToFarCode();
SetJumpTarget(b1);
gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
// If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.
MOV(32, PPCSTATE(pc), Imm32(op.address));
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit();
gpr.Flush();
fpr.Flush();
// If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.
MOV(32, PPCSTATE(pc), Imm32(op.address));
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit();
}
SwitchToNearCode();
js.firstFPInstructionFound = true;
@@ -866,20 +873,8 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
for (int reg : op.regsIn)
{
if (gpr.NumFreeRegisters() < 2)
break;
if (op.gprInReg[reg] && !gpr.R(reg).IsImm())
gpr.BindToRegister(reg, true, false);
}
for (int reg : op.fregsIn)
{
if (fpr.NumFreeRegisters() < 2)
break;
if (op.fprInXmm[reg])
fpr.BindToRegister(reg, true, false);
}
gpr.PreloadRegisters(op.regsIn & op.gprInReg);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm);
CompileInstruction(op);
@@ -908,24 +903,25 @@ u8* Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
}
BitSet32 gprToFlush = BitSet32::AllTrue(32);
BitSet32 fprToFlush = BitSet32::AllTrue(32);
if (js.revertGprLoad >= 0)
gprToFlush[js.revertGprLoad] = false;
if (js.revertFprLoad >= 0)
fprToFlush[js.revertFprLoad] = false;
gpr.Flush(RegCache::FlushMode::MaintainState, gprToFlush);
fpr.Flush(RegCache::FlushMode::MaintainState, fprToFlush);
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Revert();
fpr.Revert();
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExceptionExit();
SwitchToNearCode();
}
gpr.Commit();
fpr.Commit();
// If we have a register that will never be used again, flush it.
for (int j : ~op.gprInUse)
gpr.StoreFromRegister(j);
for (int j : ~op.fprInUse)
fpr.StoreFromRegister(j);
gpr.Flush(~op.gprInUse);
fpr.Flush(~op.fprInUse);
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
@@ -969,15 +965,8 @@ BitSet8 Jit64::ComputeStaticGQRs(const PPCAnalyst::CodeBlock& cb) const
BitSet32 Jit64::CallerSavedRegistersInUse() const
{
BitSet32 result;
for (size_t i = 0; i < RegCache::NUM_XREGS; i++)
{
if (!gpr.IsFreeX(i))
result[i] = true;
if (!fpr.IsFreeX(i))
result[16 + i] = true;
}
return result & ABI_ALL_CALLER_SAVED;
BitSet32 in_use = gpr.RegistersInUse() | (fpr.RegistersInUse() << 16);
return in_use & ABI_ALL_CALLER_SAVED;
}
void Jit64::EnableBlockLink()


@@ -21,10 +21,10 @@
#include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/FPURegCache.h"
#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
@@ -88,10 +88,8 @@ public:
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
void ComputeRC(const Gen::OpArg& arg, bool needs_test = true, bool needs_sext = true);
void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true);
// Use to extract bytes from a register using the regcache. offset is in bytes.
Gen::OpArg ExtractFromReg(int reg, int offset);
void AndWithMask(Gen::X64Reg reg, u32 mask);
bool CheckMergedBranch(u32 crf) const;
void DoMergedBranch();


@@ -1,324 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <limits>
#include "Common/Assert.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/PowerPC.h"
using namespace Gen;
using namespace PowerPC;
RegCache::RegCache(Jit64& jit) : m_jit{jit}
{
}
void RegCache::Start()
{
m_xregs.fill({});
for (size_t i = 0; i < m_regs.size(); i++)
{
m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
}
}
void RegCache::DiscardRegContentsIfCached(preg_t preg)
{
if (m_regs[preg].IsBound())
{
X64Reg xr = m_regs[preg].Location().GetSimpleReg();
m_xregs[xr].SetFlushed();
m_regs[preg].SetFlushed();
}
}
void RegCache::SetEmitter(XEmitter* emitter)
{
m_emitter = emitter;
}
void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
{
ASSERT_MSG(
DYNA_REC,
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
"Someone forgot to unlock a X64 reg");
for (unsigned int i : regsToFlush)
{
ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), "Someone forgot to unlock PPC reg %u (X64 reg %i).",
i, RX(i));
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
break;
case PPCCachedReg::LocationType::SpeculativeImmediate:
// We can have a cached value without a host register through speculative constants.
// It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
// if PPCSTATE is modified externally (e.g. fallback to interpreter).
m_regs[i].SetFlushed();
break;
case PPCCachedReg::LocationType::Bound:
case PPCCachedReg::LocationType::Immediate:
StoreFromRegister(i, mode);
break;
}
}
}
void RegCache::FlushLockX(X64Reg reg)
{
FlushX(reg);
LockX(reg);
}
void RegCache::FlushLockX(X64Reg reg1, X64Reg reg2)
{
FlushX(reg1);
FlushX(reg2);
LockX(reg1);
LockX(reg2);
}
bool RegCache::SanityCheck() const
{
for (size_t i = 0; i < m_regs.size(); i++)
{
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::SpeculativeImmediate:
case PPCCachedReg::LocationType::Immediate:
break;
case PPCCachedReg::LocationType::Bound:
{
if (m_regs[i].IsLocked())
return false;
Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
if (m_xregs[xr].IsLocked())
return false;
if (m_xregs[xr].Contents() != i)
return false;
break;
}
}
}
return true;
}
void RegCache::KillImmediate(preg_t preg, bool doLoad, bool makeDirty)
{
switch (m_regs[preg].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::SpeculativeImmediate:
break;
case PPCCachedReg::LocationType::Bound:
if (makeDirty)
m_xregs[RX(preg)].MakeDirty();
break;
case PPCCachedReg::LocationType::Immediate:
BindToRegister(preg, doLoad, makeDirty);
break;
}
}
void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
{
if (!m_regs[i].IsBound())
{
X64Reg xr = GetFreeXReg();
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
if (doLoad)
{
LoadRegister(i, xr);
}
ASSERT_MSG(DYNA_REC,
std::none_of(m_regs.begin(), m_regs.end(),
[xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
"Xreg %i already bound", xr);
m_regs[i].SetBoundTo(xr);
}
else
{
// reg location must be simplereg; memory locations
// and immediates are taken care of above.
if (makeDirty)
m_xregs[RX(i)].MakeDirty();
}
ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
}
void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
{
bool doStore = false;
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::SpeculativeImmediate:
return;
case PPCCachedReg::LocationType::Bound:
{
X64Reg xr = RX(i);
doStore = m_xregs[xr].IsDirty();
if (mode == FlushMode::All)
m_xregs[xr].SetFlushed();
break;
}
case PPCCachedReg::LocationType::Immediate:
doStore = true;
break;
}
if (doStore)
StoreRegister(i, GetDefaultLocation(i));
if (mode == FlushMode::All)
m_regs[i].SetFlushed();
}
const OpArg& RegCache::R(preg_t preg) const
{
return m_regs[preg].Location();
}
X64Reg RegCache::RX(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
return m_regs[preg].Location().GetSimpleReg();
}
void RegCache::UnlockAll()
{
for (auto& reg : m_regs)
reg.Unlock();
}
void RegCache::UnlockAllX()
{
for (auto& xreg : m_xregs)
xreg.Unlock();
}
bool RegCache::IsFreeX(size_t xreg) const
{
return m_xregs[xreg].IsFree();
}
X64Reg RegCache::GetFreeXReg()
{
size_t aCount;
const X64Reg* aOrder = GetAllocationOrder(&aCount);
for (size_t i = 0; i < aCount; i++)
{
X64Reg xr = aOrder[i];
if (m_xregs[xr].IsFree())
{
return xr;
}
}
// Okay, not found; run the register allocator heuristic and figure out which register we should
// clobber.
float min_score = std::numeric_limits<float>::max();
X64Reg best_xreg = INVALID_REG;
size_t best_preg = 0;
for (size_t i = 0; i < aCount; i++)
{
X64Reg xreg = (X64Reg)aOrder[i];
preg_t preg = m_xregs[xreg].Contents();
if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
continue;
float score = ScoreRegister(xreg);
if (score < min_score)
{
min_score = score;
best_xreg = xreg;
best_preg = preg;
}
}
if (best_xreg != INVALID_REG)
{
StoreFromRegister(best_preg);
return best_xreg;
}
// Still no dice? Die!
ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
return INVALID_REG;
}
int RegCache::NumFreeRegisters() const
{
int count = 0;
size_t aCount;
const X64Reg* aOrder = GetAllocationOrder(&aCount);
for (size_t i = 0; i < aCount; i++)
if (m_xregs[aOrder[i]].IsFree())
count++;
return count;
}
void RegCache::FlushX(X64Reg reg)
{
ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
ASSERT(!m_xregs[reg].IsLocked());
if (!m_xregs[reg].IsFree())
{
StoreFromRegister(m_xregs[reg].Contents());
}
}
// Estimate roughly how bad it would be to de-allocate this register. Higher score
// means more bad.
float RegCache::ScoreRegister(X64Reg xreg) const
{
preg_t preg = m_xregs[xreg].Contents();
float score = 0;
// If it's not dirty, we don't need a store to write it back to the register file, so
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
// to the number of extra stores it causes.
if (m_xregs[xreg].IsDirty())
score += 2;
// If the register isn't actually needed in a physical register for a later instruction,
// writing it back to the register file isn't quite as bad.
if (GetRegUtilization()[preg])
{
// Don't look too far ahead; we don't want to have quadratic compilation times for
// enormous block sizes!
// This actually improves register allocation a tiny bit; I'm not sure why.
u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
// Count how many other registers are going to be used before we need this one again.
u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
// Totally ad-hoc heuristic to bias based on how many other registers we'll need
// before this one gets used again.
score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
}
return score;
}
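
A worked instance of the heuristic above, for intuition (this file is deleted here, but judging from the build-system changes earlier in the commit it moves to RegCache/JitRegCache.cpp rather than disappearing): a dirty register whose next use comes after three other registers scores 2 + 1 + 2*(5 - log2f(1 + 3)) = 2 + 7 = 9, while a clean register with no use inside the 64-instruction lookahead window scores 0 and is the first candidate to clobber.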


@@ -1,230 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cinttypes>
#include "Common/Assert.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/PPCAnalyst.h"
class Jit64;
using preg_t = size_t;
class PPCCachedReg
{
public:
enum class LocationType
{
/// Value is currently at its default location
Default,
/// Value is currently bound to a x64 register
Bound,
/// Value is known as an immediate and has not been written back to its default location
Immediate,
/// Value is known as an immediate and is already present at its default location
SpeculativeImmediate,
};
PPCCachedReg() = default;
explicit PPCCachedReg(Gen::OpArg default_location_)
: default_location(default_location_), location(default_location_)
{
}
const Gen::OpArg& Location() const { return location; }
LocationType GetLocationType() const
{
if (!away)
{
if (location.IsImm())
return LocationType::SpeculativeImmediate;
ASSERT(location == default_location);
return LocationType::Default;
}
ASSERT(location.IsImm() || location.IsSimpleReg());
return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
}
bool IsAway() const { return away; }
bool IsBound() const { return GetLocationType() == LocationType::Bound; }
void SetBoundTo(Gen::X64Reg xreg)
{
away = true;
location = Gen::R(xreg);
}
void SetFlushed()
{
away = false;
location = default_location;
}
void SetToImm32(u32 imm32, bool dirty = true)
{
away |= dirty;
location = Gen::Imm32(imm32);
}
bool IsLocked() const { return locked; }
void Lock() { locked = true; }
void Unlock() { locked = false; }
private:
Gen::OpArg default_location{};
Gen::OpArg location{};
bool away = false; // value not in source register
bool locked = false;
};
class X64CachedReg
{
public:
preg_t Contents() const { return ppcReg; }
void SetBoundTo(preg_t ppcReg_, bool dirty_)
{
free = false;
ppcReg = ppcReg_;
dirty = dirty_;
}
void SetFlushed()
{
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
free = true;
dirty = false;
}
bool IsFree() const { return free && !locked; }
bool IsDirty() const { return dirty; }
void MakeDirty() { dirty = true; }
bool IsLocked() const { return locked; }
void Lock() { locked = true; }
void Unlock() { locked = false; }
private:
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
bool free = true;
bool dirty = false;
bool locked = false;
};
class RegCache
{
public:
enum class FlushMode
{
All,
MaintainState,
};
static constexpr size_t NUM_XREGS = 16;
explicit RegCache(Jit64& jit);
virtual ~RegCache() = default;
virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
void Start();
void DiscardRegContentsIfCached(preg_t preg);
void SetEmitter(Gen::XEmitter* emitter);
void Flush(FlushMode mode = FlushMode::All, BitSet32 regsToFlush = BitSet32::AllTrue(32));
void FlushLockX(Gen::X64Reg reg);
void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2);
bool SanityCheck() const;
void KillImmediate(preg_t preg, bool doLoad, bool makeDirty);
// TODO - instead of doload, use "read", "write"
// read only will not set dirty flag
void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::All);
const Gen::OpArg& R(preg_t preg) const;
Gen::X64Reg RX(preg_t preg) const;
// Register locking.
// these are powerpc reg indices
template <typename T>
void Lock(T p)
{
m_regs[p].Lock();
}
template <typename T, typename... Args>
void Lock(T first, Args... args)
{
Lock(first);
Lock(args...);
}
// these are x64 reg indices
template <typename T>
void LockX(T x)
{
if (m_xregs[x].IsLocked())
PanicAlert("RegCache: x %i already locked!", x);
m_xregs[x].Lock();
}
template <typename T, typename... Args>
void LockX(T first, Args... args)
{
LockX(first);
LockX(args...);
}
template <typename T>
void UnlockX(T x)
{
if (!m_xregs[x].IsLocked())
PanicAlert("RegCache: x %i already unlocked!", x);
m_xregs[x].Unlock();
}
template <typename T, typename... Args>
void UnlockX(T first, Args... args)
{
UnlockX(first);
UnlockX(args...);
}
void UnlockAll();
void UnlockAllX();
bool IsFreeX(size_t xreg) const;
Gen::X64Reg GetFreeXReg();
int NumFreeRegisters() const;
protected:
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;
virtual BitSet32 GetRegUtilization() const = 0;
virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
void FlushX(Gen::X64Reg reg);
float ScoreRegister(Gen::X64Reg xreg) const;
Jit64& m_jit;
std::array<PPCCachedReg, 32> m_regs;
std::array<X64CachedReg, NUM_XREGS> m_xregs;
Gen::XEmitter* m_emitter = nullptr;
};
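
Summarizing the location-state machine that the deleted PPCCachedReg above encodes (transitions read directly from its methods):

Default -> Bound                  via SetBoundTo(xreg)
Default -> Immediate              via SetToImm32(imm, dirty=true)
Default -> SpeculativeImmediate   via SetToImm32(imm, dirty=false)
Bound, Immediate, SpeculativeImmediate -> Default   via SetFlushed()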


@@ -8,7 +8,7 @@
#include "Core/CoreTiming.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -160,9 +160,13 @@ void Jit64::bcx(UGeckoInstruction inst)
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteExit(destination, inst.LK, js.compilerPC + 4);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteExit(destination, inst.LK, js.compilerPC + 4);
}
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);
@@ -215,10 +219,14 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
}
SetJumpTarget(b);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
@@ -269,9 +277,13 @@ void Jit64::bclrx(UGeckoInstruction inst)
if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteBLRExit();
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteBLRExit();
}
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);


@@ -12,7 +12,7 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -76,7 +76,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
std::vector<FixupBranch> fixups;
for (u32 x : inputs)
{
MOVDDUP(xmm, fpr.R(x));
RCOpArg Rx = fpr.Use(x, RCMode::Read);
RegCache::Realize(Rx);
MOVDDUP(xmm, Rx);
UCOMISD(xmm, R(xmm));
fixups.push_back(J_CC(CC_P));
}
@@ -102,8 +104,10 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
BLENDVPD(xmm, MConst(psGeneratedQNaN));
for (u32 x : inputs)
{
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
BLENDVPD(xmm, fpr.R(x));
RCOpArg Rx = fpr.Use(x, RCMode::Read);
RegCache::Realize(Rx);
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
BLENDVPD(xmm, Rx);
}
FixupBranch done = J(true);
SwitchToNearCode();
@@ -112,8 +116,8 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
else
{
// SSE2 fallback
X64Reg tmp = fpr.GetFreeXReg();
fpr.FlushLockX(tmp);
RCX64Reg tmp = fpr.Scratch();
RegCache::Realize(tmp);
MOVAPD(clobber, R(xmm));
CMPPD(clobber, R(clobber), CMP_UNORD);
MOVMSKPD(RSCRATCH, R(clobber));
@@ -125,20 +129,21 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
ANDNPD(clobber, R(xmm));
ANDPD(tmp, MConst(psGeneratedQNaN));
ORPD(tmp, R(clobber));
MOVAPD(xmm, R(tmp));
MOVAPD(xmm, tmp);
for (u32 x : inputs)
{
MOVAPD(clobber, fpr.R(x));
RCOpArg Rx = fpr.Use(x, RCMode::Read);
RegCache::Realize(Rx);
MOVAPD(clobber, Rx);
CMPPD(clobber, R(clobber), CMP_ORD);
MOVAPD(tmp, R(clobber));
ANDNPD(clobber, fpr.R(x));
ANDPD(xmm, R(tmp));
ANDNPD(clobber, Rx);
ANDPD(xmm, tmp);
ORPD(xmm, R(clobber));
}
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
fpr.UnlockX(tmp);
}
}
if (xmm_out != xmm)
@@ -172,53 +177,55 @@ void Jit64::fp_arith(UGeckoInstruction inst)
bool round_input = single && !js.op->fprIsSingle[inst.FC];
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
const auto fp_tri_op = [&](int d, int a, int b, bool reversible,
const auto fp_tri_op = [&](int op1, int op2, bool reversible,
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) {
fpr.Lock(d, a, b);
fpr.BindToRegister(d, d == a || d == b || !single);
X64Reg dest = preserve_inputs ? XMM1 : fpr.RX(d);
RCX64Reg Rd = fpr.Bind(d, !single ? RCMode::ReadWrite : RCMode::Write);
RCOpArg Rop1 = fpr.Use(op1, RCMode::Read);
RCOpArg Rop2 = fpr.Use(op2, RCMode::Read);
RegCache::Realize(Rd, Rop1, Rop2);
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
if (roundRHS)
{
if (d == a && !preserve_inputs)
if (d == op1 && !preserve_inputs)
{
Force25BitPrecision(XMM0, fpr.R(b), XMM1);
(this->*sseOp)(fpr.RX(d), R(XMM0));
Force25BitPrecision(XMM0, Rop2, XMM1);
(this->*sseOp)(Rd, R(XMM0));
}
else
{
Force25BitPrecision(dest, fpr.R(b), XMM0);
(this->*sseOp)(dest, fpr.R(a));
Force25BitPrecision(dest, Rop2, XMM0);
(this->*sseOp)(dest, Rop1);
}
}
else
{
avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), packed, reversible);
avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
}
HandleNaNs(inst, fpr.RX(d), dest);
HandleNaNs(inst, Rd, dest);
if (single)
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
ForceSinglePrecision(Rd, Rd, packed, true);
SetFPRFIfNeeded(Rd);
};
switch (inst.SUBOP5)
{
case 18:
fp_tri_op(d, a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
fp_tri_op(a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
packed ? &XEmitter::DIVPD : &XEmitter::DIVSD);
break;
case 20:
fp_tri_op(d, a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
fp_tri_op(a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
packed ? &XEmitter::SUBPD : &XEmitter::SUBSD);
break;
case 21:
fp_tri_op(d, a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
fp_tri_op(a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
packed ? &XEmitter::ADDPD : &XEmitter::ADDSD);
break;
case 25:
fp_tri_op(d, a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
fp_tri_op(a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input);
break;
default:
@@ -241,17 +248,32 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
bool packed = inst.OPCD == 4 || (!cpu_info.bAtom && single && js.op->fprIsDuplicated[a] &&
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
fpr.Lock(a, b, c, d);
// While we don't know if any games are actually affected (replays seem to work with all the usual
// suspects for desyncing), netplay and other applications need absolute perfect determinism, so
// be extra careful and don't use FMA, even if in theory it might be okay.
// Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
// to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
// instances on different computers giving identical results.
const bool use_fma = cpu_info.bFMA && !Core::WantsDeterminism();
// For use_fma == true:
// Statistics suggests b is a lot less likely to be unbound in practice, so
// if we have to pick one of a or b to bind, let's make it b.
RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
RegCache::Realize(Ra, Rb, Rc, Rd);
switch (inst.SUBOP5)
{
case 14:
MOVDDUP(XMM1, fpr.R(c));
MOVDDUP(XMM1, Rc);
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
break;
case 15:
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
break;
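
The determinism comment above comes down to double rounding: an FMA rounds once after the multiply-add, while separate multiply and add instructions round twice, so hosts with and without FMA can disagree in the last bit. A standalone C++17 demonstration (not Dolphin code; compile with contraction disabled, e.g. -ffp-contract=off, so the compiler does not fuse the expression itself):

#include <cmath>
#include <cstdio>

int main()
{
  const double a = 1.0 + 0x1p-27;
  const double b = 1.0 - 0x1p-27;
  const double c = -1.0;
  // a*b is exactly 1 - 2^-54, which rounds to 1.0 as a double.
  const double split = a * b + c;          // two roundings: 0.0
  const double fused = std::fma(a, b, c);  // one rounding: -2^-54
  std::printf("split=%a fused=%a\n", split, fused);
}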
@@ -260,38 +282,29 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
X64Reg tmp1 = special ? XMM0 : XMM1;
X64Reg tmp2 = special ? XMM1 : XMM0;
if (single && round_input)
Force25BitPrecision(tmp1, fpr.R(c), tmp2);
Force25BitPrecision(tmp1, Rc, tmp2);
else
MOVAPD(tmp1, fpr.R(c));
MOVAPD(tmp1, Rc);
break;
}
// While we don't know if any games are actually affected (replays seem to work with all the usual
// suspects for desyncing), netplay and other applications need absolute perfect determinism, so
// be extra careful and don't use FMA, even if in theory it might be okay.
// Note that FMA isn't necessarily less correct (it may actually be closer to correct) compared
// to what the Gekko does here; in deterministic mode, the important thing is multiple Dolphin
// instances on different computers giving identical results.
if (cpu_info.bFMA && !Core::WantsDeterminism())
if (use_fma)
{
// Statistics suggests b is a lot less likely to be unbound in practice, so
// if we have to pick one of a or b to bind, let's make it b.
fpr.BindToRegister(b, true, false);
switch (inst.SUBOP5)
{
case 28: // msub
if (packed)
VFMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
VFMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
VFMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
VFMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
case 14: // madds0
case 15: // madds1
case 29: // madd
if (packed)
VFMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
VFMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
VFMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
VFMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
// PowerPC and x86 define NMADD/NMSUB differently
// x86: D = -A*C (+/-) B
@@ -299,15 +312,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
// so we have to swap them; the ADD/SUB here isn't a typo.
case 30: // nmsub
if (packed)
VFNMADD132PD(XMM1, fpr.RX(b), fpr.R(a));
VFNMADD132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
VFNMADD132SD(XMM1, fpr.RX(b), fpr.R(a));
VFNMADD132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
case 31: // nmadd
if (packed)
VFNMSUB132PD(XMM1, fpr.RX(b), fpr.R(a));
VFNMSUB132PD(XMM1, Rb.GetSimpleReg(), Ra);
else
VFNMSUB132SD(XMM1, fpr.RX(b), fpr.R(a));
VFNMSUB132SD(XMM1, Rb.GetSimpleReg(), Ra);
break;
}
}
@@ -315,15 +328,15 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
{
// We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)), so handle it
// separately.
MOVAPD(XMM1, fpr.R(b));
MOVAPD(XMM1, Rb);
if (packed)
{
MULPD(XMM0, fpr.R(a));
MULPD(XMM0, Ra);
SUBPD(XMM1, R(XMM0));
}
else
{
MULSD(XMM0, fpr.R(a));
MULSD(XMM0, Ra);
SUBSD(XMM1, R(XMM0));
}
}
@@ -331,36 +344,35 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
{
if (packed)
{
MULPD(XMM1, fpr.R(a));
MULPD(XMM1, Ra);
if (inst.SUBOP5 == 28) // msub
SUBPD(XMM1, fpr.R(b));
SUBPD(XMM1, Rb);
else //(n)madd(s[01])
ADDPD(XMM1, fpr.R(b));
ADDPD(XMM1, Rb);
}
else
{
MULSD(XMM1, fpr.R(a));
MULSD(XMM1, Ra);
if (inst.SUBOP5 == 28)
SUBSD(XMM1, fpr.R(b));
SUBSD(XMM1, Rb);
else
ADDSD(XMM1, fpr.R(b));
ADDSD(XMM1, Rb);
}
if (inst.SUBOP5 == 31) // nmadd
XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits));
}
fpr.BindToRegister(d, !single);
if (single)
{
HandleNaNs(inst, fpr.RX(d), XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
HandleNaNs(inst, Rd, XMM1);
ForceSinglePrecision(Rd, Rd, packed, true);
}
else
{
HandleNaNs(inst, XMM1, XMM1);
MOVSD(fpr.RX(d), R(XMM1));
MOVSD(Rd, R(XMM1));
}
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
SetFPRFIfNeeded(Rd);
}
void Jit64::fsign(UGeckoInstruction inst)
@@ -373,29 +385,28 @@ void Jit64::fsign(UGeckoInstruction inst)
int b = inst.FB;
bool packed = inst.OPCD == 4;
fpr.Lock(b, d);
OpArg src = fpr.R(b);
fpr.BindToRegister(d, false);
RCOpArg src = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(src, Rd);
switch (inst.SUBOP10)
{
case 40: // neg
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
MConst(packed ? psSignBits2 : psSignBits), packed);
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
packed);
break;
case 136: // nabs
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src,
MConst(packed ? psSignBits2 : psSignBits), packed);
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, Rd, src, MConst(packed ? psSignBits2 : psSignBits),
packed);
break;
case 264: // abs
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src,
MConst(packed ? psAbsMask2 : psAbsMask), packed);
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, Rd, src, MConst(packed ? psAbsMask2 : psAbsMask),
packed);
break;
default:
PanicAlert("fsign bleh");
break;
}
fpr.UnlockAll();
}
void Jit64::fselx(UGeckoInstruction inst)
@@ -411,35 +422,38 @@ void Jit64::fselx(UGeckoInstruction inst)
bool packed = inst.OPCD == 4; // ps_sel
fpr.Lock(a, b, c, d);
RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, packed ? RCMode::Write : RCMode::ReadWrite);
RegCache::Realize(Ra, Rb, Rc, Rd);
XORPD(XMM0, R(XMM0));
// This condition is very tricky; there's only one right way to handle both the case of
// negative/positive zero and NaN properly.
// (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE.
if (packed)
CMPPD(XMM0, fpr.R(a), CMP_NLE);
CMPPD(XMM0, Ra, CMP_NLE);
else
CMPSD(XMM0, fpr.R(a), CMP_NLE);
CMPSD(XMM0, Ra, CMP_NLE);
if (cpu_info.bSSE4_1)
{
MOVAPD(XMM1, fpr.R(c));
BLENDVPD(XMM1, fpr.R(b));
MOVAPD(XMM1, Rc);
BLENDVPD(XMM1, Rb);
}
else
{
MOVAPD(XMM1, R(XMM0));
ANDPD(XMM0, fpr.R(b));
ANDNPD(XMM1, fpr.R(c));
ANDPD(XMM0, Rb);
ANDNPD(XMM1, Rc);
ORPD(XMM1, R(XMM0));
}
fpr.BindToRegister(d, !packed);
if (packed)
MOVAPD(fpr.RX(d), R(XMM1));
MOVAPD(Rd, R(XMM1));
else
MOVSD(fpr.RX(d), R(XMM1));
fpr.UnlockAll();
MOVSD(Rd, R(XMM1));
}
void Jit64::fmrx(UGeckoInstruction inst)
@@ -454,26 +468,25 @@ void Jit64::fmrx(UGeckoInstruction inst)
if (d == b)
return;
fpr.Lock(b, d);
if (fpr.R(d).IsSimpleReg())
RCOpArg Rd = fpr.Use(d, RCMode::Write);
RegCache::Realize(Rd);
if (Rd.IsSimpleReg())
{
// We don't need to load d, but if it is loaded, we need to mark it as dirty.
fpr.BindToRegister(d);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
// We have to use MOVLPD if b isn't loaded because "MOVSD reg, mem" sets the upper bits (64+)
// to zero and we don't want that.
if (!fpr.R(b).IsSimpleReg())
MOVLPD(fpr.RX(d), fpr.R(b));
if (!Rb.IsSimpleReg())
MOVLPD(Rd.GetSimpleReg(), Rb);
else
MOVSD(fpr.R(d), fpr.RX(b));
MOVSD(Rd, Rb.GetSimpleReg());
}
else
{
fpr.BindToRegister(b, true, false);
MOVSD(fpr.R(d), fpr.RX(b));
RCOpArg Rb = fpr.Bind(b, RCMode::Read);
RegCache::Realize(Rb);
MOVSD(Rd, Rb.GetSimpleReg());
}
fpr.UnlockAll();
}
void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
@@ -500,22 +513,22 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
output[3 - (next.CRBB & 3)] |= 1 << dst;
}
fpr.Lock(a, b);
fpr.BindToRegister(b, true, false);
RCOpArg Ra = upper ? fpr.Bind(a, RCMode::Read) : fpr.Use(a, RCMode::Read);
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
RegCache::Realize(Ra, Rb);
if (fprf)
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
if (upper)
{
fpr.BindToRegister(a, true, false);
MOVHLPS(XMM0, fpr.RX(a));
MOVHLPS(XMM1, fpr.RX(b));
MOVHLPS(XMM0, Ra.GetSimpleReg());
MOVHLPS(XMM1, Rb);
UCOMISD(XMM1, R(XMM0));
}
else
{
UCOMISD(fpr.RX(b), fpr.R(a));
UCOMISD(Rb, Ra);
}
FixupBranch pNaN, pLesser, pGreater;
@@ -572,7 +585,6 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
}
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
fpr.UnlockAll();
}
void Jit64::fcmpX(UGeckoInstruction inst)
@@ -591,8 +603,10 @@ void Jit64::fctiwx(UGeckoInstruction inst)
int d = inst.RD;
int b = inst.RB;
fpr.Lock(d, b);
fpr.BindToRegister(d);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rb, Rd);
// Intel uses 0x80000000 as a generic error code while PowerPC uses clamping:
//
@@ -606,7 +620,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
// except for -0.0 where they are set to 0xfff80001 (TODO).
MOVAPD(XMM0, MConst(half_qnan_and_s32_max));
MINSD(XMM0, fpr.R(b));
MINSD(XMM0, Rb);
switch (inst.SUBOP10)
{
// fctiwx
@@ -620,8 +634,7 @@
break;
}
// d[64+] must not be modified
MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll();
MOVSD(Rd, XMM0);
}
void Jit64::frspx(UGeckoInstruction inst)
@@ -633,12 +646,12 @@
int d = inst.FD;
bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
fpr.Lock(b, d);
OpArg src = fpr.R(b);
fpr.BindToRegister(d, false);
ForceSinglePrecision(fpr.RX(d), src, packed, true);
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rb, Rd);
ForceSinglePrecision(Rd, Rb, packed, true);
SetFPRFIfNeeded(Rd);
}
void Jit64::frsqrtex(UGeckoInstruction inst)
@@ -649,15 +662,15 @@
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(d);
MOVAPD(XMM0, fpr.R(b));
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);
MOVAPD(XMM0, Rb);
CALL(asm_routines.frsqrte);
MOVSD(fpr.R(d), XMM0);
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
MOVSD(Rd, XMM0);
SetFPRFIfNeeded(Rd);
}
void Jit64::fresx(UGeckoInstruction inst)
@@ -668,13 +681,13 @@
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
MOVAPD(XMM0, fpr.R(b));
fpr.BindToRegister(d, false);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);
MOVAPD(XMM0, Rb);
CALL(asm_routines.fres);
MOVDDUP(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
MOVDDUP(Rd, R(XMM0));
SetFPRFIfNeeded(Rd);
}
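
The pattern repeated throughout this file is two-phase: constructing an RCOpArg or RCX64Reg via Use(), Bind(), or Scratch() only declares which guest register is wanted and in which mode, and one RegCache::Realize(...) call then materializes all of them together, so the allocator sees every constraint before binding anything. A toy analogue of that declare-then-realize idiom (invented names; the real semantics live in the new RegCache/ sources, not shown in this view):

#include <cstdio>
#include <initializer_list>

struct Handle
{
  int preg;       // which guest register this refers to
  bool write;     // declared access mode
  int host = -1;  // host register, unassigned until realized
};

// Toy one-pass allocator: every constraint is visible before any binding.
void Realize(std::initializer_list<Handle*> handles)
{
  int next_host = 0;
  for (Handle* h : handles)
    h->host = next_host++;
}

int main()
{
  Handle Rb{1, false};  // cf. fpr.Use(b, RCMode::Read)
  Handle Rd{3, true};   // cf. fpr.Bind(d, RCMode::Write)
  Realize({&Rb, &Rd});  // nothing was bound before this call
  std::printf("b -> host%d, d -> host%d\n", Rb.host, Rd.host);
}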

File diff suppressed because it is too large


@@ -18,7 +18,7 @@
#include "Core/CoreTiming.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
@@ -126,12 +126,14 @@ void Jit64::lXXx(UGeckoInstruction inst)
js.op[2].inst.hex == 0x4182fff8)
{
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.BindToRegister(a, true, false);
gpr.BindToRegister(d, false, true);
SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, CallerSavedRegistersInUse(), signExtend);
RCX64Reg Ra = gpr.Bind(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
SafeLoadToReg(Rd, Ra, accessSize, offset, CallerSavedRegistersInUse(), signExtend);
// if it's still 0, we can wait until the next event
TEST(32, gpr.R(d), gpr.R(d));
TEST(32, Rd, Rd);
FixupBranch noIdle = J_CC(CC_NZ);
BitSet32 registersInUse = CallerSavedRegistersInUse();
@@ -155,7 +157,7 @@
// Determine whether this instruction updates inst.RA
bool update;
if (inst.OPCD == 31)
update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.R(b).IsImm() || gpr.R(b).Imm32() != 0);
update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.IsImm(b) || gpr.Imm32(b) != 0);
else
update = ((inst.OPCD & 1) != 0) && inst.SIMM_16 != 0;
@@ -165,19 +167,20 @@
bool storeAddress = false;
s32 loadOffset = 0;
// Prepare result
RCX64Reg Rd = jo.memcheck ? gpr.RevertableBind(d, RCMode::Write) : gpr.Bind(d, RCMode::Write);
// Prepare address operand
OpArg opAddress;
RCOpArg opAddress;
if (!update && !a)
{
if (indexed)
{
if (!gpr.R(b).IsImm())
gpr.BindToRegister(b, true, false);
opAddress = gpr.R(b);
opAddress = gpr.BindOrImm(b, RCMode::Read);
}
else
{
opAddress = Imm32((u32)(s32)inst.SIMM_16);
opAddress = RCOpArg::Imm32((u32)(s32)inst.SIMM_16);
}
}
else if (update && ((a == 0) || (d == a)))
@@ -186,36 +189,40 @@
}
else
{
if (!indexed && gpr.R(a).IsImm() && !jo.memcheck)
if (!indexed && gpr.IsImm(a) && !jo.memcheck)
{
u32 val = gpr.R(a).Imm32() + inst.SIMM_16;
opAddress = Imm32(val);
u32 val = gpr.Imm32(a) + inst.SIMM_16;
opAddress = RCOpArg::Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
else if (indexed && gpr.R(a).IsImm() && gpr.R(b).IsImm() && !jo.memcheck)
else if (indexed && gpr.IsImm(a) && gpr.IsImm(b) && !jo.memcheck)
{
u32 val = gpr.R(a).Imm32() + gpr.R(b).Imm32();
opAddress = Imm32(val);
u32 val = gpr.Imm32(a) + gpr.Imm32(b);
opAddress = RCOpArg::Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
else
{
// If we're using reg+reg mode and b is an immediate, pretend we're using constant offset mode
bool use_constant_offset = !indexed || gpr.R(b).IsImm();
const bool use_constant_offset = !indexed || gpr.IsImm(b);
s32 offset = 0;
if (use_constant_offset)
offset = indexed ? gpr.R(b).SImm32() : (s32)inst.SIMM_16;
offset = indexed ? gpr.SImm32(b) : (s32)inst.SIMM_16;
RCOpArg Rb = use_constant_offset ? RCOpArg{} : gpr.Use(b, RCMode::Read);
// Depending on whether we have an immediate and/or update, find the optimum way to calculate
// the load address.
if ((update || use_constant_offset) && !jo.memcheck)
{
gpr.BindToRegister(a, true, update);
opAddress = gpr.R(a);
opAddress = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RegCache::Realize(opAddress, Rb);
if (!use_constant_offset)
ADD(32, opAddress, gpr.R(b));
ADD(32, opAddress, Rb);
else if (update)
ADD(32, opAddress, Imm32((u32)offset));
else
@@ -223,51 +230,36 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
else
{
// In this case we need an extra temporary register.
opAddress = R(RSCRATCH2);
storeAddress = true;
// In this case we need an extra temporary register.
opAddress = RCOpArg::R(RSCRATCH2);
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RegCache::Realize(opAddress, Ra, Rb);
if (use_constant_offset)
MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)offset));
else
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
MOV_sum(32, RSCRATCH2, Ra, Rb);
}
}
}
gpr.Lock(a, b, d);
if (update && storeAddress)
gpr.BindToRegister(a, true, true);
// A bit of an evil hack here. We need to retain the original value of this register for the
// exception path, but we'd rather not needlessly pass it around if we don't have to, since
// the exception path is very rare. So we store the value in the regcache, let the load path
// clobber it, then restore the value in the exception path.
// TODO: no other load has to do this at the moment, since no other loads go directly to the
// target registers, but if that ever changes, we need to do it there too.
if (jo.memcheck)
{
gpr.StoreFromRegister(d);
js.revertGprLoad = d;
}
gpr.BindToRegister(d, false, true);
RCX64Reg Ra = (update && storeAddress) ? gpr.Bind(a, RCMode::Write) : RCX64Reg{};
RegCache::Realize(opAddress, Ra, Rd);
BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update && storeAddress)
registersInUse[RSCRATCH2] = true;
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
SafeLoadToReg(Rd, opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress)
MOV(32, gpr.R(a), opAddress);
MOV(32, Ra, opAddress);
// TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed)
BSWAP(accessSize, gpr.RX(d));
gpr.UnlockAll();
gpr.UnlockAllX();
BSWAP(accessSize, Rd);
}
void Jit64::dcbx(UGeckoInstruction inst)
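
Note how the old rollback mechanism disappears in the hunk above: the deleted lines parked the destination register with StoreFromRegister() and flagged js.revertGprLoad so the exception path could avoid flushing the clobbered value, whereas the new code declares the same intent up front with gpr.RevertableBind(d, RCMode::Write) and relies on the Revert()/Commit() pair added to Jit64::DoJit's exception handling.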
@@ -277,10 +269,12 @@
X64Reg addr = RSCRATCH;
X64Reg value = RSCRATCH2;
X64Reg tmp = gpr.GetFreeXReg();
gpr.FlushLockX(tmp);
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
RCX64Reg tmp = gpr.Scratch();
RegCache::Realize(Ra, Rb, tmp);
MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));
MOV_sum(32, addr, Ra, Rb);
// Check whether a JIT cache line needs to be invalidated.
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
@@ -305,8 +299,6 @@
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
gpr.UnlockAllX();
}
void Jit64::dcbt(UGeckoInstruction inst)
@@ -338,10 +330,14 @@ void Jit64::dcbz(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
MOV(32, R(RSCRATCH), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH), gpr.R(a));
AND(32, R(RSCRATCH), Imm32(~31));
{
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RegCache::Realize(Ra, Rb);
MOV_sum(32, RSCRATCH, Ra, Rb);
AND(32, R(RSCRATCH), Imm32(~31));
}
if (MSR.DR)
{
@@ -407,10 +403,14 @@ void Jit64::stX(UGeckoInstruction inst)
}
// If we already know the address of the write
if (!a || gpr.R(a).IsImm())
if (!a || gpr.IsImm(a))
{
u32 addr = (a ? gpr.R(a).Imm32() : 0) + offset;
bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse());
const u32 addr = (a ? gpr.Imm32(a) : 0) + offset;
const bool exception = [&] {
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
return WriteToConstAddress(accessSize, Rs, addr, CallerSavedRegistersInUse());
}();
if (update)
{
if (!jo.memcheck || !exception)
@@ -419,42 +419,35 @@ void Jit64::stX(UGeckoInstruction inst)
}
else
{
gpr.KillImmediate(a, true, true);
RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
RegCache::Realize(Ra);
MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Imm32((u32)offset));
}
}
}
else
{
gpr.Lock(a, s);
gpr.BindToRegister(a, true, update);
if (gpr.R(s).IsImm())
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RCOpArg reg_value;
if (!gpr.IsImm(s) && WriteClobbersRegValue(accessSize, /* swap */ true))
{
SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
reg_value = RCOpArg::R(RSCRATCH2);
MOV(32, reg_value, Rs);
}
else
{
X64Reg reg_value;
if (WriteClobbersRegValue(accessSize, /* swap */ true))
{
MOV(32, R(RSCRATCH2), gpr.R(s));
reg_value = RSCRATCH2;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(),
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
reg_value = gpr.BindOrImm(s, RCMode::Read);
}
RegCache::Realize(Ra, reg_value);
SafeWriteRegToReg(reg_value, Ra, accessSize, offset, CallerSavedRegistersInUse(),
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
if (update)
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Imm32((u32)offset));
}
gpr.UnlockAll();
}
void Jit64::stXx(UGeckoInstruction inst)
@@ -467,13 +460,6 @@ void Jit64::stXx(UGeckoInstruction inst)
bool byte_reverse = !!(inst.SUBOP10 & 512);
FALLBACK_IF(!a || (update && a == s) || (update && jo.memcheck && a == b));
gpr.Lock(a, b, s);
if (update)
gpr.BindToRegister(a, true, true);
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
int accessSize;
switch (inst.SUBOP10 & ~32)
{
@@ -494,39 +480,28 @@
break;
}
if (gpr.R(s).IsImm())
const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RCOpArg Rs = does_clobber ? gpr.Use(s, RCMode::Read) : gpr.BindOrImm(s, RCMode::Read);
RegCache::Realize(Ra, Rb, Rs);
MOV_sum(32, RSCRATCH2, Ra, Rb);
if (!Rs.IsImm() && does_clobber)
{
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update)
registersInUse[RSCRATCH2] = true;
SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse,
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
}
else
{
X64Reg reg_value;
if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse))
{
MOV(32, R(RSCRATCH), gpr.R(s));
reg_value = RSCRATCH;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update)
registersInUse[RSCRATCH2] = true;
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse,
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
MOV(32, R(RSCRATCH), Rs);
Rs = RCOpArg::R(RSCRATCH);
}
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update)
registersInUse[RSCRATCH2] = true;
SafeWriteRegToReg(Rs, RSCRATCH2, accessSize, 0, registersInUse,
byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
if (update)
MOV(32, gpr.R(a), R(RSCRATCH2));
gpr.UnlockAll();
gpr.UnlockAllX();
MOV(32, Ra, R(RSCRATCH2));
}
// A few games use these heavily in video codecs.
@@ -535,18 +510,22 @@
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
int a = inst.RA, d = inst.RD;
// TODO: This doesn't handle rollback on DSI correctly
MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA)
ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++)
{
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4,
CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
gpr.BindToRegister(i, false, true);
MOV(32, gpr.R(i), R(RSCRATCH));
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RegCache::Realize(Ra);
MOV_sum(32, RSCRATCH2, Ra, Imm32((u32)(s32)inst.SIMM_16));
}
for (int i = d; i < 32; i++)
{
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - d) * 4,
CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}, false);
RCOpArg Ri = gpr.Bind(i, RCMode::Write);
RegCache::Realize(Ri);
MOV(32, Ri, R(RSCRATCH));
}
gpr.UnlockAllX();
}
void Jit64::stmw(UGeckoInstruction inst)
@@ -554,26 +533,27 @@
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
int a = inst.RA, d = inst.RD;
// TODO: This doesn't handle rollback on DSI correctly
for (int i = inst.RD; i < 32; i++)
for (int i = d; i < 32; i++)
{
if (inst.RA)
MOV(32, R(RSCRATCH), gpr.R(inst.RA));
else
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Ri = gpr.Use(i, RCMode::Read);
RegCache::Realize(Ra, Ri);
if (Ra.IsZero())
XOR(32, R(RSCRATCH), R(RSCRATCH));
if (gpr.R(i).IsImm())
{
SafeWriteRegToReg(gpr.R(i), RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
CallerSavedRegistersInUse());
}
else
MOV(32, R(RSCRATCH), Ra);
if (!Ri.IsImm())
{
MOV(32, R(RSCRATCH2), gpr.R(i));
SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16,
CallerSavedRegistersInUse());
MOV(32, R(RSCRATCH2), Ri);
Ri = RCOpArg::R(RSCRATCH2);
}
SafeWriteRegToReg(Ri, RSCRATCH, 32, (i - d) * 4 + (u32)(s32)inst.SIMM_16,
CallerSavedRegistersInUse());
}
gpr.UnlockAllX();
}
void Jit64::eieio(UGeckoInstruction inst)


@@ -6,7 +6,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
using namespace Gen;
@@ -30,25 +30,27 @@ void Jit64::lfXXX(UGeckoInstruction inst)
FALLBACK_IF(!indexed && !a);
gpr.BindToRegister(a, true, update);
s32 offset = 0;
OpArg addr = gpr.R(a);
RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RegCache::Realize(addr);
if (update && jo.memcheck)
{
addr = R(RSCRATCH2);
MOV(32, addr, gpr.R(a));
MOV(32, R(RSCRATCH2), addr);
addr = RCOpArg::R(RSCRATCH2);
}
if (indexed)
{
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
if (update)
{
ADD(32, addr, gpr.R(b));
ADD(32, addr, Rb);
}
else
{
addr = R(RSCRATCH2);
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
MOV_sum(32, RSCRATCH2, a ? addr.Location() : Imm32(0), Rb);
addr = RCOpArg::R(RSCRATCH2);
}
}
else
@@ -59,13 +61,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s16)inst.SIMM_16;
}
fpr.Lock(d);
if (jo.memcheck && single)
{
fpr.StoreFromRegister(d);
js.revertFprLoad = d;
}
fpr.BindToRegister(d, !single);
RCMode Rd_mode = !single ? RCMode::ReadWrite : RCMode::Write;
RCX64Reg Rd = jo.memcheck && single ? fpr.RevertableBind(d, Rd_mode) : fpr.Bind(d, Rd_mode);
RegCache::Realize(Rd);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && jo.memcheck)
registersInUse[RSCRATCH2] = true;
@@ -73,17 +71,19 @@
if (single)
{
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
ConvertSingleToDouble(Rd, RSCRATCH, true);
}
else
{
MOVQ_xmm(XMM0, R(RSCRATCH));
MOVSD(fpr.RX(d), R(XMM0));
MOVSD(Rd, R(XMM0));
}
if (update && jo.memcheck)
MOV(32, gpr.R(a), addr);
fpr.UnlockAll();
gpr.UnlockAll();
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Ra);
MOV(32, Ra, addr);
}
}
void Jit64::stfXXX(UGeckoInstruction inst)
@@ -107,26 +107,31 @@
{
if (js.op->fprIsStoreSafe[s])
{
CVTSD2SS(XMM0, fpr.R(s));
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
CVTSD2SS(XMM0, Rs);
}
else
{
fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s));
RCX64Reg Rs = fpr.Bind(s, RCMode::Read);
RegCache::Realize(Rs);
ConvertDoubleToSingle(XMM0, Rs);
}
MOVD_xmm(R(RSCRATCH), XMM0);
}
else
{
if (fpr.R(s).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
if (Rs.IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
MOV(64, R(RSCRATCH), fpr.R(s));
MOV(64, R(RSCRATCH), Rs);
}
if (!indexed && (!a || gpr.R(a).IsImm()))
if (!indexed && (!a || gpr.IsImm(a)))
{
u32 addr = (a ? gpr.R(a).Imm32() : 0) + imm;
u32 addr = (a ? gpr.Imm32(a) : 0) + imm;
bool exception =
WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());
@ -138,33 +143,34 @@ void Jit64::stfXXX(UGeckoInstruction inst)
}
else
{
gpr.KillImmediate(a, true, true);
RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
RegCache::Realize(Ra);
MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)imm));
ADD(32, Ra, Imm32((u32)imm));
}
}
fpr.UnlockAll();
gpr.UnlockAll();
return;
}
s32 offset = 0;
if (update)
gpr.BindToRegister(a, true, true);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RegCache::Realize(Ra);
if (indexed)
{
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
MOV_sum(32, RSCRATCH2, a ? Ra.Location() : Imm32(0), Rb);
}
else
{
if (update)
{
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
MOV_sum(32, RSCRATCH2, Ra, Imm32(imm));
}
else
{
offset = imm;
MOV(32, R(RSCRATCH2), gpr.R(a));
MOV(32, R(RSCRATCH2), Ra);
}
}
@ -176,11 +182,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update)
MOV(32, gpr.R(a), R(RSCRATCH2));
fpr.UnlockAll();
gpr.UnlockAll();
gpr.UnlockAllX();
MOV(32, Ra, R(RSCRATCH2));
}
// This one is a little bit weird; it stores the low 32 bits of a double without converting it
@ -193,12 +195,16 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Ra, Rb, Rs);
if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(RSCRATCH), fpr.RX(s));
MOV_sum(32, RSCRATCH2, Ra, Rb);
if (Rs.IsSimpleReg())
MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
MOV(32, R(RSCRATCH), fpr.R(s));
MOV(32, R(RSCRATCH), Rs);
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
gpr.UnlockAllX();
}


@ -9,7 +9,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/PowerPC.h"
@ -40,21 +40,22 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(scratch_guard, Ra, Rb, Rs);
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, Ra, R(RSCRATCH_EXTRA));
if (w)
CVTSD2SS(XMM0, fpr.R(s)); // one
CVTSD2SS(XMM0, Rs); // one
else
CVTPD2PS(XMM0, fpr.R(s)); // pair
CVTPD2PS(XMM0, Rs); // pair
if (gqrIsConstant)
{
@ -104,13 +105,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && jo.memcheck)
{
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Rb);
}
gpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::psq_lXX(UGeckoInstruction inst)
@ -135,17 +131,17 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second >> 16 : 0;
gpr.Lock(a, b);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
RCX64Reg Rs = fpr.Bind(s, RCMode::Write);
RegCache::Realize(scratch_guard, Ra, Rb, Rs);
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, Ra, R(RSCRATCH_EXTRA));
if (gqrIsConstant)
{
@ -169,15 +165,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MatR(RSCRATCH));
}
CVTPS2PD(fpr.RX(s), R(XMM0));
CVTPS2PD(Rs, R(XMM0));
if (update && jo.memcheck)
{
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Rb);
}
gpr.UnlockAll();
gpr.UnlockAllX();
}
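The scratch_guard in both paired routines replaces the old FlushLockX/UnlockAllX pairs: gpr.Scratch(xr) flushes and pins a specific host register (the no-argument Scratch() picks any free one), and the pin drops when the guard dies. Roughly, as a sketch:

  {
    RCX64Reg scratch = gpr.Scratch(RSCRATCH_EXTRA);  // flush + lock this xreg
    RegCache::Realize(scratch);                      // no-op for a scratch reg, kept for uniformity
    MOV(32, scratch, Imm32(0));
  }  // unlocked here; there is no UnlockAllX() to forget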


@ -7,7 +7,7 @@
#include "Common/MsgHandler.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
using namespace Gen;
@ -22,8 +22,10 @@ void Jit64::ps_mr(UGeckoInstruction inst)
if (d == b)
return;
fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(b));
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rb, Rd);
MOVAPD(Rd, Rb);
}
void Jit64::ps_sum(UGeckoInstruction inst)
@ -36,43 +38,46 @@ void Jit64::ps_sum(UGeckoInstruction inst)
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.Lock(a, b, c, d);
OpArg op_a = fpr.R(a);
fpr.BindToRegister(d, d == b || d == c);
RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rb, Rc, Rd);
X64Reg tmp = XMM1;
MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
MOVDDUP(tmp, Ra); // {a.ps0, a.ps0}
ADDPD(tmp, Rb); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
switch (inst.SUBOP5)
{
case 10: // ps_sum0: {a.ps0 + b.ps1, c.ps1}
UNPCKHPD(tmp, fpr.R(c));
UNPCKHPD(tmp, Rc);
break;
case 11: // ps_sum1: {c.ps0, a.ps0 + b.ps1}
if (fpr.R(c).IsSimpleReg())
if (Rc.IsSimpleReg())
{
if (cpu_info.bSSE4_1)
{
BLENDPD(tmp, fpr.R(c), 1);
BLENDPD(tmp, Rc, 1);
}
else
{
MOVAPD(XMM0, fpr.R(c));
MOVAPD(XMM0, Rc);
SHUFPD(XMM0, R(tmp), 2);
tmp = XMM0;
}
}
else
{
MOVLPD(tmp, fpr.R(c));
MOVLPD(tmp, Rc);
}
break;
default:
PanicAlert("ps_sum WTF!!!");
}
HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}
void Jit64::ps_muls(UGeckoInstruction inst)
@ -85,26 +90,29 @@ void Jit64::ps_muls(UGeckoInstruction inst)
int a = inst.FA;
int c = inst.FC;
bool round_input = !js.op->fprIsSingle[c];
fpr.Lock(a, c, d);
RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rc, Rd);
switch (inst.SUBOP5)
{
case 12: // ps_muls0
MOVDDUP(XMM1, fpr.R(c));
MOVDDUP(XMM1, Rc);
break;
case 13: // ps_muls1
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
break;
default:
PanicAlert("ps_muls WTF!!!");
}
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
MULPD(XMM1, fpr.R(a));
fpr.BindToRegister(d, false);
HandleNaNs(inst, fpr.RX(d), XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
MULPD(XMM1, Ra);
HandleNaNs(inst, Rd, XMM1);
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}
void Jit64::ps_mergeXX(UGeckoInstruction inst)
@ -116,27 +124,29 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
fpr.Lock(a, b, d);
fpr.BindToRegister(d, d == a || d == b);
RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rb, Rd);
switch (inst.SUBOP10)
{
case 528:
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, fpr.RX(d), fpr.R(a), fpr.R(b));
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, Rd, Ra, Rb);
break; // 00
case 560:
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 2);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 2);
break; // 01
case 592:
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 1);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 1);
break; // 10
case 624:
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, fpr.RX(d), fpr.R(a), fpr.R(b));
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, Rd, Ra, Rb);
break; // 11
default:
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
}
fpr.UnlockAll();
}
void Jit64::ps_rsqrte(UGeckoInstruction inst)
@ -147,23 +157,21 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);
MOVSD(XMM0, fpr.R(b));
MOVSD(XMM0, Rb);
CALL(asm_routines.frsqrte);
MOVSD(fpr.R(d), XMM0);
MOVSD(Rd, XMM0);
MOVHLPS(XMM0, fpr.RX(b));
MOVHLPS(XMM0, Rb);
CALL(asm_routines.frsqrte);
MOVLHPS(fpr.RX(d), XMM0);
MOVLHPS(Rd, XMM0);
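// Lane flow of the two frsqrte calls above (ps_res below has the same shape):
//   XMM0 = Rb.ps0 (MOVSD)   -> frsqrte -> Rd.ps0 (MOVSD)
//   XMM0 = Rb.ps1 (MOVHLPS) -> frsqrte -> Rd.ps1 (MOVLHPS)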
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}
void Jit64::ps_res(UGeckoInstruction inst)
@ -174,23 +182,21 @@ void Jit64::ps_res(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);
MOVSD(XMM0, fpr.R(b));
MOVSD(XMM0, Rb);
CALL(asm_routines.fres);
MOVSD(fpr.R(d), XMM0);
MOVSD(Rd, XMM0);
MOVHLPS(XMM0, fpr.RX(b));
MOVHLPS(XMM0, Rb);
CALL(asm_routines.fres);
MOVLHPS(fpr.RX(d), XMM0);
MOVLHPS(Rd, XMM0);
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}
void Jit64::ps_cmpXX(UGeckoInstruction inst)


@ -9,7 +9,7 @@
#include "Core/CoreTiming.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"
@ -219,26 +219,32 @@ void Jit64::mtspr(UGeckoInstruction inst)
break;
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
MOV(32, R(RSCRATCH), gpr.R(d));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Read);
RegCache::Realize(Rd);
MOV(32, R(RSCRATCH), Rd);
AND(32, R(RSCRATCH), Imm32(0xff7f));
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(8, R(RSCRATCH), Imm8(1));
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
gpr.UnlockAll();
return;
}
case SPR_HID0:
{
MOV(32, R(RSCRATCH), gpr.R(d));
RCOpArg Rd = gpr.Use(d, RCMode::Read);
RegCache::Realize(Rd);
MOV(32, R(RSCRATCH), Rd);
BTR(32, R(RSCRATCH), Imm8(31 - 20)); // ICFI
MOV(32, PPCSTATE(spr[iIndex]), R(RSCRATCH));
FixupBranch dont_reset_icache = J_CC(CC_NC);
@ -255,13 +261,9 @@ void Jit64::mtspr(UGeckoInstruction inst)
}
// OK, this is easy.
if (!gpr.R(d).IsImm())
{
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
}
MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d));
gpr.UnlockAll();
RCOpArg Rd = gpr.BindOrImm(d, RCMode::Read);
RegCache::Realize(Rd);
MOV(32, PPCSTATE(spr[iIndex]), Rd);
}
void Jit64::mfspr(UGeckoInstruction inst)
@ -281,22 +283,23 @@ void Jit64::mfspr(UGeckoInstruction inst)
// redundant for the JIT.
// no register choice
gpr.FlushLockX(RDX, RAX);
gpr.FlushLockX(RCX);
RCX64Reg rdx = gpr.Scratch(RDX);
RCX64Reg rax = gpr.Scratch(RAX);
RCX64Reg rcx = gpr.Scratch(RCX);
MOV(64, R(RCX), ImmPtr(&CoreTiming::g));
MOV(64, rcx, ImmPtr(&CoreTiming::g));
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
// Scale downcount by the CPU overclocking factor.
CVTSI2SS(XMM0, PPCSTATE(downcount));
MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length)));
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength -
// Scaled_downcount)
ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer)));
SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
MULSS(XMM0, MDisp(rcx, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
CVTSS2SI(rdx, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, rax, MDisp(rcx, offsetof(CoreTiming::Globals, slice_length)));
SUB(64, rax, rdx); // cycles since the last CoreTiming::Advance() event is (slicelength -
// Scaled_downcount)
ADD(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, global_timer)));
SUB(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
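// Net effect of the arithmetic above, as one expression:
//   rax = global_timer + (slice_length - downcount * last_OC_factor_inverted)
//         - fake_TB_start_ticks
// i.e. CPU cycles since the time base was last re-anchored; the division by 12
// below then converts cycles to time-base ticks.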
// It might seem convenient to correct the timer for the block position here for even more
// accurate
// timing, but as of currently, this can break games. If we end up reading a time *after* the
@ -307,15 +310,15 @@ void Jit64::mfspr(UGeckoInstruction inst)
// Revolution,
// which won't get past the loading screen.
// if (js.downcountAmount)
// ADD(64, R(RAX), Imm32(js.downcountAmount));
// ADD(64, rax, Imm32(js.downcountAmount));
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
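// (Why the constant works: 0xAAAAAAAAAAAAAAAB == ceil(2^67 / 12), and the
// round-up error is small enough that (a * m) >> 67 == a / 12 for every 64-bit
// a. MUL leaves the high 64 bits of the 128-bit product in RDX, and the SHR by
// 3 below supplies the remaining 67 - 64 = 3 bits of shift.)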
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX));
MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value)));
SHR(64, R(RDX), Imm8(3));
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
MOV(64, rdx, Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, rdx);
MOV(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_value)));
SHR(64, rdx, Imm8(3));
ADD(64, rax, rdx);
MOV(64, PPCSTATE(spr[SPR_TL]), rax);
if (CanMergeNextInstructions(1))
{
@ -330,40 +333,42 @@ void Jit64::mfspr(UGeckoInstruction inst)
{
js.downcountAmount++;
js.skipInstructions = 1;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RCX64Reg Rn = gpr.Bind(n, RCMode::Write);
RegCache::Realize(Rd, Rn);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
MOV(32, Rd, rax);
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
MOV(32, Rn, rax);
SHR(64, rax, Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
MOV(32, Rd, rax);
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
MOV(32, Rn, rax);
break;
}
}
gpr.Lock(d);
gpr.BindToRegister(d, false);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
SHR(64, rax, Imm8(32));
MOV(32, Rd, rax);
break;
}
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOVZX(32, 16, Rd, PPCSTATE(xer_stringctrl));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
OR(32, Rd, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
OR(32, Rd, R(RSCRATCH));
break;
}
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
@ -372,26 +377,25 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_PMC4:
FALLBACK_IF(true);
default:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, PPCSTATE(spr[iIndex]));
break;
}
gpr.UnlockAllX();
gpr.UnlockAll();
}
}
void Jit64::mtmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
if (!gpr.R(inst.RS).IsImm())
{
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
}
MOV(32, PPCSTATE(msr), gpr.R(inst.RS));
gpr.UnlockAll();
gpr.Flush();
fpr.Flush();
@ -430,10 +434,9 @@ void Jit64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Privileged?
gpr.Lock(inst.RD);
gpr.BindToRegister(inst.RD, false, true);
MOV(32, gpr.R(inst.RD), PPCSTATE(msr));
gpr.UnlockAll();
RCX64Reg Rd = gpr.Bind(inst.RD, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, PPCSTATE(msr));
}
void Jit64::mftb(UGeckoInstruction inst)
@ -448,13 +451,13 @@ void Jit64::mfcr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
int d = inst.RD;
gpr.FlushLockX(RSCRATCH_EXTRA);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
CALL(asm_routines.mfcr);
gpr.Lock(d);
gpr.BindToRegister(d, false, true);
MOV(32, gpr.R(d), R(RSCRATCH));
gpr.UnlockAll();
gpr.UnlockAllX();
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, R(RSCRATCH));
}
void Jit64::mtcrf(UGeckoInstruction inst)
@ -466,13 +469,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
u32 crm = inst.CRM;
if (crm != 0)
{
if (gpr.R(inst.RS).IsImm())
if (gpr.IsImm(inst.RS))
{
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
u8 newcr = (gpr.R(inst.RS).Imm32() >> (28 - (i * 4))) & 0xF;
u8 newcr = (gpr.Imm32(inst.RS) >> (28 - (i * 4))) & 0xF;
u64 newcrval = PowerPC::PPCCRToInternal(newcr);
if ((s64)newcrval == (s32)newcrval)
{
@ -489,13 +492,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
else
{
MOV(64, R(RSCRATCH2), ImmPtr(PowerPC::m_crTable.data()));
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
RCX64Reg Rs = gpr.Bind(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
MOV(32, R(RSCRATCH), gpr.R(inst.RS));
MOV(32, R(RSCRATCH), Rs);
if (i != 7)
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
if (i != 0)
@ -504,7 +507,6 @@ void Jit64::mtcrf(UGeckoInstruction inst)
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
}
}
gpr.UnlockAll();
}
}
}
@ -653,11 +655,12 @@ void Jit64::mffsx(UGeckoInstruction inst)
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
int d = inst.FD;
fpr.BindToRegister(d, false, true);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000));
OR(64, R(RSCRATCH), R(RSCRATCH2));
MOVQ_xmm(XMM0, R(RSCRATCH));
MOVSD(fpr.RX(d), R(XMM0));
MOVSD(Rd, R(XMM0));
}
// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
@ -751,10 +754,14 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
}
int b = inst.FB;
if (fpr.R(b).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(b));
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
if (Rb.IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), Rb.GetSimpleReg());
else
MOV(32, R(RSCRATCH), fpr.R(b));
MOV(32, R(RSCRATCH), Rb);
MOV(32, R(RSCRATCH2), PPCSTATE(fpscr));
AND(32, R(RSCRATCH), Imm32(mask));


@ -0,0 +1,284 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
using preg_t = size_t;
class PPCCachedReg
{
public:
enum class LocationType
{
/// Value is currently at its default location
Default,
/// Value is currently bound to a x64 register
Bound,
/// Value is known as an immediate and has not been written back to its default location
Immediate,
/// Value is known as an immediate and is already present at its default location
SpeculativeImmediate,
};
PPCCachedReg() = default;
explicit PPCCachedReg(Gen::OpArg default_location_)
: default_location(default_location_), location(default_location_)
{
}
const Gen::OpArg& Location() const { return location; }
LocationType GetLocationType() const
{
if (!away)
{
ASSERT(!revertable);
if (location.IsImm())
return LocationType::SpeculativeImmediate;
ASSERT(location == default_location);
return LocationType::Default;
}
ASSERT(location.IsImm() || location.IsSimpleReg());
return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
}
bool IsAway() const { return away; }
bool IsBound() const { return GetLocationType() == LocationType::Bound; }
void SetBoundTo(Gen::X64Reg xreg)
{
away = true;
location = Gen::R(xreg);
}
void SetFlushed()
{
ASSERT(!revertable);
away = false;
location = default_location;
}
void SetToImm32(u32 imm32, bool dirty = true)
{
away |= dirty;
location = Gen::Imm32(imm32);
}
bool IsRevertable() const { return revertable; }
void SetRevertable()
{
ASSERT(IsBound());
revertable = true;
}
void SetRevert()
{
ASSERT(revertable);
revertable = false;
SetFlushed();
}
void SetCommit()
{
ASSERT(revertable);
revertable = false;
}
bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
void Unlock()
{
ASSERT(IsLocked());
locked--;
}
private:
Gen::OpArg default_location{};
Gen::OpArg location{};
bool away = false; // value not in source register
bool revertable = false;
size_t locked = 0;
};
class X64CachedReg
{
public:
preg_t Contents() const { return ppcReg; }
void SetBoundTo(preg_t ppcReg_, bool dirty_)
{
free = false;
ppcReg = ppcReg_;
dirty = dirty_;
}
void SetFlushed()
{
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
free = true;
dirty = false;
}
bool IsFree() const { return free && !locked; }
bool IsDirty() const { return dirty; }
void MakeDirty() { dirty = true; }
bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
void Unlock()
{
ASSERT(IsLocked());
locked--;
}
private:
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
bool free = true;
bool dirty = false;
size_t locked = 0;
};
class RCConstraint
{
public:
bool IsRealized() const { return realized != RealizedLoc::Invalid; }
bool IsActive() const
{
return IsRealized() || write || read || kill_imm || kill_mem || revertable;
}
bool ShouldLoad() const { return read; }
bool ShouldDirty() const { return write; }
bool ShouldBeRevertable() const { return revertable; }
bool ShouldKillImmediate() const { return kill_imm; }
bool ShouldKillMemory() const { return kill_mem; }
enum class RealizedLoc
{
Invalid,
Bound,
Imm,
Mem,
};
void Realized(RealizedLoc loc)
{
realized = loc;
ASSERT(IsRealized());
}
enum class ConstraintLoc
{
Bound,
BoundOrImm,
BoundOrMem,
Any,
};
void AddUse(RCMode mode) { AddConstraint(mode, ConstraintLoc::Any, false); }
void AddUseNoImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrMem, false); }
void AddBindOrImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrImm, false); }
void AddBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, false); }
void AddRevertableBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, true); }
private:
void AddConstraint(RCMode mode, ConstraintLoc loc, bool should_revertable)
{
if (IsRealized())
{
ASSERT(IsCompatible(mode, loc, should_revertable));
return;
}
if (should_revertable)
revertable = true;
switch (loc)
{
case ConstraintLoc::Bound:
kill_imm = true;
kill_mem = true;
break;
case ConstraintLoc::BoundOrImm:
kill_mem = true;
break;
case ConstraintLoc::BoundOrMem:
kill_imm = true;
break;
case ConstraintLoc::Any:
break;
}
switch (mode)
{
case RCMode::Read:
read = true;
break;
case RCMode::Write:
write = true;
break;
case RCMode::ReadWrite:
read = true;
write = true;
break;
}
}
bool IsCompatible(RCMode mode, ConstraintLoc loc, bool should_revertable) const
{
if (should_revertable && !revertable)
{
return false;
}
const bool is_loc_compatible = [&] {
switch (loc)
{
case ConstraintLoc::Bound:
return realized == RealizedLoc::Bound;
case ConstraintLoc::BoundOrImm:
return realized == RealizedLoc::Bound || realized == RealizedLoc::Imm;
case ConstraintLoc::BoundOrMem:
return realized == RealizedLoc::Bound || realized == RealizedLoc::Mem;
case ConstraintLoc::Any:
return true;
}
ASSERT(false);
return false;
}();
const bool is_mode_compatible = [&] {
switch (mode)
{
case RCMode::Read:
return read;
case RCMode::Write:
return write;
case RCMode::ReadWrite:
return read && write;
}
ASSERT(false);
return false;
}();
return is_loc_compatible && is_mode_compatible;
}
RealizedLoc realized = RealizedLoc::Invalid;
bool write = false;
bool read = false;
bool kill_imm = false;
bool kill_mem = false;
bool revertable = false;
};
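Taken together: each Use/Bind call ORs its requirements into the preg's RCConstraint, the first Realize picks a location satisfying the accumulated set, and constraints added after realization are only checked via IsCompatible. An illustrative sequence (register number made up):

  RCOpArg Ra = gpr.Use(5, RCMode::Read);     // loc Any, read
  RCX64Reg R5 = gpr.Bind(5, RCMode::Write);  // loc Bound: adds write, kill_imm, kill_mem
  RegCache::Realize(Ra, R5);                 // a single bind (load + dirty) satisfies both handles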


@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/FPURegCache.h"
#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"


@ -4,7 +4,7 @@
#pragma once
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
class Jit64;
@ -12,9 +12,9 @@ class FPURegCache final : public RegCache
{
public:
explicit FPURegCache(Jit64& jit);
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
protected:
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override;
void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;


@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"


@ -4,7 +4,7 @@
#pragma once
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
class Jit64;
@ -12,10 +12,10 @@ class GPRRegCache final : public RegCache
{
public:
explicit GPRRegCache(Jit64& jit);
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true);
protected:
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override;
void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;


@ -0,0 +1,729 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <limits>
#include <utility>
#include <variant>
#include "Common/Assert.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/VariantUtil.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
#include "Core/PowerPC/Jit64/RegCache/RCMode.h"
#include "Core/PowerPC/PowerPC.h"
using namespace Gen;
using namespace PowerPC;
RCOpArg RCOpArg::Imm32(u32 imm)
{
return RCOpArg{imm};
}
RCOpArg RCOpArg::R(X64Reg xr)
{
return RCOpArg{xr};
}
RCOpArg::RCOpArg() = default;
RCOpArg::RCOpArg(u32 imm) : rc(nullptr), contents(imm)
{
}
RCOpArg::RCOpArg(X64Reg xr) : rc(nullptr), contents(xr)
{
}
RCOpArg::RCOpArg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
{
rc->Lock(preg);
}
RCOpArg::~RCOpArg()
{
Unlock();
}
RCOpArg::RCOpArg(RCOpArg&& other) noexcept
: rc(std::exchange(other.rc, nullptr)),
contents(std::exchange(other.contents, std::monostate{}))
{
}
RCOpArg& RCOpArg::operator=(RCOpArg&& other) noexcept
{
Unlock();
rc = std::exchange(other.rc, nullptr);
contents = std::exchange(other.contents, std::monostate{});
return *this;
}
RCOpArg::RCOpArg(RCX64Reg&& other) noexcept
: rc(std::exchange(other.rc, nullptr)),
contents(VariantCast(std::exchange(other.contents, std::monostate{})))
{
}
RCOpArg& RCOpArg::operator=(RCX64Reg&& other) noexcept
{
Unlock();
rc = std::exchange(other.rc, nullptr);
contents = VariantCast(std::exchange(other.contents, std::monostate{}));
return *this;
}
void RCOpArg::Realize()
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
rc->Realize(*preg);
}
}
OpArg RCOpArg::Location() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
ASSERT(rc->IsRealized(*preg));
return rc->R(*preg);
}
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
{
return Gen::R(*xr);
}
else if (const u32* imm = std::get_if<u32>(&contents))
{
return Gen::Imm32(*imm);
}
ASSERT(false);
return {};
}
OpArg RCOpArg::ExtractWithByteOffset(int offset)
{
if (offset == 0)
return Location();
ASSERT(rc);
const preg_t preg = std::get<preg_t>(contents);
rc->StoreFromRegister(preg, RegCache::FlushMode::MaintainState);
OpArg result = rc->GetDefaultLocation(preg);
result.AddMemOffset(offset);
return result;
}
void RCOpArg::Unlock()
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
ASSERT(rc);
rc->Unlock(*preg);
}
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
{
// If rc, we got this from an RCX64Reg.
// If !rc, we got this from RCOpArg::R.
if (rc)
rc->UnlockX(*xr);
}
else
{
ASSERT(!rc);
}
rc = nullptr;
contents = std::monostate{};
}
bool RCOpArg::IsImm() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).IsImm();
}
else if (std::holds_alternative<u32>(contents))
{
return true;
}
return false;
}
s32 RCOpArg::SImm32() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).SImm32();
}
else if (const u32* imm = std::get_if<u32>(&contents))
{
return static_cast<s32>(*imm);
}
ASSERT(false);
return 0;
}
u32 RCOpArg::Imm32() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).Imm32();
}
else if (const u32* imm = std::get_if<u32>(&contents))
{
return *imm;
}
ASSERT(false);
return 0;
}
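// For orientation, the states of RCOpArg's variant by construction site:
//   std::monostate -> default-constructed or moved-from
//   u32            -> RCOpArg::Imm32()  (no regcache interaction)
//   Gen::X64Reg    -> RCOpArg::R(), or adopted from a consumed RCX64Reg
//   preg_t         -> the RegCache Use/Bind family (holds a lock on the preg)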
RCX64Reg::RCX64Reg() = default;
RCX64Reg::RCX64Reg(RegCache* rc_, preg_t preg) : rc(rc_), contents(preg)
{
rc->Lock(preg);
}
RCX64Reg::RCX64Reg(RegCache* rc_, X64Reg xr) : rc(rc_), contents(xr)
{
rc->LockX(xr);
}
RCX64Reg::~RCX64Reg()
{
Unlock();
}
RCX64Reg::RCX64Reg(RCX64Reg&& other) noexcept
: rc(std::exchange(other.rc, nullptr)),
contents(std::exchange(other.contents, std::monostate{}))
{
}
RCX64Reg& RCX64Reg::operator=(RCX64Reg&& other) noexcept
{
Unlock();
rc = std::exchange(other.rc, nullptr);
contents = std::exchange(other.contents, std::monostate{});
return *this;
}
void RCX64Reg::Realize()
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
rc->Realize(*preg);
}
}
RCX64Reg::operator X64Reg() const &
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
ASSERT(rc->IsRealized(*preg));
return rc->RX(*preg);
}
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
{
return *xr;
}
ASSERT(false);
return {};
}
RCX64Reg::operator OpArg() const &
{
return Gen::R(RCX64Reg::operator X64Reg());
}
void RCX64Reg::Unlock()
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
ASSERT(rc);
rc->Unlock(*preg);
}
else if (const X64Reg* xr = std::get_if<X64Reg>(&contents))
{
ASSERT(rc);
rc->UnlockX(*xr);
}
else
{
ASSERT(!rc);
}
rc = nullptr;
contents = std::monostate{};
}
RCForkGuard::RCForkGuard(RegCache& rc_) : rc(&rc_), m_regs(rc_.m_regs), m_xregs(rc_.m_xregs)
{
ASSERT(!rc->IsAnyConstraintActive());
}
RCForkGuard::RCForkGuard(RCForkGuard&& other) noexcept
: rc(other.rc), m_regs(std::move(other.m_regs)), m_xregs(std::move(other.m_xregs))
{
other.rc = nullptr;
}
void RCForkGuard::EndFork()
{
if (!rc)
return;
ASSERT(!rc->IsAnyConstraintActive());
rc->m_regs = m_regs;
rc->m_xregs = m_xregs;
rc = nullptr;
}
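RCForkGuard exists so the JIT can emit two alternative code paths from one allocator state: the guard snapshots m_regs/m_xregs at construction and EndFork rolls the bookkeeping back. A plausible shape for a call site (branching between the paths is the caller's business and is not shown here):

  RCForkGuard guard = gpr.Fork();  // snapshot the cache state
  // ... emit path A; the cache may bind and flush registers ...
  guard.EndFork();                 // restore the snapshot
  // ... emit path B against the exact same starting state ...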
RegCache::RegCache(Jit64& jit) : m_jit{jit}
{
}
void RegCache::Start()
{
m_xregs.fill({});
for (size_t i = 0; i < m_regs.size(); i++)
{
m_regs[i] = PPCCachedReg{GetDefaultLocation(i)};
}
}
void RegCache::SetEmitter(XEmitter* emitter)
{
m_emitter = emitter;
}
bool RegCache::SanityCheck() const
{
for (size_t i = 0; i < m_regs.size(); i++)
{
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::SpeculativeImmediate:
case PPCCachedReg::LocationType::Immediate:
break;
case PPCCachedReg::LocationType::Bound:
{
if (m_regs[i].IsLocked() || m_regs[i].IsRevertable())
return false;
Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
if (m_xregs[xr].IsLocked())
return false;
if (m_xregs[xr].Contents() != i)
return false;
break;
}
}
}
return true;
}
RCOpArg RegCache::Use(preg_t preg, RCMode mode)
{
m_constraints[preg].AddUse(mode);
return RCOpArg{this, preg};
}
RCOpArg RegCache::UseNoImm(preg_t preg, RCMode mode)
{
m_constraints[preg].AddUseNoImm(mode);
return RCOpArg{this, preg};
}
RCOpArg RegCache::BindOrImm(preg_t preg, RCMode mode)
{
m_constraints[preg].AddBindOrImm(mode);
return RCOpArg{this, preg};
}
RCX64Reg RegCache::Bind(preg_t preg, RCMode mode)
{
m_constraints[preg].AddBind(mode);
return RCX64Reg{this, preg};
}
RCX64Reg RegCache::RevertableBind(preg_t preg, RCMode mode)
{
m_constraints[preg].AddRevertableBind(mode);
return RCX64Reg{this, preg};
}
RCX64Reg RegCache::Scratch()
{
return Scratch(GetFreeXReg());
}
RCX64Reg RegCache::Scratch(X64Reg xr)
{
FlushX(xr);
return RCX64Reg{this, xr};
}
RCForkGuard RegCache::Fork()
{
return RCForkGuard{*this};
}
void RegCache::Flush(BitSet32 pregs)
{
ASSERT_MSG(
DYNA_REC,
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
"Someone forgot to unlock a X64 reg");
for (preg_t i : pregs)
{
ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(),
"Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i));
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
break;
case PPCCachedReg::LocationType::SpeculativeImmediate:
// We can have a cached value without a host register through speculative constants.
// It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
// if PPCSTATE is modified externally (e.g. fallback to interpreter).
m_regs[i].SetFlushed();
break;
case PPCCachedReg::LocationType::Bound:
case PPCCachedReg::LocationType::Immediate:
StoreFromRegister(i);
break;
}
}
}
void RegCache::Revert()
{
ASSERT(IsAllUnlocked());
for (auto& reg : m_regs)
{
if (reg.IsRevertable())
reg.SetRevert();
}
}
void RegCache::Commit()
{
ASSERT(IsAllUnlocked());
for (auto& reg : m_regs)
{
if (reg.IsRevertable())
reg.SetCommit();
}
}
bool RegCache::IsAllUnlocked() const
{
return std::none_of(m_regs.begin(), m_regs.end(), [](const auto& r) { return r.IsLocked(); }) &&
std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }) &&
!IsAnyConstraintActive();
}
void RegCache::PreloadRegisters(BitSet32 to_preload)
{
for (preg_t preg : to_preload)
{
if (NumFreeRegisters() < 2)
return;
if (!R(preg).IsImm())
BindToRegister(preg, true, false);
}
}
BitSet32 RegCache::RegistersInUse() const
{
BitSet32 result;
for (size_t i = 0; i < m_xregs.size(); i++)
{
if (!m_xregs[i].IsFree())
result[i] = true;
}
return result;
}
void RegCache::FlushX(X64Reg reg)
{
ASSERT_MSG(DYNA_REC, reg < m_xregs.size(), "Flushing non-existent reg %i", reg);
ASSERT(!m_xregs[reg].IsLocked());
if (!m_xregs[reg].IsFree())
{
StoreFromRegister(m_xregs[reg].Contents());
}
}
void RegCache::DiscardRegContentsIfCached(preg_t preg)
{
if (m_regs[preg].IsBound())
{
X64Reg xr = m_regs[preg].Location().GetSimpleReg();
m_xregs[xr].SetFlushed();
m_regs[preg].SetFlushed();
}
}
void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
{
if (!m_regs[i].IsBound())
{
X64Reg xr = GetFreeXReg();
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg %i already dirty", xr);
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state");
m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
if (doLoad)
{
LoadRegister(i, xr);
}
ASSERT_MSG(DYNA_REC,
std::none_of(m_regs.begin(), m_regs.end(),
[xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
"Xreg %i already bound", xr);
m_regs[i].SetBoundTo(xr);
}
else
{
// reg location must be simplereg; memory locations
// and immediates are taken care of above.
if (makeDirty)
m_xregs[RX(i)].MakeDirty();
}
ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg should have been flushed");
}
void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
{
// When a transaction is in progress, allowing the store would overwrite the old value.
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
bool doStore = false;
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::SpeculativeImmediate:
return;
case PPCCachedReg::LocationType::Bound:
{
X64Reg xr = RX(i);
doStore = m_xregs[xr].IsDirty();
if (mode == FlushMode::Full)
m_xregs[xr].SetFlushed();
break;
}
case PPCCachedReg::LocationType::Immediate:
doStore = true;
break;
}
if (doStore)
StoreRegister(i, GetDefaultLocation(i));
if (mode == FlushMode::Full)
m_regs[i].SetFlushed();
}
X64Reg RegCache::GetFreeXReg()
{
size_t aCount;
const X64Reg* aOrder = GetAllocationOrder(&aCount);
for (size_t i = 0; i < aCount; i++)
{
X64Reg xr = aOrder[i];
if (m_xregs[xr].IsFree())
{
return xr;
}
}
// Okay, not found; run the register allocator heuristic and figure out which register we should
// clobber.
float min_score = std::numeric_limits<float>::max();
X64Reg best_xreg = INVALID_REG;
size_t best_preg = 0;
for (size_t i = 0; i < aCount; i++)
{
X64Reg xreg = (X64Reg)aOrder[i];
preg_t preg = m_xregs[xreg].Contents();
if (m_xregs[xreg].IsLocked() || m_regs[preg].IsLocked())
continue;
float score = ScoreRegister(xreg);
if (score < min_score)
{
min_score = score;
best_xreg = xreg;
best_preg = preg;
}
}
if (best_xreg != INVALID_REG)
{
StoreFromRegister(best_preg);
return best_xreg;
}
// Still no dice? Die!
ASSERT_MSG(DYNA_REC, false, "Regcache ran out of regs");
return INVALID_REG;
}
int RegCache::NumFreeRegisters() const
{
int count = 0;
size_t aCount;
const X64Reg* aOrder = GetAllocationOrder(&aCount);
for (size_t i = 0; i < aCount; i++)
if (m_xregs[aOrder[i]].IsFree())
count++;
return count;
}
// Estimate roughly how bad it would be to de-allocate this register. Higher score
// means more bad.
float RegCache::ScoreRegister(X64Reg xreg) const
{
preg_t preg = m_xregs[xreg].Contents();
float score = 0;
// If it's not dirty, we don't need a store to write it back to the register file, so
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
// to the number of extra stores it causes.
if (m_xregs[xreg].IsDirty())
score += 2;
// If the register isn't actually needed in a physical register for a later instruction,
// writing it back to the register file isn't quite as bad.
if (GetRegUtilization()[preg])
{
// Don't look too far ahead; we don't want to have quadratic compilation times for
// enormous block sizes!
// This actually improves register allocation a tiny bit; I'm not sure why.
u32 lookahead = std::min(m_jit.js.instructionsLeft, 64);
// Count how many other registers are going to be used before we need this one again.
u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
// Totally ad-hoc heuristic to bias based on how many other registers we'll need
// before this one gets used again.
score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
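// Endpoints of this term, for feel: regs_in_count == 0 gives +11 (needed again
// soon, expensive to evict); regs_in_count == 31 gives +1 (many other
// registers get touched first, cheap to evict).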
}
return score;
}
const OpArg& RegCache::R(preg_t preg) const
{
return m_regs[preg].Location();
}
X64Reg RegCache::RX(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
return m_regs[preg].Location().GetSimpleReg();
}
void RegCache::Lock(preg_t preg)
{
m_regs[preg].Lock();
}
void RegCache::Unlock(preg_t preg)
{
m_regs[preg].Unlock();
if (!m_regs[preg].IsLocked())
{
// Fully unlocked, reset realization state.
m_constraints[preg] = {};
}
}
void RegCache::LockX(X64Reg xr)
{
m_xregs[xr].Lock();
}
void RegCache::UnlockX(X64Reg xr)
{
m_xregs[xr].Unlock();
}
bool RegCache::IsRealized(preg_t preg) const
{
return m_constraints[preg].IsRealized();
}
void RegCache::Realize(preg_t preg)
{
if (m_constraints[preg].IsRealized())
return;
const bool load = m_constraints[preg].ShouldLoad();
const bool dirty = m_constraints[preg].ShouldDirty();
const bool kill_imm = m_constraints[preg].ShouldKillImmediate();
const bool kill_mem = m_constraints[preg].ShouldKillMemory();
const auto do_bind = [&] {
BindToRegister(preg, load, dirty);
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Bound);
};
if (m_constraints[preg].ShouldBeRevertable())
{
StoreFromRegister(preg, FlushMode::MaintainState);
do_bind();
m_regs[preg].SetRevertable();
return;
}
switch (m_regs[preg].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
if (kill_mem)
{
do_bind();
return;
}
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
return;
case PPCCachedReg::LocationType::Bound:
do_bind();
return;
case PPCCachedReg::LocationType::Immediate:
case PPCCachedReg::LocationType::SpeculativeImmediate:
if (dirty || kill_imm)
{
do_bind();
return;
}
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm);
break;
}
}
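// Realize() as a table, keyed by the current location type:
//   revertable bind         -> store the old value, bind, SetRevertable
//   Default                 -> bind if kill_mem, else stay in memory (Mem)
//   Bound                   -> bind (stays bound)
//   (Speculative)Immediate  -> bind if write or kill_imm, else stay as Imm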
bool RegCache::IsAnyConstraintActive() const
{
return std::any_of(m_constraints.begin(), m_constraints.end(),
[](const auto& c) { return c.IsActive(); });
}


@ -0,0 +1,222 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cinttypes>
#include <cstddef>
#include <type_traits>
#include <variant>
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
#include "Core/PowerPC/PPCAnalyst.h"
class Jit64;
enum class RCMode;
class RCOpArg;
class RCX64Reg;
class RegCache;
using preg_t = size_t;
static constexpr size_t NUM_XREGS = 16;
class RCOpArg
{
public:
static RCOpArg Imm32(u32 imm);
static RCOpArg R(Gen::X64Reg xr);
RCOpArg();
~RCOpArg();
RCOpArg(RCOpArg&&) noexcept;
RCOpArg& operator=(RCOpArg&&) noexcept;
RCOpArg(RCX64Reg&&) noexcept;
RCOpArg& operator=(RCX64Reg&&) noexcept;
RCOpArg(const RCOpArg&) = delete;
RCOpArg& operator=(const RCOpArg&) = delete;
void Realize();
Gen::OpArg Location() const;
operator Gen::OpArg() const & { return Location(); }
operator Gen::OpArg() const && = delete;
bool IsSimpleReg() const { return Location().IsSimpleReg(); }
bool IsSimpleReg(Gen::X64Reg reg) const { return Location().IsSimpleReg(reg); }
Gen::X64Reg GetSimpleReg() const { return Location().GetSimpleReg(); }
// Use to extract bytes from a register using the regcache. offset is in bytes.
Gen::OpArg ExtractWithByteOffset(int offset);
void Unlock();
bool IsImm() const;
s32 SImm32() const;
u32 Imm32() const;
bool IsZero() const { return IsImm() && Imm32() == 0; }
private:
friend class RegCache;
explicit RCOpArg(u32 imm);
explicit RCOpArg(Gen::X64Reg xr);
RCOpArg(RegCache* rc_, preg_t preg);
RegCache* rc = nullptr;
std::variant<std::monostate, Gen::X64Reg, u32, preg_t> contents;
};
class RCX64Reg
{
public:
RCX64Reg();
~RCX64Reg();
RCX64Reg(RCX64Reg&&) noexcept;
RCX64Reg& operator=(RCX64Reg&&) noexcept;
RCX64Reg(const RCX64Reg&) = delete;
RCX64Reg& operator=(const RCX64Reg&) = delete;
void Realize();
operator Gen::OpArg() const &;
operator Gen::X64Reg() const &;
operator Gen::OpArg() const && = delete;
operator Gen::X64Reg() const && = delete;
void Unlock();
private:
friend class RegCache;
friend class RCOpArg;
RCX64Reg(RegCache* rc_, preg_t preg);
RCX64Reg(RegCache* rc_, Gen::X64Reg xr);
RegCache* rc = nullptr;
std::variant<std::monostate, Gen::X64Reg, preg_t> contents;
};
class RCForkGuard
{
public:
~RCForkGuard() { EndFork(); }
RCForkGuard(RCForkGuard&&) noexcept;
RCForkGuard(const RCForkGuard&) = delete;
RCForkGuard& operator=(const RCForkGuard&) = delete;
RCForkGuard& operator=(RCForkGuard&&) = delete;
void EndFork();
private:
friend class RegCache;
explicit RCForkGuard(RegCache& rc_);
RegCache* rc;
std::array<PPCCachedReg, 32> m_regs;
std::array<X64CachedReg, NUM_XREGS> m_xregs;
};
class RegCache
{
public:
enum class FlushMode
{
Full,
MaintainState,
};
explicit RegCache(Jit64& jit);
virtual ~RegCache() = default;
void Start();
void SetEmitter(Gen::XEmitter* emitter);
bool SanityCheck() const;
template <typename... Ts>
static void Realize(Ts&... rc)
{
static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
(rc.Realize(), ...);
}
template <typename... Ts>
static void Unlock(Ts&... rc)
{
static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
(rc.Unlock(), ...);
}
template <typename... Args>
bool IsImm(Args... pregs) const
{
static_assert(sizeof...(pregs) > 0);
return (R(pregs).IsImm() && ...);
}
u32 Imm32(preg_t preg) const { return R(preg).Imm32(); }
s32 SImm32(preg_t preg) const { return R(preg).SImm32(); }
RCOpArg Use(preg_t preg, RCMode mode);
RCOpArg UseNoImm(preg_t preg, RCMode mode);
RCOpArg BindOrImm(preg_t preg, RCMode mode);
RCX64Reg Bind(preg_t preg, RCMode mode);
RCX64Reg RevertableBind(preg_t preg, RCMode mode);
RCX64Reg Scratch();
RCX64Reg Scratch(Gen::X64Reg xr);
RCForkGuard Fork();
void Flush(BitSet32 pregs = BitSet32::AllTrue(32));
void Revert();
void Commit();
bool IsAllUnlocked() const;
void PreloadRegisters(BitSet32 pregs);
BitSet32 RegistersInUse() const;
protected:
friend class RCOpArg;
friend class RCX64Reg;
friend class RCForkGuard;
virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;
virtual BitSet32 GetRegUtilization() const = 0;
virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
void FlushX(Gen::X64Reg reg);
void DiscardRegContentsIfCached(preg_t preg);
void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full);
Gen::X64Reg GetFreeXReg();
int NumFreeRegisters() const;
float ScoreRegister(Gen::X64Reg xreg) const;
const Gen::OpArg& R(preg_t preg) const;
Gen::X64Reg RX(preg_t preg) const;
void Lock(preg_t preg);
void Unlock(preg_t preg);
void LockX(Gen::X64Reg xr);
void UnlockX(Gen::X64Reg xr);
bool IsRealized(preg_t preg) const;
void Realize(preg_t preg);
bool IsAnyConstraintActive() const;
Jit64& m_jit;
std::array<PPCCachedReg, 32> m_regs;
std::array<X64CachedReg, NUM_XREGS> m_xregs;
std::array<RCConstraint, 32> m_constraints;
Gen::XEmitter* m_emitter = nullptr;
};


@ -0,0 +1,12 @@
// Copyright 2018 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
enum class RCMode
{
Read,
Write,
ReadWrite,
};


@ -70,11 +70,6 @@ protected:
// so just fixup that branch instead of testing for a DSI again.
bool fixupExceptionHandler;
Gen::FixupBranch exceptionHandler;
// If these are set, we've stored the old value of a register which will be loaded in
// revertLoad,
// which lets us revert it on the exception path.
int revertGprLoad;
int revertFprLoad;
bool assumeNoPairedQuantize;
std::map<u8, u32> constantGqr;