diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 0d76fb8c47..a1bb58f238 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -196,6 +196,8 @@ if(_M_X86)
     PowerPC/Jit64IL/IR_X86.cpp
     PowerPC/Jit64IL/JitIL.cpp
     PowerPC/Jit64IL/JitIL_Tables.cpp
+    PowerPC/Jit64/FPURegCache.cpp
+    PowerPC/Jit64/GPRRegCache.cpp
     PowerPC/Jit64/Jit64_Tables.cpp
     PowerPC/Jit64/JitAsm.cpp
     PowerPC/Jit64/Jit_Branch.cpp
diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj
index 47845d7b3d..13492f654f 100644
--- a/Source/Core/Core/Core.vcxproj
+++ b/Source/Core/Core/Core.vcxproj
@@ -225,6 +225,8 @@
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
@@ -427,6 +429,8 @@
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters
index ea1040901e..00023e98f4 100644
--- a/Source/Core/Core/Core.vcxproj.filters
+++ b/Source/Core/Core/Core.vcxproj.filters
@@ -666,6 +666,12 @@
       <Filter>PowerPC\JitIL</Filter>
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
       <Filter>PowerPC\Jit64</Filter>
@@ -1262,6 +1268,12 @@
       <Filter>PowerPC\JitIL</Filter>
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
       <Filter>PowerPC\Jit64</Filter>
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
new file mode 100644
index 0000000000..698f7da640
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp
@@ -0,0 +1,53 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
+void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+  emit->MOVAPD(new_loc, regs[preg].location.GetSimpleReg());
+}
+
+void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+  emit->MOVAPD(new_loc, regs[preg].location);
+}
+
+const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
+{
+  static const X64Reg allocation_order[] = {XMM6,  XMM7,  XMM8,  XMM9, XMM10, XMM11, XMM12,
+                                            XMM13, XMM14, XMM15, XMM2, XMM3,  XMM4,  XMM5};
+  *count = sizeof(allocation_order) / sizeof(X64Reg);
+  return allocation_order;
+}
+
+OpArg FPURegCache::GetDefaultLocation(size_t reg) const
+{
+  return PPCSTATE(ps[reg][0]);
+}
+
+BitSet32 FPURegCache::GetRegUtilization()
+{
+  return jit->js.op->gprInReg;
+}
+
+BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+  BitSet32 regs_used;
+
+  for (u32 i = 1; i < lookahead; i++)
+  {
+    BitSet32 regs_in = jit->js.op[i].fregsIn;
+    regs_used |= regs_in;
+    if (regs_in[preg])
+      return regs_used;
+  }
+
+  return regs_used;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h
new file mode 100644
index 0000000000..0ea2faf2f8
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/FPURegCache.h
@@ -0,0 +1,18 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class FPURegCache final : public RegCache
+{
+public:
+  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
+  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
+  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+  Gen::OpArg GetDefaultLocation(size_t reg) const override;
+  BitSet32 GetRegUtilization() override;
+  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
new file mode 100644
index 0000000000..c71e4471e8
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
+
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
+void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+  emit->MOV(32, new_loc, regs[preg].location);
+}
+
+void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+  emit->MOV(32, ::Gen::R(new_loc), regs[preg].location);
+}
+
+OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
+{
+  return PPCSTATE(gpr[reg]);
+}
+
+const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
+{
+  static const X64Reg allocation_order[] = {
+// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
+// this.
+#ifdef _WIN32
+      RSI, RDI, R13, R14, R15, R8,
+      R9,  R10, R11, R12, RCX
+#else
+      R12, R13, R14, R15, RSI, RDI,
+      R8,  R9,  R10, R11, RCX
+#endif
+  };
+  *count = sizeof(allocation_order) / sizeof(X64Reg);
+  return allocation_order;
+}
+
+void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty)
+{
+  // "dirty" can be false to avoid redundantly flushing an immediate when
+  // processing speculative constants.
+  DiscardRegContentsIfCached(preg);
+  regs[preg].away |= dirty;
+  regs[preg].location = Imm32(imm_value);
+}
+
+BitSet32 GPRRegCache::GetRegUtilization()
+{
+  return jit->js.op->gprInReg;
+}
+
+BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+  BitSet32 regs_used;
+
+  for (u32 i = 1; i < lookahead; i++)
+  {
+    BitSet32 regs_in = jit->js.op[i].regsIn;
+    regs_used |= regs_in;
+    if (regs_in[preg])
+      return regs_used;
+  }
+
+  return regs_used;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
new file mode 100644
index 0000000000..7032254611
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64/GPRRegCache.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class GPRRegCache final : public RegCache
+{
+public:
+  void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override;
+  void LoadRegister(size_t preg, Gen::X64Reg new_loc) override;
+  Gen::OpArg GetDefaultLocation(size_t reg) const override;
+  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+  void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true);
+  BitSet32 GetRegUtilization() override;
+  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 588d776fab..f2318971c2 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -21,6 +21,8 @@
 #include "Common/CommonTypes.h"
 #include "Common/x64ABI.h"
 #include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
 #include "Core/PowerPC/Jit64Common/Jit64Base.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
index 278d6a126c..c99f8511c7 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
@@ -14,7 +14,6 @@
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"
 
 using namespace Gen;
@@ -57,128 +56,17 @@
   // But only preload IF written OR reads >= 3
 }
 
-void RegCache::UnlockAll()
+void RegCache::DiscardRegContentsIfCached(size_t preg)
 {
-  for (auto& reg : regs)
-    reg.locked = false;
-}
-
-void RegCache::UnlockAllX()
-{
-  for (auto& xreg : xregs)
-    xreg.locked = false;
-}
-
-BitSet32 GPRRegCache::GetRegUtilization()
-{
-  return jit->js.op->gprInReg;
-}
-
-BitSet32 FPURegCache::GetRegUtilization()
-{
-  return jit->js.op->gprInReg;
-}
-
-BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
-  BitSet32 regsUsed;
-  for (u32 i = 1; i < lookahead; i++)
+  if (IsBound(preg))
   {
-    BitSet32 regsIn = jit->js.op[i].regsIn;
-    regsUsed |= regsIn;
-    if (regsIn[preg])
-      return regsUsed;
+    X64Reg xr = regs[preg].location.GetSimpleReg();
+    xregs[xr].free = true;
+    xregs[xr].dirty = false;
+    xregs[xr].ppcReg = INVALID_REG;
+    regs[preg].away = false;
+    regs[preg].location = GetDefaultLocation(preg);
   }
-  return regsUsed;
-}
-
-BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
-  BitSet32 regsUsed;
-  for (u32 i = 1; i < lookahead; i++)
-  {
-    BitSet32 regsIn = jit->js.op[i].fregsIn;
-    regsUsed |= regsIn;
-    if (regsIn[preg])
-      return regsUsed;
-  }
-  return regsUsed;
-}
-
-// Estimate roughly how bad it would be to de-allocate this register. Higher score
-// means more bad.
-float RegCache::ScoreRegister(X64Reg xr)
-{
-  size_t preg = xregs[xr].ppcReg;
-  float score = 0;
-
-  // If it's not dirty, we don't need a store to write it back to the register file, so
-  // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
-  // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
-  // to the number of extra stores it causes.
-  if (xregs[xr].dirty)
-    score += 2;
-
-  // If the register isn't actually needed in a physical register for a later instruction,
-  // writing it back to the register file isn't quite as bad.
-  if (GetRegUtilization()[preg])
-  {
-    // Don't look too far ahead; we don't want to have quadratic compilation times for
-    // enormous block sizes!
-    // This actually improves register allocation a tiny bit; I'm not sure why.
-    u32 lookahead = std::min(jit->js.instructionsLeft, 64);
-    // Count how many other registers are going to be used before we need this one again.
-    u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
-    // Totally ad-hoc heuristic to bias based on how many other registers we'll need
-    // before this one gets used again.
-    score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
-  }
-
-  return score;
-}
-
-X64Reg RegCache::GetFreeXReg()
-{
-  size_t aCount;
-  const X64Reg* aOrder = GetAllocationOrder(&aCount);
-  for (size_t i = 0; i < aCount; i++)
-  {
-    X64Reg xr = aOrder[i];
-    if (!xregs[xr].locked && xregs[xr].free)
-    {
-      return xr;
-    }
-  }
-
-  // Okay, not found; run the register allocator heuristic and figure out which register we should
-  // clobber.
-  float min_score = std::numeric_limits<float>::max();
-  X64Reg best_xreg = INVALID_REG;
-  size_t best_preg = 0;
-  for (size_t i = 0; i < aCount; i++)
-  {
-    X64Reg xreg = (X64Reg)aOrder[i];
-    size_t preg = xregs[xreg].ppcReg;
-    if (xregs[xreg].locked || regs[preg].locked)
-      continue;
-    float score = ScoreRegister(xreg);
-    if (score < min_score)
-    {
-      min_score = score;
-      best_xreg = xreg;
-      best_preg = preg;
-    }
-  }
-
-  if (best_xreg != INVALID_REG)
-  {
-    StoreFromRegister(best_preg);
-    return best_xreg;
-  }
-
-  // Still no dice? Die!
-  _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
-  return INVALID_REG;
 }
 
 void RegCache::FlushR(X64Reg reg)
 {
@@ -191,6 +79,35 @@
 }
 
+void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
+{
+  for (size_t i = 0; i < xregs.size(); i++)
+  {
+    if (xregs[i].locked)
+      PanicAlert("Someone forgot to unlock X64 reg %zu", i);
+  }
+
+  for (unsigned int i : regsToFlush)
+  {
+    if (regs[i].locked)
+    {
+      PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
+    }
+
+    if (regs[i].away)
+    {
+      if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
+      {
+        StoreFromRegister(i, mode);
+      }
+      else
+      {
+        _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
+      }
+    }
+  }
+}
+
 int RegCache::SanityCheck() const
 {
@@ -214,63 +131,6 @@
   return 0;
 }
 
-void RegCache::DiscardRegContentsIfCached(size_t preg)
-{
-  if (IsBound(preg))
-  {
-    X64Reg xr = regs[preg].location.GetSimpleReg();
-    xregs[xr].free = true;
-    xregs[xr].dirty = false;
-    xregs[xr].ppcReg = INVALID_REG;
-    regs[preg].away = false;
-    regs[preg].location = GetDefaultLocation(preg);
-  }
-}
-
-void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
-{
-  // "dirty" can be false to avoid redundantly flushing an immediate when
-  // processing speculative constants.
-  DiscardRegContentsIfCached(preg);
-  regs[preg].away |= dirty;
-  regs[preg].location = Imm32(immValue);
-}
-
-const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
-{
-  static const X64Reg allocationOrder[] = {
-// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
-// this.
-#ifdef _WIN32
-      RSI, RDI, R13, R14, R15, R8,
-      R9,  R10, R11, R12, RCX
-#else
-      R12, R13, R14, R15, RSI, RDI,
-      R8,  R9,  R10, R11, RCX
-#endif
-  };
-  *count = sizeof(allocationOrder) / sizeof(X64Reg);
-  return allocationOrder;
-}
-
-const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
-{
-  static const X64Reg allocationOrder[] = {XMM6,  XMM7,  XMM8,  XMM9, XMM10, XMM11, XMM12,
-                                           XMM13, XMM14, XMM15, XMM2, XMM3,  XMM4,  XMM5};
-  *count = sizeof(allocationOrder) / sizeof(X64Reg);
-  return allocationOrder;
-}
-
-OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
-{
-  return PPCSTATE(gpr[reg]);
-}
-
-OpArg FPURegCache::GetDefaultLocation(size_t reg) const
-{
-  return PPCSTATE(ps[reg][0]);
-}
-
 void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
 {
   if (regs[preg].away)
@@ -351,53 +211,60 @@
   }
 }
 
-void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
+void RegCache::UnlockAll()
 {
-  emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
+  for (auto& reg : regs)
+    reg.locked = false;
 }
 
-void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc)
+void RegCache::UnlockAllX()
 {
-  emit->MOV(32, newLoc, regs[preg].location);
+  for (auto& xreg : xregs)
+    xreg.locked = false;
 }
 
-void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc)
+X64Reg RegCache::GetFreeXReg()
 {
-  emit->MOVAPD(newLoc, regs[preg].location);
-}
-
-void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc)
-{
-  emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
-}
-
-void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
-{
-  for (size_t i = 0; i < xregs.size(); i++)
+  size_t aCount;
+  const X64Reg* aOrder = GetAllocationOrder(&aCount);
+  for (size_t i = 0; i < aCount; i++)
   {
-    if (xregs[i].locked)
-      PanicAlert("Someone forgot to unlock X64 reg %zu", i);
-  }
-
-  for (unsigned int i : regsToFlush)
-  {
-    if (regs[i].locked)
+    X64Reg xr = aOrder[i];
+    if (!xregs[xr].locked && xregs[xr].free)
     {
-      PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
-    }
-
-    if (regs[i].away)
-    {
-      if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
-      {
-        StoreFromRegister(i, mode);
-      }
-      else
-      {
-        _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
-      }
+      return xr;
     }
   }
+
+  // Okay, not found; run the register allocator heuristic and figure out which register we should
+  // clobber.
+  float min_score = std::numeric_limits<float>::max();
+  X64Reg best_xreg = INVALID_REG;
+  size_t best_preg = 0;
+  for (size_t i = 0; i < aCount; i++)
+  {
+    X64Reg xreg = (X64Reg)aOrder[i];
+    size_t preg = xregs[xreg].ppcReg;
+    if (xregs[xreg].locked || regs[preg].locked)
+      continue;
+    float score = ScoreRegister(xreg);
+    if (score < min_score)
+    {
+      min_score = score;
+      best_xreg = xreg;
+      best_preg = preg;
+    }
+  }
+
+  if (best_xreg != INVALID_REG)
+  {
+    StoreFromRegister(best_preg);
+    return best_xreg;
+  }
+
+  // Still no dice? Die!
+  _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
+  return INVALID_REG;
 }
 
 int RegCache::NumFreeRegisters()
 {
@@ -410,3 +277,35 @@
       count++;
   return count;
 }
+
+// Estimate roughly how bad it would be to de-allocate this register. Higher score
+// means more bad.
+float RegCache::ScoreRegister(X64Reg xr)
+{
+  size_t preg = xregs[xr].ppcReg;
+  float score = 0;
+
+  // If it's not dirty, we don't need a store to write it back to the register file, so
+  // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
+  // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
+  // to the number of extra stores it causes.
+  if (xregs[xr].dirty)
+    score += 2;
+
+  // If the register isn't actually needed in a physical register for a later instruction,
+  // writing it back to the register file isn't quite as bad.
+  if (GetRegUtilization()[preg])
+  {
+    // Don't look too far ahead; we don't want to have quadratic compilation times for
+    // enormous block sizes!
+    // This actually improves register allocation a tiny bit; I'm not sure why.
+    u32 lookahead = std::min(jit->js.instructionsLeft, 64);
+    // Count how many other registers are going to be used before we need this one again.
+    u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
+    // Totally ad-hoc heuristic to bias based on how many other registers we'll need
+    // before this one gets used again.
+    score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
+  }
+
+  return score;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
index f47e57e377..41fab3d79e 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
@@ -153,26 +153,3 @@ public:
   Gen::X64Reg GetFreeXReg();
   int NumFreeRegisters();
 };
-
-class GPRRegCache final : public RegCache
-{
-public:
-  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
-  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
-  Gen::OpArg GetDefaultLocation(size_t reg) const override;
-  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
-  void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
-  BitSet32 GetRegUtilization() override;
-  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};
-
-class FPURegCache final : public RegCache
-{
-public:
-  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
-  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
-  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
-  Gen::OpArg GetDefaultLocation(size_t reg) const override;
-  BitSet32 GetRegUtilization() override;
-  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};
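Reviewer note (not part of the patch): ScoreRegister(), CountRegsIn() and GetFreeXReg() are only relocated by this change, not modified, so the eviction behaviour stays the same. For anyone reading the moved heuristic for the first time, the snippet below is a minimal standalone sketch of the scoring rule; EvictionScore() and its parameters are hypothetical names used purely for illustration, not Dolphin's actual API, while the constants (the +2 dirty bias and the log2 lookahead term) are taken directly from the code above.

// Minimal, standalone model of the Jit64 regcache eviction heuristic.
// Illustrative only: EvictionScore() is a hypothetical name, not Dolphin's API.
#include <cmath>
#include <cstdio>

// Mirrors the logic of RegCache::ScoreRegister(): a higher score means the
// register is more costly to evict.
//  - evicting a dirty register costs an extra store back to the register file (+2)
//  - a register that is read again soon (few other registers used before it,
//    per CountRegsIn()) is penalised more than one needed far in the future
static float EvictionScore(bool dirty, bool needed_later, unsigned regs_in_count)
{
  float score = 0.0f;
  if (dirty)
    score += 2.0f;
  if (needed_later)
    score += 1.0f + 2.0f * (5.0f - std::log2(1.0f + static_cast<float>(regs_in_count)));
  return score;
}

int main()
{
  // Clean register that is never read again: best eviction candidate.
  std::printf("clean, unused later            : %5.2f\n", EvictionScore(false, false, 0));
  // Dirty register, but 20 other registers are read before it: middling.
  std::printf("dirty, needed after 20 others  : %5.2f\n", EvictionScore(true, true, 20));
  // Dirty register read again almost immediately: worst eviction candidate.
  std::printf("dirty, needed again immediately: %5.2f\n", EvictionScore(true, true, 0));
  return 0;
}

Compiled on its own, the three cases score roughly 0, 4.2 and 13, which matches the intent of the comments in ScoreRegister(): clean registers and registers that are not needed again soon are the ones GetFreeXReg() clobbers first.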