From 256a0cf4db17b767aeb8291037d016d822b40033 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 19 May 2017 16:57:31 +0200 Subject: [PATCH] Remove JITIL --- Source/Core/Core/CMakeLists.txt | 12 - Source/Core/Core/ConfigManager.cpp | 2 - Source/Core/Core/ConfigManager.h | 5 +- Source/Core/Core/Core.vcxproj | 15 - Source/Core/Core/Core.vcxproj.filters | 51 - Source/Core/Core/Movie.h | 2 +- .../Core/Core/PowerPC/Jit64Common/Jit64Base.h | 1 - Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 2354 ----------------- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 688 ----- Source/Core/Core/PowerPC/Jit64IL/JitIL.h | 83 - .../Core/PowerPC/Jit64IL/JitIL_Tables.cpp | 493 ---- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 1817 ------------- Source/Core/Core/PowerPC/JitILCommon/IR.h | 443 ---- .../Core/Core/PowerPC/JitILCommon/JitILBase.h | 130 - .../PowerPC/JitILCommon/JitILBase_Branch.cpp | 217 -- .../JitILCommon/JitILBase_FloatingPoint.cpp | 125 - .../PowerPC/JitILCommon/JitILBase_Integer.cpp | 559 ---- .../JitILCommon/JitILBase_LoadStore.cpp | 310 --- .../JitILBase_LoadStoreFloating.cpp | 139 - .../JitILCommon/JitILBase_LoadStorePaired.cpp | 56 - .../PowerPC/JitILCommon/JitILBase_Paired.cpp | 186 -- .../JitILCommon/JitILBase_SystemRegisters.cpp | 215 -- Source/Core/Core/PowerPC/JitInterface.cpp | 4 - Source/Core/Core/PowerPC/PowerPC.cpp | 2 +- Source/Core/Core/PowerPC/PowerPC.h | 12 +- .../DolphinWX/Config/GeneralConfigPane.cpp | 1 - 26 files changed, 9 insertions(+), 7913 deletions(-) delete mode 100644 Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp delete mode 100644 Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp delete mode 100644 Source/Core/Core/PowerPC/Jit64IL/JitIL.h delete mode 100644 Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/IR.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/IR.h delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase.h delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStoreFloating.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp delete mode 100644 Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 28e10f52ae..eb30e9c083 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -219,22 +219,10 @@ set(SRCS PowerPC/JitCommon/JitAsmCommon.cpp PowerPC/JitCommon/JitBase.cpp PowerPC/JitCommon/JitCache.cpp - PowerPC/JitILCommon/IR.cpp - PowerPC/JitILCommon/JitILBase_Branch.cpp - PowerPC/JitILCommon/JitILBase_LoadStore.cpp - PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp - PowerPC/JitILCommon/JitILBase_LoadStoreFloating.cpp - PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp - PowerPC/JitILCommon/JitILBase_Paired.cpp - PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp - PowerPC/JitILCommon/JitILBase_Integer.cpp ) if(_M_X86) set(SRCS ${SRCS} - PowerPC/Jit64IL/IR_X86.cpp - PowerPC/Jit64IL/JitIL.cpp - PowerPC/Jit64IL/JitIL_Tables.cpp PowerPC/Jit64/FPURegCache.cpp PowerPC/Jit64/GPRRegCache.cpp PowerPC/Jit64/Jit64_Tables.cpp diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp index bacb70b658..e710c6d323 100644 --- a/Source/Core/Core/ConfigManager.cpp +++ b/Source/Core/Core/ConfigManager.cpp @@ -582,8 +582,6 @@ void SConfig::LoadCoreSettings(IniFile& ini) core->Get("SlotB", (int*)&m_EXIDevice[1], ExpansionInterface::EXIDEVICE_NONE); core->Get("SerialPort1", (int*)&m_EXIDevice[2], ExpansionInterface::EXIDEVICE_NONE); core->Get("BBA_MAC", &m_bba_mac); - core->Get("TimeProfiling", &bJITILTimeProfiling, false); - core->Get("OutputIR", &bJITILOutputIR, false); for (int i = 0; i < SerialInterface::MAX_SI_CHANNELS; ++i) { core->Get(StringFromFormat("SIDevice%i", i), (u32*)&m_SIDevice[i], diff --git a/Source/Core/Core/ConfigManager.h b/Source/Core/Core/ConfigManager.h index 8559361c80..c9271ef729 100644 --- a/Source/Core/Core/ConfigManager.h +++ b/Source/Core/Core/ConfigManager.h @@ -74,9 +74,8 @@ struct SConfig : NonCopyable bool bAutomaticStart = false; bool bBootToPause = false; - int iCPUCore; + int iCPUCore; // Uses the values of PowerPC::CPUCore - // JIT (shared between JIT and JITIL) bool bJITNoBlockCache = false; bool bJITNoBlockLinking = false; bool bJITOff = false; @@ -91,8 +90,6 @@ struct SConfig : NonCopyable bool bJITPairedOff = false; bool bJITSystemRegistersOff = false; bool bJITBranchOff = false; - bool bJITILTimeProfiling = false; - bool bJITILOutputIR = false; bool bFastmem; bool bFPRF = false; diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index b046d1140e..87a03f73a3 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -249,18 +249,6 @@ - - - - - - - - - - - - @@ -496,14 +484,11 @@ - - - diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 1afefc23e1..7967b61478 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -43,9 +43,6 @@ {c88ec388-371f-4401-851c-a32dcdc0b88b} - - {f26d3866-92d1-4623-9445-caf9a065ed74} - {6204f663-bbd0-4eb5-bc15-e3778d8b6091} @@ -112,9 +109,6 @@ {2b41ab45-ba8c-45dc-92cc-9107c1fa3e36} - - {827afa93-1a80-4835-93ae-b5516d95867f} - {81956f71-d9fe-454f-96a6-855195d611c4} @@ -631,15 +625,6 @@ PowerPC\JitCommon - - PowerPC\JitIL - - - PowerPC\JitIL - - - PowerPC\JitIL - PowerPC\Jit64 @@ -676,33 +661,6 @@ PowerPC\Jit64 - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - HW %28Flipper/Hollywood%29\GCKeyboard @@ -1328,9 +1286,6 @@ PowerPC\JitCommon - - PowerPC\JitIL - PowerPC\Jit64 @@ -1343,12 +1298,6 @@ PowerPC\Jit64 - - PowerPC\JitILCommon - - - PowerPC\JitILCommon - HW %28Flipper/Hollywood%29\GCKeyboard diff --git a/Source/Core/Core/Movie.h b/Source/Core/Core/Movie.h index 30c93e2eec..84326dcdb6 100644 --- a/Source/Core/Core/Movie.h +++ b/Source/Core/Core/Movie.h @@ -81,7 +81,7 @@ struct DTMHeader bool bProgressive; bool bDSPHLE; bool bFastDiscSpeed; - u8 CPUCore; // 0 = interpreter, 1 = JIT, 2 = JITIL + u8 CPUCore; // Uses the values of PowerPC::CPUCore bool bEFBAccessEnable; bool bEFBCopyEnable; bool bSkipEFBCopyToRam; diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h index 49f4e52a9f..06de777807 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h @@ -19,7 +19,6 @@ namespace PPCAnalyst class CodeBuffer; } -// The following register assignments are common to Jit64 and Jit64IL: // RSCRATCH and RSCRATCH2 are always scratch registers and can be used without // limitation. constexpr Gen::X64Reg RSCRATCH = Gen::RAX; diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp deleted file mode 100644 index 6ef5e3c26e..0000000000 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ /dev/null @@ -1,2354 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -/* -For a more general explanation of the IR, see IR.cpp. - -X86 codegen is a backward pass followed by a forward pass. - -The first pass to actually doing codegen is a liveness analysis pass. -Liveness is important for two reasons: one, it lets us do dead code -elimination, which results both from earlier folding, PPC -instructions with unused parts like srawx, and just random strangeness. -The other bit is that is allows us to identify the last instruction to -use a value: this is absolutely essential for register allocation -because it the allocator needs to be able to free unused registers. -In addition, this allows eliminating redundant mov instructions in a lot -of cases. - -The register allocation is linear scan allocation. -*/ - -#ifdef _MSC_VER -#pragma warning( \ - disable : 4146) // unary minus operator applied to unsigned type, result still unsigned -#endif - -#include -#include -#include -#include - -#include "Common/BitSet.h" -#include "Common/CPUDetect.h" -#include "Common/CommonTypes.h" -#include "Common/MathUtil.h" -#include "Common/MsgHandler.h" -#include "Common/NonCopyable.h" -#include "Common/x64ABI.h" -#include "Common/x64Emitter.h" -#include "Core/CoreTiming.h" -#include "Core/HW/CPU.h" -#include "Core/HW/ProcessorInterface.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" -#include "Core/PowerPC/Jit64IL/JitIL.h" -#include "Core/PowerPC/PowerPC.h" - -using namespace IREmitter; -using namespace Gen; - -struct RegInfo final : private NonCopyable -{ - static constexpr size_t MAX_NUMBER_OF_REGS = 16; - - JitIL* Jit; - IRBuilder* Build = nullptr; - InstLoc FirstI; - - // IInfo contains (per instruction) - // Bits 0-1: Saturating count of number of instructions referencing this instruction. - // Bits 2-3: single bit per operand marking if this is the last instruction to reference that - // operand's result. - // Used to decide if we should free any registers associated with the operands after - // this instruction - // and if we can clobber the operands registers. - // Warning, Memory instruction use these bits slightly differently. - // Bits 15-31: Spill location - std::vector IInfo; - - // The last instruction which uses the result of this instruction. Used by the register allocator. - std::vector lastUsed; - - std::array regs{}; - std::array fregs{}; - u32 numSpills = 0; - u32 numFSpills = 0; - u32 exitNumber = 0; - - RegInfo(JitIL* j, InstLoc f, size_t insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) {} -}; - -static BitSet32 regsInUse(RegInfo& R) -{ - BitSet32 result; - for (size_t i = 0; i < RegInfo::MAX_NUMBER_OF_REGS; i++) - { - if (R.regs[i] != nullptr) - result[i] = true; - if (R.fregs[i] != nullptr) - result[16 + i] = true; - } - return result; -} - -static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) -{ - unsigned& info = R.IInfo[Op - R.FirstI]; - - if (info == 0) - R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1); - - if (info < 2) - info++; - - R.lastUsed[Op - R.FirstI] = std::max(R.lastUsed[Op - R.FirstI], I); -} - -static unsigned regReadUse(RegInfo& R, InstLoc I) -{ - return R.IInfo[I - R.FirstI] & 3; -} - -static u64 SlotSet[1000]; -alignas(16) static u8 FSlotSet[16 * 1000]; - -static OpArg regLocForSlot(RegInfo& RI, unsigned slot) -{ - return M(&SlotSet[slot - 1]); -} - -static unsigned regCreateSpill(RegInfo& RI, InstLoc I) -{ - unsigned newSpill = ++RI.numSpills; - RI.IInfo[I - RI.FirstI] |= newSpill << 16; - return newSpill; -} - -static unsigned regGetSpill(RegInfo& RI, InstLoc I) -{ - return RI.IInfo[I - RI.FirstI] >> 16; -} - -static void regSpill(RegInfo& RI, X64Reg reg) -{ - if (!RI.regs[reg]) - return; - - unsigned slot = regGetSpill(RI, RI.regs[reg]); - if (!slot) - { - slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg)); - } - - RI.regs[reg] = nullptr; -} - -static OpArg fregLocForSlot(RegInfo& RI, unsigned slot) -{ - return M(&FSlotSet[slot * 16]); -} - -static unsigned fregCreateSpill(RegInfo& RI, InstLoc I) -{ - unsigned newSpill = ++RI.numFSpills; - RI.IInfo[I - RI.FirstI] |= newSpill << 16; - return newSpill; -} - -static unsigned fregGetSpill(RegInfo& RI, InstLoc I) -{ - return RI.IInfo[I - RI.FirstI] >> 16; -} - -static void fregSpill(RegInfo& RI, X64Reg reg) -{ - if (!RI.fregs[reg]) - return; - - unsigned slot = fregGetSpill(RI, RI.fregs[reg]); - if (!slot) - { - slot = fregCreateSpill(RI, RI.fregs[reg]); - RI.Jit->MOVAPD(fregLocForSlot(RI, slot), reg); - } - - RI.fregs[reg] = nullptr; -} - -// RAX and RDX are scratch, so we don't allocate them -// (TODO: if we could lock RCX here too then we could allocate it - needed for -// shifts) - -// 64-bit - calling conventions differ between Linux & Windows, so... -#ifdef _WIN32 -static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11}; -#else -static const X64Reg RegAllocOrder[] = {R12, R13, R14, R8, R9, R10, R11}; -#endif -static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg); -static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, - XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; -static const int FRegAllocSize = sizeof(FRegAllocOrder) / sizeof(X64Reg); - -static X64Reg regFindFreeReg(RegInfo& RI) -{ - for (auto& reg : RegAllocOrder) - { - if (RI.regs[reg] == nullptr) - return reg; - } - - int bestIndex = -1; - InstLoc bestEnd = nullptr; - for (int i = 0; i < RegAllocSize; ++i) - { - const InstLoc start = RI.regs[RegAllocOrder[i]]; - const InstLoc end = RI.lastUsed[start - RI.FirstI]; - - if (bestEnd < end) - { - bestEnd = end; - bestIndex = i; - } - } - - X64Reg reg = RegAllocOrder[bestIndex]; - regSpill(RI, reg); - return reg; -} - -static X64Reg fregFindFreeReg(RegInfo& RI) -{ - for (auto& reg : FRegAllocOrder) - { - if (RI.fregs[reg] == nullptr) - return reg; - } - - int bestIndex = -1; - InstLoc bestEnd = nullptr; - for (int i = 0; i < FRegAllocSize; ++i) - { - const InstLoc start = RI.fregs[FRegAllocOrder[i]]; - const InstLoc end = RI.lastUsed[start - RI.FirstI]; - - if (bestEnd < end) - { - bestEnd = end; - bestIndex = i; - } - } - - X64Reg reg = FRegAllocOrder[bestIndex]; - fregSpill(RI, reg); - return reg; -} - -static OpArg regLocForInst(RegInfo& RI, InstLoc I) -{ - for (auto& reg : RegAllocOrder) - { - if (RI.regs[reg] == I) - return R(reg); - } - - unsigned slot = regGetSpill(RI, I); - if (!slot) - PanicAlert("Retrieving unknown spill slot?!"); - return regLocForSlot(RI, slot); -} - -static OpArg fregLocForInst(RegInfo& RI, InstLoc I) -{ - for (auto& reg : FRegAllocOrder) - { - if (RI.fregs[reg] == I) - return R(reg); - } - - unsigned slot = fregGetSpill(RI, I); - if (!slot) - PanicAlert("Retrieving unknown spill slot?!"); - return fregLocForSlot(RI, slot); -} - -static void regClearInst(RegInfo& RI, InstLoc I) -{ - for (auto& reg : RegAllocOrder) - { - if (RI.regs[reg] == I) - RI.regs[reg] = nullptr; - } -} - -static void fregClearInst(RegInfo& RI, InstLoc I) -{ - for (auto& reg : FRegAllocOrder) - { - if (RI.fregs[reg] == I) - RI.fregs[reg] = nullptr; - } -} - -static X64Reg regEnsureInReg(RegInfo& RI, InstLoc I) -{ - OpArg loc = regLocForInst(RI, I); - - if (!loc.IsSimpleReg()) - { - X64Reg newReg = regFindFreeReg(RI); - RI.Jit->MOV(32, R(newReg), loc); - loc = R(newReg); - } - - return loc.GetSimpleReg(); -} - -static X64Reg fregEnsureInReg(RegInfo& RI, InstLoc I) -{ - OpArg loc = fregLocForInst(RI, I); - - if (!loc.IsSimpleReg()) - { - X64Reg newReg = fregFindFreeReg(RI); - RI.Jit->MOVAPD(newReg, loc); - loc = R(newReg); - } - - return loc.GetSimpleReg(); -} - -static void regSpillCallerSaved(RegInfo& RI) -{ - regSpill(RI, RCX); - regSpill(RI, RDX); - regSpill(RI, RSI); - regSpill(RI, RDI); - regSpill(RI, R8); - regSpill(RI, R9); - regSpill(RI, R10); - regSpill(RI, R11); -} - -static X64Reg regUReg(RegInfo& RI, InstLoc I) -{ - const OpArg loc = regLocForInst(RI, getOp1(I)); - - if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) - { - return loc.GetSimpleReg(); - } - - return regFindFreeReg(RI); -} - -// Recycle the register if the lifetime of op1 register ends at I. -static X64Reg fregURegWithoutMov(RegInfo& RI, InstLoc I) -{ - const OpArg loc = fregLocForInst(RI, getOp1(I)); - - if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) - { - return loc.GetSimpleReg(); - } - - return fregFindFreeReg(RI); -} - -static X64Reg fregURegWithMov(RegInfo& RI, InstLoc I) -{ - const OpArg loc = fregLocForInst(RI, getOp1(I)); - - if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) - { - return loc.GetSimpleReg(); - } - - X64Reg reg = fregFindFreeReg(RI); - RI.Jit->MOVAPD(reg, loc); - return reg; -} - -// Recycle the register if the lifetime of op1 register ends at I. -static X64Reg fregBinLHSRegWithMov(RegInfo& RI, InstLoc I) -{ - const OpArg loc = fregLocForInst(RI, getOp1(I)); - - if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) - { - return loc.GetSimpleReg(); - } - - X64Reg reg = fregFindFreeReg(RI); - RI.Jit->MOVAPD(reg, loc); - return reg; -} - -// Recycle the register if the lifetime of op2 register ends at I. -static X64Reg fregBinRHSRegWithMov(RegInfo& RI, InstLoc I) -{ - const OpArg loc = fregLocForInst(RI, getOp2(I)); - - if ((RI.IInfo[I - RI.FirstI] & 8) && loc.IsSimpleReg()) - { - return loc.GetSimpleReg(); - } - - X64Reg reg = fregFindFreeReg(RI); - RI.Jit->MOVAPD(reg, loc); - return reg; -} - -// If the lifetime of the register used by an operand ends at I, -// return the register. Otherwise return a free register. -static X64Reg regBinReg(RegInfo& RI, InstLoc I) -{ - // FIXME: When regLocForInst() is extracted as a local variable, - // "Retrieving unknown spill slot?!" is shown. - if ((RI.IInfo[I - RI.FirstI] & 4) && regLocForInst(RI, getOp1(I)).IsSimpleReg()) - { - return regLocForInst(RI, getOp1(I)).GetSimpleReg(); - } - else if ((RI.IInfo[I - RI.FirstI] & 8) && regLocForInst(RI, getOp2(I)).IsSimpleReg()) - { - return regLocForInst(RI, getOp2(I)).GetSimpleReg(); - } - - return regFindFreeReg(RI); -} - -static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I) -{ - if (RI.IInfo[I - RI.FirstI] & 4) - { - return regEnsureInReg(RI, getOp1(I)); - } - - X64Reg reg = regFindFreeReg(RI); - RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I))); - return reg; -} - -// Clear any registers which end their lifetime at I -// Don't use this for special instructions like memory load/stores -static void regNormalRegClear(RegInfo& RI, InstLoc I) -{ - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); -} - -// Clear any floating point registers which end their lifetime at I -static void fregNormalRegClear(RegInfo& RI, InstLoc I) -{ - if (RI.IInfo[I - RI.FirstI] & 4) - fregClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - fregClearInst(RI, getOp2(I)); -} - -static void regEmitBinInst(RegInfo& RI, InstLoc I, - void (JitIL::*op)(int, const OpArg&, const OpArg&), - bool commutable = false) -{ - X64Reg reg; - bool commuted = false; - if (RI.IInfo[I - RI.FirstI] & 4) - { - reg = regEnsureInReg(RI, getOp1(I)); - } - else if (commutable && (RI.IInfo[I - RI.FirstI] & 8)) - { - reg = regEnsureInReg(RI, getOp2(I)); - commuted = true; - } - else - { - reg = regFindFreeReg(RI); - RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I))); - } - - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - if (RHS + 128 < 256) - { - (RI.Jit->*op)(32, R(reg), Imm8(RHS)); - } - else - { - (RI.Jit->*op)(32, R(reg), Imm32(RHS)); - } - } - else if (commuted) - { - (RI.Jit->*op)(32, R(reg), regLocForInst(RI, getOp1(I))); - } - else - { - (RI.Jit->*op)(32, R(reg), regLocForInst(RI, getOp2(I))); - } - - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void fregEmitBinInst(RegInfo& RI, InstLoc I, void (JitIL::*op)(X64Reg, const OpArg&)) -{ - X64Reg reg; - - if (RI.IInfo[I - RI.FirstI] & 4) - { - reg = fregEnsureInReg(RI, getOp1(I)); - } - else - { - reg = fregFindFreeReg(RI); - RI.Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I))); - } - - (RI.Jit->*op)(reg, fregLocForInst(RI, getOp2(I))); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); -} - -// Mark and calculation routines for profiled load/store addresses -// Could be extended to unprofiled addresses. -static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) -{ - if (isImm(*AI)) - { - unsigned addr = RI.Build->GetImmValue(AI); - if (PowerPC::IsOptimizableRAMAddress(addr)) - return; - } - - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) - { - regMarkUse(RI, I, getOp1(AI), OpNum); - return; - } - - regMarkUse(RI, I, AI, OpNum); -} - -// in 64-bit build, this returns a completely bizarre address sometimes! -static std::pair regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum, - X64Reg* dest) -{ - if (isImm(*AI)) - { - unsigned addr = RI.Build->GetImmValue(AI); - if (PowerPC::IsOptimizableRAMAddress(addr)) - { - if (dest) - *dest = regFindFreeReg(RI); - - return std::make_pair(Imm32(addr), 0); - } - } - - unsigned offset; - InstLoc AddrBase; - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) - { - offset = RI.Build->GetImmValue(getOp2(AI)); - AddrBase = getOp1(AI); - } - else - { - offset = 0; - AddrBase = AI; - } - - X64Reg baseReg; - // Ok, this stuff needs a comment or three :P -ector - if (RI.IInfo[I - RI.FirstI] & (2 << OpNum)) - { - baseReg = regEnsureInReg(RI, AddrBase); - regClearInst(RI, AddrBase); - if (dest) - *dest = baseReg; - } - else if (dest) - { - X64Reg reg = regFindFreeReg(RI); - const OpArg loc = regLocForInst(RI, AddrBase); - if (!loc.IsSimpleReg()) - { - RI.Jit->MOV(32, R(reg), loc); - baseReg = reg; - } - else - { - baseReg = loc.GetSimpleReg(); - } - *dest = reg; - } - else - { - baseReg = regEnsureInReg(RI, AddrBase); - } - - return std::make_pair(R(baseReg), offset); -} - -static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) -{ - X64Reg reg; - auto info = regBuildMemAddress(RI, I, getOp1(I), 1, ®); - - RI.Jit->SafeLoadToReg(reg, info.first, Size, info.second, regsInUse(RI), false); - if (regReadUse(RI, I)) - RI.regs[reg] = I; -} - -static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) -{ - unsigned imm = RI.Build->GetImmValue(I); - - if (Size == 32) - { - return Imm32(imm); - } - else if (Size == 16) - { - return Imm16(imm); - } - else - { - return Imm8(imm); - } -} - -static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) -{ - auto info = regBuildMemAddress(RI, I, getOp2(I), 2, nullptr); - if (info.first.IsImm()) - RI.Jit->MOV(32, R(RSCRATCH2), info.first); - else - RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second)); - - regSpill(RI, RSCRATCH); - - if (isImm(*getOp1(I))) - { - RI.Jit->MOV(Size, R(RSCRATCH), regImmForConst(RI, getOp1(I), Size)); - } - else - { - RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp1(I))); - } - - RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, Size, 0, regsInUse(RI)); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); -} - -static void regEmitShiftInst(RegInfo& RI, InstLoc I, - void (JitIL::*op)(int, const OpArg&, const OpArg&)) -{ - X64Reg reg = regBinLHSReg(RI, I); - - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - (RI.Jit->*op)(32, R(reg), Imm8(RHS)); - RI.regs[reg] = I; - return; - } - - RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); - (RI.Jit->*op)(32, R(reg), R(ECX)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void regStoreInstToConstLoc(RegInfo& RI, unsigned width, InstLoc I, void* loc) -{ - if (width != 32) - { - PanicAlert("Not implemented!"); - return; - } - - if (isImm(*I)) - { - RI.Jit->MOV(32, M(loc), Imm32(RI.Build->GetImmValue(I))); - return; - } - - X64Reg reg = regEnsureInReg(RI, I); - RI.Jit->MOV(32, M(loc), R(reg)); -} - -static void regEmitCmp(RegInfo& RI, InstLoc I) -{ - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - RI.Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(RHS)); - } - else - { - X64Reg reg = regEnsureInReg(RI, getOp1(I)); - RI.Jit->CMP(32, R(reg), regLocForInst(RI, getOp2(I))); - } -} - -static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) -{ - regEmitCmp(RI, I); - RI.Jit->SETcc(flag, R(RSCRATCH2)); // Caution: SETCC uses 8-bit regs! - X64Reg reg = regBinReg(RI, I); - RI.Jit->MOVZX(32, 8, reg, R(RSCRATCH2)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) -{ - bool signed_compare = getOpcode(*I) == ICmpCRSigned; - X64Reg reg; - - if (RI.IInfo[I - RI.FirstI] & 4) - { - reg = regEnsureInReg(RI, getOp1(I)); - if (signed_compare) - RI.Jit->MOVSX(64, 32, reg, R(reg)); - } - else - { - reg = regFindFreeReg(RI); - if (signed_compare) - RI.Jit->MOVSX(64, 32, reg, regLocForInst(RI, getOp1(I))); - else - RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I))); - } - - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - if (!signed_compare && (RHS & 0x80000000U)) - { - RI.Jit->MOV(32, R(RSCRATCH), Imm32(RHS)); - RI.Jit->SUB(64, R(reg), R(RSCRATCH)); - } - else if (RHS) - { - RI.Jit->SUB(64, R(reg), Imm32(RHS)); - } - } - else - { - if (signed_compare) - RI.Jit->MOVSX(64, 32, RSCRATCH, regLocForInst(RI, getOp2(I))); - else - RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp2(I))); - RI.Jit->SUB(64, R(reg), R(RSCRATCH)); - } - - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void regWriteExit(RegInfo& RI, InstLoc dest) -{ - if (isImm(*dest)) - { - RI.exitNumber++; - RI.Jit->WriteExit(RI.Build->GetImmValue(dest)); - } - else - { - RI.Jit->WriteExitDestInOpArg(regLocForInst(RI, dest)); - } -} - -// Helper function to check floating point exceptions -alignas(16) static double isSNANTemp[2][2]; -static bool checkIsSNAN() -{ - return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]); -} - -static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) -{ - // printf("Writing block: %x\n", js.blockStart); - RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); - RI.Build = ibuild; - - // Pass to compute liveness - ibuild->StartBackPass(); - for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) - { - InstLoc I = ibuild->ReadBackward(); - unsigned int op = getOpcode(*I); - bool thisUsed = regReadUse(RI, I) != 0; - - switch (op) - { - default: - PanicAlert("Unexpected inst!"); - case Nop: - case CInt16: - case CInt32: - case LoadGReg: - case LoadLink: - case LoadCR: - case LoadCarry: - case LoadCTR: - case LoadMSR: - case LoadFReg: - case LoadFRegDENToZero: - case LoadGQR: - case BlockEnd: - case BlockStart: - case FallBackToInterpreter: - case SystemCall: - case RFIExit: - case InterpreterBranch: - case ShortIdleLoop: - case FPExceptionCheck: - case DSIExceptionCheck: - case ExtExceptionCheck: - case BreakPointCheck: - case Int3: - case Tramp: - // No liveness effects - break; - case SExt8: - case SExt16: - case BSwap32: - case BSwap16: - case Cntlzw: - case Not: - case DupSingleToMReg: - case DoubleToSingle: - case ExpandPackedToMReg: - case CompactMRegToPacked: - case FPNeg: - case FPDup0: - case FPDup1: - case FSNeg: - case FDNeg: - case ConvertFromFastCR: - case ConvertToFastCR: - case FastCRSOSet: - case FastCREQSet: - case FastCRGTSet: - case FastCRLTSet: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Load8: - case Load16: - case Load32: - case LoadDouble: - case LoadSingle: - regMarkMemAddress(RI, I, getOp1(I), 1); - break; - case LoadPaired: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreCR: - case StoreCarry: - case StoreFPRF: - regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreGReg: - case StoreLink: - case StoreCTR: - case StoreMSR: - case StoreGQR: - case StoreSRR: - case StoreFReg: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Add: - case Sub: - case And: - case Or: - case Xor: - case Mul: - case MulHighUnsigned: - case Rol: - case Shl: - case Shrl: - case Sarl: - case ICmpCRUnsigned: - case ICmpCRSigned: - case ICmpEq: - case ICmpNe: - case ICmpUgt: - case ICmpUlt: - case ICmpUge: - case ICmpUle: - case ICmpSgt: - case ICmpSlt: - case ICmpSge: - case ICmpSle: - case FSMul: - case FSAdd: - case FSSub: - case FDMul: - case FDAdd: - case FDSub: - case FPAdd: - case FPMul: - case FPSub: - case FPMerge00: - case FPMerge01: - case FPMerge10: - case FPMerge11: - case FDCmpCR: - case InsertDoubleInMReg: - if (thisUsed) - { - regMarkUse(RI, I, getOp1(I), 1); - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - } - break; - case Store8: - case Store16: - case Store32: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - regMarkMemAddress(RI, I, getOp2(I), 2); - break; - case StoreSingle: - case StoreDouble: - regMarkUse(RI, I, getOp1(I), 1); - regMarkMemAddress(RI, I, getOp2(I), 2); - break; - case StorePaired: - regMarkUse(RI, I, getOp1(I), 1); - regMarkUse(RI, I, getOp2(I), 2); - break; - case BranchUncond: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case IdleBranch: - regMarkUse(RI, I, getOp1(I), 1); - break; - case BranchCond: - { - if (isICmp(*getOp1(I))) - { - regMarkUse(RI, I, getOp1(getOp1(I)), 1); - if (!isImm(*getOp2(getOp1(I)))) - regMarkUse(RI, I, getOp2(getOp1(I)), 2); - } - else - { - regMarkUse(RI, I, getOp1(I), 1); - } - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - break; - } - } - } - - ibuild->StartForwardPass(); - for (unsigned i = 0; i != RI.IInfo.size(); i++) - { - InstLoc I = ibuild->ReadForward(); - bool thisUsed = regReadUse(RI, I) != 0; - if (thisUsed) - { - // Needed for IR Writer - ibuild->SetMarkUsed(I); - } - - switch (getOpcode(*I)) - { - case FallBackToInterpreter: - { - unsigned InstCode = ibuild->GetImmValue(getOp1(I)); - unsigned InstLoc = ibuild->GetImmValue(getOp2(I)); - // There really shouldn't be anything live across an - // interpreter call at the moment, but optimizing interpreter - // calls isn't completely out of the question... - regSpillCallerSaved(RI); - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc + 4)); - Jit->ABI_CallFunctionC(GetInterpreterOp(InstCode), InstCode); - break; - } - case LoadGReg: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->MOV(32, R(reg), PPCSTATE(gpr[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCR: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->MOV(64, R(reg), PPCSTATE(cr_val[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCTR: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), PPCSTATE_CTR); - RI.regs[reg] = I; - break; - } - case LoadLink: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), PPCSTATE_LR); - RI.regs[reg] = I; - break; - } - case LoadMSR: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), PPCSTATE(msr)); - RI.regs[reg] = I; - break; - } - case LoadGQR: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - unsigned gqr = *I >> 8; - Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_GQR0 + gqr])); - RI.regs[reg] = I; - break; - } - case LoadCarry: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca)); - RI.regs[reg] = I; - break; - } - case StoreGReg: - { - unsigned ppcreg = *I >> 16; - regStoreInstToConstLoc(RI, 32, getOp1(I), &PowerPC::ppcState.gpr[ppcreg]); - regNormalRegClear(RI, I); - break; - } - case StoreCR: - { - X64Reg reg = regEnsureInReg(RI, getOp1(I)); - unsigned ppcreg = *I >> 16; - Jit->MOV(64, PPCSTATE(cr_val[ppcreg]), R(reg)); - regNormalRegClear(RI, I); - break; - } - case StoreLink: - { - regStoreInstToConstLoc(RI, 32, getOp1(I), &LR); - regNormalRegClear(RI, I); - break; - } - case StoreCTR: - { - regStoreInstToConstLoc(RI, 32, getOp1(I), &CTR); - regNormalRegClear(RI, I); - break; - } - case StoreMSR: - { - unsigned InstLoc = ibuild->GetImmValue(getOp2(I)); - regStoreInstToConstLoc(RI, 32, getOp1(I), &MSR); - regNormalRegClear(RI, I); - - // If some exceptions are pending and EE are now enabled, force checking - // external exceptions when going out of mtmsr in order to execute delayed - // interrupts as soon as possible. - Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); - Jit->TEST(32, R(RSCRATCH), Imm32(0x8000)); - FixupBranch eeDisabled = Jit->J_CC(CC_Z); - - Jit->MOV(32, R(RSCRATCH), PPCSTATE(Exceptions)); - Jit->TEST(32, R(RSCRATCH), R(RSCRATCH)); - FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); - - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); - Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL - - Jit->SetJumpTarget(eeDisabled); - Jit->SetJumpTarget(noExceptionsPending); - break; - } - case StoreGQR: - { - unsigned gqr = *I >> 16; - regStoreInstToConstLoc(RI, 32, getOp1(I), &GQR(gqr)); - regNormalRegClear(RI, I); - break; - } - case StoreSRR: - { - unsigned srr = *I >> 16; - regStoreInstToConstLoc(RI, 32, getOp1(I), &PowerPC::ppcState.spr[SPR_SRR0 + srr]); - regNormalRegClear(RI, I); - break; - } - case StoreCarry: - { - Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); - FixupBranch nocarry = Jit->J_CC(CC_Z); - Jit->JitSetCA(); - FixupBranch cont = Jit->J(); - Jit->SetJumpTarget(nocarry); - Jit->JitClearCA(); - Jit->SetJumpTarget(cont); - regNormalRegClear(RI, I); - break; - } - case StoreFPRF: - { - Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); - Jit->AND(32, R(RSCRATCH2), Imm8(0x1F)); - Jit->SHL(32, R(RSCRATCH2), Imm8(12)); - Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); - Jit->OR(32, PPCSTATE(fpscr), R(RSCRATCH2)); - regNormalRegClear(RI, I); - break; - } - case Load8: - { - regEmitMemLoad(RI, I, 8); - break; - } - case Load16: - { - regEmitMemLoad(RI, I, 16); - break; - } - case Load32: - { - regEmitMemLoad(RI, I, 32); - break; - } - case Store8: - { - regEmitMemStore(RI, I, 8); - break; - } - case Store16: - { - regEmitMemStore(RI, I, 16); - break; - } - case Store32: - { - regEmitMemStore(RI, I, 32); - break; - } - case SExt8: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); - Jit->MOVSX(32, 8, reg, R(RSCRATCH2)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case SExt16: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->MOVSX(32, 16, reg, regLocForInst(RI, getOp1(I))); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Cntlzw: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(RSCRATCH2), Imm32(63)); - Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); - Jit->CMOVcc(32, reg, R(RSCRATCH2), CC_Z); - Jit->XOR(32, R(reg), Imm8(31)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Not: - { - if (!thisUsed) - break; - - X64Reg reg = regBinLHSReg(RI, I); - Jit->NOT(32, R(reg)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case And: - { - if (!thisUsed) - break; - - regEmitBinInst(RI, I, &JitIL::AND, true); - break; - } - case Xor: - { - if (!thisUsed) - break; - - regEmitBinInst(RI, I, &JitIL::XOR, true); - break; - } - case Sub: - { - if (!thisUsed) - break; - - regEmitBinInst(RI, I, &JitIL::SUB); - break; - } - case Or: - { - if (!thisUsed) - break; - - regEmitBinInst(RI, I, &JitIL::OR, true); - break; - } - case Add: - { - if (!thisUsed) - break; - - regEmitBinInst(RI, I, &JitIL::ADD, true); - break; - } - case Mul: - { - if (!thisUsed) - break; - - // FIXME: Use three-address capability of IMUL! - X64Reg reg = regBinLHSReg(RI, I); - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - if (RHS + 128 < 256) - Jit->IMUL(32, reg, Imm8(RHS)); - else - Jit->IMUL(32, reg, Imm32(RHS)); - } - else - { - Jit->IMUL(32, reg, regLocForInst(RI, getOp2(I))); - } - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case MulHighUnsigned: - { - if (!thisUsed) - break; - - // no register choice - regSpill(RI, EAX); - regSpill(RI, EDX); - X64Reg reg = regBinReg(RI, I); - if (isImm(*getOp2(I))) - { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - Jit->MOV(32, R(EAX), Imm32(RHS)); - } - else - { - Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); - } - Jit->MUL(32, regLocForInst(RI, getOp1(I))); - Jit->MOV(32, R(reg), R(EDX)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Rol: - { - if (!thisUsed) - break; - - regEmitShiftInst(RI, I, &JitIL::ROL); - break; - } - case Shl: - { - if (!thisUsed) - break; - - regEmitShiftInst(RI, I, &JitIL::SHL); - break; - } - case Shrl: - { - if (!thisUsed) - break; - - regEmitShiftInst(RI, I, &JitIL::SHR); - break; - } - case Sarl: - { - if (!thisUsed) - break; - - regEmitShiftInst(RI, I, &JitIL::SAR); - break; - } - case ICmpEq: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_E); - break; - } - case ICmpNe: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_NE); - break; - } - case ICmpUgt: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_A); - break; - } - case ICmpUlt: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_B); - break; - } - case ICmpUge: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_AE); - break; - } - case ICmpUle: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_BE); - break; - } - case ICmpSgt: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_G); - break; - } - case ICmpSlt: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_L); - break; - } - case ICmpSge: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_GE); - break; - } - case ICmpSle: - { - if (!thisUsed) - break; - - regEmitICmpInst(RI, I, CC_LE); - break; - } - case ICmpCRUnsigned: - { - if (!thisUsed) - break; - - regEmitICmpCRInst(RI, I); - break; - } - case ICmpCRSigned: - { - if (!thisUsed) - break; - - regEmitICmpCRInst(RI, I); - break; - } - case ConvertFromFastCR: - { - if (!thisUsed) - break; - - X64Reg cr_val = regUReg(RI, I); - Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - - Jit->XOR(32, R(RSCRATCH), R(RSCRATCH)); - - // SO: Bit 61 set. - Jit->MOV(64, R(RSCRATCH2), R(cr_val)); - Jit->SHR(64, R(RSCRATCH2), Imm8(61)); - Jit->AND(32, R(RSCRATCH2), Imm8(1)); - Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - - // EQ: Bits 31-0 == 0. - Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); - Jit->TEST(32, R(cr_val), R(cr_val)); - Jit->SETcc(CC_Z, R(RSCRATCH2)); - Jit->SHL(32, R(RSCRATCH2), Imm8(1)); - Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - - // GT: Value > 0. - Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); - Jit->TEST(64, R(cr_val), R(cr_val)); - Jit->SETcc(CC_G, R(RSCRATCH2)); - Jit->SHL(32, R(RSCRATCH2), Imm8(2)); - Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - - // LT: Bit 62 set. - Jit->MOV(64, R(RSCRATCH2), R(cr_val)); - Jit->SHR(64, R(RSCRATCH2), Imm8(62 - 3)); - Jit->AND(32, R(RSCRATCH2), Imm8(0x8)); - Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - - Jit->MOV(32, R(cr_val), R(RSCRATCH)); - RI.regs[cr_val] = I; - regNormalRegClear(RI, I); - break; - } - case ConvertToFastCR: - { - if (!thisUsed) - break; - - X64Reg cr_val = regUReg(RI, I); - Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - - Jit->MOV(64, R(RSCRATCH2), Imm64(1ull << 32)); - - // SO - Jit->MOV(64, R(RSCRATCH), R(cr_val)); - Jit->SHL(64, R(RSCRATCH), Imm8(63)); - Jit->SHR(64, R(RSCRATCH), Imm8(63 - 61)); - Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - - // EQ - Jit->MOV(64, R(RSCRATCH), R(cr_val)); - Jit->NOT(64, R(RSCRATCH)); - Jit->AND(64, R(RSCRATCH), Imm8(CR_EQ)); - Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - - // GT - Jit->MOV(64, R(RSCRATCH), R(cr_val)); - Jit->NOT(64, R(RSCRATCH)); - Jit->AND(64, R(RSCRATCH), Imm8(CR_GT)); - Jit->SHL(64, R(RSCRATCH), Imm8(63 - 2)); - Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - - // LT - Jit->MOV(64, R(RSCRATCH), R(cr_val)); - Jit->AND(64, R(RSCRATCH), Imm8(CR_LT)); - Jit->SHL(64, R(RSCRATCH), Imm8(62 - 3)); - Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - - Jit->MOV(64, R(cr_val), R(RSCRATCH2)); - - RI.regs[cr_val] = I; - regNormalRegClear(RI, I); - break; - } - case FastCRSOSet: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 61)); - Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); - Jit->SETcc(CC_NZ, R(RSCRATCH)); - Jit->MOVZX(32, 8, reg, R(RSCRATCH)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case FastCREQSet: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); - Jit->SETcc(CC_Z, R(RSCRATCH)); - Jit->MOVZX(32, 8, reg, R(RSCRATCH)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case FastCRGTSet: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); - Jit->SETcc(CC_G, R(RSCRATCH)); - Jit->MOVZX(32, 8, reg, R(RSCRATCH)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case FastCRLTSet: - { - if (!thisUsed) - break; - - X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 62)); - Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); - Jit->SETcc(CC_NZ, R(RSCRATCH)); - Jit->MOVZX(32, 8, reg, R(RSCRATCH)); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case LoadSingle: - { - if (!thisUsed) - break; - - X64Reg reg = fregFindFreeReg(RI); - auto info = regBuildMemAddress(RI, I, getOp1(I), 1, nullptr); - - RI.Jit->SafeLoadToReg(RSCRATCH2, info.first, 32, info.second, regsInUse(RI), false); - Jit->MOVD_xmm(reg, R(RSCRATCH2)); - RI.fregs[reg] = I; - break; - } - case LoadDouble: - { - if (!thisUsed) - break; - - X64Reg reg = fregFindFreeReg(RI); - auto info = regBuildMemAddress(RI, I, getOp1(I), 1, nullptr); - - RI.Jit->SafeLoadToReg(RSCRATCH2, info.first, 64, info.second, regsInUse(RI), false); - Jit->MOVQ_xmm(reg, R(RSCRATCH2)); - RI.fregs[reg] = I; - break; - } - case LoadPaired: - { - if (!thisUsed) - break; - - X64Reg reg = fregFindFreeReg(RI); - // The lower 3 bits is for GQR index. The next 1 bit is for inst.W - unsigned int quantreg = (*I >> 16) & 0x7; - unsigned int w = *I >> 19; - // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table - // code. - // Hence, we need to mask out the unused bits. The layout of the GQR register is - // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with - // 0b0011111100000111, or 0x3F07. - Jit->MOV(32, R(RSCRATCH2), Imm32(0x3F07)); - Jit->AND(32, R(RSCRATCH2), M(((char*)&GQR(quantreg)) + 2)); - Jit->MOVZX(32, 8, RSCRATCH, R(RSCRATCH2)); - - const u8** table = - w ? Jit->asm_routines.singleLoadQuantized : Jit->asm_routines.pairedLoadQuantized; - - Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(table))); - Jit->MOVAPD(reg, R(XMM0)); - RI.fregs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case StoreSingle: - { - regSpill(RI, RSCRATCH); - const OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) - Jit->MOVD_xmm(R(RSCRATCH), loc1.GetSimpleReg()); - else - Jit->MOV(32, R(RSCRATCH), loc1); - - auto info = regBuildMemAddress(RI, I, getOp2(I), 2, nullptr); - if (info.first.IsImm()) - RI.Jit->MOV(32, R(RSCRATCH2), info.first); - else - RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second)); - - RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, regsInUse(RI)); - - if (RI.IInfo[I - RI.FirstI] & 4) - fregClearInst(RI, getOp1(I)); - break; - } - case StoreDouble: - { - regSpill(RI, RSCRATCH); - - OpArg value = fregLocForInst(RI, getOp1(I)); - Jit->MOVAPD(XMM0, value); - Jit->MOVQ_xmm(R(RSCRATCH), XMM0); - - auto info = regBuildMemAddress(RI, I, getOp2(I), 2, nullptr); - if (info.first.IsImm()) - RI.Jit->MOV(32, R(RSCRATCH2), info.first); - else - RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second)); - - RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, 0, regsInUse(RI)); - - if (RI.IInfo[I - RI.FirstI] & 4) - fregClearInst(RI, getOp1(I)); - break; - } - case StorePaired: - { - regSpill(RI, RSCRATCH); - regSpill(RI, RSCRATCH2); - u32 quantreg = *I >> 24; - Jit->MOV(32, R(RSCRATCH2), Imm32(0x3F07)); - Jit->AND(32, R(RSCRATCH2), PPCSTATE(spr[SPR_GQR0 + quantreg])); - Jit->MOVZX(32, 8, RSCRATCH, R(RSCRATCH2)); - - Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp2(I))); - Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(Jit->asm_routines.pairedStoreQuantized))); - if (RI.IInfo[I - RI.FirstI] & 4) - fregClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); - break; - } - case DupSingleToMReg: - { - if (!thisUsed) - break; - - X64Reg input = fregEnsureInReg(RI, getOp1(I)); - X64Reg output = fregURegWithoutMov(RI, I); - Jit->ConvertSingleToDouble(output, input); - - RI.fregs[output] = I; - fregNormalRegClear(RI, I); - break; - } - case InsertDoubleInMReg: - { - if (!thisUsed) - break; - // r[0] = op1[0]; r[1] = op2[1]; - - // TODO: Optimize the case that the register of op1 can be - // recycled. (SHUFPD may not be so fast.) - X64Reg reg = fregBinRHSRegWithMov(RI, I); - OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) - { - Jit->MOVSD(reg, loc1); - } - else - { - // If op1 is in FSlotSet, we have to mov loc1 to XMM0 - // before MOVSD/MOVSS. - // Because register<->memory transfer with MOVSD/MOVSS - // clears upper 64/96-bits of the destination register. - Jit->MOVAPD(XMM0, loc1); - Jit->MOVSD(reg, R(XMM0)); - } - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case ExpandPackedToMReg: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithoutMov(RI, I); - Jit->CVTPS2PD(reg, fregLocForInst(RI, getOp1(I))); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case CompactMRegToPacked: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithoutMov(RI, I); - Jit->CVTPD2PS(reg, fregLocForInst(RI, getOp1(I))); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FSNeg: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithMov(RI, I); - alignas(16) static const u32 ssSignBits[4] = {0x80000000}; - Jit->PXOR(reg, Jit->MConst(ssSignBits)); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FDNeg: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithMov(RI, I); - alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL}; - Jit->PXOR(reg, Jit->MConst(sdSignBits)); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPNeg: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithMov(RI, I); - alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000}; - Jit->PXOR(reg, Jit->MConst(psSignBits)); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPDup0: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithMov(RI, I); - Jit->PUNPCKLDQ(reg, R(reg)); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPDup1: - { - if (!thisUsed) - break; - - X64Reg reg = fregURegWithMov(RI, I); - Jit->SHUFPS(reg, R(reg), 0xE5); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case LoadFReg: - { - if (!thisUsed) - break; - - X64Reg reg = fregFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); - RI.fregs[reg] = I; - break; - } - case LoadFRegDENToZero: - { - if (!thisUsed) - break; - - X64Reg reg = fregFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - char* p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); - Jit->MOV(32, R(RSCRATCH2), M(p + 4)); - Jit->AND(32, R(RSCRATCH2), Imm32(0x7ff00000)); - Jit->CMP(32, R(RSCRATCH2), Imm32(0x38000000)); - FixupBranch ok = Jit->J_CC(CC_AE); - Jit->AND(32, M(p + 4), Imm32(0x80000000)); - Jit->MOV(32, M(p), Imm32(0)); - Jit->SetJumpTarget(ok); - Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); - RI.fregs[reg] = I; - break; - } - case StoreFReg: - { - unsigned ppcreg = *I >> 16; - Jit->MOVAPD(PPCSTATE(ps[ppcreg]), fregEnsureInReg(RI, getOp1(I))); - fregNormalRegClear(RI, I); - break; - } - case DoubleToSingle: - { - if (!thisUsed) - break; - - X64Reg input = fregEnsureInReg(RI, getOp1(I)); - X64Reg output = fregURegWithoutMov(RI, I); - Jit->ConvertDoubleToSingle(output, input); - - RI.fregs[output] = I; - fregNormalRegClear(RI, I); - break; - } - case FSMul: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::MULSS); - break; - } - case FSAdd: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::ADDSS); - break; - } - case FSSub: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::SUBSS); - break; - } - case FDMul: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::MULSD); - break; - } - case FDAdd: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::ADDSD); - break; - } - case FDSub: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::SUBSD); - break; - } - case FDCmpCR: - { - const u32 ordered = *I >> 24; - X64Reg destreg = regFindFreeReg(RI); - // TODO: Remove an extra MOVSD if loc1.IsSimpleReg() - OpArg loc1 = fregLocForInst(RI, getOp1(I)); - OpArg loc2 = fregLocForInst(RI, getOp2(I)); - Jit->MOVSD(XMM0, loc1); - Jit->UCOMISD(XMM0, loc2); - FixupBranch pNan = Jit->J_CC(CC_P); - FixupBranch pEqual = Jit->J_CC(CC_Z); - FixupBranch pLesser = Jit->J_CC(CC_C); - // Greater - Jit->MOV(32, R(destreg), Imm32(0x4)); - FixupBranch continue1 = Jit->J(); - // NaN - Jit->SetJumpTarget(pNan); - Jit->MOV(32, R(destreg), Imm32(0x1)); - - if (ordered) - { - // fcmpo - // TODO: Optimize the following code if slow. - // SNAN check may not be needed - // because it does not happen so much. - Jit->MOVSD(M(isSNANTemp[0]), XMM0); - if (loc2.IsSimpleReg()) - { - Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg()); - } - else - { - Jit->MOVSD(XMM0, loc2); - Jit->MOVSD(M(isSNANTemp[1]), XMM0); - } - Jit->ABI_CallFunction(checkIsSNAN); - Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); - FixupBranch ok = Jit->J_CC(CC_Z); - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; - Jit->TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VE)); - FixupBranch finish0 = Jit->J_CC(CC_NZ); - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; - FixupBranch finish1 = Jit->J(); - Jit->SetJumpTarget(ok); - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; - Jit->SetJumpTarget(finish0); - Jit->SetJumpTarget(finish1); - } - else - { - // fcmpu - // TODO: Optimize the following code if slow - Jit->MOVSD(M(isSNANTemp[0]), XMM0); - if (loc2.IsSimpleReg()) - { - Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg()); - } - else - { - Jit->MOVSD(XMM0, loc2); - Jit->MOVSD(M(isSNANTemp[1]), XMM0); - } - Jit->ABI_CallFunction(checkIsSNAN); - Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); - FixupBranch finish = Jit->J_CC(CC_Z); - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; - Jit->SetJumpTarget(finish); - } - - FixupBranch continue2 = Jit->J(); - // Equal - Jit->SetJumpTarget(pEqual); - Jit->MOV(32, R(destreg), Imm32(0x2)); - FixupBranch continue3 = Jit->J(); - // Less - Jit->SetJumpTarget(pLesser); - Jit->MOV(32, R(destreg), Imm32(0x8)); - Jit->SetJumpTarget(continue1); - Jit->SetJumpTarget(continue2); - Jit->SetJumpTarget(continue3); - RI.regs[destreg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPAdd: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::ADDPS); - break; - } - case FPMul: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::MULPS); - break; - } - case FPSub: - { - if (!thisUsed) - break; - - fregEmitBinInst(RI, I, &JitIL::SUBPS); - break; - } - case FPMerge00: - { - // r[0] = op1[0]; r[1] = op2[0]; - if (!thisUsed) - break; - - // TODO: Optimize the case that the register of only op2 can be - // recycled. - X64Reg reg = fregBinLHSRegWithMov(RI, I); - Jit->PUNPCKLDQ(reg, fregLocForInst(RI, getOp2(I))); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPMerge01: - { - // r[0] = op1[0]; r[1] = op2[1]; - if (!thisUsed) - break; - - // TODO: Optimize the case that the register of only op1 can be - // recycled. - X64Reg reg = fregBinRHSRegWithMov(RI, I); - OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) - { - Jit->MOVSS(reg, loc1); - } - else - { - Jit->MOVAPD(XMM0, loc1); - Jit->MOVSS(reg, R(XMM0)); - } - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPMerge10: - { - // r[0] = op1[1]; r[1] = op2[0]; - if (!thisUsed) - break; - - // TODO: Optimize the case that the register of only op2 can be - // recycled. - X64Reg reg = fregBinLHSRegWithMov(RI, I); - OpArg loc2 = fregLocForInst(RI, getOp2(I)); - if (loc2.IsSimpleReg()) - { - Jit->MOVSS(reg, loc2); - } - else - { - Jit->MOVAPD(XMM0, loc2); - Jit->MOVSS(reg, R(XMM0)); - } - Jit->SHUFPS(reg, R(reg), 0xF1); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case FPMerge11: - { - // r[0] = op1[1]; r[1] = op2[1]; - if (!thisUsed) - break; - - // TODO: Optimize the case that the register of only op2 can be - // recycled. - X64Reg reg = fregBinLHSRegWithMov(RI, I); - // TODO: Check whether the following code works - // when the op1 is in the FSlotSet - Jit->PUNPCKLDQ(reg, fregLocForInst(RI, getOp2(I))); - Jit->SHUFPD(reg, R(reg), 0x1); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } - case CInt32: - case CInt16: - { - if (!thisUsed) - break; - - X64Reg reg = regFindFreeReg(RI); - u64 val = ibuild->GetImmValue64(I); - if (static_cast(val) == val) - Jit->MOV(32, R(reg), Imm32(static_cast(val))); - else if (static_cast(val) == static_cast(val)) - Jit->MOV(64, R(reg), Imm32(static_cast(val))); - else - Jit->MOV(64, R(reg), Imm64(val)); - RI.regs[reg] = I; - break; - } - case BlockStart: - case BlockEnd: - break; - - case IdleBranch: - { - // If value is 0, we don't need to call out to the idle function. - OpArg value = regLocForInst(RI, getOp1(I)); - Jit->TEST(32, value, value); - FixupBranch noidle = Jit->J_CC(CC_NZ); - - RI.Jit->Cleanup(); // is it needed? - Jit->ABI_CallFunction(CoreTiming::Idle); - - Jit->MOV(32, PPCSTATE(pc), Imm32(ibuild->GetImmValue(getOp2(I)))); - Jit->WriteExceptionExit(); - - Jit->SetJumpTarget(noidle); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); - break; - } - - case BranchCond: - { - if (isICmp(*getOp1(I))) - { - regEmitCmp(RI, getOp1(I)); - CCFlags flag; - switch (getOpcode(*getOp1(I))) - { - case ICmpEq: - flag = CC_NE; - break; - case ICmpNe: - flag = CC_E; - break; - case ICmpUgt: - flag = CC_BE; - break; - case ICmpUlt: - flag = CC_AE; - break; - case ICmpUge: - flag = CC_B; - break; - case ICmpUle: - flag = CC_A; - break; - case ICmpSgt: - flag = CC_LE; - break; - case ICmpSlt: - flag = CC_GE; - break; - case ICmpSge: - flag = CC_L; - break; - case ICmpSle: - flag = CC_G; - break; - default: - PanicAlert("cmpXX"); - flag = CC_O; - break; - } - FixupBranch cont = Jit->J_CC(flag); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(getOp1(I))); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(getOp1(I))); - } - else - { - Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); - FixupBranch cont = Jit->J_CC(CC_Z); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - } - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); - break; - } - case BranchUncond: - { - regWriteExit(RI, getOp1(I)); - regNormalRegClear(RI, I); - break; - } - case ShortIdleLoop: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->ABI_CallFunction(CoreTiming::Idle); - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->WriteExceptionExit(); - break; - } - case SystemCall: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->LOCK(); - Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); - Jit->WriteExceptionExit(); - break; - } - case InterpreterBranch: - { - Jit->MOV(32, R(RSCRATCH), PPCSTATE(npc)); - Jit->WriteExitDestInOpArg(R(RSCRATCH)); - break; - } - case RFIExit: - { - // See Interpreter rfi for details - const u32 mask = 0x87C0FFFF; - // MSR = (MSR & ~mask) | (SRR1 & mask); - Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); - Jit->MOV(32, R(RSCRATCH2), PPCSTATE_SRR1); - Jit->AND(32, R(RSCRATCH), Imm32(~mask)); - Jit->AND(32, R(RSCRATCH2), Imm32(mask)); - Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - Jit->AND(32, R(RSCRATCH), Imm32(0xFFFBFFFF)); - Jit->MOV(32, PPCSTATE(msr), R(RSCRATCH)); - // NPC = SRR0; - Jit->MOV(32, R(RSCRATCH), PPCSTATE_SRR0); - Jit->WriteRfiExitDestInOpArg(R(RSCRATCH)); - break; - } - case FPExceptionCheck: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - // This instruction uses FPU - needs to add FP exception bailout - Jit->TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit - FixupBranch b1 = Jit->J_CC(CC_NZ); - - // If a FPU exception occurs, the exception handler will read - // from PC. Update PC with the latest value in case that happens. - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->SUB(32, PPCSTATE(downcount), Imm32(Jit->js.downcountAmount)); - Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); - Jit->WriteExceptionExit(); - Jit->SetJumpTarget(b1); - break; - } - case DSIExceptionCheck: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); - FixupBranch noMemException = Jit->J_CC(CC_Z); - - // If a memory exception occurs, the exception handler will read - // from PC. Update PC with the latest value in case that happens. - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->WriteExceptionExit(); - Jit->SetJumpTarget(noMemException); - break; - } - case ExtExceptionCheck: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - - Jit->TEST(32, PPCSTATE(Exceptions), - Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | - EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); - FixupBranch clearInt = Jit->J_CC(CC_NZ); - Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); - FixupBranch noExtException = Jit->J_CC(CC_Z); - Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000)); - FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); - Jit->TEST(32, M(&ProcessorInterface::m_InterruptCause), - Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | - ProcessorInterface::INT_CAUSE_PE_FINISH)); - FixupBranch noCPInt = Jit->J_CC(CC_Z); - - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->WriteExceptionExit(); - - Jit->SetJumpTarget(noCPInt); - Jit->SetJumpTarget(noExtIntEnable); - Jit->SetJumpTarget(noExtException); - Jit->SetJumpTarget(clearInt); - break; - } - case BreakPointCheck: - { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - - Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); - Jit->ABI_CallFunction(PowerPC::CheckBreakPoints); - Jit->TEST(32, M(CPU::GetStatePtr()), Imm32(0xFFFFFFFF)); - FixupBranch noBreakpoint = Jit->J_CC(CC_Z); - Jit->WriteExit(InstLoc); - Jit->SetJumpTarget(noBreakpoint); - break; - } - case Int3: - { - Jit->INT3(); - break; - } - case Tramp: - break; - case Nop: - break; - default: - PanicAlert("Unknown JIT instruction; aborting!"); - exit(1); - } - } - - for (size_t i = 0; i < RegInfo::MAX_NUMBER_OF_REGS; i++) - { - if (RI.regs[i]) - { - // Start a game in Burnout 2 to get this. Or animal crossing. - PanicAlert("Incomplete cleanup! (regs)"); - exit(1); - } - - if (RI.fregs[i]) - { - PanicAlert("Incomplete cleanup! (fregs)"); - exit(1); - } - } - - Jit->WriteExit(exitAddress); - Jit->UD2(); -} - -void JitIL::WriteCode(u32 exitAddress) -{ - DoWriteCode(&ibuild, this, exitAddress); -} diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp deleted file mode 100644 index 740771241d..0000000000 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ /dev/null @@ -1,688 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/Jit64IL/JitIL.h" - -#include -#include // For profiling -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/Intrinsics.h" -#include "Common/Logging/Log.h" -#include "Common/StringUtil.h" -#include "Common/x64ABI.h" -#include "Core/HLE/HLE.h" -#include "Core/HW/CPU.h" -#include "Core/PatchEngine.h" -#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/Profiler.h" - -using namespace Gen; -using namespace PowerPC; - -// Dolphin's PowerPC->x86 JIT dynamic recompiler -// (Nearly) all code by ector (hrydgard) -// Features: -// * x86 & x64 support, lots of shared code. -// * Basic block linking -// * Fast dispatcher - -// Unfeatures: -// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the -// corresponding Interpreter function. - -// Various notes below - -// Register allocation -// RAX - Generic quicktemp register -// RBX - point to base of memory map -// RSI RDI R12 R13 R14 R15 - free for allocation -// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are -// called. -// RSP - stack pointer, do not generally use, very dangerous -// RBP - ? - -// IMPORTANT: -// Make sure that all generated code and all emulator state sits under the 2GB boundary so that -// RIP addressing can be used easily. Windows will always allocate static code under the 2GB -// boundary. -// Also make sure to use VirtualAlloc and specify EXECUTE permission. - -// Open questions -// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp -// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting -// diminishing returns. - -// Other considerations -// -// Many instructions have shorter forms for EAX. However, I believe their performance boost -// will be as small to be negligible, so I haven't dirtied up the code with that. AMD recommends it -// in their -// optimization manuals, though. -// -// We support block linking. Reserve space at the exits of every block for a full 5-byte jmp. Save -// 16-bit offsets -// from the starts of each block, marking the exits so that they can be nicely patched at any time. -// -// Blocks do NOT use call/ret, they only jmp to each other and to the dispatcher when necessary. -// -// All blocks that can be precompiled will be precompiled. Code will be memory protected - any write -// will mark -// the region as non-compilable, and all links to the page will be torn out and replaced with -// dispatcher jmps. -// -// Alternatively, icbi instruction SHOULD mark where we can't compile -// -// Seldom-happening events is handled by adding a decrement of a counter to all blr instructions -// (which are -// expensive anyway since we need to return to dispatcher, except when they can be predicted). - -// TODO: SERIOUS synchronization problem with the video backend setting tokens and breakpoints in -// dual core mode!!! -// Somewhat fixed by disabling idle skipping when certain interrupts are enabled -// This is no permanent reliable fix -// TODO: Zeldas go whacko when you hang the gfx thread - -// Idea - Accurate exception handling -// Compute register state at a certain instruction by running the JIT in "dry mode", and stopping at -// the right place. -// Not likely to be done :P - -// Optimization Ideas - -/* - * Assume SP is in main RAM (in Wii mode too?) - partly done - * Assume all floating point loads and double precision loads+stores are to/from main ram - (single precision can be used in write gather pipe, specialized fast check added) - * AMD only - use movaps instead of movapd when loading ps from memory? - * HLE functions like floorf, sin, memcpy, etc - they can be much faster - * ABI optimizations - drop F0-F13 on blr, for example. Watch out for context switching. - CR2-CR4 are non-volatile, rest of CR is volatile -> dropped on blr. - R5-R12 are volatile -> dropped on blr. - * classic inlining across calls. - -Low hanging fruit: -stfd -- guaranteed in memory -cmpl -mulli -stfs -stwu -lb/stzx - -bcx - optimize! -bcctr -stfs -psq_st -addx -orx -rlwimix -fcmpo -DSP_UpdateARAMDMA -lfd -stwu -cntlzwx -bcctrx -WriteBigEData - -TODO -lha -srawx -addic_rc -addex -subfcx -subfex - -fmaddx -fmulx -faddx -fnegx -frspx -frsqrtex -ps_sum0 -ps_muls0 -ps_adds1 - -*/ - -#ifdef _WIN32 -#include -#else -#include -#include - -#if defined(__clang__) -#if !__has_builtin(__builtin_ia32_rdtsc) -static inline uint64_t __rdtsc() -{ - uint32_t lo, hi; -#ifdef _LP64 - __asm__ __volatile__("xorl %%eax,%%eax \n cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx"); - __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); - return (uint64_t)hi << 32 | lo; -#else - __asm__ __volatile__("xor %%eax,%%eax;" - "push %%ebx;" - "cpuid;" - "pop %%ebx;" :: - : "%eax", "%ecx", "%edx"); - __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); -#endif - return (uint64_t)hi << 32 | lo; -} -#endif -#endif -#endif - -namespace JitILProfiler -{ -struct Block -{ - u32 index; - u64 codeHash; - u64 totalElapsed; - u64 numberOfCalls; - - Block() : index(0), codeHash(0), totalElapsed(0), numberOfCalls(0) {} -}; - -static std::vector blocks; -static u32 blockIndex; -static u64 beginTime; -static Block& Add(u64 codeHash) -{ - const u32 _blockIndex = (u32)blocks.size(); - blocks.emplace_back(); - Block& block = blocks.back(); - block.index = _blockIndex; - block.codeHash = codeHash; - return block; -} - -// These functions need to be static because they are called with -// ABI_CallFunction(). -static void Begin(u32 index) -{ - blockIndex = index; - beginTime = __rdtsc(); -} - -static void End() -{ - const u64 endTime = __rdtsc(); - const u64 duration = endTime - beginTime; - Block& block = blocks[blockIndex]; - block.totalElapsed += duration; - ++block.numberOfCalls; -} - -struct JitILProfilerFinalizer -{ - virtual ~JitILProfilerFinalizer() - { - std::string filename = StringFromFormat("JitIL_profiling_%d.csv", (int)time(nullptr)); - File::IOFile file(filename, "w"); - setvbuf(file.GetHandle(), nullptr, _IOFBF, 1024 * 1024); - fprintf(file.GetHandle(), "code hash,total elapsed,number of calls,elapsed per call\n"); - for (auto& block : blocks) - { - const u64 codeHash = block.codeHash; - const u64 totalElapsed = block.totalElapsed; - const u64 numberOfCalls = block.numberOfCalls; - const double elapsedPerCall = totalElapsed / (double)numberOfCalls; - fprintf(file.GetHandle(), "%016" PRIx64 ",%" PRId64 ",%" PRId64 ",%f\n", codeHash, - totalElapsed, numberOfCalls, elapsedPerCall); - } - } -}; - -static std::unique_ptr finalizer; -static void Init() -{ - finalizer = std::make_unique(); -} - -static void Shutdown() -{ - finalizer.reset(); -} -}; - -void JitIL::Init() -{ - InitializeInstructionTables(); - EnableBlockLink(); - - jo.optimizeGatherPipe = true; - jo.accurateSinglePrecision = false; - UpdateMemoryOptions(); - - const size_t routines_size = asm_routines.CODE_SIZE; - const size_t trampolines_size = jo.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE; - const size_t farcode_size = jo.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE; - const size_t constpool_size = m_const_pool.CONST_POOL_SIZE; - AllocCodeSpace(CODE_SIZE + routines_size + trampolines_size + farcode_size + constpool_size); - AddChildCodeSpace(&asm_routines, routines_size); - AddChildCodeSpace(&trampolines, trampolines_size); - AddChildCodeSpace(&m_far_code, farcode_size); - m_const_pool.Init(AllocChildCodeSpace(constpool_size), constpool_size); - - blocks.Init(); - asm_routines.Init(nullptr); - m_far_code.Init(); - Clear(); - - code_block.m_stats = &js.st; - code_block.m_gpa = &js.gpa; - code_block.m_fpa = &js.fpa; - - if (SConfig::GetInstance().bJITILTimeProfiling) - { - JitILProfiler::Init(); - } -} - -void JitIL::ClearCache() -{ - blocks.Clear(); - trampolines.ClearCodeSpace(); - m_far_code.ClearCodeSpace(); - ClearCodeSpace(); - Clear(); -} - -void JitIL::Shutdown() -{ - if (SConfig::GetInstance().bJITILTimeProfiling) - { - JitILProfiler::Shutdown(); - } - - FreeCodeSpace(); - - blocks.Shutdown(); - m_far_code.Shutdown(); -} - -void JitIL::FallBackToInterpreter(UGeckoInstruction _inst) -{ - ibuild.EmitFallBackToInterpreter(ibuild.EmitIntConst(_inst.hex), - ibuild.EmitIntConst(js.compilerPC)); -} - -void JitIL::HLEFunction(UGeckoInstruction _inst) -{ - ABI_CallFunctionCC(HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(RSCRATCH), PPCSTATE(npc)); - WriteExitDestInOpArg(R(RSCRATCH)); -} - -void JitIL::DoNothing(UGeckoInstruction _inst) -{ - // Yup, just don't do anything. -} - -static const bool ImHereDebug = false; -static const bool ImHereLog = false; -static std::map been_here; - -static void ImHere() -{ - static File::IOFile f; - if (ImHereLog) - { - if (!f) - { - f.Open("log64.txt", "w"); - } - fprintf(f.GetHandle(), "%08x r0: %08x r5: %08x r6: %08x\n", PC, PowerPC::ppcState.gpr[0], - PowerPC::ppcState.gpr[5], PowerPC::ppcState.gpr[6]); - f.Flush(); - } - - if (been_here.find(PC) != been_here.end()) - { - been_here.find(PC)->second++; - if ((been_here.find(PC)->second) & 1023) - return; - } - - INFO_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); - been_here[PC] = 1; -} - -void JitIL::Cleanup() -{ - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. - if (MMCR0.Hex || MMCR1.Hex) - ABI_CallFunctionCCC(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, - js.numFloatingPointInst); -} - -void JitIL::WriteExit(u32 destination) -{ - Cleanup(); - if (SConfig::GetInstance().bJITILTimeProfiling) - { - ABI_CallFunction(JitILProfiler::End); - } - SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - - // If nobody has taken care of this yet (this can be removed when all branches are done) - JitBlock* b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); - linkData.linkStatus = false; - - MOV(32, PPCSTATE(pc), Imm32(destination)); - JMP(asm_routines.dispatcher, true); - - b->linkData.push_back(linkData); -} - -void JitIL::WriteExitDestInOpArg(const OpArg& arg) -{ - MOV(32, PPCSTATE(pc), arg); - Cleanup(); - if (SConfig::GetInstance().bJITILTimeProfiling) - { - ABI_CallFunction(JitILProfiler::End); - } - SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, true); -} - -void JitIL::WriteRfiExitDestInOpArg(const OpArg& arg) -{ - MOV(32, PPCSTATE(pc), arg); - MOV(32, PPCSTATE(npc), arg); - Cleanup(); - if (SConfig::GetInstance().bJITILTimeProfiling) - { - ABI_CallFunction(JitILProfiler::End); - } - ABI_CallFunction(PowerPC::CheckExceptions); - SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, true); -} - -void JitIL::WriteExceptionExit() -{ - Cleanup(); - if (SConfig::GetInstance().bJITILTimeProfiling) - { - ABI_CallFunction(JitILProfiler::End); - } - MOV(32, R(EAX), PPCSTATE(pc)); - MOV(32, PPCSTATE(npc), R(EAX)); - ABI_CallFunction(PowerPC::CheckExceptions); - SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, true); -} - -void JitIL::Run() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); - // Will return when PowerPC::state changes -} - -void JitIL::SingleStep() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} - -void JitIL::Trace() -{ - std::string regs; - std::string fregs; - -#ifdef JIT_LOG_GPR - for (int i = 0; i < 32; i++) - { - regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); - } -#endif - -#ifdef JIT_LOG_FPR - for (int i = 0; i < 32; i++) - { - fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i)); - } -#endif - - DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRval: " - "%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx FPSCR: %08x MSR: %08x LR: " - "%08x %s %s", - PC, SRR0, SRR1, (unsigned long)PowerPC::ppcState.cr_val[0], - (unsigned long)PowerPC::ppcState.cr_val[1], (unsigned long)PowerPC::ppcState.cr_val[2], - (unsigned long)PowerPC::ppcState.cr_val[3], (unsigned long)PowerPC::ppcState.cr_val[4], - (unsigned long)PowerPC::ppcState.cr_val[5], (unsigned long)PowerPC::ppcState.cr_val[6], - (unsigned long)PowerPC::ppcState.cr_val[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); -} - -void JitIL::Jit(u32 em_address) -{ - if (IsAlmostFull() || m_far_code.IsAlmostFull() || trampolines.IsAlmostFull() || - SConfig::GetInstance().bJITNoBlockCache) - { - ClearCache(); - } - - int blockSize = code_buffer.GetSize(); - - if (SConfig::GetInstance().bEnableDebugging) - { - // We can link blocks as long as we are not single stepping and there are no breakpoints here - EnableBlockLink(); - - // Comment out the following to disable breakpoints (speed-up) - if (!Profiler::g_ProfileBlocks) - { - if (CPU::IsStepping()) - { - blockSize = 1; - - // Do not link this block to other blocks While single stepping - jo.enableBlocklink = false; - } - Trace(); - } - } - - // Analyze the block, collect all instructions it is made of (including inlining, - // if that is enabled), reorder instructions for optimal performance, and join joinable - // instructions. - u32 nextPC = analyzer.Analyze(em_address, &code_block, &code_buffer, blockSize); - - if (code_block.m_memory_exception) - { - // Address of instruction could not be translated - NPC = nextPC; - PowerPC::ppcState.Exceptions |= EXCEPTION_ISI; - PowerPC::CheckExceptions(); - WARN_LOG(POWERPC, "ISI exception at 0x%08x", nextPC); - return; - } - - JitBlock* b = blocks.AllocateBlock(em_address); - DoJit(em_address, &code_buffer, b, nextPC); - blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); -} - -const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) -{ - js.isLastInstruction = false; - js.blockStart = em_address; - js.fifoBytesSinceCheck = 0; - js.curBlock = b; - js.numLoadStoreInst = 0; - js.numFloatingPointInst = 0; - - PPCAnalyst::CodeOp* ops = code_buf->codebuffer; - - const u8* start = - AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr - b->checkedEntry = start; - b->runCount = 0; - - // Downcount flag check. The last block decremented downcounter, and the flag should still be - // available. - FixupBranch skip = J_CC(CC_NBE); - MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); - JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. - SetJumpTarget(skip); - - const u8* normalEntry = GetCodePtr(); - b->normalEntry = normalEntry; - - // Used to get a trace of the last few blocks before a crash, sometimes VERY useful. - if (ImHereDebug) - ABI_CallFunction(ImHere); - - if (js.fpa.any) - { - // This block uses FPU - needs to add FP exception bailout - TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit - FixupBranch b1 = J_CC(CC_NZ); - - // If a FPU exception occurs, the exception handler will read - // from PC. Update PC with the latest value in case that happens. - MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); - OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); - WriteExceptionExit(); - - SetJumpTarget(b1); - } - - js.rewriteStart = (u8*)GetCodePtr(); - - u64 codeHash = -1; - if (SConfig::GetInstance().bJITILTimeProfiling || SConfig::GetInstance().bJITILOutputIR) - { - // For profiling and IR Writer - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - const u64 inst = ops[i].inst.hex; - // Ported from boost::hash - codeHash ^= inst + (codeHash << 6) + (codeHash >> 2); - } - } - - if (SConfig::GetInstance().bJITILTimeProfiling) - { - JitILProfiler::Block& block = JitILProfiler::Add(codeHash); - ABI_CallFunctionC(JitILProfiler::Begin, block.index); - } - - // Start up IR builder (structure that collects the - // instruction processed by the JIT routines) - ibuild.Reset(); - - js.downcountAmount = 0; - - // Translate instructions - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - js.compilerPC = ops[i].address; - js.op = &ops[i]; - js.instructionNumber = i; - const GekkoOPInfo* opinfo = GetOpInfo(ops[i].inst); - js.downcountAmount += opinfo->numCycles; - - if (!SConfig::GetInstance().bEnableDebugging) - js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC); - - if (i == (code_block.m_num_instructions - 1)) - js.isLastInstruction = true; - - u32 function = HLE::GetFirstFunctionIndex(ops[i].address); - if (function != 0) - { - int type = HLE::GetFunctionTypeByIndex(function); - if (type == HLE::HLE_HOOK_START || type == HLE::HLE_HOOK_REPLACE) - { - int flags = HLE::GetFunctionFlagsByIndex(function); - if (HLE::IsEnabled(flags)) - { - HLEFunction(function); - if (type == HLE::HLE_HOOK_REPLACE) - { - MOV(32, R(EAX), PPCSTATE(npc)); - js.downcountAmount += js.st.numCycles; - WriteExitDestInOpArg(R(EAX)); - break; - } - } - } - } - - if (!ops[i].skip) - { - if (jo.memcheck && (opinfo->flags & FL_USE_FPU)) - { - ibuild.EmitFPExceptionCheck(ibuild.EmitIntConst(ops[i].address)); - } - - if (js.fifoWriteAddresses.find(js.compilerPC) != js.fifoWriteAddresses.end()) - { - ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address)); - } - - if (SConfig::GetInstance().bEnableDebugging && - breakpoints.IsAddressBreakPoint(ops[i].address) && !CPU::IsStepping()) - { - // Turn off block linking if there are breakpoints so that the Step Over command does not - // link this block. - jo.enableBlocklink = false; - - ibuild.EmitBreakPointCheck(ibuild.EmitIntConst(ops[i].address)); - } - - CompileInstruction(ops[i]); - - if (jo.memcheck && (opinfo->flags & FL_LOADSTORE)) - { - ibuild.EmitDSIExceptionCheck(ibuild.EmitIntConst(ops[i].address)); - } - - if (opinfo->flags & FL_LOADSTORE) - ++js.numLoadStoreInst; - - if (opinfo->flags & FL_USE_FPU) - ++js.numFloatingPointInst; - } - } - - // Perform actual code generation - WriteCode(nextPC); - - b->codeSize = (u32)(GetCodePtr() - start); - b->originalSize = code_block.m_num_instructions; - -#ifdef JIT_LOG_X86 - LogGeneratedX86(code_block.m_num_instructions, code_buf, normalEntry, b); -#endif - - if (SConfig::GetInstance().bJITILOutputIR) - { - ibuild.WriteToFile(codeHash); - } - - return normalEntry; -} - -void JitIL::EnableBlockLink() -{ - jo.enableBlocklink = true; - if (SConfig::GetInstance().bJITNoBlockLinking) - jo.enableBlocklink = false; -} diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h deleted file mode 100644 index 168f0f2ccc..0000000000 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// ======================== -// See comments in Jit.cpp. -// ======================== - -// Mystery: Capcom vs SNK 800aa278 - -// CR flags approach: -// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3". -// * All flag altering instructions flush this -// * A flush simply does a conditional write to the appropriate CRx. -// * If flag available, branch code can become absolutely trivial. - -#pragma once - -#include "Common/CommonTypes.h" -#include "Common/x64ABI.h" -#include "Common/x64Emitter.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/Jit64/JitAsm.h" -#include "Core/PowerPC/JitCommon/JitCache.h" -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Core/PowerPC/PPCAnalyst.h" - -class JitIL : public JitILBase -{ -public: - Jit64AsmRoutineManager asm_routines; - - JitIL() {} - ~JitIL() {} - // Initialization, etc - - void Init() override; - void Shutdown() override; - - void EnableBlockLink(); - - // Jit! - - void Jit(u32 em_address) override; - const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC); - - void Trace(); - - JitBlockCache* GetBlockCache() override { return &blocks; } - void ClearCache() override; - - const CommonAsmRoutines* GetAsmRoutines() override { return &asm_routines; } - const char* GetName() override { return "JIT64IL"; } - // Run! - void Run() override; - void SingleStep() override; - - // Utilities for use by opcodes - - void WriteExit(u32 destination); - void WriteExitDestInOpArg(const Gen::OpArg& arg); - void WriteExceptionExit(); - void WriteRfiExitDestInOpArg(const Gen::OpArg& arg); - void Cleanup(); - - void WriteCode(u32 exitAddress); - - // OPCODES - using Instruction = void (JitIL::*)(UGeckoInstruction instCode); - void FallBackToInterpreter(UGeckoInstruction _inst) override; - void DoNothing(UGeckoInstruction _inst) override; - void HLEFunction(UGeckoInstruction _inst) override; - - void DynaRunTable4(UGeckoInstruction _inst) override; - void DynaRunTable19(UGeckoInstruction _inst) override; - void DynaRunTable31(UGeckoInstruction _inst) override; - void DynaRunTable59(UGeckoInstruction _inst) override; - void DynaRunTable63(UGeckoInstruction _inst) override; - -private: - static void InitializeInstructionTables(); - void CompileInstruction(PPCAnalyst::CodeOp& op); -}; diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp deleted file mode 100644 index 0fdd17473d..0000000000 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/Jit64IL/JitIL.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCTables.h" - -static JitIL::Instruction dynaOpTable[64]; -static JitIL::Instruction dynaOpTable4[1024]; -static JitIL::Instruction dynaOpTable19[1024]; -static JitIL::Instruction dynaOpTable31[1024]; -static JitIL::Instruction dynaOpTable59[32]; -static JitIL::Instruction dynaOpTable63[1024]; - -void JitIL::DynaRunTable4(UGeckoInstruction _inst) -{ - (this->*dynaOpTable4[_inst.SUBOP10])(_inst); -} -void JitIL::DynaRunTable19(UGeckoInstruction _inst) -{ - (this->*dynaOpTable19[_inst.SUBOP10])(_inst); -} -void JitIL::DynaRunTable31(UGeckoInstruction _inst) -{ - (this->*dynaOpTable31[_inst.SUBOP10])(_inst); -} -void JitIL::DynaRunTable59(UGeckoInstruction _inst) -{ - (this->*dynaOpTable59[_inst.SUBOP5])(_inst); -} -void JitIL::DynaRunTable63(UGeckoInstruction _inst) -{ - (this->*dynaOpTable63[_inst.SUBOP10])(_inst); -} - -struct GekkoOPTemplate -{ - int opcode; - JitIL::Instruction Inst; -}; - -const GekkoOPTemplate primarytable[] = { - {4, &JitIL::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, - {19, &JitIL::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, - {31, &JitIL::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, - {59, &JitIL::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, - {63, &JitIL::DynaRunTable63}, //"RunTable63", OPTYPE_SUBTABLE | (63<<24), 0}}, - - {16, &JitIL::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {18, &JitIL::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - - {3, &JitIL::FallBackToInterpreter}, //"twi", OPTYPE_SYSTEM, 0}}, - {17, &JitIL::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - - {7, &JitIL::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, - {8, &JitIL::subfic}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {10, &JitIL::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {11, &JitIL::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {12, &JitIL::reg_imm}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {13, &JitIL::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, - {14, &JitIL::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - {15, &JitIL::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - - {20, - &JitIL::rlwimix}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, - {21, &JitIL::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {23, &JitIL::rlwnmx}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, - - {24, &JitIL::reg_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {25, &JitIL::reg_imm}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {26, &JitIL::reg_imm}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {27, &JitIL::reg_imm}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {28, &JitIL::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - {29, &JitIL::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - - {32, &JitIL::lXz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {33, &JitIL::lXz}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {34, &JitIL::lXz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {35, &JitIL::lbzu}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {40, &JitIL::lXz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {41, &JitIL::lXz}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {42, &JitIL::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {43, &JitIL::lhau}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - - {44, &JitIL::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {45, &JitIL::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {36, &JitIL::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {37, &JitIL::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {38, &JitIL::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {39, &JitIL::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - - {46, &JitIL::lmw}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - {47, &JitIL::stmw}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - - {48, &JitIL::lfs}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, - {49, &JitIL::lfsu}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - {50, &JitIL::lfd}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, - {51, &JitIL::lfdu}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - - {52, &JitIL::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, - {53, &JitIL::stfs}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {54, &JitIL::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, - {55, &JitIL::stfd}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - - {56, &JitIL::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}}, - {57, &JitIL::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - {60, &JitIL::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}}, - {61, &JitIL::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - - // missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 -}; - -const GekkoOPTemplate table4[] = { - // SUBOP10 - {0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, - {40, &JitIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, - {136, &JitIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, - {264, &JitIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, - {72, &JitIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, - {528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, - {560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, - {592, &JitIL::ps_mergeXX}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, - {624, &JitIL::ps_mergeXX}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, - - {1014, &JitIL::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, -}; - -const GekkoOPTemplate table4_2[] = { - {10, &JitIL::ps_sum}, //"ps_sum0", OPTYPE_PS, 0}}, - {11, &JitIL::ps_sum}, //"ps_sum1", OPTYPE_PS, 0}}, - {12, &JitIL::ps_muls}, //"ps_muls0", OPTYPE_PS, 0}}, - {13, &JitIL::ps_muls}, //"ps_muls1", OPTYPE_PS, 0}}, - {14, &JitIL::ps_maddXX}, //"ps_madds0", OPTYPE_PS, 0}}, - {15, &JitIL::ps_maddXX}, //"ps_madds1", OPTYPE_PS, 0}}, - {18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}}, - {20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}}, - {21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}}, - {23, &JitIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, - {24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, - {25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}}, - {26, &JitIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, - {28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}}, - {29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}}, - {30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}}, - {31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}}, -}; - -const GekkoOPTemplate table4_3[] = { - {6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, - {7, &JitIL::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, - {38, &JitIL::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, - {39, &JitIL::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, -}; - -const GekkoOPTemplate table19[] = { - {528, &JitIL::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {16, &JitIL::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &JitIL::crXX}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &JitIL::crXX}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &JitIL::crXX}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &JitIL::crXX}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &JitIL::crXX}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &JitIL::crXX}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &JitIL::crXX}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &JitIL::crXX}, //"crxor", OPTYPE_CR, FL_EVIL}}, - - {150, &JitIL::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, - {0, &JitIL::mcrf}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, - - {50, &JitIL::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, -}; - -const GekkoOPTemplate table31[] = { - {266, &JitIL::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {778, &JitIL::addx}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &JitIL::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_SET_CA | FL_RC_BIT}}, - {522, &JitIL::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_SET_CA | FL_RC_BIT}}, - {138, &JitIL::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA - //| FL_RC_BIT}}, - {650, &JitIL::addex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA - //| FL_RC_BIT}}, - {234, &JitIL::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {746, &JitIL::FallBackToInterpreter}, //"addmeox" - {202, &JitIL::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | - // FL_SET_CA | FL_RC_BIT}}, - {714, &JitIL::addzex}, //"addzeox" - {491, &JitIL::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_RC_BIT, 39}}, - {1003, &JitIL::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_RC_BIT, 39}}, - {459, &JitIL::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {971, &JitIL::divwux}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {75, &JitIL::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_RC_BIT, 4}}, - {11, &JitIL::mulhwux}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {235, &JitIL::mullwx}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {747, &JitIL::mullwx}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {104, &JitIL::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {616, &JitIL::negx}, //"negox" - {40, &JitIL::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {552, &JitIL::subfx}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {8, - &JitIL::subfcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {520, - &JitIL::subfcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {136, &JitIL::subfex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | - // FL_SET_CA | FL_RC_BIT}}, - {648, &JitIL::subfex}, //"subfeox" - {232, &JitIL::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {744, &JitIL::FallBackToInterpreter}, //"subfmeox" - {200, &JitIL::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | - // FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {712, &JitIL::FallBackToInterpreter}, //"subfzeox" - - {28, &JitIL::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {60, &JitIL::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {444, &JitIL::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {124, &JitIL::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {316, &JitIL::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {412, &JitIL::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {476, &JitIL::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {284, &JitIL::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {0, &JitIL::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {32, &JitIL::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {26, &JitIL::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {922, &JitIL::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {954, &JitIL::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {536, &JitIL::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {792, &JitIL::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {824, &JitIL::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {24, &JitIL::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - - {54, &JitIL::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, - {86, &JitIL::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, - {246, &JitIL::DoNothing}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, - {278, &JitIL::DoNothing}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, - {470, &JitIL::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, - {758, &JitIL::DoNothing}, //"dcba", OPTYPE_DCACHE, 0, 4}}, - {1014, &JitIL::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, - - // load word - {23, &JitIL::lXzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {55, &JitIL::lXzx}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // load halfword - {279, &JitIL::lXzx}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {311, &JitIL::lXzx}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // load halfword signextend - {343, &JitIL::lhax}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {375, &JitIL::lhaux}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // load byte - {87, &JitIL::lXzx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {119, &JitIL::lXzx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // load byte reverse - {534, &JitIL::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {790, &JitIL::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - - // Conditional load/store (Wii SMP) - {150, &JitIL::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, - {20, &JitIL::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | - // FL_SET_CR0}}, - - // load string (interpret these) - {533, &JitIL::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, - {597, &JitIL::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, - - // store word - {151, &JitIL::stXx}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, &JitIL::stXx}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // store halfword - {407, &JitIL::stXx}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, &JitIL::stXx}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // store byte - {215, &JitIL::stXx}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, &JitIL::stXx}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - // store bytereverse - {662, &JitIL::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {918, &JitIL::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, - - {661, &JitIL::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, - {725, &JitIL::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, - - // fp load/store - {535, &JitIL::lfsx}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {567, &JitIL::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - {599, &JitIL::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {631, &JitIL::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - - {663, &JitIL::stfsx}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {695, &JitIL::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {727, &JitIL::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {759, &JitIL::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {983, &JitIL::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - - {19, &JitIL::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, - {83, &JitIL::mfmsr}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, - {144, &JitIL::mtcrf}, //"mtcrf", OPTYPE_SYSTEM, 0}}, - {146, &JitIL::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {210, &JitIL::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}}, - {242, &JitIL::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, - {339, &JitIL::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, - {467, &JitIL::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, - {371, &JitIL::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &JitIL::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, - {595, &JitIL::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - {659, &JitIL::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - - {4, &JitIL::FallBackToInterpreter}, //"tw", OPTYPE_SYSTEM, 0, 1}}, - {598, &JitIL::DoNothing}, //"sync", OPTYPE_SYSTEM, 0, 2}}, - {982, &JitIL::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, - - // Unused instructions on GC - {310, &JitIL::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, - {438, &JitIL::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}}, - {854, &JitIL::DoNothing}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}}, - {306, &JitIL::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}}, - {566, &JitIL::DoNothing}, //"tlbsync", OPTYPE_SYSTEM, 0}}, -}; - -const GekkoOPTemplate table59[] = { - {18, &JitIL::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, - {20, &JitIL::fp_arith_s}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitIL::fp_arith_s}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {24, &JitIL::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitIL::fp_arith_s}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitIL::fmaddXX}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitIL::fmaddXX}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitIL::fmaddXX}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitIL::fmaddXX}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, -}; - -const GekkoOPTemplate table63[] = { - {264, &JitIL::fsign}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitIL::fcmpX}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitIL::fcmpX}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, - {14, &JitIL::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, - {15, &JitIL::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, - {72, &JitIL::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, - {136, &JitIL::fsign}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {40, &JitIL::fsign}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, - {12, &JitIL::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, - - {64, &JitIL::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, - {583, &JitIL::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}}, - {70, &JitIL::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}}, - {38, &JitIL::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}}, - {134, &JitIL::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}}, - {711, &JitIL::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}}, -}; - -const GekkoOPTemplate table63_2[] = { - {18, &JitIL::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, - {20, &JitIL::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitIL::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitIL::fmaddXX}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, -}; - -void JitIL::CompileInstruction(PPCAnalyst::CodeOp& op) -{ - (this->*dynaOpTable[op.inst.OPCD])(op.inst); - - GekkoOPInfo* info = op.opinfo; - if (info) - { -#ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs" - { - rsplocations.push_back(js.compilerPC); - } -#endif - info->compileCount++; - info->lastUse = js.compilerPC; - } - else - { - PanicAlert("Tried to compile illegal (or unknown) instruction %08x, at %08x", op.inst.hex, - js.compilerPC); - } -} - -void JitIL::InitializeInstructionTables() -{ - // once initialized, tables are read-only - static bool initialized = false; - if (initialized) - return; - - // clear - for (auto& tpl : dynaOpTable) - { - tpl = &JitIL::FallBackToInterpreter; - } - - for (auto& tpl : dynaOpTable59) - { - tpl = &JitIL::FallBackToInterpreter; - } - - for (int i = 0; i < 1024; i++) - { - dynaOpTable4[i] = &JitIL::FallBackToInterpreter; - dynaOpTable19[i] = &JitIL::FallBackToInterpreter; - dynaOpTable31[i] = &JitIL::FallBackToInterpreter; - dynaOpTable63[i] = &JitIL::FallBackToInterpreter; - } - - for (const auto& tpl : primarytable) - { - dynaOpTable[tpl.opcode] = tpl.Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (const auto& tpl : table4_2) - { - int op = fill + tpl.opcode; - dynaOpTable4[op] = tpl.Inst; - } - } - - for (int i = 0; i < 16; i++) - { - int fill = i << 6; - for (const auto& tpl : table4_3) - { - int op = fill + tpl.opcode; - dynaOpTable4[op] = tpl.Inst; - } - } - - for (const auto& tpl : table4) - { - int op = tpl.opcode; - dynaOpTable4[op] = tpl.Inst; - } - - for (const auto& tpl : table31) - { - int op = tpl.opcode; - dynaOpTable31[op] = tpl.Inst; - } - - for (const auto& tpl : table19) - { - int op = tpl.opcode; - dynaOpTable19[op] = tpl.Inst; - } - - for (const auto& tpl : table59) - { - int op = tpl.opcode; - dynaOpTable59[op] = tpl.Inst; - } - - for (const auto& tpl : table63) - { - int op = tpl.opcode; - dynaOpTable63[op] = tpl.Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (const auto& tpl : table63_2) - { - int op = fill + tpl.opcode; - dynaOpTable63[op] = tpl.Inst; - } - } - - initialized = true; -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp deleted file mode 100644 index 62327b18e8..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ /dev/null @@ -1,1817 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -/* - -IR implementation comments: -This file implements code generation for a new IR-based JIT. The idea of -the IR is that as much as possible, it strips away the complexities -of the PPC instruction set into a simpler instruction set. In its current -form, the semantics are very simple: each instruction does its calculation -and performs its side effects in order. For an instruction with a result, -the instruction also represents the returned value. This works quite -simply because jumps within a block are not allowed. - -The IR treats loads and stores to PPC registers as separate steps from actual -calculations. This allows the instruction set to be significantly simpler, -because one PPC instruction can be mapped to multiple IR instructions. It -also allows optimizing out dead register stores: this reduces register -pressure and allows dead code elimination to completely remove instructions -which produce unused values, or the carry flag of srawx. - -The actual IR representation uses a few tricks I picked up from nanojit: -each instruction is a single 32-bit integer, the operands are 8-bit offsets -back from the current instruction, and there's a special Tramp instruction -to reference operands that are too far away to reference otherwise. - -The first step of code generation is producing the IR; this is roughly -equivalent to all of code generation in the previous code. In addition -to storing the IR, some optimizations occur in this step: the primary -optimizations are that redundant register loads/stores are eliminated, -and constant-folding is done. - -The second step is a quick pass over the IL to figure out liveness: this -information is used both for dead code elimination and to find the last -use of an instruction, which is allowed to destroy the value. - -The third step is the actual code generation: this just goes through each -instruction and generates code. Dead code elimination works in this step, -by simply skipping unused instructions. The register allocator is a dumb, -greedy allocator: at the moment, it's really a bit too dumb, but it's -actually not as bad as it looks: unless a block is relatively long, spills -are rarely needed. EDX is used as a scratch register: requiring a scratch -register isn't ideal, but the register allocator is too dumb to handle -instructions that need a specific register at the moment. - -In addition to the optimizations that are deeply tied to the IR, -I've implemented one additional trick: fast memory for 32-bit machines. -This works off of the observation that loads and stores can be classified -at runtime: any particular load instruction will always load similar addresses, -and any store will store to similar addresses. Using this observation, every -block is JIT-ed twice: the first time, the block includes extra code to -instrument the loads. Then, at the end of the block, it jumps back into the JIT -to recompile itself. The second recompilation looks at the address of each load -and store, and bakes the information into the generated code. This allows removing -the overhead for both the mask and the branch normally required for loads on 32-bit -machines. This optimization isn't completely safe: it depends on a guarantee which -isn't made by the PPC instruction set. That said, it's reliable enough that games -work without any fallback, and it's a large performance boost. Also, if it turns -out it isn't completely reliable, we can use a solution using segments which is -similar to the 64-bit fast memory implementation. - -The speed with this JIT is better than I expected, but not at much as I hoped... -on the test I've been working on (which bounded by JIT performance and doesn't -use any floating-point), it's roughly 25% faster than the current JIT, with the -edge over the current JIT mostly due to the fast memory optimization. - -Update on perf: -I've been doing a bit more tweaking for a small perf improvement (in the -range of 5-10%). That said, it's getting to the point where I'm simply -not seeing potential for improvements to codegen, at least for long, -straightforward blocks. For one long block that's at the top of my samples, -I've managed to get the bloat% (number of instructions compared to PPC -equivalent) down to 225%, and I can't really see it going down much further. -It looks like the most promising paths to further improvement for pure -integer code are more aggresively combining blocks and dead condition -register elimination, which should be very helpful for small blocks. - -TODO (in no particular order): -- JIT for misc remaining FP instructions -- JIT for bcctrx -- Misc optimizations for FP instructions -- Inter-block dead register elimination; this seems likely to have large - performance benefits, although I'm not completely sure. - -- Inter-block inlining; also likely to have large performance benefits. - The tricky parts are deciding which blocks to inline, and that the - IR can't really deal with branches whose destination is in the - the middle of a generated block. - -- Specialized slw/srw/sraw; I think there are some tricks that could - have a non-trivial effect, and there are significantly shorter - implementations for 64-bit involving abusing 64-bit shifts. - 64-bit compat (it should only be a few tweaks to register allocation and the load/store code) - -- Scheduling to reduce register pressure: PowerPCcompilers like to push - uses far away from definitions, but it's rather unfriendly to modern - x86 processors, which are short on registers and extremely good at instruction reordering. - -- Common subexpression elimination -- Optimize load/store of sum using complex addressing (partially implemented) -- Loop optimizations (loop-carried registers, LICM) -- Code refactoring/cleanup - -- Investigate performance of the JIT itself; this doesn't affect - framerates significantly, but it does take a visible amount - of time for a complicated piece of code like a video decoder to compile. - -- Fix profiled loads/stores to work safely. On 32-bit, one solution is to - use a spare segment register, and expand the backpatch solution - to work in all the relevant situations. On 64-bit, the existing - fast memory solution should basically work. An alternative - would be to figure out a heuristic for what loads actually - vary their "type", and special-case them. - -*/ - -#ifdef _MSC_VER -#pragma warning( \ - disable : 4146) // unary minus operator applied to unsigned type, result still unsigned -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/StringUtil.h" -#include "Common/x64Emitter.h" -#include "Core/PowerPC/JitILCommon/IR.h" - -using namespace Gen; - -namespace IREmitter -{ -IRBuilder::IRBuilder() -{ - Reset(); -} - -void IRBuilder::Reset() -{ - InstList.clear(); - InstList.reserve(100000); - MarkUsed.clear(); - MarkUsed.reserve(100000); - - InvalidateCaches(); -} - -void IRBuilder::InvalidateCaches() -{ - GRegCache = {}; - GRegCacheStore = {}; - - FRegCache = {}; - FRegCacheStore = {}; - - CarryCache = nullptr; - CarryCacheStore = nullptr; - - CRCache = {}; - CRCacheStore = {}; - - CTRCache = nullptr; - CTRCacheStore = nullptr; -} - -InstLoc IRBuilder::EmitZeroOp(unsigned Opcode, unsigned extra = 0) -{ - InstLoc curIndex = InstList.data() + InstList.size(); - InstList.push_back(Opcode | (extra << 8)); - MarkUsed.push_back(false); - return curIndex; -} - -InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) -{ - InstLoc curIndex = InstList.data() + InstList.size(); - unsigned backOp1 = (s32)(curIndex - 1 - Op1); - if (backOp1 >= 256) - { - InstList.push_back(Tramp | backOp1 << 8); - MarkUsed.push_back(false); - backOp1 = 0; - curIndex++; - } - - InstList.push_back(Opcode | (backOp1 << 8) | (extra << 16)); - MarkUsed.push_back(false); - return curIndex; -} - -InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) -{ - InstLoc curIndex = InstList.data() + InstList.size(); - unsigned backOp1 = (s32)(curIndex - 1 - Op1); - if (backOp1 >= 255) - { - InstList.push_back(Tramp | backOp1 << 8); - MarkUsed.push_back(false); - backOp1 = 0; - curIndex++; - } - - unsigned backOp2 = (s32)(curIndex - 1 - Op2); - if (backOp2 >= 256) - { - InstList.push_back(Tramp | backOp2 << 8); - MarkUsed.push_back(false); - backOp2 = 0; - backOp1++; - curIndex++; - } - - InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (extra << 24)); - MarkUsed.push_back(false); - return curIndex; -} - -#if 0 -InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, InstLoc Op3) -{ - InstLoc curIndex = InstList.data() + InstList.size(); - unsigned backOp1 = curIndex - 1 - Op1; - if (backOp1 >= 254) - { - InstList.push_back(Tramp | backOp1 << 8); - MarkUsed.push_back(false); - backOp1 = 0; - curIndex++; - } - - unsigned backOp2 = curIndex - 1 - Op2; - if (backOp2 >= 255) - { - InstList.push_back((Tramp | backOp2 << 8)); - MarkUsed.push_back(false); - backOp2 = 0; - backOp1++; - curIndex++; - } - - unsigned backOp3 = curIndex - 1 - Op3; - if (backOp3 >= 256) - { - InstList.push_back(Tramp | (backOp3 << 8)); - MarkUsed.push_back(false); - backOp3 = 0; - backOp2++; - backOp1++; - curIndex++; - } - - InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (backOp3 << 24)); - MarkUsed.push_back(false); - return curIndex; -} -#endif - -unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) const -{ - switch (getOpcode(*I)) - { - case Load8: - return 0xFFFFFF00; - case Or: - return ComputeKnownZeroBits(getOp1(I)) & ComputeKnownZeroBits(getOp2(I)); - case And: - return ComputeKnownZeroBits(getOp1(I)) | ComputeKnownZeroBits(getOp2(I)); - case Shl: - if (isImm(*getOp2(I))) - { - unsigned samt = GetImmValue(getOp2(I)) & 31; - return (ComputeKnownZeroBits(getOp1(I)) << samt) | ~(-1U << samt); - } - return 0; - case Shrl: - if (isImm(*getOp2(I))) - { - unsigned samt = GetImmValue(getOp2(I)) & 31; - return (ComputeKnownZeroBits(getOp1(I)) >> samt) | ~(-1U >> samt); - } - return 0; - case Rol: - if (isImm(*getOp2(I))) - { - return _rotl(ComputeKnownZeroBits(getOp1(I)), GetImmValue(getOp2(I))); - } - default: - return 0; - } -} - -InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) -{ - if (Opcode == LoadGReg) - { - // Reg load folding: if we already loaded the value, - // load it again - if (!GRegCache[extra]) - GRegCache[extra] = EmitZeroOp(LoadGReg, extra); - return GRegCache[extra]; - } - else if (Opcode == LoadFReg) - { - // Reg load folding: if we already loaded the value, - // load it again - if (!FRegCache[extra]) - FRegCache[extra] = EmitZeroOp(LoadFReg, extra); - return FRegCache[extra]; - } - else if (Opcode == LoadFRegDENToZero) - { - FRegCacheStore[extra] = nullptr; // prevent previous store operation from zapping - FRegCache[extra] = EmitZeroOp(LoadFRegDENToZero, extra); - return FRegCache[extra]; - } - else if (Opcode == LoadCarry) - { - if (!CarryCache) - CarryCache = EmitZeroOp(LoadCarry, extra); - return CarryCache; - } - else if (Opcode == LoadCR) - { - if (!CRCache[extra]) - CRCache[extra] = EmitZeroOp(LoadCR, extra); - return CRCache[extra]; - } - else if (Opcode == LoadCTR) - { - if (!CTRCache) - CTRCache = EmitZeroOp(LoadCTR, extra); - return CTRCache; - } - - return EmitZeroOp(Opcode, extra); -} - -InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) -{ - if (Opcode == StoreGReg) - { - // Reg store folding: save the value for load folding. - // If there's a previous store, zap it because it's dead. - GRegCache[extra] = Op1; - if (GRegCacheStore[extra]) - *GRegCacheStore[extra] = 0; - - GRegCacheStore[extra] = EmitUOp(StoreGReg, Op1, extra); - return GRegCacheStore[extra]; - } - else if (Opcode == StoreFReg) - { - FRegCache[extra] = Op1; - if (FRegCacheStore[extra]) - *FRegCacheStore[extra] = 0; - - FRegCacheStore[extra] = EmitUOp(StoreFReg, Op1, extra); - return FRegCacheStore[extra]; - } - else if (Opcode == StoreCarry) - { - CarryCache = Op1; - if (CarryCacheStore) - *CarryCacheStore = 0; - - CarryCacheStore = EmitUOp(StoreCarry, Op1, extra); - return CarryCacheStore; - } - else if (Opcode == StoreCR) - { - CRCache[extra] = Op1; - if (CRCacheStore[extra]) - *CRCacheStore[extra] = 0; - - CRCacheStore[extra] = EmitUOp(StoreCR, Op1, extra); - return CRCacheStore[extra]; - } - else if (Opcode == StoreCTR) - { - CTRCache = Op1; - if (CTRCacheStore) - *CTRCacheStore = 0; - - CTRCacheStore = EmitUOp(StoreCTR, Op1, extra); - return CTRCacheStore; - } - else if (Opcode == CompactMRegToPacked) - { - if (getOpcode(*Op1) == ExpandPackedToMReg) - return getOp1(Op1); - } - else if (Opcode == DoubleToSingle) - { - if (getOpcode(*Op1) == DupSingleToMReg) - return getOp1(Op1); - - if (getOpcode(*Op1) >= FDMul && getOpcode(*Op1) <= FDSub) - { - InstLoc OOp1 = getOp1(Op1), OOp2 = getOp2(Op1); - if (getOpcode(*OOp1) == DupSingleToMReg && getOpcode(*OOp2) == DupSingleToMReg) - { - if (getOpcode(*Op1) == FDMul) - return FoldBiOp(FSMul, getOp1(OOp1), getOp2(OOp2)); - else if (getOpcode(*Op1) == FDAdd) - return FoldBiOp(FSAdd, getOp1(OOp1), getOp2(OOp2)); - else if (getOpcode(*Op1) == FDSub) - return FoldBiOp(FSSub, getOp1(OOp1), getOp2(OOp2)); - } - } - } - else if (Opcode == Not) - { - if (getOpcode(*Op1) == Not) - { - return getOp1(Op1); - } - } - else if (Opcode == FastCRGTSet) - { - if (getOpcode(*Op1) == ICmpCRSigned) - return EmitICmpSgt(getOp1(Op1), getOp2(Op1)); - if (getOpcode(*Op1) == ICmpCRUnsigned) - return EmitICmpUgt(getOp1(Op1), getOp2(Op1)); - if (isImm(*Op1)) - return EmitIntConst((s64)GetImmValue64(Op1) > 0); - } - else if (Opcode == FastCRLTSet) - { - if (getOpcode(*Op1) == ICmpCRSigned) - return EmitICmpSlt(getOp1(Op1), getOp2(Op1)); - if (getOpcode(*Op1) == ICmpCRUnsigned) - return EmitICmpUlt(getOp1(Op1), getOp2(Op1)); - if (isImm(*Op1)) - return EmitIntConst(!!(GetImmValue64(Op1) & (1ull << 62))); - } - else if (Opcode == FastCREQSet) - { - if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned) - return EmitICmpEq(getOp1(Op1), getOp2(Op1)); - if (isImm(*Op1)) - return EmitIntConst((GetImmValue64(Op1) & 0xFFFFFFFFU) == 0); - } - - return EmitUOp(Opcode, Op1, extra); -} - -// Fold Add opcode. Some rules are ported from LLVM -InstLoc IRBuilder::FoldAdd(InstLoc Op1, InstLoc Op2) -{ - simplifyCommutative(Add, Op1, Op2); - - // i0 + i1 => (i0 + i1) - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) + GetImmValue(Op2)); - } - - // x + 0 => x - if (isImm(*Op2) && GetImmValue(Op2) == 0) - { - return Op1; - } - - // x + (y - x) --> y - if (getOpcode(*Op2) == Sub && isSameValue(Op1, getOp2(Op2))) - { - return getOp1(Op2); - } - - // (x - y) + y => x - if (getOpcode(*Op1) == Sub && isSameValue(getOp2(Op1), Op2)) - { - return getOp1(Op1); - } - - if (InstLoc negOp1 = isNeg(Op1)) - { - //// TODO: Test the folding below - //// -A + -B --> -(A + B) - // if (InstLoc negOp2 = isNeg(Op2)) - //{ - // return FoldSub(EmitIntConst(0), FoldAdd(negOp1, negOp2)); - //} - - // -A + B --> B - A - return FoldSub(Op2, negOp1); - } - - // A + -B --> A - B - if (InstLoc negOp2 = isNeg(Op2)) - { - return FoldSub(Op1, negOp2); - } - - // (x * i0) + x => x * (i0 + 1) - if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) - { - return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + 1)); - } - - //// TODO: Test the folding below - //// (x * i0) + (x * i1) => x * (i0 + i1) - // if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) - // && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) - //{ - // return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + - // GetImmValue(getOp2(Op2)))); - //} - - // x + x * i0 => x * (i0 + 1) - if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) - { - return FoldMul(Op1, EmitIntConst(GetImmValue(getOp2(Op2)) + 1)); - } - - // w * x + y * z => w * (x + z) iff w == y - if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul) - { - InstLoc w = getOp1(Op1); - InstLoc x = getOp2(Op1); - InstLoc y = getOp1(Op2); - InstLoc z = getOp2(Op2); - - if (!isSameValue(w, y)) - { - if (isSameValue(w, z)) - { - std::swap(y, z); - } - else if (isSameValue(y, x)) - { - std::swap(w, x); - } - else if (isSameValue(x, z)) - { - std::swap(y, z); - std::swap(w, x); - } - } - - if (isSameValue(w, y)) - { - return FoldMul(w, FoldAdd(x, z)); - } - } - - return EmitBiOp(Add, Op1, Op2); -} - -// Fold Sub opcode. Some rules are ported from LLVM -InstLoc IRBuilder::FoldSub(InstLoc Op1, InstLoc Op2) -{ - // (x - x) => 0 - if (isSameValue(Op1, Op2)) - { - return EmitIntConst(0); - } - - // x - (-A) => x + A - if (InstLoc negOp2 = isNeg(Op2)) - { - return FoldAdd(Op1, negOp2); - } - - // (x - i0) => x + -i0 - if (isImm(*Op2)) - { - return FoldAdd(Op1, EmitIntConst(-GetImmValue(Op2))); - } - - if (getOpcode(*Op2) == Add) - { - // x - (x + y) => -y - if (isSameValue(Op1, getOp1(Op2))) - { - return FoldSub(EmitIntConst(0), getOp2(Op2)); - } - - // x - (y + x) => -y - if (isSameValue(Op1, getOp2(Op2))) - { - return FoldSub(EmitIntConst(0), getOp1(Op2)); - } - - // i0 - (x + i1) => (i0 - i1) - x - if (isImm(*Op1) && isImm(*getOp2(Op2))) - { - return FoldSub(EmitIntConst(GetImmValue(Op1) - GetImmValue(getOp2(Op2))), getOp1(Op2)); - } - } - - //// TODO: Test the folding below - //// 0 - (C << X) -> (-C << X) - // if (isImm(*Op1) && GetImmValue(Op1) == 0 && getOpcode(*Op2) == Shl && isImm(*getOp1(Op2))) - //{ - // return FoldShl(EmitIntConst(-GetImmValue(getOp1(Op2))), getOp2(Op2)); - //} - - //// TODO: Test the folding below - //// x - x * i0 = x * (1 - i0) - // if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) - //{ - // return FoldMul(Op1, EmitIntConst(1 - GetImmValue(getOp2(Op2)))); - //} - - if (getOpcode(*Op1) == Add) - { - // (x + y) - x => y - if (isSameValue(getOp1(Op1), Op2)) - { - return getOp2(Op1); - } - - // (x + y) - y => x - if (isSameValue(getOp2(Op1), Op2)) - { - return getOp1(Op1); - } - } - - // if (getOpcode(*Op1) == Sub) - //{ - // // TODO: Test the folding below - // // (x - y) - x => -y - // if (isSameValue(getOp1(Op1), Op2)) - // { - // return FoldSub(EmitIntConst(0), getOp2(Op1)); - // } - //} - - if (getOpcode(*Op1) == Mul) - { - // x * i0 - x => x * (i0 - 1) - if (isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) - { - return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) - 1)); - } - - //// TODO: Test the folding below - //// x * i0 - x * i1 => x * (i0 - i1) - // if (getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) && isImm(*getOp2(Op1)) && - // isImm(*getOp2(Op2))) - //{ - // return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + - // GetImmValue(getOp2(Op2)))); - //} - } - - // (x + i0) - (y + i1) => (x - y) + (i0 - i1) - if (getOpcode(*Op1) == Add && getOpcode(*Op2) == Add && isImm(*getOp2(Op1)) && - isImm(*getOp2(Op2))) - { - return FoldAdd(FoldSub(getOp1(Op1), getOp1(Op2)), - EmitIntConst(GetImmValue(getOp2(Op1)) - GetImmValue(getOp2(Op2)))); - } - - // w * x - y * z => w * (x - z) iff w == y - if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul) - { - InstLoc w = getOp1(Op1); - InstLoc x = getOp2(Op1); - InstLoc y = getOp1(Op2); - InstLoc z = getOp2(Op2); - - if (!isSameValue(w, y)) - { - if (isSameValue(w, z)) - { - std::swap(y, z); - } - else if (isSameValue(y, x)) - { - std::swap(w, x); - } - else if (isSameValue(x, z)) - { - std::swap(y, z); - std::swap(w, x); - } - } - - if (isSameValue(w, y)) - { - return FoldMul(w, FoldSub(x, z)); - } - } - - return EmitBiOp(Sub, Op1, Op2); -} - -// Fold Mul opcode. Some rules are ported from LLVM -InstLoc IRBuilder::FoldMul(InstLoc Op1, InstLoc Op2) -{ - simplifyCommutative(Mul, Op1, Op2); - - // i0 * i1 => (i0 * i1) - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) * GetImmValue(Op2)); - } - - // (x << i0) * i1 => x * (i1 << i0) - if (getOpcode(*Op1) == Shl && isImm(*getOp2(Op1)) && isImm(*Op2)) - { - return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(Op2) << GetImmValue(getOp2(Op1)))); - } - - if (isImm(*Op2)) - { - const unsigned imm = GetImmValue(Op2); - - // x * 0 => 0 - if (imm == 0) - { - return EmitIntConst(0); - } - - // x * -1 => 0 - x - if (imm == -1U) - { - return FoldSub(EmitIntConst(0), Op1); - } - - for (unsigned i0 = 0; i0 < 30; ++i0) - { - // x * (1 << i0) => x << i0 - // One "shl" is faster than one "imul". - if (imm == (1U << i0)) - { - return FoldShl(Op1, EmitIntConst(i0)); - } - } - } - - // (x + i0) * i1 => x * i1 + i0 * i1 - // The later format can be folded by other rules, again. - if (getOpcode(*Op1) == Add && isImm(*getOp2(Op1)) && isImm(*Op2)) - { - return FoldAdd(FoldMul(getOp1(Op1), Op2), - EmitIntConst(GetImmValue(getOp2(Op1)) * GetImmValue(Op2))); - } - - //// TODO: Test the folding below - //// -X * -Y => X * Y - // if (InstLoc negOp1 = isNeg(Op1)) - //{ - // if (InstLoc negOp2 = isNeg(Op2)) - // { - // return FoldMul(negOp1, negOp2); - // } - //} - - //// TODO: Test the folding below - //// x * (1 << y) => x << y - // if (getOpcode(*Op2) == Shl && isImm(*getOp1(Op2)) && GetImmValue(getOp1(Op2)) == 1) - //{ - // return FoldShl(Op1, getOp2(Op2)); - //} - - //// TODO: Test the folding below - //// (1 << y) * x => x << y - // if (getOpcode(*Op1) == Shl && isImm(*getOp1(Op1)) && GetImmValue(getOp1(Op1)) == 1) - //{ - // return FoldShl(Op2, getOp2(Op1)); - //} - - // x * y (where y is 0 or 1) => (0 - y) & x - if (ComputeKnownZeroBits(Op2) == -2U) - { - return FoldAnd(FoldSub(EmitIntConst(0), Op2), Op1); - } - - // x * y (where y is 0 or 1) => (0 - x) & y - if (ComputeKnownZeroBits(Op1) == -2U) - { - return FoldAnd(FoldSub(EmitIntConst(0), Op1), Op2); - } - - return EmitBiOp(Mul, Op1, Op2); -} - -InstLoc IRBuilder::FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2) -{ - // (i0 * i1) >> 32 - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst((u32)(((u64)GetImmValue(Op1) * (u64)GetImmValue(Op2)) >> 32)); - } - - if (isImm(*Op1) && !isImm(*Op2)) - { - return FoldMulHighUnsigned(Op2, Op1); - } - - if (isImm(*Op2)) - { - const unsigned imm = GetImmValue(Op2); - - // (x * 0) >> 32 => 0 - if (imm == 0) - { - return EmitIntConst(0); - } - - for (unsigned i0 = 0; i0 < 30; ++i0) - { - // (x * (1 << i0)) => x >> (32 - i0) - // One "shl" is faster than one "imul". - if (imm == (1U << i0)) - { - return FoldShrl(Op1, EmitIntConst(32 - i0)); - } - } - } - - return EmitBiOp(MulHighUnsigned, Op1, Op2); -} - -InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) -{ - simplifyCommutative(And, Op1, Op2); - - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) & GetImmValue(Op2)); - } - - if (isImm(*Op2)) - { - if (!GetImmValue(Op2)) - return EmitIntConst(0); - - if (GetImmValue(Op2) == -1U) - return Op1; - - if (getOpcode(*Op1) == And && isImm(*getOp2(Op1))) - { - unsigned RHS = GetImmValue(Op2) & GetImmValue(getOp2(Op1)); - return FoldAnd(getOp1(Op1), EmitIntConst(RHS)); - } - else if (getOpcode(*Op1) == Rol && isImm(*getOp2(Op1))) - { - unsigned shiftMask1 = -1U << (GetImmValue(getOp2(Op1)) & 31); - - if (GetImmValue(Op2) == shiftMask1) - return FoldShl(getOp1(Op1), getOp2(Op1)); - - unsigned shiftAmt2 = ((32 - GetImmValue(getOp2(Op1))) & 31); - unsigned shiftMask2 = -1U >> shiftAmt2; - - if (GetImmValue(Op2) == shiftMask2) - { - return FoldShrl(getOp1(Op1), EmitIntConst(shiftAmt2)); - } - } - - if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) - { - return Op1; - } - - // if (getOpcode(*Op1) == Xor || getOpcode(*Op1) == Or) - //{ - // // TODO: Test the folding below - // // (x op y) & z => (x & z) op y if (y & z) == 0 - // if ((~ComputeKnownZeroBits(getOp2(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) - // { - // return FoldBiOp(getOpcode(*Op1), FoldAnd(getOp1(Op1), Op2), getOp2(Op1)); - // } - - // // TODO: Test the folding below - // // (x op y) & z => (y & z) op x if (x & z) == 0 - // if ((~ComputeKnownZeroBits(getOp1(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) - // { - // return FoldBiOp(getOpcode(*Op1), FoldAnd(getOp2(Op1), Op2), getOp1(Op1)); - // } - //} - } - - //// TODO: Test the folding below - //// (x >> z) & (y >> z) => (x & y) >> z - // if (getOpcode(*Op1) == Shrl && getOpcode(*Op2) == Shrl && isSameValue(getOp2(Op1), - // getOp2(Op2))) - //{ - // return FoldShl(FoldAnd(getOp1(Op1), getOp2(Op1)), getOp2(Op1)); - //} - - //// TODO: Test the folding below - //// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - //// ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - //// ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - //// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - // if ((getOpcode(*Op1) == Add || getOpcode(*Op1) == Sub) && - // (getOpcode(*getOp1(Op1)) == Or || getOpcode(*getOp1(Op1)) == Xor)) - //{ - // const InstLoc A = getOp1(getOp1(Op1)); - // const InstLoc N = getOp2(getOp1(Op1)); - // const InstLoc B = getOp2(Op1); - // const InstLoc AndRHS = Op2; - // if ((~ComputeKnownZeroBits(N) & ~ComputeKnownZeroBits(AndRHS)) == 0) - // { - // return FoldAnd(FoldBiOp(getOpcode(*Op1), A, B), AndRHS); - // } - //} - - //// TODO: Test the folding below - //// (~A & ~B) == (~(A | B)) - De Morgan's Law - // if (InstLoc notOp1 = isNot(Op1)) - //{ - // if (InstLoc notOp2 = isNot(Op2)) - // { - // return FoldXor(EmitIntConst(-1U), FoldOr(notOp1, notOp2)); - // } - //} - - //// TODO: Test the folding below - //// (X^C)|Y -> (X|Y)^C iff Y&C == 0 - // if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && (~ComputeKnownZeroBits(Op2) & - // GetImmValue(getOp2(Op1))) == 0) - //{ - // return FoldXor(FoldOr(getOp1(Op1), Op2), getOp2(Op1)); - //} - - if (Op1 == Op2) - return Op1; - - return EmitBiOp(And, Op1, Op2); -} - -InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) -{ - simplifyCommutative(Or, Op1, Op2); - - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) | GetImmValue(Op2)); - } - - if (isImm(*Op2)) - { - if (!GetImmValue(Op2)) - return Op1; - - if (GetImmValue(Op2) == -1U) - return EmitIntConst(-1U); - - if (getOpcode(*Op1) == Or && isImm(*getOp2(Op1))) - { - unsigned RHS = GetImmValue(Op2) | GetImmValue(getOp2(Op1)); - - return FoldOr(getOp1(Op1), EmitIntConst(RHS)); - } - - // (X & C1) | C2 --> (X | C2) & (C1|C2) - // iff (C1 & C2) == 0. - if (getOpcode(*Op1) == And && isImm(*getOp2(Op1)) && - (GetImmValue(getOp2(Op1)) & GetImmValue(Op2)) == 0) - { - return FoldAnd(FoldOr(getOp1(Op1), Op2), - EmitIntConst(GetImmValue(getOp2(Op1)) | GetImmValue(Op2))); - } - - // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && isImm(*Op2)) - { - return FoldXor(FoldOr(getOp1(Op1), Op2), - EmitIntConst(GetImmValue(getOp2(Op1)) & ~GetImmValue(Op2))); - } - } - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (getOpcode(*Op1) == Not && getOpcode(*Op2) == Not) - { - return EmitNot(FoldAnd(getOp1(Op1), getOp1(Op2))); - } - - if (Op1 == Op2) - return Op1; - - return EmitBiOp(Or, Op1, Op2); -} - -static unsigned ICmpInverseOp(unsigned op) -{ - switch (op) - { - case ICmpEq: - return ICmpNe; - case ICmpNe: - return ICmpEq; - case ICmpUlt: - return ICmpUge; - case ICmpUgt: - return ICmpUle; - case ICmpUle: - return ICmpUgt; - case ICmpUge: - return ICmpUlt; - case ICmpSlt: - return ICmpSge; - case ICmpSgt: - return ICmpSle; - case ICmpSle: - return ICmpSgt; - case ICmpSge: - return ICmpSlt; - default: - PanicAlert("Bad opcode"); - return Nop; - } -} - -InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) -{ - simplifyCommutative(Xor, Op1, Op2); - - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) ^ GetImmValue(Op2)); - } - - if (isImm(*Op2)) - { - if (!GetImmValue(Op2)) - return Op1; - - if (GetImmValue(Op2) == 0xFFFFFFFFU) - { - return EmitNot(Op1); - } - - if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1))) - { - unsigned RHS = GetImmValue(Op2) ^ GetImmValue(getOp2(Op1)); - return FoldXor(getOp1(Op1), EmitIntConst(RHS)); - } - - if (isICmp(getOpcode(*Op1)) && GetImmValue(Op2) == 1) - { - return FoldBiOp(ICmpInverseOp(getOpcode(*Op1)), getOp1(Op1), getOp2(Op1)); - } - } - - if (Op1 == Op2) - return EmitIntConst(0); - - return EmitBiOp(Xor, Op1, Op2); -} - -InstLoc IRBuilder::FoldShl(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op2)) - { - // Shl x 0 => x - if (!GetImmValue(Op2)) - { - return Op1; - } - - if (isImm(*Op1)) - return EmitIntConst(GetImmValue(Op1) << (GetImmValue(Op2) & 31)); - - // ((x * i0) << i1) == x * (i0 << i1) - if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1))) - { - return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) << GetImmValue(Op2))); - } - } - - // 0 << x => 0 - if (isImm(*Op1) && GetImmValue(Op1) == 0) - { - return EmitIntConst(0); - } - - return EmitBiOp(Shl, Op1, Op2); -} - -InstLoc IRBuilder::FoldShrl(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op1) && isImm(*Op2)) - { - return EmitIntConst(GetImmValue(Op1) >> (GetImmValue(Op2) & 31)); - } - - return EmitBiOp(Shrl, Op1, Op2); -} - -InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op2)) - { - if (isImm(*Op1)) - return EmitIntConst(_rotl(GetImmValue(Op1), GetImmValue(Op2))); - - if (!(GetImmValue(Op2) & 31)) - return Op1; - } - return EmitBiOp(Rol, Op1, Op2); -} - -InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op1)) - { - if (GetImmValue(Op1)) - return EmitBranchUncond(Op2); - - return nullptr; - } - - return EmitBiOp(BranchCond, Op1, Op2); -} - -InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op1)) - { - if (isImm(*Op2)) - { - unsigned result = 0; - switch (Opcode) - { - case ICmpEq: - result = GetImmValue(Op1) == GetImmValue(Op2); - break; - case ICmpNe: - result = GetImmValue(Op1) != GetImmValue(Op2); - break; - case ICmpUgt: - result = GetImmValue(Op1) > GetImmValue(Op2); - break; - case ICmpUlt: - result = GetImmValue(Op1) < GetImmValue(Op2); - break; - case ICmpUge: - result = GetImmValue(Op1) >= GetImmValue(Op2); - break; - case ICmpUle: - result = GetImmValue(Op1) <= GetImmValue(Op2); - break; - case ICmpSgt: - result = (signed)GetImmValue(Op1) > (signed)GetImmValue(Op2); - break; - case ICmpSlt: - result = (signed)GetImmValue(Op1) < (signed)GetImmValue(Op2); - break; - case ICmpSge: - result = (signed)GetImmValue(Op1) >= (signed)GetImmValue(Op2); - break; - case ICmpSle: - result = (signed)GetImmValue(Op1) <= (signed)GetImmValue(Op2); - break; - } - return EmitIntConst(result); - } - switch (Opcode) - { - case ICmpEq: - return FoldICmp(ICmpEq, Op2, Op1); - case ICmpNe: - return FoldICmp(ICmpNe, Op2, Op1); - case ICmpUlt: - return FoldICmp(ICmpUgt, Op2, Op1); - case ICmpUgt: - return FoldICmp(ICmpUlt, Op2, Op1); - case ICmpUle: - return FoldICmp(ICmpUge, Op2, Op1); - case ICmpUge: - return FoldICmp(ICmpUle, Op2, Op1); - case ICmpSlt: - return FoldICmp(ICmpSgt, Op2, Op1); - case ICmpSgt: - return FoldICmp(ICmpSlt, Op2, Op1); - case ICmpSle: - return FoldICmp(ICmpSge, Op2, Op1); - case ICmpSge: - return FoldICmp(ICmpSle, Op2, Op1); - } - } - - return EmitBiOp(Opcode, Op1, Op2); -} - -InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op1) && isImm(*Op2)) - { - s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2); - return EmitIntConst64((u64)diff); - } - - return EmitBiOp(ICmpCRSigned, Op1, Op2); -} - -InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) -{ - if (isImm(*Op1) && isImm(*Op2)) - { - u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2); - return EmitIntConst64(diff); - } - - return EmitBiOp(ICmpCRUnsigned, Op1, Op2); -} - -InstLoc IRBuilder::FoldFallBackToInterpreter(InstLoc Op1, InstLoc Op2) -{ - InvalidateCaches(); - return EmitBiOp(FallBackToInterpreter, Op1, Op2); -} - -InstLoc IRBuilder::FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) -{ - if (getOpcode(*Op1) == InsertDoubleInMReg) - { - return FoldDoubleBiOp(Opcode, getOp1(Op1), Op2); - } - - if (getOpcode(*Op2) == InsertDoubleInMReg) - { - return FoldDoubleBiOp(Opcode, Op1, getOp1(Op2)); - } - - return EmitBiOp(Opcode, Op1, Op2); -} - -InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) -{ - switch (Opcode) - { - case Add: - return FoldAdd(Op1, Op2); - case Sub: - return FoldSub(Op1, Op2); - case Mul: - return FoldMul(Op1, Op2); - case MulHighUnsigned: - return FoldMulHighUnsigned(Op1, Op2); - case And: - return FoldAnd(Op1, Op2); - case Or: - return FoldOr(Op1, Op2); - case Xor: - return FoldXor(Op1, Op2); - case Shl: - return FoldShl(Op1, Op2); - case Shrl: - return FoldShrl(Op1, Op2); - case Rol: - return FoldRol(Op1, Op2); - case BranchCond: - return FoldBranchCond(Op1, Op2); - case ICmpEq: - case ICmpNe: - case ICmpUgt: - case ICmpUlt: - case ICmpUge: - case ICmpUle: - case ICmpSgt: - case ICmpSlt: - case ICmpSge: - case ICmpSle: - return FoldICmp(Opcode, Op1, Op2); - case ICmpCRSigned: - return FoldICmpCRSigned(Op1, Op2); - case ICmpCRUnsigned: - return FoldICmpCRUnsigned(Op1, Op2); - case FallBackToInterpreter: - return FoldFallBackToInterpreter(Op1, Op2); - case FDMul: - case FDAdd: - case FDSub: - return FoldDoubleBiOp(Opcode, Op1, Op2); - default: - return EmitBiOp(Opcode, Op1, Op2, extra); - } -} - -InstLoc IRBuilder::EmitIntConst64(u64 value) -{ - InstLoc curIndex = InstList.data() + InstList.size(); - InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8)); - MarkUsed.push_back(false); - ConstList.push_back(value); - return curIndex; -} - -u64 IRBuilder::GetImmValue64(InstLoc I) const -{ - return ConstList[*I >> 8]; -} - -void IRBuilder::SetMarkUsed(InstLoc I) -{ - const unsigned i = (unsigned)(I - InstList.data()); - MarkUsed[i] = true; -} - -bool IRBuilder::IsMarkUsed(InstLoc I) const -{ - const unsigned i = (unsigned)(I - InstList.data()); - return MarkUsed[i]; -} - -bool IRBuilder::isSameValue(InstLoc Op1, InstLoc Op2) const -{ - if (Op1 == Op2) - { - return true; - } - - if (isImm(*Op1) && isImm(*Op2) && GetImmValue(Op1) == GetImmValue(Op2)) - { - return true; - } - - if (getNumberOfOperands(Op1) == 2 && getOpcode(*Op1) != StorePaired && - getOpcode(*Op1) == getOpcode(*Op2) && isSameValue(getOp1(Op1), getOp1(Op2)) && - isSameValue(getOp2(Op1), getOp2(Op2))) - { - return true; - } - - return false; -} - -// Assign a complexity or rank value to Inst -// Ported from InstructionCombining.cpp in LLVM -// 0 -> undef -// 1 -> Nop, Const -// 2 -> ZeroOp -// 3 -> UOp -// 4 -> BiOp -unsigned IRBuilder::getComplexity(InstLoc I) const -{ - const unsigned Opcode = getOpcode(*I); - if (Opcode == Nop || Opcode == CInt16 || Opcode == CInt32) - { - return 1; - } - - const unsigned numberOfOperands = getNumberOfOperands(I); - if (numberOfOperands == -1U) - { - return 0; - } - - return numberOfOperands + 2; -} - -unsigned IRBuilder::getNumberOfOperands(InstLoc I) const -{ - static std::array number_of_operands; - static bool initialized = false; - if (!initialized) - { - initialized = true; - - number_of_operands.fill(0xFFFFFFFF); - number_of_operands[Nop] = 0; - number_of_operands[CInt16] = 0; - number_of_operands[CInt32] = 0; - - static constexpr std::array zero_op = {{ - LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, - LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, - }}; - static constexpr std::array unary_op = {{ - StoreLink, - BranchUncond, - StoreCR, - StoreMSR, - StoreFPRF, - StoreGReg, - StoreCTR, - Load8, - Load16, - Load32, - SExt16, - SExt8, - Cntlzw, - Not, - StoreCarry, - SystemCall, - ShortIdleLoop, - LoadSingle, - LoadDouble, - LoadPaired, - StoreFReg, - DupSingleToMReg, - DupSingleToPacked, - ExpandPackedToMReg, - CompactMRegToPacked, - FSNeg, - FDNeg, - FPDup0, - FPDup1, - FPNeg, - DoubleToSingle, - StoreGQR, - StoreSRR, - ConvertFromFastCR, - ConvertToFastCR, - FastCRSOSet, - FastCREQSet, - FastCRGTSet, - FastCRLTSet, - }}; - static constexpr std::array binary_op = {{ - BranchCond, - IdleBranch, - And, - Xor, - Sub, - Or, - Add, - Mul, - Rol, - Shl, - Shrl, - Sarl, - ICmpEq, - ICmpNe, - ICmpUgt, - ICmpUlt, - ICmpSgt, - ICmpSlt, - ICmpSge, - ICmpSle, - Store8, - Store16, - Store32, - ICmpCRSigned, - ICmpCRUnsigned, - FallBackToInterpreter, - StoreSingle, - StoreDouble, - StorePaired, - InsertDoubleInMReg, - FSMul, - FSAdd, - FSSub, - FDMul, - FDAdd, - FDSub, - FPAdd, - FPMul, - FPSub, - FPMerge00, - FPMerge01, - FPMerge10, - FPMerge11, - FDCmpCR, - }}; - for (auto op : zero_op) - number_of_operands[op] = 0; - - for (auto op : unary_op) - number_of_operands[op] = 1; - - for (auto op : binary_op) - number_of_operands[op] = 2; - } - - return number_of_operands[getOpcode(*I)]; -} - -// Performs a few simplifications for commutative operators -// Ported from InstructionCombining.cpp in LLVM -void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) -{ - // Order operands such that they are listed from right (least complex) to - // left (most complex). This puts constants before unary operators before - // binary operators. - if (getComplexity(Op1) < getComplexity(Op2)) - { - std::swap(Op1, Op2); - } - - // Is this associative? - switch (Opcode) - { - case Add: - case Mul: - case And: - case Or: - case Xor: - break; - default: - return; - } - - // (V op C1) op C2 => V + (C1 + C2) - if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && isImm(*Op2)) - { - const InstLoc Op1Old = Op1; - const InstLoc Op2Old = Op2; - Op1 = getOp1(Op1Old); - Op2 = FoldBiOp(Opcode, getOp2(Op1Old), Op2Old); - } - - // ((V1 op C1) op (V2 op C2)) => ((V1 op V2) op (C1 op C2)) - // Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && getOpcode(*Op2) == Opcode && - isImm(*getOp2(Op2))) - { - const InstLoc Op1Old = Op1; - const InstLoc Op2Old = Op2; - Op1 = FoldBiOp(Opcode, getOp1(Op1Old), getOp1(Op2Old)); - Op2 = FoldBiOp(Opcode, getOp2(Op1Old), getOp2(Op2Old)); - } - - // FIXME: Following code has a bug. - // ((w op x) op (y op z)) => (((w op x) op y) op z) - /* - if (getOpcode(*Op1) == Opcode && getOpcode(*Op2) == Opcode) - { - // Sort the operands where the complexities will be descending order. - std::pair ops[4]; - ops[0] = std::make_pair(getComplexity(getOp1(Op1)), getOp1(Op1)); - ops[1] = std::make_pair(getComplexity(getOp2(Op1)), getOp2(Op1)); - ops[2] = std::make_pair(getComplexity(getOp1(Op2)), getOp1(Op2)); - ops[3] = std::make_pair(getComplexity(getOp2(Op2)), getOp2(Op2)); - std::sort(ops, ops + 4, std::greater >()); - - Op1 = FoldBiOp(Opcode, FoldBiOp(Opcode, ops[0].second, ops[1].second), ops[2].second); - Op2 = ops[3].second; - } - */ -} - -bool IRBuilder::maskedValueIsZero(InstLoc Op1, InstLoc Op2) const -{ - return (~ComputeKnownZeroBits(Op1) & ~ComputeKnownZeroBits(Op2)) == 0; -} - -// Returns I' if I == (0 - I') -InstLoc IRBuilder::isNeg(InstLoc I) const -{ - if (getOpcode(*I) == Sub && isImm(*getOp1(I)) && GetImmValue(getOp1(I)) == 0) - { - return getOp2(I); - } - - return nullptr; -} - -// TODO: Move the following code to a separated file. -struct Writer -{ - File::IOFile file; - Writer() : file(nullptr) - { - std::string filename = StringFromFormat("JitIL_IR_%d.txt", (int)time(nullptr)); - file.Open(filename, "w"); - setvbuf(file.GetHandle(), nullptr, _IOFBF, 1024 * 1024); - } - - virtual ~Writer() {} -}; - -static std::unique_ptr writer; - -static const std::string opcodeNames[] = { - "Nop", - "LoadGReg", - "LoadLink", - "LoadCR", - "LoadCarry", - "LoadCTR", - "LoadMSR", - "LoadGQR", - "SExt8", - "SExt16", - "BSwap32", - "BSwap16", - "Cntlzw", - "Not", - "Load8", - "Load16", - "Load32", - "BranchUncond", - "ConvertFromFastCR", - "ConvertToFastCR", - "StoreGReg", - "StoreCR", - "StoreLink", - "StoreCarry", - "StoreCTR", - "StoreMSR", - "StoreFPRF", - "StoreGQR", - "StoreSRR", - "FastCRSOSet", - "FastCREQSet", - "FastCRGTSet", - "FastCRLTSet", - "FallBackToInterpreter", - "Add", - "Mul", - "And", - "Or", - "Xor", - "MulHighUnsigned", - "Sub", - "Shl", - "Shrl", - "Sarl", - "Rol", - "ICmpCRSigned", - "ICmpCRUnsigned", - "ICmpEq", - "ICmpNe", - "ICmpUgt", - "ICmpUlt", - "ICmpUge", - "ICmpUle", - "ICmpSgt", - "ICmpSlt", - "ICmpSge", - "ICmpSle", - "Store8", - "Store16", - "Store32", - "BranchCond", - "FResult_Start", - "LoadSingle", - "LoadDouble", - "LoadPaired", - "DoubleToSingle", - "DupSingleToMReg", - "DupSingleToPacked", - "InsertDoubleInMReg", - "ExpandPackedToMReg", - "CompactMRegToPacked", - "LoadFReg", - "LoadFRegDENToZero", - "FSMul", - "FSAdd", - "FSSub", - "FSNeg", - "FSRSqrt", - "FPAdd", - "FPMul", - "FPSub", - "FPNeg", - "FDMul", - "FDAdd", - "FDSub", - "FDNeg", - "FPMerge00", - "FPMerge01", - "FPMerge10", - "FPMerge11", - "FPDup0", - "FPDup1", - "FResult_End", - "StorePaired", - "StoreSingle", - "StoreDouble", - "StoreFReg", - "FDCmpCR", - "CInt16", - "CInt32", - "SystemCall", - "RFIExit", - "InterpreterBranch", - "IdleBranch", - "ShortIdleLoop", - "FPExceptionCheckStart", - "FPExceptionCheckEnd", - "ExtExceptionCheck", - "Tramp", - "BlockStart", - "BlockEnd", - "Int3", -}; -static const unsigned alwaysUsedList[] = {FallBackToInterpreter, - StoreGReg, - StoreCR, - StoreLink, - StoreCTR, - StoreMSR, - StoreGQR, - StoreSRR, - StoreCarry, - StoreFPRF, - Load8, - Load16, - Load32, - Store8, - Store16, - Store32, - StoreSingle, - StoreDouble, - StorePaired, - StoreFReg, - FDCmpCR, - BlockStart, - BlockEnd, - IdleBranch, - BranchCond, - BranchUncond, - ShortIdleLoop, - SystemCall, - InterpreterBranch, - RFIExit, - FPExceptionCheck, - DSIExceptionCheck, - ExtExceptionCheck, - BreakPointCheck, - Int3, - Tramp, - Nop}; -static const unsigned extra8RegList[] = { - LoadGReg, LoadCR, LoadGQR, LoadFReg, LoadFRegDENToZero, -}; -static const unsigned extra16RegList[] = { - StoreGReg, StoreCR, StoreGQR, StoreSRR, LoadPaired, StoreFReg, -}; -static const unsigned extra24RegList[] = { - StorePaired, -}; - -static const std::set alwaysUseds(alwaysUsedList, - alwaysUsedList + - sizeof(alwaysUsedList) / sizeof(alwaysUsedList[0])); -static const std::set - extra8Regs(extra8RegList, extra8RegList + sizeof(extra8RegList) / sizeof(extra8RegList[0])); -static const std::set extra16Regs(extra16RegList, - extra16RegList + - sizeof(extra16RegList) / sizeof(extra16RegList[0])); -static const std::set extra24Regs(extra24RegList, - extra24RegList + - sizeof(extra24RegList) / sizeof(extra24RegList[0])); - -void IRBuilder::WriteToFile(u64 codeHash) -{ - _assert_(sizeof(opcodeNames) / sizeof(opcodeNames[0]) == Int3 + 1); - - if (!writer.get()) - { - writer = std::make_unique(); - } - - FILE* const file = writer->file.GetHandle(); - fprintf(file, "\ncode hash:%016" PRIx64 "\n", codeHash); - - const InstLoc lastCurReadPtr = curReadPtr; - StartForwardPass(); - const size_t numInsts = getNumInsts(); - for (size_t i = 0; i < numInsts; ++i) - { - const InstLoc I = ReadForward(); - const unsigned opcode = getOpcode(*I); - const bool thisUsed = IsMarkUsed(I) || alwaysUseds.find(opcode) != alwaysUseds.end(); - - // Line number - fprintf(file, "%4zu", i); - - if (!thisUsed) - fprintf(file, "%*c", 32, ' '); - - // Opcode - const std::string& opcodeName = opcodeNames[opcode]; - fprintf(file, " %-20s", opcodeName.c_str()); - const unsigned numberOfOperands = getNumberOfOperands(I); - - // Op1 - if (numberOfOperands >= 1) - { - const IREmitter::InstLoc inst = getOp1(I); - - if (isImm(*inst)) - fprintf(file, " 0x%08x", GetImmValue(inst)); - else - fprintf(file, " %10zu", i - static_cast(I - inst)); - } - - // Op2 - if (numberOfOperands >= 2) - { - const IREmitter::InstLoc inst = getOp2(I); - - if (isImm(*inst)) - fprintf(file, " 0x%08x", GetImmValue(inst)); - else - fprintf(file, " %10zu", i - static_cast(I - inst)); - } - - if (extra8Regs.count(opcode)) - fprintf(file, " R%d", *I >> 8); - - if (extra16Regs.count(opcode)) - fprintf(file, " R%d", *I >> 16); - - if (extra24Regs.count(opcode)) - fprintf(file, " R%d", *I >> 24); - - if (opcode == CInt32 || opcode == CInt16) - fprintf(file, " 0x%08x", GetImmValue(I)); - - fprintf(file, "\n"); - } - - curReadPtr = lastCurReadPtr; -} -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h deleted file mode 100644 index cb1297f7e4..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/NonCopyable.h" - -namespace IREmitter -{ -enum Opcode -{ - Nop = 0, - - // "Zero-operand" operators - // Register load operators - LoadGReg, - LoadLink, - LoadCR, - LoadCarry, - LoadCTR, - LoadMSR, - LoadGQR, - - // Unary operators - // Integer unary operators - SExt8, - SExt16, - BSwap32, - BSwap16, - Cntlzw, // Count leading zeros - Not, - Load8, // These loads zext - Load16, - Load32, - // CR conversions - ConvertFromFastCR, - ConvertToFastCR, - // Branches - BranchUncond, - // Register store operators - StoreGReg, - StoreCR, - StoreLink, - StoreCarry, - StoreCTR, - StoreMSR, - StoreFPRF, - StoreGQR, - StoreSRR, - // Branch conditions - FastCRSOSet, - FastCREQSet, - FastCRGTSet, - FastCRLTSet, - // Arbitrary interpreter instruction - FallBackToInterpreter, - - // Binary operators - // Commutative integer operators - Add, - Mul, - And, - Or, - Xor, - // Non-commutative integer operators - MulHighUnsigned, - Sub, - Shl, // Note that shifts ignore bits above the bottom 5 - Shrl, - Sarl, - Rol, - ICmpCRSigned, // CR for signed int compare - ICmpCRUnsigned, // CR for unsigned int compare - ICmpEq, // One if equal, zero otherwise - ICmpNe, - ICmpUgt, // One if op1 > op2, zero otherwise - ICmpUlt, - ICmpUge, - ICmpUle, - ICmpSgt, // One if op1 > op2, zero otherwise - ICmpSlt, - ICmpSge, - ICmpSle, // Opposite of sgt - - // Memory store operators - Store8, - Store16, - Store32, - BranchCond, - // Floating-point - // There are three floating-point formats: single, double, - // and packed. For any operation where the format of the - // operand isn't known, the ForceTo* operations are used; - // these are folded into the appropriate conversion - // (or no conversion) depending on the type of the operand. - // The "mreg" format is a pair of doubles; this is the - // most general possible represenation which is used - // in the register state. - // This might seem like overkill, but the semantics require - // having the different formats. - // FIXME: Check the accuracy of the mapping: - // 1. Is paired arithmetic always rounded to single-precision - // first, or does it do double-to-single like the - // single-precision instructions? - // 2. The implementation of madd is slightly off, and - // the implementation of fmuls is very slightly off; - // likely nothing cares, though. - FResult_Start, - LoadSingle, - LoadDouble, - LoadPaired, // This handles quantizers itself - DoubleToSingle, - DupSingleToMReg, - DupSingleToPacked, - InsertDoubleInMReg, - ExpandPackedToMReg, - CompactMRegToPacked, - LoadFReg, - LoadFRegDENToZero, - FSMul, - FSAdd, - FSSub, - FSNeg, - FPAdd, - FPMul, - FPSub, - FPNeg, - FDMul, - FDAdd, - FDSub, - FDNeg, - FPMerge00, - FPMerge01, - FPMerge10, - FPMerge11, - FPDup0, - FPDup1, - FResult_End, - StorePaired, - StoreSingle, - StoreDouble, - StoreFReg, - FDCmpCR, - - // "Trinary" operators - // FIXME: Need to change representation! - // Select, // Equivalent to C "Op1 ? Op2 : Op3" - - // Integer constants - CInt16, - CInt32, - - // Funny PPC "branches" - SystemCall, - RFIExit, - InterpreterBranch, - - IdleBranch, // branch operation belonging to idle loop - ShortIdleLoop, // Idle loop seen in homebrew like Wii mahjong, - // just a branch - - // used for exception checking, at least until someone - // has a better idea of integrating it - FPExceptionCheck, - DSIExceptionCheck, - ExtExceptionCheck, - BreakPointCheck, - // "Opcode" representing a register too far away to - // reference directly; this is a size optimization - Tramp, - // "Opcode"s representing the start and end - BlockStart, - BlockEnd, - - // used for debugging - Int3 -}; - -using Inst = u32; -using InstLoc = Inst*; - -constexpr u32 getOpcode(Inst i) -{ - return i & 255; -} - -constexpr bool isImm(Inst i) -{ - return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32; -} - -constexpr bool isICmp(Inst i) -{ - return getOpcode(i) >= ICmpEq && getOpcode(i) <= ICmpSle; -} - -constexpr bool isFResult(Inst i) -{ - return getOpcode(i) > FResult_Start && getOpcode(i) < FResult_End; -} - -InstLoc inline getOp1(InstLoc i) -{ - i = i - 1 - ((*i >> 8) & 255); - - if (getOpcode(*i) == Tramp) - { - i = i - 1 - (*i >> 8); - } - - return i; -} - -InstLoc inline getOp2(InstLoc i) -{ - i = i - 1 - ((*i >> 16) & 255); - - if (getOpcode(*i) == Tramp) - { - i = i - 1 - (*i >> 8); - } - - return i; -} - -class IRBuilder final : private NonCopyable -{ -public: - IRBuilder(); - - void Reset(); - - InstLoc EmitIntConst(unsigned value) { return EmitIntConst64(value); } - InstLoc EmitIntConst64(u64 value); - - InstLoc EmitStoreLink(InstLoc val) { return FoldUOp(StoreLink, val); } - InstLoc EmitBranchUncond(InstLoc val) { return FoldUOp(BranchUncond, val); } - InstLoc EmitBranchCond(InstLoc check, InstLoc dest) { return FoldBiOp(BranchCond, check, dest); } - InstLoc EmitIdleBranch(InstLoc check, InstLoc dest) { return FoldBiOp(IdleBranch, check, dest); } - InstLoc EmitLoadCR(unsigned crreg) { return FoldZeroOp(LoadCR, crreg); } - InstLoc EmitStoreCR(InstLoc value, unsigned crreg) { return FoldUOp(StoreCR, value, crreg); } - InstLoc EmitLoadLink() { return FoldZeroOp(LoadLink, 0); } - InstLoc EmitLoadMSR() { return FoldZeroOp(LoadMSR, 0); } - InstLoc EmitStoreMSR(InstLoc val, InstLoc pc) { return FoldBiOp(StoreMSR, val, pc); } - InstLoc EmitStoreFPRF(InstLoc value) { return FoldUOp(StoreFPRF, value); } - InstLoc EmitLoadGReg(unsigned reg) { return FoldZeroOp(LoadGReg, reg); } - InstLoc EmitStoreGReg(InstLoc value, unsigned reg) { return FoldUOp(StoreGReg, value, reg); } - InstLoc EmitNot(InstLoc op1) { return FoldUOp(Not, op1); } - InstLoc EmitAnd(InstLoc op1, InstLoc op2) { return FoldBiOp(And, op1, op2); } - InstLoc EmitXor(InstLoc op1, InstLoc op2) { return FoldBiOp(Xor, op1, op2); } - InstLoc EmitSub(InstLoc op1, InstLoc op2) { return FoldBiOp(Sub, op1, op2); } - InstLoc EmitOr(InstLoc op1, InstLoc op2) { return FoldBiOp(Or, op1, op2); } - InstLoc EmitAdd(InstLoc op1, InstLoc op2) { return FoldBiOp(Add, op1, op2); } - InstLoc EmitMul(InstLoc op1, InstLoc op2) { return FoldBiOp(Mul, op1, op2); } - InstLoc EmitMulHighUnsigned(InstLoc op1, InstLoc op2) - { - return FoldBiOp(MulHighUnsigned, op1, op2); - } - - InstLoc EmitRol(InstLoc op1, InstLoc op2) { return FoldBiOp(Rol, op1, op2); } - InstLoc EmitShl(InstLoc op1, InstLoc op2) { return FoldBiOp(Shl, op1, op2); } - InstLoc EmitShrl(InstLoc op1, InstLoc op2) { return FoldBiOp(Shrl, op1, op2); } - InstLoc EmitSarl(InstLoc op1, InstLoc op2) { return FoldBiOp(Sarl, op1, op2); } - InstLoc EmitLoadCTR() { return FoldZeroOp(LoadCTR, 0); } - InstLoc EmitStoreCTR(InstLoc op1) { return FoldUOp(StoreCTR, op1); } - InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpEq, op1, op2); } - InstLoc EmitICmpNe(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpNe, op1, op2); } - InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpUgt, op1, op2); } - InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpUlt, op1, op2); } - InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpSgt, op1, op2); } - InstLoc EmitICmpSlt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpSlt, op1, op2); } - InstLoc EmitICmpSge(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpSge, op1, op2); } - InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpSle, op1, op2); } - InstLoc EmitLoad8(InstLoc op1) { return FoldUOp(Load8, op1); } - InstLoc EmitLoad16(InstLoc op1) { return FoldUOp(Load16, op1); } - InstLoc EmitLoad32(InstLoc op1) { return FoldUOp(Load32, op1); } - InstLoc EmitStore8(InstLoc op1, InstLoc op2) { return FoldBiOp(Store8, op1, op2); } - InstLoc EmitStore16(InstLoc op1, InstLoc op2) { return FoldBiOp(Store16, op1, op2); } - InstLoc EmitStore32(InstLoc op1, InstLoc op2) { return FoldBiOp(Store32, op1, op2); } - InstLoc EmitSExt16(InstLoc op1) { return FoldUOp(SExt16, op1); } - InstLoc EmitSExt8(InstLoc op1) { return FoldUOp(SExt8, op1); } - InstLoc EmitCntlzw(InstLoc op1) { return FoldUOp(Cntlzw, op1); } - InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpCRSigned, op1, op2); } - InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) - { - return FoldBiOp(ICmpCRUnsigned, op1, op2); - } - - InstLoc EmitConvertFromFastCR(InstLoc op1) { return FoldUOp(ConvertFromFastCR, op1); } - InstLoc EmitConvertToFastCR(InstLoc op1) { return FoldUOp(ConvertToFastCR, op1); } - InstLoc EmitFastCRSOSet(InstLoc op1) { return FoldUOp(FastCRSOSet, op1); } - InstLoc EmitFastCREQSet(InstLoc op1) { return FoldUOp(FastCREQSet, op1); } - InstLoc EmitFastCRLTSet(InstLoc op1) { return FoldUOp(FastCRLTSet, op1); } - InstLoc EmitFastCRGTSet(InstLoc op1) { return FoldUOp(FastCRGTSet, op1); } - InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) - { - return FoldBiOp(FallBackToInterpreter, op1, op2); - } - - InstLoc EmitInterpreterBranch() { return FoldZeroOp(InterpreterBranch, 0); } - InstLoc EmitLoadCarry() { return FoldZeroOp(LoadCarry, 0); } - InstLoc EmitStoreCarry(InstLoc op1) { return FoldUOp(StoreCarry, op1); } - InstLoc EmitSystemCall(InstLoc pc) { return FoldUOp(SystemCall, pc); } - InstLoc EmitFPExceptionCheck(InstLoc pc) { return EmitUOp(FPExceptionCheck, pc); } - InstLoc EmitDSIExceptionCheck(InstLoc pc) { return EmitUOp(DSIExceptionCheck, pc); } - InstLoc EmitExtExceptionCheck(InstLoc pc) { return EmitUOp(ExtExceptionCheck, pc); } - InstLoc EmitBreakPointCheck(InstLoc pc) { return EmitUOp(BreakPointCheck, pc); } - InstLoc EmitRFIExit() { return FoldZeroOp(RFIExit, 0); } - InstLoc EmitShortIdleLoop(InstLoc pc) { return FoldUOp(ShortIdleLoop, pc); } - InstLoc EmitLoadSingle(InstLoc addr) { return FoldUOp(LoadSingle, addr); } - InstLoc EmitLoadDouble(InstLoc addr) { return FoldUOp(LoadDouble, addr); } - InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) - { - return FoldUOp(LoadPaired, addr, quantReg); - } - - InstLoc EmitStoreSingle(InstLoc value, InstLoc addr) - { - return FoldBiOp(StoreSingle, value, addr); - } - - InstLoc EmitStoreDouble(InstLoc value, InstLoc addr) - { - return FoldBiOp(StoreDouble, value, addr); - } - - InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) - { - return FoldBiOp(StorePaired, value, addr, quantReg); - } - - InstLoc EmitLoadFReg(unsigned freg) { return FoldZeroOp(LoadFReg, freg); } - InstLoc EmitLoadFRegDENToZero(unsigned freg) { return FoldZeroOp(LoadFRegDENToZero, freg); } - InstLoc EmitStoreFReg(InstLoc val, unsigned freg) { return FoldUOp(StoreFReg, val, freg); } - InstLoc EmitDupSingleToMReg(InstLoc val) { return FoldUOp(DupSingleToMReg, val); } - InstLoc EmitDupSingleToPacked(InstLoc val) { return FoldUOp(DupSingleToPacked, val); } - InstLoc EmitInsertDoubleInMReg(InstLoc val, InstLoc reg) - { - return FoldBiOp(InsertDoubleInMReg, val, reg); - } - - InstLoc EmitExpandPackedToMReg(InstLoc val) { return FoldUOp(ExpandPackedToMReg, val); } - InstLoc EmitCompactMRegToPacked(InstLoc val) { return FoldUOp(CompactMRegToPacked, val); } - InstLoc EmitFSMul(InstLoc op1, InstLoc op2) { return FoldBiOp(FSMul, op1, op2); } - InstLoc EmitFSAdd(InstLoc op1, InstLoc op2) { return FoldBiOp(FSAdd, op1, op2); } - InstLoc EmitFSSub(InstLoc op1, InstLoc op2) { return FoldBiOp(FSSub, op1, op2); } - InstLoc EmitFSNeg(InstLoc op1) { return FoldUOp(FSNeg, op1); } - InstLoc EmitFDMul(InstLoc op1, InstLoc op2) { return FoldBiOp(FDMul, op1, op2); } - InstLoc EmitFDAdd(InstLoc op1, InstLoc op2) { return FoldBiOp(FDAdd, op1, op2); } - InstLoc EmitFDSub(InstLoc op1, InstLoc op2) { return FoldBiOp(FDSub, op1, op2); } - InstLoc EmitFDNeg(InstLoc op1) { return FoldUOp(FDNeg, op1); } - InstLoc EmitFPAdd(InstLoc op1, InstLoc op2) { return FoldBiOp(FPAdd, op1, op2); } - InstLoc EmitFPMul(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMul, op1, op2); } - InstLoc EmitFPSub(InstLoc op1, InstLoc op2) { return FoldBiOp(FPSub, op1, op2); } - InstLoc EmitFPMerge00(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMerge00, op1, op2); } - InstLoc EmitFPMerge01(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMerge01, op1, op2); } - InstLoc EmitFPMerge10(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMerge10, op1, op2); } - InstLoc EmitFPMerge11(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMerge11, op1, op2); } - InstLoc EmitFPDup0(InstLoc op1) { return FoldUOp(FPDup0, op1); } - InstLoc EmitFPDup1(InstLoc op1) { return FoldUOp(FPDup1, op1); } - InstLoc EmitFPNeg(InstLoc op1) { return FoldUOp(FPNeg, op1); } - InstLoc EmitDoubleToSingle(InstLoc op1) { return FoldUOp(DoubleToSingle, op1); } - InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) - { - return FoldBiOp(FDCmpCR, op1, op2, ordered); - } - - InstLoc EmitLoadGQR(unsigned gqr) { return FoldZeroOp(LoadGQR, gqr); } - InstLoc EmitStoreGQR(InstLoc op1, unsigned gqr) { return FoldUOp(StoreGQR, op1, gqr); } - InstLoc EmitStoreSRR(InstLoc op1, unsigned srr) { return FoldUOp(StoreSRR, op1, srr); } - InstLoc EmitINT3() { return FoldZeroOp(Int3, 0); } - void StartBackPass() { curReadPtr = InstList.data() + InstList.size(); } - void StartForwardPass() { curReadPtr = InstList.data(); } - InstLoc ReadForward() { return curReadPtr++; } - InstLoc ReadBackward() { return --curReadPtr; } - InstLoc getFirstInst() { return InstList.data(); } - size_t getNumInsts() const { return InstList.size(); } - unsigned int GetImmValue(InstLoc I) const { return (u32)GetImmValue64(I); } - u64 GetImmValue64(InstLoc I) const; - void SetMarkUsed(InstLoc I); - bool IsMarkUsed(InstLoc I) const; - void WriteToFile(u64 codeHash); - -private: - void InvalidateCaches(); - - InstLoc EmitZeroOp(unsigned Opcode, unsigned extra); - InstLoc EmitUOp(unsigned OpCode, InstLoc Op1, unsigned extra = 0); - InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, unsigned extra = 0); - - InstLoc FoldAdd(InstLoc Op1, InstLoc Op2); - InstLoc FoldSub(InstLoc Op1, InstLoc Op2); - InstLoc FoldMul(InstLoc Op1, InstLoc Op2); - InstLoc FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2); - InstLoc FoldAnd(InstLoc Op1, InstLoc Op2); - InstLoc FoldOr(InstLoc Op1, InstLoc Op2); - InstLoc FoldRol(InstLoc Op1, InstLoc Op2); - InstLoc FoldShl(InstLoc Op1, InstLoc Op2); - InstLoc FoldShrl(InstLoc Op1, InstLoc Op2); - InstLoc FoldXor(InstLoc Op1, InstLoc Op2); - InstLoc FoldBranchCond(InstLoc Op1, InstLoc Op2); - InstLoc FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2); - InstLoc FoldICmpCRSigned(InstLoc Op1, InstLoc Op2); - InstLoc FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2); - InstLoc FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2); - - InstLoc FoldFallBackToInterpreter(InstLoc Op1, InstLoc Op2); - - InstLoc FoldZeroOp(unsigned Opcode, unsigned extra); - InstLoc FoldUOp(unsigned OpCode, InstLoc Op1, unsigned extra = 0); - InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, unsigned extra = 0); - - unsigned ComputeKnownZeroBits(InstLoc I) const; - - bool isSameValue(InstLoc Op1, InstLoc Op2) const; - unsigned getComplexity(InstLoc I) const; - unsigned getNumberOfOperands(InstLoc I) const; - void simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2); - bool maskedValueIsZero(InstLoc Op1, InstLoc Op2) const; - InstLoc isNeg(InstLoc I) const; - - std::vector InstList; // FIXME: We must ensure this is continuous! - std::vector MarkUsed; // Used for IRWriter - std::vector ConstList; - InstLoc curReadPtr; - std::array GRegCache; - std::array GRegCacheStore; - std::array FRegCache; - std::array FRegCacheStore; - InstLoc CarryCache; - InstLoc CarryCacheStore; - InstLoc CTRCache; - InstLoc CTRCacheStore; - std::array CRCache; - std::array CRCacheStore; -}; -} // namespace IREmitter diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase.h b/Source/Core/Core/PowerPC/JitILCommon/JitILBase.h deleted file mode 100644 index bd38bc4b8e..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/CommonTypes.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/Jit64Common/Jit64Base.h" -#include "Core/PowerPC/JitILCommon/IR.h" -#include "Core/PowerPC/PPCAnalyst.h" - -class JitILBase : public Jitx86Base -{ -public: - JitILBase() : code_buffer(32000) {} - ~JitILBase() {} - virtual void Jit(u32 em_address) = 0; - - virtual const CommonAsmRoutinesBase* GetAsmRoutines() = 0; - - // OPCODES - virtual void FallBackToInterpreter(UGeckoInstruction inst) = 0; - virtual void DoNothing(UGeckoInstruction inst) = 0; - virtual void HLEFunction(UGeckoInstruction inst) = 0; - - virtual void DynaRunTable4(UGeckoInstruction _inst) = 0; - virtual void DynaRunTable19(UGeckoInstruction _inst) = 0; - virtual void DynaRunTable31(UGeckoInstruction _inst) = 0; - virtual void DynaRunTable59(UGeckoInstruction _inst) = 0; - virtual void DynaRunTable63(UGeckoInstruction _inst) = 0; - - // Branches - void sc(UGeckoInstruction inst); - void rfi(UGeckoInstruction inst); - void bx(UGeckoInstruction inst); - void bcx(UGeckoInstruction inst); - void bcctrx(UGeckoInstruction inst); - void bclrx(UGeckoInstruction inst); - - // LoadStore - void lXzx(UGeckoInstruction inst); - void lhax(UGeckoInstruction inst); - void lhaux(UGeckoInstruction inst); - void stXx(UGeckoInstruction inst); - void lmw(UGeckoInstruction inst); - void stmw(UGeckoInstruction inst); - void stX(UGeckoInstruction inst); // stw sth stb - void lXz(UGeckoInstruction inst); - void lbzu(UGeckoInstruction inst); - void lha(UGeckoInstruction inst); - void lhau(UGeckoInstruction inst); - - // System Registers - void mtspr(UGeckoInstruction inst); - void mfspr(UGeckoInstruction inst); - void mtmsr(UGeckoInstruction inst); - void mfmsr(UGeckoInstruction inst); - void mftb(UGeckoInstruction inst); - void mtcrf(UGeckoInstruction inst); - void mfcr(UGeckoInstruction inst); - void mcrf(UGeckoInstruction inst); - void crXX(UGeckoInstruction inst); - - void dcbst(UGeckoInstruction inst); - void dcbz(UGeckoInstruction inst); - void icbi(UGeckoInstruction inst); - - void addx(UGeckoInstruction inst); - void boolX(UGeckoInstruction inst); - void mulli(UGeckoInstruction inst); - void mulhwux(UGeckoInstruction inst); - void mullwx(UGeckoInstruction inst); - void divwux(UGeckoInstruction inst); - void srawix(UGeckoInstruction inst); - void srawx(UGeckoInstruction inst); - void addex(UGeckoInstruction inst); - void addzex(UGeckoInstruction inst); - - void extsbx(UGeckoInstruction inst); - void extshx(UGeckoInstruction inst); - - void reg_imm(UGeckoInstruction inst); - - void ps_arith(UGeckoInstruction inst); // aggregate - void ps_mergeXX(UGeckoInstruction inst); - void ps_maddXX(UGeckoInstruction inst); - void ps_sum(UGeckoInstruction inst); - void ps_muls(UGeckoInstruction inst); - - void fp_arith_s(UGeckoInstruction inst); - - void fcmpX(UGeckoInstruction inst); - void fmrx(UGeckoInstruction inst); - - void cmpXX(UGeckoInstruction inst); - - void cntlzwx(UGeckoInstruction inst); - - void lfs(UGeckoInstruction inst); - void lfsu(UGeckoInstruction inst); - void lfd(UGeckoInstruction inst); - void lfdu(UGeckoInstruction inst); - void stfd(UGeckoInstruction inst); - void stfs(UGeckoInstruction inst); - void stfsx(UGeckoInstruction inst); - void psq_l(UGeckoInstruction inst); - void psq_st(UGeckoInstruction inst); - - void fmaddXX(UGeckoInstruction inst); - void fsign(UGeckoInstruction inst); - void rlwinmx(UGeckoInstruction inst); - void rlwimix(UGeckoInstruction inst); - void rlwnmx(UGeckoInstruction inst); - void negx(UGeckoInstruction inst); - void slwx(UGeckoInstruction inst); - void srwx(UGeckoInstruction inst); - void lfsx(UGeckoInstruction inst); - - void subfic(UGeckoInstruction inst); - void subfcx(UGeckoInstruction inst); - void subfx(UGeckoInstruction inst); - void subfex(UGeckoInstruction inst); - -protected: - // The default code buffer. We keep it around to not have to alloc/dealloc a - // large chunk of memory for each recompiled block. - PPCAnalyst::CodeBuffer code_buffer; - IREmitter::IRBuilder ibuild; -}; diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp deleted file mode 100644 index 5041ab4118..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/CommonTypes.h" -#include "Core/ConfigManager.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PowerPC.h" - -// The branches are known good, or at least reasonably good. -// No need for a disable-mechanism. - -// If defined, clears CR0 at blr and bl-s. If the assumption that -// flags never carry over between functions holds, then the task for -// an optimizer becomes much easier. - -// #define ACID_TEST - -// Zelda and many more games seem to pass the Acid Test. - -//#define NORMALBRANCH_START FallBackToInterpreter(inst); ibuild.EmitInterpreterBranch(); return; -#define NORMALBRANCH_START - -void JitILBase::sc(UGeckoInstruction inst) -{ - ibuild.EmitSystemCall(ibuild.EmitIntConst(js.compilerPC)); -} - -void JitILBase::rfi(UGeckoInstruction inst) -{ - ibuild.EmitRFIExit(); -} - -void JitILBase::bx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - INSTRUCTION_START; - - // We must always process the following sentence - // even if the blocks are merged by PPCAnalyst::Flatten(). - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - - // If this is not the last instruction of a block, - // we will skip the rest process. - // Because PPCAnalyst::Flatten() merged the blocks. - if (!js.isLastInstruction) - { - return; - } - - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); - - if (destination == js.compilerPC) - { - ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC)); - return; - } - - ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); -} - -static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) -{ - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRTest = nullptr; - switch (3 - (inst.BI & 3)) - { - case CR_SO_BIT: - CRTest = ibuild.EmitFastCRSOSet(CRReg); - break; - case CR_EQ_BIT: - CRTest = ibuild.EmitFastCREQSet(CRReg); - break; - case CR_GT_BIT: - CRTest = ibuild.EmitFastCRGTSet(CRReg); - break; - case CR_LT_BIT: - CRTest = ibuild.EmitFastCRLTSet(CRReg); - break; - } - if (!(inst.BO & 8)) - CRTest = ibuild.EmitXor(CRTest, ibuild.EmitIntConst(1)); - return CRTest; -} - -static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) -{ - IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr; - if ((inst.BO & 16) == 0) // Test a CR bit - { - CRTest = EmitCRTest(ibuild, inst); - } - - if ((inst.BO & 4) == 0) - { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - - if (inst.BO & 2) - CTRTest = ibuild.EmitICmpEq(c, ibuild.EmitIntConst(0)); - else - CTRTest = c; - } - - IREmitter::InstLoc Test = CRTest; - if (CTRTest) - { - if (Test) - Test = ibuild.EmitAnd(Test, CTRTest); - else - Test = CTRTest; - } - - if (!Test) - { - Test = ibuild.EmitIntConst(1); - } - - return Test; -} - -void JitILBase::bcx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc Test = TestBranch(ibuild, inst); - - u32 destination; - if (inst.AA) - destination = SignExt16(inst.BD << 2); - else - destination = js.compilerPC + SignExt16(inst.BD << 2); - - // Idle skipping: - // The main Idle skipping is done in the LoadStore code, but there is an optimization here. - // If idle skipping is enabled, then this branch will only be reached when the branch is not - // taken. - // TODO: We shouldn't use debug reads here. - if (inst.hex == 0x4182fff8 && - (PowerPC::HostRead_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 && - (PowerPC::HostRead_U32(js.compilerPC - 4) == 0x28000000 || - (SConfig::GetInstance().bWii && PowerPC::HostRead_U32(js.compilerPC - 4) == 0x2C000000))) - { - // Uh, Do nothing. - } - else - { - ibuild.EmitBranchCond(Test, ibuild.EmitIntConst(destination)); - } - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); -} - -void JitILBase::bcctrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if ((inst.BO & 4) == 0) - { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - } - - IREmitter::InstLoc test; - if ((inst.BO & 16) == 0) // Test a CR bit - { - test = EmitCRTest(ibuild, inst); - } - else - { - test = ibuild.EmitIntConst(1); - } - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadCTR(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - ibuild.EmitBranchUncond(destination); -} - -void JitILBase::bclrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - - if (!js.isLastInstruction && (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) - { - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - return; - } - - if (inst.hex == 0x4e800020) - { - ibuild.EmitBranchUncond(ibuild.EmitLoadLink()); - return; - } - - IREmitter::InstLoc test = TestBranch(ibuild, inst); - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadLink(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - ibuild.EmitBranchUncond(destination); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp deleted file mode 100644 index 0d6226941e..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/Assert.h" -#include "Common/CommonTypes.h" -#include "Common/MsgHandler.h" -#include "Core/ConfigManager.h" - -void JitILBase::fp_arith_s(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21)); - - // Only the interpreter has "proper" support for (some) FP flags - FALLBACK_IF(inst.SUBOP5 == 25 && SConfig::GetInstance().bFPRF); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); - switch (inst.SUBOP5) - { - case 20: // sub - val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB)); - break; - case 21: // add - val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB)); - break; - case 25: // mul - val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); - break; - default: - _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); - } - - if (inst.OPCD == 59) - { - val = ibuild.EmitDoubleToSingle(val); - val = ibuild.EmitDupSingleToMReg(val); - } - else - { - val = ibuild.EmitInsertDoubleInMReg(val, ibuild.EmitLoadFReg(inst.FD)); - } - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::fmaddXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - // Only the interpreter has "proper" support for (some) FP flags - FALLBACK_IF(inst.SUBOP5 == 29 && SConfig::GetInstance().bFPRF); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); - val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); - - if (inst.SUBOP5 & 1) - val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB)); - else - val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB)); - - if (inst.SUBOP5 & 2) - val = ibuild.EmitFDNeg(val); - - if (inst.OPCD == 59) - { - val = ibuild.EmitDoubleToSingle(val); - val = ibuild.EmitDupSingleToMReg(val); - } - else - { - val = ibuild.EmitInsertDoubleInMReg(val, ibuild.EmitLoadFReg(inst.FD)); - } - - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::fmrx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FB); - val = ibuild.EmitInsertDoubleInMReg(val, ibuild.EmitLoadFReg(inst.FD)); - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::fcmpX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - IREmitter::InstLoc lhs, rhs, res; - lhs = ibuild.EmitLoadFReg(inst.FA); - rhs = ibuild.EmitLoadFReg(inst.FB); - int ordered = (inst.SUBOP10 == 32) ? 1 : 0; - res = ibuild.EmitFDCmpCR(lhs, rhs, ordered); - ibuild.EmitStoreFPRF(res); - ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); -} - -void JitILBase::fsign(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - - FALLBACK_IF(true); - - // TODO - switch (inst.SUBOP10) - { - case 40: // fnegx - break; - case 264: // fabsx - break; - case 136: // fnabs - break; - default: - PanicAlert("fsign bleh"); - break; - } -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp deleted file mode 100644 index 346056abce..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#ifdef _MSC_VER -#pragma warning( \ - disable : 4146) // unary minus operator applied to unsigned type, result still unsigned -#endif - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/CommonTypes.h" - -static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val) -{ - IREmitter::InstLoc res = ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0)); - ibuild.EmitStoreCR(res, 0); -} - -void JitILBase::reg_imm(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - int d = inst.RD, a = inst.RA, s = inst.RS; - IREmitter::InstLoc val, test, c; - switch (inst.OPCD) - { - case 14: // addi - val = ibuild.EmitIntConst(inst.SIMM_16); - if (a) - val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val); - ibuild.EmitStoreGReg(val, d); - break; - case 15: // addis - val = ibuild.EmitIntConst(inst.SIMM_16 << 16); - if (a) - val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val); - ibuild.EmitStoreGReg(val, d); - break; - case 24: // ori - val = ibuild.EmitIntConst(inst.UIMM); - val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - break; - case 25: // oris - val = ibuild.EmitIntConst(inst.UIMM << 16); - val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - break; - case 28: // andi - val = ibuild.EmitIntConst(inst.UIMM); - val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - ComputeRC(ibuild, val); - break; - case 29: // andis - val = ibuild.EmitIntConst(inst.UIMM << 16); - val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - ComputeRC(ibuild, val); - break; - case 26: // xori - val = ibuild.EmitIntConst(inst.UIMM); - val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - break; - case 27: // xoris - val = ibuild.EmitIntConst(inst.UIMM << 16); - val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val); - ibuild.EmitStoreGReg(val, a); - break; - case 12: // addic - case 13: // addic_rc - c = ibuild.EmitIntConst(inst.SIMM_16); - val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), c); - ibuild.EmitStoreGReg(val, d); - test = ibuild.EmitICmpUgt(c, val); - ibuild.EmitStoreCarry(test); - if (inst.OPCD == 13) - ComputeRC(ibuild, val); - break; - default: - FALLBACK_IF(true); - } -} - -void JitILBase::cmpXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc lhs, rhs, res; - lhs = ibuild.EmitLoadGReg(inst.RA); - - if (inst.OPCD == 31) - { - rhs = ibuild.EmitLoadGReg(inst.RB); - if (inst.SUBOP10 == 32) - { - res = ibuild.EmitICmpCRUnsigned(lhs, rhs); - } - else - { - res = ibuild.EmitICmpCRSigned(lhs, rhs); - } - } - else if (inst.OPCD == 10) - { - rhs = ibuild.EmitIntConst(inst.UIMM); - res = ibuild.EmitICmpCRUnsigned(lhs, rhs); - } - else // inst.OPCD == 11 - { - rhs = ibuild.EmitIntConst(inst.SIMM_16); - res = ibuild.EmitICmpCRSigned(lhs, rhs); - } - - js.downcountAmount++; // TODO: should this be somewhere else? - - ibuild.EmitStoreCR(res, inst.CRFD); -} - -void JitILBase::boolX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - IREmitter::InstLoc a = nullptr; - IREmitter::InstLoc s = ibuild.EmitLoadGReg(inst.RS); - IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB); - - // FIXME: Some instructions does not work well in NSMBW, MP2, etc. - // Refer JitIL_Tables.cpp. - if (inst.SUBOP10 == 28) /* andx */ - { - a = ibuild.EmitAnd(s, b); - } - else if (inst.SUBOP10 == 476) /* nandx */ - { - a = ibuild.EmitNot(ibuild.EmitAnd(s, b)); - } - else if (inst.SUBOP10 == 60) /* andcx */ - { - a = ibuild.EmitAnd(s, ibuild.EmitNot(b)); - } - else if (inst.SUBOP10 == 444) /* orx */ - { - a = ibuild.EmitOr(s, b); - } - else if (inst.SUBOP10 == 124) /* norx */ - { - a = ibuild.EmitNot(ibuild.EmitOr(s, b)); - } - else if (inst.SUBOP10 == 412) /* orcx */ - { - a = ibuild.EmitOr(s, ibuild.EmitNot(b)); - } - else if (inst.SUBOP10 == 316) /* xorx */ - { - a = ibuild.EmitXor(s, b); - } - else if (inst.SUBOP10 == 284) /* eqvx */ - { - a = ibuild.EmitNot(ibuild.EmitXor(s, b)); - } - else - { - PanicAlert("WTF!"); - } - - ibuild.EmitStoreGReg(a, inst.RA); - if (inst.Rc) - ComputeRC(ibuild, a); -} - -void JitILBase::extsbx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitSExt8(val); - ibuild.EmitStoreGReg(val, inst.RA); - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::extshx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RA); - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::subfic(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc nota, lhs, val, test; - nota = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RA), ibuild.EmitIntConst(-1)); - - if (inst.SIMM_16 == -1) - { - val = nota; - test = ibuild.EmitIntConst(1); - } - else - { - lhs = ibuild.EmitIntConst(inst.SIMM_16 + 1); - val = ibuild.EmitAdd(nota, lhs); - test = ibuild.EmitICmpUgt(lhs, val); - } - - ibuild.EmitStoreGReg(val, inst.RD); - ibuild.EmitStoreCarry(test); -} - -void JitILBase::subfcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - if (inst.OE) - PanicAlert("OE: subfcx"); - - IREmitter::InstLoc val, test, lhs, rhs; - lhs = ibuild.EmitLoadGReg(inst.RB); - rhs = ibuild.EmitLoadGReg(inst.RA); - val = ibuild.EmitSub(lhs, rhs); - ibuild.EmitStoreGReg(val, inst.RD); - test = ibuild.EmitICmpEq(rhs, ibuild.EmitIntConst(0)); - test = ibuild.EmitOr(test, ibuild.EmitICmpUgt(lhs, val)); - ibuild.EmitStoreCarry(test); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::subfex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - if (inst.OE) - PanicAlert("OE: subfex"); - - IREmitter::InstLoc val, test, lhs, rhs, carry; - rhs = ibuild.EmitLoadGReg(inst.RA); - carry = ibuild.EmitLoadCarry(); - rhs = ibuild.EmitXor(rhs, ibuild.EmitIntConst(-1)); - rhs = ibuild.EmitAdd(rhs, carry); - test = ibuild.EmitICmpEq(rhs, ibuild.EmitIntConst(0)); - test = ibuild.EmitAnd(test, carry); - lhs = ibuild.EmitLoadGReg(inst.RB); - val = ibuild.EmitAdd(lhs, rhs); - ibuild.EmitStoreGReg(val, inst.RD); - test = ibuild.EmitOr(test, ibuild.EmitICmpUgt(lhs, val)); - ibuild.EmitStoreCarry(test); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::subfx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - if (inst.OE) - PanicAlert("OE: subfx"); - - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); - val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA)); - ibuild.EmitStoreGReg(val, inst.RD); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::mulli(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA); - val = ibuild.EmitMul(val, ibuild.EmitIntConst(inst.SIMM_16)); - ibuild.EmitStoreGReg(val, inst.RD); -} - -void JitILBase::mullwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); - val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val); - ibuild.EmitStoreGReg(val, inst.RD); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::mulhwux(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA); - IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB); - IREmitter::InstLoc d = ibuild.EmitMulHighUnsigned(a, b); - ibuild.EmitStoreGReg(d, inst.RD); - - if (inst.Rc) - ComputeRC(ibuild, d); -} - -// skipped some of the special handling in here - if we get crashes, let the interpreter handle this -// op -void JitILBase::divwux(UGeckoInstruction inst) -{ - // FIXME - FALLBACK_IF(true); - -#if 0 - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(RSCRATCH1); - gpr.Lock(a, b, d); - - if (d != a && d != b) - { - gpr.LoadToX64(d, false, true); - } - else - { - gpr.LoadToX64(d, true, true); - } - - MOV(32, R(RSCRATCH), gpr.R(a)); - XOR(32, R(RSCRATCH2), R(RSCRATCH)); - gpr.KillImmediate(b); - DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(RSCRATCH)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - - if (inst.Rc) - { - CALL((u8*)asm_routines.computeRc); - } -#endif -} - -void JitILBase::addx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); - val = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), val); - ibuild.EmitStoreGReg(val, inst.RD); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::addzex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA), val, newcarry; - val = ibuild.EmitAdd(lhs, ibuild.EmitLoadCarry()); - ibuild.EmitStoreGReg(val, inst.RD); - newcarry = ibuild.EmitICmpUlt(val, lhs); - ibuild.EmitStoreCarry(newcarry); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::addex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA); - IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB); - - IREmitter::InstLoc ab = ibuild.EmitAdd(a, b); - IREmitter::InstLoc new_carry = ibuild.EmitICmpUlt(ab, a); - - IREmitter::InstLoc previous_carry = ibuild.EmitLoadCarry(); - IREmitter::InstLoc abc = ibuild.EmitAdd(ab, previous_carry); - new_carry = ibuild.EmitOr(new_carry, ibuild.EmitICmpUlt(abc, ab)); - - ibuild.EmitStoreGReg(abc, inst.RD); - ibuild.EmitStoreCarry(new_carry); - - if (inst.OE) - PanicAlert("OE: addex"); - - if (inst.Rc) - ComputeRC(ibuild, abc); -} - -void JitILBase::rlwinmx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - unsigned mask = Helper_Mask(inst.MB, inst.ME); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH)); - val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask)); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::rlwimix(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - unsigned mask = Helper_Mask(inst.MB, inst.ME); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH)); - val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask)); - IREmitter::InstLoc ival = ibuild.EmitLoadGReg(inst.RA); - ival = ibuild.EmitAnd(ival, ibuild.EmitIntConst(~mask)); - val = ibuild.EmitOr(ival, val); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::rlwnmx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - unsigned int mask = Helper_Mask(inst.MB, inst.ME); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitRol(val, ibuild.EmitLoadGReg(inst.RB)); - val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask)); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::negx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA); - val = ibuild.EmitSub(ibuild.EmitIntConst(0), val); - ibuild.EmitStoreGReg(val, inst.RD); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::srwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - IREmitter::InstLoc samt = ibuild.EmitLoadGReg(inst.RB); - IREmitter::InstLoc corr; - - // FIXME: We can do better with a cmov - // FIXME: We can do better on 64-bit - val = ibuild.EmitShrl(val, samt); - corr = ibuild.EmitShl(samt, ibuild.EmitIntConst(26)); - corr = ibuild.EmitSarl(corr, ibuild.EmitIntConst(31)); - corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1)); - val = ibuild.EmitAnd(corr, val); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::slwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - IREmitter::InstLoc samt = ibuild.EmitLoadGReg(inst.RB); - IREmitter::InstLoc corr; - - // FIXME: We can do better with a cmov - // FIXME: We can do better on 64-bit - val = ibuild.EmitShl(val, samt); - corr = ibuild.EmitShl(samt, ibuild.EmitIntConst(26)); - corr = ibuild.EmitSarl(corr, ibuild.EmitIntConst(31)); - corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1)); - val = ibuild.EmitAnd(corr, val); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::srawx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - // FIXME: We can do a lot better on 64-bit - IREmitter::InstLoc val, samt, mask, mask2, test; - val = ibuild.EmitLoadGReg(inst.RS); - samt = ibuild.EmitLoadGReg(inst.RB); - mask = ibuild.EmitIntConst(-1); - val = ibuild.EmitSarl(val, samt); - mask = ibuild.EmitShl(mask, samt); - samt = ibuild.EmitShl(samt, ibuild.EmitIntConst(26)); - samt = ibuild.EmitSarl(samt, ibuild.EmitIntConst(31)); - samt = ibuild.EmitAnd(samt, ibuild.EmitIntConst(31)); - val = ibuild.EmitSarl(val, samt); - ibuild.EmitStoreGReg(val, inst.RA); - mask = ibuild.EmitShl(mask, samt); - mask2 = ibuild.EmitAnd(mask, ibuild.EmitIntConst(0x7FFFFFFF)); - test = ibuild.EmitOr(val, mask2); - test = ibuild.EmitICmpUgt(test, mask); - ibuild.EmitStoreCarry(test); - - if (inst.Rc) - ComputeRC(ibuild, val); -} - -void JitILBase::srawix(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - // Shift right by two - IREmitter::InstLoc input = ibuild.EmitLoadGReg(inst.RS); - IREmitter::InstLoc output = ibuild.EmitSarl(input, ibuild.EmitIntConst(inst.SH)); - ibuild.EmitStoreGReg(output, inst.RA); - // Check whether the input is negative and any bits got shifted out. - unsigned int mask = -1u << inst.SH; - IREmitter::InstLoc test = ibuild.EmitOr(input, ibuild.EmitIntConst(mask & 0x7FFFFFFF)); - test = ibuild.EmitICmpUgt(test, ibuild.EmitIntConst(mask)); - - ibuild.EmitStoreCarry(test); - if (inst.Rc) - ComputeRC(ibuild, output); -} - -// count leading zeroes -void JitILBase::cntlzwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); - val = ibuild.EmitCntlzw(val); - ibuild.EmitStoreGReg(val, inst.RA); - - if (inst.Rc) - ComputeRC(ibuild, val); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp deleted file mode 100644 index a1efb94111..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/Assert.h" -#include "Common/CommonTypes.h" -#include "Core/ConfigManager.h" -#include "Core/HW/CPU.h" -#include "Core/PowerPC/PowerPC.h" - -void JitILBase::lhax(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); -} - -void JitILBase::lhaux(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); - ibuild.EmitStoreGReg(addr, inst.RA); -} - -void JitILBase::lXz(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - if (inst.OPCD & 1) - ibuild.EmitStoreGReg(addr, inst.RA); - - IREmitter::InstLoc val; - - // Idle Skipping. - // TODO: This really should be done somewhere else. Either lower in the IR - // or higher in PPCAnalyst - // TODO: We shouldn't use debug reads here. - if (!CPU::IsStepping() && inst.OPCD == 32 && // Lwx - (inst.hex & 0xFFFF0000) == 0x800D0000 && - (PowerPC::HostRead_U32(js.compilerPC + 4) == 0x28000000 || - (SConfig::GetInstance().bWii && PowerPC::HostRead_U32(js.compilerPC + 4) == 0x2C000000)) && - PowerPC::HostRead_U32(js.compilerPC + 8) == 0x4182fff8) - { - val = ibuild.EmitLoad32(addr); - ibuild.EmitIdleBranch(val, ibuild.EmitIntConst(js.compilerPC)); - ibuild.EmitStoreGReg(val, inst.RD); - return; - } - - switch (inst.OPCD & ~0x1) - { - case 32: // lwz - val = ibuild.EmitLoad32(addr); - break; - case 40: // lhz - val = ibuild.EmitLoad16(addr); - break; - case 34: // lbz - val = ibuild.EmitLoad8(addr); - break; - default: - PanicAlert("lXz: invalid access size"); - val = nullptr; - break; - } - - ibuild.EmitStoreGReg(val, inst.RD); -} - -void JitILBase::lbzu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - const IREmitter::InstLoc uAddress = - ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), ibuild.EmitIntConst((int)inst.SIMM_16)); - const IREmitter::InstLoc temp = ibuild.EmitLoad8(uAddress); - ibuild.EmitStoreGReg(temp, inst.RD); - ibuild.EmitStoreGReg(uAddress, inst.RA); -} - -void JitILBase::lha(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst((s32)(s16)inst.SIMM_16); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); -} - -void JitILBase::lhau(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst((s32)inst.SIMM_16); - - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); - ibuild.EmitStoreGReg(addr, inst.RA); -} - -void JitILBase::lXzx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - - if (inst.RA) - { - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - if (inst.SUBOP10 & 32) - ibuild.EmitStoreGReg(addr, inst.RA); - } - - IREmitter::InstLoc val; - switch (inst.SUBOP10 & ~32) - { - default: - PanicAlert("lXzx: invalid access size"); - case 23: // lwzx - val = ibuild.EmitLoad32(addr); - break; - case 279: // lhzx - val = ibuild.EmitLoad16(addr); - break; - case 87: // lbzx - val = ibuild.EmitLoad8(addr); - break; - } - ibuild.EmitStoreGReg(val, inst.RD); -} - -void JitILBase::dcbst(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - - // If the dcbst instruction is preceded by dcbt, it is flushing a prefetched - // memory location. Do not invalidate the JIT cache in this case as the memory - // will be the same. - // dcbt = 0x7c00022c - // TODO: We shouldn't use a debug read here; it should be possible to get the - // previous instruction from the JIT state. - FALLBACK_IF((PowerPC::HostRead_U32(js.compilerPC - 4) & 0x7c00022c) != 0x7c00022c); -} - -// Zero cache line. -void JitILBase::dcbz(UGeckoInstruction inst) -{ - FALLBACK_IF(true); - -// TODO! -#if 0 - if (SConfig::GetInstance().bJITOff || SConfig::GetInstance().bJITLoadStoreOff) - { - Default(inst); - return; - } - INSTRUCTION_START; - MOV(32, R(RSCRATCH), gpr.R(inst.RB)); - if (inst.RA) - ADD(32, R(RSCRATCH), gpr.R(inst.RA)); - AND(32, R(RSCRATCH), Imm32(~31)); - PXOR(XMM0, R(XMM0)); - MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); - MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); -#endif -} - -void JitILBase::stX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - IREmitter::InstLoc value = ibuild.EmitLoadGReg(inst.RS); - - if (inst.RA) - addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr); - if (inst.OPCD & 1) - ibuild.EmitStoreGReg(addr, inst.RA); - - switch (inst.OPCD & ~1) - { - case 36: // stw - ibuild.EmitStore32(value, addr); - break; - case 44: // sth - ibuild.EmitStore16(value, addr); - break; - case 38: // stb - ibuild.EmitStore8(value, addr); - break; - default: - _assert_msg_(DYNA_REC, 0, "stX: Invalid access size."); - return; - } -} - -void JitILBase::stXx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - IREmitter::InstLoc value = ibuild.EmitLoadGReg(inst.RS); - - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - if (inst.SUBOP10 & 32) - ibuild.EmitStoreGReg(addr, inst.RA); - - switch (inst.SUBOP10 & ~32) - { - case 151: // stw - ibuild.EmitStore32(value, addr); - break; - case 407: // sth - ibuild.EmitStore16(value, addr); - break; - case 215: // stb - ibuild.EmitStore8(value, addr); - break; - default: - _assert_msg_(DYNA_REC, 0, "stXx: Invalid store size."); - return; - } -} - -// A few games use these heavily in video codecs. (GFZP01 @ 0x80020E18) -void JitILBase::lmw(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - for (int i = inst.RD; i < 32; i++) - { - IREmitter::InstLoc val = ibuild.EmitLoad32(addr); - ibuild.EmitStoreGReg(val, i); - addr = ibuild.EmitAdd(addr, ibuild.EmitIntConst(4)); - } -} - -void JitILBase::stmw(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - for (int i = inst.RD; i < 32; i++) - { - IREmitter::InstLoc val = ibuild.EmitLoadGReg(i); - ibuild.EmitStore32(val, addr); - addr = ibuild.EmitAdd(addr, ibuild.EmitIntConst(4)); - } -} - -void JitILBase::icbi(UGeckoInstruction inst) -{ - FallBackToInterpreter(inst); - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStoreFloating.cpp deleted file mode 100644 index 1574d8a0ff..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStoreFloating.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/CommonTypes.h" - -// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so -// common, -// and pshufb could help a lot. -// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves. - -void JitILBase::lfs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitDupSingleToMReg(ibuild.EmitLoadSingle(addr)); - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::lfsu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitDupSingleToMReg(ibuild.EmitLoadSingle(addr)); - ibuild.EmitStoreFReg(val, inst.FD); - ibuild.EmitStoreGReg(addr, inst.RA); -} - -void JitILBase::lfd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.RD); - val = ibuild.EmitInsertDoubleInMReg(ibuild.EmitLoadDouble(addr), val); - ibuild.EmitStoreFReg(val, inst.RD); -} - -void JitILBase::lfdu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FD); - val = ibuild.EmitInsertDoubleInMReg(ibuild.EmitLoadDouble(addr), val); - ibuild.EmitStoreFReg(val, inst.FD); - ibuild.EmitStoreGReg(addr, inst.RA); -} - -void JitILBase::stfd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.RS); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - if (inst.OPCD & 1) - ibuild.EmitStoreGReg(addr, inst.RA); - - ibuild.EmitStoreDouble(val, addr); -} - -void JitILBase::stfs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.RS); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - if (inst.OPCD & 1) - ibuild.EmitStoreGReg(addr, inst.RA); - - val = ibuild.EmitDoubleToSingle(val); - ibuild.EmitStoreSingle(val, addr); -} - -void JitILBase::stfsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.RS); - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - val = ibuild.EmitDoubleToSingle(val); - ibuild.EmitStoreSingle(val, addr); -} - -void JitILBase::lfsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); - - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), val; - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - val = ibuild.EmitDupSingleToMReg(ibuild.EmitLoadSingle(addr)); - ibuild.EmitStoreFReg(val, inst.RD); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp deleted file mode 100644 index 5524f9db24..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" - -#include "Common/CommonTypes.h" -#include "Core/PowerPC/PowerPC.h" - -void JitILBase::psq_st(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - FALLBACK_IF(jo.memcheck || inst.W); - - // For performance, the AsmCommon routines assume address translation is on. - FALLBACK_IF(!UReg_MSR(MSR).DR); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12); - IREmitter::InstLoc val; - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - if (inst.OPCD == 61) - ibuild.EmitStoreGReg(addr, inst.RA); - - val = ibuild.EmitLoadFReg(inst.RS); - val = ibuild.EmitCompactMRegToPacked(val); - ibuild.EmitStorePaired(val, addr, inst.I); -} - -void JitILBase::psq_l(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - FALLBACK_IF(jo.memcheck || inst.W); - - // For performance, the AsmCommon routines assume address translation is on. - FALLBACK_IF(!UReg_MSR(MSR).DR); - - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12); - IREmitter::InstLoc val; - - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - - if (inst.OPCD == 57) - ibuild.EmitStoreGReg(addr, inst.RA); - - val = ibuild.EmitLoadPaired( - addr, - inst.I | (inst.W << 3)); // The lower 3 bits is for GQR index. The next 1 bit is for inst.W - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.RD); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp deleted file mode 100644 index 3912667776..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/Assert.h" -#include "Common/CommonTypes.h" - -void JitILBase::ps_arith(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); - IREmitter::InstLoc rhs; - - if (inst.SUBOP5 == 25) - rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - else - rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - - val = ibuild.EmitCompactMRegToPacked(val); - - switch (inst.SUBOP5) - { - case 20: - val = ibuild.EmitFPSub(val, rhs); - break; - case 21: - val = ibuild.EmitFPAdd(val, rhs); - break; - case 25: - val = ibuild.EmitFPMul(val, rhs); - } - - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::ps_sum(UGeckoInstruction inst) -{ - // TODO: This operation strikes me as a bit strange... - // perhaps we can optimize it depending on the users? - // TODO: ps_sum breaks Sonic Colours (black screen) - FALLBACK_IF(true); - - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc || inst.SUBOP5 != 10); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); - IREmitter::InstLoc temp; - - val = ibuild.EmitCompactMRegToPacked(val); - val = ibuild.EmitFPDup0(val); - temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPAdd(val, temp); - temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - val = ibuild.EmitFPMerge11(val, temp); - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::ps_muls(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); - IREmitter::InstLoc rhs = ibuild.EmitLoadFReg(inst.FC); - - val = ibuild.EmitCompactMRegToPacked(val); - rhs = ibuild.EmitCompactMRegToPacked(rhs); - - if (inst.SUBOP5 == 12) - rhs = ibuild.EmitFPDup0(rhs); - else - rhs = ibuild.EmitFPDup1(rhs); - - val = ibuild.EmitFPMul(val, rhs); - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.FD); -} - -// TODO: find easy cases and optimize them, do a breakout like ps_arith -void JitILBase::ps_mergeXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - IREmitter::InstLoc val = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FA)); - IREmitter::InstLoc rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - - switch (inst.SUBOP10) - { - case 528: - val = ibuild.EmitFPMerge00(val, rhs); - break; // 00 - case 560: - val = ibuild.EmitFPMerge01(val, rhs); - break; // 01 - case 592: - val = ibuild.EmitFPMerge10(val, rhs); - break; // 10 - case 624: - val = ibuild.EmitFPMerge11(val, rhs); - break; // 11 - default: - _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); - } - - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.FD); -} - -void JitILBase::ps_maddXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), op2, op3; - val = ibuild.EmitCompactMRegToPacked(val); - - switch (inst.SUBOP5) - { - case 14: // madds0 - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - op2 = ibuild.EmitFPDup0(op2); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPAdd(val, op3); - break; - } - case 15: // madds1 - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - op2 = ibuild.EmitFPDup1(op2); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPAdd(val, op3); - break; - } - case 28: // msub - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPSub(val, op3); - break; - } - case 29: // madd - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPAdd(val, op3); - break; - } - case 30: // nmsub - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPSub(val, op3); - val = ibuild.EmitFPNeg(val); - break; - } - case 31: // nmadd - { - op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); - val = ibuild.EmitFPMul(val, op2); - op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); - val = ibuild.EmitFPAdd(val, op3); - val = ibuild.EmitFPNeg(val); - break; - } - } - - val = ibuild.EmitExpandPackedToMReg(val); - ibuild.EmitStoreFReg(val, inst.FD); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp deleted file mode 100644 index 89b4228281..0000000000 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitILCommon/JitILBase.h" -#include "Common/CommonTypes.h" -#include "Common/MsgHandler.h" - -void JitILBase::mtspr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - - switch (iIndex) - { - case SPR_TL: - case SPR_TU: - FALLBACK_IF(true); - case SPR_LR: - ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD)); - return; - case SPR_CTR: - ibuild.EmitStoreCTR(ibuild.EmitLoadGReg(inst.RD)); - return; - case SPR_GQR0: - case SPR_GQR0 + 1: - case SPR_GQR0 + 2: - case SPR_GQR0 + 3: - case SPR_GQR0 + 4: - case SPR_GQR0 + 5: - case SPR_GQR0 + 6: - case SPR_GQR0 + 7: - ibuild.EmitStoreGQR(ibuild.EmitLoadGReg(inst.RD), iIndex - SPR_GQR0); - return; - case SPR_SRR0: - case SPR_SRR1: - ibuild.EmitStoreSRR(ibuild.EmitLoadGReg(inst.RD), iIndex - SPR_SRR0); - return; - default: - FALLBACK_IF(true); - } -} - -void JitILBase::mfspr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - switch (iIndex) - { - case SPR_TL: - case SPR_TU: - FALLBACK_IF(true); - case SPR_LR: - ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD); - return; - case SPR_CTR: - ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD); - return; - case SPR_GQR0: - case SPR_GQR0 + 1: - case SPR_GQR0 + 2: - case SPR_GQR0 + 3: - case SPR_GQR0 + 4: - case SPR_GQR0 + 5: - case SPR_GQR0 + 6: - case SPR_GQR0 + 7: - ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD); - return; - default: - FALLBACK_IF(true); - } -} - -// ======================================================================================= -// Don't interpret this, if we do we get thrown out -// -------------- -void JitILBase::mtmsr(UGeckoInstruction inst) -{ - ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS), ibuild.EmitIntConst(js.compilerPC)); - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); -} -// ============== - -void JitILBase::mfmsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD); -} - -void JitILBase::mftb(UGeckoInstruction inst) -{ - INSTRUCTION_START; - JITDISABLE(bJITSystemRegistersOff); - mfspr(inst); -} - -void JitILBase::mfcr(UGeckoInstruction inst) -{ - INSTRUCTION_START; - JITDISABLE(bJITSystemRegistersOff); - - IREmitter::InstLoc d = ibuild.EmitIntConst(0); - for (int i = 0; i < 8; ++i) - { - IREmitter::InstLoc cr = ibuild.EmitLoadCR(i); - cr = ibuild.EmitConvertFromFastCR(cr); - cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i)); - d = ibuild.EmitOr(d, cr); - } - ibuild.EmitStoreGReg(d, inst.RD); -} - -void JitILBase::mtcrf(UGeckoInstruction inst) -{ - INSTRUCTION_START; - JITDISABLE(bJITSystemRegistersOff); - - IREmitter::InstLoc s = ibuild.EmitLoadGReg(inst.RS); - for (int i = 0; i < 8; ++i) - { - if (inst.CRM & (0x80 >> i)) - { - IREmitter::InstLoc value; - value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4)); - value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF)); - value = ibuild.EmitConvertToFastCR(value); - ibuild.EmitStoreCR(value, i); - } - } -} - -void JitILBase::mcrf(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - if (inst.CRFS != inst.CRFD) - { - ibuild.EmitStoreCR(ibuild.EmitLoadCR(inst.CRFS), inst.CRFD); - } -} - -void JitILBase::crXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - // Get bit CRBA in EAX aligned with bit CRBD - const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2); - eax = ibuild.EmitConvertFromFastCR(eax); - if (shiftA < 0) - eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA)); - else if (shiftA > 0) - eax = ibuild.EmitShrl(eax, ibuild.EmitIntConst(shiftA)); - - // Get bit CRBB in ECX aligned with bit CRBD - const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2); - ecx = ibuild.EmitConvertFromFastCR(ecx); - if (shiftB < 0) - ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB)); - else if (shiftB > 0) - ecx = ibuild.EmitShrl(ecx, ibuild.EmitIntConst(shiftB)); - - // Compute combined bit - const unsigned subop = inst.SUBOP10; - switch (subop) - { - case 257: // crand - eax = ibuild.EmitAnd(eax, ecx); - break; - case 129: // crandc - ecx = ibuild.EmitNot(ecx); - eax = ibuild.EmitAnd(eax, ecx); - break; - case 289: // creqv - eax = ibuild.EmitXor(eax, ecx); - eax = ibuild.EmitNot(eax); - break; - case 225: // crnand - eax = ibuild.EmitAnd(eax, ecx); - eax = ibuild.EmitNot(eax); - break; - case 33: // crnor - eax = ibuild.EmitOr(eax, ecx); - eax = ibuild.EmitNot(eax); - break; - case 449: // cror - eax = ibuild.EmitOr(eax, ecx); - break; - case 417: // crorc - ecx = ibuild.EmitNot(ecx); - eax = ibuild.EmitOr(eax, ecx); - break; - case 193: // crxor - eax = ibuild.EmitXor(eax, ecx); - break; - default: - PanicAlert("crXX: invalid instruction"); - break; - } - - // Store result bit in CRBD - eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3))); - IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2); - bd = ibuild.EmitConvertFromFastCR(bd); - bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3)))); - bd = ibuild.EmitOr(bd, eax); - bd = ibuild.EmitConvertToFastCR(bd); - ibuild.EmitStoreCR(bd, inst.CRBD >> 2); -} diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index bc1c7c8d89..e427671f6f 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -30,7 +30,6 @@ #if _M_X86 #include "Core/PowerPC/Jit64/Jit.h" -#include "Core/PowerPC/Jit64IL/JitIL.h" #endif #if _M_ARM_64 @@ -53,9 +52,6 @@ CPUCoreBase* InitJitCore(int core) case PowerPC::CORE_JIT64: ptr = new Jit64(); break; - case PowerPC::CORE_JITIL64: - ptr = new JitIL(); - break; #endif #if _M_ARM_64 case PowerPC::CORE_JITARM64: diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index c6fdaf813e..fec282e2d6 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -186,7 +186,7 @@ const std::vector& AvailableCPUCores() static const std::vector cpu_cores = { CORE_INTERPRETER, CORE_CACHEDINTERPRETER, #ifdef _M_X86_64 - CORE_JIT64, CORE_JITIL64, + CORE_JIT64, #elif defined(_M_ARM_64) CORE_JITARM64, #endif diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 4d56b4f6a8..1ca6e6aa61 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -21,14 +21,14 @@ class PointerWrap; namespace PowerPC { +// The gaps in the CPUCore numbering are from cores that only existed in the past. +// We avoid re-numbering cores so that settings will be compatible across versions. enum CPUCore { - CORE_INTERPRETER, - CORE_JIT64, - CORE_JITIL64, - CORE_JITARM, - CORE_JITARM64, - CORE_CACHEDINTERPRETER, + CORE_INTERPRETER = 0, + CORE_JIT64 = 1, + CORE_JITARM64 = 4, + CORE_CACHEDINTERPRETER = 5, }; enum class CoreMode diff --git a/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp b/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp index d599d049af..5671fcc24c 100644 --- a/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp +++ b/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp @@ -29,7 +29,6 @@ static const std::map CPU_CORE_NAMES = { {PowerPC::CORE_INTERPRETER, _trans("Interpreter (slowest)")}, {PowerPC::CORE_CACHEDINTERPRETER, _trans("Cached Interpreter (slower)")}, {PowerPC::CORE_JIT64, _trans("JIT Recompiler (recommended)")}, - {PowerPC::CORE_JITIL64, _trans("JITIL Recompiler (slow, experimental)")}, {PowerPC::CORE_JITARM64, _trans("JIT Arm64 (experimental)")}, };