mirror of https://github.com/PCSX2/pcsx2.git
370 lines
16 KiB
C++
370 lines
16 KiB
C++
/* PCSX2 - PS2 Emulator for PCs
|
|
* Copyright (C) 2002-2010 PCSX2 Dev Team
|
|
*
|
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
|
*
|
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
using namespace x86Emitter;
|
|
|
|
typedef xRegisterSSE xmm;
|
|
typedef xRegisterLong x32;
|
|
|
|
struct microVU;
|
|
|
|
//------------------------------------------------------------------
|
|
// Global Variables
|
|
//------------------------------------------------------------------
|
|
|
|
struct mVU_Globals {
|
|
u32 absclip[4], signbit[4], minvals[4], maxvals[4];
|
|
u32 one[4];
|
|
u32 Pi4[4];
|
|
u32 T1[4], T2[4], T3[4], T4[4], T5[4], T6[4], T7[4], T8[4];
|
|
u32 S2[4], S3[4], S4[4], S5[4];
|
|
u32 E1[4], E2[4], E3[4], E4[4], E5[4], E6[4];
|
|
float FTOI_4[4], FTOI_12[4], FTOI_15[4];
|
|
float ITOF_4[4], ITOF_12[4], ITOF_15[4];
|
|
};
|
|
|
|
#define __four(val) { val, val, val, val }
|
|
static const __aligned(32) mVU_Globals mVUglob = {
|
|
__four(0x7fffffff), // absclip
|
|
__four(0x80000000), // signbit
|
|
__four(0xff7fffff), // minvals
|
|
__four(0x7f7fffff), // maxvals
|
|
__four(0x3f800000), // ONE!
|
|
__four(0x3f490fdb), // PI4!
|
|
__four(0x3f7ffff5), // T1
|
|
__four(0xbeaaa61c), // T5
|
|
__four(0x3e4c40a6), // T2
|
|
__four(0xbe0e6c63), // T3
|
|
__four(0x3dc577df), // T4
|
|
__four(0xbd6501c4), // T6
|
|
__four(0x3cb31652), // T7
|
|
__four(0xbb84d7e7), // T8
|
|
__four(0xbe2aaaa4), // S2
|
|
__four(0x3c08873e), // S3
|
|
__four(0xb94fb21f), // S4
|
|
__four(0x362e9c14), // S5
|
|
__four(0x3e7fffa8), // E1
|
|
__four(0x3d0007f4), // E2
|
|
__four(0x3b29d3ff), // E3
|
|
__four(0x3933e553), // E4
|
|
__four(0x36b63510), // E5
|
|
__four(0x353961ac), // E6
|
|
__four(16.0), // FTOI_4
|
|
__four(4096.0), // FTOI_12
|
|
__four(32768.0), // FTOI_15
|
|
__four(0.0625f), // ITOF_4
|
|
__four(0.000244140625), // ITOF_12
|
|
__four(0.000030517578125) // ITOF_15
|
|
};
|
|
|
|
static const uint _Ibit_ = 1 << 31;
|
|
static const uint _Ebit_ = 1 << 30;
|
|
static const uint _Mbit_ = 1 << 29;
|
|
static const uint _Dbit_ = 1 << 28;
|
|
static const uint _Tbit_ = 1 << 27;
|
|
|
|
static const uint divI = 0x1040000;
|
|
static const uint divD = 0x2080000;
|
|
|
|
static const char branchSTR[16][8] = {
|
|
"None", "B", "BAL", "IBEQ",
|
|
"IBGEZ", "IBGTZ", "IBLEZ", "IBLTZ",
|
|
"IBNE", "JR", "JALR", "N/A",
|
|
"N/A", "N/A", "N/A", "N/A"
|
|
};
|
|
|
|
//------------------------------------------------------------------
|
|
// Helper Macros
|
|
//------------------------------------------------------------------
|
|
|
|
#define _Ft_ ((mVU.code >> 16) & 0x1F) // The ft part of the instruction register
|
|
#define _Fs_ ((mVU.code >> 11) & 0x1F) // The fs part of the instruction register
|
|
#define _Fd_ ((mVU.code >> 6) & 0x1F) // The fd part of the instruction register
|
|
|
|
#define _It_ ((mVU.code >> 16) & 0xF) // The it part of the instruction register
|
|
#define _Is_ ((mVU.code >> 11) & 0xF) // The is part of the instruction register
|
|
#define _Id_ ((mVU.code >> 6) & 0xF) // The id part of the instruction register
|
|
|
|
#define _X ((mVU.code>>24) & 0x1)
|
|
#define _Y ((mVU.code>>23) & 0x1)
|
|
#define _Z ((mVU.code>>22) & 0x1)
|
|
#define _W ((mVU.code>>21) & 0x1)
|
|
|
|
#define _X_Y_Z_W (((mVU.code >> 21 ) & 0xF))
|
|
#define _XYZW_SS (_X+_Y+_Z+_W==1)
|
|
#define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8))
|
|
#define _XYZW_PS (_X_Y_Z_W == 0xf)
|
|
#define _XYZWss(x) ((x==8) || (x==4) || (x==2) || (x==1))
|
|
|
|
#define _bc_ (mVU.code & 0x3)
|
|
#define _bc_x ((mVU.code & 0x3) == 0)
|
|
#define _bc_y ((mVU.code & 0x3) == 1)
|
|
#define _bc_z ((mVU.code & 0x3) == 2)
|
|
#define _bc_w ((mVU.code & 0x3) == 3)
|
|
|
|
#define _Fsf_ ((mVU.code >> 21) & 0x03)
|
|
#define _Ftf_ ((mVU.code >> 23) & 0x03)
|
|
|
|
#define _Imm5_ ((s16) (((mVU.code & 0x400) ? 0xfff0 : 0) | ((mVU.code >> 6) & 0xf)))
|
|
#define _Imm11_ ((s32) ((mVU.code & 0x400) ? (0xfffffc00 | (mVU.code & 0x3ff)) : (mVU.code & 0x3ff)))
|
|
#define _Imm12_ ((u32)((((mVU.code >> 21) & 0x1) << 11) | (mVU.code & 0x7ff)))
|
|
#define _Imm15_ ((u32) (((mVU.code >> 10) & 0x7800) | (mVU.code & 0x7ff)))
|
|
#define _Imm24_ ((u32) (mVU.code & 0xffffff))
|
|
|
|
#define isCOP2 (mVU.cop2 != 0)
|
|
#define isVU1 (mVU.index != 0)
|
|
#define isVU0 (mVU.index == 0)
|
|
#define getIndex (isVU1 ? 1 : 0)
|
|
#define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)
|
|
#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
|
|
#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2: 3)))
|
|
|
|
#define xmmT1 xmm0 // Used for regAlloc
|
|
#define xmmT2 xmm1 // Used for regAlloc
|
|
#define xmmT3 xmm2 // Used for regAlloc
|
|
#define xmmT4 xmm3 // Used for regAlloc
|
|
#define xmmT5 xmm4 // Used for regAlloc
|
|
#define xmmT6 xmm5 // Used for regAlloc
|
|
#define xmmT7 xmm6 // Used for regAlloc
|
|
#define xmmPQ xmm7 // Holds the Value and Backup Values of P and Q regs
|
|
|
|
#define gprT1 eax // eax - Temp Reg
|
|
#define gprT2 ecx // ecx - Temp Reg
|
|
#define gprT3 edx // edx - Temp Reg
|
|
#define gprT1b ax // Low 16-bit of gprT1 (eax)
|
|
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
|
|
#define gprT3b dx // Low 16-bit of gprT3 (edx)
|
|
|
|
#define gprF0 ebx // Status Flag 0
|
|
#define gprF1 ebp // Status Flag 1
|
|
#define gprF2 esi // Status Flag 2
|
|
#define gprF3 edi // Status Flag 3
|
|
|
|
// Function Params
|
|
#define mP microVU& mVU, int recPass
|
|
#define mV microVU& mVU
|
|
#define mF int recPass
|
|
#define mX mVU, recPass
|
|
|
|
typedef void __fastcall Fntype_mVUrecInst(microVU& mVU, int recPass);
|
|
typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
|
|
|
|
// Function/Template Stuff
|
|
#define mVUx (vuIndex ? microVU1 : microVU0)
|
|
#define mVUop(opName) static void __fastcall opName (mP)
|
|
#define _mVUt template<int vuIndex>
|
|
|
|
// Define Passes
|
|
#define pass1 if (recPass == 0) // Analyze
|
|
#define pass2 if (recPass == 1) // Recompile
|
|
#define pass3 if (recPass == 2) // Logging
|
|
#define pass4 if (recPass == 3) // Flag stuff
|
|
|
|
// Upper Opcode Cases
|
|
#define opCase1 if (opCase == 1) // Normal Opcodes
|
|
#define opCase2 if (opCase == 2) // BC Opcodes
|
|
#define opCase3 if (opCase == 3) // I Opcodes
|
|
#define opCase4 if (opCase == 4) // Q Opcodes
|
|
|
|
//------------------------------------------------------------------
|
|
// Define mVUquickSearch
|
|
//------------------------------------------------------------------
|
|
extern __pagealigned u8 mVUsearchXMM[__pagesize];
|
|
typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
|
|
#define mVUemitSearch() { mVUcustomSearch(); }
|
|
//------------------------------------------------------------------
|
|
|
|
// Misc Macros...
|
|
#define mVUcurProg mVU.prog.cur[0]
|
|
#define mVUblocks mVU.prog.cur->block
|
|
#define mVUir mVU.prog.IRinfo
|
|
#define mVUbranch mVU.prog.IRinfo.branch
|
|
#define mVUcycles mVU.prog.IRinfo.cycles
|
|
#define mVUcount mVU.prog.IRinfo.count
|
|
#define mVUpBlock mVU.prog.IRinfo.pBlock
|
|
#define mVUblock mVU.prog.IRinfo.block
|
|
#define mVUregs mVU.prog.IRinfo.block.pState
|
|
#define mVUregsTemp mVU.prog.IRinfo.regsTemp
|
|
#define iPC mVU.prog.IRinfo.curPC
|
|
#define mVUsFlagHack mVU.prog.IRinfo.sFlagHack
|
|
#define mVUconstReg mVU.prog.IRinfo.constReg
|
|
#define mVUstartPC mVU.prog.IRinfo.startPC
|
|
#define mVUinfo mVU.prog.IRinfo.info[iPC / 2]
|
|
#define mVUstall mVUinfo.stall
|
|
#define mVUup mVUinfo.uOp
|
|
#define mVUlow mVUinfo.lOp
|
|
#define sFLAG mVUinfo.sFlag
|
|
#define mFLAG mVUinfo.mFlag
|
|
#define cFLAG mVUinfo.cFlag
|
|
#define mVUrange (mVUcurProg.ranges[0])[0]
|
|
#define isEvilBlock (mVUpBlock->pState.blockType == 2)
|
|
#define isBadOrEvil (mVUlow.badBranch || mVUlow.evilBranch)
|
|
#define xPC ((iPC / 2) * 8)
|
|
#define curI ((u32*)mVU.regs().Micro)[iPC] //mVUcurProg.data[iPC]
|
|
#define setCode() { mVU.code = curI; }
|
|
#define bSaveAddr (((xPC + 16) & (mVU.microMemSize-8)) / 8)
|
|
#define shufflePQ (((mVU.p) ? 0xb0 : 0xe0) | ((mVU.q) ? 0x01 : 0x04))
|
|
#define cmpOffset(x) ((u8*)&(((u8*)x)[it[0].start]))
|
|
#define Rmem &mVU.regs().VI[REG_R].UL
|
|
#define aWrap(x, m) ((x > m) ? 0 : x)
|
|
#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
|
|
#define clampE CHECK_VU_EXTRA_OVERFLOW
|
|
#define varPrint(x) DevCon.WriteLn(#x " = %d", (int)x)
|
|
#define islowerOP ((iPC & 1) == 0)
|
|
|
|
#define blockCreate(addr) { \
|
|
if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); \
|
|
}
|
|
|
|
// Fetches the PC and instruction opcode relative to the current PC. Used to rewind and
|
|
// fast-forward the IR state while calculating VU pipeline conditions (branches, writebacks, etc)
|
|
#define incPC(x) { iPC = ((iPC + (x)) & mVU.progMemMask); mVU.code = curI; }
|
|
#define incPC2(x) { iPC = ((iPC + (x)) & mVU.progMemMask); }
|
|
|
|
// Flag Info (Set if next-block's first 4 ops will read current-block's flags)
|
|
#define __Status (mVUregs.needExactMatch & 1)
|
|
#define __Mac (mVUregs.needExactMatch & 2)
|
|
#define __Clip (mVUregs.needExactMatch & 4)
|
|
|
|
// Pass 3 Helper Macros (Used for program logging)
|
|
#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x")))
|
|
#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x")))
|
|
#define xyzwStr(x,s) (_X_Y_Z_W == x) ? s :
|
|
#define _XYZW_String (xyzwStr(1, "w") (xyzwStr(2, "z") (xyzwStr(3, "zw") (xyzwStr(4, "y") (xyzwStr(5, "yw") (xyzwStr(6, "yz") (xyzwStr(7, "yzw") (xyzwStr(8, "x") (xyzwStr(9, "xw") (xyzwStr(10, "xz") (xyzwStr(11, "xzw") (xyzwStr(12, "xy") (xyzwStr(13, "xyw") (xyzwStr(14, "xyz") "xyzw"))))))))))))))
|
|
#define _BC_String (_bc_x ? "x" : (_bc_y ? "y" : (_bc_z ? "z" : "w")))
|
|
#define mVUlogFtFs() { mVUlog(".%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
|
#define mVUlogFd() { mVUlog(".%s vf%02d, vf%02d", _XYZW_String, _Fd_, _Fs_); }
|
|
#define mVUlogACC() { mVUlog(".%s ACC, vf%02d", _XYZW_String, _Fs_); }
|
|
#define mVUlogFt() { mVUlog(", vf%02d", _Ft_); }
|
|
#define mVUlogBC() { mVUlog(", vf%02d%s", _Ft_, _BC_String); }
|
|
#define mVUlogI() { mVUlog(", I"); }
|
|
#define mVUlogQ() { mVUlog(", Q"); }
|
|
#define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
|
|
|
|
// Program Logging...
|
|
#ifdef mVUlogProg
|
|
#define mVUlog ((isVU1) ? __mVULog<1> : __mVULog<0>)
|
|
#define mVUdumpProg __mVUdumpProgram
|
|
#else
|
|
#define mVUlog(...) if (0) {}
|
|
#define mVUdumpProg(...) if (0) {}
|
|
#endif
|
|
|
|
//------------------------------------------------------------------
|
|
// Optimization / Debug Options
|
|
//------------------------------------------------------------------
|
|
|
|
// Reg Alloc
|
|
static const bool doRegAlloc = true; // Set to false to flush every 32bit Instruction
|
|
// This turns off reg alloc for the most part, but reg alloc will still
|
|
// be done within instructions... Also on doSwapOp() regAlloc is needed between
|
|
// Lower and Upper instructions, so in this case it flushes after the full
|
|
// 64bit instruction (lower and upper)
|
|
|
|
// No Flag Optimizations
|
|
static const bool noFlagOpts = false; // Set to true to disable all flag setting optimizations
|
|
// Note: The flag optimizations this disables should all be harmless, so
|
|
// this option is mainly just for debugging... it effectively forces mVU
|
|
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
|
|
// an Upper Instruction updates them. It also always transfers the 4 possible
|
|
// flag instances between blocks...
|
|
|
|
// Multiple Flag Instances
|
|
static const bool doSFlagInsts = true; // Set to true to enable multiple status flag instances
|
|
static const bool doMFlagInsts = true; // Set to true to enable multiple mac flag instances
|
|
static const bool doCFlagInsts = true; // Set to true to enable multiple clip flag instances
|
|
// This is the correct behavior of the VU's. Due to the pipeline of the VU's
|
|
// there can be up to 4 different instances of values to keep track of
|
|
// for the 3 different types of flags: Status, Mac, Clip flags.
|
|
// Setting one of these to 0 acts as if there is only 1 instance of the
|
|
// corresponding flag, which may be useful when debugging flag pipeline bugs.
|
|
|
|
static const int doFullFlagOpt = false; // Set above to false to enable full flag optimization
|
|
// This attempts to eliminate some flag shuffling at the end of blocks, but
|
|
// can end up creating more recompiled code. The max amount of times this optimization
|
|
// is performed per block can be set by changing the doFullFlagOpt value to be that limit.
|
|
// i.e. setting doFullFlagOpt to 2 will recompile the current block at-most 2 times with
|
|
// the full flag optimization.
|
|
// Note: This optimization doesn't really seem to be benefitial and is buggy...
|
|
|
|
// Branch in Branch Delay Slots
|
|
static const bool doBranchInDelaySlot = true; // Set to true to enable evil-branches
|
|
// This attempts to emulate the correct behavior for branches in branch delay
|
|
// slots. It is evil that games do this, and handling the different possible
|
|
// cases is tricky and bug prone. If this option is disabled then the second
|
|
// branch is treated as a NOP and effectively ignored.
|
|
|
|
// Constant Propagation
|
|
static const bool doConstProp = false; // Set to true to turn on vi15 const propagation
|
|
// Enables Constant Propagation for Jumps based on vi15 'link-register'
|
|
// allowing us to know many indirect jump target addresses.
|
|
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!
|
|
|
|
// Indirect Jump Caching
|
|
static const bool doJumpCaching = true; // Set to true to enable jump caching
|
|
// Indirect jumps (JR/JALR) will remember the entry points to their previously
|
|
// jumped-to addresses. This allows us to skip the microBlockManager::search()
|
|
// routine that is performed every indirect jump in order to find a block within a
|
|
// program that matches the correct pipeline state.
|
|
|
|
// Indirect Jumps are part of same cached microProgram
|
|
static const bool doJumpAsSameProgram = false; // Set to true to treat jumps as same program
|
|
// Enabling this treats indirect jumps (JR/JALR) as part of the same microProgram
|
|
// when determining the valid ranges for the microProgram cache. Disabling this
|
|
// counts indirect jumps as separate cached microPrograms which generally leads
|
|
// to more microPrograms being cached, but the programs created are smaller and
|
|
// the overall cache usage ends up being more optimal; it can also help prevent
|
|
// constant recompilation problems in certain games.
|
|
// Note: You MUST disable doJumpCaching if you enable this option.
|
|
|
|
// Handling of D-Bit in Micro Programs
|
|
static const bool doDBitHandling = false;
|
|
// This flag shouldn't be enabled in released versions of games. Any games which
|
|
// need this method of pausing the VU should be using the T-Bit instead, however
|
|
// this could prove useful for VU debugging.
|
|
|
|
//------------------------------------------------------------------
|
|
// Speed Hacks (can cause infinite loops, SPS, Black Screens, etc...)
|
|
//------------------------------------------------------------------
|
|
|
|
// Status Flag Speed Hack
|
|
#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack)
|
|
// This hack only updates the Status Flag on blocks that will read it.
|
|
// Most blocks do not read status flags, so this is a big speedup.
|
|
|
|
// Min/Max Speed Hack
|
|
#define CHECK_VU_MINMAXHACK 0 //(EmuConfig.Speedhacks.vuMinMax)
|
|
// This hack uses SSE min/max instructions instead of emulated "logical min/max"
|
|
// The PS2 does not consider denormals as zero on the mini/max opcodes.
|
|
// This speedup is minor, but on AMD X2 CPUs it can be a 1~3% speedup
|
|
|
|
//------------------------------------------------------------------
|
|
// Unknown Data
|
|
//------------------------------------------------------------------
|
|
|
|
// XG Kick Transfer Delay Amount
|
|
#define mVU_XGKICK_CYCLES ((CHECK_XGKICKHACK) ? 6 : 1)
|
|
// Its unknown at recompile time how long the xgkick transfer will take
|
|
// so give it a value that makes games happy :) (SO3 is fine at 1 cycle delay)
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW=false);
|
|
extern void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW);
|
|
extern void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw);
|