mirror of https://github.com/PCSX2/pcsx2.git
671 lines
20 KiB
C++
671 lines
20 KiB
C++
/* PCSX2 - PS2 Emulator for PCs
|
|
* Copyright (C) 2002-2010 PCSX2 Dev Team
|
|
*
|
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
|
*
|
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
#include "microVU.h"
|
|
|
|
union regInfo
|
|
{
|
|
u32 reg;
|
|
struct
|
|
{
|
|
u8 x;
|
|
u8 y;
|
|
u8 z;
|
|
u8 w;
|
|
};
|
|
};
|
|
|
|
// microRegInfo is carefully ordered for faster compares. The "important" information is
|
|
// housed in a union that is accessed via 'quick32' so that several u8 fields can be compared
|
|
// using a pair of 32-bit equalities.
|
|
// vi15 is only used if microVU const-prop is enabled (it is *not* by default). When constprop
|
|
// is disabled the vi15 field acts as additional padding that is required for 16 byte alignment
|
|
// needed by the xmm compare.
|
|
union alignas(16) microRegInfo
|
|
{
|
|
struct
|
|
{
|
|
union
|
|
{
|
|
struct
|
|
{
|
|
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
|
u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1
|
|
u8 q;
|
|
u8 p;
|
|
u8 xgkick;
|
|
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
|
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
|
u8 r;
|
|
};
|
|
u64 quick64[2];
|
|
u32 quick32[4];
|
|
};
|
|
|
|
u32 xgkickcycles;
|
|
u8 mbitinblock;
|
|
u8 vi15v; // 'vi15' constant is valid
|
|
u16 vi15; // Constant Prop Info for vi15
|
|
|
|
struct
|
|
{
|
|
u8 VI[16];
|
|
regInfo VF[32];
|
|
};
|
|
};
|
|
|
|
u128 full128[176 / sizeof(u128)];
|
|
u64 full64[176 / sizeof(u64)];
|
|
u32 full32[176 / sizeof(u32)];
|
|
};
|
|
|
|
static_assert(sizeof(microRegInfo) == 176, "microRegInfo was not 176 bytes");
|
|
|
|
struct microProgram;
|
|
struct microJumpCache
|
|
{
|
|
microJumpCache() : prog(NULL), x86ptrStart(NULL) {}
|
|
microProgram* prog; // Program to which the entry point below is part of
|
|
void* x86ptrStart; // Start of code (Entry point for block)
|
|
};
|
|
|
|
struct alignas(16) microBlock
|
|
{
|
|
microRegInfo pState; // Detailed State of Pipeline
|
|
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
|
u8* x86ptrStart; // Start of code (Entry point for block)
|
|
microJumpCache* jumpCache; // Will point to an array of entry points of size [16k/8] if block ends in JR/JALR
|
|
};
|
|
|
|
struct microTempRegInfo
|
|
{
|
|
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
|
u8 VFreg[2]; // Index of the VF reg
|
|
u8 VI; // Holds cycle info for Id
|
|
u8 VIreg; // Index of the VI reg
|
|
u8 q; // Holds cycle info for Q reg
|
|
u8 p; // Holds cycle info for P reg
|
|
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
|
u8 xgkick; // Holds the cycle info for XGkick
|
|
};
|
|
|
|
struct microVFreg
|
|
{
|
|
u8 reg; // Reg Index
|
|
u8 x; // X vector read/written to?
|
|
u8 y; // Y vector read/written to?
|
|
u8 z; // Z vector read/written to?
|
|
u8 w; // W vector read/written to?
|
|
};
|
|
|
|
struct microVIreg
|
|
{
|
|
u8 reg; // Reg Index
|
|
u8 used; // Reg is Used? (Read/Written)
|
|
};
|
|
|
|
struct microConstInfo
|
|
{
|
|
u8 isValid; // Is the constant in regValue valid?
|
|
u32 regValue; // Constant Value
|
|
};
|
|
|
|
struct microUpperOp
|
|
{
|
|
bool eBit; // Has E-bit set
|
|
bool iBit; // Has I-bit set
|
|
bool mBit; // Has M-bit set
|
|
bool tBit; // Has T-bit set
|
|
bool dBit; // Has D-bit set
|
|
microVFreg VF_write; // VF Vectors written to by this instruction
|
|
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
|
};
|
|
|
|
struct microLowerOp
|
|
{
|
|
microVFreg VF_write; // VF Vectors written to by this instruction
|
|
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
|
microVIreg VI_write; // VI reg written to by this instruction
|
|
microVIreg VI_read[2]; // VI regs read by this instruction
|
|
microConstInfo constJump; // Constant Reg Info for JR/JARL instructions
|
|
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR)
|
|
u32 kickcycles; // Number of xgkick cycles accumulated by this instruction
|
|
bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot
|
|
bool evilBranch; // This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch)
|
|
bool isNOP; // This instruction is a NOP
|
|
bool isFSSET; // This instruction is a FSSET
|
|
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
|
|
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
|
|
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
|
|
bool memReadIt; // Read If (VI reg) from memory (used by branches)
|
|
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
|
|
bool isMemWrite; // Current Instruction writes to VU memory
|
|
bool isKick; // Op is a kick so don't count kick cycles
|
|
};
|
|
|
|
struct microFlagInst
|
|
{
|
|
bool doFlag; // Update Flag on this Instruction
|
|
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
|
|
u8 write; // Points to the instance that should be written to (s-stage write)
|
|
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
|
|
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
|
|
};
|
|
|
|
struct microFlagCycles
|
|
{
|
|
int xStatus[4];
|
|
int xMac[4];
|
|
int xClip[4];
|
|
int cycles;
|
|
};
|
|
|
|
struct microOp
|
|
{
|
|
u8 stall; // Info on how much current instruction stalled
|
|
bool isBadOp; // Cur Instruction is a bad opcode (not a legal instruction)
|
|
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
|
|
bool isBdelay; // Cur Instruction in Branch Delay slot
|
|
bool swapOps; // Run Lower Instruction before Upper Instruction
|
|
bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called
|
|
bool doXGKICK; // Do XGKICK transfer on this instruction
|
|
u32 XGKICKPC; // The PC in which the XGKick has taken place, so if we break early (before it) we don run it.
|
|
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
|
|
int readQ; // Q instance for reading
|
|
int writeQ; // Q instance for writing
|
|
int readP; // P instance for reading
|
|
int writeP; // P instance for writing
|
|
microFlagInst sFlag; // Status Flag Instance Info
|
|
microFlagInst mFlag; // Mac Flag Instance Info
|
|
microFlagInst cFlag; // Clip Flag Instance Info
|
|
microUpperOp uOp; // Upper Op Info
|
|
microLowerOp lOp; // Lower Op Info
|
|
};
|
|
|
|
template <u32 pSize>
|
|
struct microIR
|
|
{
|
|
microBlock block; // Block/Pipeline info
|
|
microBlock* pBlock; // Pointer to a block in mVUblocks
|
|
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
|
microOp info[pSize / 2]; // Info for Instructions in current block
|
|
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
|
|
u8 branch;
|
|
u32 cycles; // Cycles for current block
|
|
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
|
u32 curPC; // Current PC
|
|
u32 startPC; // Start PC for Cur Block
|
|
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
|
};
|
|
|
|
//------------------------------------------------------------------
|
|
// Reg Alloc
|
|
//------------------------------------------------------------------
|
|
|
|
//#define MVURALOG(...) fprintf(stderr, __VA_ARGS__)
|
|
#define MVURALOG(...)
|
|
|
|
struct microMapXMM
|
|
{
|
|
int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
|
|
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
|
|
int count; // Count of when last used
|
|
bool isNeeded; // Is needed for current instruction
|
|
bool isZero; // Register was loaded from VF00 and doesn't need clamping
|
|
};
|
|
|
|
class microRegAlloc
|
|
{
|
|
protected:
|
|
static const int xmmTotal = 15; // PQ register is reserved
|
|
microMapXMM xmmMap[xmmTotal];
|
|
int counter; // Current allocation count
|
|
int index; // VU0 or VU1
|
|
|
|
// DO NOT REMOVE THIS.
|
|
// This is here for a reason. MSVC likes to turn global writes into a load+conditional move+store.
|
|
// That creates a race with the EE thread when we're compiling on the VU thread, even though
|
|
// regAllocCOP2 is false. By adding another level of indirection, it emits a branch instead.
|
|
_xmmregs* pxmmregs;
|
|
|
|
bool regAllocCOP2; // Local COP2 check
|
|
|
|
// Helper functions to get VU regs
|
|
VURegs& regs() const { return ::vuRegs[index]; }
|
|
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
|
|
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
|
|
|
|
__ri void loadIreg(const xmm& reg, int xyzw)
|
|
{
|
|
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
|
|
if (!_XYZWss(xyzw))
|
|
xSHUF.PS(reg, reg, 0);
|
|
}
|
|
|
|
int findFreeRegRec(int startIdx)
|
|
{
|
|
for (int i = startIdx; i < xmmTotal; i++)
|
|
{
|
|
if (!xmmMap[i].isNeeded)
|
|
{
|
|
int x = findFreeRegRec(i + 1);
|
|
if (x == -1)
|
|
return i;
|
|
return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int findFreeReg(int vfreg)
|
|
{
|
|
if (regAllocCOP2)
|
|
{
|
|
return _allocVFtoXMMreg(vfreg, 0);
|
|
}
|
|
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0))
|
|
{
|
|
return i; // Reg is not needed and was a temp reg
|
|
}
|
|
}
|
|
int x = findFreeRegRec(0);
|
|
pxAssertDev(x >= 0, "microVU register allocation failure!");
|
|
return x;
|
|
}
|
|
|
|
public:
|
|
microRegAlloc(int _index)
|
|
{
|
|
index = _index;
|
|
reset(false);
|
|
}
|
|
|
|
// Fully resets the regalloc by clearing all cached data
|
|
void reset(bool cop2mode)
|
|
{
|
|
// we run this at the of cop2, so don't free fprs
|
|
regAllocCOP2 = false;
|
|
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
clearReg(i);
|
|
}
|
|
counter = 0;
|
|
regAllocCOP2 = cop2mode;
|
|
pxmmregs = cop2mode ? xmmregs : nullptr;
|
|
|
|
if (cop2mode)
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
if (!pxmmregs[i].inuse || pxmmregs[i].type != XMMTYPE_VFREG)
|
|
continue;
|
|
|
|
// we shouldn't have any temp registers in here.. except for PQ, which
|
|
// isn't allocated here yet.
|
|
// pxAssertRel(fprregs[i].reg >= 0, "Valid full register preserved");
|
|
if (pxmmregs[i].reg >= 0)
|
|
{
|
|
MVURALOG("Preserving VF reg %d in host reg %d across instruction\n", pxmmregs[i].reg, i);
|
|
pxAssert(pxmmregs[i].reg != 255);
|
|
pxmmregs[i].needed = false;
|
|
xmmMap[i].isNeeded = false;
|
|
xmmMap[i].VFreg = pxmmregs[i].reg;
|
|
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int getXmmCount()
|
|
{
|
|
return xmmTotal + 1;
|
|
}
|
|
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
|
|
// If clearState is 0, then it keeps cached reg data valid
|
|
// If clearState is 1, then it invalidates all cached reg data after write-back
|
|
void flushAll(bool clearState = true)
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
writeBackReg(xmm(i));
|
|
if (clearState)
|
|
clearReg(i);
|
|
}
|
|
}
|
|
|
|
void flushPartialForCOP2()
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
microMapXMM& clear = xmmMap[i];
|
|
|
|
// toss away anything which is not a full cached register
|
|
if (pxmmregs[i].inuse && pxmmregs[i].type == XMMTYPE_VFREG)
|
|
{
|
|
// Should've been done in clearNeeded()
|
|
if (clear.xyzw != 0 && clear.xyzw != 0xf)
|
|
writeBackReg(xRegisterSSE::GetInstance(i), false);
|
|
|
|
if (clear.VFreg <= 0)
|
|
{
|
|
// temps really shouldn't be here..
|
|
_freeXMMreg(i);
|
|
}
|
|
}
|
|
|
|
// needed gets cleared in iCore.
|
|
clear.VFreg = -1;
|
|
clear.count = 0;
|
|
clear.xyzw = 0;
|
|
clear.isNeeded = 0;
|
|
clear.isZero = 0;
|
|
}
|
|
}
|
|
|
|
void TDwritebackAll(bool clearState = false)
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
microMapXMM& mapX = xmmMap[xmm(i).Id];
|
|
|
|
if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0
|
|
{
|
|
if (mapX.VFreg == 33)
|
|
xMOVSS(ptr32[&getVI(REG_I)], xmm(i));
|
|
else if (mapX.VFreg == 32)
|
|
mVUsaveReg(xmm(i), ptr[®s().ACC], mapX.xyzw, 1);
|
|
else
|
|
mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool checkVFClamp(int regId)
|
|
{
|
|
if ((xmmMap[regId].VFreg == 33 && !EmuConfig.Gamefixes.IbitHack) || xmmMap[regId].isZero)
|
|
return false;
|
|
else
|
|
return true;
|
|
}
|
|
|
|
bool checkCachedReg(int regId)
|
|
{
|
|
if (regId < xmmTotal)
|
|
return xmmMap[regId].VFreg >= 0;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
|
void clearReg(int regId)
|
|
{
|
|
microMapXMM& clear = xmmMap[regId];
|
|
if (regAllocCOP2)
|
|
{
|
|
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
|
|
pxmmregs[regId].inuse = false;
|
|
}
|
|
|
|
clear.VFreg = -1;
|
|
clear.count = 0;
|
|
clear.xyzw = 0;
|
|
clear.isNeeded = 0;
|
|
clear.isZero = 0;
|
|
}
|
|
|
|
void clearRegVF(int VFreg)
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
if (xmmMap[i].VFreg == VFreg)
|
|
clearReg(i);
|
|
}
|
|
}
|
|
|
|
void clearRegCOP2(int xmmReg)
|
|
{
|
|
if (regAllocCOP2)
|
|
clearReg(xmmReg);
|
|
}
|
|
|
|
void updateCOP2AllocState(int rn)
|
|
{
|
|
if (!regAllocCOP2)
|
|
return;
|
|
|
|
const bool dirty = (xmmMap[rn].VFreg > 0 && xmmMap[rn].xyzw != 0);
|
|
pxAssert(pxmmregs[rn].type == XMMTYPE_VFREG);
|
|
pxmmregs[rn].reg = xmmMap[rn].VFreg;
|
|
pxmmregs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
|
|
pxmmregs[rn].needed = xmmMap[rn].isNeeded;
|
|
}
|
|
|
|
// Writes back modified reg to memory.
|
|
// If all vectors modified, then keeps the VF reg cached in the xmm register.
|
|
// If reg was not modified, then keeps the VF reg cached in the xmm register.
|
|
void writeBackReg(const xmm& reg, bool invalidateRegs = true)
|
|
{
|
|
microMapXMM& mapX = xmmMap[reg.Id];
|
|
|
|
if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0
|
|
{
|
|
if (mapX.VFreg == 33)
|
|
xMOVSS(ptr32[&getVI(REG_I)], reg);
|
|
else if (mapX.VFreg == 32)
|
|
mVUsaveReg(reg, ptr[®s().ACC], mapX.xyzw, true);
|
|
else
|
|
mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, true);
|
|
|
|
if (invalidateRegs)
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
microMapXMM& mapI = xmmMap[i];
|
|
|
|
if ((i == reg.Id) || mapI.isNeeded)
|
|
continue;
|
|
|
|
if (mapI.VFreg == mapX.VFreg)
|
|
{
|
|
if (mapI.xyzw && mapI.xyzw < 0xf)
|
|
DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg);
|
|
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
|
}
|
|
}
|
|
}
|
|
if (mapX.xyzw == 0xf) // Make Cached Reg if All Vectors were Modified
|
|
{
|
|
mapX.count = counter;
|
|
mapX.xyzw = 0;
|
|
mapX.isNeeded = false;
|
|
updateCOP2AllocState(reg.Id);
|
|
return;
|
|
}
|
|
clearReg(reg);
|
|
}
|
|
else if (mapX.xyzw) // Clear reg if modified and is VF0 or temp reg...
|
|
{
|
|
clearReg(reg);
|
|
}
|
|
}
|
|
|
|
// Use this when done using the allocated register, it clears its "Needed" status.
|
|
// The register that was written to, should be cleared before other registers are cleared.
|
|
// This is to guarantee proper merging between registers... When a written-to reg is cleared,
|
|
// it invalidates other cached registers of the same VF reg, and merges partial-vector
|
|
// writes into them.
|
|
void clearNeeded(const xmm& reg)
|
|
{
|
|
|
|
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) // Sometimes xmmPQ hits this
|
|
return;
|
|
|
|
microMapXMM& clear = xmmMap[reg.Id];
|
|
clear.isNeeded = false;
|
|
if (clear.xyzw) // Reg was modified
|
|
{
|
|
if (clear.VFreg > 0)
|
|
{
|
|
int mergeRegs = 0;
|
|
if (clear.xyzw < 0xf) // Try to merge partial writes
|
|
mergeRegs = 1;
|
|
for (int i = 0; i < xmmTotal; i++) // Invalidate any other read-only regs of same vfReg
|
|
{
|
|
if (i == reg.Id)
|
|
continue;
|
|
microMapXMM& mapI = xmmMap[i];
|
|
if (mapI.VFreg == clear.VFreg)
|
|
{
|
|
if (mapI.xyzw && mapI.xyzw < 0xf)
|
|
{
|
|
DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg);
|
|
}
|
|
if (mergeRegs == 1)
|
|
{
|
|
mVUmergeRegs(xmm(i), reg, clear.xyzw, true);
|
|
mapI.xyzw = 0xf;
|
|
mapI.count = counter;
|
|
mergeRegs = 2;
|
|
updateCOP2AllocState(i);
|
|
}
|
|
else
|
|
clearReg(i); // Clears when mergeRegs is 0 or 2
|
|
}
|
|
}
|
|
if (mergeRegs == 2) // Clear Current Reg if Merged
|
|
clearReg(reg);
|
|
else if (mergeRegs == 1) // Write Back Partial Writes if couldn't merge
|
|
writeBackReg(reg);
|
|
}
|
|
else
|
|
clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
|
}
|
|
else if (regAllocCOP2 && clear.VFreg < 0)
|
|
{
|
|
// free on the EE side
|
|
pxAssert(pxmmregs[reg.Id].type == XMMTYPE_VFREG);
|
|
pxmmregs[reg.Id].inuse = false;
|
|
}
|
|
}
|
|
|
|
// vfLoadReg = VF reg to be loaded to the xmm register
|
|
// vfWriteReg = VF reg that the returned xmm register will be considered as
|
|
// xyzw = XYZW vectors that will be modified (and loaded)
|
|
// cloneWrite = When loading a reg that will be written to, it copies it to its own xmm reg instead of overwriting the cached one...
|
|
// Notes:
|
|
// To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1.
|
|
// To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1
|
|
// To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0
|
|
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1)
|
|
{
|
|
//DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite);
|
|
counter++;
|
|
if (vfLoadReg >= 0) // Search For Cached Regs
|
|
{
|
|
for (int i = 0; i < xmmTotal; i++)
|
|
{
|
|
const xmm& xmmI = xmm::GetInstance(i);
|
|
microMapXMM& mapI = xmmMap[i];
|
|
if ((mapI.VFreg == vfLoadReg)
|
|
&& (!mapI.xyzw // Reg Was Not Modified
|
|
|| (mapI.VFreg && (mapI.xyzw == 0xf)))) // Reg Had All Vectors Modified and != VF0
|
|
{
|
|
int z = i;
|
|
if (vfWriteReg >= 0) // Reg will be modified
|
|
{
|
|
if (cloneWrite) // Clone Reg so as not to use the same Cached Reg
|
|
{
|
|
z = findFreeReg(vfWriteReg);
|
|
const xmm& xmmZ = xmm::GetInstance(z);
|
|
writeBackReg(xmmZ);
|
|
|
|
if (xyzw == 4)
|
|
xPSHUF.D(xmmZ, xmmI, 1);
|
|
else if (xyzw == 2)
|
|
xPSHUF.D(xmmZ, xmmI, 2);
|
|
else if (xyzw == 1)
|
|
xPSHUF.D(xmmZ, xmmI, 3);
|
|
else if (z != i)
|
|
xMOVAPS(xmmZ, xmmI);
|
|
|
|
mapI.count = counter; // Reg i was used, so update counter
|
|
}
|
|
else // Don't clone reg, but shuffle to adjust for SS ops
|
|
{
|
|
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf))
|
|
writeBackReg(xmmI);
|
|
|
|
if (xyzw == 4)
|
|
xPSHUF.D(xmmI, xmmI, 1);
|
|
else if (xyzw == 2)
|
|
xPSHUF.D(xmmI, xmmI, 2);
|
|
else if (xyzw == 1)
|
|
xPSHUF.D(xmmI, xmmI, 3);
|
|
}
|
|
xmmMap[z].VFreg = vfWriteReg;
|
|
xmmMap[z].xyzw = xyzw;
|
|
xmmMap[z].isZero = (vfLoadReg == 0);
|
|
}
|
|
xmmMap[z].count = counter;
|
|
xmmMap[z].isNeeded = true;
|
|
updateCOP2AllocState(z);
|
|
|
|
return xmm::GetInstance(z);
|
|
}
|
|
}
|
|
}
|
|
int x = findFreeReg((vfWriteReg >= 0) ? vfWriteReg : vfLoadReg);
|
|
const xmm& xmmX = xmm::GetInstance(x);
|
|
writeBackReg(xmmX);
|
|
|
|
if (vfWriteReg >= 0) // Reg Will Be Modified (allow partial reg loading)
|
|
{
|
|
if ((vfLoadReg == 0) && !(xyzw & 1))
|
|
xPXOR(xmmX, xmmX);
|
|
else if (vfLoadReg == 33)
|
|
loadIreg(xmmX, xyzw);
|
|
else if (vfLoadReg == 32)
|
|
mVUloadReg(xmmX, ptr[®s().ACC], xyzw);
|
|
else if (vfLoadReg >= 0)
|
|
mVUloadReg(xmmX, ptr[&getVF(vfLoadReg)], xyzw);
|
|
|
|
xmmMap[x].VFreg = vfWriteReg;
|
|
xmmMap[x].xyzw = xyzw;
|
|
}
|
|
else // Reg Will Not Be Modified (always load full reg for caching)
|
|
{
|
|
if (vfLoadReg == 33)
|
|
loadIreg(xmmX, 0xf);
|
|
else if (vfLoadReg == 32)
|
|
xMOVAPS (xmmX, ptr128[®s().ACC]);
|
|
else if (vfLoadReg >= 0)
|
|
xMOVAPS (xmmX, ptr128[&getVF(vfLoadReg)]);
|
|
|
|
xmmMap[x].VFreg = vfLoadReg;
|
|
xmmMap[x].xyzw = 0;
|
|
}
|
|
xmmMap[x].isZero = (vfLoadReg == 0);
|
|
xmmMap[x].count = counter;
|
|
xmmMap[x].isNeeded = true;
|
|
updateCOP2AllocState(x);
|
|
return xmmX;
|
|
}
|
|
};
|