pcsx2/pcsx2/x86/sVU_zerorec.cpp

4680 lines
128 KiB
C++

/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Super VU recompiler - author: zerofrog(@gmail.com)
#include "PrecompiledHeader.h"
#include <float.h>
#include <list>
#include <map>
#include "Utilities/AsciiFile.h"
#ifndef _WIN32
#include <sys/types.h>
#endif
#include "Common.h"
#include "GS.h"
#include "Gif.h"
#include "VU.h"
#include "MTVU.h"
#include "R5900.h"
#include "iR5900.h"
#include "System/RecTypes.h"
#include "sVU_zerorec.h"
#include "NakedAsm.h"
#include "AppConfig.h"
// Needed in gcc for find.
#include <algorithm>
using namespace x86Emitter;
// temporary externs
extern void iDumpVU0Registers();
extern void iDumpVU1Registers();
// SuperVURec optimization options, uncomment only for debugging purposes
#define SUPERVU_CACHING // vu programs are saved and queried via memcompare (should be no reason to disable this)
#define SUPERVU_WRITEBACKS // don't flush the writebacks after every block
#define SUPERVU_VIBRANCHDELAY // when integers are modified right before a branch that uses the integer,
// the old integer value is used in the branch, fixes kh2
#define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on...
// use x86reg caching (faster) (not really. rather lots slower :p (rama) )
// ... and buggy too since we disabled EBP. Causes GoW2 to hang. Let's get rid of it,
// sVU is only here to serve as a regression model for Nan/INF behavior anyway. (--air)
//#define SUPERVU_X86CACHING
// registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise, causes SPS in Ratchet and clank (Nneeve) )
#ifndef PCSX2_DEBUG
//#define SUPERVU_INTERCACHING
#endif
#define SUPERVU_CHECKCONDITION 0 // has to be 0!!
static const uint sVU_EXESIZE = _8mb;
#define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) )
#define _UImm11_ (s32)(vucode & 0x7ff)
#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register
#define _It_ (_Ft_ & 15)
#define _Is_ (_Fs_ & 15)
#define _Id_ (_Fd_ & 15)
static const u32 QWaitTimes[] = { 6, 12 };
static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 };
static u32 s_vuInfo; // info passed into rec insts
static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE};
//static u8* s_recVUMem = NULL, *s_recVUPtr = NULL;
static RecompiledCodeReserve* s_recVUMem[2] = { NULL, NULL };
static u8* s_recVUPtr[2] = { NULL, NULL };
// tables which are defined at the bottom of this massive file.
extern void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info);
extern void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info);
#define INST_Q_READ 0x0001 // flush Q
#define INST_P_READ 0x0002 // flush P
#define INST_BRANCH_DELAY 0x0004
#define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future
#define INST_STATUS_WRITE 0x0080
#define INST_MAC_WRITE 0x0100
#define INST_Q_WRITE 0x0200
#define INST_CACHE_VI 0x0400 // write old vi value to s_VIBranchDelay
// Let's tempt fate by defining two different constants with almost identical names
#define INST_DUMMY_ 0x8000
#define INST_DUMMY 0x83c0
#define VFFREE_INVALID0 0x80000000 // (vffree[i]&0xf) is invalid
//#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it))
#ifdef PCSX2_DEBUG
u32 s_vucount = 0;
static u32 g_vu1lastrec = 0, skipparent = -1;
static u32 s_svulast = 0, s_vufnheader;
static u32 badaddrs[][2] = {0, 0xffff};
#endif
union VURecRegs
{
struct
{
u16 reg;
u16 type;
};
u32 id;
};
#define SUPERVU_XGKICKDELAY 1 // yes this is needed as default (wipeout)
class VuBaseBlock;
struct VuFunctionHeader
{
struct RANGE
{
RANGE() : start(0), size(0), pmem(NULL) {}
u16 start, size;
void* pmem; // all the mem
};
VuFunctionHeader() : startpc(0xffffffff), pprogfunc(NULL) {}
~VuFunctionHeader()
{
for (std::vector<RANGE>::iterator it = ranges.begin(); it != ranges.end(); ++it)
{
free(it->pmem);
}
}
// returns true if the checksum for the current mem is the same as this fn
bool IsSame(void* pmem);
u32 startpc;
void* pprogfunc;
std::vector<RANGE> ranges;
};
struct VuBlockHeader
{
VuBaseBlock* pblock;
u32 delay;
};
// one vu inst (lower and upper)
class VuInstruction
{
public:
VuInstruction()
{
memzero(*this);
nParentPc = -1;
vicached = -1;
}
int nParentPc; // used for syncing with flag writes, -1 for no parent
_vuopinfo info;
_VURegsNum regs[2]; // [0] - lower, [1] - upper
u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float
u32 addvars[2]; // live variables to add
u32 usedvars[2]; // set if var is used in the future including vars used in this inst
u32 keepvars[2];
u16 pqcycles; // the number of cycles to stall if function writes to the regs
u16 type; // INST_
u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags
u32 vffree[2];
s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2];
s8 vfflush[2]; // extra flush regs
s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay
VuInstruction *pPrevInst;
int SetCachedRegs(int upper, u32 vuxyz);
void Recompile(std::list<VuInstruction>::iterator& itinst, u32 vuxyz);
};
enum BlockType
{
BLOCKTYPE_EOP = 0x01, // at least one of the children of the block contains eop (or the block itself)
BLOCKTYPE_FUNCTION = 0x02,
BLOCKTYPE_HASEOP = 0x04, // last inst of block is an eop
BLOCKTYPE_MACFLAGS = 0x08,
BLOCKTYPE_ANALYZED = 0x40,
BLOCKTYPE_IGNORE = 0x80, // special for recursive fns
BLOCKTYPE_ANALYZEDPARENT = 0x100
};
// base block used when recompiling
class VuBaseBlock
{
public:
typedef std::list<VuBaseBlock*> LISTBLOCKS;
VuBaseBlock();
// returns true if the leads to a EOP (ALL VU blocks must ret true)
void AssignVFRegs();
void AssignVIRegs(int parent);
std::list<VuInstruction>::iterator GetInstIterAtPc(int instpc);
void GetInstsAtPc(int instpc, std::list<VuInstruction*>& listinsts);
void Recompile();
u16 type; // BLOCKTYPE_
u16 id;
u16 startpc;
u16 endpc; // first inst not in block
void* pcode; // x86 code pointer
void* pendcode; // end of the x86 code pointer
int cycles;
std::list<VuInstruction> insts;
std::list<VuBaseBlock*> parents;
LISTBLOCKS blocks; // blocks branches to
u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs
// if highest bit is set, addr needs to be relational
u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only
u32 vuxy; // corresponding bit is set if reg's xyz channels are used only
_xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM];
int nStartx86, nEndx86; // indices into s_vecRegArray
int allocX86Regs;
int prevFlagsOutOfBlock;
};
struct WRITEBACK
{
WRITEBACK() : nParentPc(0), cycle(0) //, pStatusWrite(NULL), pMACWrite(NULL)
{
viwrite[0] = viwrite[1] = 0;
viread[0] = viread[1] = 0;
}
void InitInst(VuInstruction* pinst, int cycle) const
{
u32 write = viwrite[0] | viwrite[1];
pinst->type = ((write & (1 << REG_CLIP_FLAG)) ? INST_CLIP_WRITE : 0) |
((write & (1 << REG_MAC_FLAG)) ? INST_MAC_WRITE : 0) |
((write & (1 << REG_STATUS_FLAG)) ? INST_STATUS_WRITE : 0) |
((write & (1 << REG_Q)) ? INST_Q_WRITE : 0);
pinst->nParentPc = nParentPc;
pinst->info.cycle = cycle;
for (int i = 0; i < 2; ++i)
{
pinst->regs[i].VIwrite = viwrite[i];
pinst->regs[i].VIread = viread[i];
}
}
static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2)
{
return w1.cycle < w2.cycle;
}
int nParentPc;
int cycle;
u32 viwrite[2];
u32 viread[2];
};
struct VUPIPELINES
{
fmacPipe fmac[8];
fdivPipe fdiv;
efuPipe efu;
ialuPipe ialu[8];
std::list< WRITEBACK > listWritebacks;
};
VuBaseBlock::VuBaseBlock()
{
type = 0;
endpc = 0;
cycles = 0;
id = 0;
memzero(pChildJumps);
memzero(startregs);
memzero(endregs);
allocX86Regs = nStartx86 = nEndx86 = -1;
prevFlagsOutOfBlock = 0;
startpc = 0;
vuxy = 0;
vuxyz = 0;
pendcode = nullptr;
pcode = nullptr;
}
#define SUPERVU_STACKSIZE 0x1000
static std::list<VuFunctionHeader*> s_listVUHeaders[2];
static std::list<VuFunctionHeader*>* s_plistCachedHeaders[2] = {NULL, NULL};
static VuFunctionHeader** recVUHeaders[2] = { NULL, NULL };
static VuBlockHeader* recVUBlocks[2] = { NULL, NULL };
static u8* recVUStack[2] = { NULL, NULL };
static u8* recVUStackPtr[2] = { NULL, NULL };
static std::vector<_x86regs> s_vecRegArray(128);
static VURegs* VU = NULL;
static std::list<VuBaseBlock*> s_listBlocks;
static u32 s_vu = 0;
static u32 s_UnconditionalDelay = 0; // 1 if there are two sequential branches and the last is unconditional
static u32 g_nLastBlockExecuted = 0;
static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex);
static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes);
static void SuperVUInitLiveness(VuBaseBlock* pblock);
static void SuperVULivenessAnalysis();
static void SuperVUEliminateDeadCode();
static void SuperVUAssignRegs();
//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg);
#define SuperVUFreeXMMreg(...) (void)0
void SuperVUFreeXMMregs(u32* livevars);
static u32* SuperVUStaticAlloc(u32 size);
static void SuperVURecompile();
// allocate VU resources
static void SuperVUAlloc(int vuindex)
{
if (s_recVUMem[vuindex]) return;
wxString mem_name = pxsFmt("SuperVU%u Recompiler Cache", vuindex);
s_recVUMem[vuindex] = new RecompiledCodeReserve( mem_name, 0 );
s_recVUMem[vuindex]->Reserve( sVU_EXESIZE, vuindex ? HostMemoryMap::sVU1rec : HostMemoryMap::sVU0rec, _256mb );
s_recVUMem[vuindex]->SetProfilerName(pxsFmt("sVU%urec",vuindex));
// upper 4 bits must be zero!
if (!s_recVUMem[vuindex]->IsOk())
{
safe_delete(s_recVUMem[vuindex]);
throw Exception::VirtualMemoryMapConflict(mem_name)
.SetDiagMsg(pxsFmt( L"SuperVU failed to allocate virtual memory below 256MB." ))
.SetUserMsg(pxE( L"Out of Memory (sorta): The SuperVU recompiler was unable to reserve the specific memory ranges required, and will not be available for use. This is not a critical error, since the sVU rec is obsolete, and you should use microVU instead anyway. :)"
));
}
}
void DestroyCachedHeaders(int vuindex, int j)
{
std::list<VuFunctionHeader*>::iterator it = s_plistCachedHeaders[vuindex][j].begin();
while (it != s_plistCachedHeaders[vuindex][j].end())
{
delete *it;
it++;
}
s_plistCachedHeaders[vuindex][j].clear();
}
void DestroyVUHeaders(int vuindex)
{
std::list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
while (it != s_listVUHeaders[vuindex].end())
{
delete *it;
it++;
}
s_listVUHeaders[vuindex].clear();
}
// destroy VU resources
void SuperVUDestroy(int vuindex)
{
pxAssertDev(vuindex == 0 || vuindex == 1, "Invalid VU index parameter!");
safe_delete_array(recVUHeaders[vuindex]);
safe_delete_array(recVUBlocks[vuindex]);
if (s_plistCachedHeaders[vuindex] != NULL)
{
for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j)
{
DestroyCachedHeaders(vuindex, j);
}
safe_delete_array(s_plistCachedHeaders[vuindex]);
}
DestroyVUHeaders(vuindex);
safe_delete(s_recVUMem[vuindex]);
safe_delete_array(recVUStack[vuindex]);
}
// reset VU
void SuperVUReset(int vuindex)
{
pxAssertDev(vuindex == 0 || vuindex == 1, "Invalid VU index parameter!");
#ifdef PCSX2_DEBUG
s_vucount = 0;
#endif
DevCon.WriteLn("SuperVU%d: Resetting function and block lists.", vuindex);
if (recVUHeaders[vuindex] == NULL)
recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex] / 8];
if (recVUBlocks[vuindex] == NULL)
recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex] / 8];
if (s_plistCachedHeaders[vuindex] == NULL)
s_plistCachedHeaders[vuindex] = new std::list<VuFunctionHeader*>[s_MemSize[vuindex] / 8];
if (recVUHeaders[vuindex]) memset(recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex] / 8));
if (recVUBlocks[vuindex]) memset(recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex] / 8));
if (s_plistCachedHeaders[vuindex] != NULL)
{
for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j)
{
DestroyCachedHeaders(vuindex, j);
}
}
DestroyVUHeaders(vuindex);
if (!s_recVUMem[vuindex] || !s_recVUMem[vuindex]->IsOk()) return;
DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex);
if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4];
memset(recVUStack[vuindex], 0, SUPERVU_STACKSIZE);
s_recVUMem[vuindex]->Reset();
s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
}
// clear the block and any joining blocks (size given in bytes)
static void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex)
{
std::vector<VuFunctionHeader::RANGE>::iterator itrange;
std::list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
u32 endpc = startpc + ((size + 7) & ~7); // Ensure size is a multiple of u64 (round up)
while (it != s_listVUHeaders[vuindex].end())
{
// for every fn, check if it has code in the range
for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++)
{
if (startpc < (u32)itrange->start + itrange->size && itrange->start < endpc)
break;
}
if (itrange != (*it)->ranges.end())
{
recVUHeaders[vuindex][(*it)->startpc/8] = NULL;
#ifdef SUPERVU_CACHING
std::list<VuFunctionHeader*>* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc / 8];
plist->push_back(*it);
if (plist->size() > 30)
{
// list is too big, delete
//Console.Warning("Performance warning: deleting cached VU program!");
delete plist->front();
plist->pop_front();
}
it = s_listVUHeaders[vuindex].erase(it);
#else
delete *it;
it = s_listVUHeaders[vuindex].erase(it);
#endif
}
else ++it;
}
}
static VuFunctionHeader* s_pFnHeader = NULL;
static VuBaseBlock* s_pCurBlock = NULL;
static VuInstruction* s_pCurInst = NULL;
static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs
static u32 s_PrevStatusWrite = 0, s_PrevMACWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0;
static u32 s_WriteToReadQ = 0;
static u32 s_VIBranchDelay = 0; //Value of register to use in a vi branch delayed situation
u32 s_TotalVUCycles; // total cycles since start of program execution
u32 SuperVUGetVIAddr(int reg, int read)
{
pxAssert(s_pCurInst != NULL);
switch (reg)
{
case REG_STATUS_FLAG:
{
u32 addr = (read == 2) ? s_PrevStatusWrite : (read ? s_StatusRead : s_pCurInst->pStatusWrite);
pxAssert(!read || addr != 0);
return addr;
}
case REG_MAC_FLAG:
{
u32 addr = (read == 2) ? s_PrevMACWrite : (read ? s_MACRead : s_pCurInst->pMACWrite);
return addr;
}
case REG_CLIP_FLAG:
{
u32 addr = (read == 2) ? s_PrevClipWrite : (read ? s_ClipRead : s_pCurInst->pClipWrite);
pxAssert(!read || addr != 0);
return addr;
}
case REG_Q:
return (read || s_WriteToReadQ) ? (uptr)&VU->VI[REG_Q] : (uptr)&VU->q;
case REG_P:
return read ? (uptr)&VU->VI[REG_P] : (uptr)&VU->p;
case REG_I:
return s_PrevIWrite;
}
#ifdef SUPERVU_VIBRANCHDELAY
if ((read != 0) && (s_pCurInst->regs[0].pipe == VUPIPE_BRANCH) && (s_pCurInst->vicached >= 0) && (s_pCurInst->vicached == reg))
{
return (uptr)&s_VIBranchDelay; // test for branch delays
}
#endif
return (uptr)&VU->VI[reg];
}
void SuperVUDumpBlock(std::list<VuBaseBlock*>& blocks, int vuindex)
{
u32 *mem;
u32 i;
g_Conf->Folders.Logs.Mkdir();
AsciiFile eff(
Path::Combine( g_Conf->Folders.Logs, wxsFormat(L"svu%cdump%.4X.txt", s_vu?L'0':L'1', s_pFnHeader->startpc) ), L"w"
);
eff.Printf("Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n");
eff.Printf("Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n"
"%.2x - mac_write, %.2x -qflush\n",
INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE);
eff.Printf("XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n");
std::list<VuBaseBlock*>::iterator itblock;
std::list<VuInstruction>::iterator itinst;
VuBaseBlock::LISTBLOCKS::iterator itchild;
for(itblock = blocks.begin(); itblock != blocks.end(); itblock++)
{
eff.Printf( "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ',
(*itblock)->startpc, (*itblock)->endpc - 8);
for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
{
eff.Printf("%x ", (*itchild)->startpc);
}
eff.Printf("; vuxyz = %x, vuxy = %x\n", (*itblock)->vuxyz&(*itblock)->insts.front().usedvars[1],
(*itblock)->vuxy&(*itblock)->insts.front().usedvars[1]);
itinst = (*itblock)->insts.begin();
i = (*itblock)->startpc;
while (itinst != (*itblock)->insts.end())
{
pxAssert(i <= (*itblock)->endpc);
if (itinst->type & INST_DUMMY)
{
if (itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_))
{
// search for the parent
eff.Printf("writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc);
}
}
else
{
mem = (u32*) & VU->Micro[i];
char* pstr = disVU1MicroUF(mem[1], i + 4);
eff.Printf( "%.4x: %-40s", i, pstr);
if (mem[1] & 0x80000000) eff.Printf( " I=%f(%.8x)\n", *(float*)mem, mem[0]);
else eff.Printf( "%s\n", disVU1MicroLF(mem[0], i));
i += 8;
}
++itinst;
}
eff.Printf("\n");
_x86regs* pregs;
if ((*itblock)->nStartx86 >= 0 || (*itblock)->nEndx86 >= 0)
{
eff.Printf( "X86: AX CX DX BX SP BP SI DI\n");
}
if ((*itblock)->nStartx86 >= 0)
{
pregs = &s_vecRegArray[(*itblock)->nStartx86];
eff.Printf( "STR: ");
for (i = 0; i < iREGCNT_GPR; ++i)
{
if (pregs[i].inuse)
eff.Printf( "%.2d ", pregs[i].reg);
else
eff.Printf( "-1 ");
}
eff.Printf( "\n");
}
if ((*itblock)->nEndx86 >= 0)
{
eff.Printf( "END: ");
pregs = &s_vecRegArray[(*itblock)->nEndx86];
for (i = 0; i < iREGCNT_GPR; ++i)
{
if (pregs[i].inuse)
eff.Printf( "%.2d ", pregs[i].reg);
else
eff.Printf( "-1 ");
}
eff.Printf( "\n");
}
itinst = (*itblock)->insts.begin();
for (i = (*itblock)->startpc; i < (*itblock)->endpc; ++itinst)
{
if (itinst->type & INST_DUMMY)
{
}
else
{
char str[256];
sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]);
eff.Printf( "%-46s i:%.8x_%.8x c:%d pq:%d\n", str,
itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles);
sprintf(str, "XMM r0:%d r1:%d w:%d a:%d t:%x;",
itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]);
eff.Printf( "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str,
itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]);
i += 8;
}
}
//
#if 0 // __linux__
// dump the asm
if ((*itblock)->pcode != NULL)
{
char command[255];
FILE* fasm = fopen("mydump1", "wb");
//Console.WriteLn("writing: %x, %x", (*itblock)->startpc, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode);
fwrite((*itblock)->pcode, 1, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode, fasm);
fclose(fasm);
sprintf(command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 > tempdump");
system(command);
fasm = fopen("tempdump", "r");
// read all of it and write it to f
fseek(fasm, 0, SEEK_END);
vector<char> vbuffer(ftell(fasm));
fseek(fasm, 0, SEEK_SET);
fread(&vbuffer[0], vbuffer.size(), 1, fasm);
fprintf(f, "\n\n");
fwrite(&vbuffer[0], vbuffer.size(), 1, f);
fclose(fasm);
}
#endif
eff.Printf("\n---------------\n");
}
}
// uncomment to count svu exec time
//#define SUPERVU_COUNT
// Private methods
void* SuperVUGetProgram(u32 startpc, int vuindex)
{
pxAssert(startpc < s_MemSize[vuindex]);
pxAssert((startpc % 8) == 0);
pxAssert(recVUHeaders[vuindex] != NULL);
VuFunctionHeader** pheader = &recVUHeaders[vuindex][startpc/8];
if (*pheader == NULL)
{
#ifdef SUPERVU_CACHING
void* pmem = (vuindex & 1) ? VU1.Micro : VU0.Micro;
// check if program exists in cache
std::list<VuFunctionHeader*>::iterator it;
for(it = s_plistCachedHeaders[vuindex][startpc/8].begin(); it != s_plistCachedHeaders[vuindex][startpc/8].end(); it++)
{
if ((*it)->IsSame(pmem))
{
// found, transfer to regular lists
void* pfn = (*it)->pprogfunc;
recVUHeaders[vuindex][startpc/8] = *it;
s_listVUHeaders[vuindex].push_back(*it);
s_plistCachedHeaders[vuindex][startpc/8].erase(it);
return pfn;
}
}
#endif
*pheader = SuperVURecompileProgram(startpc, vuindex);
if (*pheader == NULL)
{
pxAssert(s_TotalVUCycles > 0);
if (vuindex)
VU1.VI[REG_TPC].UL = startpc;
else
VU0.VI[REG_TPC].UL = startpc;
return (void*)SuperVUEndProgram;
}
pxAssert((*pheader)->pprogfunc != NULL);
}
//else pxAssert( (*pheader)->IsSame((vuindex&1) ? VU1.Micro : VU0.Micro) );
pxAssert((*pheader)->startpc == startpc);
return (*pheader)->pprogfunc;
}
bool VuFunctionHeader::IsSame(void* pmem)
{
#ifdef SUPERVU_CACHING
std::vector<RANGE>::iterator it;
for(it = ranges.begin(); it != ranges.end(); it++)
{
if (memcmp_mmx((u8*)pmem + it->start, it->pmem, it->size))
return false;
}
#endif
return true;
}
std::list<VuInstruction>::iterator VuBaseBlock::GetInstIterAtPc(int instpc)
{
pxAssert(instpc >= 0);
int curpc = startpc;
std::list<VuInstruction>::iterator it;
for (it = insts.begin(); it != insts.end(); ++it)
{
if (it->type & INST_DUMMY) continue;
if (curpc == instpc) break;
curpc += 8;
}
if (it != insts.end()) return it;
pxAssert(0);
return insts.begin();
}
void VuBaseBlock::GetInstsAtPc(int instpc, std::list<VuInstruction*>& listinsts)
{
pxAssert(instpc >= 0);
listinsts.clear();
int curpc = startpc;
std::list<VuInstruction>::iterator it;
for (it = insts.begin(); it != insts.end(); ++it)
{
if (it->type & INST_DUMMY) continue;
if (curpc == instpc) break;
curpc += 8;
}
if (it != insts.end())
{
listinsts.push_back(&(*it));
return;
}
// look for the pc in other blocks
for (std::list<VuBaseBlock*>::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock)
{
if (*itblock == this) continue;
if (instpc >= (*itblock)->startpc && instpc < (*itblock)->endpc)
{
listinsts.push_back(&(*(*itblock)->GetInstIterAtPc(instpc)));
}
}
pxAssert(!listinsts.empty());
}
static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex)
{
pxAssert(vuindex == 0 || vuindex == 1);
pxAssert(s_recVUPtr[vuindex] != NULL);
//Console.WriteLn("svu%c rec: %x", '0'+vuindex, startpc);
// if recPtr reached the mem limit reset whole mem
if ((s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtr()) || (s_recVUPtr[vuindex] >= s_recVUMem[vuindex]->GetPtrEnd() - _256kb))
{
Console.WriteLn("SuperVU%u: Recompiler cache reset...", vuindex);
SuperVUReset(0);
SuperVUReset(1);
if (s_TotalVUCycles > 0)
{
// already executing, so return NULL
return NULL;
}
}
std::list<VuBaseBlock*>::iterator itblock;
s_vu = vuindex;
VU = s_vu ? &VU1 : &VU0;
s_pFnHeader = new VuFunctionHeader();
s_listVUHeaders[vuindex].push_back(s_pFnHeader);
s_pFnHeader->startpc = startpc;
memset(recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu] / 8));
// analyze the global graph
s_listBlocks.clear();
VUPIPELINES pipes;
memzero(pipes.fmac);
memzero(pipes.fdiv);
memzero(pipes.efu);
memzero(pipes.ialu);
SuperVUBuildBlocks(NULL, startpc, pipes);
// fill parents
VuBaseBlock::LISTBLOCKS::iterator itchild;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
{
(*itchild)->parents.push_back(*itblock);
}
//(*itblock)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED);
}
pxAssert(s_listBlocks.front()->startpc == startpc);
s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
SuperVUInitLiveness(*itblock);
}
SuperVULivenessAnalysis();
SuperVUEliminateDeadCode();
SuperVUAssignRegs();
#ifdef PCSX2_DEBUG
if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu);
#endif
// code generation
xSetPtr(s_recVUPtr[vuindex]);
g_branch = 0;
SuperVURecompile();
s_recVUPtr[vuindex] = xGetPtr();
// set the function's range
VuFunctionHeader::RANGE r;
s_pFnHeader->ranges.reserve(s_listBlocks.size());
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
r.start = (*itblock)->startpc;
r.size = (*itblock)->endpc - (*itblock)->startpc;
#ifdef SUPERVU_CACHING
//memxor_mmx(r.checksum, &VU->Micro[r.start], r.size);
r.pmem = malloc(r.size);
memcpy(r.pmem, &VU->Micro[r.start], r.size);
#endif
s_pFnHeader->ranges.push_back(r);
}
#if defined(PCSX2_DEBUG) && defined(__linux__)
// dump at the end to capture the actual code
if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu);
#endif
// destroy
for (std::list<VuBaseBlock*>::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock)
{
delete *itblock;
}
s_listBlocks.clear();
pxAssertDev(s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtrEnd(), "SuperVU recompiler cache exceeded! (possible memory corruption)");
return s_pFnHeader;
}
static int _recbranchAddr(u32 vucode)
{
s32 bpc = pc + (_Imm11_ << 3);
/*
if ( bpc < 0 ) {
Console.WriteLn("zerorec branch warning: bpc < 0 ( %x ); Using unsigned imm11", bpc);
bpc = pc + (_UImm11_ << 3);
}*/
bpc &= (s_MemSize[s_vu] - 1);
return bpc;
}
// return inst that flushes everything
static VuInstruction SuperVUFlushInst()
{
VuInstruction inst;
// don't need to read q/p
inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ;
return inst;
}
void SuperVUAddWritebacks(VuBaseBlock* pblock, const std::list<WRITEBACK>& listWritebacks)
{
#ifdef SUPERVU_WRITEBACKS
// regardless of repetition, add the pipes (for selfloops)
std::list<WRITEBACK>::const_iterator itwriteback = listWritebacks.begin();
std::list<VuInstruction>::iterator itinst = pblock->insts.begin(), itinst2;
while (itwriteback != listWritebacks.end())
{
if (itinst != pblock->insts.end() && (itinst->info.cycle < itwriteback->cycle || (itinst->type&INST_DUMMY)))
{
++itinst;
continue;
}
itinst2 = pblock->insts.insert(itinst, VuInstruction());
itwriteback->InitInst(&(*itinst2), vucycle);
++itwriteback;
}
#endif
}
#ifdef SUPERVU_VIBRANCHDELAY
static VuInstruction* getDelayInst(VuInstruction* pInst)
{
// check for the N cycle branch delay
// example of 2 cycles delay (monster house) :
// sqi vi05
// sqi vi05
// ibeq vi05, vi03
// The ibeq should read the vi05 before the first sqi
//more info:
// iaddiu vi01, 0, 1
// ibeq vi01, 0 <- reads vi01 before the iaddiu
// iaddiu vi01, 0, 1
// iaddiu vi01, vi01, 1
// iaddiu vi01, vi01, 1
// ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1)
// ilw vi02, addr
// iaddiu vi01, 0, 1
// ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls
int delay = 1;
VuInstruction* pDelayInst = NULL;
VuInstruction* pTargetInst = pInst->pPrevInst;
while (1)
{
if (pTargetInst != NULL
&& pTargetInst->info.cycle + delay == pInst->info.cycle
&& (pTargetInst->regs[0].pipe == VUPIPE_IALU || pTargetInst->regs[0].pipe == VUPIPE_FMAC)
&& ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff)
&& (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff))
&& !(pTargetInst->regs[0].VIread&((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_CLIP_FLAG))))
{
pDelayInst = pTargetInst;
pTargetInst = pTargetInst->pPrevInst;
delay++;
if (delay == 5) //maximum delay is 4 (length of the pipeline)
{
DevCon.WriteLn("supervu: cycle branch delay maximum (4) is reached");
break;
}
}
else break;
}
if (delay > 1) DevCon.WriteLn("supervu: %d cycle branch delay detected: %x %x", delay - 1, pc, s_pFnHeader->startpc);
return pDelayInst;
}
#endif
static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes)
{
// check if block already exists
//Console.WriteLn("startpc %x", startpc);
startpc &= (s_vu ? 0x3fff : 0xfff);
VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8];
if (pbh->pblock != NULL)
{
VuBaseBlock* pblock = pbh->pblock;
std::list<VuInstruction>::iterator itinst;
if (pblock->startpc == startpc)
{
SuperVUAddWritebacks(pblock, pipes.listWritebacks);
return pblock;
}
// have to divide the blocks, pnewblock is first block
pxAssert(startpc > pblock->startpc);
pxAssert(startpc < pblock->endpc);
u32 dummyinst = (startpc - pblock->startpc) >> 3;
// count inst non-dummy insts
itinst = pblock->insts.begin();
int cycleoff = 0;
while (dummyinst > 0)
{
if (itinst->type & INST_DUMMY)
++itinst;
else
{
cycleoff = itinst->info.cycle;
++itinst;
--dummyinst;
}
}
// NOTE: still leaves insts with their writebacks in different blocks
while (itinst->type & INST_DUMMY)
++itinst;
// the difference in cycles between dummy insts (naruto utlimate ninja)
int cyclediff = 0;
if (parent == pblock)
cyclediff = itinst->info.cycle - cycleoff;
cycleoff = itinst->info.cycle;
// new block
VuBaseBlock* pnewblock = new VuBaseBlock();
s_listBlocks.push_back(pnewblock);
pnewblock->startpc = startpc;
pnewblock->endpc = pblock->endpc;
pnewblock->cycles = pblock->cycles - cycleoff + cyclediff;
pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks);
pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end());
pnewblock->type = pblock->type;
// any writebacks in the next 3 cycles also belong to original block
// for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) {
// if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) {
//
// if( !(itinst->type & INST_Q_WRITE) )
// pblock->insts.push_back(*itinst);
// itinst = pnewblock->insts.erase(itinst);
// continue;
// }
//
// ++itinst;
// }
pbh = &recVUBlocks[s_vu][startpc/8];
for (u32 inst = startpc; inst < pblock->endpc; inst += 8)
{
if (pbh->pblock == pblock)
pbh->pblock = pnewblock;
++pbh;
}
for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); itinst++)
{
itinst->info.cycle -= cycleoff;
}
SuperVUAddWritebacks(pnewblock, pipes.listWritebacks);
// old block
pblock->blocks.push_back(pnewblock);
pblock->endpc = startpc;
pblock->cycles = cycleoff;
pblock->type &= BLOCKTYPE_MACFLAGS;
//pblock->insts.push_back(SuperVUFlushInst()); //don't need
return pnewblock;
}
VuBaseBlock* pblock = new VuBaseBlock();
s_listBlocks.push_back(pblock);
int i = 0;
g_branch = 0;
pc = startpc;
pblock->startpc = startpc;
// clear stalls (might be a prob)
memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac));
memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv));
memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu));
memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu));
// memset(VU->fmac, 0, sizeof(VU->fmac));
// memset(&VU->fdiv, 0, sizeof(VU->fdiv));
// memset(&VU->efu, 0, sizeof(VU->efu));
vucycle = 0;
u8 macflags = 0;
std::list< WRITEBACK > listWritebacks;
std::list< WRITEBACK >::iterator itwriteback;
std::list<VuInstruction>::iterator itinst;
u32 hasSecondBranch = 0;
u32 needFullStatusFlag = 0;
#ifdef SUPERVU_WRITEBACKS
listWritebacks = pipes.listWritebacks;
#endif
// first analysis pass for status flags
while (1)
{
u32* ptr = (u32*) & VU->Micro[pc];
pc += 8;
int prevbranch = g_branch;
if (ptr[1] & 0x40000000)
g_branch = 1;
if (!(ptr[1] & 0x80000000)) // not I
{
switch (ptr[0] >> 25)
{
case 0x24: // jr
case 0x25: // jalr
case 0x20: // B
case 0x21: // BAL
case 0x28: // IBEQ
case 0x2f: // IBGEZ
case 0x2d: // IBGTZ
case 0x2e: // IBLEZ
case 0x2c: // IBLTZ
case 0x29: // IBNE
g_branch = 1;
break;
case 0x14: // fseq
case 0x17: // fsor
//needFullStatusFlag = 2;
break;
case 0x16: // fsand
if ((ptr[0]&0xc0))
{
// sometimes full sticky bits are needed (simple series 2000 - oane chapara)
//Console.WriteLn("needSticky: %x-%x", s_pFnHeader->startpc, startpc);
needFullStatusFlag = 2;
}
break;
}
}
if (prevbranch)
break;
if (pc >= s_MemSize[s_vu])
{
Console.Error("inf vu0 prog %x", startpc);
break;
}
}
// second full pass
pc = startpc;
g_branch = 0;
VuInstruction* pprevinst = NULL, *pinst = NULL;
while (1)
{
if (pc == s_MemSize[s_vu])
{
g_branch |= 8;
break;
}
if (!g_branch && pbh->pblock != NULL)
{
pblock->blocks.push_back(pbh->pblock);
break;
}
int prevbranch = g_branch;
if (!prevbranch)
{
pbh->pblock = pblock;
}
else pxAssert(prevbranch || pbh->pblock == NULL);
pblock->insts.push_back(VuInstruction());
pprevinst = pinst;
pinst = &pblock->insts.back();
pinst->pPrevInst = pprevinst;
SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs);
#ifdef SUPERVU_VIBRANCHDELAY
if (pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1)
{
VuInstruction* pdelayinst = getDelayInst(pinst);
if (pdelayinst)
{
pdelayinst->type |= INST_CACHE_VI;
// find the correct register
u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread;
for (int i = 0; i < 16; ++i)
{
if (mask & (1 << i))
{
pdelayinst->vicached = i;
break;
}
}
pinst->vicached = pdelayinst->vicached;
}
}
#endif
if (prevbranch)
{
if (pinst->regs[0].pipe == VUPIPE_BRANCH)
hasSecondBranch = 1;
pinst->type |= INST_BRANCH_DELAY;
}
// check write back
for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end();)
{
if (pinst->info.cycle >= itwriteback->cycle)
{
itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction());
itwriteback->InitInst(&(*itinst), pinst->info.cycle);
itwriteback = listWritebacks.erase(itwriteback);
}
else ++itwriteback;
}
// add new writebacks
WRITEBACK w;
const u32 allflags = (1 << REG_CLIP_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG);
for (int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags;
if (pinst->info.macflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_MAC_FLAG);
if (pinst->info.statusflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_STATUS_FLAG);
if ((pinst->info.macflag | pinst->info.statusflag) & VUOP_READ)
macflags = 1;
if (pinst->regs[0].VIread & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG)))
macflags = 1;
// if( pinst->regs[1].pipe == VUPIPE_FMAC && (pinst->regs[1].VFwrite==0&&!(pinst->regs[1].VIwrite&(1<<REG_ACC_FLAG))) )
// pinst->regs[0].VIread |= (1<<REG_MAC_FLAG)|(1<<REG_STATUS_FLAG);
// uregs->VIwrite |= lregs->VIwrite & (1<<REG_STATUS_FLAG);
if (w.viwrite[0] | w.viwrite[1])
{
// only if coming from fmac pipeline
if (((pinst->info.statusflag&VUOP_WRITE) && !(pinst->regs[0].VIwrite&(1 << REG_STATUS_FLAG))) && needFullStatusFlag)
{
// don't read if first inst
if (needFullStatusFlag == 1)
w.viread[1] |= (1 << REG_STATUS_FLAG);
else --needFullStatusFlag;
}
for (int j = 0; j < 2; ++j)
{
w.viread[j] |= pinst->regs[j].VIread & allflags;
if ((pinst->regs[j].VIread&(1 << REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_STATUS_FLAG)))
{
// don't need the read anymore
pinst->regs[j].VIread &= ~(1 << REG_STATUS_FLAG);
}
if ((pinst->regs[j].VIread&(1 << REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_MAC_FLAG)))
{
// don't need the read anymore
pinst->regs[j].VIread &= ~(1 << REG_MAC_FLAG);
}
pinst->regs[j].VIwrite &= ~allflags;
}
if (pinst->info.macflag & VUOP_READ) w.viread[1] |= 1 << REG_MAC_FLAG;
if (pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1 << REG_STATUS_FLAG;
w.nParentPc = pc - 8;
w.cycle = pinst->info.cycle + 4;
listWritebacks.push_back(w);
}
if (pinst->info.q&VUOP_READ) pinst->type |= INST_Q_READ;
if (pinst->info.p&VUOP_READ) pinst->type |= INST_P_READ;
if (pinst->info.q&VUOP_WRITE)
{
pinst->pqcycles = QWaitTimes[pinst->info.pqinst] + 1;
memset(&w, 0, sizeof(w));
w.nParentPc = pc - 8;
w.cycle = pinst->info.cycle + pinst->pqcycles;
w.viwrite[0] = 1 << REG_Q;
listWritebacks.push_back(w);
}
if (pinst->info.p&VUOP_WRITE)
pinst->pqcycles = PWaitTimes[pinst->info.pqinst] + 1;
if (prevbranch)
{
break;
}
// make sure there is always a branch
// sensible soccer overflows on vu0, so increase the limit...
if ((s_vu == 1 && i >= 0x799) || (s_vu == 0 && i >= 0x201))
{
Console.Error("VuRec base block doesn't terminate!");
pxAssert(0);
break;
}
i++;
pbh++;
}
if (macflags)
pblock->type |= BLOCKTYPE_MACFLAGS;
pblock->endpc = pc;
u32 lastpc = pc;
pblock->cycles = vucycle;
#ifdef SUPERVU_WRITEBACKS
if (!g_branch || (g_branch&8))
#endif
{
// flush writebacks
if (!listWritebacks.empty())
{
listWritebacks.sort(WRITEBACK::SortWritebacks);
for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback)
{
if (itwriteback->viwrite[0] & (1 << REG_Q))
{
// ignore all Q writebacks
continue;
}
pblock->insts.push_back(VuInstruction());
itwriteback->InitInst(&pblock->insts.back(), vucycle);
}
listWritebacks.clear();
}
}
if (!g_branch) return pblock;
if (g_branch & 8)
{
// what if also a jump?
pblock->type |= BLOCKTYPE_EOP | BLOCKTYPE_HASEOP;
// add an instruction to flush p and q (if written)
pblock->insts.push_back(SuperVUFlushInst());
return pblock;
}
// it is a (cond) branch or a jump
u32 vucode = *(u32*)(VU->Micro + lastpc - 16);
int bpc = _recbranchAddr(vucode) - 8;
VUPIPELINES newpipes;
memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac));
memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv));
memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu));
memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu));
for (i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle;
newpipes.fdiv.sCycle -= vucycle;
newpipes.efu.sCycle -= vucycle;
for (i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle;
if (!listWritebacks.empty())
{
// flush all when jumping, send down the pipe when in branching
bool bFlushWritebacks = (vucode >> 25) == 0x24 || (vucode >> 25) == 0x25;//||(vucode>>25)==0x20||(vucode>>25)==0x21;
listWritebacks.sort(WRITEBACK::SortWritebacks);
for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback)
{
if (itwriteback->viwrite[0] & (1 << REG_Q))
{
// ignore all Q writebacks
continue;
}
if (itwriteback->cycle < vucycle || bFlushWritebacks)
{
pblock->insts.push_back(VuInstruction());
itwriteback->InitInst(&pblock->insts.back(), vucycle);
}
else
{
newpipes.listWritebacks.push_back(*itwriteback);
newpipes.listWritebacks.back().cycle -= vucycle;
}
}
}
if (!newpipes.listWritebacks.empty()) // other blocks might read the mac flags
pblock->type |= BLOCKTYPE_MACFLAGS;
u32 firstbranch = vucode >> 25;
switch (firstbranch)
{
case 0x24: // jr
pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP
pblock->insts.push_back(SuperVUFlushInst());
firstbranch = 0xff; //Non-Conditional Jump
break;
case 0x25: // jalr
{
// linking, so will return to procedure
pblock->insts.push_back(SuperVUFlushInst());
VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pxAssert(pblock != NULL);
pblock->blocks.push_back(pjumpblock);
firstbranch = 0xff; //Non-Conditional Jump
break;
}
case 0x20: // B
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pxAssert(pblock != NULL);
pblock->blocks.push_back(pbranchblock);
firstbranch = 0xff; //Non-Conditional Jump
break;
}
case 0x21: // BAL
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pxAssert(pblock != NULL);
pblock->blocks.push_back(pbranchblock);
firstbranch = 0xff; //Non-Conditional Jump
break;
}
case 0x28: // IBEQ
case 0x2f: // IBGEZ
case 0x2d: // IBGTZ
case 0x2e: // IBLEZ
case 0x2c: // IBLTZ
case 0x29: // IBNE
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pxAssert(pblock != NULL);
pblock->blocks.push_back(pbranchblock);
// if has a second branch that is B or BAL, skip this
u32 secondbranch = (*(u32*)(VU->Micro + lastpc - 8)) >> 25;
if (!hasSecondBranch || (secondbranch != 0x21 && secondbranch != 0x20))
{
pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pblock->blocks.push_back(pbranchblock);
}
break;
}
default:
pxAssert(pblock->blocks.size() == 1);
break;
}
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
#ifdef SUPERVU_VIBRANCHDELAY
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///// NOTE! This could still be a hack for KH2/GoW, but until we know how it properly works, this will do for now.///
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if (hasSecondBranch && firstbranch != 0xff) //check the previous jump was conditional and there is a second branch
{
#else
if (hasSecondBranch)
{
#endif
u32 vucode = *(u32*)(VU->Micro + lastpc - 8);
pc = lastpc;
int bpc = _recbranchAddr(vucode);
switch (vucode >> 25)
{
case 0x24: // jr
Console.Error("svurec bad jr jump!");
pxAssert(0);
break;
case 0x25: // jalr
{
Console.Error("svurec bad jalr jump!");
pxAssert(0);
break;
}
case 0x20: // B
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pblock->blocks.push_back(pbranchblock);
break;
}
case 0x21: // BAL
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// replace instead of pushing a new block
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pblock->blocks.push_back(pbranchblock);
break;
}
case 0x28: // IBEQ
case 0x2f: // IBGEZ
case 0x2d: // IBGTZ
case 0x2e: // IBLEZ
case 0x2c: // IBLTZ
case 0x29: // IBNE
{
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);
// update pblock since could have changed
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pblock->blocks.push_back(pbranchblock);
// only add the block if the previous branch doesn't include the next instruction (ie, if a direct jump)
if (firstbranch == 0x24 || firstbranch == 0x25 || firstbranch == 0x20 || firstbranch == 0x21)
{
pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
pblock->blocks.push_back(pbranchblock);
}
break;
}
jNO_DEFAULT;
}
}
return recVUBlocks[s_vu][startpc/8].pblock;
}
static void SuperVUInitLiveness(VuBaseBlock* pblock)
{
std::list<VuInstruction>::iterator itinst, itnext;
pxAssert(!pblock->insts.empty());
for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst)
{
if (itinst->type & INST_DUMMY_)
{
itinst->addvars[0] = itinst->addvars[1] = 0xffffffff;
itinst->livevars[0] = itinst->livevars[1] = 0xffffffff;
itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff;
itinst->usedvars[0] = itinst->usedvars[1] = 0;
}
else
{
itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread;
itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) |
(itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) |
(itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) |
(itinst->regs[1].VFread1 ? (1 << itinst->regs[1].VFread1) : 0);
// vf0 is not handled by VFread
if (!itinst->regs[0].VFread0 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
if (!itinst->regs[1].VFread0 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
if (!itinst->regs[0].VFread1 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[0].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;
if (!itinst->regs[1].VFread1 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[1].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;
u32 vfwrite = 0;
if (itinst->regs[0].VFwrite != 0)
{
if (itinst->regs[0].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[0].VFwrite;
else vfwrite |= 1 << itinst->regs[0].VFwrite;
}
if (itinst->regs[1].VFwrite != 0)
{
if (itinst->regs[1].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[1].VFwrite;
else vfwrite |= 1 << itinst->regs[1].VFwrite;
}
if ((itinst->regs[1].VIwrite & (1 << REG_ACC_FLAG)) && itinst->regs[1].VFwxyzw != 0xf)
itinst->addvars[1] |= 1 << REG_ACC_FLAG;
u32 viwrite = (itinst->regs[0].VIwrite | itinst->regs[1].VIwrite);
itinst->usedvars[0] = itinst->addvars[0] | viwrite;
itinst->usedvars[1] = itinst->addvars[1] | vfwrite;
// itinst->addvars[0] &= ~viwrite;
// itinst->addvars[1] &= ~vfwrite;
itinst->keepvars[0] = ~viwrite;
itinst->keepvars[1] = ~vfwrite;
}
}
itinst = --pblock->insts.end();
while (itinst != pblock->insts.begin())
{
itnext = itinst;
--itnext;
itnext->usedvars[0] |= itinst->usedvars[0];
itnext->usedvars[1] |= itinst->usedvars[1];
itinst = itnext;
}
}
u32 COMPUTE_LIVE(u32 R, u32 K, u32 L)
{
u32 live = R | ((L) & (K));
// special process mac and status flags
// only propagate liveness if doesn't write to the flag
if (!(L&(1 << REG_STATUS_FLAG)) && !(K&(1 << REG_STATUS_FLAG)))
live &= ~(1 << REG_STATUS_FLAG);
if (!(L&(1 << REG_MAC_FLAG)) && !(K&(1 << REG_MAC_FLAG)))
live &= ~(1 << REG_MAC_FLAG);
return live;//|(1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG);
}
static void SuperVULivenessAnalysis()
{
BOOL changed;
std::list<VuBaseBlock*>::reverse_iterator itblock;
std::list<VuInstruction>::iterator itinst, itnext;
VuBaseBlock::LISTBLOCKS::iterator itchild;
u32 livevars[2];
do
{
changed = FALSE;
for (itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock)
{
u32 newlive;
VuBaseBlock* pb = *itblock;
// the last inst relies on the neighbor's insts
itinst = --pb->insts.end();
if (!pb->blocks.empty())
{
livevars[0] = 0;
livevars[1] = 0;
for (itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild)
{
VuInstruction& front = (*itchild)->insts.front();
livevars[0] |= front.livevars[0];
livevars[1] |= front.livevars[1];
}
newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]);
// should propagate status flags whose parent insts are not in this block
// if( itinst->nParentPc >= 0 && (itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
// newlive |= livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));
if (itinst->livevars[0] != newlive)
{
changed = TRUE;
itinst->livevars[0] = newlive;
}
newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]);
if (itinst->livevars[1] != newlive)
{
changed = TRUE;
itinst->livevars[1] = newlive;
}
}
while (itinst != pb->insts.begin())
{
itnext = itinst;
--itnext;
newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]);
// should propagate status flags whose parent insts are not in this block
// if( itnext->nParentPc >= 0 && (itnext->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) && !(itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
// newlive |= itinst->livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));
if (itnext->livevars[0] != newlive)
{
changed = TRUE;
itnext->livevars[0] = newlive;
itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
}
else
{
newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
if (itnext->livevars[1] != newlive)
{
changed = TRUE;
itnext->livevars[1] = newlive;
}
}
itinst = itnext;
}
// if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) {
// changed = TRUE;
// itinst->livevars[0] |= livevars[0];
// }
// if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) {
// changed = TRUE;
// itinst->livevars[1] |= livevars[1];
// }
//
// while( itinst != pb->insts.begin() ) {
//
// itnext = itinst; --itnext;
// if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) {
// changed = TRUE;
// itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0];
// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
// }
// else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) {
// changed = TRUE;
// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
// }
//
// itinst = itnext;
// }
}
}
while (changed);
}
static void SuperVUEliminateDeadCode()
{
std::list<VuBaseBlock*>::iterator itblock;
VuBaseBlock::LISTBLOCKS::iterator itchild;
std::list<VuInstruction>::iterator itinst, itnext;
std::list<VuInstruction*> listParents;
std::list<VuInstruction*>::iterator itparent;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
#ifdef PCSX2_DEBUG
u32 startpc = (*itblock)->startpc;
u32 curpc = startpc;
#endif
itnext = (*itblock)->insts.begin();
itinst = itnext++;
while (itnext != (*itblock)->insts.end())
{
if (itinst->type & (INST_CLIP_WRITE | INST_MAC_WRITE | INST_STATUS_WRITE))
{
u32 live0 = itnext->livevars[0];
if (itinst->nParentPc >= 0 && itnext->nParentPc >= 0 && itinst->nParentPc != itnext->nParentPc) // superman returns
{
// take the live vars from the next next inst
std::list<VuInstruction>::iterator itnextnext = itnext;
++itnextnext;
if (itnextnext != (*itblock)->insts.end())
{
live0 = itnextnext->livevars[0];
}
}
itinst->regs[0].VIwrite &= live0;
itinst->regs[1].VIwrite &= live0;
u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;
(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
int removetype = 0;
for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
{
VuInstruction* parent = *itparent;
if (viwrite & (1 << REG_CLIP_FLAG))
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
}
else
removetype |= INST_CLIP_WRITE;
if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
{
if (!(viwrite&(1 << REG_MAC_FLAG)))
{
//parent->info.macflag = 0;
// parent->regs[0].VIwrite &= ~(1<<REG_MAC_FLAG);
// parent->regs[1].VIwrite &= ~(1<<REG_MAC_FLAG);
// can be nonzero when a writeback belong to a different block and one branch uses
// it and this one doesn't
#ifndef SUPERVU_WRITEBACKS
pxAssert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
#endif
// if VUPIPE_FMAC and destination is vf00, probably need to keep the mac flag
if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
{
parent->regs[0].VIwrite |= ((1 << REG_MAC_FLAG));
parent->regs[1].VIwrite |= ((1 << REG_MAC_FLAG));
}
else
removetype |= INST_MAC_WRITE;
}
else
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
}
}
else removetype |= INST_MAC_WRITE;
if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
{
if (!(viwrite&(1 << REG_STATUS_FLAG)))
{
//parent->info.statusflag = 0;
// parent->regs[0].VIwrite &= ~(1<<REG_STATUS_FLAG);
// parent->regs[1].VIwrite &= ~(1<<REG_STATUS_FLAG);
// can be nonzero when a writeback belong to a different block and one branch uses
// it and this one doesn't
#ifndef SUPERVU_WRITEBACKS
pxAssert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
#endif
if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
{
parent->regs[0].VIwrite |= ((1 << REG_STATUS_FLAG));
parent->regs[1].VIwrite |= ((1 << REG_STATUS_FLAG));
}
else
removetype |= INST_STATUS_WRITE;
}
else
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
}
}
else removetype |= INST_STATUS_WRITE;
}
itinst->type &= ~removetype;
if (itinst->type == 0)
{
itnext = (*itblock)->insts.erase(itinst);
itinst = itnext++;
continue;
}
}
#ifdef PCSX2_DEBUG
else
{
curpc += 8;
}
#endif
itinst = itnext;
++itnext;
}
if (itinst->type & INST_DUMMY)
{
// last inst with the children
u32 mask = 0;
for (itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild)
{
mask |= (*itchild)->insts.front().livevars[0];
}
itinst->regs[0].VIwrite &= mask;
itinst->regs[1].VIwrite &= mask;
u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;
if (itinst->nParentPc >= 0)
{
(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
int removetype = 0;
for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
{
VuInstruction* parent = *itparent;
if (viwrite & (1 << REG_CLIP_FLAG))
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
}
else removetype |= INST_CLIP_WRITE;
if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
{
if (!(viwrite&(1 << REG_MAC_FLAG)))
{
//parent->info.macflag = 0;
#ifndef SUPERVU_WRITEBACKS
pxAssert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
#endif
removetype |= INST_MAC_WRITE;
}
else
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
}
}
else removetype |= INST_MAC_WRITE;
if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
{
if (!(viwrite&(1 << REG_STATUS_FLAG)))
{
//parent->info.statusflag = 0;
#ifndef SUPERVU_WRITEBACKS
pxAssert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
#endif
removetype |= INST_STATUS_WRITE;
}
else
{
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
}
}
else removetype |= INST_STATUS_WRITE;
}
itinst->type &= ~removetype;
if (itinst->type == 0)
{
(*itblock)->insts.erase(itinst);
}
}
}
}
}
void VuBaseBlock::AssignVFRegs()
{
VuBaseBlock::LISTBLOCKS::iterator itchild;
std::list<VuBaseBlock*>::iterator itblock;
std::list<VuInstruction>::iterator itinst, itnext, itinst2;
// init the start regs
if (type & BLOCKTYPE_ANALYZED) return; // nothing changed
memcpy(xmmregs, startregs, sizeof(xmmregs));
if (type & BLOCKTYPE_ANALYZED)
{
u32 i;
// check if changed
for (i = 0; i < iREGCNT_XMM; ++i)
{
if (xmmregs[i].inuse != startregs[i].inuse)
break;
if (xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type))
break;
}
if (i == iREGCNT_XMM) return; // nothing changed
}
u8* oldX86 = x86Ptr;
for(itinst = insts.begin(); itinst != insts.end(); itinst++)
{
if (itinst->type & INST_DUMMY) continue;
// reserve, go from upper to lower
int lastwrite = -1;
for (int i = 1; i >= 0; --i)
{
_VURegsNum* regs = itinst->regs + i;
// redo the counters so that the proper regs are released
for (u32 j = 0; j < iREGCNT_XMM; ++j)
{
if (xmmregs[j].inuse)
{
if (xmmregs[j].type == XMMTYPE_VFREG)
{
int count = 0;
itinst2 = itinst;
if (i)
{
if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg)
{
itinst2 = insts.end();
break;
}
else
{
++count;
++itinst2;
}
}
while (itinst2 != insts.end())
{
if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg ||
itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg)
break;
++count;
++itinst2;
}
xmmregs[j].counter = 1000 - count;
}
else
{
pxAssert(xmmregs[j].type == XMMTYPE_ACC);
int count = 0;
itinst2 = itinst;
if (i) ++itinst2; // acc isn't used in lower insts
while (itinst2 != insts.end())
{
pxAssert(!((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite) & (1 << REG_ACC_FLAG)));
if ((itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) & (1 << REG_ACC_FLAG))
break;
++count;
++itinst2;
}
xmmregs[j].counter = 1000 - count;
}
}
}
if (regs->VFread0) _addNeededVFtoXMMreg(regs->VFread0);
if (regs->VFread1) _addNeededVFtoXMMreg(regs->VFread1);
if (regs->VFwrite) _addNeededVFtoXMMreg(regs->VFwrite);
if (regs->VIread & (1 << REG_ACC_FLAG)) _addNeededACCtoXMMreg();
if (regs->VIread & (1 << REG_VF0_FLAG)) _addNeededVFtoXMMreg(0);
// alloc
itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1;
itinst->vfflush[i] = -1;
if (regs->VFread0)
itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0);
else if (regs->VIread & (1 << REG_VF0_FLAG))
itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
if (regs->VFread1)
itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0);
else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff)
itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
int reusereg = -1; // 0 - VFwrite, 1 - VFAcc
if (regs->VFwrite)
{
pxAssert(!(regs->VIwrite&(1 << REG_ACC_FLAG)));
if (regs->VFwxyzw == 0xf)
{
itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0);
if (itinst->vfwrite[i] < 0) reusereg = 0;
}
else
{
itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
}
}
else if (regs->VIwrite & (1 << REG_ACC_FLAG))
{
if (regs->VFwxyzw == 0xf)
{
itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0);
if (itinst->vfacc[i] < 0) reusereg = 1;
}
else
{
itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
}
}
if (reusereg >= 0)
{
// reuse
itnext = itinst;
itnext++;
u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG;
u8 reg = reusereg ? 0 : regs->VFwrite;
if (itinst->vfacc[i] >= 0 && lastwrite != itinst->vfacc[i] &&
(itnext == insts.end() || ((regs->VIread&(1 << REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1 << REG_ACC_FLAG)) || !(itnext->livevars[0]&(1 << REG_ACC_FLAG))))))
{
pxAssert(reusereg == 0);
if (itnext == insts.end() || (itnext->livevars[0]&(1 << REG_ACC_FLAG))) _freeXMMreg(itinst->vfacc[i]);
xmmregs[itinst->vfacc[i]].inuse = 1;
xmmregs[itinst->vfacc[i]].reg = reg;
xmmregs[itinst->vfacc[i]].type = type;
xmmregs[itinst->vfacc[i]].mode = 0;
itinst->vfwrite[i] = itinst->vfacc[i];
}
else if (itinst->vfread0[i] >= 0 && lastwrite != itinst->vfread0[i] &&
(itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread0)) || !(itnext->livevars[1]&(1 << regs->VFread0))))))
{
if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread0)) _freeXMMreg(itinst->vfread0[i]);
xmmregs[itinst->vfread0[i]].inuse = 1;
xmmregs[itinst->vfread0[i]].reg = reg;
xmmregs[itinst->vfread0[i]].type = type;
xmmregs[itinst->vfread0[i]].mode = 0;
if (reusereg)
itinst->vfacc[i] = itinst->vfread0[i];
else
itinst->vfwrite[i] = itinst->vfread0[i];
}
else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] &&
(itnext == insts.end() || (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread1)) || !(itnext->livevars[1]&(1 << regs->VFread1))))))
{
if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread1)) _freeXMMreg(itinst->vfread1[i]);
xmmregs[itinst->vfread1[i]].inuse = 1;
xmmregs[itinst->vfread1[i]].reg = reg;
xmmregs[itinst->vfread1[i]].type = type;
xmmregs[itinst->vfread1[i]].mode = 0;
if (reusereg)
itinst->vfacc[i] = itinst->vfread1[i];
else
itinst->vfwrite[i] = itinst->vfread1[i];
}
else
{
if (reusereg)
itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
else
itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
}
}
if (itinst->vfwrite[i] >= 0) lastwrite = itinst->vfwrite[i];
else if (itinst->vfacc[i] >= 0) lastwrite = itinst->vfacc[i];
// always alloc at least 1 temp reg
int free0 = (i || regs->VFwrite || regs->VFread0 || regs->VFread1 || (regs->VIwrite & (1 << REG_ACC_FLAG)) || (regs->VIread & (1 << REG_VF0_FLAG)))
? _allocTempXMMreg(XMMT_FPS, -1) : -1;
int free1 = 0, free2 = 0;
if (i == 0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0] == itinst->vfwrite[1] || itinst->vfread1[0] == itinst->vfwrite[1]))
{
itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1);
}
if (i == 1 && (regs->VIwrite & (1 << REG_CLIP_FLAG)))
{
// CLIP inst, need two extra regs
if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1);
free1 = _allocTempXMMreg(XMMT_FPS, -1);
free2 = _allocTempXMMreg(XMMT_FPS, -1);
_freeXMMreg(free1);
_freeXMMreg(free2);
}
else if (regs->VIwrite & (1 << REG_P))
{
// EFU inst, need extra reg
free1 = _allocTempXMMreg(XMMT_FPS, -1);
if (free0 == -1) free0 = free1;
_freeXMMreg(free1);
}
if (itinst->vfflush[i] >= 0) _freeXMMreg(itinst->vfflush[i]);
if (free0 >= 0) _freeXMMreg(free0);
itinst->vffree[i] = (free0 & 0xf) | (free1 << 8) | (free2 << 16);
if (free0 == -1) itinst->vffree[i] |= VFFREE_INVALID0;
_clearNeededXMMregs();
}
}
pxAssert(x86Ptr == oldX86);
u32 analyzechildren = !(type & BLOCKTYPE_ANALYZED);
type |= BLOCKTYPE_ANALYZED;
//memset(endregs, 0, sizeof(endregs));
if (analyzechildren)
{
for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
{
(*itchild)->AssignVFRegs();
}
}
}
struct MARKOVBLANKET
{
std::list<VuBaseBlock*> parents;
std::list<VuBaseBlock*> children;
};
static MARKOVBLANKET s_markov;
void VuBaseBlock::AssignVIRegs(int parent)
{
const int maxregs = 6;
if (parent)
{
if ((type&BLOCKTYPE_ANALYZEDPARENT))
return;
type |= BLOCKTYPE_ANALYZEDPARENT;
s_markov.parents.push_back(this);
for (LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it)
{
(*it)->AssignVIRegs(0);
}
return;
}
if ((type&BLOCKTYPE_ANALYZED))
return;
// child
pxAssert(allocX86Regs == -1);
allocX86Regs = s_vecRegArray.size();
s_vecRegArray.resize(allocX86Regs + iREGCNT_GPR);
_x86regs* pregs = &s_vecRegArray[allocX86Regs];
memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR);
pxAssert(!parents.empty());
std::list<VuBaseBlock*>::iterator itparent;
u32 usedvars = insts.front().usedvars[0];
u32 livevars = insts.front().livevars[0];
if (!parents.empty())
{
u32 usedvars2 = 0xffffffff;
for(itparent = parents.begin(); itparent != parents.end(); itparent++)
{
usedvars2 &= (*itparent)->insts.front().usedvars[0];
}
usedvars |= usedvars2;
}
usedvars &= livevars;
// currently order doesn't matter
int num = 0;
if (usedvars)
{
for (int i = 1; i < 16; ++i)
{
if (usedvars & (1 << i))
{
pregs[num].inuse = 1;
pregs[num].reg = i;
livevars &= ~(1 << i);
if (++num >= maxregs) break;
}
}
}
if (num < maxregs)
{
livevars &= ~usedvars;
livevars &= insts.back().usedvars[0];
if (livevars)
{
for (int i = 1; i < 16; ++i)
{
if (livevars & (1 << i))
{
pregs[num].inuse = 1;
pregs[num].reg = i;
if (++num >= maxregs) break;
}
}
}
}
s_markov.children.push_back(this);
type |= BLOCKTYPE_ANALYZED;
for(itparent = parents.begin(); itparent != parents.end(); itparent++)
{
(*itparent)->AssignVIRegs(1);
}
}
static void SuperVUAssignRegs()
{
std::list<VuBaseBlock*>::iterator itblock, itblock2;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
}
s_listBlocks.front()->AssignVFRegs();
// VI assignments, find markov blanket for each node in the graph
// then allocate regs based on the commonly used ones
#ifdef SUPERVU_X86CACHING
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
(*itblock)->type &= ~(BLOCKTYPE_ANALYZED | BLOCKTYPE_ANALYZEDPARENT);
}
s_vecRegArray.resize(0);
u8 usedregs[16];
// note: first block always has to start with no alloc regs
bool bfirst = true;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
if (!((*itblock)->type & BLOCKTYPE_ANALYZED))
{
if ((*itblock)->parents.size() == 0)
{
(*itblock)->type |= BLOCKTYPE_ANALYZED;
bfirst = false;
continue;
}
s_markov.children.clear();
s_markov.parents.clear();
(*itblock)->AssignVIRegs(0);
// assign the regs
int regid = s_vecRegArray.size();
s_vecRegArray.resize(regid + iREGCNT_GPR);
_x86regs* mergedx86 = &s_vecRegArray[regid];
memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR);
if (!bfirst)
{
*(u32*)usedregs = *((u32*)usedregs + 1) = *((u32*)usedregs + 2) = *((u32*)usedregs + 3) = 0;
for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++)
{
pxAssert((*itblock2)->allocX86Regs >= 0);
_x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs];
for (int i = 0; i < iREGCNT_GPR; ++i)
{
if (pregs[i].inuse && pregs[i].reg < 16)
{
//pxAssert( pregs[i].reg < 16);
usedregs[pregs[i].reg]++;
}
}
}
int num = 1;
for (int i = 0; i < 16; ++i)
{
if (usedregs[i] == s_markov.children.size())
{
// use
mergedx86[num].inuse = 1;
mergedx86[num].reg = i;
mergedx86[num].type = (s_vu ? X86TYPE_VU1 : 0) | X86TYPE_VI;
mergedx86[num].mode = MODE_READ;
if (++num >= iREGCNT_GPR)
break;
if (num == ESP)
++num;
}
}
for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++)
{
pxAssert((*itblock2)->nStartx86 == -1);
(*itblock2)->nStartx86 = regid;
}
for(itblock2 = s_markov.parents.begin(); itblock2 != s_markov.parents.end(); itblock2++)
{
pxAssert((*itblock2)->nEndx86 == -1);
(*itblock2)->nEndx86 = regid;
}
}
bfirst = false;
}
}
#endif
}
//////////////////
// Recompilation
//////////////////
// cycles in which the last Q,P regs were finished (written to VU->VI[])
// the write occurs before the instruction is executed at that cycle
// compare with s_TotalVUCycles
// if less than 0, already flushed
int s_writeQ, s_writeP;
// declare the saved registers
uptr s_vu1esp, s_callstack;
uptr s_vuebx, s_vuedi, s_vu1esi;
static int s_recWriteQ, s_recWriteP; // wait times during recompilation
static int s_needFlush; // first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written
static int s_JumpX86;
static int s_ScheduleXGKICK = 0, s_XGKICKReg = -1;
void recVUMI_XGKICK_(VURegs *VU);
void SuperVUCleanupProgram(u32 startpc, int vuindex)
{
#ifdef SUPERVU_COUNT
QueryPerformanceCounter(&svufinal);
svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
#endif
VU = vuindex ? &VU1 : &VU0;
VU->cycle += s_TotalVUCycles;
//VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand
if (s_TotalVUCycles < 3000)
cpuRegs.cycle += s_TotalVUCycles * EmuConfig.Speedhacks.EECycleSkip;
else
cpuRegs.cycle += 3000 * EmuConfig.Speedhacks.EECycleSkip;
if ((int)s_writeQ > 0) VU->VI[REG_Q] = VU->q;
if ((int)s_writeP > 0)
{
pxAssert(VU == &VU1);
VU1.VI[REG_P] = VU1.p; // only VU1
}
//memset(recVUStack, 0, SUPERVU_STACKSIZE * 4);
// Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2;
// not doing this because it's slow and not needed (rama)
// _initXMMregs();
// _initX86regs();
}
#if defined(_MSC_VER)
// entry point of all vu programs from emulator calls
__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
{
// Stackframe setup for the recompiler:
// We rewind the stack 4 bytes, which places the parameters of this function before
// any calls we might make from recompiled code. The return address for this function
// call is subsequently stored in s_callstack.
__asm
{
mov eax, dword ptr [esp]
mov s_TotalVUCycles, 0 // necessary to be here!
add esp, 4
mov s_callstack, eax
call SuperVUGetProgram
// save cpu state
//mov s_vu1ebp, ebp
mov s_vu1esi, esi
mov s_vuedi, edi
mov s_vuebx, ebx
mov s_vu1esp, esp
and esp, -16 // align stack for GCC compilance
//stmxcsr s_ssecsr
ldmxcsr g_sseVUMXCSR
// init vars
mov s_writeQ, 0xffffffff
mov s_writeP, 0xffffffff
jmp eax
}
}
// exit point of all vu programs
__declspec(naked) static void SuperVUEndProgram()
{
__asm
{
// restore cpu state
ldmxcsr g_sseMXCSR
//mov ebp, s_vu1ebp
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx
mov esp, s_vu1esp // restore from aligned stack
call SuperVUCleanupProgram
jmp s_callstack // so returns correctly
}
}
#endif
// Flushes P/Q regs
void SuperVUFlush(int p, int wait)
{
u8* pjmp[3] = { nullptr, nullptr, nullptr };
if (!(s_needFlush&(1 << p))) return;
int recwait = p ? s_recWriteP : s_recWriteQ;
if (!wait && s_pCurInst->info.cycle < recwait) return;
if (recwait == 0)
{
// write didn't happen this block
xMOV(eax, ptr[(void*)(p ? (uptr)&s_writeP : (uptr)&s_writeQ)]);
xOR(eax, eax);
pjmp[0] = JS8(0);
if (s_pCurInst->info.cycle) xSUB(eax, s_pCurInst->info.cycle);
// if writeQ <= total+offset
if (!wait) // only write back if time is up
{
xCMP(eax, ptr[&s_TotalVUCycles]);
pjmp[1] = JG8(0);
}
else
{
// add (writeQ-total-offset) to s_TotalVUCycles
// necessary?
xCMP(eax, ptr[&s_TotalVUCycles]);
pjmp[2] = JLE8(0);
xMOV(ptr[&s_TotalVUCycles], eax);
x86SetJ8(pjmp[2]);
}
}
else if (wait && s_pCurInst->info.cycle < recwait)
{
xADD(ptr32[&s_TotalVUCycles], recwait);
}
xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(p ? REG_P : REG_Q, 0))]);
xMOV(ptr32[(u32*)(p ? (uptr)&s_writeP : (uptr)&s_writeQ)], 0x80000000);
xMOV(ptr[(void*)(SuperVUGetVIAddr(p ? REG_P : REG_Q, 1))], eax);
if (recwait == 0)
{
if (!wait) x86SetJ8(pjmp[1]);
x86SetJ8(pjmp[0]);
}
if (wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait))
s_needFlush &= ~(1 << p);
}
// executed only once per program
static u32* SuperVUStaticAlloc(u32 size)
{
pxAssert(recVUStackPtr[s_vu] + size <= recVUStack[s_vu] + SUPERVU_STACKSIZE);
// always zero
if (size == 4) *(u32*)recVUStackPtr[s_vu] = 0;
else memset(recVUStackPtr[s_vu], 0, size);
recVUStackPtr[s_vu] += size;
return (u32*)(recVUStackPtr[s_vu] - size);
}
static void SuperVURecompile()
{
// save cpu state
recVUStackPtr[s_vu] = recVUStack[s_vu];
_initXMMregs();
std::list<VuBaseBlock*>::iterator itblock;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
}
s_listBlocks.front()->Recompile();
// make sure everything compiled
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
pxAssert(((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL);
}
// link all blocks
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
VuBaseBlock::LISTBLOCKS::iterator itchild;
pxAssert((*itblock)->blocks.size() <= ArraySize((*itblock)->pChildJumps));
int i = 0;
for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
{
if ((u32)(uptr)(*itblock)->pChildJumps[i] == 0xffffffff)
continue;
if ((*itblock)->pChildJumps[i] == NULL)
{
VuBaseBlock* pchild = *itchild;
if (pchild->type & BLOCKTYPE_HASEOP)
{
pxAssert(pchild->blocks.empty());
xAND(ptr32[&VU0.VI[ REG_VPU_STAT ].UL], s_vu ? ~0x100 : ~0x001); // E flag
//xAND(ptr32[(&VU->GetVifRegs().stat)], ~VIF1_STAT_VEW);
xMOV(ptr32[(&VU->VI[REG_TPC])], pchild->endpc);
JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
}
// only other case is when there are two branches
else
{
pxAssert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH);
}
continue;
}
if ((u32)(uptr)(*itblock)->pChildJumps[i] & 0x80000000)
{
// relative
pxAssert((uptr)(*itblock)->pChildJumps[i] <= 0xffffffff);
(*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff);
*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4);
}
else
{
*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode;
}
++i;
}
}
s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode;
}
// debug
u32 s_saveecx, s_saveedx, s_saveebx, s_saveesi, s_saveedi, s_saveebp;
u32 g_curdebugvu;
//float vuDouble(u32 f);
#ifdef PCSX2_DEBUG
static void __fastcall svudispfn( int g_curdebugvu )
{
static u32 i;
if (((vudump&8) && g_curdebugvu) || ((vudump&0x80) && !g_curdebugvu)) //&& g_vu1lastrec != g_vu1last ) {
{
if (skipparent != g_vu1lastrec)
{
for (i = 0; i < ArraySize(badaddrs); ++i)
{
if (s_svulast == badaddrs[i][1] && g_vu1lastrec == badaddrs[i][0])
break;
}
if (i == ArraySize(badaddrs))
{
//static int curesp;
//__asm mov curesp, esp
//Console.WriteLn("tVU: %x %x %x", s_svulast, s_vucount, s_vufnheader);
if (g_curdebugvu) iDumpVU1Registers();
else iDumpVU0Registers();
s_vucount++;
}
}
g_vu1lastrec = s_svulast;
}
}
#endif
// frees all regs taking into account the livevars
void SuperVUFreeXMMregs(u32* livevars)
{
for (u32 i = 0; i < iREGCNT_XMM; ++i)
{
if (xmmregs[i].inuse)
{
// same reg
if ((xmmregs[i].mode & MODE_WRITE))
{
#ifdef SUPERVU_INTERCACHING
if (xmmregs[i].type == XMMTYPE_VFREG)
{
if (!(livevars[1] & (1 << xmmregs[i].reg))) continue;
}
else if (xmmregs[i].type == XMMTYPE_ACC)
{
if (!(livevars[0] & (1 << REG_ACC_FLAG))) continue;
}
#endif
if (xmmregs[i].mode & MODE_VUXYZ)
{
// ALWAYS update
u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (uptr) & VU->VF[xmmregs[i].reg] : (uptr) & VU->ACC;
if (xmmregs[i].mode & MODE_VUZ)
{
xMOVH.PS(ptr[(void*)(addr)], xRegisterSSE((x86SSERegType)i));
xSHUF.PS(xRegisterSSE((x86SSERegType)i), ptr[(void*)(addr)], 0xc4);
}
else
{
xMOVH.PS(xRegisterSSE((x86SSERegType)i), ptr[(void*)(addr + 8)]);
}
xmmregs[i].mode &= ~MODE_VUXYZ;
}
_freeXMMreg(i);
}
}
}
//_freeXMMregs();
}
static u32 runCycles = 0; // Cycles to Compare to for early exit
static u32 backupEAX = 0; // Backup EAX (not sure if this is needed)
void SuperVUTestVU0Condition(u32 incstack)
{
if (s_vu && !SUPERVU_CHECKCONDITION) return; // vu0 only
// sometimes games spin on vu0, so be careful with
// runCycles value... woody hangs if too high
// Edit: Need to test this again, if anyone ever has a "Woody" game :p
xMOV(ptr[&backupEAX], eax);
xMOV(eax, ptr[&s_TotalVUCycles]);
xCMP(eax, ptr[&runCycles]);
xMOV(eax, ptr[&backupEAX]);
if (incstack)
{
u8* jptr = JB8(0);
xADD(esp, incstack);
//xCALL((void*)(u32)timeout);
JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
x86SetJ8(jptr);
}
else JAE32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 6));
}
void VuBaseBlock::Recompile()
{
if (type & BLOCKTYPE_ANALYZED) return;
x86Align(16);
pcode = x86Ptr;
#ifdef PCSX2_DEBUG
xMOV(ptr32[&s_vufnheader], s_pFnHeader->startpc);
xMOV(ptr32[(&VU->VI[REG_TPC])], startpc);
xMOV(ptr32[&s_svulast], startpc);
std::list<VuBaseBlock*>::iterator itparent;
for (itparent = parents.begin(); itparent != parents.end(); ++itparent)
{
if ((*itparent)->blocks.size() == 1 && (*itparent)->blocks.front()->startpc == startpc &&
((*itparent)->insts.size() < 2 || (----(*itparent)->insts.end())->regs[0].pipe != VUPIPE_BRANCH))
{
xMOV(ptr32[&skipparent], (*itparent)->startpc);
break;
}
}
if (itparent == parents.end()) xMOV(ptr32[&skipparent], -1);
xMOV( ecx, s_vu );
xCALL( (void*)svudispfn );
#endif
s_pCurBlock = this;
s_needFlush = 3;
pc = startpc;
g_branch = 0;
s_recWriteQ = s_recWriteP = 0;
s_XGKICKReg = -1;
s_ScheduleXGKICK = 0;
s_ClipRead = s_PrevClipWrite = (uptr) & VU->VI[REG_CLIP_FLAG];
s_StatusRead = s_PrevStatusWrite = (uptr) & VU->VI[REG_STATUS_FLAG];
s_MACRead = s_PrevMACWrite = (uptr) & VU->VI[REG_MAC_FLAG];
s_PrevIWrite = (uptr) & VU->VI[REG_I];
s_JumpX86 = 0;
s_UnconditionalDelay = 0;
memcpy(xmmregs, startregs, sizeof(xmmregs));
#ifdef SUPERVU_X86CACHING
if (nStartx86 >= 0)
memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs));
else
_initX86regs();
#else
_initX86regs();
#endif
std::list<VuInstruction>::iterator itinst;
for(itinst = insts.begin(); itinst != insts.end(); itinst++)
{
s_pCurInst = &(*itinst);
if (s_JumpX86 > 0)
{
if (!x86regs[s_JumpX86].inuse)
{
// load
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_READ);
}
x86regs[s_JumpX86].needed = 1;
}
if (s_ScheduleXGKICK && s_XGKICKReg > 0)
{
pxAssert(x86regs[s_XGKICKReg].inuse);
x86regs[s_XGKICKReg].needed = 1;
}
itinst->Recompile(itinst, vuxyz);
if (s_ScheduleXGKICK > 0)
{
if (s_ScheduleXGKICK-- == 1)
{
recVUMI_XGKICK_(VU);
}
}
}
pxAssert(pc == endpc);
pxAssert(s_ScheduleXGKICK == 0);
// flush flags
if (s_PrevClipWrite != (uptr)&VU->VI[REG_CLIP_FLAG])
{
xMOV(eax, ptr[(void*)(s_PrevClipWrite)]);
xMOV(ptr[(&VU->VI[REG_CLIP_FLAG])], eax);
}
if (s_PrevStatusWrite != (uptr)&VU->VI[REG_STATUS_FLAG])
{
xMOV(eax, ptr[(void*)(s_PrevStatusWrite)]);
xMOV(ptr[(&VU->VI[REG_STATUS_FLAG])], eax);
}
if (s_PrevMACWrite != (uptr)&VU->VI[REG_MAC_FLAG])
{
xMOV(eax, ptr[(void*)(s_PrevMACWrite)]);
xMOV(ptr[(&VU->VI[REG_MAC_FLAG])], eax);
}
// if( s_StatusRead != (uptr)&VU->VI[REG_STATUS_FLAG] ) {
// // only lower 8 bits valid!
// xMOVZX(eax, ptr8[(u8*)(s_StatusRead)]);
// xMOV(ptr[(&VU->VI[REG_STATUS_FLAG])], eax);
// }
// if( s_MACRead != (uptr)&VU->VI[REG_MAC_FLAG] ) {
// // only lower 8 bits valid!
// xMOVZX(eax, ptr8[(u8*)(s_MACRead)]);
// xMOV(ptr[(&VU->VI[REG_MAC_FLAG])], eax);
// }
if (s_PrevIWrite != (uptr)&VU->VI[REG_I])
{
xMOV(ptr32[(&VU->VI[REG_I])], *(u32*)s_PrevIWrite); // never changes
}
xADD(ptr32[&s_TotalVUCycles], cycles);
// compute branches, jumps, eop
if (type & BLOCKTYPE_HASEOP)
{
// end
_freeXMMregs();
_freeX86regs();
xAND(ptr32[&VU0.VI[ REG_VPU_STAT ].UL], s_vu ? ~0x100 : ~0x001); // E flag
//xAND(ptr32[(&VU->GetVifRegs().stat)], ~VIF1_STAT_VEW);
if (!g_branch) xMOV(ptr32[(&VU->VI[REG_TPC])], endpc);
JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
}
else
{
u32 livevars[2] = {0};
std::list<VuInstruction>::iterator lastinst = GetInstIterAtPc(endpc - 8);
lastinst++;
if (lastinst != insts.end())
{
livevars[0] = lastinst->livevars[0];
livevars[1] = lastinst->livevars[1];
}
else
{
// take from children
if (!blocks.empty())
{
LISTBLOCKS::iterator itchild;
for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
{
livevars[0] |= (*itchild)->insts.front().livevars[0];
livevars[1] |= (*itchild)->insts.front().livevars[1];
}
}
else
{
livevars[0] = ~0;
livevars[1] = ~0;
}
}
SuperVUFreeXMMregs(livevars);
// get rid of any writes, otherwise _freeX86regs will write
x86regs[s_JumpX86].mode &= ~MODE_WRITE;
if (g_branch == 1)
{
if (!x86regs[s_JumpX86].inuse)
{
pxAssert(x86regs[s_JumpX86].type == X86TYPE_VUJUMP);
s_JumpX86 = -1; // notify to jump from g_recWriteback
}
}
// align VI regs
#ifdef SUPERVU_X86CACHING
if (nEndx86 >= 0)
{
_x86regs* endx86 = &s_vecRegArray[nEndx86];
for (int i = 0; i < iREGCNT_GPR; ++i)
{
if( i == ESP || i == EBP ) continue;
if (endx86[i].inuse)
{
if (s_JumpX86 == i && x86regs[s_JumpX86].inuse)
{
x86regs[s_JumpX86].inuse = 0;
x86regs[eax.GetId()].inuse = 1;
xMOV(eax, xRegister32(s_JumpX86));
s_JumpX86 = eax.GetId();
}
if (x86regs[i].inuse)
{
if (x86regs[i].type == endx86[i].type && x86regs[i].reg == endx86[i].reg)
{
_freeX86reg(i);
// will continue to use it
continue;
}
#ifdef SUPERVU_INTERCACHING
if (x86regs[i].type == (X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0)))
{
if (livevars[0] & (1 << x86regs[i].reg))
_freeX86reg(i);
else
x86regs[i].inuse = 0;
}
else
#endif
{
_freeX86reg(i);
}
}
// realloc
_allocX86reg(i, endx86[i].type, endx86[i].reg, MODE_READ);
if (x86regs[i].mode & MODE_WRITE)
{
_freeX86reg(i);
x86regs[i].inuse = 1;
}
}
else _freeX86reg(i);
}
}
else _freeX86regs();
#else
_freeX86regs();
#endif
// store the last block executed
xMOV(ptr32[&g_nLastBlockExecuted], s_pCurBlock->startpc);
switch (g_branch)
{
case 1: // branch, esi has new prog
SuperVUTestVU0Condition(0);
if (s_JumpX86 == -1)
xJMP(ptr32[&g_recWriteback]);
else
xJMP(xRegister32(s_JumpX86));
break;
case 4: // jalr
pChildJumps[0] = (u32*)0xffffffff;
// fall through
case 0x10: // jump, esi has new vupc
{
_freeXMMregs();
_freeX86regs();
SuperVUTestVU0Condition(8);
// already onto stack
xCALL((void*)(uptr)SuperVUGetProgram);
xADD(esp, 8);
xJMP(eax);
break;
}
case 0x13: // jr with uncon branch, uncond branch takes precendence (dropship)
{
// s32 delta = (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) << 3;
// ADD32ItoRmOffset(ESP, delta, 0);
xADD(esp, 8); // restore
pChildJumps[0] = (u32*)((uptr)JMP32(0) | 0x80000000);
break;
}
case 0:
case 3: // unconditional branch
pChildJumps[s_UnconditionalDelay] = (u32*)((uptr)JMP32(0) | 0x80000000);
break;
default:
DevCon.Error("Bad branch %x\n", g_branch);
pxAssert(0);
break;
}
}
pendcode = x86Ptr;
type |= BLOCKTYPE_ANALYZED;
LISTBLOCKS::iterator itchild;
for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
{
(*itchild)->Recompile();
}
}
#define GET_VUXYZMODE(reg) 0//((vuxyz&(1<<(reg)))?MODE_VUXYZ:0)
int VuInstruction::SetCachedRegs(int upper, u32 vuxyz)
{
if (vfread0[upper] >= 0)
{
SuperVUFreeXMMreg(vfread0[upper], XMMTYPE_VFREG, regs[upper].VFread0);
_allocVFtoXMMreg(VU, vfread0[upper], regs[upper].VFread0, MODE_READ | GET_VUXYZMODE(regs[upper].VFread0));
}
if (vfread1[upper] >= 0)
{
SuperVUFreeXMMreg(vfread1[upper], XMMTYPE_VFREG, regs[upper].VFread1);
_allocVFtoXMMreg(VU, vfread1[upper], regs[upper].VFread1, MODE_READ | GET_VUXYZMODE(regs[upper].VFread1));
}
if (vfacc[upper] >= 0 && (regs[upper].VIread&(1 << REG_ACC_FLAG)))
{
SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0);
_allocACCtoXMMreg(VU, vfacc[upper], MODE_READ);
}
if (vfwrite[upper] >= 0)
{
pxAssert(regs[upper].VFwrite > 0);
SuperVUFreeXMMreg(vfwrite[upper], XMMTYPE_VFREG, regs[upper].VFwrite);
_allocVFtoXMMreg(VU, vfwrite[upper], regs[upper].VFwrite,
MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0) | GET_VUXYZMODE(regs[upper].VFwrite));
}
if (vfacc[upper] >= 0 && (regs[upper].VIwrite&(1 << REG_ACC_FLAG)))
{
SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0);
_allocACCtoXMMreg(VU, vfacc[upper], MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0));
}
int info = PROCESS_VU_SUPER;
if (vfread0[upper] >= 0) info |= PROCESS_EE_SET_S(vfread0[upper]);
if (vfread1[upper] >= 0) info |= PROCESS_EE_SET_T(vfread1[upper]);
if (vfacc[upper] >= 0) info |= PROCESS_VU_SET_ACC(vfacc[upper]);
if (vfwrite[upper] >= 0)
{
if (regs[upper].VFwrite == _Ft_ && vfread1[upper] < 0)
{
info |= PROCESS_EE_SET_T(vfwrite[upper]);
}
else
{
pxAssert(regs[upper].VFwrite == _Fd_);
info |= PROCESS_EE_SET_D(vfwrite[upper]);
}
}
if (!(vffree[upper]&VFFREE_INVALID0))
{
SuperVUFreeXMMreg(vffree[upper]&0xf, XMMTYPE_TEMP, 0);
_allocTempXMMreg(XMMT_FPS, vffree[upper]&0xf);
}
info |= PROCESS_VU_SET_TEMP(vffree[upper] & 0xf);
if (vfflush[upper] >= 0)
{
SuperVUFreeXMMreg(vfflush[upper], XMMTYPE_TEMP, 0);
_allocTempXMMreg(XMMT_FPS, vfflush[upper]);
}
if (upper && (regs[upper].VIwrite & (1 << REG_CLIP_FLAG)))
{
// CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC
pxAssert(vfwrite[upper] == -1);
SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0);
_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf);
info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf);
SuperVUFreeXMMreg((vffree[upper] >> 16)&0xf, XMMTYPE_TEMP, 0);
_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 16)&0xf);
info |= PROCESS_EE_SET_ACC((vffree[upper] >> 16) & 0xf);
_freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore
_freeXMMreg((vffree[upper] >> 16)&0xf); // don't need anymore
}
else if (regs[upper].VIwrite & (1 << REG_P))
{
SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0);
_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf);
info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf);
_freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore
}
if (vfflush[upper] >= 0) _freeXMMreg(vfflush[upper]);
if (!(vffree[upper]&VFFREE_INVALID0))
_freeXMMreg(vffree[upper]&0xf); // don't need anymore
if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG)))
info |= PROCESS_VU_UPDATEFLAGS;
return info;
}
void VuInstruction::Recompile(std::list<VuInstruction>::iterator& itinst, u32 vuxyz)
{
//static PCSX2_ALIGNED16(VECTOR _VF);
//static PCSX2_ALIGNED16(VECTOR _VFc);
u32 *code_ptr;
u8* pjmp;
int vfregstore = 0;
pxAssert(s_pCurInst == this);
s_WriteToReadQ = 0;
code_ptr = (u32*) & VU->Micro[ pc ];
if (type & INST_Q_READ)
SuperVUFlush(0, (code_ptr[0] == 0x800003bf) || !!(regs[0].VIwrite & (1 << REG_Q)));
if (type & INST_P_READ)
SuperVUFlush(1, (code_ptr[0] == 0x800007bf) || !!(regs[0].VIwrite & (1 << REG_P)));
if (type & INST_DUMMY)
{
// find nParentPc
VuInstruction* pparentinst = NULL;
// if true, will check if parent block was executed before getting the results of the flags (superman returns)
int nParentCheckForExecution = -1;
// int badaddrs[] = {
// 0x60,0x68,0x70,0x60,0x68,0x70,0x88,0x90,0x98,0x98,0xa8,0xb8,0x88,0x90,
// 0x4a8,0x4a8,0x398,0x3a0,0x3a8,0xa0
// };
#ifdef SUPERVU_PROPAGATEFLAGS
if (nParentPc != -1 && (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc))
{
// if( !s_vu ) {
// for(int j = 0; j < ARRAYSIZE(badaddrs); ++j) {
// if( badaddrs[j] == nParentPc )
// goto NoParent;
// }
// }
std::list<VuBaseBlock*>::iterator itblock;
for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
{
if (nParentPc >= (*itblock)->startpc && nParentPc < (*itblock)->endpc)
{
pparentinst = &(*(*itblock)->GetInstIterAtPc(nParentPc));
//if( !s_vu ) SysPrintf("%x ", nParentPc);
if (find(s_pCurBlock->parents.begin(), s_pCurBlock->parents.end(), *itblock) != s_pCurBlock->parents.end())
nParentCheckForExecution = (*itblock)->startpc;
break;
}
}
pxAssert(pparentinst != NULL);
}
#endif
if (type & INST_CLIP_WRITE)
{
if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc)
{
if (!CHECK_VUCLIPFLAGHACK && pparentinst != NULL)
{
if (nParentCheckForExecution >= 0)
{
if (pparentinst->pClipWrite == 0)
pparentinst->pClipWrite = (uptr)SuperVUStaticAlloc(4);
if (s_ClipRead == 0)
s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG];
xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution);
u8* jptr = JNE8(0);
xCMP(ptr32[&s_ClipRead], (uptr)&VU->VI[REG_CLIP_FLAG]);
u8* jptr2 = JE8(0);
xMOV(eax, ptr[(void*)(pparentinst->pClipWrite)]);
xMOV(ptr[(void*)(s_ClipRead)], eax);
x86SetJ8(jptr);
x86SetJ8(jptr2);
}
}
else s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG];
}
else
{
s_ClipRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pClipWrite;
if (s_ClipRead == 0) Console.WriteLn("super ClipRead allocation error!");
}
}
// before modifying, check if they will ever be read
if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS)
{
u8 outofblock = 0;
if (type & INST_STATUS_WRITE)
{
if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc)
{
// reading from out of this block, so already flushed to mem
if (pparentinst != NULL) //&& pparentinst->pStatusWrite != NULL ) {
{
// might not have processed it yet, so reserve a mem loc
if (pparentinst->pStatusWrite == 0)
{
pparentinst->pStatusWrite = (uptr)SuperVUStaticAlloc(4);
//xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0);
}
// if( s_pCurBlock->prevFlagsOutOfBlock && s_StatusRead != NULL ) {
// // or instead since don't now which parent we came from
// xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]);
// xOR(ptr[(void*)(s_StatusRead)], eax);
// xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0);
// }
if (nParentCheckForExecution >= 0)
{
// don't now which parent we came from, so have to check
// uptr tempstatus = (uptr)SuperVUStaticAlloc(4);
// if( s_StatusRead != NULL )
// xMOV(eax, ptr[(void*)(s_StatusRead)]);
// else
// xMOV(eax, ptr[(&VU->VI[REG_STATUS_FLAG])]);
// s_StatusRead = tempstatus;
if (s_StatusRead == 0)
s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG];
xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution);
u8* jptr = JNE8(0);
xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]);
xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0);
xMOV(ptr[(void*)(s_StatusRead)], eax);
x86SetJ8(jptr);
}
else
{
uptr tempstatus = (uptr)SuperVUStaticAlloc(4);
xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]);
xMOV(ptr[(void*)(tempstatus)], eax);
xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0);
s_StatusRead = tempstatus;
}
outofblock = 2;
}
else
s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG];
}
else
{
s_StatusRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pStatusWrite;
if (s_StatusRead == 0) Console.WriteLn("super StatusRead allocation error!");
// if( pc >= (u32)s_pCurBlock->endpc-8 ) {
// // towards the end, so variable might be leaded to another block (silent hill 4)
// uptr tempstatus = (uptr)SuperVUStaticAlloc(4);
// xMOV(eax, ptr[(void*)(s_StatusRead)]);
// xMOV(ptr[(void*)(tempstatus)], eax);
// xMOV(ptr32[(u32*)(s_StatusRead)], 0);
// s_StatusRead = tempstatus;
// }
}
}
if (type & INST_MAC_WRITE)
{
if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc)
{
// reading from out of this block, so already flushed to mem
if (pparentinst != NULL) //&& pparentinst->pMACWrite != NULL ) {
{
// necessary for (katamari)
// towards the end, so variable might be leaked to another block (silent hill 4)
// might not have processed it yet, so reserve a mem loc
if (pparentinst->pMACWrite == 0)
{
pparentinst->pMACWrite = (uptr)SuperVUStaticAlloc(4);
//xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0);
}
// if( s_pCurBlock->prevFlagsOutOfBlock && s_MACRead != NULL ) {
// // or instead since don't now which parent we came from
// xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]);
// xOR(ptr[(void*)(s_MACRead)], eax);
// xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0);
// }
if (nParentCheckForExecution >= 0)
{
// don't now which parent we came from, so have to check
// uptr tempmac = (uptr)SuperVUStaticAlloc(4);
// if( s_MACRead != NULL )
// xMOV(eax, ptr[(void*)(s_MACRead)]);
// else
// xMOV(eax, ptr[(&VU->VI[REG_MAC_FLAG])]);
// s_MACRead = tempmac;
if (s_MACRead == 0) s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG];
xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution);
u8* jptr = JNE8(0);
xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]);
xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0);
xMOV(ptr[(void*)(s_MACRead)], eax);
x86SetJ8(jptr);
}
else
{
uptr tempMAC = (uptr)SuperVUStaticAlloc(4);
xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]);
xMOV(ptr[(void*)(tempMAC)], eax);
xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0);
s_MACRead = tempMAC;
}
outofblock = 2;
}
else
s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG];
// if( pc >= (u32)s_pCurBlock->endpc-8 ) {
// // towards the end, so variable might be leaked to another block (silent hill 4)
// uptr tempMAC = (uptr)SuperVUStaticAlloc(4);
// xMOV(eax, ptr[(void*)(s_MACRead)]);
// xMOV(ptr[(void*)(tempMAC)], eax);
// xMOV(ptr32[(u32*)(s_MACRead)], 0);
// s_MACRead = tempMAC;
// }
}
else
{
s_MACRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pMACWrite;
}
}
s_pCurBlock->prevFlagsOutOfBlock = outofblock;
}
else if (pparentinst != NULL)
{
// make sure to reset the mac and status flags! (katamari)
if (pparentinst->pStatusWrite)
xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0);
if (pparentinst->pMACWrite)
xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0);
}
pxAssert(s_ClipRead != 0);
pxAssert(s_MACRead != 0);
pxAssert(s_StatusRead != 0);
return;
}
s_pCurBlock->prevFlagsOutOfBlock = 0;
if( IsDebugBuild )
xMOV(eax, pc);
pxAssert(!(type & (INST_CLIP_WRITE | INST_STATUS_WRITE | INST_MAC_WRITE)));
pc += 8;
std::list<VuInstruction>::const_iterator itinst2;
if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG)))
{
if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS)
{
if (pMACWrite == 0)
{
pMACWrite = (uptr)SuperVUStaticAlloc(4);
//xMOV(ptr32[(u32*)(pMACWrite)], 0);
}
if (pStatusWrite == 0)
{
pStatusWrite = (uptr)SuperVUStaticAlloc(4);
//xMOV(ptr32[(u32*)(pStatusWrite)], 0);
}
}
else
{
pxAssert(s_StatusRead == (uptr)&VU->VI[REG_STATUS_FLAG]);
pxAssert(s_MACRead == (uptr)&VU->VI[REG_MAC_FLAG]);
pMACWrite = s_MACRead;
pStatusWrite = s_StatusRead;
}
}
if ((pClipWrite == 0) && ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG)))
{
pClipWrite = (uptr)SuperVUStaticAlloc(4);
//xMOV(ptr32[(u32*)(pClipWrite)], 0);
}
#ifdef SUPERVU_X86CACHING
// redo the counters so that the proper regs are released
for (int j = 0; j < iREGCNT_GPR; ++j)
{
if (x86regs[j].inuse && X86_ISVI(x86regs[j].type))
{
int count = 0;
itinst2 = itinst;
while (itinst2 != s_pCurBlock->insts.end())
{
if ((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite | itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) && (1 << x86regs[j].reg))
break;
++count;
++itinst2;
}
x86regs[j].counter = 1000 - count;
}
}
#endif
if (s_vu == 0 && (code_ptr[1] & 0x20000000)) // M flag
{
xOR(ptr8[(u8*)((uptr)&VU->flags)], VUFLAG_MFLAGSET);
}
if (code_ptr[1] & 0x10000000) // D flag
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], s_vu ? 0x400 : 0x004);
u8* jptr = JZ8(0);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], s_vu ? 0x200 : 0x002);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( (void*)hwIntcIrq );
x86SetJ8(jptr);
}
if (code_ptr[1] & 0x08000000) // T flag
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], s_vu ? 0x800 : 0x008);
u8* jptr = JZ8(0);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], s_vu ? 0x400 : 0x004);
xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 );
xCALL( (void*)hwIntcIrq );
x86SetJ8(jptr);
}
// check upper flags
if (code_ptr[1] & 0x80000000) // I flag
{
pxAssert(!(regs[0].VIwrite & ((1 << REG_Q) | (1 << REG_P))));
VU->code = code_ptr[1];
s_vuInfo = SetCachedRegs(1, vuxyz);
if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1;
if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1;
recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo);
s_PrevIWrite = (uptr)code_ptr;
_clearNeededXMMregs();
_clearNeededX86regs();
}
else
{
if (regs[0].VIwrite & (1 << REG_Q))
{
// search for all the insts between this inst and writeback
itinst2 = itinst;
++itinst2;
u32 cacheq = (itinst2 == s_pCurBlock->insts.end());
u32* codeptr2 = code_ptr + 2;
while (itinst2 != s_pCurBlock->insts.end())
{
if (!(itinst2->type & INST_DUMMY) && ((itinst2->regs[0].VIwrite&(1 << REG_Q)) || codeptr2[0] == 0x800003bf)) // waitq, or fdiv inst
{
break;
}
if ((itinst2->type & INST_Q_WRITE) && itinst2->nParentPc == (int)pc - 8)
{
break;
}
if (itinst2->type & INST_Q_READ)
{
cacheq = 1;
break;
}
if (itinst2->type & INST_DUMMY)
{
++itinst2;
continue;
}
codeptr2 += 2;
++itinst2;
}
if (itinst2 == s_pCurBlock->insts.end())
cacheq = 1;
int x86temp = -1;
if (cacheq)
x86temp = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
// new is written so flush old
// if type & INST_Q_READ, already flushed
if (!(type & INST_Q_READ) && s_recWriteQ == 0) xMOV(eax, ptr[&s_writeQ]);
if (cacheq)
xMOV(xRegister32(x86temp), ptr[&s_TotalVUCycles]);
if (!(type & INST_Q_READ))
{
if (s_recWriteQ == 0)
{
xOR(eax, eax);
pjmp = JS8(0);
xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_Q, 0))]);
xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_Q, 1))], eax);
x86SetJ8(pjmp);
}
else if (s_needFlush & 1)
{
xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_Q, 0))]);
xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_Q, 1))], eax);
s_needFlush &= ~1;
}
}
// write new Q
if (cacheq)
{
pxAssert(s_pCurInst->pqcycles > 1);
xADD(xRegister32(x86temp), s_pCurInst->info.cycle + s_pCurInst->pqcycles);
xMOV(ptr[&s_writeQ], xRegister32(x86temp));
s_needFlush |= 1;
}
else
{
// won't be writing back
s_WriteToReadQ = 1;
s_needFlush &= ~1;
xMOV(ptr32[&s_writeQ], 0x80000001);
}
s_recWriteQ = s_pCurInst->info.cycle + s_pCurInst->pqcycles;
if (x86temp >= 0)
_freeX86reg(x86temp);
}
if (regs[0].VIwrite & (1 << REG_P))
{
int x86temp = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
// new is written so flush old
if (!(type & INST_P_READ) && s_recWriteP == 0)
xMOV(eax, ptr[&s_writeP]);
xMOV(xRegister32(x86temp), ptr[&s_TotalVUCycles]);
if (!(type & INST_P_READ))
{
if (s_recWriteP == 0)
{
xOR(eax, eax);
pjmp = JS8(0);
xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_P, 0))]);
xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_P, 1))], eax);
x86SetJ8(pjmp);
}
else if (s_needFlush & 2)
{
xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_P, 0))]);
xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_P, 1))], eax);
s_needFlush &= ~2;
}
}
// write new P
pxAssert(s_pCurInst->pqcycles > 1);
xADD(xRegister32(x86temp), s_pCurInst->info.cycle + s_pCurInst->pqcycles);
xMOV(ptr[&s_writeP], xRegister32(x86temp));
s_needFlush |= 2;
s_recWriteP = s_pCurInst->info.cycle + s_pCurInst->pqcycles;
_freeX86reg(x86temp);
}
// waitq
if (code_ptr[0] == 0x800003bf) SuperVUFlush(0, 1);
// waitp
if (code_ptr[0] == 0x800007bf) SuperVUFlush(1, 1);
#ifdef PCSX2_DEVBUILD
if (regs[1].VIread & regs[0].VIwrite & ~((1 << REG_Q) | (1 << REG_P) | (1 << REG_VF0_FLAG) | (1 << REG_ACC_FLAG)))
{
Console.Warning("*PCSX2*: Warning, VI write to the same reg %x in both lower/upper cycle %x", regs[1].VIread & regs[0].VIwrite, s_pCurBlock->startpc);
}
#endif
u32 modewrite = 0;
if (vfwrite[1] >= 0 && xmmregs[vfwrite[1]].inuse && xmmregs[vfwrite[1]].type == XMMTYPE_VFREG && xmmregs[vfwrite[1]].reg == regs[1].VFwrite)
modewrite = xmmregs[vfwrite[1]].mode & MODE_WRITE;
VU->code = code_ptr[1];
s_vuInfo = SetCachedRegs(1, vuxyz);
if (vfwrite[1] >= 0)
{
pxAssert(regs[1].VFwrite > 0);
if (vfwrite[0] == vfwrite[1])
{
//Console.WriteLn("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle %x", s_pCurBlock->startpc);
}
if (vfread0[0] == vfwrite[1] || vfread1[0] == vfwrite[1])
{
pxAssert(regs[0].VFread0 == regs[1].VFwrite || regs[0].VFread1 == regs[1].VFwrite);
pxAssert(vfflush[0] >= 0);
if (modewrite)
{
xMOVAPS(ptr[(&VU->VF[regs[1].VFwrite])], xRegisterSSE((x86SSERegType)vfwrite[1]));
}
vfregstore = 1;
}
}
if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1;
if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1;
recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo);
_clearNeededXMMregs();
_clearNeededX86regs();
// necessary because status can be set by both upper and lower
if (regs[1].VIwrite & (1 << REG_STATUS_FLAG))
{
pxAssert(pStatusWrite != 0);
s_PrevStatusWrite = pStatusWrite;
}
VU->code = code_ptr[0];
s_vuInfo = SetCachedRegs(0, vuxyz);
if (vfregstore)
{
// load
xMOVAPS(xRegisterSSE(vfflush[0]), ptr[(&VU->VF[regs[1].VFwrite])]);
pxAssert(xmmregs[vfwrite[1]].mode & MODE_WRITE);
// replace with vfflush
if (_Fs_ == regs[1].VFwrite)
{
s_vuInfo &= ~PROCESS_EE_SET_S(0xf);
s_vuInfo |= PROCESS_EE_SET_S(vfflush[0]);
}
if (_Ft_ == regs[1].VFwrite)
{
s_vuInfo &= ~PROCESS_EE_SET_T(0xf);
s_vuInfo |= PROCESS_EE_SET_T(vfflush[0]);
}
xmmregs[vfflush[0]].mode |= MODE_NOFLUSH | MODE_WRITE; // so that lower inst doesn't flush
}
// notify vuinsts that upper inst is a fmac
if (regs[1].pipe == VUPIPE_FMAC)
s_vuInfo |= PROCESS_VU_SET_FMAC();
if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1;
if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1;
#ifdef SUPERVU_VIBRANCHDELAY
if (type & INST_CACHE_VI)
{
pxAssert(vicached >= 0);
int cachedreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), vicached, MODE_READ);
xMOV(ptr[&s_VIBranchDelay], xRegister32(cachedreg));
}
#endif
// check if inst before branch and the write is the same as the read in the branch (wipeout)
// int oldreg=0;
// if( pc == s_pCurBlock->endpc-16 ) {
// itinst2 = itinst; ++itinst2;
// if( itinst2->regs[0].pipe == VUPIPE_BRANCH && (itinst->regs[0].VIwrite&itinst2->regs[0].VIread) ) {
//
// xCALL((void*)(u32)branchfn);
// pxAssert( itinst->regs[0].VIwrite & 0xffff );
// Console.WriteLn("vi write before branch");
// for(s_CacheVIReg = 0; s_CacheVIReg < 16; ++s_CacheVIReg) {
// if( itinst->regs[0].VIwrite & (1<<s_CacheVIReg) )
// break;
// }
//
// oldreg = _allocX86reg(xEmptyReg, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), s_CacheVIReg, MODE_READ);
// s_CacheVIX86 = _allocX86reg(xEmptyReg, X86TYPE_VITEMP, s_CacheVIReg, MODE_WRITE);
// xMOV(xRegister32(s_CacheVIX86), xRegister32(oldreg));
// }
// }
// else if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) {
// pxAssert( s_CacheVIX86 > 0 && x86regs[s_CacheVIX86].inuse && x86regs[s_CacheVIX86].reg == s_CacheVIReg && x86regs[s_CacheVIX86].type == X86TYPE_VITEMP );
//
// oldreg = _allocX86reg(xEmptyReg, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), s_CacheVIReg, MODE_READ);
// x86regs[s_CacheVIX86].needed = 1;
// pxAssert( x86regs[oldreg].mode & MODE_WRITE );
//
// x86regs[s_CacheVIX86].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0);
// x86regs[oldreg].type = X86TYPE_VITEMP;
// }
recVU_LOWER_OPCODE[ VU->code >> 25 ](VU, s_vuInfo);
// if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) {
// // revert
// x86regs[s_CacheVIX86].inuse = 0;
// x86regs[oldreg].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0);
// }
_clearNeededXMMregs();
_clearNeededX86regs();
}
// clip is always written so ok
if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG))
{
pxAssert(pClipWrite != 0);
s_PrevClipWrite = pClipWrite;
}
if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_STATUS_FLAG))
{
pxAssert(pStatusWrite != 0);
s_PrevStatusWrite = pStatusWrite;
}
if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_MAC_FLAG))
{
pxAssert(pMACWrite != 0);
s_PrevMACWrite = pMACWrite;
}
}
///////////////////////////////////
// Super VU Recompilation Tables //
///////////////////////////////////
void recVUMI_BranchHandle()
{
int bpc = _recbranchAddr(VU->code);
int curjump = 0;
if (s_pCurInst->type & INST_BRANCH_DELAY)
{
pxAssert((g_branch&0x17) != 0x10 && (g_branch&0x17) != 4); // no jump handlig for now
if ((g_branch & 0x7) == 3)
{
// previous was a direct jump
curjump = 1;
}
else if (g_branch & 1) curjump = 2;
}
pxAssert(s_JumpX86 > 0);
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc);
xMOV(xRegister32(s_JumpX86), 1); // use 1 to disable optimization to XOR
s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr - 1;
if (!(s_pCurInst->type & INST_BRANCH_DELAY))
{
j8Ptr[1] = JMP8(0);
x86SetJ8(j8Ptr[ 0 ]);
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], pc + 8);
xMOV(xRegister32(s_JumpX86), 1); // use 1 to disable optimization to XOR
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr - 1;
x86SetJ8(j8Ptr[ 1 ]);
}
else
x86SetJ8(j8Ptr[ 0 ]);
g_branch |= 1;
}
// supervu specific insts
void recVUMI_IBQ_prep()
{
int isreg, itreg;
if (_Is_ == 0)
{
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_It_)
{
itreg = -1;
}
else
#endif
{
itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ);
}
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
if (itreg >= 0)
{
xCMP(xRegister16(itreg), 0);
}
else xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 1))], 0);
}
else if (_It_ == 0)
{
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
if (isreg >= 0)
{
xCMP(xRegister16(isreg), 0);
}
else xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0);
}
else
{
_addNeededX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_);
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_It_)
{
itreg = -1;
if (isreg <= 0)
{
// allocate fsreg
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, MODE_READ | MODE_WRITE);
xMOV(xRegister32(isreg), ptr[(void*)(SuperVUGetVIAddr(_Is_, 1))]);
}
else
isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
}
else
#endif
{
itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ);
}
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
if (isreg >= 0)
{
if (itreg >= 0)
{
xCMP(xRegister16(isreg), xRegister16(itreg));
}
else xCMP(xRegister16(isreg), ptr[(void*)(SuperVUGetVIAddr(_It_, 1))]);
}
else if (itreg >= 0)
{
xCMP(xRegister16(itreg), ptr[(void*)(SuperVUGetVIAddr(_Is_, 1))]);
}
else
{
isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
xCMP(xRegister16(isreg), ptr[(void*)(SuperVUGetVIAddr(_It_, 1))]);
}
}
}
void recVUMI_IBEQ(VURegs* vuu, s32 info)
{
recVUMI_IBQ_prep();
j8Ptr[ 0 ] = JNE8(0);
recVUMI_BranchHandle();
}
void recVUMI_IBGEZ(VURegs* vuu, s32 info)
{
int isreg;
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
if (isreg >= 0)
{
xTEST(xRegister16(isreg), xRegister16(isreg));
j8Ptr[ 0 ] = JS8(0);
}
else
{
xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0);
j8Ptr[ 0 ] = JL8(0);
}
recVUMI_BranchHandle();
}
void recVUMI_IBGTZ(VURegs* vuu, s32 info)
{
int isreg;
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
if (isreg >= 0)
{
xCMP(xRegister16(isreg), 0);
j8Ptr[ 0 ] = JLE8(0);
}
else
{
xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0);
j8Ptr[ 0 ] = JLE8(0);
}
recVUMI_BranchHandle();
}
void recVUMI_IBLEZ(VURegs* vuu, s32 info)
{
int isreg;
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
if (isreg >= 0)
{
xCMP(xRegister16(isreg), 0);
j8Ptr[ 0 ] = JG8(0);
}
else
{
xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0);
j8Ptr[ 0 ] = JG8(0);
}
recVUMI_BranchHandle();
}
void recVUMI_IBLTZ(VURegs* vuu, s32 info)
{
int isreg;
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
#ifdef SUPERVU_VIBRANCHDELAY
if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_)
{
isreg = -1;
}
else
#endif
{
isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
}
if (isreg >= 0)
{
xTEST(xRegister16(isreg), xRegister16(isreg));
j8Ptr[ 0 ] = JNS8(0);
}
else
{
xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0);
j8Ptr[ 0 ] = JGE8(0);
}
recVUMI_BranchHandle();
}
void recVUMI_IBNE(VURegs* vuu, s32 info)
{
recVUMI_IBQ_prep();
j8Ptr[ 0 ] = JE8(0);
recVUMI_BranchHandle();
}
void recVUMI_B(VURegs* vuu, s32 info)
{
// supervu will take care of the rest
int bpc = _recbranchAddr(VU->code);
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc);
// loops to self, so check condition
if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION))
{
SuperVUTestVU0Condition(0);
}
if (s_pCurBlock->blocks.size() > 1)
{
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
xMOV(xRegister32(s_JumpX86), 1);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1;
s_UnconditionalDelay = 1;
}
g_branch |= 3;
}
void recVUMI_BAL(VURegs* vuu, s32 info)
{
int bpc = _recbranchAddr(VU->code);
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc);
// loops to self, so check condition
if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION))
{
SuperVUTestVU0Condition(0);
}
if (_It_)
{
_deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2);
xMOV(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 0))], (pc + 8) >> 3);
}
if (s_pCurBlock->blocks.size() > 1)
{
s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE);
xMOV(xRegister32(s_JumpX86), 1);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1;
s_UnconditionalDelay = 1;
}
g_branch |= 3;
}
void recVUMI_JR(VURegs* vuu, s32 info)
{
int isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
xLEA(eax, ptr[xAddressReg(isreg) * (1<<3)]);
//Mask the address to something valid
if (vuu == &VU0)
xAND(eax, 0xfff);
else
xAND(eax, 0x3fff);
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_TPC, 0))], eax);
if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP))
{
xPUSH(s_vu);
xPUSH(eax);
}
g_branch |= 0x10; // 0x08 is reserved
}
void recVUMI_JALR(VURegs* vuu, s32 info)
{
_addNeededX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_);
int isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
xLEA(eax, ptr[xAddressReg(isreg) * (1<<3)]);
//Mask the address to something valid
if (vuu == &VU0)
xAND(eax, 0xfff);
else
xAND(eax, 0x3fff);
if (_It_)
{
_deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2);
xMOV(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 0))], (pc + 8) >> 3);
}
if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_TPC, 0))], eax);
if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP))
{
xPUSH(s_vu);
xPUSH(eax);
}
g_branch |= 4;
}
void recVUMI_XGKICK_(VURegs *VU)
{
pxAssert(s_XGKICKReg > 0 && x86regs[s_XGKICKReg].inuse && x86regs[s_XGKICKReg].type == X86TYPE_VITEMP);
x86regs[s_XGKICKReg].inuse = 0; // so free doesn't flush
_freeX86regs();
_freeXMMregs();
xMOV(ecx, xRegister32(s_XGKICKReg));
xCALL((void*)VU1XGKICK_MTGSTransfer);
s_ScheduleXGKICK = 0;
}
void recVUMI_XGKICK(VURegs *VU, int info)
{
if (s_ScheduleXGKICK) {
// second xgkick, so launch the first
recVUMI_XGKICK_(VU);
}
int isreg = _allocX86reg(ecx, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ);
_freeX86reg(isreg); // flush
x86regs[isreg].inuse = 1;
x86regs[isreg].type = X86TYPE_VITEMP;
x86regs[isreg].needed = 1;
x86regs[isreg].mode = MODE_WRITE | MODE_READ;
xSHL(xRegister32(isreg), 4);
xAND(xRegister32(isreg), 0x3fff);
s_XGKICKReg = isreg;
if (!SUPERVU_XGKICKDELAY || pc == s_pCurBlock->endpc) {
recVUMI_XGKICK_(VU);
}
else {
s_ScheduleXGKICK = (CHECK_XGKICKHACK) ? (std::min((u32)4, (s_pCurBlock->endpc-pc)/8)) : 2;
}
}
void recVU_UPPER_FD_00(VURegs* VU, s32 info);
void recVU_UPPER_FD_01(VURegs* VU, s32 info);
void recVU_UPPER_FD_10(VURegs* VU, s32 info);
void recVU_UPPER_FD_11(VURegs* VU, s32 info);
void recVULowerOP(VURegs* VU, s32 info);
void recVULowerOP_T3_00(VURegs* VU, s32 info);
void recVULowerOP_T3_01(VURegs* VU, s32 info);
void recVULowerOP_T3_10(VURegs* VU, s32 info);
void recVULowerOP_T3_11(VURegs* VU, s32 info);
void recVUunknown(VURegs* VU, s32 info);
void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info) =
{
recVUMI_LQ , recVUMI_SQ , recVUunknown , recVUunknown,
recVUMI_ILW , recVUMI_ISW , recVUunknown , recVUunknown,
recVUMI_IADDIU, recVUMI_ISUBIU, recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_FCEQ , recVUMI_FCSET , recVUMI_FCAND, recVUMI_FCOR, /* 0x10 */
recVUMI_FSEQ , recVUMI_FSSET , recVUMI_FSAND, recVUMI_FSOR,
recVUMI_FMEQ , recVUunknown , recVUMI_FMAND, recVUMI_FMOR,
recVUMI_FCGET , recVUunknown , recVUunknown , recVUunknown,
recVUMI_B , recVUMI_BAL , recVUunknown , recVUunknown, /* 0x20 */
recVUMI_JR , recVUMI_JALR , recVUunknown , recVUunknown,
recVUMI_IBEQ , recVUMI_IBNE , recVUunknown , recVUunknown,
recVUMI_IBLTZ , recVUMI_IBGTZ , recVUMI_IBLEZ, recVUMI_IBGEZ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVULowerOP , recVUunknown , recVUunknown , recVUunknown, /* 0x40*/
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x50 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x60 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x70 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
};
void (*recVULowerOP_T3_00_OPCODE[32])(VURegs* VU, s32 info) =
{
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_MOVE , recVUMI_LQI , recVUMI_DIV , recVUMI_MTIR,
recVUMI_RNEXT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUMI_MFP , recVUMI_XTOP , recVUMI_XGKICK,
recVUMI_ESADD , recVUMI_EATANxy, recVUMI_ESQRT, recVUMI_ESIN,
};
void (*recVULowerOP_T3_01_OPCODE[32])(VURegs* VU, s32 info) =
{
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_MR32 , recVUMI_SQI , recVUMI_SQRT , recVUMI_MFIR,
recVUMI_RGET , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUMI_XITOP, recVUunknown,
recVUMI_ERSADD, recVUMI_EATANxz, recVUMI_ERSQRT, recVUMI_EATAN,
};
void (*recVULowerOP_T3_10_OPCODE[32])(VURegs* VU, s32 info) =
{
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUMI_LQD , recVUMI_RSQRT, recVUMI_ILWR,
recVUMI_RINIT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_ELENG , recVUMI_ESUM , recVUMI_ERCPR, recVUMI_EEXP,
};
void (*recVULowerOP_T3_11_OPCODE[32])(VURegs* VU, s32 info) =
{
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUMI_SQD , recVUMI_WAITQ, recVUMI_ISWR,
recVUMI_RXOR , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_ERLENG, recVUunknown , recVUMI_WAITP, recVUunknown,
};
void (*recVULowerOP_OPCODE[64])(VURegs* VU, s32 info) =
{
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x20 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUMI_IADD , recVUMI_ISUB , recVUMI_IADDI, recVUunknown, /* 0x30 */
recVUMI_IAND , recVUMI_IOR , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVULowerOP_T3_00, recVULowerOP_T3_01, recVULowerOP_T3_10, recVULowerOP_T3_11,
};
void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info) =
{
recVUMI_ADDx , recVUMI_ADDy , recVUMI_ADDz , recVUMI_ADDw,
recVUMI_SUBx , recVUMI_SUBy , recVUMI_SUBz , recVUMI_SUBw,
recVUMI_MADDx , recVUMI_MADDy , recVUMI_MADDz , recVUMI_MADDw,
recVUMI_MSUBx , recVUMI_MSUBy , recVUMI_MSUBz , recVUMI_MSUBw,
recVUMI_MAXx , recVUMI_MAXy , recVUMI_MAXz , recVUMI_MAXw, /* 0x10 */
recVUMI_MINIx , recVUMI_MINIy , recVUMI_MINIz , recVUMI_MINIw,
recVUMI_MULx , recVUMI_MULy , recVUMI_MULz , recVUMI_MULw,
recVUMI_MULq , recVUMI_MAXi , recVUMI_MULi , recVUMI_MINIi,
recVUMI_ADDq , recVUMI_MADDq , recVUMI_ADDi , recVUMI_MADDi, /* 0x20 */
recVUMI_SUBq , recVUMI_MSUBq , recVUMI_SUBi , recVUMI_MSUBi,
recVUMI_ADD , recVUMI_MADD , recVUMI_MUL , recVUMI_MAX,
recVUMI_SUB , recVUMI_MSUB , recVUMI_OPMSUB, recVUMI_MINI,
recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVUunknown , recVUunknown , recVUunknown , recVUunknown,
recVU_UPPER_FD_00, recVU_UPPER_FD_01, recVU_UPPER_FD_10, recVU_UPPER_FD_11,
};
void (*recVU_UPPER_FD_00_TABLE[32])(VURegs* VU, s32 info) =
{
recVUMI_ADDAx, recVUMI_SUBAx , recVUMI_MADDAx, recVUMI_MSUBAx,
recVUMI_ITOF0, recVUMI_FTOI0, recVUMI_MULAx , recVUMI_MULAq ,
recVUMI_ADDAq, recVUMI_SUBAq, recVUMI_ADDA , recVUMI_SUBA ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
};
void (*recVU_UPPER_FD_01_TABLE[32])(VURegs* VU, s32 info) =
{
recVUMI_ADDAy , recVUMI_SUBAy , recVUMI_MADDAy, recVUMI_MSUBAy,
recVUMI_ITOF4 , recVUMI_FTOI4 , recVUMI_MULAy , recVUMI_ABS ,
recVUMI_MADDAq, recVUMI_MSUBAq, recVUMI_MADDA , recVUMI_MSUBA ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
};
void (*recVU_UPPER_FD_10_TABLE[32])(VURegs* VU, s32 info) =
{
recVUMI_ADDAz , recVUMI_SUBAz , recVUMI_MADDAz, recVUMI_MSUBAz,
recVUMI_ITOF12, recVUMI_FTOI12, recVUMI_MULAz , recVUMI_MULAi ,
recVUMI_ADDAi, recVUMI_SUBAi , recVUMI_MULA , recVUMI_OPMULA,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
};
void (*recVU_UPPER_FD_11_TABLE[32])(VURegs* VU, s32 info) =
{
recVUMI_ADDAw , recVUMI_SUBAw , recVUMI_MADDAw, recVUMI_MSUBAw,
recVUMI_ITOF15, recVUMI_FTOI15, recVUMI_MULAw , recVUMI_CLIP ,
recVUMI_MADDAi, recVUMI_MSUBAi, recVUunknown , recVUMI_NOP ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
recVUunknown , recVUunknown , recVUunknown , recVUunknown ,
};
void recVU_UPPER_FD_00(VURegs* VU, s32 info)
{
recVU_UPPER_FD_00_TABLE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVU_UPPER_FD_01(VURegs* VU, s32 info)
{
recVU_UPPER_FD_01_TABLE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVU_UPPER_FD_10(VURegs* VU, s32 info)
{
recVU_UPPER_FD_10_TABLE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVU_UPPER_FD_11(VURegs* VU, s32 info)
{
recVU_UPPER_FD_11_TABLE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVULowerOP(VURegs* VU, s32 info)
{
recVULowerOP_OPCODE[ VU->code & 0x3f ](VU, info);
}
void recVULowerOP_T3_00(VURegs* VU, s32 info)
{
recVULowerOP_T3_00_OPCODE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVULowerOP_T3_01(VURegs* VU, s32 info)
{
recVULowerOP_T3_01_OPCODE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVULowerOP_T3_10(VURegs* VU, s32 info)
{
recVULowerOP_T3_10_OPCODE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVULowerOP_T3_11(VURegs* VU, s32 info)
{
recVULowerOP_T3_11_OPCODE[(VU->code >> 6) & 0x1f ](VU, info);
}
void recVUunknown(VURegs* VU, s32 info)
{
Console.Warning("Unknown SVU micromode opcode called");
}
// --------------------------------------------------------------------------------------
// recSuperVU0 Interface
// --------------------------------------------------------------------------------------
recSuperVU0::recSuperVU0()
{
m_Idx = 0;
IsInterpreter = false;
}
void recSuperVU0::Reserve()
{
SuperVUAlloc(0);
}
void recSuperVU0::Shutdown() noexcept
{
SuperVUDestroy( 0 );
}
void recSuperVU0::Reset()
{
SuperVUReset( 0 );
}
void recSuperVU0::Execute(u32 cycles)
{
if ((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return;
runCycles = cycles;
SuperVUExecuteProgram(VU0.VI[REG_TPC].UL & 0xfff, 0);
}
void recSuperVU0::Clear(u32 Addr, u32 Size)
{
SuperVUClear(Addr, Size, 0);
}
uint recSuperVU0::GetCacheReserve() const
{
return sVU_EXESIZE / _1mb;
}
void recSuperVU0::SetCacheReserve( uint reserveInMegs ) const
{
//microVU0.cacheSize = reserveInMegs * _1mb;
}
// --------------------------------------------------------------------------------------
// recSuperVU1 Interface
// --------------------------------------------------------------------------------------
recSuperVU1::recSuperVU1()
{
m_Idx = 1;
IsInterpreter = false;
}
void recSuperVU1::Reserve()
{
SuperVUAlloc(1);
}
void recSuperVU1::Shutdown() noexcept
{
vu1Thread.WaitVU();
SuperVUDestroy( 1 );
}
void recSuperVU1::Reset()
{
vu1Thread.WaitVU();
SuperVUReset( 1 );
}
uint recSuperVU1::GetCacheReserve() const
{
return sVU_EXESIZE / _1mb;
}
void recSuperVU1::SetCacheReserve( uint reserveInMegs ) const
{
//microVU0.cacheSize = reserveInMegs * _1mb;
}
void recSuperVU1::Execute(u32 cycles)
{
if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return;
pxAssert( (VU1.VI[REG_TPC].UL&7) == 0 );
// [TODO] Debugging pre- and post- hooks?
do { // while loop needed since not always will return finished
SuperVUExecuteProgram(VU1.VI[REG_TPC].UL & VU1_PROGMASK, 1);
} while( VU0.VI[REG_VPU_STAT].UL&0x100 );
}
void recSuperVU1::Clear(u32 Addr, u32 Size)
{
SuperVUClear(Addr, Size, 1);
}