mirror of https://github.com/PCSX2/pcsx2.git
3640 lines
109 KiB
C++
3640 lines
109 KiB
C++
/* Pcsx2 - Pc Ps2 Emulator
|
|
* Copyright (C) 2002-2005 Pcsx2 Team
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
// Super VU recompiler - author: zerofrog(@gmail.com)
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <float.h>
|
|
#include <malloc.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#include "Common.h"
|
|
#include "ix86/ix86.h"
|
|
#include "ir5900.h"
|
|
#include "iMMI.h"
|
|
#include "iCP0.h"
|
|
#include "VUmicro.h"
|
|
#include "iVUmicro.h"
|
|
#include "iVUops.h"
|
|
#include "VUops.h"
|
|
|
|
#include "iVUzerorec.h"
|
|
|
|
// temporary externs
|
|
extern u32 vudump;
|
|
extern void iDumpVU0Registers();
|
|
extern void iDumpVU1Registers();
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#include <vector>
|
|
#include <list>
|
|
#include <map>
|
|
#include <algorithm>
|
|
using namespace std;
|
|
|
|
#ifdef __MSCW32__
|
|
|
|
#pragma warning(disable:4244)
|
|
#pragma warning(disable:4761)
|
|
#endif
|
|
|
|
// ---------------------------------------------------------------------------
// SuperVURec build options
// ---------------------------------------------------------------------------

// Invalidated VU programs are kept around and looked up again by checksum
// (see SuperVUClear / SuperVUGetProgram). A checksum collision may select the
// wrong program - disable when in doubt.
#define SUPERVU_CACHING
// Use x86 register caching (faster).
#define SUPERVU_X86CACHING
// Registers won't be flushed at block boundaries (faster).
#define SUPERVU_INTERCACHING

// Size in bytes of the buffer that receives the generated x86 code.
#define VU_EXESIZE 0x00800000

// Immediate field of the instruction word `vucode` (which must be in scope at
// the point of use). _Imm11_ ORs in 0xfffffc00 when bit 0x400 is set; both
// forms are finally masked with 0x3fff.
#define _Imm11_ ((s32)(vucode & 0x400 ? 0xfffffc00 | (vucode & 0x3ff) : vucode & 0x3ff)&0x3fff)
#define _UImm11_ ((s32)(vucode & 0x7ff)&0x3fff)

// 5-bit register fields of the current instruction (VU->code).
#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register
|
|
|
|
// Stall cycle counts for instructions writing Q / P, indexed by
// VuInstruction::info.pqinst (see SuperVUBuildBlocks).
static const u32 QWaitTimes[] = { 6, 12 };
static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 };

// info passed into rec insts
static u32 s_vuInfo;

// Micro memory size in bytes of VU0 and VU1, indexed by vu index.
static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE};

// Start of the generated-code buffer and the current write position in it.
static char* s_recVUMem = NULL;
static char* s_recVUPtr = NULL;

// Opcode dispatch tables (defined elsewhere).
extern void (*recSVU_UPPER_OPCODE[64])();
extern void (*recSVU_LOWER_OPCODE[128])();
|
|
|
|
// VuInstruction::type flags.
#define INST_Q_READ 0x0001 // flush Q
#define INST_P_READ 0x0002 // flush P
#define INST_BRANCH_DELAY 0x0004
#define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future
#define INST_STATUS_WRITE 0x0080
#define INST_MAC_WRITE 0x0100
#define INST_Q_WRITE 0x0200
#define INST_DUMMY_ 0x8000
// INST_DUMMY_ combined with the four *_WRITE flags above
// (0x8000|0x0200|0x0100|0x0080|0x0040).
#define INST_DUMMY 0x83c0

// Iterate `it` (an already declared iterator) over every element of container `v`.
#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it))
|
|
|
|
union VURecRegs
|
|
{
|
|
struct {
|
|
u16 reg;
|
|
u16 type;
|
|
};
|
|
u32 id;
|
|
};
|
|
|
|
class VuBaseBlock;
|
|
|
|
struct VuFunctionHeader
|
|
{
|
|
struct RANGE
|
|
{
|
|
u16 start, size;
|
|
u32 checksum[2]; // xor of all the code
|
|
};
|
|
|
|
VuFunctionHeader() : pprogfunc(NULL), startpc(0xffffffff) {}
|
|
|
|
// returns true if the checksum for the current mem is the same as this fn
|
|
bool IsSame(void* pmem);
|
|
|
|
u32 startpc;
|
|
void* pprogfunc;
|
|
|
|
vector<RANGE> ranges;
|
|
};
|
|
|
|
struct VuBlockHeader
|
|
{
|
|
VuBaseBlock* pblock;
|
|
u32 delay;
|
|
};
|
|
|
|
// one vu inst (lower and upper)
|
|
class VuInstruction
|
|
{
|
|
public:
|
|
VuInstruction() { memset(this, 0, sizeof(VuInstruction)); nParentPc = -1; }
|
|
|
|
int nParentPc; // used for syncing with flag writes, -1 for no parent
|
|
|
|
_vuopinfo info;
|
|
|
|
_VURegsNum regs[2]; // [0] - lower, [1] - upper
|
|
u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float
|
|
u32 addvars[2]; // live variables to add
|
|
u32 usedvars[2]; // set if var is used in the future including vars used in this inst
|
|
u32 keepvars[2];
|
|
u16 pqcycles; // the number of cycles to stall if function writes to the regs
|
|
u16 type; // INST_
|
|
|
|
u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags
|
|
u32 vffree[2];
|
|
s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2];
|
|
s8 vfflush[2]; // extra flush regs
|
|
|
|
int SetCachedRegs(int upper, u32 vuxyz);
|
|
void Recompile(list<VuInstruction>::const_iterator& itinst, u32 vuxyz);
|
|
};
|
|
|
|
// VuBaseBlock::type flags.
#define BLOCKTYPE_EOP 0x01 // at least one of the children of the block contains eop (or the block itself)
#define BLOCKTYPE_FUNCTION 0x02
#define BLOCKTYPE_HASEOP 0x04 // last inst of block is an eop
#define BLOCKTYPE_MACFLAGS 0x08
#define BLOCKTYPE_ANALYZED 0x40
#define BLOCKTYPE_IGNORE 0x80 // special for recursive fns
#define BLOCKTYPE_ANALYZEDPARENT 0x100
|
|
|
|
// base block used when recompiling
|
|
class VuBaseBlock
|
|
{
|
|
public:
|
|
typedef list<VuBaseBlock*> LISTBLOCKS;
|
|
|
|
VuBaseBlock();
|
|
|
|
// returns true if the leads to a EOP (ALL VU blocks must ret true)
|
|
void AssignVFRegs();
|
|
void AssignVIRegs(int parent);
|
|
|
|
// returns true if only xyz of the reg has been used so far
|
|
u32 GetModeXYZW(u32 curpc, int vfreg);
|
|
|
|
list<VuInstruction>::iterator GetInstIterAtPc(int instpc)
|
|
{
|
|
assert( instpc >= 0 );
|
|
|
|
u32 curpc = startpc;
|
|
list<VuInstruction>::iterator it;
|
|
for(it = insts.begin(); it != insts.end(); ++it) {
|
|
if( it->type & INST_DUMMY )
|
|
continue;
|
|
|
|
if( curpc == instpc )
|
|
break;
|
|
curpc += 8;
|
|
}
|
|
|
|
assert( it != insts.end() );
|
|
return it;
|
|
}
|
|
|
|
VuInstruction* GetInstAtPc(int instpc) { return &(*GetInstIterAtPc(instpc)); }
|
|
|
|
void Recompile();
|
|
|
|
u16 type; // BLOCKTYPE_
|
|
u16 id;
|
|
u16 startpc;
|
|
u16 endpc; // first inst not in block
|
|
void* pcode; // x86 code pointer
|
|
int cycles;
|
|
list<VuInstruction> insts;
|
|
list<VuBaseBlock*> parents;
|
|
LISTBLOCKS blocks; // blocks branches to
|
|
u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs
|
|
// if highest bit is set, addr needs to be relational
|
|
u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only
|
|
u32 vuxy; // corresponding bit is set if reg's xyz channels are used only
|
|
|
|
_xmmregs startregs[XMMREGS], endregs[XMMREGS];
|
|
int nStartx86, nEndx86; // indices into s_vecRegArray
|
|
|
|
int allocX86Regs;
|
|
};
|
|
|
|
// Puts a new block into a well-defined empty state: no type flags, empty
// address range, no generated code, no cached register state and no x86
// register snapshots (-1).
VuBaseBlock::VuBaseBlock()
{
    type = 0; id = 0;
    // startpc, vuxyz and vuxy used to be left uninitialized; vuxyz/vuxy are read
    // by SuperVUDumpBlock, so give them a defined value as well.
    startpc = 0; endpc = 0;
    vuxyz = 0; vuxy = 0;
    cycles = 0; pcode = NULL;

    memset(pChildJumps, 0, sizeof(pChildJumps));
    memset(startregs, 0, sizeof(startregs));
    memset(endregs, 0, sizeof(endregs));

    allocX86Regs = nStartx86 = nEndx86 = -1;
}
|
|
|
|
#define SUPERVU_STACKSIZE 0x1000
|
|
|
|
static list<VuFunctionHeader*> s_listVUHeaders[2];
|
|
static list<VuFunctionHeader*>* s_plistCachedHeaders[2];
|
|
static VuFunctionHeader** recVUHeaders[2] = {NULL};
|
|
static VuBlockHeader* recVUBlocks[2] = {NULL};
|
|
static u8* recVUStack = NULL, *recVUStackPtr = NULL;
|
|
static vector<_x86regs> s_vecRegArray(128);
|
|
|
|
static VURegs* VU = NULL;
|
|
static list<VuBaseBlock*> s_listBlocks;
|
|
static u32 s_vu = 0;
|
|
|
|
// Global functions
|
|
static void* SuperVUGetProgram(u32 startpc, int vuindex);
|
|
static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex);
|
|
static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc);
|
|
static void SuperVUInitLiveness(VuBaseBlock* pblock);
|
|
static void SuperVULivenessAnalysis();
|
|
static void SuperVUEliminateDeadCode();
|
|
static void SuperVUAssignRegs();
|
|
|
|
//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg);
|
|
#define SuperVUFreeXMMreg 0&&
|
|
void SuperVUFreeXMMregs(u32* livevars);
|
|
|
|
static u32* SuperVUStaticAlloc(u32 size);
|
|
static void SuperVURecompile();
|
|
|
|
// allocate VU resources
|
|
void SuperVUInit(int vuindex)
|
|
{
|
|
if( vuindex < 0 ) {
|
|
s_recVUMem = (char*)SysMmap(0, VU_EXESIZE);
|
|
memset(s_recVUMem, 0xcd, VU_EXESIZE);
|
|
s_recVUPtr = s_recVUMem;
|
|
recVUStack = new u8[SUPERVU_STACKSIZE * 4];
|
|
}
|
|
else {
|
|
recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex]/8];
|
|
recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex]/8];
|
|
s_plistCachedHeaders[vuindex] = new list<VuFunctionHeader*>[s_MemSize[vuindex]/8];
|
|
}
|
|
}
|
|
|
|
// destroy VU resources
|
|
void SuperVUDestroy(int vuindex)
|
|
{
|
|
list<VuFunctionHeader*>::iterator it;
|
|
|
|
if( vuindex < 0 ) {
|
|
SuperVUDestroy(0);
|
|
SuperVUDestroy(1);
|
|
SysMunmap((uptr)s_recVUMem, VU_EXESIZE);
|
|
s_recVUPtr = NULL;
|
|
delete[] recVUStack; recVUStack = NULL;
|
|
}
|
|
else {
|
|
delete[] recVUHeaders[vuindex]; recVUHeaders[vuindex] = NULL;
|
|
delete[] recVUBlocks[vuindex]; recVUBlocks[vuindex] = NULL;
|
|
|
|
if( s_plistCachedHeaders[vuindex] != NULL ) {
|
|
for(u32 j = 0; j < s_MemSize[vuindex]/8; ++j) {
|
|
FORIT(it, s_plistCachedHeaders[vuindex][j]) delete *it;
|
|
s_plistCachedHeaders[vuindex][j].clear();
|
|
}
|
|
delete[] s_plistCachedHeaders[vuindex]; s_plistCachedHeaders[vuindex] = NULL;
|
|
}
|
|
|
|
FORIT(it, s_listVUHeaders[vuindex]) delete *it;
|
|
s_listVUHeaders[vuindex].clear();
|
|
}
|
|
}
|
|
|
|
// reset VU
|
|
void SuperVUReset(int vuindex)
|
|
{
|
|
list<VuFunctionHeader*>::iterator it;
|
|
|
|
if( vuindex < 0 ) {
|
|
SuperVUReset(0);
|
|
SuperVUReset(1);
|
|
|
|
//memset(s_recVUMem, 0xcd, VU_EXESIZE);
|
|
s_recVUPtr = s_recVUMem;
|
|
|
|
memset(recVUStack, 0, SUPERVU_STACKSIZE);
|
|
}
|
|
else {
|
|
if( recVUHeaders[vuindex] ) memset( recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex]/8) );
|
|
if( recVUBlocks[vuindex] ) memset( recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex]/8) );
|
|
|
|
if( s_plistCachedHeaders[vuindex] != NULL ) {
|
|
for(u32 j = 0; j < s_MemSize[vuindex]/8; ++j) {
|
|
FORIT(it, s_plistCachedHeaders[vuindex][j]) delete *it;
|
|
s_plistCachedHeaders[vuindex][j].clear();
|
|
}
|
|
}
|
|
|
|
FORIT(it, s_listVUHeaders[vuindex]) delete *it;
|
|
s_listVUHeaders[vuindex].clear();
|
|
}
|
|
}
|
|
|
|
// clear the block and any joining blocks
|
|
void SuperVUClear(u32 startpc, u32 size, int vuindex)
|
|
{
|
|
vector<VuFunctionHeader::RANGE>::iterator itrange;
|
|
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
|
|
u32 endpc = startpc+size;
|
|
while( it != s_listVUHeaders[vuindex].end() ) {
|
|
|
|
// for every fn, check if it has code in the range
|
|
FORIT(itrange, (*it)->ranges) {
|
|
if( startpc < (u32)itrange->start+itrange->size && itrange->start < endpc )
|
|
break;
|
|
}
|
|
|
|
if( itrange != (*it)->ranges.end() ) {
|
|
recVUHeaders[vuindex][(*it)->startpc/8] = NULL;
|
|
#ifdef SUPERVU_CACHING
|
|
list<VuFunctionHeader*>* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc/8];
|
|
plist->push_back(*it);
|
|
if( plist->size() > 10 ) {
|
|
// list is too big, delete
|
|
delete plist->front();
|
|
plist->pop_front();
|
|
}
|
|
it = s_listVUHeaders[vuindex].erase(it);
|
|
#else
|
|
delete *it;
|
|
it = s_listVUHeaders[vuindex].erase(it);
|
|
#endif
|
|
}
|
|
else ++it;
|
|
}
|
|
}
|
|
|
|
static VuFunctionHeader* s_pFnHeader = NULL;
|
|
static VuBaseBlock* s_pCurBlock = NULL;
|
|
static VuInstruction* s_pCurInst = NULL;
|
|
static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs
|
|
static u32 s_PrevStatusWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0;
|
|
static u32 s_WriteToReadQ = 0;
|
|
|
|
int SuperVUGetLiveness(int vfreg)
|
|
{
|
|
assert( s_pCurInst != NULL );
|
|
if( vfreg == 32 ) return ((s_pCurInst->livevars[0]&(1<<REG_ACC_FLAG))?1:0)|((s_pCurInst->usedvars[0]&(1<<REG_ACC_FLAG))?2:0);
|
|
else if( vfreg == 0 ) return ((s_pCurInst->livevars[0]&(1<<REG_VF0_FLAG))?1:0)|((s_pCurInst->usedvars[0]&(1<<REG_VF0_FLAG))?2:0);
|
|
|
|
return ((s_pCurInst->livevars[1]&(1<<vfreg))?1:0)|((s_pCurInst->usedvars[1]&(1<<vfreg))?2:0);
|
|
}
|
|
|
|
// Address to use for VI register `reg` at the current instruction.
//   read == 0 : address to write to
//   read != 0 : address to read from
//   read == 2 : for the status / clip flags, the address of the previous write
// Registers without special handling map straight to VU->VI[reg].
u32 SuperVUGetVIAddr(int reg, int read)
{
    assert( s_pCurInst != NULL );

    switch(reg) {
        case REG_STATUS_FLAG:
        {
            u32 addr;
            if( read == 2 ) addr = s_PrevStatusWrite;
            else if( read ) addr = s_StatusRead;
            else addr = s_pCurInst->pStatusWrite;

            assert(!read || addr != 0);
            return addr;
        }

        case REG_MAC_FLAG:
            if( read )
                return s_MACRead;
            return s_pCurInst->pMACWrite;

        case REG_CLIP_FLAG:
        {
            u32 addr;
            if( read == 2 ) addr = s_PrevClipWrite;
            else if( read ) addr = s_ClipRead;
            else addr = s_pCurInst->pClipWrite;

            assert( !read || addr != 0 );
            return addr;
        }

        case REG_Q:
            // reads (and writes while s_WriteToReadQ is set) go to the VI copy
            if( read || s_WriteToReadQ )
                return (u32)&VU->VI[REG_Q];
            return (u32)&VU->q;

        case REG_P:
            if( read )
                return (u32)&VU->VI[REG_P];
            return (u32)&VU->p;

        case REG_I:
            return s_PrevIWrite;
    }

    return (u32)&VU->VI[reg];
}
|
|
|
|
// Writes a text dump of `blocks` (the blocks of the function currently being
// compiled, s_pFnHeader) to dumps/svu<0|1>_<startpc>.txt. For every block it
// prints the disassembly, the x86 register snapshots at block start/end (when
// present) and the per-instruction liveness / XMM assignment info.
// `vuindex` is not used; the VU is taken from s_vu.
// If the dump file cannot be created a message is printed and nothing is dumped.
void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
{
    FILE *f;
    char filename[ 256 ], str[256];
    u32 *mem;
    u32 i;

#ifdef __WIN32__
    CreateDirectory("dumps", NULL);
    sprintf( filename, "dumps\\svu%c_%.4X.txt", s_vu?'1':'0', s_pFnHeader->startpc );
#else
    mkdir("dumps", 0755);
    sprintf( filename, "dumps/svu%c_%.4X.txt", s_vu?'1':'0', s_pFnHeader->startpc );
#endif
    //SysPrintf( "dump1 %x => %s\n", s_pFnHeader->startpc, filename );

    f = fopen( filename, "w" );
    if( f == NULL ) {
        // fopen returns NULL on failure (e.g. the dumps directory could not be
        // created); writing through it would crash.
        SysPrintf("SuperVU: failed to open dump file %s\n", filename);
        return;
    }

    fprintf(f, "Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n");
    fprintf(f, "Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n"
        "%.2x - mac_write, %.2x -qflush\n",
        INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE);
    fprintf(f, "XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n");

    list<VuBaseBlock*>::iterator itblock;
    list<VuInstruction>::iterator itinst;
    VuBaseBlock::LISTBLOCKS::iterator itchild;

    FORIT(itblock, blocks) {
        VuBaseBlock* pblock = *itblock;

        // block header: address range, children and xyz/xy usage masks
        fprintf(f, "block:%c %x-%x; children: ", (pblock->type&BLOCKTYPE_HASEOP)?'*':' ',
            pblock->startpc, pblock->endpc-8);
        FORIT(itchild, pblock->blocks) {
            fprintf(f, "%x ", (*itchild)->startpc);
        }
        fprintf(f, "; vuxyz = %x, vuxy = %x\n", pblock->vuxyz&pblock->insts.front().usedvars[1],
            pblock->vuxy&pblock->insts.front().usedvars[1]);

        // disassembly; dummy entries do not advance the address
        itinst = pblock->insts.begin();
        i = pblock->startpc;
        while(itinst != pblock->insts.end() ) {
            assert( i <= pblock->endpc );
            if( itinst->type & INST_DUMMY ) {
                if( itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_)) {
                    // look the parent up only for its assertion that it exists in this block
                    pblock->GetInstAtPc(itinst->nParentPc);
                    fprintf(f, "writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc);
                }
            }
            else {
                mem = (u32*)&VU->Micro[i];
                char* pstr = disVU1MicroUF( mem[1], i+4 );
                fprintf(f, "%.4x: %-40s", i, pstr);
                // top bit of the upper word set: the lower word is printed as the I value
                if( mem[1] & 0x80000000 ) fprintf(f, " I=%f(%.8x)\n", *(float*)mem, mem[0]);
                else fprintf(f, "%s\n", disVU1MicroLF( mem[0], i ));
                i += 8;
            }

            ++itinst;
        }

        fprintf(f, "\n");

        // x86 register snapshots at the start / end of the block
        _x86regs* pregs;
        if( pblock->nStartx86 >= 0 || pblock->nEndx86 >= 0 ) {
            fprintf(f, "X86: AX CX DX BX SP BP SI DI\n");
        }

        if( pblock->nStartx86 >= 0 ) {
            pregs = &s_vecRegArray[pblock->nStartx86];
            fprintf(f, "STR: ");
            for(i = 0; i < X86REGS; ++i) {
                if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
                else fprintf(f, "-1 ");
            }
            fprintf(f, "\n");
        }

        if( pblock->nEndx86 >= 0 ) {
            fprintf(f, "END: ");
            pregs = &s_vecRegArray[pblock->nEndx86];
            for(i = 0; i < X86REGS; ++i) {
                if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
                else fprintf(f, "-1 ");
            }
            fprintf(f, "\n");
        }

        // per-instruction liveness and XMM info; the extra end() test keeps the
        // walk inside the list even if the instruction count and the address
        // range disagree
        itinst = pblock->insts.begin();
        for ( i = pblock->startpc; i < pblock->endpc && itinst != pblock->insts.end(); ++itinst ) {

            if( itinst->type & INST_DUMMY )
                continue;

            sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]);
            fprintf(f, "%-46s i:%.8x_%.8x c:%d pq:%d\n", str,
                itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles );

            sprintf(str, "XMM r0:%d r1:%d w:%d a:%d t:%x;",
                itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]);
            fprintf(f, "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str,
                itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]);
            i += 8;
        }

        fprintf(f, "\n---------------\n");
    }

    fclose( f );
}
|
|
|
|
static LARGE_INTEGER svubase, svufinal;
|
|
static u32 svutime;
|
|
|
|
// uncomment to count svu exec time
|
|
//#define SUPERVU_COUNT
|
|
// Returns the accumulated time counter (svutime); when `clear` is non-zero the
// counter is reset to 0 after being read.
u32 SuperVUGetRecTimes(int clear)
{
    const u32 elapsed = svutime;

    if( clear )
        svutime = 0;

    return elapsed;
}
|
|
|
|
// Private methods
|
|
// Returns the x86 entry point of the program starting at `startpc` on VU
// `vuindex`. Lookup order: the already compiled program for that slot, then
// (with SUPERVU_CACHING) a cached program whose checksums still match the
// current micro memory, and finally a fresh recompilation.
static void* SuperVUGetProgram(u32 startpc, int vuindex)
{
    assert( startpc < s_MemSize[vuindex] );
    assert( (startpc%8) == 0 );
    assert( recVUHeaders[vuindex] != NULL );

    VuFunctionHeader*& slot = recVUHeaders[vuindex][startpc/8];

    // already compiled
    if( slot != NULL ) {
        assert( slot->startpc == startpc );
        return slot->pprogfunc;
    }

#ifdef _DEBUG
//  if( vuindex ) VU1.VI[REG_TPC].UL = startpc;
//  else VU0.VI[REG_TPC].UL = startpc;
//  __Log("VU: %x\n", startpc);
//  iDumpVU1Registers();
//  vudump |= 2;
#endif

    // measure run time
    //QueryPerformanceCounter(&svubase);

#ifdef SUPERVU_CACHING
    {
        void* pmem = (vuindex&1) ? VU1.Micro : VU0.Micro;
        list<VuFunctionHeader*>& cached = s_plistCachedHeaders[vuindex][startpc/8];

        // check if program exists in cache
        for(list<VuFunctionHeader*>::iterator itcached = cached.begin(); itcached != cached.end(); ++itcached) {
            if( !(*itcached)->IsSame(pmem) )
                continue;

            // found, transfer to regular lists
            VuFunctionHeader* pfound = *itcached;
            slot = pfound;
            s_listVUHeaders[vuindex].push_back(pfound);
            cached.erase(itcached);
            return pfound->pprogfunc;
        }
    }
#endif

    slot = SuperVURecompileProgram(startpc, vuindex);

    //QueryPerformanceCounter(&svufinal);
    //svutime += (u32)(svufinal.QuadPart-svubase.QuadPart);

    assert( slot->pprogfunc != NULL );
    assert( slot->startpc == startpc );

    return slot->pprogfunc;
}
|
|
|
|
// Recomputes the checksum of every range of this function over the micro
// memory at `pmem` and compares it with the stored one. Returns false on the
// first mismatch, true when all ranges match. Without SUPERVU_CACHING nothing
// is compared and the result is always true.
bool VuFunctionHeader::IsSame(void* pmem)
{
#ifdef SUPERVU_CACHING
    for(vector<RANGE>::iterator itrange = ranges.begin(); itrange != ranges.end(); ++itrange) {
        u32 current[2];
        memxor_mmx(current, (u8*)pmem+itrange->start, itrange->size);

        const bool matches = current[0] == itrange->checksum[0] &&
                             current[1] == itrange->checksum[1];
        if( !matches )
            return false;
    }
#endif
    return true;
}
|
|
|
|
// Recompiles the VU program starting at `startpc` on VU `vuindex` and returns
// its new function header (also stored in s_pFnHeader and appended to
// s_listVUHeaders). Steps: build the block graph, link parents, run the
// liveness / dead code / register assignment passes, emit x86 code at
// s_recVUPtr, record the micro memory ranges of all blocks and finally free
// the temporary blocks.
static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex)
{
    assert( vuindex < 2 );
    assert( s_recVUPtr != NULL );
    //SysPrintf("svu%c rec: %x\n", '0'+vuindex, startpc);

    // if recPtr reached the mem limit reset whole mem
    if ( ( (u32)s_recVUPtr - (u32)s_recVUMem ) >= VU_EXESIZE-0x40000 ) {
        //SysPrintf("SuperVU reset mem\n");
        SuperVUReset(-1);
    }

    s_vu = vuindex;
    VU = s_vu ? &VU1 : &VU0;

    s_pFnHeader = new VuFunctionHeader();
    s_listVUHeaders[vuindex].push_back(s_pFnHeader);
    s_pFnHeader->startpc = startpc;

    memset( recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu]/8) );

    // analyze the global graph
    s_listBlocks.clear();
    SuperVUBuildBlocks(NULL, startpc);

    list<VuBaseBlock*>::iterator itcur;

    // fill parents: every child learns about the block that branches to it
    for(itcur = s_listBlocks.begin(); itcur != s_listBlocks.end(); ++itcur) {
        VuBaseBlock::LISTBLOCKS& children = (*itcur)->blocks;
        for(VuBaseBlock::LISTBLOCKS::iterator itkid = children.begin(); itkid != children.end(); ++itkid)
            (*itkid)->parents.push_back(*itcur);

        //(*itcur)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED);
    }

    // the first block is the function entry
    assert( s_listBlocks.front()->startpc == startpc );
    s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION;

    for(itcur = s_listBlocks.begin(); itcur != s_listBlocks.end(); ++itcur)
        SuperVUInitLiveness(*itcur);

    SuperVULivenessAnalysis();
    SuperVUEliminateDeadCode();
    SuperVUAssignRegs();

#ifdef _DEBUG
    // vudump bit 0 requests VU1 dumps, bit 4 requests VU0 dumps
    if( vudump & (s_vu ? 1 : 16) )
        SuperVUDumpBlock(s_listBlocks, s_vu);
#endif

    // code generation
    x86SetPtr(s_recVUPtr);
    _initXMMregs();
    branch = 0;

    SuperVURecompile();

    s_recVUPtr = x86Ptr;

    // set the function's range: one entry per block
    s_pFnHeader->ranges.reserve(s_listBlocks.size());

    VuFunctionHeader::RANGE range;
    for(itcur = s_listBlocks.begin(); itcur != s_listBlocks.end(); ++itcur) {
        range.start = (*itcur)->startpc;
        range.size = (*itcur)->endpc-(*itcur)->startpc;
#ifdef SUPERVU_CACHING
        memxor_mmx(range.checksum, &VU->Micro[range.start], range.size);
#endif
        s_pFnHeader->ranges.push_back(range);
    }

    // destroy the temporary blocks
    for(itcur = s_listBlocks.begin(); itcur != s_listBlocks.end(); ++itcur)
        delete *itcur;
    s_listBlocks.clear();

    assert( s_recVUPtr < s_recVUMem+VU_EXESIZE );

    return s_pFnHeader;
}
|
|
|
|
// Computes the target address of the branch instruction `vucode`, relative to
// the current `pc`: pc plus the instruction's immediate times 8, wrapped to
// the micro memory size of the VU being compiled (s_MemSize[s_vu]).
static int _recbranchAddr(u32 vucode) {
    // The former `if (bpc < 0)` fallback to _UImm11_ could never run because bpc
    // is unsigned; it has been removed. Out-of-range targets are handled by the
    // wrap-around mask below.
    u32 bpc = pc + (_Imm11_ << 3);
    bpc &= (s_MemSize[s_vu]-1);

    return bpc;
}
|
|
|
|
struct WRITEBACK
|
|
{
|
|
void InitInst(VuInstruction* pinst, int cycle)
|
|
{
|
|
u32 write = viwrite[0]|viwrite[1];
|
|
pinst->type = ((write&(1<<REG_CLIP_FLAG))?INST_CLIP_WRITE:0)|
|
|
((write&(1<<REG_MAC_FLAG))?INST_MAC_WRITE:0)|
|
|
((write&(1<<REG_STATUS_FLAG))?INST_STATUS_WRITE:0)|
|
|
((write&(1<<REG_Q))?INST_Q_WRITE:0);
|
|
pinst->nParentPc = nParentPc;
|
|
pinst->info.cycle = cycle;
|
|
for(int i = 0; i < 2; ++i) {
|
|
pinst->regs[i].VIwrite = viwrite[i];
|
|
pinst->regs[i].VIread = viread[i];
|
|
}
|
|
}
|
|
|
|
static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2) {
|
|
return w1.cycle < w2.cycle;
|
|
}
|
|
|
|
int nParentPc;
|
|
int cycle;
|
|
u32 viwrite[2];
|
|
u32 viread[2];
|
|
};
|
|
|
|
// return inst that flushes everything
|
|
static VuInstruction SuperVUFlushInst()
|
|
{
|
|
VuInstruction inst;
|
|
// don't need to raed q/p
|
|
inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ;
|
|
return inst;
|
|
}
|
|
|
|
static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc)
|
|
{
|
|
// check if block already exists
|
|
VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8];
|
|
if( pbh->pblock != NULL ) {
|
|
VuBaseBlock* pblock = pbh->pblock;
|
|
if( pblock->startpc == startpc )
|
|
return pblock;
|
|
|
|
// have to divide the blocks, pnewblock is first block
|
|
assert( startpc > pblock->startpc );
|
|
assert( startpc < pblock->endpc );
|
|
|
|
u32 dummyinst = (startpc-pblock->startpc)>>3;
|
|
|
|
// count inst non-dummy insts
|
|
list<VuInstruction>::iterator itinst = pblock->insts.begin();
|
|
u32 inst = 0;
|
|
while(dummyinst > 0) {
|
|
if( itinst->type & INST_DUMMY )
|
|
++itinst;
|
|
else {
|
|
++itinst;
|
|
--dummyinst;
|
|
}
|
|
}
|
|
|
|
// NOTE: still leaves insts with their writebacks in different blocks
|
|
while( itinst->type & INST_DUMMY )
|
|
++itinst;
|
|
|
|
int cycleoff = itinst->info.cycle;
|
|
|
|
// new block
|
|
VuBaseBlock* pnewblock = new VuBaseBlock();
|
|
s_listBlocks.push_back(pnewblock);
|
|
|
|
pnewblock->startpc = startpc;
|
|
pnewblock->endpc = pblock->endpc;
|
|
pnewblock->cycles = pblock->cycles-cycleoff;
|
|
|
|
pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks);
|
|
pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end());
|
|
pnewblock->type = pblock->type;
|
|
|
|
// any writebacks in the next 3 cycles also belong to original block
|
|
for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) {
|
|
if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) {
|
|
|
|
if( !(itinst->type & INST_Q_WRITE) )
|
|
pblock->insts.push_back(*itinst);
|
|
itinst = pnewblock->insts.erase(itinst);
|
|
continue;
|
|
}
|
|
|
|
++itinst;
|
|
}
|
|
|
|
pbh = &recVUBlocks[s_vu][startpc/8];
|
|
for(u32 inst = startpc; inst < pblock->endpc; inst += 8) {
|
|
if( pbh->pblock == pblock )
|
|
pbh->pblock = pnewblock;
|
|
++pbh;
|
|
}
|
|
|
|
FORIT(itinst, pnewblock->insts)
|
|
itinst->info.cycle -= cycleoff;
|
|
|
|
// old block
|
|
pblock->blocks.push_back(pnewblock);
|
|
pblock->endpc = startpc;
|
|
pblock->cycles = cycleoff;
|
|
pblock->type &= BLOCKTYPE_MACFLAGS;;
|
|
//pblock->insts.push_back(SuperVUFlushInst()); //don't need
|
|
|
|
return pnewblock;
|
|
}
|
|
|
|
VuBaseBlock* pblock = new VuBaseBlock();
|
|
s_listBlocks.push_back(pblock);
|
|
|
|
int i = 0;
|
|
branch = 0;
|
|
pc = startpc;
|
|
pblock->startpc = startpc;
|
|
|
|
// clear stalls (might be a prob)
|
|
memset(VU->fmac,0,sizeof(VU->fmac));
|
|
memset(&VU->fdiv,0,sizeof(VU->fdiv));
|
|
memset(&VU->efu,0,sizeof(VU->efu));
|
|
vucycle = 0;
|
|
|
|
u8 macflags = 0;
|
|
|
|
list< WRITEBACK > listWritebacks;
|
|
list< WRITEBACK >::iterator itwriteback;
|
|
list<VuInstruction>::iterator itinst;
|
|
u32 hasSecondBranch = 0;
|
|
u32 needFullStatusFlag = 0;
|
|
|
|
// first analysis pass for status flags
|
|
while(1) {
|
|
u32* ptr = (u32*)&VU->Micro[pc];
|
|
pc += 8;
|
|
int prevbranch = branch;
|
|
|
|
if( ptr[1] & 0x40000000 )
|
|
branch = 1;
|
|
|
|
if( !(ptr[1] & 0x80000000) ) { // not I
|
|
switch( ptr[0]>>25 ) {
|
|
case 0x24: // jr
|
|
case 0x25: // jalr
|
|
case 0x20: // B
|
|
case 0x21: // BAL
|
|
case 0x28: // IBEQ
|
|
case 0x2f: // IBGEZ
|
|
case 0x2d: // IBGTZ
|
|
case 0x2e: // IBLEZ
|
|
case 0x2c: // IBLTZ
|
|
case 0x29: // IBNE
|
|
branch = 1;
|
|
break;
|
|
|
|
case 0x14: // fseq
|
|
case 0x17: // fsor
|
|
//needFullStatusFlag = 2;
|
|
break;
|
|
|
|
case 0x16: // fsand
|
|
if( (ptr[0]&0xc0) ) {
|
|
// sometimes full sticky bits are needed (simple series 2000 - oane chapara)
|
|
//SysPrintf("needSticky: %x-%x\n", s_pFnHeader->startpc, startpc);
|
|
needFullStatusFlag = 2;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if( prevbranch )
|
|
break;
|
|
|
|
assert( pc < s_MemSize[s_vu] );
|
|
}
|
|
|
|
// second full pass
|
|
pc = startpc;
|
|
branch = 0;
|
|
|
|
while(1) {
|
|
|
|
if( !branch && pbh->pblock != NULL ) {
|
|
pblock->blocks.push_back(pbh->pblock);
|
|
break;
|
|
}
|
|
|
|
int prevbranch = branch;
|
|
|
|
if( !prevbranch ) {
|
|
pbh->pblock = pblock;
|
|
}
|
|
else assert( prevbranch || pbh->pblock == NULL);
|
|
|
|
pblock->insts.push_back(VuInstruction());
|
|
|
|
VuInstruction* pinst = &pblock->insts.back();
|
|
SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs);
|
|
|
|
if( prevbranch ) {
|
|
if( pinst->regs[0].pipe == VUPIPE_BRANCH )
|
|
hasSecondBranch = 1;
|
|
pinst->type |= INST_BRANCH_DELAY;
|
|
}
|
|
|
|
// check write back
|
|
for(itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ) {
|
|
if( pinst->info.cycle >= itwriteback->cycle ) {
|
|
itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction());
|
|
itwriteback->InitInst(&(*itinst), pinst->info.cycle);
|
|
itwriteback = listWritebacks.erase(itwriteback);
|
|
}
|
|
else ++itwriteback;
|
|
}
|
|
|
|
// add new writebacks
|
|
WRITEBACK w = {0};
|
|
const u32 allflags = (1<<REG_CLIP_FLAG)|(1<<REG_MAC_FLAG)|(1<<REG_STATUS_FLAG);
|
|
for(int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags;
|
|
|
|
if( pinst->info.macflag & VUOP_WRITE ) w.viwrite[1] |= (1<<REG_MAC_FLAG);
|
|
if( pinst->info.statusflag & VUOP_WRITE ) w.viwrite[1] |= (1<<REG_STATUS_FLAG);
|
|
|
|
if( (pinst->info.macflag|pinst->info.statusflag) & VUOP_READ )
|
|
macflags = 1;
|
|
if( pinst->regs[0].VIread & ((1<<REG_MAC_FLAG)|(1<<REG_STATUS_FLAG)) )
|
|
macflags = 1;
|
|
|
|
//uregs->VIwrite |= lregs->VIwrite & (1<<REG_STATUS_FLAG);
|
|
|
|
if( w.viwrite[0]|w.viwrite[1] ) {
|
|
|
|
// only if coming from fmac pipeline
|
|
if( ((pinst->info.statusflag&VUOP_WRITE)&&!(pinst->regs[0].VIwrite&(1<<REG_STATUS_FLAG))) && needFullStatusFlag ) {
|
|
// don't read if first inst
|
|
if( needFullStatusFlag == 1 )
|
|
w.viread[1] |= (1<<REG_STATUS_FLAG);
|
|
else --needFullStatusFlag;
|
|
}
|
|
|
|
for(int j = 0; j < 2; ++j) {
|
|
w.viread[j] |= pinst->regs[j].VIread & allflags;
|
|
|
|
if( (pinst->regs[j].VIread&(1<<REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1<<REG_STATUS_FLAG)) ) {
|
|
// don't need the read anymore
|
|
pinst->regs[j].VIread &= ~(1<<REG_STATUS_FLAG);
|
|
}
|
|
if( (pinst->regs[j].VIread&(1<<REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1<<REG_MAC_FLAG)) ) {
|
|
// don't need the read anymore
|
|
pinst->regs[j].VIread &= ~(1<<REG_MAC_FLAG);
|
|
}
|
|
|
|
pinst->regs[j].VIwrite &= ~allflags;
|
|
}
|
|
|
|
if( pinst->info.macflag & VUOP_READ) w.viread[1] |= 1<<REG_MAC_FLAG;
|
|
if( pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1<<REG_STATUS_FLAG;
|
|
|
|
w.nParentPc = pc-8;
|
|
w.cycle = pinst->info.cycle+4;
|
|
listWritebacks.push_back(w);
|
|
}
|
|
|
|
if( pinst->info.q&VUOP_READ ) pinst->type |= INST_Q_READ;
|
|
if( pinst->info.p&VUOP_READ ) pinst->type |= INST_P_READ;
|
|
|
|
if( pinst->info.q&VUOP_WRITE ) {
|
|
pinst->pqcycles = QWaitTimes[pinst->info.pqinst];
|
|
|
|
memset(&w, 0, sizeof(w));
|
|
w.nParentPc = pc-8;
|
|
w.cycle = pinst->info.cycle+pinst->pqcycles+1;
|
|
w.viwrite[0] = 1<<REG_Q;
|
|
listWritebacks.push_back(w);
|
|
}
|
|
if( pinst->info.p&VUOP_WRITE )
|
|
pinst->pqcycles = PWaitTimes[pinst->info.pqinst];
|
|
|
|
if( prevbranch )
|
|
break;
|
|
|
|
// make sure there is always a branch
|
|
if( (s_vu==1 && i >= 0x798) || (s_vu==0 && i >= 0x198) ) {
|
|
SysPrintf("VuRec base block doesn't terminate!\n");
|
|
break;
|
|
}
|
|
|
|
i++;
|
|
pbh++;
|
|
}
|
|
|
|
if( listWritebacks.size() > 0 ) {
|
|
listWritebacks.sort(WRITEBACK::SortWritebacks);
|
|
for(itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) {
|
|
if( itwriteback->viwrite[0] & (1<<REG_Q) ) {
|
|
// ignore all Q writebacks
|
|
continue;
|
|
}
|
|
|
|
pblock->insts.push_back(VuInstruction());
|
|
itwriteback->InitInst(&pblock->insts.back(), vucycle);
|
|
}
|
|
}
|
|
|
|
if( macflags )
|
|
pblock->type |= BLOCKTYPE_MACFLAGS;
|
|
|
|
u32 lastpc = pc;
|
|
pblock->endpc = lastpc;
|
|
|
|
pblock->cycles = vucycle;
|
|
|
|
if( !branch )
|
|
return pblock;
|
|
|
|
if( branch & 8 ) {
|
|
// what if also a jump?
|
|
pblock->type |= BLOCKTYPE_EOP|BLOCKTYPE_HASEOP;
|
|
|
|
// add an instruction to flush p and q (if written)
|
|
pblock->insts.push_back(SuperVUFlushInst());
|
|
return pblock;
|
|
}
|
|
|
|
// it is a (cond) branch or a jump
|
|
u32 vucode = *(u32*)(VU->Micro+lastpc-16);
|
|
int bpc = _recbranchAddr(vucode)-8;
|
|
|
|
switch(vucode>>25) {
|
|
case 0x24: // jr
|
|
pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP
|
|
pblock->insts.push_back(SuperVUFlushInst());
|
|
break;
|
|
|
|
case 0x25: // jalr
|
|
{
|
|
// linking, so will return to procedure
|
|
pblock->insts.push_back(SuperVUFlushInst());
|
|
|
|
VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
|
|
pblock->blocks.push_back(pjumpblock);
|
|
break;
|
|
}
|
|
case 0x20: // B
|
|
{
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
|
|
pblock->blocks.push_back(pbranchblock);
|
|
break;
|
|
}
|
|
case 0x21: // BAL
|
|
{
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
break;
|
|
}
|
|
case 0x28: // IBEQ
|
|
case 0x2f: // IBGEZ
|
|
case 0x2d: // IBGTZ
|
|
case 0x2e: // IBLEZ
|
|
case 0x2c: // IBLTZ
|
|
case 0x29: // IBNE
|
|
{
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
|
|
pbranchblock = SuperVUBuildBlocks(pblock, lastpc);
|
|
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
assert(pblock->blocks.size() == 1);
|
|
break;
|
|
}
|
|
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
|
|
if( hasSecondBranch ) {
|
|
u32 vucode = *(u32*)(VU->Micro+lastpc-8);
|
|
pc = lastpc;
|
|
int bpc = _recbranchAddr(vucode);
|
|
|
|
switch(vucode>>25) {
|
|
case 0x24: // jr
|
|
SysPrintf("svurec bad jr jump!\n");
|
|
assert(0);
|
|
break;
|
|
|
|
case 0x25: // jalr
|
|
{
|
|
SysPrintf("svurec bad jalr jump!\n");
|
|
assert(0);
|
|
break;
|
|
}
|
|
case 0x20: // B
|
|
{
|
|
assert(0);
|
|
pblock->blocks.clear();
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
|
|
pblock->blocks.push_back(pbranchblock);
|
|
break;
|
|
}
|
|
case 0x21: // BAL
|
|
{
|
|
assert(0);
|
|
pblock->blocks.clear();
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
break;
|
|
}
|
|
case 0x28: // IBEQ
|
|
case 0x2f: // IBGEZ
|
|
case 0x2d: // IBGTZ
|
|
case 0x2e: // IBLEZ
|
|
case 0x2c: // IBLTZ
|
|
case 0x29: // IBNE
|
|
{
|
|
VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc);
|
|
|
|
// update pblock since could have changed
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
|
|
pbranchblock = SuperVUBuildBlocks(pblock, lastpc+8);
|
|
|
|
pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
|
|
pblock->blocks.push_back(pbranchblock);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
assert(0);
|
|
}
|
|
}
|
|
|
|
return recVUBlocks[s_vu][startpc/8].pblock;
|
|
}
|
|
|
|
// Seeds the per-instruction bit sets that SuperVULivenessAnalysis() iterates over.
// Index 0 of every set holds VI/flag bits, index 1 holds VF bits.
//   addvars  - bits the instruction requires (reads, plus partially written VF regs)
//   keepvars - complement of the bits the instruction writes completely
//   usedvars - bits touched by this instruction or by any later one in the block
// pblock must contain at least one instruction.
static void SuperVUInitLiveness(VuBaseBlock* pblock)
{
    assert( pblock->insts.size() > 0 );

    list<VuInstruction>::iterator itcur;

    for(itcur = pblock->insts.begin(); itcur != pblock->insts.end(); ++itcur) {
        VuInstruction& inst = *itcur;

        if( inst.type & INST_DUMMY_ ) {
            // dummy instructions: every bit is added/live/kept, nothing counts as used
            inst.addvars[0] = inst.addvars[1] = 0xffffffff;
            inst.livevars[0] = inst.livevars[1] = 0xffffffff;
            inst.keepvars[0] = inst.keepvars[1] = 0xffffffff;
            inst.usedvars[0] = inst.usedvars[1] = 0;
            continue;
        }

        u32 viread = 0;     // VI bits read by either pipe
        u32 viwrite = 0;    // VI bits written by either pipe
        u32 vfneeded = 0;   // VF bits that have to be valid on entry
        u32 vfwrite = 0;    // VF regs overwritten in all four fields

        for(int pipe = 0; pipe < 2; ++pipe) {
            const _VURegsNum& r = inst.regs[pipe];
            const bool readsvf0 = (r.VIread & (1<<REG_VF0_FLAG)) != 0;

            viread |= r.VIread;
            viwrite |= r.VIwrite;

            // vf0 is not reported through VFread0/VFread1, only through the VF0 flag
            if( r.VFread0 ) vfneeded |= 1 << r.VFread0;
            else if( readsvf0 ) vfneeded |= 1;

            if( r.VFread1 ) vfneeded |= 1 << r.VFread1;
            else if( readsvf0 && r.VFr1xyzw != 0xff ) vfneeded |= 1;

            if( r.VFwrite != 0 ) {
                // a partial write (mask != 0xf) still depends on the old contents
                if( r.VFwxyzw != 0xf ) vfneeded |= 1 << r.VFwrite;
                else vfwrite |= 1 << r.VFwrite;
            }
        }

        // NOTE(review): the ACC flag bit is OR'ed into the VF mask (index 1) here, while ACC
        // reads arrive through VIread in index 0 - confirm this is intended.
        if( (inst.regs[1].VIwrite & (1<<REG_ACC_FLAG)) && inst.regs[1].VFwxyzw != 0xf )
            vfneeded |= 1<<REG_ACC_FLAG;

        inst.addvars[0] = viread;
        inst.addvars[1] = vfneeded;

        inst.usedvars[0] = viread | viwrite;
        inst.usedvars[1] = vfneeded | vfwrite;

        inst.keepvars[0] = ~viwrite;
        inst.keepvars[1] = ~vfwrite;
    }

    // fold usedvars backwards so each instruction also carries what follows it
    list<VuInstruction>::iterator itlater = pblock->insts.end();
    --itlater;

    while( itlater != pblock->insts.begin() ) {
        list<VuInstruction>::iterator itearlier = itlater;
        --itearlier;

        itearlier->usedvars[0] |= itlater->usedvars[0];
        itearlier->usedvars[1] |= itlater->usedvars[1];

        itlater = itearlier;
    }
}
|
|
|
|
// Liveness transfer function for a single instruction.
//   R - bits the instruction requires
//   K - bits the instruction keeps (does not overwrite)
//   L - bits live right after the instruction
// Returns R | (L & K). The status and MAC flag bits are handled specially: each of them
// is removed from the result when it is set in neither L nor K.
u32 COMPUTE_LIVE(u32 R, u32 K, u32 L)
{
    const u32 statusbit = 1<<REG_STATUS_FLAG;
    const u32 macbit = 1<<REG_MAC_FLAG;
    const u32 liveorkept = L | K;

    u32 live = R | (L & K);

    if( !(liveorkept & statusbit) ) live &= ~statusbit;
    if( !(liveorkept & macbit) ) live &= ~macbit;

    return live;
}
|
|
|
|
static void SuperVULivenessAnalysis()
|
|
{
|
|
BOOL changed;
|
|
list<VuBaseBlock*>::reverse_iterator itblock;
|
|
list<VuInstruction>::iterator itinst, itnext;
|
|
VuBaseBlock::LISTBLOCKS::iterator itchild;
|
|
|
|
u32 livevars[2];
|
|
|
|
do {
|
|
changed = FALSE;
|
|
for(itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock) {
|
|
|
|
u32 newlive;
|
|
VuBaseBlock* pb = *itblock;
|
|
|
|
// the last inst relies on the neighbor's insts
|
|
itinst = --pb->insts.end();
|
|
|
|
if( pb->blocks.size() > 0 ) {
|
|
livevars[0] = 0; livevars[1] = 0;
|
|
for( itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild) {
|
|
VuInstruction& front = (*itchild)->insts.front();
|
|
livevars[0] |= front.livevars[0];
|
|
livevars[1] |= front.livevars[1];
|
|
}
|
|
|
|
newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]);
|
|
if( itinst->livevars[0] != newlive ) {
|
|
changed = TRUE;
|
|
itinst->livevars[0] = newlive;
|
|
}
|
|
|
|
newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]);
|
|
if( itinst->livevars[1] != newlive ) {
|
|
changed = TRUE;
|
|
itinst->livevars[1] = newlive;
|
|
}
|
|
}
|
|
|
|
while( itinst != pb->insts.begin() ) {
|
|
|
|
itnext = itinst; --itnext;
|
|
|
|
newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]);
|
|
|
|
if( itnext->livevars[0] != newlive ) {
|
|
changed = TRUE;
|
|
itnext->livevars[0] = newlive;
|
|
itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
|
|
}
|
|
else {
|
|
newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
|
|
if( itnext->livevars[1] != newlive ) {
|
|
changed = TRUE;
|
|
itnext->livevars[1] = newlive;
|
|
}
|
|
}
|
|
|
|
itinst = itnext;
|
|
}
|
|
|
|
// if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) {
|
|
// changed = TRUE;
|
|
// itinst->livevars[0] |= livevars[0];
|
|
// }
|
|
// if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) {
|
|
// changed = TRUE;
|
|
// itinst->livevars[1] |= livevars[1];
|
|
// }
|
|
//
|
|
// while( itinst != pb->insts.begin() ) {
|
|
//
|
|
// itnext = itinst; --itnext;
|
|
// if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) {
|
|
// changed = TRUE;
|
|
// itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0];
|
|
// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
|
|
// }
|
|
// else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) {
|
|
// changed = TRUE;
|
|
// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
|
|
// }
|
|
//
|
|
// itinst = itnext;
|
|
// }
|
|
}
|
|
|
|
} while(changed);
|
|
}
|
|
|
|
static void SuperVUEliminateDeadCode()
|
|
{
|
|
list<VuBaseBlock*>::iterator itblock;
|
|
VuBaseBlock::LISTBLOCKS::iterator itchild;
|
|
list<VuInstruction>::iterator itinst, itnext;
|
|
|
|
FORIT(itblock, s_listBlocks) {
|
|
|
|
#ifdef _DEBUG
|
|
u32 startpc = (*itblock)->startpc;
|
|
u32 curpc = startpc;
|
|
#endif
|
|
|
|
itnext = (*itblock)->insts.begin();
|
|
itinst = itnext++;
|
|
while(itnext != (*itblock)->insts.end() ) {
|
|
if( itinst->type & INST_DUMMY ) {
|
|
itinst->regs[0].VIwrite &= itnext->livevars[0];
|
|
itinst->regs[1].VIwrite &= itnext->livevars[0];
|
|
u32 viwrite = itinst->regs[0].VIwrite|itinst->regs[1].VIwrite;
|
|
|
|
VuInstruction* parent = (*itblock)->GetInstAtPc(itinst->nParentPc);
|
|
|
|
if( viwrite & (1<<REG_CLIP_FLAG) ) {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_CLIP_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_CLIP_FLAG));
|
|
}
|
|
else
|
|
itinst->type &= ~INST_CLIP_WRITE;
|
|
|
|
if( parent->info.macflag && (itinst->type & INST_MAC_WRITE) ) {
|
|
if( !(viwrite&(1<<REG_MAC_FLAG)) ) {
|
|
parent->info.macflag = 0;
|
|
// parent->regs[0].VIwrite &= ~(1<<REG_MAC_FLAG);
|
|
// parent->regs[1].VIwrite &= ~(1<<REG_MAC_FLAG);
|
|
assert( !(parent->regs[0].VIwrite & (1<<REG_MAC_FLAG)) &&
|
|
!(parent->regs[1].VIwrite & (1<<REG_MAC_FLAG)) );
|
|
itinst->type &= ~INST_MAC_WRITE;
|
|
}
|
|
else {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_MAC_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_MAC_FLAG));
|
|
}
|
|
}
|
|
|
|
if( parent->info.statusflag && (itinst->type & INST_STATUS_WRITE)) {
|
|
if( !(viwrite&(1<<REG_STATUS_FLAG)) ) {
|
|
parent->info.statusflag = 0;
|
|
// parent->regs[0].VIwrite &= ~(1<<REG_STATUS_FLAG);
|
|
// parent->regs[1].VIwrite &= ~(1<<REG_STATUS_FLAG);
|
|
assert( !(parent->regs[0].VIwrite & (1<<REG_STATUS_FLAG)) &&
|
|
!(parent->regs[1].VIwrite & (1<<REG_STATUS_FLAG)) );
|
|
itinst->type &= ~INST_STATUS_WRITE;
|
|
}
|
|
else {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_STATUS_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_STATUS_FLAG));
|
|
}
|
|
}
|
|
|
|
if( itinst->type == 0 ) {
|
|
itnext = (*itblock)->insts.erase(itinst);
|
|
itinst = itnext++;
|
|
continue;
|
|
}
|
|
}
|
|
#ifdef _DEBUG
|
|
else curpc += 8;
|
|
#endif
|
|
itinst = itnext;
|
|
++itnext;
|
|
}
|
|
|
|
if( itinst->type & INST_DUMMY ) {
|
|
// last inst with the children
|
|
u32 mask = 0;
|
|
for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild) {
|
|
mask |= (*itchild)->insts.front().livevars[0];
|
|
}
|
|
itinst->regs[0].VIwrite &= mask;
|
|
itinst->regs[1].VIwrite &= mask;
|
|
u32 viwrite = itinst->regs[0].VIwrite|itinst->regs[1].VIwrite;
|
|
|
|
if( itinst->nParentPc >= 0 ) {
|
|
VuInstruction* parent = (*itblock)->GetInstAtPc(itinst->nParentPc);
|
|
|
|
if( viwrite & (1<<REG_CLIP_FLAG) ) {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_CLIP_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_CLIP_FLAG));
|
|
}
|
|
else itinst->type &= ~INST_CLIP_WRITE;
|
|
|
|
if( parent->info.macflag ) {
|
|
if( !(viwrite&(1<<REG_MAC_FLAG)) ) {
|
|
parent->info.macflag = 0;
|
|
assert( !(parent->regs[0].VIwrite & (1<<REG_MAC_FLAG)) &&
|
|
!(parent->regs[1].VIwrite & (1<<REG_MAC_FLAG)) );
|
|
itinst->type &= ~INST_MAC_WRITE;
|
|
}
|
|
else {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_MAC_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_MAC_FLAG));
|
|
}
|
|
}
|
|
else assert( !(itinst->type&INST_STATUS_WRITE) );
|
|
|
|
if( parent->info.statusflag ) {
|
|
if( !(viwrite&(1<<REG_STATUS_FLAG)) ) {
|
|
parent->info.statusflag = 0;
|
|
assert( !(parent->regs[0].VIwrite & (1<<REG_STATUS_FLAG)) &&
|
|
!(parent->regs[1].VIwrite & (1<<REG_STATUS_FLAG)) );
|
|
itinst->type &= ~INST_STATUS_WRITE;
|
|
}
|
|
else {
|
|
parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite&(1<<REG_STATUS_FLAG));
|
|
parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite&(1<<REG_STATUS_FLAG));
|
|
}
|
|
}
|
|
else assert( !(itinst->type&INST_STATUS_WRITE) );
|
|
|
|
if( itinst->type == 0 ) {
|
|
(*itblock)->insts.erase(itinst);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// assigns xmm/x86 regs to all instructions, ignore mode field
|
|
// returns true if changed
|
|
bool AlignStartRegsToEndRegs(_xmmregs* startregs, const list<VuBaseBlock*>& parents)
|
|
{
|
|
list<VuBaseBlock*>::const_iterator itblock, itblock2;
|
|
int bestscore;
|
|
_xmmregs bestregs;
|
|
bool bchanged = false;
|
|
|
|
// find the best merge of regs that minimizes writes/reads
|
|
for(int i = 0; i < XMMREGS; ++i) {
|
|
|
|
bestscore = 1000;
|
|
memset(&bestregs, 0, sizeof(bestregs));
|
|
|
|
FORIT(itblock, parents) {
|
|
int curscore = 0;
|
|
if( ((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->endregs[i].inuse ) {
|
|
int type = (*itblock)->endregs[i].type;
|
|
int reg = (*itblock)->endregs[i].reg;
|
|
|
|
FORIT(itblock2, parents) {
|
|
if( (*itblock2)->type & BLOCKTYPE_ANALYZED ) {
|
|
if( (*itblock2)->endregs[i].inuse ) {
|
|
if( (*itblock2)->endregs[i].type != type || (*itblock2)->endregs[i].reg != reg ) {
|
|
curscore += 1;
|
|
}
|
|
}
|
|
else curscore++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( curscore < 1 && curscore < bestscore ) {
|
|
memcpy(&bestregs, &(*itblock)->endregs[i], sizeof(bestregs));
|
|
bestscore = curscore;
|
|
}
|
|
}
|
|
|
|
if( bestscore < 1 ) {
|
|
if( startregs[i].inuse == bestregs.inuse ) {
|
|
if( bestregs.inuse && (startregs[i].type != bestregs.type || startregs[i].reg != bestregs.reg) )
|
|
bchanged = true;
|
|
}
|
|
else bchanged = true;
|
|
|
|
memcpy(&startregs[i], &bestregs, sizeof(bestregs));
|
|
FORIT(itblock, parents) memcpy(&(*itblock)->endregs[i], &bestregs, sizeof(bestregs));
|
|
}
|
|
else {
|
|
if( startregs[i].inuse ) bchanged = true;
|
|
startregs[i].inuse = 0;
|
|
FORIT(itblock, parents) (*itblock)->endregs[i].inuse = 0;
|
|
}
|
|
}
|
|
|
|
return bchanged;
|
|
}
|
|
|
|
void VuBaseBlock::AssignVFRegs()
|
|
{
|
|
int i;
|
|
VuBaseBlock::LISTBLOCKS::iterator itchild;
|
|
list<VuBaseBlock*>::iterator itblock;
|
|
list<VuInstruction>::iterator itinst, itnext, itinst2;
|
|
|
|
// init the start regs
|
|
if( type & BLOCKTYPE_ANALYZED ) return; // nothing changed
|
|
memcpy(xmmregs, startregs, sizeof(xmmregs));
|
|
|
|
if( type & BLOCKTYPE_ANALYZED ) {
|
|
// check if changed
|
|
for(i = 0; i < XMMREGS; ++i) {
|
|
if( xmmregs[i].inuse != startregs[i].inuse )
|
|
break;
|
|
if( xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type) )
|
|
break;
|
|
}
|
|
|
|
if( i == XMMREGS ) return; // nothing changed
|
|
}
|
|
|
|
s8* oldX86 = x86Ptr;
|
|
|
|
FORIT(itinst, insts) {
|
|
|
|
if( itinst->type & INST_DUMMY )
|
|
continue;
|
|
|
|
// reserve, go from upper to lower
|
|
for(i = 1; i >= 0; --i) {
|
|
_VURegsNum* regs = itinst->regs+i;
|
|
|
|
// redo the counters so that the proper regs are released
|
|
for(int j = 0; j < XMMREGS; ++j) {
|
|
if( xmmregs[j].inuse ) {
|
|
if( xmmregs[j].type == XMMTYPE_VFREG ) {
|
|
int count = 0;
|
|
itinst2 = itinst;
|
|
|
|
if( i ) {
|
|
if( itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg ) {
|
|
itinst2 = insts.end();
|
|
break;
|
|
}
|
|
else {
|
|
++count;
|
|
++itinst2;
|
|
}
|
|
}
|
|
|
|
while(itinst2 != insts.end() ) {
|
|
if( itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg ||
|
|
itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg)
|
|
break;
|
|
|
|
++count;
|
|
++itinst2;
|
|
}
|
|
xmmregs[j].counter = 1000-count;
|
|
}
|
|
else {
|
|
assert( xmmregs[j].type == XMMTYPE_ACC );
|
|
|
|
int count = 0;
|
|
itinst2 = itinst;
|
|
|
|
if( i ) ++itinst2; // acc isn't used in lower insts
|
|
|
|
while(itinst2 != insts.end() ) {
|
|
assert( !((itinst2->regs[0].VIread|itinst2->regs[0].VIwrite) & (1<<REG_ACC_FLAG)) );
|
|
|
|
if( (itinst2->regs[1].VIread|itinst2->regs[1].VIwrite) & (1<<REG_ACC_FLAG) )
|
|
break;
|
|
|
|
++count;
|
|
++itinst2;
|
|
}
|
|
|
|
xmmregs[j].counter = 1000-count;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( regs->VFread0 ) _addNeededVFtoXMMreg(regs->VFread0);
|
|
if( regs->VFread1 ) _addNeededVFtoXMMreg(regs->VFread1);
|
|
if( regs->VFwrite ) _addNeededVFtoXMMreg(regs->VFwrite);
|
|
if( regs->VIread & (1<<REG_ACC_FLAG) ) _addNeededACCtoXMMreg();
|
|
if( regs->VIread & (1<<REG_VF0_FLAG) ) _addNeededVFtoXMMreg(0);
|
|
|
|
// alloc
|
|
itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1;
|
|
itinst->vfflush[i] = -1;
|
|
|
|
if( regs->VFread0 ) itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0);
|
|
else if( regs->VIread & (1<<REG_VF0_FLAG) ) itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
|
|
if( regs->VFread1 ) itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0);
|
|
else if( (regs->VIread & (1<<REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff ) itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
|
|
if( regs->VIread & (1<<REG_ACC_FLAG) ) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
|
|
|
|
int reusereg = -1; // 0 - VFwrite, 1 - VFAcc
|
|
|
|
if( regs->VFwrite ) {
|
|
assert( !(regs->VIwrite&(1<<REG_ACC_FLAG)) );
|
|
|
|
if( regs->VFwxyzw == 0xf ) {
|
|
itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0);
|
|
if( itinst->vfwrite[i] < 0 ) reusereg = 0;
|
|
}
|
|
else {
|
|
itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
|
|
}
|
|
}
|
|
else if( regs->VIwrite & (1<<REG_ACC_FLAG) ) {
|
|
|
|
if( regs->VFwxyzw == 0xf ) {
|
|
itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0);
|
|
if( itinst->vfacc[i] < 0 ) reusereg = 1;
|
|
}
|
|
else {
|
|
itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
|
|
}
|
|
}
|
|
|
|
if( reusereg >= 0 ) {
|
|
// reuse
|
|
itnext = itinst; itnext++;
|
|
|
|
u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG;
|
|
u8 reg = reusereg ? 0 : regs->VFwrite;
|
|
|
|
if( itinst->vfacc[i] >= 0 && ((regs->VIread&(1<<REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1<<REG_ACC_FLAG)) || !(itnext->livevars[0]&(1<<REG_ACC_FLAG)))) ) {
|
|
assert( reusereg == 0 );
|
|
if(itnext->livevars[0]&(1<<REG_ACC_FLAG)) _freeXMMreg(itinst->vfacc[i]);
|
|
xmmregs[itinst->vfacc[i]].inuse = 1;
|
|
xmmregs[itinst->vfacc[i]].reg = reg;
|
|
xmmregs[itinst->vfacc[i]].type = type;
|
|
xmmregs[itinst->vfacc[i]].mode = 0;
|
|
itinst->vfwrite[i] = itinst->vfacc[i];
|
|
}
|
|
else if( itinst->vfread0[i] >= 0 && (itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1<<regs->VFread0)) || !(itnext->livevars[1]&(1<<regs->VFread0))))) ) {
|
|
if(itnext->livevars[1]®s->VFread0) _freeXMMreg(itinst->vfread0[i]);
|
|
xmmregs[itinst->vfread0[i]].inuse = 1;
|
|
xmmregs[itinst->vfread0[i]].reg = reg;
|
|
xmmregs[itinst->vfread0[i]].type = type;
|
|
xmmregs[itinst->vfread0[i]].mode = 0;
|
|
if( reusereg ) itinst->vfacc[i] = itinst->vfread0[i];
|
|
else itinst->vfwrite[i] = itinst->vfread0[i];
|
|
}
|
|
else if( itinst->vfread1[i] >= 0 && (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1<<regs->VFread1)) || !(itnext->livevars[1]&(1<<regs->VFread1)))) ) {
|
|
if(itnext->livevars[1]®s->VFread1) _freeXMMreg(itinst->vfread1[i]);
|
|
xmmregs[itinst->vfread1[i]].inuse = 1;
|
|
xmmregs[itinst->vfread1[i]].reg = reg;
|
|
xmmregs[itinst->vfread1[i]].type = type;
|
|
xmmregs[itinst->vfread1[i]].mode = 0;
|
|
if( reusereg ) itinst->vfacc[i] = itinst->vfread1[i];
|
|
else itinst->vfwrite[i] = itinst->vfread1[i];
|
|
}
|
|
else {
|
|
if( reusereg ) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
|
|
else itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
|
|
}
|
|
}
|
|
|
|
// always alloc at least 1 temp reg
|
|
int free0 = (i||regs->VFwrite||regs->VFread0||regs->VFread1||(regs->VIwrite&(1<<REG_ACC_FLAG)))?_allocTempXMMreg(XMMT_FPS, -1):-1;
|
|
int free1=0, free2=0;
|
|
|
|
if( i==0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0]==itinst->vfwrite[1]||itinst->vfread1[0]==itinst->vfwrite[1]) ) {
|
|
itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1);
|
|
}
|
|
|
|
if( i == 1 && (regs->VIwrite & (1<<REG_CLIP_FLAG)) ) {
|
|
// CLIP inst, need two extra regs
|
|
if( free0 < 0 )
|
|
free0 = _allocTempXMMreg(XMMT_FPS, -1);
|
|
free1 = _allocTempXMMreg(XMMT_FPS, -1);
|
|
free2 = _allocTempXMMreg(XMMT_FPS, -1);
|
|
_freeXMMreg(free1);
|
|
_freeXMMreg(free2);
|
|
}
|
|
else if( regs->VIwrite & (1<<REG_P) ) {
|
|
free1 = _allocTempXMMreg(XMMT_FPS, -1);
|
|
_freeXMMreg(free1);
|
|
}
|
|
|
|
if( itinst->vfflush[i] >= 0 ) _freeXMMreg(itinst->vfflush[i]);
|
|
if( free0 >= 0 ) _freeXMMreg(free0);
|
|
|
|
itinst->vffree[i] = (free0&0xf)|(free1<<8)|(free2<<16);
|
|
|
|
_clearNeededXMMregs();
|
|
}
|
|
}
|
|
|
|
assert( x86Ptr == oldX86 );
|
|
u32 analyzechildren = !(type&BLOCKTYPE_ANALYZED);
|
|
type |= BLOCKTYPE_ANALYZED;
|
|
|
|
//memset(endregs, 0, sizeof(endregs));
|
|
|
|
if( analyzechildren ) {
|
|
FORIT(itchild, blocks) (*itchild)->AssignVFRegs();
|
|
}
|
|
}
|
|
|
|
// Working set collected by VuBaseBlock::AssignVIRegs while it alternates between
// child blocks and their parents.
struct MARKOVBLANKET
|
|
{
|
|
// blocks visited through AssignVIRegs(1)
list<VuBaseBlock*> parents;
|
|
// blocks visited through AssignVIRegs(0)
list<VuBaseBlock*> children;
|
|
};
|
|
|
|
// single shared instance; SuperVUAssignRegs clears both lists before each traversal
static MARKOVBLANKET s_markov;
|
|
|
|
void VuBaseBlock::AssignVIRegs(int parent)
|
|
{
|
|
const int maxregs = 6;
|
|
|
|
if( parent ) {
|
|
if( (type&BLOCKTYPE_ANALYZEDPARENT) )
|
|
return;
|
|
|
|
type |= BLOCKTYPE_ANALYZEDPARENT;
|
|
s_markov.parents.push_back(this);
|
|
for(LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it) {
|
|
(*it)->AssignVIRegs(0);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if( (type&BLOCKTYPE_ANALYZED) )
|
|
return;
|
|
|
|
// child
|
|
assert( allocX86Regs == -1 );
|
|
allocX86Regs = s_vecRegArray.size();
|
|
s_vecRegArray.resize(allocX86Regs+X86REGS);
|
|
|
|
_x86regs* pregs = &s_vecRegArray[allocX86Regs];
|
|
memset(pregs, 0, sizeof(_x86regs)*X86REGS);
|
|
|
|
assert( parents.size() > 0 );
|
|
|
|
list<VuBaseBlock*>::iterator itparent;
|
|
u32 usedvars = insts.front().usedvars[0];
|
|
u32 livevars = insts.front().livevars[0];
|
|
|
|
if( parents.size() > 0 ) {
|
|
u32 usedvars2 = 0xffffffff;
|
|
FORIT(itparent, parents) usedvars2 &= (*itparent)->insts.front().usedvars[0];
|
|
usedvars |= usedvars2;
|
|
}
|
|
|
|
usedvars &= livevars;
|
|
|
|
// currently order doesn't matter
|
|
int num = 0;
|
|
|
|
if( usedvars ) {
|
|
for(int i = 1; i < 16; ++i) {
|
|
if( usedvars & (1<<i) ) {
|
|
pregs[num].inuse = 1;
|
|
pregs[num].reg = i;
|
|
|
|
livevars &= ~(1<<i);
|
|
|
|
if( ++num >= maxregs ) break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( num < maxregs) {
|
|
livevars &= ~usedvars;
|
|
livevars &= insts.back().usedvars[0];
|
|
|
|
if( livevars ) {
|
|
for(int i = 1; i < 16; ++i) {
|
|
if( livevars & (1<<i) ) {
|
|
pregs[num].inuse = 1;
|
|
pregs[num].reg = i;
|
|
|
|
if( ++num >= maxregs) break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
s_markov.children.push_back(this);
|
|
type |= BLOCKTYPE_ANALYZED;
|
|
FORIT(itparent, parents) {
|
|
(*itparent)->AssignVIRegs(1);
|
|
}
|
|
}
|
|
|
|
// Checks how VF register vfreg is accessed from the instruction at curpc to the end of
// the block. Returns MODE_VUXY if every access uses field mask 0xc, MODE_VUXYZ if every
// access uses field mask 0xe (both bits when the register is not accessed at all), and 0
// otherwise. vfreg <= 0 always yields 0.
u32 VuBaseBlock::GetModeXYZW(u32 curpc, int vfreg)
{
    if( vfreg <= 0 ) return 0;

    // one list entry is assumed per 8 bytes of micro code
    list<VuInstruction>::iterator itinst = insts.begin();
    advance(itinst, (curpc-startpc)/8);

    bool onlyxy = true;
    bool onlyxyz = true;

    for(; itinst != insts.end(); ++itinst) {
        for(int pipe = 0; pipe < 2; ++pipe) {
            const _VURegsNum& r = itinst->regs[pipe];

            if( r.VFwrite == vfreg ) {
                if( r.VFwxyzw != 0xe ) onlyxyz = false;
                if( r.VFwxyzw != 0xc ) onlyxy = false;
            }
            if( r.VFread0 == vfreg ) {
                if( r.VFr0xyzw != 0xe ) onlyxyz = false;
                if( r.VFr0xyzw != 0xc ) onlyxy = false;
            }
            if( r.VFread1 == vfreg ) {
                if( r.VFr1xyzw != 0xe ) onlyxyz = false;
                if( r.VFr1xyzw != 0xc ) onlyxy = false;
            }

            // neither mode can hold any more
            if( !onlyxy && !onlyxyz ) return 0;
        }
    }

    return (onlyxy?MODE_VUXY:0)|(onlyxyz?MODE_VUXYZ:0);
}
|
|
|
|
static void SuperVUAssignRegs()
|
|
{
|
|
list<VuBaseBlock*>::iterator itblock, itblock2;
|
|
|
|
// assign xyz regs
|
|
// FORIT(itblock, s_listBlocks) {
|
|
// (*itblock)->vuxyz = 0;
|
|
// (*itblock)->vuxy = 0;
|
|
//
|
|
// for(int i = 0; i < 32; ++i) {
|
|
// u32 mode = (*itblock)->GetModeXYZW((*itblock)->startpc, i);
|
|
// if( mode & MODE_VUXYZ ) {
|
|
// if( mode & MODE_VUZ ) (*itblock)->vuxyz |= 1<<i;
|
|
// else (*itblock)->vuxy |= 1<<i;
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
FORIT(itblock, s_listBlocks) (*itblock)->type &= ~BLOCKTYPE_ANALYZED;
|
|
s_listBlocks.front()->AssignVFRegs();
|
|
|
|
// VI assignments, find markov blanket for each node in the graph
|
|
// then allocate regs based on the commonly used ones
|
|
#ifdef SUPERVU_X86CACHING
|
|
FORIT(itblock, s_listBlocks) (*itblock)->type &= ~(BLOCKTYPE_ANALYZED|BLOCKTYPE_ANALYZEDPARENT);
|
|
s_vecRegArray.resize(0);
|
|
u8 usedregs[16];
|
|
|
|
// note: first block always has to start with no alloc regs
|
|
bool bfirst = true;
|
|
|
|
FORIT(itblock, s_listBlocks) {
|
|
|
|
if( !((*itblock)->type & BLOCKTYPE_ANALYZED) ) {
|
|
|
|
if( (*itblock)->parents.size() == 0 ) {
|
|
(*itblock)->type |= BLOCKTYPE_ANALYZED;
|
|
bfirst = false;
|
|
continue;
|
|
}
|
|
|
|
s_markov.children.clear();
|
|
s_markov.parents.clear();
|
|
(*itblock)->AssignVIRegs(0);
|
|
|
|
// assign the regs
|
|
int regid = s_vecRegArray.size();
|
|
s_vecRegArray.resize(regid+X86REGS);
|
|
|
|
_x86regs* mergedx86 = &s_vecRegArray[regid];
|
|
memset(mergedx86, 0, sizeof(_x86regs)*X86REGS);
|
|
|
|
if( !bfirst ) {
|
|
*(u32*)usedregs = *((u32*)usedregs+1) = *((u32*)usedregs+2) = *((u32*)usedregs+3) = 0;
|
|
|
|
FORIT(itblock2, s_markov.children) {
|
|
assert( (*itblock2)->allocX86Regs >= 0 );
|
|
_x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs];
|
|
for(int i = 0; i < X86REGS; ++i) {
|
|
if( pregs[i].inuse && pregs[i].reg < 16) {
|
|
//assert( pregs[i].reg < 16);
|
|
usedregs[pregs[i].reg]++;
|
|
}
|
|
}
|
|
}
|
|
|
|
int num = 1;
|
|
for(int i = 0; i < 16; ++i) {
|
|
if( usedregs[i] == s_markov.children.size() ) {
|
|
// use
|
|
mergedx86[num].inuse = 1;
|
|
mergedx86[num].reg = i;
|
|
mergedx86[num].type = (s_vu?X86TYPE_VU1:0)|X86TYPE_VI;
|
|
mergedx86[num].mode = MODE_READ;
|
|
if( ++num >= X86REGS )
|
|
break;
|
|
if( num == ESP )
|
|
++num;
|
|
}
|
|
}
|
|
|
|
FORIT(itblock2, s_markov.children) {
|
|
assert( (*itblock2)->nStartx86 == -1 );
|
|
(*itblock2)->nStartx86 = regid;
|
|
}
|
|
|
|
FORIT(itblock2, s_markov.parents) {
|
|
assert( (*itblock2)->nEndx86 == -1 );
|
|
(*itblock2)->nEndx86 = regid;
|
|
}
|
|
}
|
|
|
|
bfirst = false;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
//////////////////
|
|
// Recompilation
|
|
//////////////////
|
|
// total cycles since start of program execution
// (zeroed by SuperVUExecuteProgram, added to VU->cycle by SuperVUCleanupProgram)
static u32 s_TotalVUCycles; // total cycles since start of program execution
|
|
|
|
// cycles in which the last Q,P regs were finished (written to VU->VI[])
|
|
// the write occurs before the instruction is executed at that cycle
|
|
// compare with s_TotalVUCycles
|
|
// if less than 0, already flushed
|
|
// (SuperVUExecuteProgram initializes both to 0xffffffff, the code emitted by
// SuperVUFlush stores 0x80000000 once the value has been written back)
static int s_writeQ, s_writeP;
|
|
// recompile-time counterparts read by SuperVUFlush; 0 there means the write did not
// happen in the block being recompiled
static int s_recWriteQ, s_recWriteP; // wait times during recompilation
|
|
static int s_needFlush; // first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written
|
|
|
|
// host state kept across a VU program run: ebp/esi are saved in SuperVUExecuteProgram and
// restored in SuperVUEndProgram, s_vu1esp is only maintained in _DEBUG builds, and
// s_callstack holds the return address of the SuperVUExecuteProgram caller
static u32 s_vu1ebp, s_vu1esp, s_vu1esi, s_callstack;//, s_vu1esp
|
|
// only referenced by the commented-out stmxcsr/ldmxcsr instructions in this part of the file
static u32 s_ssecsr;
|
|
static int s_JumpX86;
|
|
|
|
//extern "C" u32 g_sseVUMXCSR;
|
|
|
|
// entry point of all vu programs from emulator calls
// Naked function: there is no compiler generated prologue/epilogue and the function never
// returns by itself. The caller's return address is popped into s_callstack, after which
// SuperVUGetProgram is called (presumably consuming startpc/vuindex, which are now on top
// of the stack - confirm against its declaration). Control then jumps to the address
// left in eax; SuperVUEndProgram later resumes the caller through s_callstack.
|
|
__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
|
|
{
|
|
#ifdef SUPERVU_COUNT
|
|
QueryPerformanceCounter(&svubase);
|
|
#endif
|
|
__asm {
|
|
// pop the return address so it can be used by SuperVUEndProgram
mov eax, dword ptr [esp]
|
|
add esp, 4
|
|
mov s_callstack, eax
|
|
call SuperVUGetProgram
|
|
|
|
// save cpu state
|
|
mov s_vu1ebp, ebp
|
|
mov s_vu1esi, esi // have to save even in Release
|
|
}
|
|
#ifdef _DEBUG
|
|
__asm {
|
|
// remembered so that SuperVUEndProgram can compute the esp difference
mov s_vu1esp, esp
|
|
}
|
|
#endif
|
|
|
|
__asm {
|
|
// stmxcsr s_ssecsr
|
|
// ldmxcsr g_sseVUMXCSR
|
|
|
|
// init vars
|
|
xor ecx, ecx
|
|
// 0xffffffff is negative as int: no Q/P write pending
mov s_writeQ, 0xffffffff
|
|
mov s_writeP, 0xffffffff
|
|
mov s_TotalVUCycles, ecx
|
|
|
|
// eax is not modified after the call above
jmp eax
|
|
}
|
|
}
|
|
|
|
// Called from SuperVUEndProgram after a recompiled program has finished.
// Selects the VU by vuindex, adds the cycles counted during the run to VU->cycle and
// commits Q (and P, VU1 only) when their pending write cycle is still positive.
// startpc is not used.
static void SuperVUCleanupProgram(u32 startpc, int vuindex)
{
#ifdef SUPERVU_COUNT
    QueryPerformanceCounter(&svufinal);
    svutime += (u32)(svufinal.QuadPart-svubase.QuadPart);
#endif

#ifdef _DEBUG
    // SuperVUEndProgram subtracted the current esp: the stack must be balanced
    assert( s_vu1esp == 0 );
#endif

    if( vuindex ) VU = &VU1;
    else VU = &VU0;

    VU->cycle += s_TotalVUCycles;

    if( s_writeQ > 0 )
        VU->VI[REG_Q] = VU->q;

    if( s_writeP > 0 ) {
        assert(VU == &VU1);
        VU1.VI[REG_P] = VU1.p; // only VU1
    }
}
|
|
|
|
// exit point of all vu programs
// Naked function that recompiled code jumps to (see the JMP32 emitted in
// SuperVURecompile). It restores the registers saved by SuperVUExecuteProgram, calls
// SuperVUCleanupProgram and resumes the original caller by jumping to s_callstack.
|
|
__declspec(naked) static void SuperVUEndProgram()
|
|
{
|
|
__asm {
|
|
// restore cpu state
|
|
//ldmxcsr s_ssecsr
|
|
|
|
mov ebp, s_vu1ebp
|
|
mov esi, s_vu1esi
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
__asm {
|
|
// leaves 0 when esp equals the value saved at entry (asserted in SuperVUCleanupProgram)
sub s_vu1esp, esp
|
|
}
|
|
#endif
|
|
|
|
__asm {
|
|
call SuperVUCleanupProgram
|
|
jmp s_callstack // so returns correctly
|
|
}
|
|
}
|
|
|
|
// Flushes P/Q regs
|
|
void SuperVUFlush(int p, int wait)
|
|
{
|
|
u8* pjmp[3];
|
|
if( !(s_needFlush&(1<<p)) ) return;
|
|
|
|
int recwait = p ? s_recWriteP : s_recWriteQ;
|
|
if( !wait && s_pCurInst->info.cycle < recwait ) return;
|
|
|
|
if( recwait == 0 ) {
|
|
// write didn't happen this block
|
|
MOV32MtoR(EAX, p ? (u32)&s_writeP : (u32)&s_writeQ);
|
|
OR32RtoR(EAX, EAX);
|
|
pjmp[0] = JS8(0);
|
|
|
|
if( s_pCurInst->info.cycle ) SUB32ItoR(EAX, s_pCurInst->info.cycle);
|
|
|
|
// if writeQ <= total+offset
|
|
if( !wait ) { // only write back if time is up
|
|
CMP32MtoR(EAX, (u32)&s_TotalVUCycles);
|
|
pjmp[1] = JG8(0);
|
|
}
|
|
else {
|
|
// add (writeQ-total-offset) to s_TotalVUCycles
|
|
// necessary?
|
|
CMP32MtoR(EAX, (u32)&s_TotalVUCycles);
|
|
pjmp[2] = JLE8(0);
|
|
MOV32RtoM((u32)&s_TotalVUCycles, EAX);
|
|
x86SetJ8(pjmp[2]);
|
|
}
|
|
}
|
|
else if( wait && s_pCurInst->info.cycle < recwait ) {
|
|
MOV32ItoM((u32)&s_TotalVUCycles, recwait);
|
|
}
|
|
|
|
MOV32MtoR(EAX, SuperVUGetVIAddr(p?REG_P:REG_Q, 0));
|
|
MOV32ItoM(p ? (u32)&s_writeP : (u32)&s_writeQ, 0x80000000);
|
|
MOV32RtoM(SuperVUGetVIAddr(p?REG_P:REG_Q, 1), EAX);
|
|
|
|
if( recwait == 0 ) {
|
|
if( !wait ) x86SetJ8(pjmp[1]);
|
|
x86SetJ8(pjmp[0]);
|
|
}
|
|
|
|
if( wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait ) )
|
|
s_needFlush &= ~(1<<p);
|
|
}
|
|
|
|
// executed only once per program
|
|
static u32* SuperVUStaticAlloc(u32 size)
|
|
{
|
|
assert( recVUStackPtr+size <= recVUStack+SUPERVU_STACKSIZE );
|
|
// always zero
|
|
if( size == 4 ) *(u32*)recVUStackPtr = 0;
|
|
else memset(recVUStackPtr, 0, size);
|
|
recVUStackPtr += size;
|
|
return (u32*)(recVUStackPtr-size);
|
|
}
|
|
|
|
static void SuperVURecompile()
|
|
{
|
|
// save cpu state
|
|
recVUStackPtr = recVUStack;
|
|
|
|
_initXMMregs();
|
|
|
|
list<VuBaseBlock*>::iterator itblock;
|
|
|
|
FORIT(itblock, s_listBlocks) (*itblock)->type &= ~BLOCKTYPE_ANALYZED;
|
|
s_listBlocks.front()->Recompile();
|
|
// make sure everything compiled
|
|
FORIT(itblock, s_listBlocks) assert( ((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL );
|
|
|
|
// link all blocks
|
|
FORIT(itblock, s_listBlocks) {
|
|
VuBaseBlock::LISTBLOCKS::iterator itchild;
|
|
|
|
assert( (*itblock)->blocks.size() <= ARRAYSIZE((*itblock)->pChildJumps) );
|
|
|
|
int i = 0;
|
|
FORIT(itchild, (*itblock)->blocks) {
|
|
|
|
if( (u32)(*itblock)->pChildJumps[i] == 0xffffffff )
|
|
continue;
|
|
|
|
if( (*itblock)->pChildJumps[i] == NULL ) {
|
|
VuBaseBlock* pchild = *itchild;
|
|
|
|
if( pchild->type & BLOCKTYPE_HASEOP) {
|
|
assert( pchild->blocks.size() == 0);
|
|
|
|
AND32ItoM( (u32)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag
|
|
AND32ItoM( (u32)&VU->vifRegs->stat, ~0x4 );
|
|
|
|
MOV32ItoM((u32)&VU->VI[REG_TPC], pchild->endpc);
|
|
JMP32( (u32)SuperVUEndProgram - ( (u32)x86Ptr + 5 ));
|
|
}
|
|
// only other case is when there are two branches
|
|
else assert( (*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH );
|
|
|
|
continue;
|
|
}
|
|
|
|
if( (u32)(*itblock)->pChildJumps[i] & 0x80000000 ) {
|
|
// relative
|
|
*(u32*)&(*itblock)->pChildJumps[i] &= 0x7fffffff;
|
|
*(*itblock)->pChildJumps[i] = (u32)(*itchild)->pcode - ((u32)(*itblock)->pChildJumps[i] + 4);
|
|
}
|
|
else *(*itblock)->pChildJumps[i] = (u32)(*itchild)->pcode;
|
|
|
|
++i;
|
|
}
|
|
}
|
|
|
|
s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode;
|
|
}
|
|
|
|
// Bookkeeping for the debug dispatcher svudispfn.
static u32 s_svulast = 0;	// start pc written by recompiled code in _DEBUG builds
static u32 s_vufnheader;	// start pc of the current function header (_DEBUG builds)
extern "C" u32 s_vucount = 0;	// number of register dumps performed by svudispfn
static u32 lastrec = 0;		// s_svulast as seen by the previous svudispfn dump pass

// svudispfn is a naked function, so it parks the caller's registers in these slots
static u32 s_saveecx;
static u32 s_saveedx;
static u32 s_saveebx;
static u32 s_saveesi;
static u32 s_saveedi;
static u32 s_saveebp;

// {lastrec, s_svulast} pairs for which svudispfn does not dump
static u32 badaddrs[][2] = { {0, 0} };
|
|
|
|
// Debug hook called from recompiled code (see the _DEBUG section of VuBaseBlock::Recompile),
// with the VU index in EAX. When bit 3 of vudump is set it logs the block address and dumps
// the registers of that VU, unless the {lastrec, s_svulast} pair is listed in badaddrs.
// The function is naked: there is no prologue/epilogue, so all state lives in statics and
// the registers are saved and restored by hand.
__declspec(naked) static void svudispfn()
{
	static u32 idx;
	static u32 vuIndex;

	__asm {
		mov vuIndex, eax
		mov s_saveecx, ecx
		mov s_saveedx, edx
		mov s_saveebx, ebx
		mov s_saveesi, esi
		mov s_saveedi, edi
		mov s_saveebp, ebp
	}

	// disabled: clamp infinities/NaNs in the VU1 VF registers
//	for(i = 1; i < 32; ++i) {
//		if( (VU1.VF[i].UL[3]&0x7f800000) == 0x7f800000 ) VU1.VF[i].UL[3] &= 0xff7fffff;
//		if( (VU1.VF[i].UL[2]&0x7f800000) == 0x7f800000 ) VU1.VF[i].UL[2] &= 0xff7fffff;
//		if( (VU1.VF[i].UL[1]&0x7f800000) == 0x7f800000 ) VU1.VF[i].UL[1] &= 0xff7fffff;
//		if( (VU1.VF[i].UL[0]&0x7f800000) == 0x7f800000 ) VU1.VF[i].UL[0] &= 0xff7fffff;
//	}

	if( (vudump&8) ) { //&& lastrec != g_vu1last ) {

		// look for the current transition in the ignore list
		for(idx = 0; idx < ARRAYSIZE(badaddrs); ++idx) {
			if( s_svulast == badaddrs[idx][1] && lastrec == badaddrs[idx][0] )
				break;
		}

		// not found: log and dump
		if( idx == ARRAYSIZE(badaddrs) )
		{
			__Log("tVU: %x\n", s_svulast);
			if( !vuIndex ) iDumpVU0Registers();
			else iDumpVU1Registers();
			s_vucount++;
		}

		lastrec = s_svulast;
	}

	__asm {
		mov ecx, s_saveecx
		mov edx, s_saveedx
		mov ebx, s_saveebx
		mov esi, s_saveesi
		mov edi, s_saveedi
		mov ebp, s_saveebp
		ret
	}
}
|
|
|
|
// frees an xmmreg depending on the liveness info of the current inst
|
|
//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg)
|
|
//{
|
|
// if( !xmmregs[xmmreg].inuse ) return;
|
|
// if( xmmregs[xmmreg].type == xmmtype && xmmregs[xmmreg].reg == reg ) return;
|
|
//
|
|
// if( s_pNextInst == NULL ) {
|
|
// // last inst, free
|
|
// _freeXMMreg(xmmreg);
|
|
// return;
|
|
// }
|
|
//
|
|
// if( xmmregs[xmmreg].type == XMMTYPE_VFREG ) {
|
|
// if( (s_pCurInst->livevars[1]|s_pNextInst->livevars[1]) & (1<<xmmregs[xmmreg].reg) )
|
|
// _freeXMMreg(xmmreg);
|
|
// else
|
|
// xmmregs[xmmreg].inuse = 0;
|
|
// }
|
|
// else if( xmmregs[xmmreg].type == XMMTYPE_ACC ) {
|
|
// if( (s_pCurInst->livevars[0]|s_pNextInst->livevars[0]) & (1<<REG_ACC_FLAG) )
|
|
// _freeXMMreg(xmmreg);
|
|
// else
|
|
// xmmregs[xmmreg].inuse = 0;
|
|
// }
|
|
//}
|
|
|
|
// frees all regs taking into account the livevars
|
|
void SuperVUFreeXMMregs(u32* livevars)
|
|
{
|
|
for(int i = 0; i < XMMREGS; ++i) {
|
|
if( xmmregs[i].inuse ) {
|
|
// same reg
|
|
if( (xmmregs[i].mode & MODE_WRITE) ) {
|
|
|
|
#ifdef SUPERVU_INTERCACHING
|
|
if( xmmregs[i].type == XMMTYPE_VFREG ) {
|
|
if( !(livevars[1] & (1<<xmmregs[i].reg)) )
|
|
continue;
|
|
}
|
|
else if( xmmregs[i].type == XMMTYPE_ACC ) {
|
|
if( !(livevars[0] & (1<<REG_ACC_FLAG)) )
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
if( xmmregs[i].mode & MODE_VUXYZ ) {
|
|
// ALWAYS update
|
|
u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (u32)&VU->VF[xmmregs[i].reg] : (u32)&VU->ACC;
|
|
|
|
if( xmmregs[i].mode & MODE_VUZ ) {
|
|
SSE_MOVHPS_XMM_to_M64(addr, (x86SSERegType)i);
|
|
SSE_SHUFPS_M128_to_XMM((x86SSERegType)i, addr, 0xc4);
|
|
}
|
|
else SSE_MOVHPS_M64_to_XMM((x86SSERegType)i, addr+8);
|
|
|
|
xmmregs[i].mode &= ~MODE_VUXYZ;
|
|
}
|
|
|
|
_freeXMMreg(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
//_freeXMMregs();
|
|
}
|
|
|
|
//void timeout() { SysPrintf("VU0 timeout\n"); }
|
|
// VU0 only: emits a check of s_TotalVUCycles against 512 and leaves through SuperVUEndProgram
// once the limit is reached. `incstack` is the number of bytes added to ESP before leaving
// (0 when nothing has to be popped).
void SuperVUTestVU0Condition(u32 incstack)
{
	if( s_vu ) return; // vu0 only

	// sometimes games spin on vu0, so be careful with this value
	// woody hangs if too high
	CMP32ItoM((u32)&s_TotalVUCycles, 512);

	if( !incstack ) {
		// nothing to pop: a single conditional jump is enough
		JAE32( (u32)SuperVUEndProgram - ( (u32)x86Ptr + 6 ) );
		return;
	}

	// below the limit: skip over the stack fix-up and the exit jump
	u8* pskip = JB8(0);

	ADD32ItoR(ESP, incstack);
	//CALLFunc((u32)timeout);
	JMP32( (u32)SuperVUEndProgram - ( (u32)x86Ptr + 5 ));

	x86SetJ8(pskip);
}
|
|
|
|
void VuBaseBlock::Recompile()
|
|
{
|
|
if( type & BLOCKTYPE_ANALYZED ) return;
|
|
|
|
x86Align(16);
|
|
pcode = x86Ptr;
|
|
|
|
#ifdef _DEBUG
|
|
MOV32ItoM((u32)&s_vufnheader, s_pFnHeader->startpc);
|
|
MOV32ItoM((u32)&VU->VI[REG_TPC], startpc);
|
|
MOV32ItoM((u32)&s_svulast, startpc);
|
|
MOV32ItoR(EAX, s_vu);
|
|
CALLFunc((u32)svudispfn);
|
|
#endif
|
|
|
|
s_pCurBlock = this;
|
|
s_needFlush = 3;
|
|
pc = startpc;
|
|
branch = 0;
|
|
s_recWriteQ = s_recWriteP = 0;
|
|
|
|
s_ClipRead = s_PrevClipWrite = (u32)&VU->VI[REG_CLIP_FLAG];
|
|
s_StatusRead = s_PrevStatusWrite = (u32)&VU->VI[REG_STATUS_FLAG];
|
|
s_PrevIWrite = (u32)&VU->VI[REG_I];
|
|
s_MACRead = (u32)&VU->VI[REG_MAC_FLAG];
|
|
s_JumpX86 = 0;
|
|
|
|
memcpy(xmmregs, startregs, sizeof(xmmregs));
|
|
#ifdef SUPERVU_X86CACHING
|
|
if( nStartx86 >= 0 )
|
|
memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs));
|
|
else _initX86regs();
|
|
#else
|
|
_initX86regs();
|
|
#endif
|
|
|
|
list<VuInstruction>::iterator itinst;
|
|
FORIT(itinst, insts) {
|
|
s_pCurInst = &(*itinst);
|
|
if( s_JumpX86 > 0 ) x86regs[s_JumpX86].needed = 1;
|
|
itinst->Recompile(itinst, vuxyz);
|
|
}
|
|
assert( pc == endpc );
|
|
|
|
// flush flags
|
|
if( s_ClipRead != (u32)&VU->VI[REG_CLIP_FLAG] ) {
|
|
MOV32MtoR(EAX, s_ClipRead);
|
|
MOV32RtoM((u32)&VU->VI[REG_CLIP_FLAG], EAX);
|
|
}
|
|
if( s_StatusRead != (u32)&VU->VI[REG_STATUS_FLAG] ) {
|
|
MOV32MtoR(EAX, s_StatusRead);
|
|
MOV32RtoM((u32)&VU->VI[REG_STATUS_FLAG], EAX);
|
|
}
|
|
if( s_MACRead != (u32)&VU->VI[REG_MAC_FLAG] ) {
|
|
MOV32MtoR(EAX, s_MACRead);
|
|
MOV32RtoM((u32)&VU->VI[REG_MAC_FLAG], EAX);
|
|
}
|
|
if( s_PrevIWrite != (u32)&VU->VI[REG_I] ) {
|
|
MOV32ItoM((u32)&VU->VI[REG_I], *(u32*)s_PrevIWrite); // never changes
|
|
}
|
|
|
|
ADD32ItoM((u32)&s_TotalVUCycles, cycles);
|
|
|
|
// compute branches, jumps, eop
|
|
if( type & BLOCKTYPE_HASEOP ) {
|
|
// end
|
|
_freeXMMregs();
|
|
_freeX86regs();
|
|
AND32ItoM( (u32)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag
|
|
AND32ItoM( (u32)&VU->vifRegs->stat, ~0x4 );
|
|
if( !branch ) MOV32ItoM((u32)&VU->VI[REG_TPC], endpc);
|
|
JMP32( (u32)SuperVUEndProgram - ( (u32)x86Ptr + 5 ));
|
|
}
|
|
else {
|
|
|
|
u32 livevars[2] = {0};
|
|
|
|
list<VuInstruction>::iterator lastinst = GetInstIterAtPc(endpc-8);
|
|
lastinst++;
|
|
|
|
if( lastinst != insts.end() ) {
|
|
livevars[0] = lastinst->livevars[0];
|
|
livevars[1] = lastinst->livevars[1];
|
|
}
|
|
else {
|
|
// take from children
|
|
if( blocks.size() > 0 ) {
|
|
LISTBLOCKS::iterator itchild;
|
|
FORIT(itchild, blocks) {
|
|
livevars[0] |= (*itchild)->insts.front().livevars[0];
|
|
livevars[1] |= (*itchild)->insts.front().livevars[1];
|
|
}
|
|
}
|
|
else {
|
|
livevars[0] = ~0;
|
|
livevars[1] = ~0;
|
|
}
|
|
}
|
|
|
|
SuperVUFreeXMMregs(livevars);
|
|
|
|
// get rid of any writes, otherwise _freeX86regs will write
|
|
x86regs[s_JumpX86].mode &= ~MODE_WRITE;
|
|
|
|
if( branch == 1 ) {
|
|
if( !x86regs[s_JumpX86].inuse ) {
|
|
assert( x86regs[s_JumpX86].type == X86TYPE_VUJUMP );
|
|
s_JumpX86 = 0xffffffff; // notify to jump from g_recWriteback
|
|
}
|
|
}
|
|
|
|
// align VI regs
|
|
#ifdef SUPERVU_X86CACHING
|
|
if( nEndx86 >= 0 ) {
|
|
_x86regs* endx86 = &s_vecRegArray[nEndx86];
|
|
for(int i = 0; i < X86REGS; ++i) {
|
|
if( endx86[i].inuse ) {
|
|
|
|
if( s_JumpX86 == i && x86regs[s_JumpX86].inuse ) {
|
|
x86regs[s_JumpX86].inuse = 0;
|
|
x86regs[EAX].inuse = 1;
|
|
MOV32RtoR(EAX, s_JumpX86);
|
|
s_JumpX86 = EAX;
|
|
}
|
|
|
|
if( x86regs[i].inuse ) {
|
|
if( x86regs[i].type == endx86[i].type && x86regs[i].reg == endx86[i].reg ) {
|
|
_freeX86reg(i);
|
|
// will continue to use it
|
|
continue;
|
|
}
|
|
|
|
if( x86regs[i].type == (X86TYPE_VI|(s_vu?X86TYPE_VU1:0)) ) {
|
|
#ifdef SUPERVU_INTERCACHING
|
|
if( livevars[0] & (1<<x86regs[i].reg) )
|
|
_freeX86reg(i);
|
|
else
|
|
x86regs[i].inuse = 0;
|
|
#else
|
|
_freeX86reg(i);
|
|
#endif
|
|
}
|
|
else _freeX86reg(i);
|
|
}
|
|
|
|
// realloc
|
|
_allocX86reg(i, endx86[i].type, endx86[i].reg, MODE_READ);
|
|
if( x86regs[i].mode & MODE_WRITE ) {
|
|
_freeX86reg(i);
|
|
x86regs[i].inuse = 1;
|
|
}
|
|
}
|
|
else _freeX86reg(i);
|
|
}
|
|
}
|
|
else _freeX86regs();
|
|
#else
|
|
_freeX86regs();
|
|
#endif
|
|
|
|
switch(branch) {
|
|
case 1: // branch, esi has new prog
|
|
|
|
SuperVUTestVU0Condition(0);
|
|
|
|
if( s_JumpX86 == 0xffffffff )
|
|
JMP32M((u32)&g_recWriteback);
|
|
else
|
|
JMP32R(s_JumpX86);
|
|
|
|
break;
|
|
case 4: // jalr
|
|
pChildJumps[0] = (u32*)0xffffffff;
|
|
// fall through
|
|
|
|
case 2: // jump, esi has new vupc
|
|
{
|
|
_freeXMMregs();
|
|
_freeX86regs();
|
|
|
|
SuperVUTestVU0Condition(8);
|
|
|
|
// already onto stack
|
|
CALLFunc((u32)SuperVUGetProgram);
|
|
ADD32ItoR(ESP, 8);
|
|
JMP32R(EAX);
|
|
|
|
break;
|
|
}
|
|
case 0:
|
|
case 3: // uncond branch
|
|
pChildJumps[0] = (u32*)((u32)JMP32(0)|0x80000000);
|
|
break;
|
|
|
|
default:
|
|
#ifdef PCSX2_DEVBUILD
|
|
SysPrintf("Bad branch %x\n", branch);
|
|
#endif
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
type |= BLOCKTYPE_ANALYZED;
|
|
|
|
LISTBLOCKS::iterator itchild;
|
|
FORIT(itchild, blocks) {
|
|
(*itchild)->Recompile();
|
|
}
|
|
}
|
|
|
|
// MODE_VUXYZ selection is disabled: the macro always yields 0.
// disabled form: ((vuxyz&(1<<(reg)))?MODE_VUXYZ:0)
#define GET_VUXYZMODE(reg) 0
|
|
|
|
int VuInstruction::SetCachedRegs(int upper, u32 vuxyz)
|
|
{
|
|
if( vfread0[upper] >= 0 ) {
|
|
SuperVUFreeXMMreg(vfread0[upper], XMMTYPE_VFREG, regs[upper].VFread0);
|
|
_allocVFtoXMMreg(VU, vfread0[upper], regs[upper].VFread0, MODE_READ|GET_VUXYZMODE(regs[upper].VFread0));
|
|
}
|
|
if( vfread1[upper] >= 0 ) {
|
|
SuperVUFreeXMMreg(vfread1[upper], XMMTYPE_VFREG, regs[upper].VFread1);
|
|
_allocVFtoXMMreg(VU, vfread1[upper], regs[upper].VFread1, MODE_READ|GET_VUXYZMODE(regs[upper].VFread1));
|
|
}
|
|
if( vfacc[upper] >= 0 && (regs[upper].VIread&(1<<REG_ACC_FLAG))) {
|
|
SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0);
|
|
_allocACCtoXMMreg(VU, vfacc[upper], MODE_READ);
|
|
}
|
|
if( vfwrite[upper] >= 0 ) {
|
|
assert( regs[upper].VFwrite > 0);
|
|
SuperVUFreeXMMreg(vfwrite[upper], XMMTYPE_VFREG, regs[upper].VFwrite);
|
|
_allocVFtoXMMreg(VU, vfwrite[upper], regs[upper].VFwrite,
|
|
MODE_WRITE|(regs[upper].VFwxyzw != 0xf?MODE_READ:0)|GET_VUXYZMODE(regs[upper].VFwrite));
|
|
}
|
|
if( vfacc[upper] >= 0 && (regs[upper].VIwrite&(1<<REG_ACC_FLAG))) {
|
|
SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0);
|
|
_allocACCtoXMMreg(VU, vfacc[upper], MODE_WRITE|(regs[upper].VFwxyzw != 0xf?MODE_READ:0));
|
|
}
|
|
|
|
int info = PROCESS_VU_SUPER;
|
|
if( vfread0[upper] >= 0 ) info |= PROCESS_EE_SET_S(vfread0[upper]);
|
|
if( vfread1[upper] >= 0 ) info |= PROCESS_EE_SET_T(vfread1[upper]);
|
|
if( vfacc[upper] >= 0 ) info |= PROCESS_VU_SET_ACC(vfacc[upper]);
|
|
if( vfwrite[upper] >= 0 ) {
|
|
if( regs[upper].VFwrite == _Ft_ && vfread1[upper] < 0 ) {
|
|
info |= PROCESS_EE_SET_T(vfwrite[upper]);
|
|
}
|
|
else {
|
|
assert( regs[upper].VFwrite == _Fd_ );
|
|
info |= PROCESS_EE_SET_D(vfwrite[upper]);
|
|
}
|
|
}
|
|
|
|
if( (vffree[upper]&0xf) < XMMREGS ) {
|
|
SuperVUFreeXMMreg(vffree[upper]&0xf, XMMTYPE_TEMP, 0);
|
|
_allocTempXMMreg(XMMT_FPS, vffree[upper]&0xf);
|
|
}
|
|
info |= PROCESS_VU_SET_TEMP(vffree[upper]&0xf);
|
|
|
|
if( vfflush[upper] >= 0 ) {
|
|
SuperVUFreeXMMreg(vfflush[upper], XMMTYPE_TEMP, 0);
|
|
_allocTempXMMreg(XMMT_FPS, vfflush[upper]);
|
|
}
|
|
|
|
if( upper && (regs[upper].VIwrite & (1 << REG_CLIP_FLAG)) ) {
|
|
// CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC
|
|
assert( vfwrite[upper] == -1 );
|
|
SuperVUFreeXMMreg((vffree[upper]>>8)&0xf, XMMTYPE_TEMP, 0);
|
|
_allocTempXMMreg(XMMT_FPS, (vffree[upper]>>8)&0xf);
|
|
info |= PROCESS_EE_SET_D((vffree[upper]>>8)&0xf);
|
|
|
|
SuperVUFreeXMMreg((vffree[upper]>>16)&0xf, XMMTYPE_TEMP, 0);
|
|
_allocTempXMMreg(XMMT_FPS, (vffree[upper]>>16)&0xf);
|
|
info |= PROCESS_EE_SET_ACC((vffree[upper]>>16)&0xf);
|
|
|
|
_freeXMMreg((vffree[upper]>>8)&0xf); // don't need anymore
|
|
_freeXMMreg((vffree[upper]>>16)&0xf); // don't need anymore
|
|
}
|
|
else if( regs[upper].VIwrite & (1<<REG_P) ) {
|
|
SuperVUFreeXMMreg((vffree[upper]>>8)&0xf, XMMTYPE_TEMP, 0);
|
|
_allocTempXMMreg(XMMT_FPS, (vffree[upper]>>8)&0xf);
|
|
info |= PROCESS_EE_SET_D((vffree[upper]>>8)&0xf);
|
|
_freeXMMreg((vffree[upper]>>8)&0xf); // don't need anymore
|
|
}
|
|
|
|
if( vfflush[upper] >= 0 ) _freeXMMreg(vfflush[upper]);
|
|
if( (vffree[upper]&0xf) < XMMREGS ) _freeXMMreg(vffree[upper]&0xf); // don't need anymore
|
|
|
|
if( (regs[0].VIwrite|regs[1].VIwrite) & ((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG)) )
|
|
info |= PROCESS_VU_UPDATEFLAGS;
|
|
|
|
return info;
|
|
}
|
|
|
|
// Debug helper: reports that the micro code at `curpc` no longer matches `oldcode`.
//   curpc   - byte offset into the micro memory of the VU
//   vuindex - 0 for VU0, non-zero for VU1
//   oldcode - the code word that was present when the block was recompiled
// The current code word is read from the VU selected by `vuindex` (the parameter, not the
// recompiler-global s_vu), so the printed VU number and the memory that is read always agree.
static void checkvucodefn(u32 curpc, u32 vuindex, u32 oldcode)
{
	const u32 newcode = vuindex ? *(u32*)&VU1.Micro[curpc] : *(u32*)&VU0.Micro[curpc];
	SysPrintf("vu%c code changed (old:%x, new: %x)! %x %x\n", '0'+vuindex, oldcode, newcode, curpc, cpuRegs.cycle);
}
|
|
|
|
// Emits the x86 code for one upper/lower instruction pair of the current block.
//   itinst - iterator of this instruction inside s_pCurBlock->insts; used to look ahead
//   vuxyz  - forwarded to SetCachedRegs
//
// Dummy instructions (INST_DUMMY) emit no code of their own: they only redirect the
// clip/status/MAC read addresses to the slots written by their parent instruction.
// For real instructions the function advances `pc` by 8, allocates flag slots, handles
// the M/D/T/I bits of the upper word, the Q/P writeback bookkeeping, and finally
// dispatches to the upper and lower opcode recompilers.
//
// Changes compared to the previous revision:
//  - the x86 register distance counter tests the register's bit with `&` (it used `&&`,
//    which stopped at the first instruction touching any VI register);
//  - xmmregs[] is no longer indexed with vfwrite[1] when that index is negative;
//  - the temporaries that shadowed `ptr` were renamed, unused locals were removed.
void VuInstruction::Recompile(list<VuInstruction>::const_iterator& itinst, u32 vuxyz)
{
	u32 *ptr;
	u8* pjmp;
	int vfregstore=0;

	assert( s_pCurInst == this);
	s_WriteToReadQ = 0;

	// ptr[0] is the lower instruction word, ptr[1] the upper one
	ptr = (u32*)&VU->Micro[ pc ];

	if( type & INST_Q_READ )
		SuperVUFlush(0, (ptr[0] == 0x800003bf)||!!(regs[0].VIwrite & (1<<REG_Q)));
	if( type & INST_P_READ )
		SuperVUFlush(1, (ptr[0] == 0x800007bf)||!!(regs[0].VIwrite & (1<<REG_P)));

	if( type & INST_DUMMY ) {
		VuInstruction* parent;
		if( type & INST_CLIP_WRITE ) {
			parent = s_pCurBlock->GetInstAtPc(nParentPc);
			s_ClipRead = parent->pClipWrite;
		}

		// before modifying, check if they will ever be read
		if( s_pCurBlock->type & BLOCKTYPE_MACFLAGS ) {
			if( type & INST_STATUS_WRITE ) {
				parent = s_pCurBlock->GetInstAtPc(nParentPc);
				s_StatusRead = parent->pStatusWrite;
			}
			if( type & INST_MAC_WRITE ) {
				parent = s_pCurBlock->GetInstAtPc(nParentPc);
				s_MACRead = parent->pMACWrite;
			}
		}

		assert( s_ClipRead != 0 );
		assert( s_MACRead != 0 );
		assert( s_StatusRead != 0 );
		return;
	}

#ifdef _DEBUG
	// disabled self-check that compares the micro code against the recompiled words
//	CMP32ItoM((u32)ptr, ptr[0]);
//	j8Ptr[0] = JNE8(0);
//	CMP32ItoM((u32)(ptr+1), ptr[1]);
//	j8Ptr[1] = JNE8(0);
//	j8Ptr[2] = JMP8(0);
//	x86SetJ8( j8Ptr[0] );
//	x86SetJ8( j8Ptr[1] );
//	PUSH32I(ptr[0]);
//	PUSH32I(s_vu);
//	PUSH32I(pc);
//	CALLFunc((u32)checkvucodefn);
//	ADD32ItoR(ESP, 12);
//	x86SetJ8( j8Ptr[ 2 ] );

	MOV32ItoR(EAX, pc);
#endif

	assert( !(type & (INST_CLIP_WRITE|INST_STATUS_WRITE|INST_MAC_WRITE)) );
	pc += 8;

	if( (regs[0].VIwrite|regs[1].VIwrite) & ((1<<REG_MAC_FLAG)|(1<<REG_STATUS_FLAG)) ) {
		if( s_pCurBlock->type & BLOCKTYPE_MACFLAGS ) {
			// flags are read later in the block: give this instruction its own slots
			pMACWrite = (u32)SuperVUStaticAlloc(4);
			pStatusWrite = (u32)SuperVUStaticAlloc(4);
		}
		else {
			assert( s_StatusRead == (u32)&VU->VI[REG_STATUS_FLAG] );
			assert( s_MACRead == (u32)&VU->VI[REG_MAC_FLAG] );
			pMACWrite = s_MACRead;
			pStatusWrite = s_StatusRead;
		}
	}

	if( (regs[0].VIwrite|regs[1].VIwrite) & (1<<REG_CLIP_FLAG) )
		pClipWrite = (u32)SuperVUStaticAlloc(4);

	list<VuInstruction>::const_iterator itinst2;

#ifdef SUPERVU_X86CACHING
	// redo the counters so that the proper regs are released
	for(int j = 0; j < X86REGS; ++j) {
		if( x86regs[j].inuse && X86_ISVI(x86regs[j].type) ) {
			int count = 0;
			itinst2 = itinst;

			// count the instructions until this VI register is read or written again
			while(itinst2 != s_pCurBlock->insts.end() ) {
				if( (itinst2->regs[0].VIread|itinst2->regs[0].VIwrite|itinst2->regs[1].VIread|itinst2->regs[1].VIwrite) & (1<<x86regs[j].reg) )
					break;

				++count;
				++itinst2;
			}

			x86regs[j].counter = 1000-count;
		}
	}
#endif

	if (s_vu == 0 && (ptr[1] & 0x20000000)) { // M flag
		OR8ItoM((u32)&VU->flags, VUFLAG_MFLAGSET);
	}
	if (ptr[1] & 0x10000000) { // D flag
		TEST32ItoM((u32)&VU0.VI[REG_FBRST].UL, s_vu?0x400:0x004);
		u8* pskip = JZ8(0);
		OR32ItoM((u32)&VU0.VI[REG_VPU_STAT].UL, s_vu?0x200:0x002);
		PUSH32I(s_vu?INTC_VU1:INTC_VU0);
		CALLFunc((u32)hwIntcIrq);
		ADD32ItoR(ESP, 4);
		x86SetJ8(pskip);
	}
	if (ptr[1] & 0x08000000) { // T flag
		TEST32ItoM((u32)&VU0.VI[REG_FBRST].UL, s_vu?0x800:0x008);
		u8* pskip = JZ8(0);
		OR32ItoM((u32)&VU0.VI[REG_VPU_STAT].UL, s_vu?0x400:0x004);
		PUSH32I(s_vu?INTC_VU1:INTC_VU0);
		CALLFunc((u32)hwIntcIrq);
		ADD32ItoR(ESP, 4);
		x86SetJ8(pskip);
	}

	// check upper flags
	if (ptr[1] & 0x80000000) { // I flag

		assert( !(regs[0].VIwrite & ((1<<REG_Q)|(1<<REG_P))) );

		VU->code = ptr[1];
		s_vuInfo = SetCachedRegs(1, vuxyz);
		if( s_JumpX86 > 0 ) x86regs[s_JumpX86].needed = 1;

		recSVU_UPPER_OPCODE[ VU->code & 0x3f ]();

		// the lower word is the immediate; remember where it lives
		s_PrevIWrite = (u32)ptr;
		_clearNeededXMMregs();
		_clearNeededX86regs();
	}
	else {
		if( regs[0].VIwrite & (1<<REG_Q) ) {

			// search for all the insts between this inst and writeback
			itinst2 = itinst;
			++itinst2;
			u32 cacheq = (itinst2 == s_pCurBlock->insts.end());
			u32* codeptr2 = ptr+2;

			while(itinst2 != s_pCurBlock->insts.end() ) {
				if( !(itinst2->type & INST_DUMMY) && ((itinst2->regs[0].VIwrite&(1<<REG_Q)) || codeptr2[0] == 0x800003bf) ) { // waitq, or fdiv inst
					break;
				}
				if( (itinst2->type & INST_Q_WRITE) && itinst2->nParentPc == pc-8 ) {
					break;
				}
				if( itinst2->type & INST_Q_READ ) {
					cacheq = 1;
					break;
				}
				if( itinst2->type & INST_DUMMY ) {
					// dummies have no code words of their own
					++itinst2;
					continue;
				}
				codeptr2 += 2;
				++itinst2;
			}

			if( itinst2 == s_pCurBlock->insts.end() )
				cacheq = 1;

			int x86temp = -1;
			if( cacheq )
				x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);

			// new is written so flush old
			// if type & INST_Q_READ, already flushed
			if( !(type & INST_Q_READ) && s_recWriteQ == 0 ) MOV32MtoR(EAX, (u32)&s_writeQ);

			if( cacheq )
				MOV32MtoR(x86temp, (u32)&s_TotalVUCycles);

			if( !(type & INST_Q_READ) ) {
				if( s_recWriteQ == 0 ) {
					OR32RtoR(EAX, EAX);
					pjmp = JS8(0);
					MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0));
					MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX);
					x86SetJ8(pjmp);
				}
				else if( s_needFlush & 1 ) {
					MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0));
					MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX);
					s_needFlush &= ~1;
				}
			}

			// write new Q
			if( cacheq ) {
				assert(s_pCurInst->pqcycles>1);
				ADD32ItoR(x86temp, s_pCurInst->info.cycle+s_pCurInst->pqcycles);
				MOV32RtoM((u32)&s_writeQ, x86temp);
				s_needFlush |= 1;
			}
			else {
				// won't be writing back
				s_WriteToReadQ = 1;
				s_needFlush &= ~1;
				MOV32ItoM((u32)&s_writeQ, 0x80000001);
			}

			s_recWriteQ = s_pCurInst->info.cycle+s_pCurInst->pqcycles;

			if( x86temp >= 0 )
				_freeX86reg(x86temp);
		}

		if( regs[0].VIwrite & (1<<REG_P) ) {
			int x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);

			// new is written so flush old
			if( !(type & INST_P_READ) && s_recWriteP == 0)
				MOV32MtoR(EAX, (u32)&s_writeP);
			MOV32MtoR(x86temp, (u32)&s_TotalVUCycles);

			if( !(type & INST_P_READ) ) {
				if( s_recWriteP == 0 ) {
					OR32RtoR(EAX, EAX);
					pjmp = JS8(0);
					MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0));
					MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX);
					x86SetJ8(pjmp);
				}
				else if( s_needFlush & 2 ) {
					MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0));
					MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX);
					s_needFlush &= ~2;
				}
			}

			// write new P
			assert(s_pCurInst->pqcycles>1);
			ADD32ItoR(x86temp, s_pCurInst->info.cycle+s_pCurInst->pqcycles);
			MOV32RtoM((u32)&s_writeP, x86temp);
			s_needFlush |= 2;

			s_recWriteP = s_pCurInst->info.cycle+s_pCurInst->pqcycles;

			_freeX86reg(x86temp);
		}

		if( ptr[0] == 0x800003bf ) // waitq
			SuperVUFlush(0, 1);

		if( ptr[0] == 0x800007bf ) // waitp
			SuperVUFlush(1, 1);

#ifdef PCSX2_DEVBUILD
		if ( regs[1].VIread & regs[0].VIwrite & ~((1<<REG_Q)|(1<<REG_P)|(1<<REG_VF0_FLAG)|(1<<REG_ACC_FLAG))) {
			SysPrintf("*PCSX2*: Warning, VI write to the same reg %x in both lower/upper cycle %x\n", regs[1].VIread & regs[0].VIwrite, s_pCurBlock->startpc);
		}
#endif

		// remember whether the upper destination register already holds a pending write;
		// vfwrite[1] is negative when the upper instruction writes no VF register
		u32 modewrite = 0;
		if( vfwrite[1] >= 0 && xmmregs[vfwrite[1]].inuse && xmmregs[vfwrite[1]].type == XMMTYPE_VFREG && xmmregs[vfwrite[1]].reg == regs[1].VFwrite )
			modewrite = xmmregs[vfwrite[1]].mode & MODE_WRITE;

		VU->code = ptr[1];
		s_vuInfo = SetCachedRegs(1, vuxyz);

		if (vfwrite[1] >= 0) {
			assert( regs[1].VFwrite > 0 );

			if (vfwrite[0] == vfwrite[1]) {
				//SysPrintf("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle %x\n", s_pCurBlock->startpc);
			}

			// the lower instruction reads the register the upper one overwrites:
			// store the old value so the lower half can reload it into vfflush[0]
			if (vfread0[0] == vfwrite[1] || vfread1[0] == vfwrite[1] ) {
				assert( regs[0].VFread0 == regs[1].VFwrite || regs[0].VFread1 == regs[1].VFwrite );
				assert( vfflush[0] >= 0 );
				if( modewrite ) {
					SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[regs[1].VFwrite], (x86SSERegType)vfwrite[1]);
				}
				vfregstore = 1;
			}
		}

		if( s_JumpX86 > 0 ) x86regs[s_JumpX86].needed = 1;

		recSVU_UPPER_OPCODE[ VU->code & 0x3f ]();
		_clearNeededXMMregs();
		_clearNeededX86regs();

		VU->code = ptr[0];
		s_vuInfo = SetCachedRegs(0, vuxyz);

		if( vfregstore ) {
			// load
			SSE_MOVAPS_M128_to_XMM(vfflush[0], (u32)&VU->VF[regs[1].VFwrite]);

			assert( xmmregs[vfwrite[1]].mode & MODE_WRITE );

			// replace with vfflush
			if( _Fs_ == regs[1].VFwrite ) {
				s_vuInfo &= ~PROCESS_EE_SET_S(0xf);
				s_vuInfo |= PROCESS_EE_SET_S(vfflush[0]);
			}
			if( _Ft_ == regs[1].VFwrite ) {
				s_vuInfo &= ~PROCESS_EE_SET_T(0xf);
				s_vuInfo |= PROCESS_EE_SET_T(vfflush[0]);
			}

			xmmregs[vfflush[0]].mode |= MODE_NOFLUSH|MODE_WRITE; // so that lower inst doesn't flush
		}

		if( s_JumpX86 > 0 ) x86regs[s_JumpX86].needed = 1;

		recSVU_LOWER_OPCODE[ VU->code >> 25 ]();
		_clearNeededXMMregs();
		_clearNeededX86regs();
	}

	// clip is always written so ok
	if( (regs[0].VIwrite|regs[1].VIwrite) & (1<<REG_CLIP_FLAG) ) {
		assert( pClipWrite != 0 );
		s_PrevClipWrite = pClipWrite;
	}

	if( (regs[0].VIwrite|regs[1].VIwrite) & (1<<REG_STATUS_FLAG) ) {
		assert( pStatusWrite != 0 );
		s_PrevStatusWrite = pStatusWrite;
	}
}
|
|
|
|
///////////////////////////////////
|
|
// Super VU Recompilation Tables //
|
|
///////////////////////////////////
|
|
|
|
void recSVUMI_BranchHandle()
|
|
{
|
|
int bpc = _recbranchAddr(VU->code);
|
|
int curjump = 0;
|
|
|
|
if( s_pCurInst->type & INST_BRANCH_DELAY ) {
|
|
assert( (branch&7)!=2 && (branch&7)!=4 ); // no jump handlig for now
|
|
|
|
if( (branch & 0x7) == 3 ) {
|
|
// previous was a direct jump
|
|
curjump = 1;
|
|
}
|
|
else if( branch & 1 ) curjump = 2;
|
|
}
|
|
|
|
assert( s_JumpX86 > 0 );
|
|
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
|
|
MOV32ItoR(s_JumpX86, 0);
|
|
s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1;
|
|
|
|
if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) {
|
|
j8Ptr[1] = JMP8(0);
|
|
x86SetJ8( j8Ptr[ 0 ] );
|
|
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc);
|
|
MOV32ItoR(s_JumpX86, 0);
|
|
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1;
|
|
|
|
x86SetJ8( j8Ptr[ 1 ] );
|
|
}
|
|
else
|
|
x86SetJ8( j8Ptr[ 0 ] );
|
|
|
|
branch |= 1;
|
|
}
|
|
|
|
// supervu specific insts
|
|
void recSVUMI_IBQ_prep()
|
|
{
|
|
int fsreg, ftreg;
|
|
|
|
if( _Fs_ == 0 ) {
|
|
ftreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Ft_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( ftreg >= 0 ) {
|
|
CMP16ItoR( ftreg, 0 );
|
|
}
|
|
else CMP16ItoM(SuperVUGetVIAddr(_Ft_, 1), 0);
|
|
}
|
|
else if( _Ft_ == 0 ) {
|
|
fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
CMP16ItoR( fsreg, 0 );
|
|
}
|
|
else CMP16ItoM(SuperVUGetVIAddr(_Fs_, 1), 0);
|
|
}
|
|
else {
|
|
_addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Ft_);
|
|
fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
ftreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Ft_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
if( ftreg >= 0 ) {
|
|
CMP16RtoR( fsreg, ftreg );
|
|
}
|
|
else CMP16MtoR(fsreg, SuperVUGetVIAddr(_Ft_, 1));
|
|
}
|
|
else if( ftreg >= 0 ) {
|
|
CMP16MtoR(ftreg, SuperVUGetVIAddr(_Fs_, 1));
|
|
}
|
|
else {
|
|
fsreg = _allocX86reg(-1, X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
CMP16MtoR(fsreg, SuperVUGetVIAddr(_Ft_, 1));
|
|
}
|
|
}
|
|
}
|
|
|
|
void recSVUMI_IBEQ()
|
|
{
|
|
recSVUMI_IBQ_prep();
|
|
j8Ptr[ 0 ] = JNE8( 0 );
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_IBGEZ()
|
|
{
|
|
int fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
OR16RtoR(fsreg, fsreg);
|
|
j8Ptr[ 0 ] = JS8( 0 );
|
|
}
|
|
else {
|
|
CMP16ItoM( SuperVUGetVIAddr(_Fs_, 1), 0x0 );
|
|
j8Ptr[ 0 ] = JL8( 0 );
|
|
}
|
|
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_IBGTZ()
|
|
{
|
|
int fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
CMP16ItoR(fsreg, 0);
|
|
j8Ptr[ 0 ] = JLE8( 0 );
|
|
}
|
|
else {
|
|
CMP16ItoM( SuperVUGetVIAddr(_Fs_, 1), 0x0 );
|
|
j8Ptr[ 0 ] = JLE8( 0 );
|
|
}
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_IBLEZ()
|
|
{
|
|
int fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
CMP16ItoR(fsreg, 0);
|
|
j8Ptr[ 0 ] = JG8( 0 );
|
|
}
|
|
else {
|
|
CMP16ItoM( SuperVUGetVIAddr(_Fs_, 1), 0x0 );
|
|
j8Ptr[ 0 ] = JG8( 0 );
|
|
}
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_IBLTZ()
|
|
{
|
|
int fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
|
|
if( fsreg >= 0 ) {
|
|
OR16RtoR(fsreg, fsreg);
|
|
j8Ptr[ 0 ] = JNS8( 0 );
|
|
}
|
|
else {
|
|
CMP16ItoM( SuperVUGetVIAddr(_Fs_, 1), 0x0 );
|
|
j8Ptr[ 0 ] = JGE8( 0 );
|
|
}
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_IBNE()
|
|
{
|
|
recSVUMI_IBQ_prep();
|
|
j8Ptr[ 0 ] = JE8( 0 );
|
|
recSVUMI_BranchHandle();
|
|
}
|
|
|
|
void recSVUMI_B()
|
|
{
|
|
// supervu will take care of the rest
|
|
int bpc = _recbranchAddr(VU->code);
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
|
|
|
|
// loops to self, so check condition
|
|
if( bpc == s_pCurBlock->startpc && s_vu == 0 ) {
|
|
SuperVUTestVU0Condition(0);
|
|
}
|
|
|
|
if( s_pCurBlock->blocks.size() > 1 ) {
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
MOV32ItoR(s_JumpX86, 0);
|
|
s_pCurBlock->pChildJumps[0] = (u32*)x86Ptr-1;
|
|
}
|
|
|
|
branch |= 3;
|
|
}
|
|
|
|
void recSVUMI_BAL()
|
|
{
|
|
int bpc = _recbranchAddr(VU->code);
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
|
|
|
|
// loops to self, so check condition
|
|
if( bpc == s_pCurBlock->startpc && s_vu == 0 ) {
|
|
SuperVUTestVU0Condition(0);
|
|
}
|
|
|
|
if ( _Ft_ ) {
|
|
_deleteX86reg(X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Ft_, 2);
|
|
MOV16ItoM( SuperVUGetVIAddr(_Ft_, 0), (pc+8)>>3 );
|
|
}
|
|
|
|
if( s_pCurBlock->blocks.size() > 1 ) {
|
|
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
|
|
MOV32ItoR(s_JumpX86, 0);
|
|
s_pCurBlock->pChildJumps[0] = (u32*)x86Ptr-1;
|
|
}
|
|
|
|
branch |= 3;
|
|
}
|
|
|
|
void recSVUMI_JR()
|
|
{
|
|
int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
LEA32RStoR(EAX, fsreg, 3);
|
|
CWDE();
|
|
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX);
|
|
|
|
if( !(s_pCurBlock->type & BLOCKTYPE_HASEOP) ) {
|
|
PUSH32I(s_vu);
|
|
PUSH32R(EAX);
|
|
}
|
|
branch |= 2;
|
|
}
|
|
|
|
void recSVUMI_JALR()
|
|
{
|
|
_addNeededX86reg(X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Ft_);
|
|
|
|
int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ);
|
|
LEA32RStoR(EAX, fsreg, 3);
|
|
CWDE(); // necessary, charlie and chocolate factory gives bad addrs, but graphics are ok
|
|
|
|
if ( _Ft_ ) {
|
|
_deleteX86reg(X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Ft_, 2);
|
|
MOV16ItoM( SuperVUGetVIAddr(_Ft_, 0), (pc+8)>>3 );
|
|
}
|
|
|
|
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX);
|
|
|
|
if( !(s_pCurBlock->type & BLOCKTYPE_HASEOP) ) {
|
|
PUSH32I(s_vu);
|
|
PUSH32R(EAX);
|
|
}
|
|
|
|
branch |= 4;
|
|
}
|
|
|
|
#ifdef SUPERVU_COUNT
// Stops the timing window opened by StartSVUCounter and adds the elapsed
// performance-counter ticks (truncated to 32 bits) to svutime.
void StopSVUCounter()
{
	QueryPerformanceCounter(&svufinal);

	const u32 elapsed = (u32)(svufinal.QuadPart-svubase.QuadPart);
	svutime += elapsed;
}

// Records the current performance-counter value in svubase.
void StartSVUCounter()
{
	QueryPerformanceCounter(&svubase);
}
#endif
|
|
|
|
//extern u32 vudump;
|
|
//void countfn()
|
|
//{
|
|
// static int scount = 0;
|
|
// scount++;
|
|
//
|
|
// if( scount > 766 ) {
|
|
// vudump |= 8;
|
|
// }
|
|
//}
|
|
|
|
// XGKICK: emits a call that passes VU->Mem and the address (VI[_Fs_]<<4)&0x3fff to the GS
// transfer function. `info` is not used by this implementation.
void recSVUMI_XGKICK( VURegs *VU, int info )
{
	// the register is released right away but its contents are still used below
	const int addrreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ);
	_freeX86reg(addrreg);

	// qword index -> byte offset, masked with 0x3fff
	SHL32ItoR(addrreg, 4);
	AND32ItoR(addrreg, 0x3fff);

	// everything has to be written back before calling out
	_freeX86regs();
	_freeXMMregs();

	PUSH32R(addrreg);
	PUSH32I((int)VU->Mem);

#ifdef SUPERVU_COUNT
	CALLFunc((u32)StopSVUCounter);
#endif

	//CALLFunc((u32)countfn);

	if( !CHECK_MULTIGS ) {
		// NOTE(review): no stack fix-up is emitted on this path; presumably the callee
		// pops its own arguments - confirm against the GSgifTransfer1 declaration
		CALLFunc((int)GSgifTransfer1);
	}
	else {
		CALLFunc((int)VU1XGKICK_MTGSTransfer);
		ADD32ItoR(ESP, 8);
	}

#ifdef SUPERVU_COUNT
	CALLFunc((u32)StartSVUCounter);
#endif
}
|
|
|
|
// upper inst
|
|
void recSVUMI_ABS() { recVUMI_ABS(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_ADD() { recVUMI_ADD(VU, s_vuInfo); }
|
|
void recSVUMI_ADDi() { recVUMI_ADDi(VU, s_vuInfo); }
|
|
void recSVUMI_ADDq() { recVUMI_ADDq(VU, s_vuInfo); }
|
|
void recSVUMI_ADDx() { recVUMI_ADDx(VU, s_vuInfo); }
|
|
void recSVUMI_ADDy() { recVUMI_ADDy(VU, s_vuInfo); }
|
|
void recSVUMI_ADDz() { recVUMI_ADDz(VU, s_vuInfo); }
|
|
void recSVUMI_ADDw() { recVUMI_ADDw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_ADDA() { recVUMI_ADDA(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAi() { recVUMI_ADDAi(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAq() { recVUMI_ADDAq(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAx() { recVUMI_ADDAx(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAy() { recVUMI_ADDAy(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAz() { recVUMI_ADDAz(VU, s_vuInfo); }
|
|
void recSVUMI_ADDAw() { recVUMI_ADDAw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_SUB() { recVUMI_SUB(VU, s_vuInfo); }
|
|
void recSVUMI_SUBi() { recVUMI_SUBi(VU, s_vuInfo); }
|
|
void recSVUMI_SUBq() { recVUMI_SUBq(VU, s_vuInfo); }
|
|
void recSVUMI_SUBx() { recVUMI_SUBx(VU, s_vuInfo); }
|
|
void recSVUMI_SUBy() { recVUMI_SUBy(VU, s_vuInfo); }
|
|
void recSVUMI_SUBz() { recVUMI_SUBz(VU, s_vuInfo); }
|
|
void recSVUMI_SUBw() { recVUMI_SUBw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_SUBA() { recVUMI_SUBA(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAi() { recVUMI_SUBAi(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAq() { recVUMI_SUBAq(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAx() { recVUMI_SUBAx(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAy() { recVUMI_SUBAy(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAz() { recVUMI_SUBAz(VU, s_vuInfo); }
|
|
void recSVUMI_SUBAw() { recVUMI_SUBAw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MUL() { recVUMI_MUL(VU, s_vuInfo); }
|
|
void recSVUMI_MULi() { recVUMI_MULi(VU, s_vuInfo); }
|
|
void recSVUMI_MULq() { recVUMI_MULq(VU, s_vuInfo); }
|
|
void recSVUMI_MULx() { recVUMI_MULx(VU, s_vuInfo); }
|
|
void recSVUMI_MULy() { recVUMI_MULy(VU, s_vuInfo); }
|
|
void recSVUMI_MULz() { recVUMI_MULz(VU, s_vuInfo); }
|
|
void recSVUMI_MULw() { recVUMI_MULw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MULA() { recVUMI_MULA(VU, s_vuInfo); }
|
|
void recSVUMI_MULAi() { recVUMI_MULAi(VU, s_vuInfo); }
|
|
void recSVUMI_MULAq() { recVUMI_MULAq(VU, s_vuInfo); }
|
|
void recSVUMI_MULAx() { recVUMI_MULAx(VU, s_vuInfo); }
|
|
void recSVUMI_MULAy() { recVUMI_MULAy(VU, s_vuInfo); }
|
|
void recSVUMI_MULAz() { recVUMI_MULAz(VU, s_vuInfo); }
|
|
void recSVUMI_MULAw() { recVUMI_MULAw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MADD() { recVUMI_MADD(VU, s_vuInfo); }
|
|
void recSVUMI_MADDi() { recVUMI_MADDi(VU, s_vuInfo); }
|
|
void recSVUMI_MADDq() { recVUMI_MADDq(VU, s_vuInfo); }
|
|
void recSVUMI_MADDx() { recVUMI_MADDx(VU, s_vuInfo); }
|
|
void recSVUMI_MADDy() { recVUMI_MADDy(VU, s_vuInfo); }
|
|
void recSVUMI_MADDz() { recVUMI_MADDz(VU, s_vuInfo); }
|
|
void recSVUMI_MADDw() { recVUMI_MADDw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MADDA() { recVUMI_MADDA(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAi() { recVUMI_MADDAi(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAq() { recVUMI_MADDAq(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAx() { recVUMI_MADDAx(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAy() { recVUMI_MADDAy(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAz() { recVUMI_MADDAz(VU, s_vuInfo); }
|
|
void recSVUMI_MADDAw() { recVUMI_MADDAw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MSUB() { recVUMI_MSUB(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBi() { recVUMI_MSUBi(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBq() { recVUMI_MSUBq(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBx() { recVUMI_MSUBx(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBy() { recVUMI_MSUBy(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBz() { recVUMI_MSUBz(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBw() { recVUMI_MSUBw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MSUBA() { recVUMI_MSUBA(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAi() { recVUMI_MSUBAi(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAq() { recVUMI_MSUBAq(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAx() { recVUMI_MSUBAx(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAy() { recVUMI_MSUBAy(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAz() { recVUMI_MSUBAz(VU, s_vuInfo); }
|
|
void recSVUMI_MSUBAw() { recVUMI_MSUBAw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MAX() { recVUMI_MAX(VU, s_vuInfo); }
|
|
void recSVUMI_MAXi() { recVUMI_MAXi(VU, s_vuInfo); }
|
|
void recSVUMI_MAXx() { recVUMI_MAXx(VU, s_vuInfo); }
|
|
void recSVUMI_MAXy() { recVUMI_MAXy(VU, s_vuInfo); }
|
|
void recSVUMI_MAXz() { recVUMI_MAXz(VU, s_vuInfo); }
|
|
void recSVUMI_MAXw() { recVUMI_MAXw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_MINI() { recVUMI_MINI(VU, s_vuInfo); }
|
|
void recSVUMI_MINIi() { recVUMI_MINIi(VU, s_vuInfo); }
|
|
void recSVUMI_MINIx() { recVUMI_MINIx(VU, s_vuInfo); }
|
|
void recSVUMI_MINIy() { recVUMI_MINIy(VU, s_vuInfo); }
|
|
void recSVUMI_MINIz() { recVUMI_MINIz(VU, s_vuInfo); }
|
|
void recSVUMI_MINIw() { recVUMI_MINIw(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_FTOI0() { recVUMI_FTOI0(VU, s_vuInfo); }
|
|
void recSVUMI_FTOI4() { recVUMI_FTOI4(VU, s_vuInfo); }
|
|
void recSVUMI_FTOI12() { recVUMI_FTOI12(VU, s_vuInfo); }
|
|
void recSVUMI_FTOI15() { recVUMI_FTOI15(VU, s_vuInfo); }
|
|
void recSVUMI_ITOF0() { recVUMI_ITOF0(VU, s_vuInfo); }
|
|
void recSVUMI_ITOF4() { recVUMI_ITOF4(VU, s_vuInfo); }
|
|
void recSVUMI_ITOF12() { recVUMI_ITOF12(VU, s_vuInfo); }
|
|
void recSVUMI_ITOF15() { recVUMI_ITOF15(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_OPMULA() { recVUMI_OPMULA(VU, s_vuInfo); }
|
|
void recSVUMI_OPMSUB() { recVUMI_OPMSUB(VU, s_vuInfo); }
|
|
void recSVUMI_NOP() { }
|
|
void recSVUMI_CLIP() { recVUMI_CLIP(VU, s_vuInfo); }
|
|
|
|
// lower inst
|
|
void recSVUMI_MTIR() { recVUMI_MTIR(VU, s_vuInfo); }
|
|
void recSVUMI_MR32() { recVUMI_MR32(VU, s_vuInfo); }
|
|
void recSVUMI_MFIR() { recVUMI_MFIR(VU, s_vuInfo); }
|
|
void recSVUMI_MOVE() { recVUMI_MOVE(VU, s_vuInfo); }
|
|
void recSVUMI_WAITQ() { recVUMI_WAITQ(VU, s_vuInfo); }
|
|
void recSVUMI_MFP() { recVUMI_MFP(VU, s_vuInfo); }
|
|
void recSVUMI_WAITP() { recVUMI_WAITP(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_SQRT() { recVUMI_SQRT(VU, s_vuInfo); }
|
|
void recSVUMI_RSQRT() { recVUMI_RSQRT(VU, s_vuInfo); }
|
|
void recSVUMI_DIV() { recVUMI_DIV(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_ESADD() { recVUMI_ESADD(VU, s_vuInfo); }
|
|
void recSVUMI_ERSADD() { recVUMI_ERSADD(VU, s_vuInfo); }
|
|
void recSVUMI_ELENG() { recVUMI_ELENG(VU, s_vuInfo); }
|
|
void recSVUMI_ERLENG() { recVUMI_ERLENG(VU, s_vuInfo); }
|
|
void recSVUMI_EATANxy() { recVUMI_EATANxy(VU, s_vuInfo); }
|
|
void recSVUMI_EATANxz() { recVUMI_EATANxz(VU, s_vuInfo); }
|
|
void recSVUMI_ESUM() { recVUMI_ESUM(VU, s_vuInfo); }
|
|
void recSVUMI_ERCPR() { recVUMI_ERCPR(VU, s_vuInfo); }
|
|
void recSVUMI_ESQRT() { recVUMI_ESQRT(VU, s_vuInfo); }
|
|
void recSVUMI_ERSQRT() { recVUMI_ERSQRT(VU, s_vuInfo); }
|
|
void recSVUMI_ESIN() { recVUMI_ESIN(VU, s_vuInfo); }
|
|
void recSVUMI_EATAN() { recVUMI_EATAN(VU, s_vuInfo); }
|
|
void recSVUMI_EEXP() { recVUMI_EEXP(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_XITOP() { recVUMI_XITOP(VU, s_vuInfo); }
|
|
void recSVUMI_XGKICK() { recSVUMI_XGKICK(VU, s_vuInfo); }
|
|
void recSVUMI_XTOP() { recVUMI_XTOP(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_RINIT() { recVUMI_RINIT(VU, s_vuInfo); }
|
|
void recSVUMI_RGET() { recVUMI_RGET(VU, s_vuInfo); }
|
|
void recSVUMI_RNEXT() { recVUMI_RNEXT(VU, s_vuInfo); }
|
|
void recSVUMI_RXOR() { recVUMI_RXOR(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_FSAND() { recVUMI_FSAND(VU, s_vuInfo); }
|
|
void recSVUMI_FSEQ() { recVUMI_FSEQ(VU, s_vuInfo); }
|
|
void recSVUMI_FSOR() { recVUMI_FSOR(VU, s_vuInfo); }
|
|
void recSVUMI_FSSET() { recVUMI_FSSET(VU, s_vuInfo); }
|
|
void recSVUMI_FMEQ() { recVUMI_FMEQ(VU, s_vuInfo); }
|
|
void recSVUMI_FMOR() { recVUMI_FMOR(VU, s_vuInfo); }
|
|
void recSVUMI_FCEQ() { recVUMI_FCEQ(VU, s_vuInfo); }
|
|
void recSVUMI_FCOR() { recVUMI_FCOR(VU, s_vuInfo); }
|
|
void recSVUMI_FCSET() { recVUMI_FCSET(VU, s_vuInfo); }
|
|
void recSVUMI_FCGET() { recVUMI_FCGET(VU, s_vuInfo); }
|
|
void recSVUMI_FCAND() { recVUMI_FCAND(VU, s_vuInfo); }
|
|
void recSVUMI_FMAND() { recVUMI_FMAND(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_LQ() { recVUMI_LQ(VU, s_vuInfo); }
|
|
void recSVUMI_LQD() { recVUMI_LQD(VU, s_vuInfo); }
|
|
void recSVUMI_LQI() { recVUMI_LQI(VU, s_vuInfo); }
|
|
void recSVUMI_SQ() { recVUMI_SQ(VU, s_vuInfo); }
|
|
void recSVUMI_SQD() { recVUMI_SQD(VU, s_vuInfo); }
|
|
void recSVUMI_SQI() { recVUMI_SQI(VU, s_vuInfo); }
|
|
void recSVUMI_ILW() { recVUMI_ILW(VU, s_vuInfo); }
|
|
void recSVUMI_ISW() { recVUMI_ISW(VU, s_vuInfo); }
|
|
void recSVUMI_ILWR() { recVUMI_ILWR(VU, s_vuInfo); }
|
|
void recSVUMI_ISWR() { recVUMI_ISWR(VU, s_vuInfo); }
|
|
|
|
void recSVUMI_IADD() { recVUMI_IADD(VU, s_vuInfo); }
|
|
void recSVUMI_IADDI() { recVUMI_IADDI(VU, s_vuInfo); }
|
|
void recSVUMI_IADDIU() { recVUMI_IADDIU(VU, s_vuInfo); }
|
|
void recSVUMI_IOR() { recVUMI_IOR(VU, s_vuInfo); }
|
|
void recSVUMI_ISUB() { recVUMI_ISUB(VU, s_vuInfo); }
|
|
void recSVUMI_IAND() { recVUMI_IAND(VU, s_vuInfo); }
|
|
void recSVUMI_ISUBIU() { recVUMI_ISUBIU(VU, s_vuInfo); }
|
|
|
|
// Forward declarations of the handlers that are defined after the opcode tables
// but are referenced by them: the second-level dispatchers and the fallback.
void recSVU_UPPER_FD_00();
void recSVU_UPPER_FD_01();
void recSVU_UPPER_FD_10();
void recSVU_UPPER_FD_11();

void recSVULowerOP();
void recSVULowerOP_T3_00();
void recSVULowerOP_T3_01();
void recSVULowerOP_T3_10();
void recSVULowerOP_T3_11();

void recSVUunknown();
|
|
|
|
// Top-level handler table for lower instructions (128 slots). Unassigned slots hold
// recSVUunknown; slot 0x40 chains to recSVULowerOP for a second-level lookup.
// The code that indexes this table lies outside this section of the file.
// Each row comment gives the index of the first entry on that row.
void (*recSVU_LOWER_OPCODE[128])() = {
	/* 0x00 */ recSVUMI_LQ    , recSVUMI_SQ    , recSVUunknown , recSVUunknown,
	/* 0x04 */ recSVUMI_ILW   , recSVUMI_ISW   , recSVUunknown , recSVUunknown,
	/* 0x08 */ recSVUMI_IADDIU, recSVUMI_ISUBIU, recSVUunknown , recSVUunknown,
	/* 0x0c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x10 */ recSVUMI_FCEQ  , recSVUMI_FCSET , recSVUMI_FCAND, recSVUMI_FCOR,
	/* 0x14 */ recSVUMI_FSEQ  , recSVUMI_FSSET , recSVUMI_FSAND, recSVUMI_FSOR,
	/* 0x18 */ recSVUMI_FMEQ  , recSVUunknown  , recSVUMI_FMAND, recSVUMI_FMOR,
	/* 0x1c */ recSVUMI_FCGET , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x20 */ recSVUMI_B     , recSVUMI_BAL   , recSVUunknown , recSVUunknown,
	/* 0x24 */ recSVUMI_JR    , recSVUMI_JALR  , recSVUunknown , recSVUunknown,
	/* 0x28 */ recSVUMI_IBEQ  , recSVUMI_IBNE  , recSVUunknown , recSVUunknown,
	/* 0x2c */ recSVUMI_IBLTZ , recSVUMI_IBGTZ , recSVUMI_IBLEZ, recSVUMI_IBGEZ,
	/* 0x30 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x34 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x38 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x3c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x40 */ recSVULowerOP  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x44 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x48 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x4c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x50 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x54 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x58 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x5c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x60 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x64 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x68 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x6c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x70 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x74 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x78 */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
	/* 0x7c */ recSVUunknown  , recSVUunknown  , recSVUunknown , recSVUunknown,
};
|
|
|
|
// Third-level lower-instruction table, reached through slot 0x3c of
// recSVULowerOP_OPCODE. recSVULowerOP_T3_00 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVULowerOP_T3_00_OPCODE[32])() = {
	/* 0x00 */ recSVUunknown  , recSVUunknown   , recSVUunknown , recSVUunknown,
	/* 0x04 */ recSVUunknown  , recSVUunknown   , recSVUunknown , recSVUunknown,
	/* 0x08 */ recSVUunknown  , recSVUunknown   , recSVUunknown , recSVUunknown,
	/* 0x0c */ recSVUMI_MOVE  , recSVUMI_LQI    , recSVUMI_DIV  , recSVUMI_MTIR,
	/* 0x10 */ recSVUMI_RNEXT , recSVUunknown   , recSVUunknown , recSVUunknown,
	/* 0x14 */ recSVUunknown  , recSVUunknown   , recSVUunknown , recSVUunknown,
	/* 0x18 */ recSVUunknown  , recSVUMI_MFP    , recSVUMI_XTOP , recSVUMI_XGKICK,
	/* 0x1c */ recSVUMI_ESADD , recSVUMI_EATANxy, recSVUMI_ESQRT, recSVUMI_ESIN,
};
|
|
|
|
// Third-level lower-instruction table, reached through slot 0x3d of
// recSVULowerOP_OPCODE. recSVULowerOP_T3_01 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVULowerOP_T3_01_OPCODE[32])() = {
	/* 0x00 */ recSVUunknown  , recSVUunknown   , recSVUunknown  , recSVUunknown,
	/* 0x04 */ recSVUunknown  , recSVUunknown   , recSVUunknown  , recSVUunknown,
	/* 0x08 */ recSVUunknown  , recSVUunknown   , recSVUunknown  , recSVUunknown,
	/* 0x0c */ recSVUMI_MR32  , recSVUMI_SQI    , recSVUMI_SQRT  , recSVUMI_MFIR,
	/* 0x10 */ recSVUMI_RGET  , recSVUunknown   , recSVUunknown  , recSVUunknown,
	/* 0x14 */ recSVUunknown  , recSVUunknown   , recSVUunknown  , recSVUunknown,
	/* 0x18 */ recSVUunknown  , recSVUunknown   , recSVUMI_XITOP , recSVUunknown,
	/* 0x1c */ recSVUMI_ERSADD, recSVUMI_EATANxz, recSVUMI_ERSQRT, recSVUMI_EATAN,
};
|
|
|
|
// Third-level lower-instruction table, reached through slot 0x3e of
// recSVULowerOP_OPCODE. recSVULowerOP_T3_10 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVULowerOP_T3_10_OPCODE[32])() = {
	/* 0x00 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x04 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x08 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x0c */ recSVUunknown  , recSVUMI_LQD  , recSVUMI_RSQRT, recSVUMI_ILWR,
	/* 0x10 */ recSVUMI_RINIT , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x14 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x18 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x1c */ recSVUMI_ELENG , recSVUMI_ESUM , recSVUMI_ERCPR, recSVUMI_EEXP,
};
|
|
|
|
// Third-level lower-instruction table, reached through slot 0x3f of
// recSVULowerOP_OPCODE. recSVULowerOP_T3_11 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVULowerOP_T3_11_OPCODE[32])() = {
	/* 0x00 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x04 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x08 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x0c */ recSVUunknown  , recSVUMI_SQD  , recSVUMI_WAITQ, recSVUMI_ISWR,
	/* 0x10 */ recSVUMI_RXOR  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x14 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x18 */ recSVUunknown  , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x1c */ recSVUMI_ERLENG, recSVUunknown , recSVUMI_WAITP, recSVUunknown,
};
|
|
|
|
// Second-level lower-instruction table; recSVULowerOP indexes it with
// VU->code & 0x3f. Slots 0x3c-0x3f chain to the recSVULowerOP_T3_xx dispatchers.
// Each row comment gives the index of the first entry on that row.
void (*recSVULowerOP_OPCODE[64])() = {
	/* 0x00 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x04 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x08 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x0c */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x10 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x14 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x18 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x1c */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x20 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x24 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x28 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x2c */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x30 */ recSVUMI_IADD , recSVUMI_ISUB , recSVUMI_IADDI, recSVUunknown,
	/* 0x34 */ recSVUMI_IAND , recSVUMI_IOR  , recSVUunknown , recSVUunknown,
	/* 0x38 */ recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
	/* 0x3c */ recSVULowerOP_T3_00, recSVULowerOP_T3_01, recSVULowerOP_T3_10, recSVULowerOP_T3_11,
};
|
|
|
|
void (*recSVU_UPPER_OPCODE[64])() = {
|
|
recSVUMI_ADDx , recSVUMI_ADDy , recSVUMI_ADDz , recSVUMI_ADDw,
|
|
recSVUMI_SUBx , recSVUMI_SUBy , recSVUMI_SUBz , recSVUMI_SUBw,
|
|
recSVUMI_MADDx , recSVUMI_MADDy , recSVUMI_MADDz , recSVUMI_MADDw,
|
|
recSVUMI_MSUBx , recSVUMI_MSUBy , recSVUMI_MSUBz , recSVUMI_MSUBw,
|
|
recSVUMI_MAXx , recSVUMI_MAXy , recSVUMI_MAXz , recSVUMI_MAXw, /* 0x10 */
|
|
recSVUMI_MINIx , recSVUMI_MINIy , recSVUMI_MINIz , recSVUMI_MINIw,
|
|
recSVUMI_MULx , recSVUMI_MULy , recSVUMI_MULz , recSVUMI_MULw,
|
|
recSVUMI_MULq , recSVUMI_MAXi , recSVUMI_MULi , recSVUMI_MINIi,
|
|
recSVUMI_ADDq , recSVUMI_MADDq , recSVUMI_ADDi , recSVUMI_MADDi, /* 0x20 */
|
|
recSVUMI_SUBq , recSVUMI_MSUBq , recSVUMI_SUBi , recSVUMI_MSUBi,
|
|
recSVUMI_ADD , recSVUMI_MADD , recSVUMI_MUL , recSVUMI_MAX,
|
|
recSVUMI_SUB , recSVUMI_MSUB , recSVUMI_OPMSUB, recSVUMI_MINI,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown, /* 0x30 */
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown,
|
|
recSVU_UPPER_FD_00, recSVU_UPPER_FD_01, recSVU_UPPER_FD_10, recSVU_UPPER_FD_11,
|
|
};
|
|
|
|
// Second-level upper-instruction table, reached through slot 0x3c of
// recSVU_UPPER_OPCODE. recSVU_UPPER_FD_00 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVU_UPPER_FD_00_TABLE[32])() = {
	/* 0x00 */ recSVUMI_ADDAx, recSVUMI_SUBAx, recSVUMI_MADDAx, recSVUMI_MSUBAx,
	/* 0x04 */ recSVUMI_ITOF0, recSVUMI_FTOI0, recSVUMI_MULAx , recSVUMI_MULAq ,
	/* 0x08 */ recSVUMI_ADDAq, recSVUMI_SUBAq, recSVUMI_ADDA  , recSVUMI_SUBA  ,
	/* 0x0c */ recSVUunknown , recSVUunknown , recSVUunknown  , recSVUunknown  ,
	/* 0x10 */ recSVUunknown , recSVUunknown , recSVUunknown  , recSVUunknown  ,
	/* 0x14 */ recSVUunknown , recSVUunknown , recSVUunknown  , recSVUunknown  ,
	/* 0x18 */ recSVUunknown , recSVUunknown , recSVUunknown  , recSVUunknown  ,
	/* 0x1c */ recSVUunknown , recSVUunknown , recSVUunknown  , recSVUunknown  ,
};
|
|
|
|
// Second-level upper-instruction table, reached through slot 0x3d of
// recSVU_UPPER_OPCODE. recSVU_UPPER_FD_01 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVU_UPPER_FD_01_TABLE[32])() = {
	/* 0x00 */ recSVUMI_ADDAy , recSVUMI_SUBAy , recSVUMI_MADDAy, recSVUMI_MSUBAy,
	/* 0x04 */ recSVUMI_ITOF4 , recSVUMI_FTOI4 , recSVUMI_MULAy , recSVUMI_ABS   ,
	/* 0x08 */ recSVUMI_MADDAq, recSVUMI_MSUBAq, recSVUMI_MADDA , recSVUMI_MSUBA ,
	/* 0x0c */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x10 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x14 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x18 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x1c */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
};
|
|
|
|
void (*recSVU_UPPER_FD_10_TABLE[32])() = {
|
|
recSVUMI_ADDAz , recSVUMI_SUBAz , recSVUMI_MADDAz, recSVUMI_MSUBAz,
|
|
recSVUMI_ITOF12, recSVUMI_FTOI12, recSVUMI_MULAz , recSVUMI_MULAi ,
|
|
recSVUMI_MADDAi, recSVUMI_SUBAi , recSVUMI_MULA , recSVUMI_OPMULA,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown ,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown ,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown ,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown ,
|
|
recSVUunknown , recSVUunknown , recSVUunknown , recSVUunknown ,
|
|
};
|
|
|
|
// Second-level upper-instruction table, reached through slot 0x3f of
// recSVU_UPPER_OPCODE. recSVU_UPPER_FD_11 indexes it with (VU->code >> 6) & 0x1f.
// Each row comment gives the index of the first entry on that row.
void (*recSVU_UPPER_FD_11_TABLE[32])() = {
	/* 0x00 */ recSVUMI_ADDAw , recSVUMI_SUBAw , recSVUMI_MADDAw, recSVUMI_MSUBAw,
	/* 0x04 */ recSVUMI_ITOF15, recSVUMI_FTOI15, recSVUMI_MULAw , recSVUMI_CLIP  ,
	/* 0x08 */ recSVUMI_MADDAi, recSVUMI_MSUBAi, recSVUunknown  , recSVUMI_NOP   ,
	/* 0x0c */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x10 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x14 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x18 */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
	/* 0x1c */ recSVUunknown  , recSVUunknown  , recSVUunknown  , recSVUunknown  ,
};
|
|
|
|
void recSVU_UPPER_FD_00( void )
|
|
{
|
|
recSVU_UPPER_FD_00_TABLE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVU_UPPER_FD_01( void )
|
|
{
|
|
recSVU_UPPER_FD_01_TABLE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVU_UPPER_FD_10( void )
|
|
{
|
|
recSVU_UPPER_FD_10_TABLE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVU_UPPER_FD_11( void )
|
|
{
|
|
recSVU_UPPER_FD_11_TABLE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVULowerOP( void )
|
|
{
|
|
recSVULowerOP_OPCODE[ VU->code & 0x3f ]( );
|
|
}
|
|
|
|
void recSVULowerOP_T3_00( void )
|
|
{
|
|
recSVULowerOP_T3_00_OPCODE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVULowerOP_T3_01( void )
|
|
{
|
|
recSVULowerOP_T3_01_OPCODE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVULowerOP_T3_10( void )
|
|
{
|
|
recSVULowerOP_T3_10_OPCODE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVULowerOP_T3_11( void )
|
|
{
|
|
recSVULowerOP_T3_11_OPCODE[ ( VU->code >> 6 ) & 0x1f ]( );
|
|
}
|
|
|
|
void recSVUunknown( void )
|
|
{
|
|
SysPrintf("Unknown SVU micromode opcode called\n");
|
|
}
|