pcsx2/x86/iR5900.c

3119 lines
82 KiB
C

/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2005 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
// recompiler reworked to add dynamic linking zerofrog(@gmail.com) Jan06
// Recompiled completely rewritten to add block level recompilation/reg-caching/
// liveness analysis/constant propagation Apr06 (zerofrog@gmail.com)
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <malloc.h>
#include "Common.h"
#include "Memory.h"
#include "InterTables.h"
#include "ix86/ix86.h"
#include "iR5900.h"
#include "iR5900AritImm.h"
#include "iR5900Arit.h"
#include "iR5900MultDiv.h"
#include "iR5900Shift.h"
#include "iR5900Branch.h"
#include "iR5900Jump.h"
#include "iR5900LoadStore.h"
#include "iR5900Move.h"
#include "iMMI.h"
#include "iFPU.h"
#include "iCP0.h"
#include "iVUmicro.h"
#include "iVU0micro.h"
#include "iVU1micro.h"
#include "VU.h"
#include "VUmicro.h"
#include "iVUzerorec.h"
#ifdef __MSCW32__
#pragma warning(disable:4244)
#pragma warning(disable:4761)
#endif
u32 maxrecmem = 0;
uptr *recLUT;
#define X86
#define RECSTACK_SIZE 0x00010000
#define EE_NUMBLOCKS (1<<14)
static char *recMem = NULL; // the recompiled blocks will be here
static char* recStack = NULL; // stack mem
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BASEBLOCKEX *recBlocks = NULL;
static char *recPtr = NULL, *recStackPtr = NULL;
static EEINST* s_pInstCache = NULL;
static u32 s_nInstCacheSize = 0;
u32 g_EEFreezeRegs = 0; // if set, should freeze the regs
static BASEBLOCK* s_pCurBlock = NULL;
static BASEBLOCKEX* s_pCurBlockEx = NULL;
static BASEBLOCK* s_pDispatchBlock = NULL;
static u32 s_nEndBlock = 0; // what pc the current block ends
static u32 s_nHasDelay = 0;
static u32 s_nNextBlock = 0; // next free block in recBlocks
extern void (*recBSC[64])();
extern void (*recBSC_co[64])();
void rpropBSC(EEINST* prev, EEINST* pinst);
// save states for branches
static u16 s_savex86FpuState, s_saveiCWstate;
static GPR_reg64 s_ConstGPRreg;
static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0;
static EEINST* s_psaveInstInfo = NULL;
u32 s_nBlockCycles = 0; // cycles of current block recompiling
static u32 s_savenBlockCycles = 0;
void recCOP2RecompileInst();
int recCOP2AnalyzeBlock(u32 startpc, u32 endpc);
void recCOP2EndBlock(void);
#ifdef _DEBUG
u32 dumplog = 0;
#else
#define dumplog 0
#endif
u32 pc; // recompiler pc
int branch; // set for branch
BOOL bExecBIOS = FALSE;
#ifdef PCSX2_DEVBUILD
static LARGE_INTEGER lbase = {0}, lfinal = {0};
static u32 s_startcount = 0;
#endif
#ifdef __x86_64__
char *txt0 = "RAX = %x : RDX = %x : RCX = %x\n";
char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n";
char *txt1 = "REG[%d] = %x_%x\n";
char *txt2 = "M32 = %x\n";
#else
char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n";
char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n";
char *txt1 = "REG[%d] = %x_%x\n";
char *txt2 = "M32 = %x\n";
#endif
void _cop2AnalyzeOp(EEINST* pinst, int dostalls); // reccop2.c
static void iBranchTest(u32 newpc, u32 cpuBranch);
static void recRecompile( u32 startpc );
void recCOP22( void );
// Resolves the extended block-info record (BASEBLOCKEX) for a BASEBLOCK
// by looking up the block's start pc in the sorted block list.
BASEBLOCKEX* PC_GETBLOCKEX(BASEBLOCK* p)
{
	return GetBaseBlockEx(p->startpc, 0);
}
////////////////////////////////////////////////////
// Debug helper: writes a text dump of the block beginning at startpc to
// dumps/dumpXXXXXXXX.txt. The dump contains the disassembly of every
// instruction in the block followed by a per-instruction table of the
// EEINST_* liveness/usage flags for each GPR and FPU register the block
// touches. `ptr` points at the emitted x86 code; it is only referenced
// by the disabled objdump path below.
void iDumpBlock( int startpc, char * ptr )
{
FILE *f;
char filename[ 256 ];
u32 i, j;
EEINST* pcur;
extern char *disRNameGPR[];
u8 used[34];     // GPRs 0-31 plus two extra slots (size matches s_pInstCache->regs)
u8 fpuused[33];  // FPU regs plus one extra slot (size matches s_pInstCache->fpuregs)
int numused, count, fpunumused;
SysPrintf( "dump1 %x:%x, %x\n", startpc, pc, cpuRegs.cycle );
#ifdef __WIN32__
CreateDirectory("dumps", NULL);
sprintf( filename, "dumps\\dump%.8X.txt", startpc);
#else
mkdir("dumps", 0755);
sprintf( filename, "dumps/dump%.8X.txt", startpc);
#endif
fflush( stdout );
// f = fopen( "dump1", "wb" );
// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f );
// fclose( f );
//
//#ifdef __x86_64__
// sprintf( command, "objdump -D --target=binary --architecture=i386:x86-64 dump1 > %s", filename );
//#else
// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename );
//#endif
// system( command );
f = fopen( filename, "w" );
// disassemble every instruction in the block
for ( i = startpc; i < s_nEndBlock; i += 4 ) {
fprintf( f, "%s\n", disR5900Fasm( PSMu32( i ), i ) );
}
// write the instruction info
fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n",
EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED);
// collect which GPRs the block uses at all (entry 0 of the inst cache
// carries the aggregate flags for the block)
memset(used, 0, sizeof(used));
numused = 0;
for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) {
if( s_pInstCache->regs[i] & EEINST_USED ) {
used[i] = 1;
numused++;
}
}
// likewise for the FPU registers
memset(fpuused, 0, sizeof(fpuused));
fpunumused = 0;
for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) {
if( s_pInstCache->fpuregs[i] & EEINST_USED ) {
fpuused[i] = 1;
fpunumused++;
}
}
// table header line 1: register indices
fprintf(f, " ");
for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) {
if( used[i] ) fprintf(f, "%2d ", i);
}
for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) {
if( fpuused[i] ) fprintf(f, "%2d ", i);
}
fprintf(f, "\n");
// table header line 2: register names (FR = float reg, FA = accumulator slot)
fprintf(f, " ");
for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) {
if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]);
}
for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) {
if( fpuused[i] ) fprintf(f, "%s ", i<32?"FR":"FA");
}
fprintf(f, "\n");
// one row per instruction: info byte, then the per-register flag bytes
// for every register marked used above ('_' groups columns of 8)
pcur = s_pInstCache+1;
for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) {
fprintf(f, "%2d: %2.2x ", i+1, pcur->info);
count = 1;
for(j = 0; j < ARRAYSIZE(s_pInstCache->regs); j++) {
if( used[j] ) {
fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&count<numused)?"_":" ");
++count;
}
}
count = 1;
for(j = 0; j < ARRAYSIZE(s_pInstCache->fpuregs); j++) {
if( fpuused[j] ) {
fprintf(f, "%2.2x%s", pcur->fpuregs[j], ((count%8)&&count<fpunumused)?"_":" ");
++count;
}
}
fprintf(f, "\n");
}
fclose( f );
}
// Returns nonzero when the given EE load opcode writes its own base
// register, i.e. the destination field (rt) equals the base field (rs).
u8 _eeLoadWritesRs(u32 tempcode)
{
	u32 op = tempcode >> 26;

	switch (op) {
		case 26: // ldl
		case 27: // ldr
		case 30: // lq
		case 32: // lb
		case 33: // lh
		case 34: // lwl
		case 35: // lw
		case 36: // lbu
		case 37: // lhu
		case 38: // lwr
		case 39: // lwu
		case 55: // ld
		{
			u32 rs = (tempcode >> 21) & 0x1f;
			u32 rt = (tempcode >> 16) & 0x1f;
			return rs == rt;
		}
	}
	return 0;
}
// Returns nonzero if two consecutive load/store opcodes can be co-issued
// by the recompiler: the unaligned left/right halves (lwl/lwr, swl/swr,
// ldl/ldr, sdl/sdr) pair with their counterpart, and the remaining plain
// load/store opcodes pair with a second instance of the same opcode.
u8 _eeIsLoadStoreCoIssue(u32 firstcode, u32 secondcode)
{
	u32 op1 = firstcode >> 26;
	u32 op2 = secondcode >> 26;

	switch (op1) {
		case 34: return op2 == 38; // lwl + lwr
		case 38: return op2 == 34; // lwr + lwl
		case 42: return op2 == 46; // swl + swr
		case 46: return op2 == 42; // swr + swl
		case 26: return op2 == 27; // ldl + ldr
		case 27: return op2 == 26; // ldr + ldl
		case 44: return op2 == 45; // sdl + sdr
		case 45: return op2 == 44; // sdr + sdl

		// loads
		case 32: case 33: case 35: case 36: case 37: case 39:
		case 55: // ld
		case 30: // lq
		// stores
		case 31: // sq
		case 40: case 41: case 43:
		case 63: // sd
		case 49: // lwc1
		case 57: // swc1
		case 54: // lqc2
		case 62: // sqc2
			return op2 == op1;
	}
	return 0;
}
// Returns nonzero for the load/store opcodes that have a "coX" co-issue
// recompiler path: lq(30), sq(31), lwc1(49), ld(55), swc1(57), sd(63).
u8 _eeIsLoadStoreCoX(u32 tempcode)
{
	switch (tempcode >> 26) {
		case 30:
		case 31:
		case 49:
		case 55:
		case 57:
		case 63:
			return 1;
		default:
			return 0;
	}
}
// Flushes every GPR that the remainder of the block no longer uses:
// constants are written back to cpuRegs, cached MMX/XMM copies are
// deleted. Non-GPR XMM registers (FPU/VU0) are always freed since no
// usage information is tracked for them yet.
void _eeFlushAllUnused()
{
	int reg;

	for (reg = 0; reg < 34; ++reg) {
		// consult the next instruction's usage flags when one exists
		// inside the block, otherwise the current instruction's
		if (pc < s_nEndBlock) {
			if (g_pCurInstInfo[1].regs[reg] & EEINST_USED)
				continue;
		}
		else if (g_pCurInstInfo[0].regs[reg] & EEINST_USED)
			continue;

		if (reg < 32 && GPR_IS_CONST1(reg)) {
			_flushConstReg(reg);
		}
		else {
			_deleteMMXreg(MMX_GPR + reg, 1);
			_deleteGPRtoXMMreg(reg, 1);
		}
	}

	//TODO when used info is done for FPU and VU0
	for (reg = 0; reg < XMMREGS; ++reg) {
		if (xmmregs[reg].inuse && xmmregs[reg].type != XMMTYPE_GPRREG)
			_freeXMMreg(reg);
	}
}
// Returns a pointer to memory holding the current constant value of reg
// (which must be constant-propagated). If the register will be written
// again later in the block, the value is parked in temporary stack
// memory instead of being flushed to cpuRegs, avoiding a useless
// writeback that would be overwritten anyway.
u32* _eeGetConstReg(int reg)
{
	u32* scratch;

	assert( GPR_IS_CONST1( reg ) );

	// already flushed: the cpuRegs copy is current
	if (g_cpuFlushedConstReg & (1 << reg))
		return &cpuRegs.GPR.r[reg].UL[0];

	if (_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, reg)) {
		// written in the future, don't flush -- copy to a temp location
		scratch = (u32*)recAllocStackMem(8, 4);
		scratch[0] = g_cpuConstRegs[reg].UL[0];
		scratch[1] = g_cpuConstRegs[reg].UL[1];
		return scratch;
	}

	_flushConstReg(reg);
	return &cpuRegs.GPR.r[reg].UL[0];
}
// Finds one dirty XMM-cached GPR that is not written again in the rest
// of the block and flushes it (freeing writes the value back), then
// re-marks the slot in use so the cached copy remains available for
// reads. Returns 1 if a register was flushed, 0 otherwise.
int _flushXMMunused()
{
	int i;

	for (i = 0; i < XMMREGS; i++) {
		if (!xmmregs[i].inuse) continue;
		if (xmmregs[i].needed) continue;
		if (!(xmmregs[i].mode & MODE_WRITE)) continue;
		if (xmmregs[i].type != XMMTYPE_GPRREG) continue;

		if (!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, xmmregs[i].reg)) {
			_freeXMMreg(i);
			xmmregs[i].inuse = 1; // keep the slot allocated after the flush
			return 1;
		}
	}
	return 0;
}
// MMX counterpart of _flushXMMunused: flushes one dirty MMX-cached GPR
// that is not written again in the rest of the block, then re-marks the
// slot in use so the cached copy remains usable. Returns 1 on a flush.
int _flushMMXunused()
{
	int i;

	for (i = 0; i < MMXREGS; i++) {
		if (!mmxregs[i].inuse) continue;
		if (mmxregs[i].needed) continue;
		if (!(mmxregs[i].mode & MODE_WRITE)) continue;
		if (!MMX_ISGPR(mmxregs[i].reg)) continue;

		if (!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, mmxregs[i].reg - MMX_GPR)) {
			_freeMMXreg(i);
			mmxregs[i].inuse = 1; // keep the slot allocated after the flush
			return 1;
		}
	}
	return 0;
}
// Flushes one unflushed constant GPR to cpuRegs, choosing a register
// that is not written again in the block (so the store cannot become
// stale). Returns 1 if a register was flushed, 0 if none qualified.
int _flushUnusedConstReg()
{
	int reg;

	for (reg = 1; reg < 32; ++reg) {
		if (!(g_cpuHasConstReg & (1 << reg))) continue;
		if (g_cpuFlushedConstReg & (1 << reg)) continue;
		// check if will be written in the future
		if (_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, reg)) continue;

		MOV32ItoM((u32)&cpuRegs.GPR.r[reg].UL[0], g_cpuConstRegs[reg].UL[0]);
		MOV32ItoM((u32)&cpuRegs.GPR.r[reg].UL[1], g_cpuConstRegs[reg].UL[1]);
		g_cpuFlushedConstReg |= 1 << reg;
		return 1;
	}
	return 0;
}
// Writes all cached register state back to cpuRegs: constant-propagated
// GPRs first, then MMX-cached copies, then XMM-cached copies.
void _flushCachedRegs()
{
_flushConstRegs();
_flushMMXregs();
_flushXMMregs();
}
// Writes the constant-propagated value of reg back to cpuRegs (both
// 32-bit halves) if it is constant and not yet flushed, and marks it
// flushed. No-op otherwise.
void _flushConstReg(int reg)
{
	if (!GPR_IS_CONST1(reg))
		return;
	if (g_cpuFlushedConstReg & (1 << reg))
		return;

	MOV32ItoM((int)&cpuRegs.GPR.r[reg].UL[0], g_cpuConstRegs[reg].UL[0]);
	MOV32ItoM((int)&cpuRegs.GPR.r[reg].UL[1], g_cpuConstRegs[reg].UL[1]);
	g_cpuFlushedConstReg |= (1 << reg);
}
// Writes every constant-propagated GPR (r1-r31) back to cpuRegs and
// marks it flushed. Exits early once every constant register has been
// flushed. The disabled debug path instead verifies that already-flushed
// values still match the tracked constants.
void _flushConstRegs()
{
int i;
// flush constants
// ignore r0
for(i = 1; i < 32; ++i) {
if( g_cpuHasConstReg & (1<<i) ) {
if( !(g_cpuFlushedConstReg&(1<<i)) ) {
MOV32ItoM((u32)&cpuRegs.GPR.r[i].UL[0], g_cpuConstRegs[i].UL[0]);
MOV32ItoM((u32)&cpuRegs.GPR.r[i].UL[1], g_cpuConstRegs[i].UL[1]);
g_cpuFlushedConstReg |= 1<<i;
}
#if defined(_DEBUG)&&0
else {
// make sure the const regs are the same
u8* ptemp[3];
CMP32ItoM((u32)&cpuRegs.GPR.r[i].UL[0], g_cpuConstRegs[i].UL[0]);
ptemp[0] = JNE8(0);
if( EEINST_ISLIVE1(i) ) {
CMP32ItoM((u32)&cpuRegs.GPR.r[i].UL[1], g_cpuConstRegs[i].UL[1]);
ptemp[1] = JNE8(0);
}
ptemp[2] = JMP8(0);
x86SetJ8( ptemp[0] );
if( EEINST_ISLIVE1(i) ) x86SetJ8( ptemp[1] );
CALLFunc((u32)checkconstreg);
x86SetJ8( ptemp[2] );
}
#else
// all constant regs flushed -- nothing left to do
if( g_cpuHasConstReg == g_cpuFlushedConstReg )
break;
#endif
}
}
}
void* recAllocStackMem(int size, int align)
{
// write to a temp loc, trick
if( (u32)recStackPtr % align ) recStackPtr += align - ((u32)recStackPtr%align);
recStackPtr += size;
return recStackPtr-size;
}
////////////////////
// Code Templates //
////////////////////
// Captures the pre-branch constant state of reg so it can be restored
// when recompilation resumes on the other branch path. Only one register
// can be captured at a time: if a save is already active
// (s_saveConstGPRreg != 0xffffffff) it must be empty (0) or for this
// same register.
void CHECK_SAVE_REG(int reg)
{
	if (s_saveConstGPRreg != 0xffffffff) {
		// a save is already active -- only a single reg may be tracked
		assert( s_saveConstGPRreg == 0 || s_saveConstGPRreg == reg );
		return;
	}

	if (GPR_IS_CONST1(reg)) {
		s_saveConstGPRreg = reg;
		s_ConstGPRreg = g_cpuConstRegs[reg];
	}
}
// Marks reg as having its upper 32 bits live and records whether its
// value is sign-extended, saving the previous masks so branch handling
// can roll them back.
void _eeProcessHasLive(int reg, int signext)
{
	g_cpuPrevRegHasLive1 = g_cpuRegHasLive1;
	g_cpuRegHasLive1 |= 1 << reg;

	g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt;
	if (signext) {
		EEINST_SETSIGNEXT(reg);
	}
	else {
		EEINST_RESETSIGNEXT(reg);
	}
}
// Called whenever an instruction writes GPR `reg`: saves any constant
// state for later branch restore, drops constant propagation for the
// register, and updates its liveness/sign-extension tracking.
void _eeOnWriteReg(int reg, int signext)
{
CHECK_SAVE_REG(reg);
GPR_DEL_CONST(reg);
_eeProcessHasLive(reg, signext);
}
// Removes every cached copy of GPR `reg` (constant, XMM, MMX).
// With `flush` set, a constant value is written back to cpuRegs and
// kept constant; otherwise the constant flag is dropped and the cached
// XMM/MMX copies are deleted (mode 0 when flushing, 2 when discarding).
void _deleteEEreg(int reg, int flush)
{
	int delmode;

	if (reg == 0) return; // r0 is hardwired to zero

	if (flush && GPR_IS_CONST1(reg)) {
		_flushConstReg(reg);
		return;
	}

	delmode = flush ? 0 : 2;
	GPR_DEL_CONST(reg);
	_deleteGPRtoXMMreg(reg, delmode);
	_deleteMMXreg(MMX_GPR + reg, delmode);
}
// if not mmx, then xmm
// Tries to allocate HI or LO (`reg`) in a hardware register with the
// given mode; `mmx` selects MMX allocation, otherwise XMM is used.
// Allocation is skipped (returns -1) only when no free register of the
// requested kind exists and this is the value's last use in the block.
int eeProcessHILO(int reg, int mode, int mmx)
{
	int hasfree = mmx ? _hasFreeMMXreg() : _hasFreeXMMreg();

	if (!hasfree && (g_pCurInstInfo->regs[reg] & EEINST_LASTUSE))
		return -1;

	return mmx ? _allocMMXreg(-1, MMX_GPR + reg, mode)
	           : _allocGPRtoXMMreg(-1, reg, mode);
}
#define PROCESS_EE_SETMODES(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0)
#define PROCESS_EE_SETMODET(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0)
// ignores XMMINFO_READS, XMMINFO_READT, and XMMINFO_READD_LO from xmminfo
// core of reg caching
// Core of EE reg caching for "rd = rs op rt" style instructions.
// Dispatches to one of four emitters depending on where the operands live:
//   constcode    - both rs and rt constant-propagated (recompile-time eval)
//   constscode   - rs constant, rt in a register
//   consttcode   - rt constant, rs in a register
//   noconstcode  - neither operand constant
// xmminfo flags describe which of rd/lo/hi the instruction reads/writes;
// XMMINFO_READS, XMMINFO_READT and XMMINFO_READD_LO are ignored here.
// Tries the MMX path, then the XMM path, then falls back to plain x86.
//
// FIX: the HI allocation mode previously tested XMMINFO_READLO/WRITELO
// (copy-paste from the LO case) in all four HI branches; it now tests
// XMMINFO_READHI/WRITEHI so HI gets its own read/write mode.
void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo)
{
	int mmreg1, mmreg2, mmreg3, mmtemp, moded;

	// writes to r0 are no-ops
	if ( ! _Rd_ && (xmminfo&XMMINFO_WRITED) ) return;

	if( xmminfo&XMMINFO_WRITED) {
		CHECK_SAVE_REG(_Rd_);
		_eeProcessHasLive(_Rd_, 0);
		EEINST_RESETSIGNEXT(_Rd_);
	}

	// both operands constant: evaluate at recompile time, rd becomes constant
	if( GPR_IS_CONST2(_Rs_, _Rt_) ) {
		if( xmminfo & XMMINFO_WRITED ) {
			_deleteMMXreg(MMX_GPR+_Rd_, 2);
			_deleteGPRtoXMMreg(_Rd_, 2);
		}
		if( xmminfo&XMMINFO_WRITED ) GPR_SET_CONST(_Rd_);
		constcode();
		return;
	}

	moded = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0);

	// test if should write mmx
	if( g_pCurInstInfo->info & EEINST_MMX ) {
		if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededMMXreg(MMX_GPR+MMX_LO);
		if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededMMXreg(MMX_GPR+MMX_HI);
		_addNeededMMXreg(MMX_GPR+_Rs_);
		_addNeededMMXreg(MMX_GPR+_Rt_);

		if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) {
			// one constant operand (creg), the other (vreg) cached in MMX
			int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_;
			int vreg = creg == _Rs_ ? _Rt_ : _Rs_;

			mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, vreg, MODE_READ);
			if( mmreg1 >= 0 ) {
				int info = PROCESS_EE_MMX;

				if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1);
				else info |= PROCESS_EE_SETMODES(mmreg1);

				if( xmminfo & XMMINFO_WRITED ) {
					_addNeededMMXreg(MMX_GPR+_Rd_);
					mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded);

					// reuse the operand's reg for rd when this is its last use
					if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEMMX(vreg)) ) {
						if( EEINST_ISLIVEMMX(vreg) ) {
							_freeMMXreg(mmreg1);
							if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET;
							else info &= ~PROCESS_EE_MODEWRITES;
						}
						_deleteGPRtoXMMreg(_Rd_, 2);
						mmxregs[mmreg1].inuse = 1;
						mmxregs[mmreg1].reg = _Rd_;
						mmxregs[mmreg1].mode = moded;
						mmreg3 = mmreg1;
					}
					else if( mmreg3 < 0 ) mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded);
					info |= PROCESS_EE_SET_D(mmreg3);
				}

				if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
					mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp);
				}
				if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
					// fixed: use the HI flags (was READLO/WRITELO)
					mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0), 1);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp);
				}

				SetMMXstate();
				if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1));
				else consttcode(info|PROCESS_EE_SET_S(mmreg1));
				_clearNeededMMXregs();
				if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
				return;
			}
		}
		else {
			// no const regs
			mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ);
			mmreg2 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ);

			if( mmreg1 >= 0 || mmreg2 >= 0 ) {
				int info = PROCESS_EE_MMX;

				// do it all in mmx
				if( mmreg1 < 0 ) mmreg1 = _allocMMXreg(-1, MMX_GPR+_Rs_, MODE_READ);
				if( mmreg2 < 0 ) mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ);
				info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2);

				// check for last used, if so don't alloc a new MMX reg
				if( xmminfo & XMMINFO_WRITED ) {
					_addNeededMMXreg(MMX_GPR+_Rd_);
					mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded);
					if( mmreg3 < 0 ) {
						if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEMMX(_Rt_)) ) {
							if( EEINST_ISLIVEMMX(_Rt_) ) {
								_freeMMXreg(mmreg2);
								info &= ~PROCESS_EE_MODEWRITET;
							}
							_deleteGPRtoXMMreg(_Rd_, 2);
							mmxregs[mmreg2].inuse = 1;
							mmxregs[mmreg2].reg = _Rd_;
							mmxregs[mmreg2].mode = moded;
							mmreg3 = mmreg2;
						}
						else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEMMX(_Rs_)) ) {
							if( EEINST_ISLIVEMMX(_Rs_) ) {
								_freeMMXreg(mmreg1);
								info &= ~PROCESS_EE_MODEWRITES;
							}
							_deleteGPRtoXMMreg(_Rd_, 2);
							mmxregs[mmreg1].inuse = 1;
							mmxregs[mmreg1].reg = _Rd_;
							mmxregs[mmreg1].mode = moded;
							mmreg3 = mmreg1;
						}
						else mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded);
					}
					info |= PROCESS_EE_SET_D(mmreg3);
				}

				if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
					mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp);
				}
				if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
					// fixed: use the HI flags (was READLO/WRITELO)
					mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0), 1);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp);
				}

				SetMMXstate();
				noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2));
				_clearNeededMMXregs();
				if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
				return;
			}
		}

		_clearNeededMMXregs();
	}

	// test if should write xmm, mirror to mmx code
	if( g_pCurInstInfo->info & EEINST_XMM ) {
		if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededGPRtoXMMreg(XMMGPR_LO);
		if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededGPRtoXMMreg(XMMGPR_HI);
		_addNeededGPRtoXMMreg(_Rs_);
		_addNeededGPRtoXMMreg(_Rt_);

		if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) {
			// one constant operand (creg), the other (vreg) cached in XMM
			int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_;
			int vreg = creg == _Rs_ ? _Rt_ : _Rs_;

			mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ);
			if( mmreg1 >= 0 ) {
				int info = PROCESS_EE_XMM;

				if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1);
				else info |= PROCESS_EE_SETMODES(mmreg1);

				if( xmminfo & XMMINFO_WRITED ) {
					_addNeededGPRtoXMMreg(_Rd_);
					mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);

					// reuse the operand's reg for rd when this is its last use
					if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)) ) {
						_freeXMMreg(mmreg1);
						if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET;
						else info &= ~PROCESS_EE_MODEWRITES;
						_deleteMMXreg(MMX_GPR+_Rd_, 2);
						xmmregs[mmreg1].inuse = 1;
						xmmregs[mmreg1].reg = _Rd_;
						xmmregs[mmreg1].mode = moded;
						mmreg3 = mmreg1;
					}
					else if( mmreg3 < 0 ) mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
					info |= PROCESS_EE_SET_D(mmreg3);
				}

				if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
					mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp);
				}
				if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
					// fixed: use the HI flags (was READLO/WRITELO)
					mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0), 0);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp);
				}

				if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1));
				else consttcode(info|PROCESS_EE_SET_S(mmreg1));
				_clearNeededXMMregs();
				if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
				return;
			}
		}
		else {
			// no const regs
			mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
			mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);

			if( mmreg1 >= 0 || mmreg2 >= 0 ) {
				int info = PROCESS_EE_XMM;

				// do it all in xmm
				if( mmreg1 < 0 ) mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
				if( mmreg2 < 0 ) mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
				info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2);

				if( xmminfo & XMMINFO_WRITED ) {
					// check for last used, if so don't alloc a new XMM reg
					_addNeededGPRtoXMMreg(_Rd_);
					mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded);
					if( mmreg3 < 0 ) {
						if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) {
							_freeXMMreg(mmreg2);
							info &= ~PROCESS_EE_MODEWRITET;
							_deleteMMXreg(MMX_GPR+_Rd_, 2);
							xmmregs[mmreg2].inuse = 1;
							xmmregs[mmreg2].reg = _Rd_;
							xmmregs[mmreg2].mode = moded;
							mmreg3 = mmreg2;
						}
						else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) {
							_freeXMMreg(mmreg1);
							info &= ~PROCESS_EE_MODEWRITES;
							_deleteMMXreg(MMX_GPR+_Rd_, 2);
							xmmregs[mmreg1].inuse = 1;
							xmmregs[mmreg1].reg = _Rd_;
							xmmregs[mmreg1].mode = moded;
							mmreg3 = mmreg1;
						}
						else mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
					}
					info |= PROCESS_EE_SET_D(mmreg3);
				}

				if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
					mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp);
				}
				if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
					// fixed: use the HI flags (was READLO/WRITELO)
					mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0), 0);
					if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp);
				}

				noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2));
				_clearNeededXMMregs();
				if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
				return;
			}
		}

		_clearNeededXMMregs();
	}

	// regular x86: drop all cached copies, then pick the emitter by constness
	_deleteGPRtoXMMreg(_Rs_, 1);
	_deleteGPRtoXMMreg(_Rt_, 1);
	if( xmminfo&XMMINFO_WRITED )
		_deleteGPRtoXMMreg(_Rd_, (xmminfo&XMMINFO_READD)?0:2);
	_deleteMMXreg(MMX_GPR+_Rs_, 1);
	_deleteMMXreg(MMX_GPR+_Rt_, 1);
	if( xmminfo&XMMINFO_WRITED )
		_deleteMMXreg(MMX_GPR+_Rd_, (xmminfo&XMMINFO_READD)?0:2);

	// LO/HI are not deleted here; the emitters take care of them

	if( GPR_IS_CONST1(_Rs_) ) {
		constscode(0);
		if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
		return;
	}
	if( GPR_IS_CONST1(_Rt_) ) {
		consttcode(0);
		if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
		return;
	}
	noconstcode(0);
	if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_);
}
// rt = rs op imm16
// Code template for "rt = rs op imm16" instructions.
// constcode runs when rs is constant-propagated (rt becomes constant);
// otherwise noconstcode is emitted, with rs/rt cached in MMX or XMM
// registers when the liveness analysis marked this instruction for that
// unit, falling back to plain x86.
void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
int mmreg1, mmreg2;
// writes to r0 are no-ops
if ( ! _Rt_ ) return;
CHECK_SAVE_REG(_Rt_);
_eeProcessHasLive(_Rt_, 0);
EEINST_RESETSIGNEXT(_Rt_);
// rs constant: compute rt at recompile time
if( GPR_IS_CONST1(_Rs_) ) {
_deleteMMXreg(MMX_GPR+_Rt_, 2);
_deleteGPRtoXMMreg(_Rt_, 2);
GPR_SET_CONST(_Rt_);
constcode();
return;
}
// test if should write mmx
if( g_pCurInstInfo->info & EEINST_MMX ) {
// no const regs
mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ);
if( mmreg1 >= 0 ) {
int info = PROCESS_EE_MMX|PROCESS_EE_SETMODES(mmreg1);
// check for last used, if so don't alloc a new MMX reg
_addNeededMMXreg(MMX_GPR+_Rt_);
mmreg2 = _checkMMXreg(MMX_GPR+_Rt_, MODE_WRITE);
if( mmreg2 < 0 ) {
// reuse rs's register for rt when rs is dead after this inst
if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEMMX(_Rs_) ) {
if( EEINST_ISLIVEMMX(_Rs_) ) {
_freeMMXreg(mmreg1);
info &= ~PROCESS_EE_MODEWRITES;
}
_deleteGPRtoXMMreg(_Rt_, 2);
mmxregs[mmreg1].inuse = 1;
mmxregs[mmreg1].reg = _Rt_;
mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ;
mmreg2 = mmreg1;
}
else mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_WRITE);
}
SetMMXstate();
noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2));
_clearNeededMMXregs();
GPR_DEL_CONST(_Rt_);
return;
}
_clearNeededMMXregs();
}
// test if should write xmm, mirror to mmx code
if( g_pCurInstInfo->info & EEINST_XMM ) {
// no const regs
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
if( mmreg1 >= 0 ) {
int info = PROCESS_EE_XMM|PROCESS_EE_SETMODES(mmreg1);
// check for last used, if so don't alloc a new XMM reg
_addNeededGPRtoXMMreg(_Rt_);
mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE);
if( mmreg2 < 0 ) {
// reuse rs's register for rt when rs is dead after this inst
if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) {
_freeXMMreg(mmreg1);
info &= ~PROCESS_EE_MODEWRITES;
_deleteMMXreg(MMX_GPR+_Rt_, 2);
xmmregs[mmreg1].inuse = 1;
xmmregs[mmreg1].reg = _Rt_;
xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ;
mmreg2 = mmreg1;
}
else mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE);
}
noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2));
_clearNeededXMMregs();
GPR_DEL_CONST(_Rt_);
return;
}
_clearNeededXMMregs();
}
// regular x86: drop cached copies and emit the generic version
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 2);
_deleteMMXreg(MMX_GPR+_Rs_, 1);
_deleteMMXreg(MMX_GPR+_Rt_, 2);
noconstcode(0);
GPR_DEL_CONST(_Rt_);
}
// rd = rt op sa
// Code template for "rd = rt op sa" (shift-by-immediate style)
// instructions. constcode runs when rt is constant-propagated (rd
// becomes constant); otherwise noconstcode is emitted with rt/rd cached
// in MMX or XMM when the liveness analysis marked this instruction for
// that unit, falling back to plain x86.
//
// FIX: the XMM branch previously tested EEINST_ISLIVEMMX(_Rt_) -- a
// copy-paste from the MMX branch; it now tests EEINST_ISLIVEXMM, matching
// the mirror code in eeRecompileCode0/eeRecompileCode1.
void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
	int mmreg1, mmreg2;

	// writes to r0 are no-ops
	if ( ! _Rd_ ) return;

	CHECK_SAVE_REG(_Rd_);
	_eeProcessHasLive(_Rd_, 0);
	EEINST_RESETSIGNEXT(_Rd_);

	// rt constant: compute rd at recompile time
	if( GPR_IS_CONST1(_Rt_) ) {
		_deleteMMXreg(MMX_GPR+_Rd_, 2);
		_deleteGPRtoXMMreg(_Rd_, 2);
		GPR_SET_CONST(_Rd_);
		constcode();
		return;
	}

	// test if should write mmx
	if( g_pCurInstInfo->info & EEINST_MMX ) {
		mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ);
		if( mmreg1 >= 0 ) {
			int info = PROCESS_EE_MMX|PROCESS_EE_SETMODET(mmreg1);

			// check for last used, if so don't alloc a new MMX reg
			_addNeededMMXreg(MMX_GPR+_Rd_);
			mmreg2 = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE);
			if( mmreg2 < 0 ) {
				// reuse rt's register for rd when rt is dead after this inst
				if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEMMX(_Rt_) ) {
					if( EEINST_ISLIVEMMX(_Rt_) ) {
						_freeMMXreg(mmreg1);
						info &= ~PROCESS_EE_MODEWRITET;
					}
					_deleteGPRtoXMMreg(_Rd_, 2);
					mmxregs[mmreg1].inuse = 1;
					mmxregs[mmreg1].reg = _Rd_;
					mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ;
					mmreg2 = mmreg1;
				}
				else mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
			}

			SetMMXstate();
			noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2));
			_clearNeededMMXregs();
			GPR_DEL_CONST(_Rd_);
			return;
		}
		_clearNeededMMXregs();
	}

	// test if should write xmm, mirror to mmx code
	if( g_pCurInstInfo->info & EEINST_XMM ) {
		mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);
		if( mmreg1 >= 0 ) {
			int info = PROCESS_EE_XMM|PROCESS_EE_SETMODET(mmreg1);

			// check for last used, if so don't alloc a new XMM reg
			_addNeededGPRtoXMMreg(_Rd_);
			mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
			if( mmreg2 < 0 ) {
				// fixed: this is the XMM path, so test XMM liveness
				// (was EEINST_ISLIVEMMX, copy-pasted from the MMX branch)
				if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_) ) {
					_freeXMMreg(mmreg1);
					info &= ~PROCESS_EE_MODEWRITET;
					_deleteMMXreg(MMX_GPR+_Rd_, 2);
					xmmregs[mmreg1].inuse = 1;
					xmmregs[mmreg1].reg = _Rd_;
					xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ;
					mmreg2 = mmreg1;
				}
				else mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
			}

			noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2));
			_clearNeededXMMregs();
			GPR_DEL_CONST(_Rd_);
			return;
		}
		_clearNeededXMMregs();
	}

	// regular x86: drop cached copies and emit the generic version
	_deleteGPRtoXMMreg(_Rt_, 1);
	_deleteGPRtoXMMreg(_Rd_, 2);
	_deleteMMXreg(MMX_GPR+_Rt_, 1);
	_deleteMMXreg(MMX_GPR+_Rd_, 2);
	noconstcode(0);
	GPR_DEL_CONST(_Rd_);
}
// rt op rs
// Code template for "rt op rs" instructions.
// NOTE(review): this template is disabled -- the assert(0) traps in
// debug builds and the non-const multicode paths are commented out,
// so only the fully-constant case would actually emit anything.
void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode)
{
assert(0);
// for now, don't support xmm
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) {
constcode();
return;
}
if( GPR_IS_CONST1(_Rs_) ) {
//multicode(PROCESS_EE_CONSTT);
return;
}
if( GPR_IS_CONST1(_Rt_) ) {
//multicode(PROCESS_EE_CONSTT);
return;
}
multicode(0);
}
// Simple Code Templates //
// rd = rs op rt
// x86-only template for "rd = rs op rt": all cached MMX/XMM copies of
// rs/rt/rd are removed up front, then the emitter matching the operands'
// constness is invoked:
//   constcode   - rs and rt both constant (rd becomes constant too)
//   constscode  - only rs constant
//   consttcode  - only rt constant
//   noconstcode - neither constant
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode)
{
// writes to r0 are no-ops
if ( ! _Rd_ ) return;
// for now, don't support xmm
CHECK_SAVE_REG(_Rd_);
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0);
_deleteMMXreg(MMX_GPR+_Rs_, 1);
_deleteMMXreg(MMX_GPR+_Rt_, 1);
_deleteMMXreg(MMX_GPR+_Rd_, 0);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) {
GPR_SET_CONST(_Rd_);
constcode();
return;
}
if( GPR_IS_CONST1(_Rs_) ) {
constscode(0);
GPR_DEL_CONST(_Rd_);
return;
}
if( GPR_IS_CONST1(_Rt_) ) {
consttcode(0);
GPR_DEL_CONST(_Rd_);
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rd_);
}
// rt = rs op imm16
// x86-only template for "rt = rs op imm16": XMM copies of rs/rt are
// removed, then constcode runs if rs is constant (rt becomes constant),
// otherwise noconstcode is emitted and rt's constness is dropped.
void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
// writes to r0 are no-ops
if ( ! _Rt_ ) return;
// for now, don't support xmm
CHECK_SAVE_REG(_Rt_);
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 0);
if( GPR_IS_CONST1(_Rs_) ) {
GPR_SET_CONST(_Rt_);
constcode();
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rt_);
}
// rd = rt op sa
// x86-only template for "rd = rt op sa": XMM copies of rt/rd are
// removed, then constcode runs if rt is constant (rd becomes constant),
// otherwise noconstcode is emitted and rd's constness is dropped.
void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
// writes to r0 are no-ops
if ( ! _Rd_ ) return;
// for now, don't support xmm
CHECK_SAVE_REG(_Rd_);
_deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0);
if( GPR_IS_CONST1(_Rt_) ) {
GPR_SET_CONST(_Rd_);
constcode();
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rd_);
}
// rd = rt MULT rs (SPECIAL)
// Template for SPECIAL-class "rd = rt MULT rs" ops; when MULT is set
// the rd register is also handled.
// NOTE(review): this template is disabled -- the assert(0) traps in
// debug builds and the single-constant multicode paths are commented
// out, so only the fully-constant case would actually emit anything.
void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT)
{
assert(0);
// for now, don't support xmm
if( MULT ) {
CHECK_SAVE_REG(_Rd_);
_deleteGPRtoXMMreg(_Rd_, 0);
}
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) {
if( MULT && _Rd_ ) GPR_SET_CONST(_Rd_);
constcode();
return;
}
if( GPR_IS_CONST1(_Rs_) ) {
//multicode(PROCESS_EE_CONSTS);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_);
return;
}
if( GPR_IS_CONST1(_Rt_) ) {
//multicode(PROCESS_EE_CONSTT);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_);
return;
}
multicode(0);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_);
}
// EE XMM allocation code
// EE XMM allocation code: allocates XMM registers for the rs/rt/rd and
// LO/HI operands requested by the xmminfo flags and returns the PROCESS_EE_*
// info word (with register numbers encoded) that the caller's emitter
// expects. Constant operands are flushed to cpuRegs first so the XMM
// loads read current values.
int eeRecompileCodeXMM(int xmminfo)
{
int info = PROCESS_EE_XMM;
// save state
if( xmminfo & XMMINFO_WRITED ) {
CHECK_SAVE_REG(_Rd_);
_eeProcessHasLive(_Rd_, 0);
EEINST_RESETSIGNEXT(_Rd_);
}
// flush consts
if( xmminfo & XMMINFO_READT ) {
if( GPR_IS_CONST1( _Rt_ ) && !(g_cpuFlushedConstReg&(1<<_Rt_)) ) {
MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], g_cpuConstRegs[_Rt_].UL[0]);
MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], g_cpuConstRegs[_Rt_].UL[1]);
g_cpuFlushedConstReg |= (1<<_Rt_);
}
}
if( xmminfo & XMMINFO_READS) {
if( GPR_IS_CONST1( _Rs_ ) && !(g_cpuFlushedConstReg&(1<<_Rs_)) ) {
MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ], g_cpuConstRegs[_Rs_].UL[0]);
MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ], g_cpuConstRegs[_Rs_].UL[1]);
g_cpuFlushedConstReg |= (1<<_Rs_);
}
}
// rd will be recomputed, so it can no longer be constant
if( xmminfo & XMMINFO_WRITED ) {
GPR_DEL_CONST(_Rd_);
}
// add needed
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
_addNeededGPRtoXMMreg(XMMGPR_LO);
}
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
_addNeededGPRtoXMMreg(XMMGPR_HI);
}
if( xmminfo & XMMINFO_READS) _addNeededGPRtoXMMreg(_Rs_);
if( xmminfo & XMMINFO_READT) _addNeededGPRtoXMMreg(_Rt_);
if( xmminfo & XMMINFO_WRITED ) _addNeededGPRtoXMMreg(_Rd_);
// allocate
if( xmminfo & XMMINFO_READS) {
int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
info |= PROCESS_EE_SET_S(reg)|PROCESS_EE_SETMODES(reg);
}
if( xmminfo & XMMINFO_READT) {
int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
info |= PROCESS_EE_SET_T(reg)|PROCESS_EE_SETMODET(reg);
}
if( xmminfo & XMMINFO_WRITED ) {
int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?((xmminfo&XMMINFO_READD_LO)?(MODE_READ|MODE_READHALF):MODE_READ):0);
int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd);
if( regd < 0 ) {
// reuse the rt (or rs) register for rd when that source is dead
// after this instruction, instead of allocating a fresh XMM reg
if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) {
_freeXMMreg(EEREC_T);
_deleteMMXreg(MMX_GPR+_Rd_, 2);
xmmregs[EEREC_T].inuse = 1;
xmmregs[EEREC_T].reg = _Rd_;
xmmregs[EEREC_T].mode = readd;
regd = EEREC_T;
}
else if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) {
_freeXMMreg(EEREC_S);
_deleteMMXreg(MMX_GPR+_Rd_, 2);
xmmregs[EEREC_S].inuse = 1;
xmmregs[EEREC_S].reg = _Rd_;
xmmregs[EEREC_S].mode = readd;
regd = EEREC_S;
}
else regd = _allocGPRtoXMMreg(-1, _Rd_, readd);
}
info |= PROCESS_EE_SET_D(regd);
}
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) {
info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0)));
info |= PROCESS_EE_LO;
}
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) {
info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0)));
info |= PROCESS_EE_HI;
}
return info;
}
// EE COP1(FPU) XMM allocation code
#define _Ft_ _Rt_
#define _Fs_ _Rd_
#define _Fd_ _Sa_
// rd = rs op rt
// Dispatches an EE FPU (COP1) op either to its SSE form (xmmcode) with xmm
// registers allocated per the xmminfo mask, or to the x87 fallback (fpucode)
// when FP regcaching or SSE is unavailable.  _Fs_/_Ft_/_Fd_ are remapped from
// the GPR instruction fields via the #defines preceding this function.
void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xmminfo)
{
	int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1;

	if( EE_FPU_REGCACHING && cpucaps.hasStreamingSIMDExtensions ) {
		int info = PROCESS_EE_XMM;

		// mark the fp regs this instruction touches so they aren't evicted
		if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_);
		if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_);
		if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_);
		if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg();

		// sources: if this is the reg's last use, only check for an existing
		// xmm copy (don't force a load); otherwise allocate one
		if( xmminfo & XMMINFO_READT ) {
			if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
			else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ);
		}

		if( xmminfo & XMMINFO_READS ) {
			if( (!(xmminfo&XMMINFO_READT)||mmregt>=0) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) )
				mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
			else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
		}

		if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES(mmregs);
		if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET(mmregt);

		if( xmminfo & XMMINFO_READD ) {
			assert( xmminfo & XMMINFO_WRITED );
			mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ);
		}

		if( xmminfo & XMMINFO_READACC ) {
			if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) )
				mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
			else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ);
		}

		if( xmminfo & XMMINFO_WRITEACC ) {

			// check for last used, if so don't alloc a new XMM reg
			int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0);

			mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc);

			if( mmregacc < 0 ) {
				// no ACC reg cached: retarget a dying source register as ACC
				// instead of allocating a fresh one
				if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
					if( FPUINST_ISLIVE(_Ft_) ) _freeXMMreg(mmregt);
					_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
					xmmregs[mmregt].inuse = 1;
					xmmregs[mmregt].reg = 0;
					xmmregs[mmregt].mode = readacc;
					xmmregs[mmregt].type = XMMTYPE_FPACC;
					mmregacc = mmregt;
				}
				else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
					if( FPUINST_ISLIVE(_Fs_) ) _freeXMMreg(mmregs);
					_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
					xmmregs[mmregs].inuse = 1;
					xmmregs[mmregs].reg = 0;
					xmmregs[mmregs].mode = readacc;
					xmmregs[mmregs].type = XMMTYPE_FPACC;
					mmregacc = mmregs;
				}
				else mmregacc = _allocFPACCtoXMMreg(-1, readacc);
			}

			xmmregs[mmregacc].mode |= MODE_WRITE;
		}
		else if( xmminfo & XMMINFO_WRITED ) {

			// check for last used, if so don't alloc a new XMM reg
			int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0);
			if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
			else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);

			if( mmregd < 0 ) {
				// no D reg cached: retarget a dying source (T, S, or ACC)
				if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
					if( FPUINST_ISLIVE(_Ft_) ) _freeXMMreg(mmregt);
					_deleteMMXreg(MMX_FPU+_Fd_, 2);
					xmmregs[mmregt].inuse = 1;
					xmmregs[mmregt].reg = _Fd_;
					xmmregs[mmregt].mode = readd;
					mmregd = mmregt;
				}
				else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
					if( FPUINST_ISLIVE(_Fs_) ) _freeXMMreg(mmregs);
					_deleteMMXreg(MMX_FPU+_Fd_, 2);
					xmmregs[mmregs].inuse = 1;
					xmmregs[mmregs].reg = _Fd_;
					xmmregs[mmregs].mode = readd;
					mmregd = mmregs;
				}
				else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) {
					if( FPUINST_ISLIVE(XMMFPU_ACC) ) _freeXMMreg(mmregacc);
					_deleteMMXreg(MMX_FPU+_Fd_, 2);
					xmmregs[mmregacc].inuse = 1;
					xmmregs[mmregacc].reg = _Fd_;
					xmmregs[mmregacc].mode = readd;
					xmmregs[mmregacc].type = XMMTYPE_FPREG;
					mmregd = mmregacc;
				}
				else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
			}
		}

		assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 );

		// encode the allocated registers into the info word
		if( xmminfo & XMMINFO_WRITED ) {
			assert( mmregd >= 0 );
			info |= PROCESS_EE_SET_D(mmregd);
		}
		if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) {
			if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC;
			else assert( !(xmminfo&XMMINFO_WRITEACC));
		}
		if( xmminfo & XMMINFO_READS ) {
			if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S;
		}
		if( xmminfo & XMMINFO_READT ) {
			if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T;
		}

		// at least one must be in xmm
		if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) {
			assert( mmregs >= 0 || mmregt >= 0 );
		}

		xmmcode(info);
		_clearNeededXMMregs();
		return;
	}

	// x87 fallback: evict any cached xmm copies so fpucode reads from memory
	if( xmminfo & XMMINFO_READS ) _deleteFPtoXMMreg(_Fs_, 0);
	if( xmminfo & XMMINFO_READT ) _deleteFPtoXMMreg(_Ft_, 0);
	if( xmminfo & (XMMINFO_READD|XMMINFO_WRITED) ) _deleteFPtoXMMreg(_Fd_, 0);
	if( xmminfo & (XMMINFO_READACC|XMMINFO_WRITEACC) ) _deleteFPtoXMMreg(XMMFPU_ACC, 0);
	fpucode(0);
}
#undef _Ft_
#undef _Fs_
#undef _Fd_
////////////////////////////////////////////////////
extern u8 g_MACFlagTransform[256]; // for vus
u32 g_sseMXCSR = 0x9f80; // disable all exception, round to 0, flush to 0
u32 g_sseVUMXCSR = 0xff80;
// Loads the SSE control/status register (MXCSR) with the emulator's preferred
// settings and derives the VU-specific MXCSR value from it.
void SetCPUState()
{
	// SSE STATE //
	// do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up)
	g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III!
	// changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo
	// so... grandia III wins
	__asm ldmxcsr g_sseMXCSR // set the new sse control
	// VU code additionally sets bits 0x6000 (round toward zero)
	g_sseVUMXCSR = g_sseMXCSR|0x6000;
}
extern BOOL install_my_handler();
// One-time initialization of the EE recompiler: allocates the block lookup
// table, the code cache, the per-region block tables, the recompiler stack
// and the instruction-analysis cache, wires up the LUT mirrors, probes for
// SSE3 support, and prints detected CPU features.
// Returns 0 on success, -1 on allocation failure or missing MMX support.
int recInit( void )
{
	int i;
	const u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };

	recLUT = (uptr*) _aligned_malloc( 0x010000 * sizeof(uptr), 16 );

	recMem = (char*)SysMmap(0, 0x00c00000);

	// 32 alignment necessary
	recRAM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x02000000 , 4*sizeof(BASEBLOCK));
	recROM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00400000 , 4*sizeof(BASEBLOCK));
	recROM1= (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00040000 , 4*sizeof(BASEBLOCK));
	recBlocks = (BASEBLOCKEX*) _aligned_malloc( sizeof(BASEBLOCKEX)*EE_NUMBLOCKS, 16);
	recStack = (char*)malloc( RECSTACK_SIZE );

	s_nInstCacheSize = 128;
	s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize );

	// BUGFIX: recStack and s_pInstCache were not part of this check before,
	// so a failed malloc crashed in the memsets below; recLUT was also
	// memset before being checked.  All allocations are now verified first.
	if ( recBlocks == NULL || recRAM == NULL || recROM == NULL || recROM1 == NULL ||
		 recMem == NULL || recLUT == NULL || recStack == NULL || s_pInstCache == NULL ) {
		SysMessage( _( "Error allocating memory" ) );
		return -1;
	}

	memset( recLUT, 0, 0x010000 * sizeof(uptr) );

	// map the 32MB of RAM (mirrored at 0x2000xxxx and 0x3000xxxx)
	for ( i = 0x0000; i < 0x0200; i++ )
	{
		recLUT[ i + 0x0000 ] = (uptr)&recRAM[ i << 14 ];
		recLUT[ i + 0x2000 ] = (uptr)&recRAM[ i << 14 ];
		recLUT[ i + 0x3000 ] = (uptr)&recRAM[ i << 14 ];
	}

	// BIOS ROM mappings (kseg mirrors included)
	for ( i = 0x0000; i < 0x0040; i++ )
	{
		recLUT[ i + 0x1fc0 ] = (uptr)&recROM[ i << 14 ];
		recLUT[ i + 0x9fc0 ] = (uptr)&recROM[ i << 14 ];
		recLUT[ i + 0xbfc0 ] = (uptr)&recROM[ i << 14 ];
	}

	for ( i = 0x0000; i < 0x0004; i++ )
	{
		recLUT[ i + 0x1e00 ] = (uptr)&recROM1[ i << 14 ];
		recLUT[ i + 0x9e00 ] = (uptr)&recROM1[ i << 14 ];
		recLUT[ i + 0xbe00 ] = (uptr)&recROM1[ i << 14 ];
	}

	// mirror the low mappings into the 0x8000/0xa000 kseg regions
	memcpy( recLUT + 0x8000, recLUT, 0x2000 * sizeof(uptr) );
	memcpy( recLUT + 0xa000, recLUT, 0x2000 * sizeof(uptr) );

	memset(recMem, 0xcd, 0x00c00000);
	memset(recStack, 0, RECSTACK_SIZE);

	// SSE3 detection, manually create the code: emit a single SSE3
	// instruction into the code cache and see if executing it faults
	x86SetPtr(recMem);
	SSE3_MOVSLDUP_XMM_to_XMM(XMM0, XMM0);
	RET();

	cpucaps.hasStreamingSIMD3Extensions = 1;
	__try {
		__asm call recMem
	}
	__except(EXCEPTION_EXECUTE_HANDLER) {
		cpucaps.hasStreamingSIMD3Extensions = 0;
#ifdef WIN32_VIRTUAL_MEM
		// necessary since can potentially kill the custom handler
		install_my_handler();
#endif
	}

	SysPrintf( "x86Init: \n" );
	SysPrintf( "\tCPU vender name = %s\n", cpuinfo.x86ID );
	SysPrintf( "\tFamilyID = %x\n", cpuinfo.x86StepID );
	SysPrintf( "\tx86Family = %s\n", cpuinfo.x86Fam );
	SysPrintf( "\tCPU speed = %d MHZ\n", cpuinfo.cpuspeed);
	SysPrintf( "\tx86PType = %s\n", cpuinfo.x86Type );
	SysPrintf( "\tx86Flags = %8.8x\n", cpuinfo.x86Flags );
	SysPrintf( "\tx86EFlags = %8.8x\n", cpuinfo.x86EFlags );
	SysPrintf( "Features: \n" );
	SysPrintf( "\t%sDetected MMX\n", cpucaps.hasMultimediaExtensions ? "" : "Not " );
	SysPrintf( "\t%sDetected SSE\n", cpucaps.hasStreamingSIMDExtensions ? "" : "Not " );
	SysPrintf( "\t%sDetected SSE2\n", cpucaps.hasStreamingSIMD2Extensions ? "" : "Not " );
	SysPrintf( "\t%sDetected SSE3\n", cpucaps.hasStreamingSIMD3Extensions ? "" : "Not " );

	if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu
	{
		SysPrintf( " Extented AMD Features: \n" );
		SysPrintf( "\t%sDetected MMX2\n", cpucaps.hasMultimediaExtensionsExt ? "" : "Not " );
		SysPrintf( "\t%sDetected 3DNOW\n", cpucaps.has3DNOWInstructionExtensions ? "" : "Not " );
		SysPrintf( "\t%sDetected 3DNOW2\n", cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not " );
	}

	// MMX is a hard requirement for the recompiler
	if ( !( cpucaps.hasMultimediaExtensions ) )
	{
		SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) );
		return -1;
	}

	x86FpuState = FPU_STATE;

	SuperVUInit(-1);

	// build the VU MAC-flag bit-reversal lookup table
	for(i = 0; i < 256; ++i) {
		g_MACFlagTransform[i] = macarr[i>>4]|(macarr[i&15]<<4);
	}

	SetCPUState();
	return 0;
}
////////////////////////////////////////////////////
// Throws away every compiled block and resets all recompiler state back to
// a just-initialized condition (buffers stay allocated; only contents clear).
void recReset( void ) {
#ifdef PCSX2_DEVBUILD
	SysPrintf("EE Recompiler data reset\n");
#endif

	s_nNextBlock = 0;
	maxrecmem = 0;
	// clear the per-region block tables and the extended block records
	memset( recRAM, 0, sizeof(BASEBLOCK)/4*0x02000000 );
	memset( recROM, 0, sizeof(BASEBLOCK)/4*0x00400000 );
	memset( recROM1, 0, sizeof(BASEBLOCK)/4*0x00040000 );
	memset( recBlocks, 0, sizeof(BASEBLOCKEX)*EE_NUMBLOCKS );
	if( s_pInstCache ) memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
	ResetBaseBlockEx(0);

	// leave the MMX state clean for whatever runs next
	__asm emms

#ifdef _DEBUG
	// don't clear since save states won't work
	//memset(recMem, 0xcd, 0x00c00000);
#endif

	// restart code emission at the beginning of both caches
	recPtr = recMem;
	recStackPtr = recStack;
	x86FpuState = FPU_STATE;
	iCWstate = 0;

	branch = 0;
}
void recShutdown( void )
{
if ( recMem == NULL ) {
return;
}
_aligned_free( recLUT );
SysMunmap((uptr)recMem, 0x00800000); recMem = NULL;
_aligned_free( recRAM ); recRAM = NULL;
_aligned_free( recROM ); recROM = NULL;
_aligned_free( recROM1 ); recROM1 = NULL;
_aligned_free( recBlocks ); recBlocks = NULL;
free( s_pInstCache ); s_pInstCache = NULL; s_nInstCacheSize = 0;
SuperVUDestroy(-1);
x86Shutdown( );
}
// Intentionally empty: VU0 micro-mode recompilation cannot be toggled here.
void recEnableVU0micro(int enable) {
}
// Intentionally empty: VU1 micro-mode recompilation cannot be toggled here.
void recEnableVU1micro(int enable) {
}
#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code
static u32 s_uSaveESP = 0, s_uSaveEBP;
// Runs the recompiled block at cpuRegs.pc, compiling it first if necessary.
// Debug builds save/restore esi/esp/ebp around the call so the generated
// code's register usage can't corrupt the interpreter's frame.
static void execute( void )
{
#ifdef _DEBUG
	u8* fnptr;
	u32 oldesi;
#else
	R5900FNPTR pfn;
#endif
	BASEBLOCK* pblock = PC_GETBLOCK(cpuRegs.pc);

	// compile on demand if no block (or a stale block) is at this pc
	if ( !pblock->pFnptr || pblock->startpc != cpuRegs.pc ) {
		recRecompile(cpuRegs.pc);
	}

	assert( pblock->pFnptr != 0 );
	g_EEFreezeRegs = 1;

	// skip the POPs

#ifdef _DEBUG
	fnptr = (u8*)pblock->pFnptr;

	__asm {
		// save data
		mov oldesi, esi
		mov s_uSaveESP, esp
		sub s_uSaveESP, 8
		mov s_uSaveEBP, ebp
		push ebp

		call fnptr // jump into function
		// restore data
		pop ebp
		mov esi, oldesi
	}
#else
	pfn = ((R5900FNPTR)pblock->pFnptr);
	// use call instead of pfn()
	//__asm mov s_uSaveEBP, ebp
	__asm call pfn
#endif

	g_EEFreezeRegs = 0;
}
// Single-stepping is not supported by the recompiler; intentionally empty.
void recStep( void ) {
}
// Main recompiler loop: executes blocks forever (does not return).
void recExecute( void ) {
	//SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
	//SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);//ABOVE_NORMAL_PRIORITY_CLASS);
	// EE recompilation implies COP2 recompilation
	if( Config.Options & PCSX2_EEREC ) Config.Options |= PCSX2_COP2REC;

	for (;;)
		execute();
}
// Executes a single block at the current pc (used for stepping the core).
void recExecuteBlock( void ) {
	execute();
}
////////////////////////////////////////////////////
extern u32 g_nextBranchCycle;
u32 g_lastpc = 0;
static u32 g_temp;
// jumped to when invalid pc address
// jumped to when invalid pc address
// Naked dispatcher: EDX holds the target pc, and the return address on the
// stack points at the call site's displacement field.  Looks up (compiling
// if needed) the block for the pc, back-patches the caller's jump to go
// directly to the block next time, then jumps into the block.
__declspec(naked,noreturn) void Dispatcher()
{
	// EDX contains the current pc to jump to, stack contains the jump addr to modify
	__asm push edx

	// calc PC_GETBLOCK
	s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);

	__asm {
		mov eax, s_pDispatchBlock

		// check if startpc == cpuRegs.pc
		mov ecx, cpuRegs.pc
		//and ecx, 0x5fffffff // remove higher bits
		cmp ecx, dword ptr [eax+BLOCKTYPE_STARTPC]
		je CheckPtr

		// recompile
		push cpuRegs.pc // pc
		call recRecompile
		add esp, 4 // pop old param
		mov eax, s_pDispatchBlock
CheckPtr:
		mov eax, dword ptr [eax]
	}

#ifdef _DEBUG
	__asm mov g_temp, eax
	assert( g_temp );
#endif

//	__asm {
//		test eax, 0x40000000 // BLOCKTYPE_NEEDCLEAR
//		jz Done
//		// move new pc
//		and eax, 0x0fffffff
//		mov ecx, cpuRegs.pc
//		mov dword ptr [eax+1], ecx
//	}
	__asm {
		// patch the caller's rel32 displacement, then tail-jump to the block
		and eax, 0x0fffffff
		mov edx, eax
		pop ecx // x86Ptr to mod
		sub edx, ecx
		sub edx, 4
		mov dword ptr [ecx], edx
		jmp eax
	}
}
// Naked dispatcher jumped to from a cleared block's stub: EDX holds the pc.
// Recompiles (or finds) the block at that pc and rewrites the stub into a
// direct jmp32 so subsequent executions skip the dispatch entirely.
__declspec(naked,noreturn) void DispatcherClear()
{
	// EDX contains the current pc
	__asm mov cpuRegs.pc, edx
	__asm push edx

	// calc PC_GETBLOCK
	s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);

	if( s_pDispatchBlock->startpc == cpuRegs.pc ) {
		assert( s_pDispatchBlock->pFnptr != 0 );

		// already modded the code, jump to the new place
		__asm {
			pop edx
			add esp, 4 // ignore stack
			mov eax, s_pDispatchBlock
			mov eax, dword ptr [eax]
			and eax, 0x0fffffff
			jmp eax
		}
	}

	__asm {
		// pc still on the stack from the push edx above
		call recRecompile
		add esp, 4 // pop old param
		mov eax, s_pDispatchBlock
		mov eax, dword ptr [eax]

		pop ecx // old fnptr

		and eax, 0x0fffffff
		// overwrite the stub with "jmp32 <block>"
		mov byte ptr [ecx], 0xe9 // jmp32
		mov edx, eax
		sub edx, ecx
		sub edx, 5
		mov dword ptr [ecx+1], edx

		jmp eax
	}
}
// called when jumping to variable pc address
// called when jumping to variable pc address
// Naked dispatcher for indirect jumps: inlines the PC_GETBLOCK lookup in
// assembly, jumps straight into the block if its startpc matches, and
// recompiles first otherwise.  No back-patching (target pc is dynamic).
__declspec(naked,noreturn) void DispatcherReg()
{
	__asm {
		//s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);
		mov edx, cpuRegs.pc
		mov ecx, edx
	}

	__asm {
		// index recLUT by the high pc bits, then offset into the block table
		shr edx, 14
		and edx, 0xfffffffc
		add edx, recLUT
		mov edx, dword ptr [edx]

		mov eax, ecx
		and eax, 0xfffc

		// edx += 2*eax
		shl eax, 1
		add edx, eax

		// check if startpc == cpuRegs.pc
		mov eax, ecx
		//and eax, 0x5fffffff // remove higher bits
		cmp eax, dword ptr [edx+BLOCKTYPE_STARTPC]
		jne recomp

		mov eax, dword ptr [edx]
	}

#ifdef _DEBUG
	__asm mov g_temp, eax
	assert( g_temp );
#endif

	__asm {
		and eax, 0x0fffffff
		jmp eax // fnptr

recomp:
		// block missing/stale: recompile, reload the fnptr, then jump
		sub esp, 8
		mov dword ptr [esp+4], edx
		mov dword ptr [esp], ecx
		call recRecompile
		mov edx, dword ptr [esp+4]
		add esp, 8

		mov eax, dword ptr [edx]
		and eax, 0x0fffffff
		jmp eax // fnptr
	}
}
////////////////////////////////////////////////////
// Clears the block(s) overlapping a 64-bit write at p.
// NOTE(review): 'left' is derived from p's offset within a 16-byte span —
// presumably how many BASEBLOCK slots remain before the alignment boundary;
// confirm against sizeof(BASEBLOCK).
void recClear64(BASEBLOCK* p)
{
	int left = 4 - ((u32)p % 16)/sizeof(BASEBLOCK);
	recClearMem(p);

	if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1);
}
// Clears the block(s) overlapping a 128-bit write at p; checks up to three
// following slots, bounded by the 32-byte alignment span.
void recClear128(BASEBLOCK* p)
{
	int left = 4 - ((u32)p % 32)/sizeof(BASEBLOCK);
	recClearMem(p);

	if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1);
	if( left > 2 && *(u32*)(p+2) ) recClearMem(p+2);
	if( left > 3 && *(u32*)(p+3) ) recClearMem(p+3);
}
// Invalidates compiled blocks covering Size words starting at Addr,
// advancing one 32-bit word (4 bytes) per step.
void recClear( u32 Addr, u32 Size )
{
	u32 cleared;
	for (cleared = 0; cleared < Size; ++cleared) {
		REC_CLEARM(Addr);
		Addr += 4;
	}
}
#define EE_MIN_BLOCK_BYTES 15
// Invalidates one compiled block: overwrites its first EE_MIN_BLOCK_BYTES of
// generated code with a stub that re-enters DispatcherClear, then wipes the
// BASEBLOCK/BASEBLOCKEX records so the pc maps to "not compiled".
void recClearMem(BASEBLOCK* p)
{
	BASEBLOCKEX* pexblock;
	BASEBLOCK* pstart;
	int lastdelay;

	// necessary since recompiler doesn't call femms/emms
	if (cpucaps.has3DNOWInstructionExtensions) __asm femms
	else __asm emms

	assert( p != NULL );

	// a delay-slot block belongs to the branch before it; clear that first
	if( p->uType & BLOCKTYPE_DELAYSLOT ) {
		recClearMem(p-1);
		if( p->pFnptr == 0 )
			return;
	}

	assert( p->pFnptr != 0 );
	assert( p->startpc );

	x86Ptr = (s8*)p->pFnptr;

	// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
	// emit: mov edx, startpc / push retaddr / jmp DispatcherClear
	MOV32ItoR(EDX, p->startpc);
	PUSH32I((u32)x86Ptr); // will be replaced by JMP32
	JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 ));
	assert( x86Ptr == (s8*)p->pFnptr + EE_MIN_BLOCK_BYTES );

	pstart = PC_GETBLOCK(p->startpc);
	pexblock = PC_GETBLOCKEX(pstart);
	assert( pexblock->startpc == pstart->startpc );

//	if( pexblock->pOldFnptr ) {
//		// have to mod oldfnptr too
//		x86Ptr = pexblock->pOldFnptr;
//
//		MOV32ItoR(EDX, p->startpc);
//		JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 ));
//	}
//	else
//		pexblock->pOldFnptr = (u8*)p->pFnptr;

	// don't delete if last is delay
	lastdelay = pexblock->size;
	if( pstart[pexblock->size-1].uType & BLOCKTYPE_DELAYSLOT ) {
		assert( pstart[pexblock->size-1].pFnptr != pstart->pFnptr );
		if( pstart[pexblock->size-1].pFnptr != 0 ) {
			pstart[pexblock->size-1].uType = 0;
			--lastdelay;
		}
	}

	memset(pstart, 0, lastdelay*sizeof(BASEBLOCK));

	RemoveBaseBlockEx(pexblock, 0);
	pexblock->size = 0;
	pexblock->startpc = 0;
}
// check for end of bios
// Emits code that returns from the generated block (RET2) once cpuRegs.pc
// reaches one of the two known BIOS exit addresses; otherwise falls through.
void CheckForBIOSEnd()
{
	MOV32MtoR(EAX, (int)&cpuRegs.pc);

	CMP32ItoR(EAX, 0x00200008);
	j8Ptr[0] = JE8(0);

	CMP32ItoR(EAX, 0x00100008);
	j8Ptr[1] = JE8(0);

	// return
	j8Ptr[2] = JMP8(0);

	x86SetJ8( j8Ptr[0] );
	x86SetJ8( j8Ptr[1] );

	// bios end
	RET2();

	x86SetJ8( j8Ptr[2] );
}
static int *s_pCode;
// Ends the current block with a register-indirect branch: stashes the target
// GPR value in ESI (or the writeback slot), compiles the delay slot, stores
// the final pc, and jumps through DispatcherReg.  reg == 0xffffffff means the
// pc was already written by the caller.
void SetBranchReg( u32 reg )
{
	branch = 1;

	if( reg != 0xffffffff ) {
//		if( GPR_IS_CONST1(reg) )
//			MOV32ItoM( (u32)&cpuRegs.pc, g_cpuConstRegs[reg].UL[0] );
//		else {
//			int mmreg;
//
//			if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0 ) {
//				SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.pc, mmreg);
//			}
//			else if( (mmreg = _checkMMXreg(MMX_GPR+reg, MODE_READ)) >= 0 ) {
//				MOVDMMXtoM((u32)&cpuRegs.pc, mmreg);
//				SetMMXstate();
//			}
//			else {
//				MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ reg ].UL[ 0 ] );
//				MOV32RtoM((u32)&cpuRegs.pc, EAX);
//			}
//		}

		// hold the branch target across the delay slot in ESI
		_allocX86reg(ESI, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
		_eeMoveGPRtoR(ESI, reg);

		recompileNextInstruction(1);

		// the delay slot may have spilled ESI to g_recWriteback
		if( x86regs[ESI].inuse ) {
			assert( x86regs[ESI].type == X86TYPE_PCWRITEBACK );
			MOV32RtoM((int)&cpuRegs.pc, ESI);
			x86regs[ESI].inuse = 0;
		}
		else {
			MOV32MtoR(EAX, (u32)&g_recWriteback);
			MOV32RtoM((int)&cpuRegs.pc, EAX);
		}
	}

//	CMP32ItoM((u32)&cpuRegs.pc, 0);
//	j8Ptr[5] = JNE8(0);
//	CALLFunc((u32)tempfn);
//	x86SetJ8( j8Ptr[5] );

	iFlushCall(FLUSH_EVERYTHING);

	iBranchTest(0xffffffff, 1);
	if( bExecBIOS ) CheckForBIOSEnd();

	JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
}
// Ends the current block with a branch to a known pc: stores imm to
// cpuRegs.pc, flushes, tests for events, then emits a Dispatcher jump whose
// displacement Dispatcher will later patch to go straight to the target.
void SetBranchImm( u32 imm )
{
	u32* ptr;
	branch = 1;

	assert( imm );

	// end the current block
	MOV32ItoM( (u32)&cpuRegs.pc, imm );
	iFlushCall(FLUSH_EVERYTHING);
	// backwards branches (imm <= pc) get the cycle/event test
	iBranchTest(imm, imm <= pc);
	if( bExecBIOS ) CheckForBIOSEnd();

	// EDX carries the target pc into Dispatcher; the jmp's displacement
	// slot (x86Ptr-4) is what Dispatcher back-patches
	MOV32ItoR(EDX, 0);
	ptr = (u32*)(x86Ptr-4);
	*ptr = (u32)JMP32((u32)Dispatcher - ( (u32)x86Ptr + 5 ));
}
// Snapshots all recompiler state before speculatively compiling one side of
// a branch, so LoadBranchState can rewind to this exact point afterwards.
void SaveBranchState()
{
	// flag the single-GPR const slot as "not captured" (see LoadBranchState)
	s_saveConstGPRreg = 0xffffffff; // indicate searching

	// scalar compiler state
	s_savex86FpuState = x86FpuState;
	s_saveiCWstate = iCWstate;
	s_savenBlockCycles = s_nBlockCycles;
	s_psaveInstInfo = g_pCurInstInfo;

	// constant-propagation and liveness bitmasks
	s_saveHasConstReg = g_cpuHasConstReg;
	s_saveFlushedConstReg = g_cpuFlushedConstReg;
	s_saveRegHasLive1 = g_cpuRegHasLive1;
	s_saveRegHasSignExt = g_cpuRegHasSignExt;

	// full copies of both register caches
	memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs));
	memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs));
}
// Rewinds the recompiler to the state captured by SaveBranchState.  If a
// single const GPR was captured (s_saveConstGPRreg != 0xffffffff), restores
// it before overwriting the bitmask — note the assert reads the pre-restore
// g_cpuHasConstReg, so statement order matters here.
void LoadBranchState()
{
	x86FpuState = s_savex86FpuState;
	iCWstate = s_saveiCWstate;
	s_nBlockCycles = s_savenBlockCycles;

	if( s_saveConstGPRreg != 0xffffffff ) {
		assert( s_saveConstGPRreg > 0 );

		// make sure right GPR was saved
		assert( g_cpuHasConstReg == s_saveHasConstReg || (g_cpuHasConstReg ^ s_saveHasConstReg) == (1<<s_saveConstGPRreg) );

		// restore the GPR reg
		g_cpuConstRegs[s_saveConstGPRreg] = s_ConstGPRreg;
		GPR_SET_CONST(s_saveConstGPRreg);

		s_saveConstGPRreg = 0;
	}

	g_cpuHasConstReg = s_saveHasConstReg;
	g_cpuFlushedConstReg = s_saveFlushedConstReg;
	g_pCurInstInfo = s_psaveInstInfo;
	g_cpuRegHasLive1 = g_cpuPrevRegHasLive1 = s_saveRegHasLive1;
	g_cpuRegHasSignExt = g_cpuPrevRegHasSignExt = s_saveRegHasSignExt;

	// restore all mmx regs
	memcpy(mmxregs, s_saveMMXregs, sizeof(mmxregs));
	memcpy(xmmregs, s_saveXMMregs, sizeof(xmmregs));
}
// Spills/frees cached registers before emitting a call into C code, per the
// FLUSH_* mask: FREE_* evicts entirely, FLUSH_* writes back but keeps the
// cache; CACHED_REGS also writes const-propagated GPRs to memory.
void iFlushCall(int flushtype)
{
	_freeX86regs();

	if( flushtype & FLUSH_FREE_XMM )
		_freeXMMregs();
	else if( flushtype & FLUSH_FLUSH_XMM)
		_flushXMMregs();

	if( flushtype & FLUSH_FREE_MMX )
		_freeMMXregs();
	else if( flushtype & FLUSH_FLUSH_MMX)
		_flushMMXregs();

	if( flushtype & FLUSH_CACHED_REGS )
		_flushConstRegs();

	LoadCW();

	// leave the FPU usable by the called C code (exit MMX state)
	if (x86FpuState==MMX_STATE) {
		if (cpucaps.has3DNOWInstructionExtensions) FEMMS();
		else EMMS();
		x86FpuState=FPU_STATE;
	}
}
#ifdef PCSX2_DEVBUILD
// Called from generated code (dev builds): records the performance-counter
// start time in lbase, preserving the caller's eax/ebx/ecx.
__declspec(naked) void _StartPerfCounter()
{
	__asm {
		push eax
		push ebx
		push ecx
	}

	QueryPerformanceCounter(&lbase);

	__asm {
		pop ecx
		pop ebx
		pop eax
		ret
	}
}
static u32 s_pCurBlock_ltime;
// Called from generated code (dev builds): computes lfinal - lbase as a
// 64-bit value and accumulates it into the block's ltime counter, whose
// address was stored in s_pCurBlock_ltime by StopPerfCounter.
__declspec(naked) void _StopPerfCounter()
{
	__asm {
		push eax
		push ebx
		push ecx
	}

	QueryPerformanceCounter(&lfinal);

	__asm {
		// edx:eax = lfinal - lbase (64-bit subtract)
		mov eax, dword ptr [offset lfinal]
		mov edx, dword ptr [offset lfinal + 4]
		sub eax, dword ptr [offset lbase]
		sbb edx, dword ptr [offset lbase + 4]
		// *s_pCurBlock_ltime += edx:eax (64-bit add)
		mov ecx, s_pCurBlock_ltime
		add eax, dword ptr [ecx]
		adc edx, dword ptr [ecx + 4]
		mov dword ptr [ecx], eax
		mov dword ptr [ecx + 4], edx
		pop ecx
		pop ebx
		pop eax
		ret
	}
}
#endif
// Emits a call to _StartPerfCounter into the current block (dev builds with
// profiling enabled); no-op otherwise.
void StartPerfCounter()
{
#ifdef PCSX2_DEVBUILD
	if( s_startcount ) {
		CALLFunc((u32)_StartPerfCounter);
	}
#endif
}
// Emits code that points s_pCurBlock_ltime at the current block's ltime
// accumulator and calls _StopPerfCounter (dev builds with profiling enabled).
void StopPerfCounter()
{
#ifdef PCSX2_DEVBUILD
	if( s_startcount ) {
		MOV32ItoM((u32)&s_pCurBlock_ltime, (u32)&s_pCurBlockEx->ltime);
		CALLFunc((u32)_StopPerfCounter);
	}
#endif
}
#define USE_FAST_BRANCHES 0
//void testfpu()
//{
// int i;
// for(i = 0; i < 32; ++i ) {
// if( fpuRegs.fpr[i].UL== 0x7f800000 || fpuRegs.fpr[i].UL == 0xffc00000) {
// SysPrintf("bad fpu: %x %x %x\n", i, cpuRegs.cycle, g_lastpc);
// }
//
// if( VU0.VF[i].UL[0] == 0xffc00000 || //(VU0.VF[i].UL[1]&0xffc00000) == 0xffc00000 ||
// VU0.VF[i].UL[0] == 0x7f800000) {
// SysPrintf("bad vu0: %x %x %x\n", i, cpuRegs.cycle, g_lastpc);
// }
// }
//}
//static void cleanup()
//{
// assert( !g_globalMMXSaved );
// assert( !g_globalXMMSaved );
//}
// Emits the end-of-block cycle update and event test: adds this block's
// cycle count to cpuRegs.cycle and, when the next-event threshold is
// reached, calls cpuBranchTest; if that changed the pc away from newpc,
// jumps to DispatcherReg instead of falling through.
static void iBranchTest(u32 newpc, u32 cpuBranch)
{
#ifdef PCSX2_DEVBUILD
	if( s_startcount ) {
		StopPerfCounter();
		ADD32ItoM( (u32)&s_pCurBlockEx->visited, 1 );
	}
#endif

#ifdef _DEBUG
	//CALLFunc((u32)testfpu);
#endif

	if( !USE_FAST_BRANCHES || cpuBranch ) {
		MOV32MtoR(ECX, (int)&cpuRegs.cycle);
		ADD32ItoR(ECX, s_nBlockCycles*9/8); // NOTE: mulitply cycles here, 6/5 ratio stops pal ffx from randomly crashing, but crashes jakI
		MOV32RtoM((int)&cpuRegs.cycle, ECX); // update cycles
	}
	else {
		// fast path: just bump the cycle count, no event test
		ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*9/8);
		return;
	}

	SUB32MtoR(ECX, (int)&g_nextBranchCycle);

	// check if should branch
	j8Ptr[0] = JS8( 0 );

	// has to be in the middle of Save/LoadBranchState
	CALLFunc( (int)cpuBranchTest );

	if( newpc != 0xffffffff ) {
		CMP32ItoM((int)&cpuRegs.pc, newpc);
		JNE32((u32)DispatcherReg - ( (u32)x86Ptr + 6 ));
	}

	x86SetJ8( j8Ptr[0] );
}
////////////////////////////////////////////////////
#ifndef CP2_RECOMPILE

// COP2 recompilation disabled: fall back to the interpreter handler.
REC_SYS(COP2);

#else

// Recompiles a COP2 (VU0 macro-mode) instruction by delegating to recCOP22.
void recCOP2( void )
{
#ifdef CPU_LOG
	CPU_LOG( "Recompiling COP2:%s\n", disR5900Fasm( cpuRegs.code, cpuRegs.pc ) );
#endif

//	if ( !CHECK_COP2REC ) //disable the use of vus better this way :P
//	{
//		assert( !CHECK_EEREC );
//		MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code );
//		MOV32ItoM( (u32)&cpuRegs.pc, pc );
//		iFlushCall(FLUSH_EVERYTHING);
//		g_cpuHasConstReg = 1; // reset all since COP2 can change regs
//		CALLFunc( (u32)COP2 );
//
//		CMP32ItoM((int)&cpuRegs.pc, pc);
//		j8Ptr[0] = JE8(0);
//		ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles);
//		JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
//		x86SetJ8(j8Ptr[0]);
////		branch = 2;
//	}
//	else
	{
		recCOP22( );
	}
}

#endif
////////////////////////////////////////////////////
// Recompiles SYSCALL: stores the current code/pc, calls the C handler, and
// if the handler redirected the pc, flushes cycles and exits the block via
// DispatcherReg; otherwise execution falls through in-block.
void recSYSCALL( void ) {
	MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code );
	MOV32ItoM( (u32)&cpuRegs.pc, pc );
	iFlushCall(FLUSH_NODESTROY);
	CALLFunc( (u32)SYSCALL );

	CMP32ItoM((int)&cpuRegs.pc, pc);
	j8Ptr[0] = JE8(0);
	ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles);
	JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
	x86SetJ8(j8Ptr[0]);
	//branch = 2;
}
////////////////////////////////////////////////////
// Recompiles BREAK: like recSYSCALL but returns to the caller of the block
// (RET) when the handler redirected the pc, instead of re-dispatching.
void recBREAK( void ) {
	MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code );
	MOV32ItoM( (u32)&cpuRegs.pc, pc );
	iFlushCall(FLUSH_EVERYTHING);
	CALLFunc( (u32)BREAK );

	CMP32ItoM((int)&cpuRegs.pc, pc);
	j8Ptr[0] = JE8(0);
	ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles);
	RET();
	x86SetJ8(j8Ptr[0]);
	//branch = 2;
}
// Debug trap reached from generated self-check code when a block's source
// bytes no longer match what was compiled; EAX carries the offending pc.
static void checkcodefn()
{
	int pctemp;

	__asm mov pctemp, eax
	SysPrintf("code changed! %x\n", pctemp);
	assert(0);
}
// Debug trap for (currently disabled) generated checks that detect the pc
// escaping the block's range; startpc identifies the block for the debugger.
void checkpchanged(u32 startpc)
{
	assert(0);
}
//#ifdef _DEBUG
//#define CHECK_XMMCHANGED() CALLFunc((u32)checkxmmchanged);
//#else
//#define CHECK_XMMCHANGED()
//#endif
//
//static void checkxmmchanged()
//{
// assert( !g_globalMMXSaved );
// assert( !g_globalXMMSaved );
//}
// Returns 0 when temppc is the entry point of a different, already-compiled
// block (so the current block should stop before it); 1 when it is safe to
// keep recompiling through temppc.
u32 recompileCodeSafe(u32 temppc)
{
	const BASEBLOCK* blk = PC_GETBLOCK(temppc);

	if (blk->pFnptr != 0 && blk->startpc != s_pCurBlock->startpc && pc == blk->startpc)
		return 0;

	return 1;
}
// Recompiles the instruction at the global pc (advancing pc): handles
// collisions with existing blocks, fetches/decodes the opcode, refreshes
// register-cache eviction priorities from liveness data, applies peephole
// groups when flagged, dispatches to the per-opcode recompiler, and
// periodically flushes unused cached registers.  delayslot != 0 marks this
// instruction as a branch delay slot.
void recompileNextInstruction(int delayslot)
{
	static u8 s_bFlushReg = 1;
	int i, count;

	BASEBLOCK* pblock = PC_GETBLOCK(pc);

	// need *ppblock != s_pCurBlock because of branches
	if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) {

		if( !delayslot && pc == pblock->startpc ) {
			// code already in place, so jump to it and exit recomp
			assert( PC_GETBLOCKEX(pblock)->startpc == pblock->startpc );

			iFlushCall(FLUSH_EVERYTHING);
			MOV32ItoM((u32)&cpuRegs.pc, pc);

//			if( pexblock->pOldFnptr ) {
//				// code already in place, so jump to it and exit recomp
//				JMP32((u32)pexblock->pOldFnptr - ((u32)x86Ptr + 5));
//				branch = 3;
//				return;
//			}

			JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5));
			branch = 3;
			return;
		}
		else {
			// overlapping block: clear it (preserving our emit position)
			if( !(delayslot && pblock->startpc == pc) ) {
				s8* oldX86 = x86Ptr;
				//__Log("clear block %x\n", pblock->startpc);
				recClearMem(pblock);
				x86Ptr = oldX86;
				if( delayslot )
					SysPrintf("delay slot %x\n", pc);
			}
		}
	}

	if( delayslot )
		pblock->uType = BLOCKTYPE_DELAYSLOT;

	s_pCode = (int *)PSM( pc );
	assert(s_pCode);

#ifdef _DEBUG
	MOV32ItoR(EAX, pc);
#endif

	cpuRegs.code = *(int *)s_pCode;
	s_nBlockCycles++;
	pc += 4;

//#ifdef _DEBUG
//	CMP32ItoM((u32)s_pCode, cpuRegs.code);
//	j8Ptr[0] = JE8(0);
//	MOV32ItoR(EAX, pc);
//	CALLFunc((u32)checkcodefn);
//	x86SetJ8( j8Ptr[ 0 ] );
//
//	if( !delayslot ) {
//		CMP32ItoM((u32)&cpuRegs.pc, s_pCurBlockEx->startpc);
//		j8Ptr[0] = JB8(0);
//		CMP32ItoM((u32)&cpuRegs.pc, pc);
//		j8Ptr[1] = JA8(0);
//		j8Ptr[2] = JMP8(0);
//		x86SetJ8( j8Ptr[ 0 ] );
//		x86SetJ8( j8Ptr[ 1 ] );
//		PUSH32I(s_pCurBlockEx->startpc);
//		CALLFunc((u32)checkpchanged);
//		ADD32ItoR(ESP, 4);
//		x86SetJ8( j8Ptr[ 2 ] );
//	}
//#endif

	g_pCurInstInfo++;

	// reorder register priorities: regs written sooner get higher counters
	// so the allocator prefers evicting regs that stay dead longer
//	for(i = 0; i < X86REGS; ++i) {
//		if( x86regs[i].inuse ) {
//			if( count > 0 ) mmxregs[i].counter = 1000-count;
//			else mmxregs[i].counter = 0;
//		}
//	}

	for(i = 0; i < MMXREGS; ++i) {
		if( mmxregs[i].inuse ) {
			assert( MMX_ISGPR(mmxregs[i].reg) );
			count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR);
			if( count > 0 ) mmxregs[i].counter = 1000-count;
			else mmxregs[i].counter = 0;
		}
	}

	for(i = 0; i < XMMREGS; ++i) {
		if( xmmregs[i].inuse ) {
			count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg);
			if( count > 0 ) xmmregs[i].counter = 1000-count;
			else xmmregs[i].counter = 0;
		}
	}

	// peephole optimizations: multi-instruction load/store groups
	if( g_pCurInstInfo->info & EEINSTINFO_COREC ) {

#ifdef WIN32_VIRTUAL_MEM
		if( g_pCurInstInfo->numpeeps > 1 ) {
			switch(cpuRegs.code>>26) {
				case 30: recLQ_coX(g_pCurInstInfo->numpeeps); break;
				case 31: recSQ_coX(g_pCurInstInfo->numpeeps); break;
				case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); break;
				case 57: recSWC1_coX(g_pCurInstInfo->numpeeps); break;
				case 55: recLD_coX(g_pCurInstInfo->numpeeps); break;
				case 63: recSD_coX(g_pCurInstInfo->numpeeps); break;
				default:
					assert(0);
			}

			pc += g_pCurInstInfo->numpeeps*4;
			s_nBlockCycles += g_pCurInstInfo->numpeeps;
			g_pCurInstInfo += g_pCurInstInfo->numpeeps;
		}
		else {
			recBSC_co[cpuRegs.code>>26]();
			pc += 4;
			s_nBlockCycles++;
			g_pCurInstInfo++;
		}
#else
		assert(0);
#endif
	}
	else {
	 	assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) );

		// dispatch to the per-opcode recompiler
		recBSC[ cpuRegs.code >> 26 ]();
	}

	// every other instruction, try to flush write-pending cached regs so
	// the caches don't fill up with dirty entries
	if( !delayslot ) {
		if( s_bFlushReg ) {
			//if( !_flushUnusedConstReg() ) {
				int flushed = 0;
				if( _getNumMMXwrite() > 3 ) flushed = _flushMMXunused();
				if( !flushed && _getNumXMMwrite() > 2 ) _flushXMMunused();
				s_bFlushReg = !flushed;
//			}
//			else s_bFlushReg = 0;
		}
		else s_bFlushReg = 1;
	}
	else s_bFlushReg = 1;

	//CHECK_XMMCHANGED();
	_clearNeededX86regs();
	_clearNeededMMXregs();
	_clearNeededXMMregs();
}
// Naked idle-loop block for pc 0x81fc0: burns cycles and pumps event tests
// until cpuBranchTest moves the pc somewhere else, then re-dispatches.
__declspec(naked) void iDummyBlock()
{
//	g_lastpc = cpuRegs.pc;
//
//	do {
//		cpuRegs.cycle = g_nextBranchCycle;
//		cpuBranchTest();
//	} while(g_lastpc == cpuRegs.pc);
//
//	__asm jmp DispatcherReg
	__asm {
RepDummy:
		add cpuRegs.cycle, 9
		call cpuBranchTest
		cmp cpuRegs.pc, 0x81fc0
		je RepDummy
		jmp DispatcherReg
	}
}
////////////////////////////////////////////////////
#include "r3000a.h"
#include "PsxCounters.h"
extern tIPU_BP g_BP;
extern u32 psxdump;
extern u32 psxNextCounter, psxNextsCounter;
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
extern Counter counters[6];
// Debug dump of the full EE/VU0/DMAC/INTC/SIF state to the log; temp != 0
// prefixes every line with "t" to distinguish interleaved dump streams.
void iDumpRegisters(u32 startpc, u32 temp)
{
	int i;
	char* pstr = temp ? "t" : "";
	const u32 dmacs[] = {0x8000, 0x9000, 0xa000, 0xb000, 0xb400, 0xc000, 0xc400, 0xc800, 0xd000, 0xd400 };

	__Log("%sreg: %x %x\n", pstr, startpc, cpuRegs.interrupt);
	for(i = 1; i < 32; ++i) __Log("%s%d: %x_%x_%x_%x\n", pstr, i, cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]);

	//for(i = 0; i < 32; ++i) __Log("%sf%d: %f %x\n", pstr, i, fpuRegs.fpr[i].f, fpuRegs.fprc[i]);
	//for(i = 1; i < 32; ++i) __Log("%svf%d: %f %f %f %f, vi: %x\n", pstr, i, VU0.VF[i].F[3], VU0.VF[i].F[2], VU0.VF[i].F[1], VU0.VF[i].F[0], VU0.VI[i].UL);

	for(i = 0; i < 32; ++i) __Log("%sf%d: %x %x\n", pstr, i, fpuRegs.fpr[i].UL, fpuRegs.fprc[i]);
	for(i = 1; i < 32; ++i) __Log("%svf%d: %x %x %x %x, vi: %x\n", pstr, i, VU0.VF[i].UL[3], VU0.VF[i].UL[2], VU0.VF[i].UL[1], VU0.VF[i].UL[0], VU0.VI[i].UL);

	__Log("%svfACC: %x %x %x %x\n", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]);
	__Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x\n", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0],
	cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]);
	__Log("%sCycle: %x %x, Count: %x\n", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count);

	iDumpPsxRegisters(psxRegs.pc, temp);

	__Log("cyc11: %x %x; vu0: %x, vu1: %x\n", cpuRegs.sCycle[1], cpuRegs.eCycle[1], VU0.cycle, VU1.cycle);
	__Log("%scounters: %x %x; psx: %x %x\n", pstr, nextsCounter, nextCounter, psxNextsCounter, psxNextCounter);

	// EE hardware timers
	for(i = 0; i < 4; ++i) {
		__Log("eetimer%d: count: %x mode: %x target: %x %x; %x %x; %x %x %x %x\n", i,
			counters[i].count, counters[i].mode, counters[i].target, counters[i].hold, counters[i].rate,
			counters[i].interrupt, counters[i].Cycle, counters[i].sCycle, counters[i].CycleT, counters[i].sCycleT);
	}

	__Log("ipu %x %x %x %x; bp: %x %x %x %x\n", psHu32(0x2000), psHu32(0x2010), psHu32(0x2020), psHu32(0x2030), g_BP.BP, g_BP.bufferhasnew, g_BP.FP, g_BP.IFC);
	__Log("gif: %x %x %x\n", psHu32(0x3000), psHu32(0x3010), psHu32(0x3020));

	// per-channel DMA registers
	for(i = 0; i < ARRAYSIZE(dmacs); ++i) {
		DMACh* p = (DMACh*)(PS2MEM_HW+dmacs[i]);
		__Log("dma%d c%x m%x q%x t%x s%x\n", i, p->chcr, p->madr, p->qwc, p->tadr, p->sadr);
	}
	__Log("dmac %x %x %x %x\n", psHu32(DMAC_CTRL), psHu32(DMAC_STAT), psHu32(DMAC_RBSR), psHu32(DMAC_RBOR));
	__Log("intc %x %x\n", psHu32(INTC_STAT), psHu32(INTC_MASK));
	__Log("sif: %x %x %x %x %x\n", psHu32(0xf200), psHu32(0xf220), psHu32(0xf230), psHu32(0xf240), psHu32(0xf260));
}
extern u32 psxdump;
// Debug hook called from generated code: sanity-checks saved-register flags
// and MXCSR, and optionally dumps full register state every 'skip' calls
// when dumplog bit 1 is set.
static void printfn()
{
	static int lastrec = 0;
	static int curcount = 0, count2 = 0;
	const int skip = 0;
	static int i;

	assert( !g_globalMMXSaved );
	assert( !g_globalXMMSaved );

#ifdef _DEBUG
	__asm stmxcsr i
	// BUGFIX: this was 'assert( i = g_sseMXCSR )' — an assignment that always
	// passed (g_sseMXCSR is nonzero) and clobbered the value just stored by
	// stmxcsr.  Compare instead, to verify MXCSR hasn't been changed.
	assert( i == g_sseMXCSR );
#endif

	if( (dumplog&2) ) {//&& lastrec != g_lastpc ) {
		curcount++;

		if( curcount > skip ) {
			iDumpRegisters(g_lastpc, 1);
			curcount = 0;
		}

		lastrec = g_lastpc;
	}
}
// PCs of blocks to force-dump for debugging (scanned in recRecompile's
// _DEBUG dump-code section); 0 acts as the empty/terminator entry.
u32 s_recblocks[] = {0};
// Called from generated code when the emitted ESP-consistency check fails.
// BUGFIX: print the diagnostic before asserting — previously assert(0) came
// first, so in debug builds the process aborted before "Bad esp!" was ever
// printed (release builds are unaffected since assert compiles out).
void badespfn() {
	SysPrintf("Bad esp!\n");
	assert(0);
}
// Enables COP2/VU0 analysis optimizations in recRecompile when nonzero
// (the commented CHECK_VU0REC was the intended toggle); currently disabled.
#define OPTIMIZE_COP2 0//CHECK_VU0REC
// Recompile one EE basic block starting at startpc.
//
// Pipeline:
//   1. housekeeping: reset the code/stack buffers when nearly full and claim
//      a BASEBLOCK / BASEBLOCKEX slot for this pc;
//   2. forward scan from startpc to find the block end (s_nEndBlock): the
//      first branch/jump/eret, or the start of an already-compiled block;
//   3. backward pass filling s_pInstCache with per-instruction register
//      propagation info (rpropBSC);
//   4. analysis pass: COP2 tracking and load/store peephole pairing;
//   5. emission: recompileNextInstruction until the block end, then the
//      block epilogue (dispatcher jump or direct link) and registration.
//
// Recurses at the end on the fall-through pc so non-branching blocks are
// linked directly to their successors.
static void recRecompile( u32 startpc )
{
	u32 i = 0;
	u32 branchTo;
	u32 willbranch3 = 0;
	u32* ptr;
	u32 usecop2;

#ifdef _DEBUG
	//dumplog |= 4;
	if( dumplog & 4 )
		iDumpRegisters(startpc, 0);
#endif

	assert( startpc );

	// if recPtr reached the mem limit reset whole mem
	if ( ( (uptr)recPtr - (uptr)recMem ) >= 0xb92000 || dumplog == 0xffffffff) {
		recReset();
	}
	if ( ( (uptr)recStackPtr - (uptr)recStack ) >= RECSTACK_SIZE-0x100 ) {
#ifdef _DEBUG
		SysPrintf("stack reset\n");
#endif
		recReset();
	}

	s_pCurBlock = PC_GETBLOCK(startpc);

	if( s_pCurBlock->pFnptr ) {
		// clear if already taken
		assert( s_pCurBlock->startpc < startpc );
		recClearMem(s_pCurBlock);
	}

	if( s_pCurBlock->startpc == startpc ) {
		s_pCurBlockEx = PC_GETBLOCKEX(s_pCurBlock);
		assert( s_pCurBlockEx->startpc == startpc );
	}
	else {
		// allocate a BASEBLOCKEX slot, scanning round-robin from the last
		// slot handed out
		s_pCurBlockEx = NULL;
		for(i = 0; i < EE_NUMBLOCKS; ++i) {
			if( recBlocks[(i+s_nNextBlock)%EE_NUMBLOCKS].size == 0 ) {
				s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%EE_NUMBLOCKS;
				s_nNextBlock = (i+s_nNextBlock+1)%EE_NUMBLOCKS;
				break;
			}
		}

		if( s_pCurBlockEx == NULL ) {
			// no free slots: flush everything and start over
			//SysPrintf("ee reset (blocks)\n");
			recReset();
			s_nNextBlock = 0;
			s_pCurBlockEx = recBlocks;
		}

		s_pCurBlockEx->startpc = startpc;
	}

	x86SetPtr( recPtr );
	x86Align(16);
	recPtr = x86Ptr;
	s_pCurBlock->pFnptr = (u32)x86Ptr;
	s_pCurBlock->startpc = startpc;

	branch = 0;

	// reset recomp state variables
	s_nBlockCycles = 0;
	pc = startpc;
	x86FpuState = FPU_STATE;
	iCWstate = 0;
	s_saveConstGPRreg = 0;
	g_cpuHasConstReg = g_cpuFlushedConstReg = 1; // only r0 is const (and flushed)
	g_cpuPrevRegHasLive1 = g_cpuRegHasLive1 = 0xffffffff;
	g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt = 0;
	_recClearWritebacks();
	assert( g_cpuConstRegs[0].UD[0] == 0 );

	_initX86regs();
	_initXMMregs();
	_initMMXregs();

#ifdef _DEBUG
	// for debugging purposes: record the block pc and run the per-block
	// consistency checks in printfn
	MOV32ItoM((u32)&g_lastpc, pc);
	CALLFunc((u32)printfn);
#endif

	// go until the next branch
	i = startpc;
	s_nEndBlock = 0xffffffff;
	s_nHasDelay = 0;

	while(1) {
		BASEBLOCK* pblock = PC_GETBLOCK(i);

		// ran into the start of another compiled block: end here and link
		// directly to it (willbranch3 / branch == 3 case below)
		if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) {
			if( i == pblock->startpc ) {
				// branch = 3
				willbranch3 = 1;
				s_nEndBlock = i;
				break;
			}
		}

		cpuRegs.code = *(int *)PSM(i);

		switch(cpuRegs.code >> 26) {
			case 0: // special
				if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR
					s_nEndBlock = i + 8;
					s_nHasDelay = 1;
					goto StartRecomp;
				}
				break;

			case 1: // regimm
				if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) {
					// branches
					// BUGFIX: was "_Rt_ == 2 && _Rt_ == 3 && _Rt_ == 18 && _Rt_ == 19",
					// which is always false. The "likely" forms
					// (BLTZL/BGEZL/BLTZALL/BGEZALL) have a conditionally
					// executed delay slot and need s_nHasDelay = 1, matching
					// the opcode 20-23 handling below.
					if( _Rt_ == 2 || _Rt_ == 3 || _Rt_ == 18 || _Rt_ == 19 ) s_nHasDelay = 1;
					else s_nHasDelay = 2;

					branchTo = _Imm_ * 4 + i + 4;
					if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
					else s_nEndBlock = i+8;

					goto StartRecomp;
				}
				break;

			case 2: // J
			case 3: // JAL
				s_nHasDelay = 1;
				s_nEndBlock = i + 8;
				goto StartRecomp;

			// branches
			case 4: case 5: case 6: case 7:
			case 20: case 21: case 22: case 23:
				// 20-23 are the "likely" forms (conditional delay slot)
				if( (cpuRegs.code >> 26) >= 20 ) s_nHasDelay = 1;
				else s_nHasDelay = 2;

				branchTo = _Imm_ * 4 + i + 4;
				if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
				else s_nEndBlock = i+8;

				goto StartRecomp;

			case 16: // cp0
				if( _Rs_ == 16 ) {
					if( _Funct_ == 24 ) { // eret
						s_nEndBlock = i+4;
						goto StartRecomp;
					}
				}
				break;

			case 17: // cp1
			case 18: // cp2
				if( _Rs_ == 8 ) {
					// BC1F, BC1T, BC1FL, BC1TL
					// BC2F, BC2T, BC2FL, BC2TL
					if( _Rt_ >= 2 ) s_nHasDelay = 1;
					else s_nHasDelay = 2;

					branchTo = _Imm_ * 4 + i + 4;
					if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
					else s_nEndBlock = i+8;

					goto StartRecomp;
				}
				break;
		}

		i += 4;
	}

StartRecomp:

	// rec info //
	{
		EEINST* pcur;

		// grow the instruction-info cache if this block is the largest yet
		if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) {
			free(s_pInstCache);
			s_nInstCacheSize = (s_nEndBlock-startpc)/4+10;
			s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize);
			assert( s_pInstCache != NULL );
		}

		// propagate per-instruction register info backwards from block end
		pcur = s_pInstCache + (s_nEndBlock-startpc)/4;
		_recClearInst(pcur);
		pcur->info = 0;

		for(i = s_nEndBlock; i > startpc; i -= 4 ) {
			cpuRegs.code = *(int *)PSM(i-4);
			pcur[-1] = pcur[0];
			rpropBSC(pcur-1, pcur);
			pcur--;
		}
	}

	// analyze instructions //
	{
		usecop2 = 0;
		g_pCurInstInfo = s_pInstCache;

		for(i = startpc; i < s_nEndBlock; i += 4) {
			g_pCurInstInfo++;
			cpuRegs.code = *(u32*)PSM(i);

			// cop2 //
			if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) {

				if( !usecop2 ) {
					// init
					if( OPTIMIZE_COP2 ) {
						memset(VU0.fmac,0,sizeof(VU0.fmac));
						memset(&VU0.fdiv,0,sizeof(VU0.fdiv));
						memset(&VU0.efu,0,sizeof(VU0.efu));
					}
					vucycle = 0;
					usecop2 = 1;
				}

				VU0.code = cpuRegs.code;
				_cop2AnalyzeOp(g_pCurInstInfo, OPTIMIZE_COP2);
				continue;
			}

			if( usecop2 ) vucycle++;

			// peephole optimizations //
#ifdef WIN32_VIRTUAL_MEM
			if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) {
				u32 curcode = cpuRegs.code;
				u32 nextcode = *(u32*)PSM(i+4);
				if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] != NULL ) {

					// rs has to be the same, and cannot be just written
					if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) {

						if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) {
							// see how many stores there are
							u32 j;
							// use xmmregs since only supporting lwc1,lq,swc1,sq
							for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) {
								u32 nncode = *(u32*)PSM(j);
								if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
									_eeLoadWritesRs(nncode))
									break;
							}

							if( j > i+8 ) {
								u32 num = (j-i)>>2; // number of stores that can coissue
								assert( num <= XMMREGS );

								g_pCurInstInfo[0].numpeeps = num-1;
								g_pCurInstInfo[0].info |= EEINSTINFO_COREC;

								// mark the remaining co-issued insts as already handled
								while(i < j-4) {
									g_pCurInstInfo++;
									g_pCurInstInfo[0].info |= EEINSTINFO_NOREC;
									i += 4;
								}

								continue;
							}

							// fall through
						}

						// unaligned loadstores

						// if LWL, check if LWR and that offsets are +3 away
						switch(curcode >> 26) {
							case 0x22: // LWL
								if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode )
									continue;
								break;
							case 0x26: // LWR
								if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 )
									continue;
								break;

							case 0x2a: // SWL
								if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode )
									continue;
								break;
							case 0x2e: // SWR
								if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 )
									continue;
								break;

							case 0x1a: // LDL
								if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode )
									continue;
								break;
							case 0x1b: // LDR (comment previously mislabeled LWR)
								// BUGFIX: opcode compare was "!= 0x1aa", an
								// impossible value for a 6-bit field, so the
								// LDR/LDL pairing never fired; LDL is 0x1a.
								if( (nextcode>>26) != 0x1a || ((s16)nextcode) != (s16)curcode+7 )
									continue;
								break;

							case 0x2c: // SDL (comment previously mislabeled SWL)
								if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode )
									continue;
								break;
							case 0x2d: // SDR (comment previously mislabeled SWR)
								if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 )
									continue;
								break;
						}

						// good enough
						g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
						g_pCurInstInfo[0].numpeeps = 1;
						g_pCurInstInfo[1].info |= EEINSTINFO_NOREC;
						g_pCurInstInfo++;
						i += 4;
						continue;
					}
				}
			}
#endif // end peephole
		}

		if( usecop2 ) {
			// add necessary mac writebacks
			// NOTE(review): the loop body is intentionally empty — this pass
			// looks like an unfinished placeholder; confirm before removing.
			g_pCurInstInfo = s_pInstCache;
			for(i = startpc; i < s_nEndBlock-4; i += 4) {
				g_pCurInstInfo++;
				if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) {
				}
			}
		}
	}

	// perf counters //
#ifdef PCSX2_DEVBUILD
	s_startcount = 0;
#endif

#ifdef _DEBUG
	// dump code
	for(i = 0; i < ARRAYSIZE(s_recblocks); ++i) {
		if( startpc == s_recblocks[i] ) {
			iDumpBlock(startpc, recPtr);
		}
	}

	if( (dumplog & 1) )
		iDumpBlock(startpc, recPtr);
#endif

	// finally recompile //
	g_pCurInstInfo = s_pInstCache;
	while (!branch && pc < s_nEndBlock) {
		recompileNextInstruction(0);
	}

#ifdef _DEBUG
	if( (dumplog & 1) )
		iDumpBlock(startpc, recPtr);
#endif

	assert( (pc-startpc)>>2 <= 0xffff );
	s_pCurBlockEx->size = (pc-startpc)>>2;

	// point the interior BASEBLOCK entries at this block's code
	for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) {
		s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr;
		s_pCurBlock[i].startpc = s_pCurBlock->startpc;
	}

	// don't overwrite if delay slot
	if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) {
		s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr;
		s_pCurBlock[i].startpc = s_pCurBlock->startpc;
	}

	// set the block ptr
	AddBaseBlockEx(s_pCurBlockEx, 0);

	if( !(pc&0x10000000) )
		maxrecmem = max( (pc&~0xa0000000), maxrecmem );

	if( branch == 2 ) {
		// indirect branch: flush state and return through the dispatcher
		iFlushCall(FLUSH_EVERYTHING);

		iBranchTest(0xffffffff, 1);
		if( bExecBIOS ) CheckForBIOSEnd();

		JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
	}
	else {
		assert( branch != 3 );
		if( branch ) assert( !willbranch3 );
		else ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*9/8);

		if( willbranch3 ) {
			// fell into an already-compiled block: jump straight to its code
			BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock);
			assert( pc == s_nEndBlock );
			iFlushCall(FLUSH_EVERYTHING);
			MOV32ItoM((u32)&cpuRegs.pc, pc);
			JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5));
			branch = 3;
		}
		else if( !branch ) {
			// didn't branch, but had to stop; the JMP32 target is patched
			// below once the successor block has been compiled
			MOV32ItoM( (u32)&cpuRegs.pc, pc );
			iFlushCall(FLUSH_EVERYTHING);
			ptr = JMP32(0);
		}
	}

	assert( x86Ptr >= (s8*)s_pCurBlock->pFnptr + EE_MIN_BLOCK_BYTES );
	assert( x86Ptr < recMem+0x00c00000 );
	assert( recStackPtr < recStack+RECSTACK_SIZE );
	assert( x86FpuState == 0 );

	recPtr = x86Ptr;
	assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );

	if( !branch ) {
		// compile the fall-through successor and link this block to it
		BASEBLOCK* pcurblock = s_pCurBlock;
		u32 nEndBlock = s_nEndBlock;

		s_pCurBlock = PC_GETBLOCK(pc);
		assert( ptr != NULL );

		if( s_pCurBlock->startpc != pc )
			recRecompile(pc);

		if( pcurblock->startpc == startpc ) {
			assert( pcurblock->pFnptr );
			assert( s_pCurBlock->startpc == nEndBlock );
			// patch the JMP32 emitted above to jump to the successor block
			*ptr = s_pCurBlock->pFnptr - ( (u32)ptr + 4 );
		}
		else {
			// this block was cleared while compiling the successor; redo it
			recRecompile(startpc);
			assert( pcurblock->pFnptr != 0 );
		}
	}
}
// EE CPU backend table: exposes the recompiler through the R5900cpu
// interface (init/reset/step/execute, per-VU execute/enable/clear hooks,
// cache clearing, and shutdown). Field order must match the R5900cpu
// struct declaration.
R5900cpu recCpu = {
	recInit,
	recReset,
	recStep,
	recExecute,
	recExecuteBlock,
	recExecuteVU0Block,
	recExecuteVU1Block,
	recEnableVU0micro,
	recEnableVU1micro,
	recClear,
	recClearVU0,
	recClearVU1,
	recShutdown
};