Core: Format recompilers

This commit is contained in:
TellowKrinkle 2021-08-30 01:38:13 -05:00 committed by Kojin
parent 7aa85960ba
commit 5260d63565
63 changed files with 9222 additions and 6744 deletions

View File

@ -23,7 +23,7 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc, uptr fnptr)
for (linkiter_t i = range.first; i != range.second; ++i) for (linkiter_t i = range.first; i != range.second; ++i)
*(u32*)i->second = fnptr - (i->second + 4); *(u32*)i->second = fnptr - (i->second + 4);
return blocks.insert(startpc, fnptr);; return blocks.insert(startpc, fnptr);
} }
int BaseBlocks::LastIndex(u32 startpc) const int BaseBlocks::LastIndex(u32 startpc) const
@ -33,7 +33,8 @@ int BaseBlocks::LastIndex(u32 startpc) const
int imin = 0, imax = blocks.size() - 1, imid; int imin = 0, imax = blocks.size() - 1, imid;
while(imin != imax) { while (imin != imax)
{
imid = (imin + imax + 1) >> 1; imid = (imin + imax + 1) >> 1;
if (blocks[imid].startpc > startpc) if (blocks[imid].startpc > startpc)
@ -79,4 +80,3 @@ void BaseBlocks::Link(u32 pc, s32* jumpptr)
*jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1)); *jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1));
links.insert(std::pair<u32, uptr>(pc, (uptr)jumpptr)); links.insert(std::pair<u32, uptr>(pc, (uptr)jumpptr));
} }

View File

@ -41,10 +41,10 @@ struct BASEBLOCKEX
//u32 visited; // number of times called //u32 visited; // number of times called
//u64 ltime; // regs it assumes to have set already //u64 ltime; // regs it assumes to have set already
#endif #endif
}; };
class BaseBlockArray { class BaseBlockArray
{
s32 _Reserved; s32 _Reserved;
s32 _Size; s32 _Size;
BASEBLOCKEX* blocks; BASEBLOCKEX* blocks;
@ -53,7 +53,8 @@ class BaseBlockArray {
{ {
pxAssert(size > 0); pxAssert(size > 0);
BASEBLOCKEX* newMem = new BASEBLOCKEX[size]; BASEBLOCKEX* newMem = new BASEBLOCKEX[size];
if(blocks) { if (blocks)
{
memcpy(newMem, blocks, _Reserved * sizeof(BASEBLOCKEX)); memcpy(newMem, blocks, _Reserved * sizeof(BASEBLOCKEX));
delete[] blocks; delete[] blocks;
} }
@ -66,30 +67,34 @@ class BaseBlockArray {
resize(size); resize(size);
_Reserved = size; _Reserved = size;
} }
public: public:
~BaseBlockArray() ~BaseBlockArray()
{ {
if(blocks) { if (blocks)
delete[] blocks; delete[] blocks;
} }
}
BaseBlockArray (s32 size) : _Reserved(0), BaseBlockArray(s32 size)
_Size(0), blocks(NULL) : _Reserved(0)
, _Size(0)
, blocks(NULL)
{ {
reserve(size); reserve(size);
} }
BASEBLOCKEX* insert(u32 startpc, uptr fnptr) BASEBLOCKEX* insert(u32 startpc, uptr fnptr)
{ {
if(_Size + 1 >= _Reserved) { if (_Size + 1 >= _Reserved)
{
reserve(_Reserved + 0x2000); // some games requires even more! reserve(_Reserved + 0x2000); // some games requires even more!
} }
// Insert the the new BASEBLOCKEX by startpc order // Insert the the new BASEBLOCKEX by startpc order
int imin = 0, imax = _Size, imid; int imin = 0, imax = _Size, imid;
while (imin < imax) { while (imin < imax)
{
imid = (imin + imax) >> 1; imid = (imin + imax) >> 1;
if (blocks[imid].startpc > startpc) if (blocks[imid].startpc > startpc)
@ -100,7 +105,8 @@ public:
pxAssert(imin == _Size || blocks[imin].startpc > startpc); pxAssert(imin == _Size || blocks[imin].startpc > startpc);
if(imin < _Size) { if (imin < _Size)
{
// make a hole for a new block. // make a hole for a new block.
memmove(blocks + imin + 1, blocks + imin, (_Size - imin) * sizeof(BASEBLOCKEX)); memmove(blocks + imin + 1, blocks + imin, (_Size - imin) * sizeof(BASEBLOCKEX));
} }
@ -132,7 +138,8 @@ public:
{ {
int range = last - first; int range = last - first;
if(last < _Size) { if (last < _Size)
{
memmove(blocks + first, blocks + last, (_Size - last) * sizeof(BASEBLOCKEX)); memmove(blocks + first, blocks + last, (_Size - last) * sizeof(BASEBLOCKEX));
} }
@ -151,8 +158,8 @@ protected:
BaseBlockArray blocks; BaseBlockArray blocks;
public: public:
BaseBlocks() : BaseBlocks()
recompiler(0) : recompiler(0)
, blocks(0x4000) , blocks(0x4000)
{ {
} }
@ -194,7 +201,8 @@ public:
{ {
pxAssert(first <= last); pxAssert(first <= last);
int idx = first; int idx = first;
do{ do
{
pxAssert(idx <= last); pxAssert(idx <= last);
//u32 startpc = blocks[idx].startpc; //u32 startpc = blocks[idx].startpc;
@ -212,8 +220,7 @@ public:
BASEBLOCKEX effu(blocks[idx]); BASEBLOCKEX effu(blocks[idx]);
memset((void*)effu.fnptr, 0xcc, 1); memset((void*)effu.fnptr, 0xcc, 1);
} }
} } while (idx++ < last);
while(idx++ < last);
// TODO: remove links from this block? // TODO: remove links from this block?
blocks.erase(first, last + 1); blocks.erase(first, last + 1);

View File

@ -20,7 +20,8 @@
#define MOVZ MOVZtemp #define MOVZ MOVZtemp
#define MOVN MOVNtemp #define MOVN MOVNtemp
enum class eeOpcode { enum class eeOpcode
{
// Core // Core
special , regimm , J , JAL , BEQ , BNE , BLEZ , BGTZ , special , regimm , J , JAL , BEQ , BNE , BLEZ , BGTZ ,
ADDI , ADDIU , SLTI , SLTIU , ANDI , ORI , XORI , LUI , ADDI , ADDIU , SLTI , SLTIU , ANDI , ORI , XORI , LUI ,
@ -229,7 +230,8 @@ static const char eeOpcodeName[][16] = {
using namespace x86Emitter; using namespace x86Emitter;
struct eeProfiler { struct eeProfiler
{
static const u32 memSpace = 1 << 19; static const u32 memSpace = 1 << 19;
u64 opStats[static_cast<int>(eeOpcode::LAST)]; u64 opStats[static_cast<int>(eeOpcode::LAST)];
@ -239,7 +241,8 @@ struct eeProfiler {
u64 memStatsFast; u64 memStatsFast;
u32 memMask; u32 memMask;
void Reset() { void Reset()
{
memzero(opStats); memzero(opStats);
memzero(memStats); memzero(memStats);
memzero(memStatsConst); memzero(memStatsConst);
@ -249,22 +252,26 @@ struct eeProfiler {
pxAssert(eeOpcodeName[static_cast<int>(eeOpcode::LAST)][0] == '!'); pxAssert(eeOpcodeName[static_cast<int>(eeOpcode::LAST)][0] == '!');
} }
void EmitOp(eeOpcode opcode) { void EmitOp(eeOpcode opcode)
{
int op = static_cast<int>(opcode); int op = static_cast<int>(opcode);
xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1); xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1);
xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0); xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0);
} }
double per(u64 part, u64 total) { double per(u64 part, u64 total)
{
return (double)part / (double)total * 100.0; return (double)part / (double)total * 100.0;
} }
void Print() { void Print()
{
// Compute opcode stat // Compute opcode stat
u64 total = 0; u64 total = 0;
std::vector<std::pair<u32, u32>> v; std::vector<std::pair<u32, u32>> v;
std::vector<std::pair<u32, u32>> vc; std::vector<std::pair<u32, u32>> vc;
for(int i = 0; i < static_cast<int>(eeOpcode::LAST); i++) { for (int i = 0; i < static_cast<int>(eeOpcode::LAST); i++)
{
total += opStats[i]; total += opStats[i];
v.push_back(std::make_pair(opStats[i], i)); v.push_back(std::make_pair(opStats[i], i));
} }
@ -272,7 +279,8 @@ struct eeProfiler {
std::reverse(v.begin(), v.end()); std::reverse(v.begin(), v.end());
DevCon.WriteLn("EE Profiler:"); DevCon.WriteLn("EE Profiler:");
for(u32 i = 0; i < v.size(); i++) { for (u32 i = 0; i < v.size(); i++)
{
u64 count = v[i].first; u64 count = v[i].first;
double stat = (double)count / (double)total * 100.0; double stat = (double)count / (double)total * 100.0;
DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]", DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]",
@ -310,7 +318,8 @@ struct eeProfiler {
for (size_t i = 0; i < memSpace; i++) for (size_t i = 0; i < memSpace; i++)
total_const += memStatsConst[i]; total_const += memStatsConst[i];
for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i]; for (int i = 0; i < 4 * _1kb; i++)
reg_const += memStatsConst[ou + i] + memStatsConst[ok + i];
u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough
double ram_const_p = per(ram_const, ram); double ram_const_p = per(ram_const, ram);
@ -329,7 +338,8 @@ struct eeProfiler {
v.clear(); v.clear();
vc.clear(); vc.clear();
for (int i = 0; i < 4 * _1kb; i++) { for (int i = 0; i < 4 * _1kb; i++)
{
u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i]; u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i];
u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c; u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c;
if (reg) if (reg)
@ -344,7 +354,8 @@ struct eeProfiler {
std::reverse(vc.begin(), vc.end()); std::reverse(vc.begin(), vc.end());
DevCon.WriteLn("\nEE Reg Profiler:"); DevCon.WriteLn("\nEE Reg Profiler:");
for(u32 i = 0; i < v.size(); i++) { for (u32 i = 0; i < v.size(); i++)
{
u64 count = v[i].first; u64 count = v[i].first;
double stat = (double)count / (double)(reg - reg_const) * 100.0; double stat = (double)count / (double)(reg - reg_const) * 100.0;
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
@ -354,7 +365,8 @@ struct eeProfiler {
} }
DevCon.WriteLn("\nEE Const Reg Profiler:"); DevCon.WriteLn("\nEE Const Reg Profiler:");
for(u32 i = 0; i < vc.size(); i++) { for (u32 i = 0; i < vc.size(); i++)
{
u64 count = vc[i].first; u64 count = vc[i].first;
double stat = (double)count / (double)reg_const * 100.0; double stat = (double)count / (double)reg_const * 100.0;
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
@ -362,38 +374,44 @@ struct eeProfiler {
if (stat < 0.01) if (stat < 0.01)
break; break;
} }
} }
// Warning dirty ebx // Warning dirty ebx
void EmitMem() { void EmitMem()
{
// Compact the 4GB virtual address to a 512KB virtual address // Compact the 4GB virtual address to a 512KB virtual address
if (x86caps.hasBMI2) { if (x86caps.hasBMI2)
{
xPEXT(ebx, ecx, ptr[&memMask]); xPEXT(ebx, ecx, ptr[&memMask]);
xADD(ptr32[(rbx * 4) + memStats], 1); xADD(ptr32[(rbx * 4) + memStats], 1);
} }
} }
void EmitConstMem(u32 add) { void EmitConstMem(u32 add)
if (x86caps.hasBMI2) { {
if (x86caps.hasBMI2)
{
u32 a = _pext_u32(add, memMask); u32 a = _pext_u32(add, memMask);
xADD(ptr32[a + memStats], 1); xADD(ptr32[a + memStats], 1);
xADD(ptr32[a + memStatsConst], 1); xADD(ptr32[a + memStatsConst], 1);
} }
} }
void EmitSlowMem() { void EmitSlowMem()
{
xADD(ptr32[(u32*)&memStatsSlow], 1); xADD(ptr32[(u32*)&memStatsSlow], 1);
xADC(ptr32[(u32*)&memStatsSlow + 1], 0); xADC(ptr32[(u32*)&memStatsSlow + 1], 0);
} }
void EmitFastMem() { void EmitFastMem()
{
xADD(ptr32[(u32*)&memStatsFast], 1); xADD(ptr32[(u32*)&memStatsFast], 1);
xADC(ptr32[(u32*)&memStatsFast + 1], 0); xADC(ptr32[(u32*)&memStatsFast + 1], 0);
} }
}; };
#else #else
struct eeProfiler { struct eeProfiler
{
__fi void Reset() {} __fi void Reset() {}
__fi void EmitOp(eeOpcode op) {} __fi void EmitOp(eeOpcode op) {}
__fi void Print() {} __fi void Print() {}
@ -404,6 +422,7 @@ struct eeProfiler {
}; };
#endif #endif
namespace EE { namespace EE
{
extern eeProfiler Profiler; extern eeProfiler Profiler;
} }

View File

@ -154,7 +154,8 @@ void recMFC0()
xMOV(ptr[&s_iLastCOP0Cycle], ecx); xMOV(ptr[&s_iLastCOP0Cycle], ecx);
xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]); xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
if( !_Rt_ ) return; if (!_Rt_)
return;
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
@ -164,7 +165,8 @@ void recMFC0()
return; return;
} }
if ( !_Rt_ ) return; if (!_Rt_)
return;
if (_Rd_ == 25) if (_Rd_ == 25)
{ {
@ -192,7 +194,8 @@ void recMFC0()
return; return;
} }
else if(_Rd_ == 24){ else if (_Rd_ == 24)
{
COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
return; return;
} }
@ -346,4 +349,7 @@ void rec(TLBWR) {
void rec(TLBP) { void rec(TLBP) {
}*/ }*/
}}}} } // namespace COP0
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -26,8 +26,8 @@
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl { namespace OpcodeImpl {
namespace COP0 namespace COP0 {
{
void recMFC0(); void recMFC0();
void recMTC0(); void recMTC0();
void recBC0F(); void recBC0F();
@ -42,5 +42,8 @@ namespace COP0
void recDI(); void recDI();
void recEI(); void recEI();
}}}} } // namespace COP0
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -49,7 +49,8 @@ static int s_xmmchecknext = 0;
// Clear current register mapping structure // Clear current register mapping structure
// Clear allocation counter // Clear allocation counter
void _initXMMregs() { void _initXMMregs()
{
memzero(xmmregs); memzero(xmmregs);
g_xmmAllocCounter = 0; g_xmmAllocCounter = 0;
s_xmmchecknext = 0; s_xmmchecknext = 0;
@ -58,7 +59,8 @@ void _initXMMregs() {
// Get a pointer to the physical register (GPR / FPU / VU etc..) // Get a pointer to the physical register (GPR / FPU / VU etc..)
__fi void* _XMMGetAddr(int type, int reg, VURegs* VU) __fi void* _XMMGetAddr(int type, int reg, VURegs* VU)
{ {
switch (type) { switch (type)
{
case XMMTYPE_VFREG: case XMMTYPE_VFREG:
return (void*)VU_VFx_ADDR(reg); return (void*)VU_VFx_ADDR(reg);
@ -95,8 +97,10 @@ int _getFreeXMMreg()
int i, tempi; int i, tempi;
u32 bestcount = 0x10000; u32 bestcount = 0x10000;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[(i+s_xmmchecknext)%iREGCNT_XMM].inuse == 0) { {
if (xmmregs[(i + s_xmmchecknext) % iREGCNT_XMM].inuse == 0)
{
int ret = (s_xmmchecknext + i) % iREGCNT_XMM; int ret = (s_xmmchecknext + i) % iREGCNT_XMM;
s_xmmchecknext = (s_xmmchecknext + i + 1) % iREGCNT_XMM; s_xmmchecknext = (s_xmmchecknext + i + 1) % iREGCNT_XMM;
return ret; return ret;
@ -104,10 +108,14 @@ int _getFreeXMMreg()
} }
// check for dead regs // check for dead regs
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].needed) continue; {
if (xmmregs[i].type == XMMTYPE_GPRREG ) { if (xmmregs[i].needed)
if (!(EEINST_ISLIVEXMM(xmmregs[i].reg))) { continue;
if (xmmregs[i].type == XMMTYPE_GPRREG)
{
if (!(EEINST_ISLIVEXMM(xmmregs[i].reg)))
{
_freeXMMreg(i); _freeXMMreg(i);
return i; return i;
} }
@ -115,10 +123,14 @@ int _getFreeXMMreg()
} }
// check for future xmm usage // check for future xmm usage
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].needed) continue; {
if (xmmregs[i].type == XMMTYPE_GPRREG ) { if (xmmregs[i].needed)
if( !(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_XMM) ) { continue;
if (xmmregs[i].type == XMMTYPE_GPRREG)
{
if (!(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_XMM))
{
_freeXMMreg(i); _freeXMMreg(i);
return i; return i;
} }
@ -127,11 +139,15 @@ int _getFreeXMMreg()
tempi = -1; tempi = -1;
bestcount = 0xffff; bestcount = 0xffff;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].needed) continue; {
if (xmmregs[i].type != XMMTYPE_TEMP) { if (xmmregs[i].needed)
continue;
if (xmmregs[i].type != XMMTYPE_TEMP)
{
if( xmmregs[i].counter < bestcount ) { if (xmmregs[i].counter < bestcount)
{
tempi = i; tempi = i;
bestcount = xmmregs[i].counter; bestcount = xmmregs[i].counter;
} }
@ -142,7 +158,8 @@ int _getFreeXMMreg()
return i; return i;
} }
if( tempi != -1 ) { if (tempi != -1)
{
_freeXMMreg(tempi); _freeXMMreg(tempi);
return tempi; return tempi;
} }
@ -152,7 +169,8 @@ int _getFreeXMMreg()
} }
// Reserve a XMM register for temporary operation. // Reserve a XMM register for temporary operation.
int _allocTempXMMreg(XMMSSEType type, int xmmreg) { int _allocTempXMMreg(XMMSSEType type, int xmmreg)
{
if (xmmreg == -1) if (xmmreg == -1)
xmmreg = _getFreeXMMreg(); xmmreg = _getFreeXMMreg();
else else
@ -177,14 +195,19 @@ int _checkXMMreg(int type, int reg, int mode)
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse && (xmmregs[i].type == (type&0xff)) && (xmmregs[i].reg == reg)) { {
if (xmmregs[i].inuse && (xmmregs[i].type == (type & 0xff)) && (xmmregs[i].reg == reg))
{
if ( !(xmmregs[i].mode & MODE_READ) ) { if (!(xmmregs[i].mode & MODE_READ))
if (mode & MODE_READ) { {
if (mode & MODE_READ)
{
xMOVDQA(xRegisterSSE(i), ptr[_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)]); xMOVDQA(xRegisterSSE(i), ptr[_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)]);
} }
else if (mode & MODE_READHALF) { else if (mode & MODE_READHALF)
{
if (g_xmmtypes[i] == XMMT_INT) if (g_xmmtypes[i] == XMMT_INT)
xMOVQZX(xRegisterSSE(i), ptr[(void*)(uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)]); xMOVQZX(xRegisterSSE(i), ptr[(void*)(uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)]);
else else
@ -209,15 +232,21 @@ int _checkXMMreg(int type, int reg, int mode)
// reserve a new reg, then populate it if we read it // reserve a new reg, then populate it if we read it
// //
// Note: FPU are always in XMM register // Note: FPU are always in XMM register
int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode) { int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode)
{
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].type != XMMTYPE_FPREG) continue; if (xmmregs[i].inuse == 0)
if (xmmregs[i].reg != fpreg) continue; continue;
if (xmmregs[i].type != XMMTYPE_FPREG)
continue;
if (xmmregs[i].reg != fpreg)
continue;
if( !(xmmregs[i].mode & MODE_READ) && (mode & MODE_READ)) { if (!(xmmregs[i].mode & MODE_READ) && (mode & MODE_READ))
{
xMOVSSZX(xRegisterSSE(i), ptr[&fpuRegs.fpr[fpreg].f]); xMOVSSZX(xRegisterSSE(i), ptr[&fpuRegs.fpr[fpreg].f]);
xmmregs[i].mode |= MODE_READ; xmmregs[i].mode |= MODE_READ;
} }
@ -254,9 +283,12 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
for (i = 0; (uint)i < iREGCNT_XMM; i++) for (i = 0; (uint)i < iREGCNT_XMM; i++)
{ {
if (xmmregs[i].inuse == 0) continue; if (xmmregs[i].inuse == 0)
if (xmmregs[i].type != XMMTYPE_GPRREG) continue; continue;
if (xmmregs[i].reg != gprreg) continue; if (xmmregs[i].type != XMMTYPE_GPRREG)
continue;
if (xmmregs[i].reg != gprreg)
continue;
g_xmmtypes[i] = XMMT_INT; g_xmmtypes[i] = XMMT_INT;
@ -315,7 +347,8 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
else else
{ {
// DOX86 // DOX86
if (mode & MODE_READ) _flushConstReg(gprreg); if (mode & MODE_READ)
_flushConstReg(gprreg);
xMOVDQA(xRegisterSSE(xmmreg), ptr[&cpuRegs.GPR.r[gprreg].UL[0]]); xMOVDQA(xRegisterSSE(xmmreg), ptr[&cpuRegs.GPR.r[gprreg].UL[0]]);
} }
@ -330,11 +363,15 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].type != XMMTYPE_FPACC) continue; if (xmmregs[i].inuse == 0)
continue;
if (xmmregs[i].type != XMMTYPE_FPACC)
continue;
if( !(xmmregs[i].mode & MODE_READ) && (mode&MODE_READ)) { if (!(xmmregs[i].mode & MODE_READ) && (mode & MODE_READ))
{
xMOVSSZX(xRegisterSSE(i), ptr[&fpuRegs.ACC.f]); xMOVSSZX(xRegisterSSE(i), ptr[&fpuRegs.ACC.f]);
xmmregs[i].mode |= MODE_READ; xmmregs[i].mode |= MODE_READ;
} }
@ -357,7 +394,8 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
xmmregs[xmmreg].reg = 0; xmmregs[xmmreg].reg = 0;
xmmregs[xmmreg].counter = g_xmmAllocCounter++; xmmregs[xmmreg].counter = g_xmmAllocCounter++;
if (mode & MODE_READ) { if (mode & MODE_READ)
{
xMOVSSZX(xRegisterSSE(xmmreg), ptr[&fpuRegs.ACC.f]); xMOVSSZX(xRegisterSSE(xmmreg), ptr[&fpuRegs.ACC.f]);
} }
@ -370,10 +408,14 @@ void _addNeededGPRtoXMMreg(int gprreg)
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].type != XMMTYPE_GPRREG) continue; if (xmmregs[i].inuse == 0)
if (xmmregs[i].reg != gprreg) continue; continue;
if (xmmregs[i].type != XMMTYPE_GPRREG)
continue;
if (xmmregs[i].reg != gprreg)
continue;
xmmregs[i].counter = g_xmmAllocCounter++; // update counter xmmregs[i].counter = g_xmmAllocCounter++; // update counter
xmmregs[i].needed = 1; xmmregs[i].needed = 1;
@ -383,13 +425,18 @@ void _addNeededGPRtoXMMreg(int gprreg)
// Mark reserved FPU reg as needed. It won't be evicted anymore. // Mark reserved FPU reg as needed. It won't be evicted anymore.
// You must use _clearNeededXMMregs to clear the flag // You must use _clearNeededXMMregs to clear the flag
void _addNeededFPtoXMMreg(int fpreg) { void _addNeededFPtoXMMreg(int fpreg)
{
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].type != XMMTYPE_FPREG) continue; if (xmmregs[i].inuse == 0)
if (xmmregs[i].reg != fpreg) continue; continue;
if (xmmregs[i].type != XMMTYPE_FPREG)
continue;
if (xmmregs[i].reg != fpreg)
continue;
xmmregs[i].counter = g_xmmAllocCounter++; // update counter xmmregs[i].counter = g_xmmAllocCounter++; // update counter
xmmregs[i].needed = 1; xmmregs[i].needed = 1;
@ -399,12 +446,16 @@ void _addNeededFPtoXMMreg(int fpreg) {
// Mark reserved FPU ACC reg as needed. It won't be evicted anymore. // Mark reserved FPU ACC reg as needed. It won't be evicted anymore.
// You must use _clearNeededXMMregs to clear the flag // You must use _clearNeededXMMregs to clear the flag
void _addNeededFPACCtoXMMreg() { void _addNeededFPACCtoXMMreg()
{
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].type != XMMTYPE_FPACC) continue; if (xmmregs[i].inuse == 0)
continue;
if (xmmregs[i].type != XMMTYPE_FPACC)
continue;
xmmregs[i].counter = g_xmmAllocCounter++; // update counter xmmregs[i].counter = g_xmmAllocCounter++; // update counter
xmmregs[i].needed = 1; xmmregs[i].needed = 1;
@ -414,12 +465,15 @@ void _addNeededFPACCtoXMMreg() {
// Clear needed flags of all registers // Clear needed flags of all registers
// Written register will set MODE_READ (aka data is valid, no need to load it) // Written register will set MODE_READ (aka data is valid, no need to load it)
void _clearNeededXMMregs() { void _clearNeededXMMregs()
{
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
{
if( xmmregs[i].needed ) { if (xmmregs[i].needed)
{
// setup read to any just written regs // setup read to any just written regs
if (xmmregs[i].inuse && (xmmregs[i].mode & MODE_WRITE)) if (xmmregs[i].inuse && (xmmregs[i].mode & MODE_WRITE))
@ -427,7 +481,8 @@ void _clearNeededXMMregs() {
xmmregs[i].needed = 0; xmmregs[i].needed = 0;
} }
if( xmmregs[i].inuse ) { if (xmmregs[i].inuse)
{
pxAssert(xmmregs[i].type != XMMTYPE_TEMP); pxAssert(xmmregs[i].type != XMMTYPE_TEMP);
} }
} }
@ -440,17 +495,21 @@ void _clearNeededXMMregs() {
void _deleteGPRtoXMMreg(int reg, int flush) void _deleteGPRtoXMMreg(int reg, int flush)
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_GPRREG && xmmregs[i].reg == reg ) { if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_GPRREG && xmmregs[i].reg == reg)
{
switch(flush) { switch (flush)
{
case 0: case 0:
_freeXMMreg(i); _freeXMMreg(i);
break; break;
case 1: case 1:
case 2: case 2:
if( xmmregs[i].mode & MODE_WRITE ) { if (xmmregs[i].mode & MODE_WRITE)
{
pxAssert(reg != 0); pxAssert(reg != 0);
//pxAssert( g_xmmtypes[i] == XMMT_INT ); //pxAssert( g_xmmtypes[i] == XMMT_INT );
@ -481,15 +540,19 @@ void _deleteGPRtoXMMreg(int reg, int flush)
void _deleteFPtoXMMreg(int reg, int flush) void _deleteFPtoXMMreg(int reg, int flush)
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg ) { {
switch(flush) { if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg)
{
switch (flush)
{
case 0: case 0:
_freeXMMreg(i); _freeXMMreg(i);
return; return;
case 1: case 1:
if (xmmregs[i].mode & MODE_WRITE) { if (xmmregs[i].mode & MODE_WRITE)
{
xMOVSS(ptr[&fpuRegs.fpr[reg].UL], xRegisterSSE(i)); xMOVSS(ptr[&fpuRegs.fpr[reg].UL], xRegisterSSE(i));
// get rid of MODE_WRITE since don't want to flush again // get rid of MODE_WRITE since don't want to flush again
xmmregs[i].mode &= ~MODE_WRITE; xmmregs[i].mode &= ~MODE_WRITE;
@ -512,10 +575,13 @@ void _freeXMMreg(u32 xmmreg)
{ {
pxAssert(xmmreg < iREGCNT_XMM); pxAssert(xmmreg < iREGCNT_XMM);
if (!xmmregs[xmmreg].inuse) return; if (!xmmregs[xmmreg].inuse)
return;
if (xmmregs[xmmreg].mode & MODE_WRITE) { if (xmmregs[xmmreg].mode & MODE_WRITE)
switch (xmmregs[xmmreg].type) { {
switch (xmmregs[xmmreg].type)
{
case XMMTYPE_VFREG: case XMMTYPE_VFREG:
{ {
const VURegs* VU = xmmregs[xmmreg].VU ? &VU1 : &VU0; const VURegs* VU = xmmregs[xmmreg].VU ? &VU1 : &VU0;
@ -525,8 +591,10 @@ void _freeXMMreg(u32 xmmreg)
{ {
// don't destroy w // don't destroy w
uint t0reg; uint t0reg;
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) { for (t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg)
if( !xmmregs[t0reg].inuse ) break; {
if (!xmmregs[t0reg].inuse)
break;
} }
if (t0reg < iREGCNT_XMM) if (t0reg < iREGCNT_XMM)
@ -567,8 +635,10 @@ void _freeXMMreg(u32 xmmreg)
// don't destroy w // don't destroy w
uint t0reg; uint t0reg;
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) { for (t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg)
if( !xmmregs[t0reg].inuse ) break; {
if (!xmmregs[t0reg].inuse)
break;
} }
if (t0reg < iREGCNT_XMM) if (t0reg < iREGCNT_XMM)
@ -625,8 +695,10 @@ void _freeXMMreg(u32 xmmreg)
int _getNumXMMwrite() int _getNumXMMwrite()
{ {
int num = 0, i; int num = 0, i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if( xmmregs[i].inuse && (xmmregs[i].mode&MODE_WRITE) ) ++num; {
if (xmmregs[i].inuse && (xmmregs[i].mode & MODE_WRITE))
++num;
} }
return num; return num;
@ -639,25 +711,35 @@ u8 _hasFreeXMMreg()
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (!xmmregs[i].inuse) return 1; {
if (!xmmregs[i].inuse)
return 1;
} }
// check for dead regs // check for dead regs
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].needed) continue; {
if (xmmregs[i].type == XMMTYPE_GPRREG ) { if (xmmregs[i].needed)
if( !EEINST_ISLIVEXMM(xmmregs[i].reg) ) { continue;
if (xmmregs[i].type == XMMTYPE_GPRREG)
{
if (!EEINST_ISLIVEXMM(xmmregs[i].reg))
{
return 1; return 1;
} }
} }
} }
// check for dead regs // check for dead regs
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].needed) continue; {
if (xmmregs[i].type == XMMTYPE_GPRREG ) { if (xmmregs[i].needed)
if( !(g_pCurInstInfo->regs[xmmregs[i].reg]&EEINST_USED) ) { continue;
if (xmmregs[i].type == XMMTYPE_GPRREG)
{
if (!(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_USED))
{
return 1; return 1;
} }
} }
@ -670,8 +752,10 @@ void _flushXMMregs()
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].inuse == 0)
continue;
pxAssert(xmmregs[i].type != XMMTYPE_TEMP); pxAssert(xmmregs[i].type != XMMTYPE_TEMP);
pxAssert(xmmregs[i].mode & (MODE_READ | MODE_WRITE)); pxAssert(xmmregs[i].mode & (MODE_READ | MODE_WRITE));
@ -688,8 +772,10 @@ void _freeXMMregs()
{ {
int i; int i;
for (i=0; (uint)i<iREGCNT_XMM; i++) { for (i = 0; (uint)i < iREGCNT_XMM; i++)
if (xmmregs[i].inuse == 0) continue; {
if (xmmregs[i].inuse == 0)
continue;
pxAssert(xmmregs[i].type != XMMTYPE_TEMP); pxAssert(xmmregs[i].type != XMMTYPE_TEMP);
//pxAssert( xmmregs[i].mode & (MODE_READ|MODE_WRITE) ); //pxAssert( xmmregs[i].mode & (MODE_READ|MODE_WRITE) );
@ -702,20 +788,25 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy)
{ {
int t0reg; int t0reg;
g_xmmtypes[from] = XMMT_INT; g_xmmtypes[from] = XMMT_INT;
if( candestroy ) { if (candestroy)
if( g_xmmtypes[from] == XMMT_FPS ) xMOVSS(ptr[(void*)(to)], xRegisterSSE(from)); {
else xMOVD(ptr[(void*)(to)], xRegisterSSE(from)); if (g_xmmtypes[from] == XMMT_FPS)
xMOVSS(ptr[(void*)(to)], xRegisterSSE(from));
else
xMOVD(ptr[(void*)(to)], xRegisterSSE(from));
xPSRA.D(xRegisterSSE(from), 31); xPSRA.D(xRegisterSSE(from), 31);
xMOVD(ptr[(void*)(to + 4)], xRegisterSSE(from)); xMOVD(ptr[(void*)(to + 4)], xRegisterSSE(from));
return 1; return 1;
} }
else { else
{
// can't destroy and type is int // can't destroy and type is int
pxAssert(g_xmmtypes[from] == XMMT_INT); pxAssert(g_xmmtypes[from] == XMMT_INT);
if( _hasFreeXMMreg() ) { if (_hasFreeXMMreg())
{
xmmregs[from].needed = 1; xmmregs[from].needed = 1;
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(from)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(from));
@ -727,7 +818,8 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy)
xmmregs[t0reg] = xmmregs[from]; xmmregs[t0reg] = xmmregs[from];
xmmregs[from].inuse = 0; xmmregs[from].inuse = 0;
} }
else { else
{
xMOVD(ptr[(void*)(to + 4)], xRegisterSSE(from)); xMOVD(ptr[(void*)(to + 4)], xRegisterSSE(from));
xMOVD(ptr[(void*)(to)], xRegisterSSE(from)); xMOVD(ptr[(void*)(to)], xRegisterSSE(from));
xSAR(ptr32[(u32*)(to + 4)], 31); xSAR(ptr32[(u32*)(to + 4)], 31);
@ -743,7 +835,8 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy)
// But it is quite obscure !!! // But it is quite obscure !!!
int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode) int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode)
{ {
if( pinst->regs[gprreg] & EEINST_XMM ) return _allocGPRtoXMMreg(-1, gprreg, mode); if (pinst->regs[gprreg] & EEINST_XMM)
return _allocGPRtoXMMreg(-1, gprreg, mode);
return _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode); return _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode);
} }
@ -752,7 +845,8 @@ int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode)
// But it is quite obscure !!! // But it is quite obscure !!!
int _allocCheckFPUtoXMM(EEINST* pinst, int fpureg, int mode) int _allocCheckFPUtoXMM(EEINST* pinst, int fpureg, int mode)
{ {
if( pinst->fpuregs[fpureg] & EEINST_XMM ) return _allocFPtoXMMreg(-1, fpureg, mode); if (pinst->fpuregs[fpureg] & EEINST_XMM)
return _allocFPtoXMMreg(-1, fpureg, mode);
return _checkXMMreg(XMMTYPE_FPREG, fpureg, mode); return _checkXMMreg(XMMTYPE_FPREG, fpureg, mode);
} }
@ -777,8 +871,10 @@ u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg)
{ {
u32 i, inst = 1; u32 i, inst = 1;
while(size-- > 0) { while (size-- > 0)
for(i = 0; i < ArraySize(pinst->writeType); ++i) { {
for (i = 0; i < ArraySize(pinst->writeType); ++i)
{
if ((pinst->writeType[i] == xmmtype) && (pinst->writeReg[i] == reg)) if ((pinst->writeType[i] == xmmtype) && (pinst->writeReg[i] == reg))
return inst; return inst;
} }
@ -792,9 +888,12 @@ u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg)
void _recFillRegister(EEINST& pinst, int type, int reg, int write) void _recFillRegister(EEINST& pinst, int type, int reg, int write)
{ {
u32 i = 0; u32 i = 0;
if (write ) { if (write)
for(i = 0; i < ArraySize(pinst.writeType); ++i) { {
if( pinst.writeType[i] == XMMTYPE_TEMP ) { for (i = 0; i < ArraySize(pinst.writeType); ++i)
{
if (pinst.writeType[i] == XMMTYPE_TEMP)
{
pinst.writeType[i] = type; pinst.writeType[i] = type;
pinst.writeReg[i] = reg; pinst.writeReg[i] = reg;
return; return;
@ -802,9 +901,12 @@ void _recFillRegister(EEINST& pinst, int type, int reg, int write)
} }
pxAssume(false); pxAssume(false);
} }
else { else
for(i = 0; i < ArraySize(pinst.readType); ++i) { {
if( pinst.readType[i] == XMMTYPE_TEMP ) { for (i = 0; i < ArraySize(pinst.readType); ++i)
{
if (pinst.readType[i] == XMMTYPE_TEMP)
{
pinst.readType[i] = type; pinst.readType[i] = type;
pinst.readReg[i] = reg; pinst.readReg[i] = reg;
return; return;

View File

@ -28,7 +28,7 @@
#define MODE_READ 1 #define MODE_READ 1
#define MODE_WRITE 2 #define MODE_WRITE 2
#define MODE_READHALF 4 // read only low 64 bits #define MODE_READHALF 4 // read only low 64 bits
#define MODE_VUXY 0x8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF #define MODE_VUXY 8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF
#define MODE_VUZ 0x10 // z only doesn't work for now #define MODE_VUZ 0x10 // z only doesn't work for now
#define MODE_VUXYZ (MODE_VUZ | MODE_VUXY) // vector only has xyz valid (real w is in memory) #define MODE_VUXYZ (MODE_VUZ | MODE_VUXY) // vector only has xyz valid (real w is in memory)
#define MODE_NOFLUSH 0x20 // can't flush reg to mem #define MODE_NOFLUSH 0x20 // can't flush reg to mem
@ -103,7 +103,8 @@ static __fi int X86_ISVI(int type)
return ((type & ~X86TYPE_VU1) == X86TYPE_VI); return ((type & ~X86TYPE_VU1) == X86TYPE_VI);
} }
struct _x86regs { struct _x86regs
{
u8 inuse; u8 inuse;
u8 reg; // value of 0 - not used u8 reg; // value of 0 - not used
u8 mode; u8 mode;
@ -147,7 +148,8 @@ void _flushConstReg(int reg);
#define XMMGPR_HI 32 #define XMMGPR_HI 32
#define XMMFPU_ACC 32 #define XMMFPU_ACC 32
struct _xmmregs { struct _xmmregs
{
u8 inuse; u8 inuse;
u8 reg; u8 reg;
u8 type; u8 type;

File diff suppressed because it is too large Load Diff

View File

@ -21,10 +21,9 @@ extern const __aligned16 u32 g_maxvals[4];
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl { namespace OpcodeImpl {
namespace COP1 namespace COP1 {
{
void recMFC1(); void recMFC1();
void recCFC1(); void recCFC1();
void recMTC1(); void recMTC1();
@ -60,9 +59,10 @@ namespace Dynarec {
void recBC1T(); void recBC1T();
void recBC1FL(); void recBC1FL();
void recBC1TL(); void recBC1TL();
} }
} } } // namespace COP1
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -75,15 +75,15 @@ namespace DOUBLE {
#define _Fd_ _Sa_ #define _Fd_ _Sa_
// FCR31 Flags // FCR31 Flags
#define FPUflagC 0X00800000 #define FPUflagC 0x00800000
#define FPUflagI 0X00020000 #define FPUflagI 0x00020000
#define FPUflagD 0X00010000 #define FPUflagD 0x00010000
#define FPUflagO 0X00008000 #define FPUflagO 0x00008000
#define FPUflagU 0X00004000 #define FPUflagU 0x00004000
#define FPUflagSI 0X00000040 #define FPUflagSI 0x00000040
#define FPUflagSD 0X00000020 #define FPUflagSD 0x00000020
#define FPUflagSO 0X00000010 #define FPUflagSO 0x00000010
#define FPUflagSU 0X00000008 #define FPUflagSU 0x00000008
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -166,12 +166,10 @@ void ToDouble(int reg)
// DOUBLE -> PS2 // DOUBLE -> PS2
//------------------------------------------------------------------ //------------------------------------------------------------------
/* // If FPU_RESULT is defined, results are more like the real PS2's FPU.
if FPU_RESULT is defined, results are more like the real PS2's FPU. But new issues may happen if // But new issues may happen if the VU isn't clamping all operands since games may transfer FPU results into the VU.
the VU isn't clamping all operands since games may transfer FPU results into the VU. // Ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs).
Ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs) // Otherwise, results are still usually better than iFPU.cpp.
otherwise, results are still usually better than iFPU.cpp.
*/
// ToPS2FPU_Full - converts double-precision IEEE float to single-precision PS2 float // ToPS2FPU_Full - converts double-precision IEEE float to single-precision PS2 float
@ -290,25 +288,52 @@ void SetMaxValue(int regd)
} }
} }
#define GET_S(sreg) { \ #define GET_S(sreg) \
if( info & PROCESS_EE_S ) xMOVSS(xRegisterSSE(sreg), xRegisterSSE(EEREC_S)); \ do { \
else xMOVSSZX(xRegisterSSE(sreg), ptr[&fpuRegs.fpr[_Fs_]]); } if (info & PROCESS_EE_S) \
xMOVSS(xRegisterSSE(sreg), xRegisterSSE(EEREC_S)); \
else \
xMOVSSZX(xRegisterSSE(sreg), ptr[&fpuRegs.fpr[_Fs_]]); \
} while (0)
#define ALLOC_S(sreg) { (sreg) = _allocTempXMMreg(XMMT_FPS, -1); GET_S(sreg); } #define ALLOC_S(sreg) \
do { \
(sreg) = _allocTempXMMreg(XMMT_FPS, -1); \
GET_S(sreg); \
} while (0)
#define GET_T(treg) { \ #define GET_T(treg) \
if( info & PROCESS_EE_T ) xMOVSS(xRegisterSSE(treg), xRegisterSSE(EEREC_T)); \ do { \
else xMOVSSZX(xRegisterSSE(treg), ptr[&fpuRegs.fpr[_Ft_]]); } if (info & PROCESS_EE_T) \
xMOVSS(xRegisterSSE(treg), xRegisterSSE(EEREC_T)); \
else \
xMOVSSZX(xRegisterSSE(treg), ptr[&fpuRegs.fpr[_Ft_]]); \
} while (0)
#define ALLOC_T(treg) { (treg) = _allocTempXMMreg(XMMT_FPS, -1); GET_T(treg); } #define ALLOC_T(treg) \
do { \
(treg) = _allocTempXMMreg(XMMT_FPS, -1); \
GET_T(treg); \
} while (0)
#define GET_ACC(areg) { \ #define GET_ACC(areg) \
if( info & PROCESS_EE_ACC ) xMOVSS(xRegisterSSE(areg), xRegisterSSE(EEREC_ACC)); \ do { \
else xMOVSSZX(xRegisterSSE(areg), ptr[&fpuRegs.ACC]); } if (info & PROCESS_EE_ACC) \
xMOVSS(xRegisterSSE(areg), xRegisterSSE(EEREC_ACC)); \
else \
xMOVSSZX(xRegisterSSE(areg), ptr[&fpuRegs.ACC]); \
} while (0)
#define ALLOC_ACC(areg) { (areg) = _allocTempXMMreg(XMMT_FPS, -1); GET_ACC(areg); } #define ALLOC_ACC(areg) \
do { \
(areg) = _allocTempXMMreg(XMMT_FPS, -1); \
GET_ACC(areg); \
} while (0)
#define CLEAR_OU_FLAGS { xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO | FPUflagU)); } #define CLEAR_OU_FLAGS \
do { \
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO | FPUflagU)); \
} while (0)
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -542,13 +567,11 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT);
void recCVT_S_xmm(int info) void recCVT_S_xmm(int info)
{ {
EE::Profiler.EmitOp(eeOpcode::CVTS_F); EE::Profiler.EmitOp(eeOpcode::CVTS_F);
if( !(info&PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info&PROCESS_EE_MODEWRITES)) ) { if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES)))
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]); xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
} else
else {
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
} }
}
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
@ -690,7 +713,8 @@ void recDIV_S_xmm(int info)
xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(sreg)); xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(sreg));
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); if (roundmodeFlag)
xLDMXCSR(g_sseMXCSR);
_freeXMMreg(sreg); _freeXMMreg(treg); _freeXMMreg(sreg); _freeXMMreg(treg);
} }
@ -786,7 +810,7 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X
static const __aligned16 u32 minmax_mask[8] = static const __aligned16 u32 minmax_mask[8] =
{ {
0xffffffff, 0x80000000, 0, 0, 0xffffffff, 0x80000000, 0, 0,
0, 0x40000000, 0, 0 0, 0x40000000, 0, 0,
}; };
// FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info. // FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info.
void recMINMAX(int info, bool ismin) void recMINMAX(int info, bool ismin)
@ -957,7 +981,8 @@ void recSQRT_S_xmm(int info)
GET_T(EEREC_D); GET_T(EEREC_D);
if (FPU_FLAGS_ID) { if (FPU_FLAGS_ID)
{
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
//--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say) //--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say)
@ -980,9 +1005,8 @@ void recSQRT_S_xmm(int info)
ToPS2FPU(EEREC_D, false, t1reg, false); ToPS2FPU(EEREC_D, false, t1reg, false);
if (roundmodeFlag == 1) { if (roundmodeFlag == 1)
xLDMXCSR(g_sseMXCSR); xLDMXCSR(g_sseMXCSR);
}
_freeX86reg(tempReg); _freeX86reg(tempReg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
@ -1091,11 +1115,16 @@ void recRSQRT_S_xmm(int info)
_freeXMMreg(treg); _freeXMMreg(sreg); _freeXMMreg(treg); _freeXMMreg(sreg);
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); if (roundmodeFlag)
xLDMXCSR(g_sseMXCSR);
} }
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
} } } } } } // namespace DOUBLE
} // namespace COP1
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -43,8 +43,8 @@ namespace OpcodeImpl {
void recDIV1(); void recDIV1();
void recDIVU1(); void recDIVU1();
namespace MMI namespace MMI {
{
void recPLZCW(); void recPLZCW();
void recMMI0(); void recMMI0();
void recMMI1(); void recMMI1();
@ -138,7 +138,9 @@ namespace MMI
void recPOR(); void recPOR();
void recPCPYH(); void recPCPYH();
} } } } } // namespace MMI
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -31,4 +31,3 @@ void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR)
_mm_setcsr(g_sseMXCSR.bitmask); _mm_setcsr(g_sseMXCSR.bitmask);
} }

View File

@ -236,7 +236,8 @@ static void iIopDumpBlock( int startpc, u8 * ptr )
f.Printf("Dump PSX register data: 0x%x\n\n", (uptr)&psxRegs); f.Printf("Dump PSX register data: 0x%x\n\n", (uptr)&psxRegs);
for ( i = startpc; i < s_nEndBlock; i += 4 ) { for (i = startpc; i < s_nEndBlock; i += 4)
{
f.Printf("%s\n", disR3000AF(iopMemRead32(i), i)); f.Printf("%s\n", disR3000AF(iopMemRead32(i), i));
} }
@ -245,32 +246,41 @@ static void iIopDumpBlock( int startpc, u8 * ptr )
memzero(used); memzero(used);
numused = 0; numused = 0;
for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { for (i = 0; i < ArraySize(s_pInstCache->regs); ++i)
if( s_pInstCache->regs[i] & EEINST_USED ) { {
if (s_pInstCache->regs[i] & EEINST_USED)
{
used[i] = 1; used[i] = 1;
numused++; numused++;
} }
} }
f.Printf(" "); f.Printf(" ");
for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { for (i = 0; i < ArraySize(s_pInstCache->regs); ++i)
if( used[i] ) f.Printf("%2d ", i); {
if (used[i])
f.Printf("%2d ", i);
} }
f.Printf("\n"); f.Printf("\n");
f.Printf(" "); f.Printf(" ");
for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { for (i = 0; i < ArraySize(s_pInstCache->regs); ++i)
if( used[i] ) f.Printf("%s ", disRNameGPR[i]); {
if (used[i])
f.Printf("%s ", disRNameGPR[i]);
} }
f.Printf("\n"); f.Printf("\n");
pcur = s_pInstCache + 1; pcur = s_pInstCache + 1;
for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { for (i = 0; i < (s_nEndBlock - startpc) / 4; ++i, ++pcur)
{
f.Printf("%2d: %2.2x ", i + 1, pcur->info); f.Printf("%2d: %2.2x ", i + 1, pcur->info);
count = 1; count = 1;
for(j = 0; j < ArraySize(s_pInstCache->regs); j++) { for (j = 0; j < ArraySize(s_pInstCache->regs); j++)
if( used[j] ) { {
if (used[j])
{
f.Printf("%2.2x%s", pcur->regs[j], ((count % 8) && count < numused) ? "_" : " "); f.Printf("%2.2x%s", pcur->regs[j], ((count % 8) && count < numused) ? "_" : " ");
++count; ++count;
} }
@ -296,7 +306,8 @@ static void iIopDumpBlock( int startpc, u8 * ptr )
u8 _psxLoadWritesRs(u32 tempcode) u8 _psxLoadWritesRs(u32 tempcode)
{ {
switch(tempcode>>26) { switch (tempcode >> 26)
{
case 32: case 33: case 34: case 35: case 36: case 37: case 38: case 32: case 33: case 34: case 35: case 36: case 37: case 38:
return ((tempcode >> 21) & 0x1f) == ((tempcode >> 16) & 0x1f); // rs==rt return ((tempcode >> 21) & 0x1f) == ((tempcode >> 16) & 0x1f); // rs==rt
} }
@ -305,7 +316,8 @@ u8 _psxLoadWritesRs(u32 tempcode)
u8 _psxIsLoadStore(u32 tempcode) u8 _psxIsLoadStore(u32 tempcode)
{ {
switch(tempcode>>26) { switch (tempcode >> 26)
{
case 32: case 33: case 34: case 35: case 36: case 37: case 38: case 32: case 33: case 34: case 35: case 36: case 37: case 38:
// 4 byte stores // 4 byte stores
case 40: case 41: case 42: case 43: case 46: case 40: case 41: case 42: case 43: case 46:
@ -317,16 +329,24 @@ u8 _psxIsLoadStore(u32 tempcode)
void _psxFlushAllUnused() void _psxFlushAllUnused()
{ {
int i; int i;
for(i = 0; i < 34; ++i) { for (i = 0; i < 34; ++i)
if( psxpc < s_nEndBlock ) { {
if (psxpc < s_nEndBlock)
{
if ((g_pCurInstInfo[1].regs[i] & EEINST_USED)) if ((g_pCurInstInfo[1].regs[i] & EEINST_USED))
continue; continue;
} }
else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED)) else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED))
{
continue; continue;
}
if( i < 32 && PSX_IS_CONST1(i) ) _psxFlushConstReg(i); if (i < 32 && PSX_IS_CONST1(i))
else { {
_psxFlushConstReg(i);
}
else
{
_deleteX86reg(X86TYPE_PSX, i, 1); _deleteX86reg(X86TYPE_PSX, i, 1);
} }
} }
@ -335,9 +355,11 @@ void _psxFlushAllUnused()
int _psxFlushUnusedConstReg() int _psxFlushUnusedConstReg()
{ {
int i; int i;
for(i = 1; i < 32; ++i) { for (i = 1; i < 32; ++i)
{
if ((g_psxHasConstReg & (1 << i)) && !(g_psxFlushedConstReg & (1 << i)) && if ((g_psxHasConstReg & (1 << i)) && !(g_psxFlushedConstReg & (1 << i)) &&
!_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-psxpc)/4, XMMTYPE_GPRREG, i) ) { !_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - psxpc) / 4, XMMTYPE_GPRREG, i))
{
// check if will be written in the future // check if will be written in the future
xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]); xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]);
@ -356,7 +378,8 @@ void _psxFlushCachedRegs()
void _psxFlushConstReg(int reg) void _psxFlushConstReg(int reg)
{ {
if( PSX_IS_CONST1( reg ) && !(g_psxFlushedConstReg&(1<<reg)) ) { if (PSX_IS_CONST1(reg) && !(g_psxFlushedConstReg & (1 << reg)))
{
xMOV(ptr32[&psxRegs.GPR.r[reg]], g_psxConstRegs[reg]); xMOV(ptr32[&psxRegs.GPR.r[reg]], g_psxConstRegs[reg]);
g_psxFlushedConstReg |= (1 << reg); g_psxFlushedConstReg |= (1 << reg);
} }
@ -369,10 +392,13 @@ void _psxFlushConstRegs()
// flush constants // flush constants
// ignore r0 // ignore r0
for(i = 1; i < 32; ++i) { for (i = 1; i < 32; ++i)
if( g_psxHasConstReg & (1<<i) ) { {
if (g_psxHasConstReg & (1 << i))
{
if( !(g_psxFlushedConstReg&(1<<i)) ) { if (!(g_psxFlushedConstReg & (1 << i)))
{
xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]); xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]);
g_psxFlushedConstReg |= 1 << i; g_psxFlushedConstReg |= 1 << i;
} }
@ -385,8 +411,10 @@ void _psxFlushConstRegs()
void _psxDeleteReg(int reg, int flush) void _psxDeleteReg(int reg, int flush)
{ {
if( !reg ) return; if (!reg)
if( flush && PSX_IS_CONST1(reg) ) { return;
if (flush && PSX_IS_CONST1(reg))
{
_psxFlushConstReg(reg); _psxFlushConstReg(reg);
return; return;
} }
@ -398,7 +426,8 @@ void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
{ {
if (PSX_IS_CONST1(fromgpr)) if (PSX_IS_CONST1(fromgpr))
xMOV(to, g_psxConstRegs[fromgpr]); xMOV(to, g_psxConstRegs[fromgpr]);
else { else
{
// check x86 // check x86
xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]); xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]);
} }
@ -437,7 +466,8 @@ void _psxFlushCall(int flushtype)
_freeX86reg(ecx); _freeX86reg(ecx);
_freeX86reg(edx); _freeX86reg(edx);
if ((flushtype & FLUSH_PC)/*&& !g_cpuFlushedPC*/) { if ((flushtype & FLUSH_PC) /*&& !g_cpuFlushedPC*/)
{
xMOV(ptr32[&psxRegs.pc], psxpc); xMOV(ptr32[&psxRegs.pc], psxpc);
//g_cpuFlushedPC = true; //g_cpuFlushedPC = true;
} }
@ -483,7 +513,8 @@ void _psxOnWriteReg(int reg)
// rd = rs op rt // rd = rs op rt
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode) void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode)
{ {
if ( ! _Rd_ ) return; if (!_Rd_)
return;
// for now, don't support xmm // for now, don't support xmm
@ -491,19 +522,22 @@ void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode,
_deleteX86reg(X86TYPE_PSX, _Rt_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 1);
_deleteX86reg(X86TYPE_PSX, _Rd_, 0); _deleteX86reg(X86TYPE_PSX, _Rd_, 0);
if( PSX_IS_CONST2(_Rs_, _Rt_) ) { if (PSX_IS_CONST2(_Rs_, _Rt_))
{
PSX_SET_CONST(_Rd_); PSX_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
} }
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
constscode(0); constscode(0);
PSX_DEL_CONST(_Rd_); PSX_DEL_CONST(_Rd_);
return; return;
} }
if( PSX_IS_CONST1(_Rt_) ) { if (PSX_IS_CONST1(_Rt_))
{
consttcode(0); consttcode(0);
PSX_DEL_CONST(_Rd_); PSX_DEL_CONST(_Rd_);
return; return;
@ -538,7 +572,8 @@ static void psxRecompileIrxImport()
xMOV(ptr32[&psxRegs.pc], psxpc); xMOV(ptr32[&psxRegs.pc], psxpc);
_psxFlushCall(FLUSH_NODESTROY); _psxFlushCall(FLUSH_NODESTROY);
if (SysTraceActive(IOP.Bios)) { if (SysTraceActive(IOP.Bios))
{
xPUSH((uptr)funcname); xPUSH((uptr)funcname);
xFastCall((void*)irxImportLog_rec, import_table, index); xFastCall((void*)irxImportLog_rec, import_table, index);
} }
@ -546,7 +581,8 @@ static void psxRecompileIrxImport()
if (debug) if (debug)
xFastCall((void*)debug); xFastCall((void*)debug);
if (hle) { if (hle)
{
xFastCall((void*)hle); xFastCall((void*)hle);
xTEST(eax, eax); xTEST(eax, eax);
xJNZ(iopDispatcherReg); xJNZ(iopDispatcherReg);
@ -556,7 +592,8 @@ static void psxRecompileIrxImport()
// rt = rs op imm16 // rt = rs op imm16
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode) void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
{ {
if ( ! _Rt_ ) { if (!_Rt_)
{
// check for iop module import table magic // check for iop module import table magic
if (psxRegs.code >> 16 == 0x2400) if (psxRegs.code >> 16 == 0x2400)
psxRecompileIrxImport(); psxRecompileIrxImport();
@ -568,7 +605,8 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
_deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rs_, 1);
_deleteX86reg(X86TYPE_PSX, _Rt_, 0); _deleteX86reg(X86TYPE_PSX, _Rt_, 0);
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
PSX_SET_CONST(_Rt_); PSX_SET_CONST(_Rt_);
constcode(); constcode();
return; return;
@ -581,14 +619,16 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
// rd = rt op sa // rd = rt op sa
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode) void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
{ {
if ( ! _Rd_ ) return; if (!_Rd_)
return;
// for now, don't support xmm // for now, don't support xmm
_deleteX86reg(X86TYPE_PSX, _Rt_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 1);
_deleteX86reg(X86TYPE_PSX, _Rd_, 0); _deleteX86reg(X86TYPE_PSX, _Rd_, 0);
if( PSX_IS_CONST1(_Rt_) ) { if (PSX_IS_CONST1(_Rt_))
{
PSX_SET_CONST(_Rd_); PSX_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
@ -604,22 +644,26 @@ void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode,
_deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rs_, 1);
_deleteX86reg(X86TYPE_PSX, _Rt_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 1);
if( LOHI ) { if (LOHI)
{
_deleteX86reg(X86TYPE_PSX, PSX_HI, 1); _deleteX86reg(X86TYPE_PSX, PSX_HI, 1);
_deleteX86reg(X86TYPE_PSX, PSX_LO, 1); _deleteX86reg(X86TYPE_PSX, PSX_LO, 1);
} }
if( PSX_IS_CONST2(_Rs_, _Rt_) ) { if (PSX_IS_CONST2(_Rs_, _Rt_))
{
constcode(); constcode();
return; return;
} }
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
constscode(0); constscode(0);
return; return;
} }
if( PSX_IS_CONST1(_Rt_) ) { if (PSX_IS_CONST1(_Rt_))
{
consttcode(0); consttcode(0);
return; return;
} }
@ -635,15 +679,18 @@ static const uint m_recBlockAllocSize =
static void recReserveCache() static void recReserveCache()
{ {
if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb); if (!recMem)
recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb);
recMem->SetProfilerName("IOPrec"); recMem->SetProfilerName("IOPrec");
while (!recMem->IsOk()) while (!recMem->IsOk())
{ {
if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::IOPrecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) break; if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::IOPrecOffset, m_ConfiguredCacheReserve * _1mb) != NULL)
break;
// If it failed, then try again (if possible): // If it failed, then try again (if possible):
if (m_ConfiguredCacheReserve < 4) break; if (m_ConfiguredCacheReserve < 4)
break;
m_ConfiguredCacheReserve /= 2; m_ConfiguredCacheReserve /= 2;
} }
@ -821,7 +868,8 @@ static __fi u32 psxRecClearMem(u32 pc)
int blockidx = recBlocks.Index(pc); int blockidx = recBlocks.Index(pc);
pxAssert(blockidx != -1); pxAssert(blockidx != -1);
while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1]) { while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1])
{
if (pexblock->startpc + pexblock->size * 4 <= lowerextent) if (pexblock->startpc + pexblock->size * 4 <= lowerextent)
break; break;
@ -831,7 +879,8 @@ static __fi u32 psxRecClearMem(u32 pc)
int toRemoveFirst = blockidx; int toRemoveFirst = blockidx;
while (BASEBLOCKEX* pexblock = recBlocks[blockidx]) { while (BASEBLOCKEX* pexblock = recBlocks[blockidx])
{
if (pexblock->startpc >= upperextent) if (pexblock->startpc >= upperextent)
break; break;
@ -841,14 +890,16 @@ static __fi u32 psxRecClearMem(u32 pc)
blockidx++; blockidx++;
} }
if(toRemoveFirst != blockidx) { if (toRemoveFirst != blockidx)
{
recBlocks.Remove(toRemoveFirst, (blockidx - 1)); recBlocks.Remove(toRemoveFirst, (blockidx - 1));
} }
blockidx = 0; blockidx = 0;
while (BASEBLOCKEX* pexblock = recBlocks[blockidx++]) while (BASEBLOCKEX* pexblock = recBlocks[blockidx++])
{ {
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) { if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4)
{
DevCon.Error("[IOP] Impossible block clearing failure"); DevCon.Error("[IOP] Impossible block clearing failure");
pxFailDev("[IOP] Impossible block clearing failure"); pxFailDev("[IOP] Impossible block clearing failure");
} }
@ -870,13 +921,15 @@ void psxSetBranchReg(u32 reg)
{ {
psxbranch = 1; psxbranch = 1;
if( reg != 0xffffffff ) { if (reg != 0xffffffff)
{
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(calleeSavedReg2d, reg); _psxMoveGPRtoR(calleeSavedReg2d, reg);
psxRecompileNextInstruction(1); psxRecompileNextInstruction(1);
if( x86regs[calleeSavedReg2d.GetId()].inuse ) { if (x86regs[calleeSavedReg2d.GetId()].inuse)
{
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK);
xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
@ -884,7 +937,8 @@ void psxSetBranchReg(u32 reg)
xOR(calleeSavedReg2d, calleeSavedReg2d); xOR(calleeSavedReg2d, calleeSavedReg2d);
#endif #endif
} }
else { else
{
xMOV(eax, ptr32[&g_recWriteback]); xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr32[&psxRegs.pc], eax); xMOV(ptr32[&psxRegs.pc], eax);
@ -968,7 +1022,8 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xFastCall((void*)iopEventTest); xFastCall((void*)iopEventTest);
if( newpc != 0xffffffff ) { if (newpc != 0xffffffff)
{
xCMP(ptr32[&psxRegs.pc], newpc); xCMP(ptr32[&psxRegs.pc], newpc);
xJNE(iopDispatcherReg); xJNE(iopDispatcherReg);
} }
@ -1047,14 +1102,17 @@ void psxDynarecCheckBreakpoint()
int bpFlags = psxIsBreakpointNeeded(pc); int bpFlags = psxIsBreakpointNeeded(pc);
bool hit = false; bool hit = false;
//check breakpoint at current pc //check breakpoint at current pc
if (bpFlags & 1) { if (bpFlags & 1)
{
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc); auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc);
if (cond == NULL || cond->Evaluate()) { if (cond == NULL || cond->Evaluate())
{
hit = true; hit = true;
} }
} }
//check breakpoint in delay slot //check breakpoint in delay slot
if (bpFlags & 2) { if (bpFlags & 2)
{
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc + 4); auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc + 4);
if (cond == NULL || cond->Evaluate()) if (cond == NULL || cond->Evaluate())
hit = true; hit = true;
@ -1129,11 +1187,13 @@ void psxRecMemcheck(u32 op, u32 bits, bool store)
xForwardJGE8 next2; // if start >= address+size then goto next2 xForwardJGE8 next2; // if start >= address+size then goto next2
// hit the breakpoint // hit the breakpoint
if (checks[i].result & MEMCHECK_LOG) { if (checks[i].result & MEMCHECK_LOG)
{
xMOV(edx, store); xMOV(edx, store);
xFastCall((void*)psxDynarecMemLogcheck, ecx, edx); xFastCall((void*)psxDynarecMemLogcheck, ecx, edx);
} }
if (checks[i].result & MEMCHECK_BREAK) { if (checks[i].result & MEMCHECK_BREAK)
{
xFastCall((void*)psxDynarecMemcheck); xFastCall((void*)psxDynarecMemcheck);
} }
@ -1169,21 +1229,11 @@ void psxEncodeMemcheck()
bool store = (opcode.flags & IS_STORE) != 0; bool store = (opcode.flags & IS_STORE) != 0;
switch (opcode.flags & MEMTYPE_MASK) switch (opcode.flags & MEMTYPE_MASK)
{ {
case MEMTYPE_BYTE: case MEMTYPE_BYTE: psxRecMemcheck(op, 8, store); break;
psxRecMemcheck(op, 8, store); case MEMTYPE_HALF: psxRecMemcheck(op, 16, store); break;
break; case MEMTYPE_WORD: psxRecMemcheck(op, 32, store); break;
case MEMTYPE_HALF: case MEMTYPE_DWORD: psxRecMemcheck(op, 64, store); break;
psxRecMemcheck(op, 16, store); case MEMTYPE_QWORD: psxRecMemcheck(op, 128, store); break;
break;
case MEMTYPE_WORD:
psxRecMemcheck(op, 32, store);
break;
case MEMTYPE_DWORD:
psxRecMemcheck(op, 64, store);
break;
case MEMTYPE_QWORD:
psxRecMemcheck(op, 128, store);
break;
} }
} }
@ -1199,7 +1249,8 @@ void psxRecompileNextInstruction(int delayslot)
psxEncodeMemcheck(); psxEncodeMemcheck();
} }
if( IsDebugBuild ) { if (IsDebugBuild)
{
xNOP(); xNOP();
xMOV(eax, psxpc); xMOV(eax, psxpc);
} }
@ -1232,7 +1283,8 @@ static void __fastcall PreBlockCheck( u32 blockpc )
{ {
curcount++; curcount++;
if( curcount > skip ) { if (curcount > skip)
{
iDumpPsxRegisters(blockpc, 1); iDumpPsxRegisters(blockpc, 1);
curcount = 0; curcount = 0;
} }
@ -1248,8 +1300,10 @@ static void __fastcall iopRecRecompile( const u32 startpc )
u32 willbranch3 = 0; u32 willbranch3 = 0;
// Inject IRX hack // Inject IRX hack
if (startpc == 0x1630 && g_Conf->CurrentIRX.Length() > 3) { if (startpc == 0x1630 && g_Conf->CurrentIRX.Length() > 3)
if (iopMemRead32(0x20018) == 0x1F) { {
if (iopMemRead32(0x20018) == 0x1F)
{
// FIXME do I need to increase the module count (0x1F -> 0x20) // FIXME do I need to increase the module count (0x1F -> 0x20)
iopMemWrite32(0x20094, 0xbffc0000); iopMemWrite32(0x20094, 0xbffc0000);
} }
@ -1264,7 +1318,8 @@ static void __fastcall iopRecRecompile( const u32 startpc )
pxAssert(startpc); pxAssert(startpc);
// if recPtr reached the mem limit reset whole mem // if recPtr reached the mem limit reset whole mem
if (recPtr >= (recMem->GetPtrEnd() - _64kb)) { if (recPtr >= (recMem->GetPtrEnd() - _64kb))
{
recResetIOP(); recResetIOP();
} }
@ -1293,7 +1348,8 @@ static void __fastcall iopRecRecompile( const u32 startpc )
_initX86regs(); _initX86regs();
if ((psxHu32(HW_ICFG) & 8) && (HWADDR(startpc) == 0xa0 || HWADDR(startpc) == 0xb0 || HWADDR(startpc) == 0xc0)) { if ((psxHu32(HW_ICFG) & 8) && (HWADDR(startpc) == 0xa0 || HWADDR(startpc) == 0xb0 || HWADDR(startpc) == 0xc0))
{
xFastCall((void*)psxBiosCall); xFastCall((void*)psxBiosCall);
xTEST(al, al); xTEST(al, al);
xJNZ(iopDispatcherReg); xJNZ(iopDispatcherReg);
@ -1309,11 +1365,13 @@ static void __fastcall iopRecRecompile( const u32 startpc )
s_nEndBlock = 0xffffffff; s_nEndBlock = 0xffffffff;
s_branchTo = -1; s_branchTo = -1;
while(1) { while (1)
{
BASEBLOCK* pblock = PSX_GETBLOCK(i); BASEBLOCK* pblock = PSX_GETBLOCK(i);
if (i != startpc if (i != startpc
&& pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetFnptr() != (uptr)iopJITCompile
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock) { && pblock->GetFnptr() != (uptr)iopJITCompileInBlock)
{
// branch = 3 // branch = 3
willbranch3 = 1; willbranch3 = 1;
s_nEndBlock = i; s_nEndBlock = i;
@ -1322,26 +1380,26 @@ static void __fastcall iopRecRecompile( const u32 startpc )
psxRegs.code = iopMemRead32(i); psxRegs.code = iopMemRead32(i);
switch(psxRegs.code >> 26) { switch (psxRegs.code >> 26)
{
case 0: // special case 0: // special
if (_Funct_ == 8 || _Funct_ == 9)
if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR { // JR, JALR
s_nEndBlock = i + 8; s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
break; break;
case 1: // regimm case 1: // regimm
if (_Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17)
if( _Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17 ) { {
s_branchTo = _Imm_ * 4 + i + 4; s_branchTo = _Imm_ * 4 + i + 4;
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; if (s_branchTo > startpc && s_branchTo < i)
else s_nEndBlock = i+8; s_nEndBlock = s_branchTo;
else
s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
break; break;
case 2: // J case 2: // J
@ -1352,11 +1410,11 @@ static void __fastcall iopRecRecompile( const u32 startpc )
// branches // branches
case 4: case 5: case 6: case 7: case 4: case 5: case 6: case 7:
s_branchTo = _Imm_ * 4 + i + 4; s_branchTo = _Imm_ * 4 + i + 4;
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; if (s_branchTo > startpc && s_branchTo < i)
else s_nEndBlock = i+8; s_nEndBlock = s_branchTo;
else
s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
@ -1366,11 +1424,15 @@ static void __fastcall iopRecRecompile( const u32 startpc )
StartRecomp: StartRecomp:
s_nBlockFF = false; s_nBlockFF = false;
if (s_branchTo == startpc) { if (s_branchTo == startpc)
{
s_nBlockFF = true; s_nBlockFF = true;
for (i = startpc; i < s_nEndBlock; i += 4) { for (i = startpc; i < s_nEndBlock; i += 4)
if (i != s_nEndBlock - 8) { {
switch (iopMemRead32(i)) { if (i != s_nEndBlock - 8)
{
switch (iopMemRead32(i))
{
case 0: // nop case 0: // nop
break; break;
default: default:
@ -1384,7 +1446,8 @@ StartRecomp:
{ {
EEINST* pcur; EEINST* pcur;
if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { if (s_nInstCacheSize < (s_nEndBlock - startpc) / 4 + 1)
{
free(s_pInstCache); free(s_pInstCache);
s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10; s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10;
s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize);
@ -1395,7 +1458,8 @@ StartRecomp:
_recClearInst(pcur); _recClearInst(pcur);
pcur->info = 0; pcur->info = 0;
for(i = s_nEndBlock; i > startpc; i -= 4 ) { for (i = s_nEndBlock; i > startpc; i -= 4)
{
psxRegs.code = iopMemRead32(i - 4); psxRegs.code = iopMemRead32(i - 4);
pcur[-1] = pcur[0]; pcur[-1] = pcur[0];
rpsxpropBSC(pcur - 1, pcur); rpsxpropBSC(pcur - 1, pcur);
@ -1406,8 +1470,10 @@ StartRecomp:
// dump code // dump code
if (IsDebugBuild) if (IsDebugBuild)
{ {
for(i = 0; i < ArraySize(s_psxrecblocks); ++i) { for (i = 0; i < ArraySize(s_psxrecblocks); ++i)
if( startpc == s_psxrecblocks[i] ) { {
if (startpc == s_psxrecblocks[i])
{
iIopDumpBlock(startpc, recPtr); iIopDumpBlock(startpc, recPtr);
} }
} }
@ -1417,7 +1483,8 @@ StartRecomp:
} }
g_pCurInstInfo = s_pInstCache; g_pCurInstInfo = s_pInstCache;
while (!psxbranch && psxpc < s_nEndBlock) { while (!psxbranch && psxpc < s_nEndBlock)
{
psxRecompileNextInstruction(0); psxRecompileNextInstruction(0);
} }
@ -1427,7 +1494,8 @@ StartRecomp:
pxAssert((psxpc - startpc) >> 2 <= 0xffff); pxAssert((psxpc - startpc) >> 2 <= 0xffff);
s_pCurBlockEx->size = (psxpc - startpc) >> 2; s_pCurBlockEx->size = (psxpc - startpc) >> 2;
for(i = 1; i < (u32)s_pCurBlockEx->size; ++i) { for (i = 1; i < (u32)s_pCurBlockEx->size; ++i)
{
if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile) if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile)
s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock); s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock);
} }
@ -1435,22 +1503,26 @@ StartRecomp:
if (!(psxpc & 0x10000000)) if (!(psxpc & 0x10000000))
g_psxMaxRecMem = std::max((psxpc & ~0xa0000000), g_psxMaxRecMem); g_psxMaxRecMem = std::max((psxpc & ~0xa0000000), g_psxMaxRecMem);
if( psxbranch == 2 ) { if (psxbranch == 2)
{
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
iPsxBranchTest(0xffffffff, 1); iPsxBranchTest(0xffffffff, 1);
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5)); JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5));
} }
else { else
if( psxbranch ) pxAssert( !willbranch3 ); {
if (psxbranch)
pxAssert(!willbranch3);
else else
{ {
xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles()); xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles());
xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles() * 8); xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles() * 8);
} }
if (willbranch3 || !psxbranch) { if (willbranch3 || !psxbranch)
{
pxAssert(psxpc == s_nEndBlock); pxAssert(psxpc == s_nEndBlock);
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
xMOV(ptr32[&psxRegs.pc], psxpc); xMOV(ptr32[&psxRegs.pc], psxpc);
@ -1495,4 +1567,3 @@ R3000Acpu psxRec = {
recGetCacheReserve, recGetCacheReserve,
recSetCacheReserve recSetCacheReserve
}; };

View File

@ -70,15 +70,19 @@ extern void psxRecompileNextInstruction(int delayslot);
#define PSX_IS_CONST1(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg)))) #define PSX_IS_CONST1(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg))))
#define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg & (1 << (reg1))) && (g_psxHasConstReg & (1 << (reg2)))) #define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg & (1 << (reg1))) && (g_psxHasConstReg & (1 << (reg2))))
#define PSX_SET_CONST(reg) { \ #define PSX_SET_CONST(reg) \
if( (reg) < 32 ) { \ { \
if ((reg) < 32) \
{ \
g_psxHasConstReg |= (1 << (reg)); \ g_psxHasConstReg |= (1 << (reg)); \
g_psxFlushedConstReg &= ~(1 << (reg)); \ g_psxFlushedConstReg &= ~(1 << (reg)); \
} \ } \
} }
#define PSX_DEL_CONST(reg) { \ #define PSX_DEL_CONST(reg) \
if( (reg) < 32 ) g_psxHasConstReg &= ~(1<<(reg)); \ { \
if ((reg) < 32) \
g_psxHasConstReg &= ~(1 << (reg)); \
} }
extern u32 g_psxConstRegs[32]; extern u32 g_psxConstRegs[32];

View File

@ -29,7 +29,8 @@ extern u32 g_psxMaxRecMem;
// R3000A instruction implementation // R3000A instruction implementation
#define REC_FUNC(f) \ #define REC_FUNC(f) \
static void rpsx##f() { \ static void rpsx##f() \
{ \
xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \
_psxFlushCall(FLUSH_EVERYTHING); \ _psxFlushCall(FLUSH_EVERYTHING); \
xFastCall((void*)(uptr)psx##f); \ xFastCall((void*)(uptr)psx##f); \
@ -39,7 +40,8 @@ static void rpsx##f() { \
// Same as above but with a different naming convension (to avoid various rename) // Same as above but with a different naming convension (to avoid various rename)
#define REC_GTE_FUNC(f) \ #define REC_GTE_FUNC(f) \
static void rgte##f() { \ static void rgte##f() \
{ \
xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \
_psxFlushCall(FLUSH_EVERYTHING); \ _psxFlushCall(FLUSH_EVERYTHING); \
xFastCall((void*)(uptr)gte##f); \ xFastCall((void*)(uptr)gte##f); \
@ -61,16 +63,22 @@ void rpsxADDIU_const()
// adds a constant to sreg and puts into dreg // adds a constant to sreg and puts into dreg
void rpsxADDconst(int dreg, int sreg, u32 off, int info) void rpsxADDconst(int dreg, int sreg, u32 off, int info)
{ {
if (sreg) { if (sreg)
if (sreg == dreg) { {
if (sreg == dreg)
{
xADD(ptr32[&psxRegs.GPR.r[dreg]], off); xADD(ptr32[&psxRegs.GPR.r[dreg]], off);
} else { }
else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
if (off) xADD(eax, off); if (off)
xADD(eax, off);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else
{
xMOV(ptr32[&psxRegs.GPR.r[dreg]], off); xMOV(ptr32[&psxRegs.GPR.r[dreg]], off);
} }
} }
@ -78,7 +86,8 @@ void rpsxADDconst(int dreg, int sreg, u32 off, int info)
void rpsxADDIU_(int info) void rpsxADDIU_(int info)
{ {
// Rt = Rs + Im // Rt = Rs + Im
if (!_Rt_) return; if (!_Rt_)
return;
rpsxADDconst(_Rt_, _Rs_, _Imm_, info); rpsxADDconst(_Rt_, _Rs_, _Imm_, info);
} }
@ -130,15 +139,21 @@ void rpsxANDI_const()
void rpsxANDconst(int info, int dreg, int sreg, u32 imm) void rpsxANDconst(int info, int dreg, int sreg, u32 imm)
{ {
if (imm) { if (imm)
if (sreg == dreg) { {
if (sreg == dreg)
{
xAND(ptr32[&psxRegs.GPR.r[dreg]], imm); xAND(ptr32[&psxRegs.GPR.r[dreg]], imm);
} else { }
else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xAND(eax, imm); xAND(eax, imm);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} else { }
else
{
xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0); xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0);
} }
} }
@ -155,18 +170,23 @@ void rpsxORI_const()
void rpsxORconst(int info, int dreg, int sreg, u32 imm) void rpsxORconst(int info, int dreg, int sreg, u32 imm)
{ {
if (imm) { if (imm)
if (sreg == dreg) { {
if (sreg == dreg)
{
xOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
} }
else { else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(eax, imm); xOR(eax, imm);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else
if( dreg != sreg ) { {
if (dreg != sreg)
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
@ -184,29 +204,37 @@ void rpsxXORI_const()
void rpsxXORconst(int info, int dreg, int sreg, u32 imm) void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
{ {
if( imm == 0xffffffff ) { if (imm == 0xffffffff)
if( dreg == sreg ) { {
if (dreg == sreg)
{
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx); xNOT(ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
else if (imm) { else if (imm)
{
if (sreg == dreg) { if (sreg == dreg)
{
xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
} }
else { else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xXOR(eax, imm); xXOR(eax, imm);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else
if( dreg != sreg ) { {
if (dreg != sreg)
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
@ -219,7 +247,8 @@ PSXRECOMPILE_CONSTCODE1(XORI);
void rpsxLUI() void rpsxLUI()
{ {
if(!_Rt_) return; if (!_Rt_)
return;
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
PSX_SET_CONST(_Rt_); PSX_SET_CONST(_Rt_);
@ -240,14 +269,21 @@ void rpsxADDU_constt(int info)
void rpsxADDU_(int info) void rpsxADDU_(int info)
{ {
if (_Rs_ && _Rt_) { if (_Rs_ && _Rt_)
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else if (_Rs_) { }
else if (_Rs_)
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
} else if (_Rt_) { }
else if (_Rt_)
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else { }
else
{
xXOR(eax, eax); xXOR(eax, eax);
} }
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
@ -275,13 +311,16 @@ void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_
void rpsxSUBU_(int info) void rpsxSUBU_(int info)
{ {
// Rd = Rs - Rt // Rd = Rs - Rt
if (!_Rd_) return; if (!_Rd_)
return;
if( _Rd_ == _Rs_ ) { if (_Rd_ == _Rs_)
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax); xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
else { else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
@ -294,7 +333,8 @@ void rpsxSUB() { rpsxSUBU(); }
void rpsxLogicalOp(int info, int op) void rpsxLogicalOp(int info, int op)
{ {
if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) { if (_Rd_ == _Rs_ || _Rd_ == _Rt_)
{
int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_; int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_;
xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]);
@ -309,7 +349,8 @@ void rpsxLogicalOp(int info, int op)
if (op == 3) if (op == 3)
xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]); xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]);
} }
else { else
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch (op) { switch (op) {
@ -368,23 +409,29 @@ void rpsxNOR_const()
void rpsxNORconst(int info, int dreg, int sreg, u32 imm) void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
{ {
if( imm ) { if (imm)
if( dreg == sreg ) { {
if (dreg == sreg)
{
xOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(ecx, imm); xOR(ecx, imm);
xNOT(ecx); xNOT(ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
else { else
if( dreg == sreg ) { {
if (dreg == sreg)
{
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else
{
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx); xNOT(ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
@ -442,7 +489,8 @@ void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[
void rpsxSLTU_(int info) void rpsxSLTU_(int info)
{ {
// Rd = Rs < Rt (unsigned) // Rd = Rs < Rt (unsigned)
if (!_Rd_) return; if (!_Rd_)
return;
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
@ -466,8 +514,10 @@ void rpsxMULTsuperconst(int info, int sreg, int imm, int sign)
{ {
// Lo/Hi = Rs * Rt (signed) // Lo/Hi = Rs * Rt (signed)
xMOV(eax, imm); xMOV(eax, imm);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]); if (sign)
else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]); xMUL(ptr32[&psxRegs.GPR.r[sreg]]);
else
xUMUL(ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr32[&psxRegs.GPR.n.hi], edx); xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
} }
@ -476,8 +526,10 @@ void rpsxMULTsuper(int info, int sign)
{ {
// Lo/Hi = Rs * Rt (signed) // Lo/Hi = Rs * Rt (signed)
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); if (sign)
else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
else
xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr32[&psxRegs.GPR.n.hi], edx); xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
} }
@ -515,22 +567,29 @@ void rpsxDIV_const()
* Quotient: 0x80000000 (-2147483648), and remainder: 0x00000000 (0) * Quotient: 0x80000000 (-2147483648), and remainder: 0x00000000 (0)
*/ */
// Of course x86 cpu does overflow ! // Of course x86 cpu does overflow !
if (g_psxConstRegs[_Rs_] == 0x80000000u && g_psxConstRegs[_Rt_] == 0xFFFFFFFFu) { if (g_psxConstRegs[_Rs_] == 0x80000000u && g_psxConstRegs[_Rt_] == 0xFFFFFFFFu)
{
xMOV(ptr32[&psxRegs.GPR.n.hi], 0); xMOV(ptr32[&psxRegs.GPR.n.hi], 0);
xMOV(ptr32[&psxRegs.GPR.n.lo], 0x80000000); xMOV(ptr32[&psxRegs.GPR.n.lo], 0x80000000);
return; return;
} }
if (g_psxConstRegs[_Rt_] != 0) { if (g_psxConstRegs[_Rt_] != 0)
{
lo = *(int*)&g_psxConstRegs[_Rs_] / *(int*)&g_psxConstRegs[_Rt_]; lo = *(int*)&g_psxConstRegs[_Rs_] / *(int*)&g_psxConstRegs[_Rt_];
hi = *(int*)&g_psxConstRegs[_Rs_] % *(int*)&g_psxConstRegs[_Rt_]; hi = *(int*)&g_psxConstRegs[_Rs_] % *(int*)&g_psxConstRegs[_Rt_];
xMOV(ptr32[&psxRegs.GPR.n.hi], hi); xMOV(ptr32[&psxRegs.GPR.n.hi], hi);
xMOV(ptr32[&psxRegs.GPR.n.lo], lo); xMOV(ptr32[&psxRegs.GPR.n.lo], lo);
} else { }
else
{
xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]);
if (g_psxConstRegs[_Rs_] & 0x80000000u) { if (g_psxConstRegs[_Rs_] & 0x80000000u)
{
xMOV(ptr32[&psxRegs.GPR.n.lo], 0x1); xMOV(ptr32[&psxRegs.GPR.n.lo], 0x1);
} else { }
else
{
xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu); xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu);
} }
} }
@ -581,16 +640,19 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
// Normal division // Normal division
x86SetJ8(cont3); x86SetJ8(cont3);
if( sign ) { if (sign)
{
xCDQ(); xCDQ();
xDIV(ecx); xDIV(ecx);
} }
else { else
{
xXOR(edx, edx); xXOR(edx, edx);
xUDIV(ecx); xUDIV(ecx);
} }
if (sign) x86SetJ8( end1 ); if (sign)
x86SetJ8(end1);
x86SetJ8(end2); x86SetJ8(end2);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
@ -608,12 +670,15 @@ void rpsxDIVU_const()
{ {
u32 lo, hi; u32 lo, hi;
if (g_psxConstRegs[_Rt_] != 0) { if (g_psxConstRegs[_Rt_] != 0)
{
lo = g_psxConstRegs[_Rs_] / g_psxConstRegs[_Rt_]; lo = g_psxConstRegs[_Rs_] / g_psxConstRegs[_Rt_];
hi = g_psxConstRegs[_Rs_] % g_psxConstRegs[_Rt_]; hi = g_psxConstRegs[_Rs_] % g_psxConstRegs[_Rt_];
xMOV(ptr32[&psxRegs.GPR.n.hi], hi); xMOV(ptr32[&psxRegs.GPR.n.hi], hi);
xMOV(ptr32[&psxRegs.GPR.n.lo], lo); xMOV(ptr32[&psxRegs.GPR.n.lo], lo);
} else { }
else
{
xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]);
xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu); xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu);
} }
@ -640,9 +705,11 @@ static void rpsxLB()
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_)
xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx); // returns value in EAX xFastCall((void*)iopMemRead8, ecx); // returns value in EAX
if (_Rt_) { if (_Rt_)
{
xMOVSX(eax, al); xMOVSX(eax, al);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
@ -656,9 +723,11 @@ static void rpsxLBU()
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_)
xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx); // returns value in EAX xFastCall((void*)iopMemRead8, ecx); // returns value in EAX
if (_Rt_) { if (_Rt_)
{
xMOVZX(eax, al); xMOVZX(eax, al);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
@ -672,9 +741,11 @@ static void rpsxLH()
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_)
xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx); // returns value in EAX xFastCall((void*)iopMemRead16, ecx); // returns value in EAX
if (_Rt_) { if (_Rt_)
{
xMOVSX(eax, ax); xMOVSX(eax, ax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
@ -688,9 +759,11 @@ static void rpsxLHU()
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_)
xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx); // returns value in EAX xFastCall((void*)iopMemRead16, ecx); // returns value in EAX
if (_Rt_) { if (_Rt_)
{
xMOVZX(eax, ax); xMOVZX(eax, ax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
@ -705,13 +778,15 @@ static void rpsxLW()
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_)
xADD(ecx, _Imm_);
xTEST(ecx, 0x10000000); xTEST(ecx, 0x10000000);
j8Ptr[0] = JZ8(0); j8Ptr[0] = JZ8(0);
xFastCall((void*)iopMemRead32, ecx); // returns value in EAX xFastCall((void*)iopMemRead32, ecx); // returns value in EAX
if (_Rt_) { if (_Rt_)
{
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
j8Ptr[1] = JMP8(0); j8Ptr[1] = JMP8(0);
@ -721,7 +796,8 @@ static void rpsxLW()
xAND(ecx, 0x1fffff); xAND(ecx, 0x1fffff);
xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]); xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]);
if (_Rt_) { if (_Rt_)
{
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx);
} }
@ -735,7 +811,8 @@ static void rpsxSB()
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_); if (_Imm_)
xADD(arg1regd, _Imm_);
xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]);
xFastCall((void*)iopMemWrite8, arg1regd, arg2regd); xFastCall((void*)iopMemWrite8, arg1regd, arg2regd);
} }
@ -746,7 +823,8 @@ static void rpsxSH()
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_); if (_Imm_)
xADD(arg1regd, _Imm_);
xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]);
xFastCall((void*)iopMemWrite16, arg1regd, arg2regd); xFastCall((void*)iopMemWrite16, arg1regd, arg2regd);
} }
@ -757,7 +835,8 @@ static void rpsxSW()
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_); if (_Imm_)
xADD(arg1regd, _Imm_);
xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]);
xFastCall((void*)iopMemWrite32, arg1regd, arg2regd); xFastCall((void*)iopMemWrite32, arg1regd, arg2regd);
} }
@ -772,17 +851,22 @@ void rpsxSLL_const()
void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype) void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype)
{ {
imm &= 0x1f; imm &= 0x1f;
if (imm) { if (imm)
if( rdreg == rtreg ) { {
switch(shifttype) { if (rdreg == rtreg)
{
switch (shifttype)
{
case 0: xSHL(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; case 0: xSHL(ptr32[&psxRegs.GPR.r[rdreg]], imm); break;
case 1: xSHR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; case 1: xSHR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break;
case 2: xSAR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; case 2: xSAR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break;
} }
} }
else { else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
switch(shifttype) { switch (shifttype)
{
case 0: xSHL(eax, imm); break; case 0: xSHL(eax, imm); break;
case 1: xSHR(eax, imm); break; case 1: xSHR(eax, imm); break;
case 2: xSAR(eax, imm); break; case 2: xSAR(eax, imm); break;
@ -790,8 +874,10 @@ void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype)
xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
} }
} }
else { else
if( rdreg != rtreg ) { {
if (rdreg != rtreg)
{
xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
} }
@ -834,7 +920,8 @@ void rpsxShiftVconstt(int info, int shifttype)
{ {
xMOV(eax, g_psxConstRegs[_Rt_]); xMOV(eax, g_psxConstRegs[_Rt_]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch(shifttype) { switch (shifttype)
{
case 0: xSHL(eax, cl); break; case 0: xSHL(eax, cl); break;
case 1: xSHR(eax, cl); break; case 1: xSHR(eax, cl); break;
case 2: xSAR(eax, cl); break; case 2: xSAR(eax, cl); break;
@ -895,7 +982,8 @@ extern void rpsxBREAK();
void rpsxMFHI() void rpsxMFHI()
{ {
if (!_Rd_) return; if (!_Rd_)
return;
_psxOnWriteReg(_Rd_); _psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0); _psxDeleteReg(_Rd_, 0);
@ -905,10 +993,12 @@ void rpsxMFHI()
void rpsxMTHI() void rpsxMTHI()
{ {
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]);
} }
else { else
{
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr32[&psxRegs.GPR.n.hi], eax); xMOV(ptr32[&psxRegs.GPR.n.hi], eax);
@ -917,7 +1007,8 @@ void rpsxMTHI()
void rpsxMFLO() void rpsxMFLO()
{ {
if (!_Rd_) return; if (!_Rd_)
return;
_psxOnWriteReg(_Rd_); _psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0); _psxDeleteReg(_Rd_, 0);
@ -927,10 +1018,12 @@ void rpsxMFLO()
void rpsxMTLO() void rpsxMTLO()
{ {
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
xMOV(ptr32[&psxRegs.GPR.n.lo], g_psxConstRegs[_Rs_]); xMOV(ptr32[&psxRegs.GPR.n.lo], g_psxConstRegs[_Rs_]);
} }
else { else
{
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
@ -976,16 +1069,17 @@ void rpsxJALR()
psxRecompileNextInstruction(1); psxRecompileNextInstruction(1);
if( x86regs[calleeSavedReg2d.GetId()].inuse ) { if (x86regs[calleeSavedReg2d.GetId()].inuse)
{
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK);
xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
xOR(calleeSavedReg2d, calleeSavedReg2d); xOR(calleeSavedReg2d, calleeSavedReg2d);
#endif #endif
} }
else { else
{
xMOV(eax, ptr32[&g_recWriteback]); xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr32[&psxRegs.pc], eax); xMOV(ptr32[&psxRegs.pc], eax);
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
@ -1006,15 +1100,18 @@ static u32* s_pbranchjmp;
void rpsxSetBranchEQ(int info, int process) void rpsxSetBranchEQ(int info, int process)
{ {
if( process & PROCESS_CONSTS ) { if (process & PROCESS_CONSTS)
{
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]); xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
s_pbranchjmp = JNE32(0); s_pbranchjmp = JNE32(0);
} }
else if( process & PROCESS_CONSTT ) { else if (process & PROCESS_CONSTT)
{
xCMP(ptr32[&psxRegs.GPR.r[_Rs_]], g_psxConstRegs[_Rt_]); xCMP(ptr32[&psxRegs.GPR.r[_Rs_]], g_psxConstRegs[_Rt_]);
s_pbranchjmp = JNE32(0); s_pbranchjmp = JNE32(0);
} }
else { else
{
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
s_pbranchjmp = JNE32(0); s_pbranchjmp = JNE32(0);
@ -1124,7 +1221,8 @@ void rpsxBLTZ()
_psxFlushAllUnused(); _psxFlushAllUnused();
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] >= 0) if ((int)g_psxConstRegs[_Rs_] >= 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1158,7 +1256,8 @@ void rpsxBGEZ()
_psxFlushAllUnused(); _psxFlushAllUnused();
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] < 0) if ((int)g_psxConstRegs[_Rs_] < 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1198,7 +1297,8 @@ void rpsxBLTZAL()
PSX_SET_CONST(31); PSX_SET_CONST(31);
g_psxConstRegs[31] = psxpc + 4; g_psxConstRegs[31] = psxpc + 4;
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] >= 0) if ((int)g_psxConstRegs[_Rs_] >= 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1238,7 +1338,8 @@ void rpsxBGEZAL()
PSX_SET_CONST(31); PSX_SET_CONST(31);
g_psxConstRegs[31] = psxpc + 4; g_psxConstRegs[31] = psxpc + 4;
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] < 0) if ((int)g_psxConstRegs[_Rs_] < 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1273,7 +1374,8 @@ void rpsxBLEZ()
_psxFlushAllUnused(); _psxFlushAllUnused();
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] > 0) if ((int)g_psxConstRegs[_Rs_] > 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1308,7 +1410,8 @@ void rpsxBGTZ()
_psxFlushAllUnused(); _psxFlushAllUnused();
if( PSX_IS_CONST1(_Rs_) ) { if (PSX_IS_CONST1(_Rs_))
{
if ((int)g_psxConstRegs[_Rs_] <= 0) if ((int)g_psxConstRegs[_Rs_] <= 0)
branchTo = psxpc + 4; branchTo = psxpc + 4;
@ -1338,7 +1441,8 @@ void rpsxBGTZ()
void rpsxMFC0() void rpsxMFC0()
{ {
// Rt = Cop0->Rd // Rt = Cop0->Rd
if (!_Rt_) return; if (!_Rt_)
return;
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
@ -1348,7 +1452,8 @@ void rpsxMFC0()
void rpsxCFC0() void rpsxCFC0()
{ {
// Rt = Cop0->Rd // Rt = Cop0->Rd
if (!_Rt_) return; if (!_Rt_)
return;
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
@ -1358,10 +1463,12 @@ void rpsxCFC0()
void rpsxMTC0() void rpsxMTC0()
{ {
// Cop0->Rd = Rt // Cop0->Rd = Rt
if( PSX_IS_CONST1(_Rt_) ) { if (PSX_IS_CONST1(_Rt_))
{
xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], g_psxConstRegs[_Rt_]); xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], g_psxConstRegs[_Rt_]);
} }
else { else
{
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax);
@ -1437,7 +1544,8 @@ static void rpsxCOP0() { rpsxCP0[_Rs_](); }
static void rpsxCOP2() { rpsxCP2[_Funct_](); } static void rpsxCOP2() { rpsxCP2[_Funct_](); }
static void rpsxBASIC() { rpsxCP2BSC[_Rs_](); } static void rpsxBASIC() { rpsxCP2BSC[_Rs_](); }
static void rpsxNULL() { static void rpsxNULL()
{
Console.WriteLn("psxUNK: %8.8x", psxRegs.code); Console.WriteLn("psxUNK: %8.8x", psxRegs.code);
} }
@ -1449,7 +1557,7 @@ void (*rpsxBSC[64])() = {
rpsxLB , rpsxLH , rpsxLWL , rpsxLW , rpsxLBU , rpsxLHU , rpsxLWR , rpsxNULL, rpsxLB , rpsxLH , rpsxLWL , rpsxLW , rpsxLBU , rpsxLHU , rpsxLWR , rpsxNULL,
rpsxSB , rpsxSH , rpsxSWL , rpsxSW , rpsxNULL, rpsxNULL, rpsxSWR , rpsxNULL, rpsxSB , rpsxSH , rpsxSWL , rpsxSW , rpsxNULL, rpsxNULL, rpsxSWR , rpsxNULL,
rpsxNULL , rpsxNULL , rgteLWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rgteLWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL , rpsxNULL , rgteSWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL rpsxNULL , rpsxNULL , rgteSWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
}; };
void (*rpsxSPC[64])() = { void (*rpsxSPC[64])() = {
@ -1460,21 +1568,21 @@ void (*rpsxSPC[64])() = {
rpsxADD , rpsxADDU, rpsxSUB , rpsxSUBU, rpsxAND , rpsxOR , rpsxXOR , rpsxNOR , rpsxADD , rpsxADDU, rpsxSUB , rpsxSUBU, rpsxAND , rpsxOR , rpsxXOR , rpsxNOR ,
rpsxNULL, rpsxNULL, rpsxSLT , rpsxSLTU, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxSLT , rpsxSLTU, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL,
}; };
void (*rpsxREG[32])() = { void (*rpsxREG[32])() = {
rpsxBLTZ , rpsxBGEZ , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxBLTZ , rpsxBGEZ , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxBLTZAL, rpsxBGEZAL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxBLTZAL, rpsxBGEZAL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
}; };
void (*rpsxCP0[32])() = { void (*rpsxCP0[32])() = {
rpsxMFC0, rpsxNULL, rpsxCFC0, rpsxNULL, rpsxMTC0, rpsxNULL, rpsxCTC0, rpsxNULL, rpsxMFC0, rpsxNULL, rpsxCFC0, rpsxNULL, rpsxMTC0, rpsxNULL, rpsxCTC0, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxRFE , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxRFE , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
}; };
void (*rpsxCP2[64])() = { void (*rpsxCP2[64])() = {
@ -1485,28 +1593,30 @@ void (*rpsxCP2[64])() = {
rgteNCT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 20 rgteNCT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 20
rgteSQR , rgteDCPL , rgteDPCT , rpsxNULL, rpsxNULL, rgteAVSZ3, rgteAVSZ4, rpsxNULL, // 28 rgteSQR , rgteDCPL , rgteDPCT , rpsxNULL, rpsxNULL, rgteAVSZ3, rgteAVSZ4, rpsxNULL, // 28
rgteRTPT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 30 rgteRTPT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 30
rpsxNULL , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rgteGPF , rgteGPL , rgteNCCT // 38 rpsxNULL , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rgteGPF , rgteGPL , rgteNCCT, // 38
}; };
void (*rpsxCP2BSC[32])() = { void (*rpsxCP2BSC[32])() = {
rgteMFC2, rpsxNULL, rgteCFC2, rpsxNULL, rgteMTC2, rpsxNULL, rgteCTC2, rpsxNULL, rgteMFC2, rpsxNULL, rgteCFC2, rpsxNULL, rgteMTC2, rpsxNULL, rgteCTC2, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL,
}; };
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Back-Prob Function Tables - Gathering Info // // Back-Prob Function Tables - Gathering Info //
//////////////////////////////////////////////// ////////////////////////////////////////////////
#define rpsxpropSetRead(reg) { \ #define rpsxpropSetRead(reg) \
{ \
if (!(pinst->regs[reg] & EEINST_USED)) \ if (!(pinst->regs[reg] & EEINST_USED)) \
pinst->regs[reg] |= EEINST_LASTUSE; \ pinst->regs[reg] |= EEINST_LASTUSE; \
prev->regs[reg] |= EEINST_LIVE0 | EEINST_USED; \ prev->regs[reg] |= EEINST_LIVE0 | EEINST_USED; \
pinst->regs[reg] |= EEINST_USED; \ pinst->regs[reg] |= EEINST_USED; \
_recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 0); \ _recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 0); \
} \ }
#define rpsxpropSetWrite(reg) { \ #define rpsxpropSetWrite(reg) \
{ \
prev->regs[reg] &= ~EEINST_LIVE0; \ prev->regs[reg] &= ~EEINST_LIVE0; \
if (!(pinst->regs[reg] & EEINST_USED)) \ if (!(pinst->regs[reg] & EEINST_USED)) \
pinst->regs[reg] |= EEINST_LASTUSE; \ pinst->regs[reg] |= EEINST_LASTUSE; \
@ -1531,9 +1641,14 @@ void rpsxpropCP2(EEINST* prev, EEINST* pinst);
//NULL , NULL , NULL, NULL , NULL, NULL, NULL, NULL //NULL , NULL , NULL, NULL , NULL, NULL, NULL, NULL
void rpsxpropBSC(EEINST* prev, EEINST* pinst) void rpsxpropBSC(EEINST* prev, EEINST* pinst)
{ {
switch(psxRegs.code >> 26) { switch (psxRegs.code >> 26)
case 0: rpsxpropSPECIAL(prev, pinst); break; {
case 1: rpsxpropREGIMM(prev, pinst); break; case 0:
rpsxpropSPECIAL(prev, pinst);
break;
case 1:
rpsxpropREGIMM(prev, pinst);
break;
case 2: // j case 2: // j
break; break;
case 3: // jal case 3: // jal
@ -1554,8 +1669,12 @@ void rpsxpropBSC(EEINST* prev, EEINST* pinst)
rpsxpropSetWrite(_Rt_); rpsxpropSetWrite(_Rt_);
break; break;
case 16: rpsxpropCP0(prev, pinst); break; case 16:
case 18: rpsxpropCP2(prev, pinst); break; rpsxpropCP0(prev, pinst);
break;
case 18:
rpsxpropCP2(prev, pinst);
break;
// stores // stores
case 40: case 41: case 42: case 43: case 46: case 40: case 41: case 42: case 43: case 46:
@ -1582,10 +1701,11 @@ void rpsxpropBSC(EEINST* prev, EEINST* pinst)
//ADD , ADDU, SUB , SUBU, AND , OR , XOR , NOR , //ADD , ADDU, SUB , SUBU, AND , OR , XOR , NOR ,
//NULL, NULL, SLT , SLTU, NULL , NULL , NULL, NULL, //NULL, NULL, SLT , SLTU, NULL , NULL , NULL, NULL,
//NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL, //NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL,
//NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL //NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL,
void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst) void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst)
{ {
switch(_Funct_) { switch (_Funct_)
{
case 0: // SLL case 0: // SLL
case 2: // SRL case 2: // SRL
case 3: // SRA case 3: // SRA
@ -1641,8 +1761,10 @@ void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst)
case 34: // sub case 34: // sub
case 35: // subu case 35: // subu
rpsxpropSetWrite(_Rd_); rpsxpropSetWrite(_Rd_);
if( _Rs_ ) rpsxpropSetRead(_Rs_); if (_Rs_)
if( _Rt_ ) rpsxpropSetRead(_Rt_); rpsxpropSetRead(_Rs_);
if (_Rt_)
rpsxpropSetRead(_Rt_);
break; break;
default: default:
@ -1659,7 +1781,8 @@ void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst)
//NULL , NULL , NULL, NULL, NULL, NULL, NULL, NULL //NULL , NULL , NULL, NULL, NULL, NULL, NULL, NULL
void rpsxpropREGIMM(EEINST* prev, EEINST* pinst) void rpsxpropREGIMM(EEINST* prev, EEINST* pinst)
{ {
switch(_Rt_) { switch (_Rt_)
{
case 0: // bltz case 0: // bltz
case 1: // bgez case 1: // bgez
rpsxpropSetRead(_Rs_); rpsxpropSetRead(_Rs_);
@ -1678,10 +1801,11 @@ void rpsxpropREGIMM(EEINST* prev, EEINST* pinst)
//MFC0, NULL, CFC0, NULL, MTC0, NULL, CTC0, NULL, //MFC0, NULL, CFC0, NULL, MTC0, NULL, CTC0, NULL,
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, //NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
//RFE , NULL, NULL, NULL, NULL, NULL, NULL, NULL, //RFE , NULL, NULL, NULL, NULL, NULL, NULL, NULL,
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL //NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
void rpsxpropCP0(EEINST* prev, EEINST* pinst) void rpsxpropCP0(EEINST* prev, EEINST* pinst)
{ {
switch(_Rs_) { switch (_Rs_)
{
case 0: // mfc0 case 0: // mfc0
case 2: // cfc0 case 2: // cfc0
rpsxpropSetWrite(_Rt_); rpsxpropSetWrite(_Rt_);
@ -1703,10 +1827,11 @@ void rpsxpropCP0(EEINST* prev, EEINST* pinst)
// gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL, // gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL,
// psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL,
// psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL,
// psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL // psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL,
void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst) void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst)
{ {
switch(_Rs_) { switch (_Rs_)
{
case 0: // mfc2 case 0: // mfc2
case 2: // cfc2 case 2: // cfc2
rpsxpropSetWrite(_Rt_); rpsxpropSetWrite(_Rt_);
@ -1732,10 +1857,11 @@ void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst)
// gteNCT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20 // gteNCT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20
// gteSQR , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28 // gteSQR , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28
// gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30 // gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30
// psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT // 38 // psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT, // 38
void rpsxpropCP2(EEINST* prev, EEINST* pinst) void rpsxpropCP2(EEINST* prev, EEINST* pinst)
{ {
switch(_Funct_) { switch (_Funct_)
{
case 0: // Basic opcode case 0: // Basic opcode
rpsxpropCP2_basic(prev, pinst); rpsxpropCP2_basic(prev, pinst);
break; break;

View File

@ -40,7 +40,8 @@ extern u32 s_nBlockCycles; // cycles of current block recompiling
#define REC_FUNC_DEL(f, delreg) \ #define REC_FUNC_DEL(f, delreg) \
void rec##f() \ void rec##f() \
{ \ { \
if( (delreg) > 0 ) _deleteEEreg(delreg, 1); \ if ((delreg) > 0) \
_deleteEEreg(delreg, 1); \
recCall(Interp::f); \ recCall(Interp::f); \
} }
@ -53,7 +54,8 @@ extern u32 s_nBlockCycles; // cycles of current block recompiling
#define REC_SYS_DEL(f, delreg) \ #define REC_SYS_DEL(f, delreg) \
void rec##f() \ void rec##f() \
{ \ { \
if( (delreg) > 0 ) _deleteEEreg(delreg, 1); \ if ((delreg) > 0) \
_deleteEEreg(delreg, 1); \
recBranchCall(Interp::f); \ recBranchCall(Interp::f); \
} }
@ -76,26 +78,33 @@ void recBranchCall( void (*func)() );
void recCall(void (*func)()); void recCall(void (*func)());
u32 scaleblockcycles_clear(); u32 scaleblockcycles_clear();
namespace R5900{ namespace R5900
namespace Dynarec { {
namespace Dynarec
{
extern void recDoBranchImm(u32* jmpSkip, bool isLikely = false); extern void recDoBranchImm(u32* jmpSkip, bool isLikely = false);
extern void recDoBranchImm_Likely(u32* jmpSkip); extern void recDoBranchImm_Likely(u32* jmpSkip);
} } } // namespace Dynarec
} // namespace R5900
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Constant Propagation - From here to the end of the header! // Constant Propagation - From here to the end of the header!
#define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg)))) #define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg))))
#define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg & (1 << (reg1))) && (g_cpuHasConstReg & (1 << (reg2)))) #define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg & (1 << (reg1))) && (g_cpuHasConstReg & (1 << (reg2))))
#define GPR_SET_CONST(reg) { \ #define GPR_SET_CONST(reg) \
if( (reg) < 32 ) { \ { \
if ((reg) < 32) \
{ \
g_cpuHasConstReg |= (1 << (reg)); \ g_cpuHasConstReg |= (1 << (reg)); \
g_cpuFlushedConstReg &= ~(1 << (reg)); \ g_cpuFlushedConstReg &= ~(1 << (reg)); \
} \ } \
} }
#define GPR_DEL_CONST(reg) { \ #define GPR_DEL_CONST(reg) \
if( (reg) < 32 ) g_cpuHasConstReg &= ~(1<<(reg)); \ { \
if ((reg) < 32) \
g_cpuHasConstReg &= ~(1 << (reg)); \
} }
extern __aligned16 GPR_reg64 g_cpuConstRegs[32]; extern __aligned16 GPR_reg64 g_cpuConstRegs[32];
@ -167,28 +176,28 @@ void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode);
void rec##fn(void) \ void rec##fn(void) \
{ \ { \
eeRecompileCodeConst0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_); \ eeRecompileCodeConst0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_); \
} \ }
// rt = rs op imm16 // rt = rs op imm16
#define EERECOMPILE_CONSTCODE1(fn) \ #define EERECOMPILE_CONSTCODE1(fn) \
void rec##fn(void) \ void rec##fn(void) \
{ \ { \
eeRecompileCodeConst1(rec##fn##_const, rec##fn##_); \ eeRecompileCodeConst1(rec##fn##_const, rec##fn##_); \
} \ }
// rd = rt op sa // rd = rt op sa
#define EERECOMPILE_CONSTCODE2(fn) \ #define EERECOMPILE_CONSTCODE2(fn) \
void rec##fn(void) \ void rec##fn(void) \
{ \ { \
eeRecompileCodeConst2(rec##fn##_const, rec##fn##_); \ eeRecompileCodeConst2(rec##fn##_const, rec##fn##_); \
} \ }
// rd = rt op rs // rd = rt op rs
#define EERECOMPILE_CONSTCODESPECIAL(fn, mult) \ #define EERECOMPILE_CONSTCODESPECIAL(fn, mult) \
void rec##fn(void) \ void rec##fn(void) \
{ \ { \
eeRecompileCodeConstSPECIAL(rec##fn##_const, rec##fn##_, mult); \ eeRecompileCodeConstSPECIAL(rec##fn##_const, rec##fn##_, mult); \
} \ }
// rd = rs op rt // rd = rs op rt
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode); void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode);
@ -200,14 +209,14 @@ void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode);
void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT); void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT);
// XMM caching helpers // XMM caching helpers
#define XMMINFO_READLO 0x01 #define XMMINFO_READLO 0x001
#define XMMINFO_READHI 0x02 #define XMMINFO_READHI 0x002
#define XMMINFO_WRITELO 0x04 #define XMMINFO_WRITELO 0x004
#define XMMINFO_WRITEHI 0x08 #define XMMINFO_WRITEHI 0x008
#define XMMINFO_WRITED 0x10 #define XMMINFO_WRITED 0x010
#define XMMINFO_READD 0x20 #define XMMINFO_READD 0x020
#define XMMINFO_READS 0x40 #define XMMINFO_READS 0x040
#define XMMINFO_READT 0x80 #define XMMINFO_READT 0x080
#define XMMINFO_READD_LO 0x100 // if set and XMMINFO_READD is set, reads only low 64 bits of D #define XMMINFO_READD_LO 0x100 // if set and XMMINFO_READD is set, reads only low 64 bits of D
#define XMMINFO_READACC 0x200 #define XMMINFO_READACC 0x200
#define XMMINFO_WRITEACC 0x400 #define XMMINFO_WRITEACC 0x400

View File

@ -23,8 +23,8 @@
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
void recADD(); void recADD();
void recADDU(); void recADDU();
void recDADD(); void recDADD();
@ -39,5 +39,8 @@ namespace OpcodeImpl
void recNOR(); void recNOR();
void recSLT(); void recSLT();
void recSLTU(); void recSLTU();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -34,6 +34,9 @@ namespace OpcodeImpl {
void recSLTI(); void recSLTI();
void recSLTIU(); void recSLTIU();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -41,6 +41,9 @@ namespace OpcodeImpl {
void recBGEZL(); void recBGEZL();
void recBGEZAL(); void recBGEZAL();
void recBGEZALL(); void recBGEZALL();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -29,6 +29,9 @@ namespace OpcodeImpl {
void recJAL(); void recJAL();
void recJR(); void recJR();
void recJALR(); void recJALR();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -50,6 +50,8 @@ namespace OpcodeImpl {
void recLQC2(); void recLQC2();
void recSQC2(); void recSQC2();
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -92,13 +92,16 @@ void recSYNC()
void recMFSA() void recMFSA()
{ {
int mmreg; int mmreg;
if (!_Rd_) return; if (!_Rd_)
return;
mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
if( mmreg >= 0 ) { if (mmreg >= 0)
{
xMOVL.PS(xRegisterSSE(mmreg), ptr[&cpuRegs.sa]); xMOVL.PS(xRegisterSSE(mmreg), ptr[&cpuRegs.sa]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.sa]); xMOV(eax, ptr[&cpuRegs.sa]);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -109,16 +112,20 @@ void recMFSA()
// SA is 4-bit and contains the amount of bytes to shift // SA is 4-bit and contains the amount of bytes to shift
void recMTSA() void recMTSA()
{ {
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[&cpuRegs.sa], g_cpuConstRegs[_Rs_].UL[0] & 0xf); xMOV(ptr32[&cpuRegs.sa], g_cpuConstRegs[_Rs_].UL[0] & 0xf);
} }
else { else
{
int mmreg; int mmreg;
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
{
xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg)); xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg));
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(ptr[&cpuRegs.sa], eax); xMOV(ptr[&cpuRegs.sa], eax);
} }
@ -128,10 +135,12 @@ void recMTSA()
void recMTSAB() void recMTSAB()
{ {
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF))); xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF)));
} }
else { else
{
_eeMoveGPRtoR(eax, _Rs_); _eeMoveGPRtoR(eax, _Rs_);
xAND(eax, 0xF); xAND(eax, 0xF);
xXOR(eax, _Imm_ & 0xf); xXOR(eax, _Imm_ & 0xf);
@ -141,10 +150,12 @@ void recMTSAB()
void recMTSAH() void recMTSAH()
{ {
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0x7) ^ (_Imm_ & 0x7)) << 1); xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0x7) ^ (_Imm_ & 0x7)) << 1);
} }
else { else
{
_eeMoveGPRtoR(eax, _Rs_); _eeMoveGPRtoR(eax, _Rs_);
xAND(eax, 0x7); xAND(eax, 0x7);
xXOR(eax, _Imm_ & 0x7); xXOR(eax, _Imm_ & 0x7);
@ -259,4 +270,6 @@ void recMTSAH()
recBranchCall(R5900::Interpreter::OpcodeImpl::TNEI); recBranchCall(R5900::Interpreter::OpcodeImpl::TNEI);
} }
} }} // end Namespace R5900::Dynarec::OpcodeImpl } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -27,6 +27,9 @@ namespace OpcodeImpl {
void recMTHI(); void recMTHI();
void recMOVN(); void recMOVN();
void recMOVZ(); void recMOVZ();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -29,6 +29,9 @@ namespace OpcodeImpl {
void recMULTU(); void recMULTU();
void recDIV(); void recDIV();
void recDIVU(); void recDIVU();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -41,6 +41,9 @@ namespace OpcodeImpl {
void recDSLLV(); void recDSLLV();
void recDSRLV(); void recDSRLV();
void recDSRAV(); void recDSRAV();
} } }
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
#endif #endif

View File

@ -33,4 +33,3 @@
#include "iMMI.h" #include "iMMI.h"
#include "iFPU.h" #include "iFPU.h"
#include "iCOP0.h" #include "iCOP0.h"

View File

@ -33,7 +33,8 @@ static int g_x86checknext;
// use special x86 register allocation for ia32 // use special x86 register allocation for ia32
void _initX86regs() { void _initX86regs()
{
memzero(x86regs); memzero(x86regs);
g_x86AllocCounter = 0; g_x86AllocCounter = 0;
g_x86checknext = 0; g_x86checknext = 0;
@ -117,26 +118,35 @@ int _getFreeX86reg(int mode)
int maxreg = (mode & MODE_8BITREG) ? 4 : iREGCNT_GPR; int maxreg = (mode & MODE_8BITREG) ? 4 : iREGCNT_GPR;
for (uint i=0; i<iREGCNT_GPR; i++) { for (uint i = 0; i < iREGCNT_GPR; i++)
{
int reg = (g_x86checknext + i) % iREGCNT_GPR; int reg = (g_x86checknext + i) % iREGCNT_GPR;
if( reg == 0 || reg == esp.GetId() || reg == ebp.GetId() ) continue; if (reg == 0 || reg == esp.GetId() || reg == ebp.GetId())
if( reg >= maxreg ) continue; continue;
if (reg >= maxreg)
continue;
//if( (mode&MODE_NOFRAME) && reg==EBP ) continue; //if( (mode&MODE_NOFRAME) && reg==EBP ) continue;
if (x86regs[reg].inuse == 0) { if (x86regs[reg].inuse == 0)
{
g_x86checknext = (reg + 1) % iREGCNT_GPR; g_x86checknext = (reg + 1) % iREGCNT_GPR;
return reg; return reg;
} }
} }
for (int i=1; i<maxreg; i++) { for (int i = 1; i < maxreg; i++)
if( i == esp.GetId() || i==ebp.GetId()) continue; {
if (i == esp.GetId() || i == ebp.GetId())
continue;
//if( (mode&MODE_NOFRAME) && i==EBP ) continue; //if( (mode&MODE_NOFRAME) && i==EBP ) continue;
if (x86regs[i].needed) continue; if (x86regs[i].needed)
if (x86regs[i].type != X86TYPE_TEMP) { continue;
if (x86regs[i].type != X86TYPE_TEMP)
{
if( x86regs[i].counter < bestcount ) { if (x86regs[i].counter < bestcount)
{
tempi = i; tempi = i;
bestcount = x86regs[i].counter; bestcount = x86regs[i].counter;
} }
@ -147,7 +157,8 @@ int _getFreeX86reg(int mode)
return i; return i;
} }
if( tempi != -1 ) { if (tempi != -1)
{
_freeX86reg(tempi); _freeX86reg(tempi);
return tempi; return tempi;
} }
@ -164,11 +175,13 @@ void _flushCachedRegs()
void _flushConstReg(int reg) void _flushConstReg(int reg)
{ {
if( GPR_IS_CONST1( reg ) && !(g_cpuFlushedConstReg&(1<<reg)) ) { if (GPR_IS_CONST1(reg) && !(g_cpuFlushedConstReg & (1 << reg)))
{
xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], g_cpuConstRegs[reg].UL[0]); xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], g_cpuConstRegs[reg].UL[0]);
xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[1]], g_cpuConstRegs[reg].UL[1]); xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[1]], g_cpuConstRegs[reg].UL[1]);
g_cpuFlushedConstReg |= (1 << reg); g_cpuFlushedConstReg |= (1 << reg);
if (reg == 0) DevCon.Warning("Flushing r0!"); if (reg == 0)
DevCon.Warning("Flushing r0!");
} }
} }
@ -183,11 +196,15 @@ void _flushConstRegs()
// flush 0 and -1 first // flush 0 and -1 first
// ignore r0 // ignore r0
for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2) { for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2)
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1<<i)) continue; {
if (g_cpuConstRegs[i].SL[j] != 0) continue; if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1 << i))
continue;
if (g_cpuConstRegs[i].SL[j] != 0)
continue;
if (eaxval != 0) { if (eaxval != 0)
{
xXOR(eax, eax); xXOR(eax, eax);
eaxval = 0; eaxval = 0;
} }
@ -199,15 +216,20 @@ void _flushConstRegs()
rewindPtr = x86Ptr; rewindPtr = x86Ptr;
for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2) { for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2)
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1<<i)) continue; {
if (g_cpuConstRegs[i].SL[j] != -1) continue; if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1 << i))
continue;
if (g_cpuConstRegs[i].SL[j] != -1)
continue;
if (eaxval > 0) { if (eaxval > 0)
{
xXOR(eax, eax); xXOR(eax, eax);
eaxval = 0; eaxval = 0;
} }
if (eaxval == 0) { if (eaxval == 0)
{
xNOT(eax); xNOT(eax);
eaxval = -1; eaxval = -1;
} }
@ -217,16 +239,22 @@ void _flushConstRegs()
minusone_cnt++; minusone_cnt++;
} }
if (minusone_cnt == 1 && !zero_cnt) { // not worth it for one byte if (minusone_cnt == 1 && !zero_cnt) // not worth it for one byte
{
x86SetPtr(rewindPtr); x86SetPtr(rewindPtr);
} else { }
else
{
done[0] |= done[2]; done[0] |= done[2];
done[1] |= done[3]; done[1] |= done[3];
} }
for (int i = 1; i < 32; ++i) { for (int i = 1; i < 32; ++i)
if (GPR_IS_CONST1(i)) { {
if (!(g_cpuFlushedConstReg&(1<<i))) { if (GPR_IS_CONST1(i))
{
if (!(g_cpuFlushedConstReg & (1 << i)))
{
if (!(done[0] & (1 << i))) if (!(done[0] & (1 << i)))
xMOV(ptr32[&cpuRegs.GPR.r[i].UL[0]], g_cpuConstRegs[i].UL[0]); xMOV(ptr32[&cpuRegs.GPR.r[i].UL[0]], g_cpuConstRegs[i].UL[0]);
if (!(done[1] & (1 << i))) if (!(done[1] & (1 << i)))
@ -234,7 +262,8 @@ void _flushConstRegs()
g_cpuFlushedConstReg |= 1 << i; g_cpuFlushedConstReg |= 1 << i;
} }
if (g_cpuHasConstReg == g_cpuFlushedConstReg) break; if (g_cpuHasConstReg == g_cpuFlushedConstReg)
break;
} }
} }
} }
@ -252,28 +281,37 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
mode &= ~(MODE_NOFRAME | MODE_8BITREG); mode &= ~(MODE_NOFRAME | MODE_8BITREG);
int readfromreg = -1; int readfromreg = -1;
if ( type != X86TYPE_TEMP ) { if (type != X86TYPE_TEMP)
if ( maxreg < iREGCNT_GPR ) { {
if (maxreg < iREGCNT_GPR)
{
// make sure reg isn't in the higher regs // make sure reg isn't in the higher regs
for(i = maxreg; i < iREGCNT_GPR; ++i) { for (i = maxreg; i < iREGCNT_GPR; ++i)
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue; {
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg)
continue;
if( mode & MODE_READ ) { if (mode & MODE_READ)
{
readfromreg = i; readfromreg = i;
x86regs[i].inuse = 0; x86regs[i].inuse = 0;
break; break;
} }
else if( mode & MODE_WRITE ) { else if (mode & MODE_WRITE)
{
x86regs[i].inuse = 0; x86regs[i].inuse = 0;
break; break;
} }
} }
} }
for (i=1; i<maxreg; i++) { for (i = 1; i < maxreg; i++)
if ( (int)i == esp.GetId() || (int)i == ebp.GetId() ) continue; {
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue; if ((int)i == esp.GetId() || (int)i == ebp.GetId())
continue;
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg)
continue;
// We're in a for loop until i<maxreg. This will never happen. // We're in a for loop until i<maxreg. This will never happen.
/*if( i >= maxreg ) { /*if( i >= maxreg ) {
@ -284,19 +322,24 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
break; break;
}*/ }*/
if( !x86reg.IsEmpty() ) { if (!x86reg.IsEmpty())
{
// requested specific reg, so return that instead // requested specific reg, so return that instead
if( i != (uint)x86reg.GetId() ) { if (i != (uint)x86reg.GetId())
if( x86regs[i].mode & MODE_READ ) readfromreg = i; {
if (x86regs[i].mode & MODE_READ)
readfromreg = i;
mode |= x86regs[i].mode & MODE_WRITE; mode |= x86regs[i].mode & MODE_WRITE;
x86regs[i].inuse = 0; x86regs[i].inuse = 0;
break; break;
} }
} }
if( type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode&MODE_READ)) { if (type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
{
if( type == X86TYPE_GPR ) _flushConstReg(reg); if (type == X86TYPE_GPR)
_flushConstReg(reg);
if (X86_ISVI(type) && reg < 16) if (X86_ISVI(type) && reg < 16)
xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]); xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]);
@ -323,16 +366,21 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
x86regs[x86reg.GetId()].needed = 1; x86regs[x86reg.GetId()].needed = 1;
x86regs[x86reg.GetId()].inuse = 1; x86regs[x86reg.GetId()].inuse = 1;
if( mode & MODE_READ ) { if (mode & MODE_READ)
{
if (readfromreg >= 0) if (readfromreg >= 0)
xMOV(x86reg, xRegister32(readfromreg)); xMOV(x86reg, xRegister32(readfromreg));
else { else
if( type == X86TYPE_GPR ) { {
if (type == X86TYPE_GPR)
{
if( reg == 0 ) { if (reg == 0)
{
xXOR(x86reg, x86reg); xXOR(x86reg, x86reg);
} }
else { else
{
_flushConstReg(reg); _flushConstReg(reg);
_deleteGPRtoXMMreg(reg, 1); _deleteGPRtoXMMreg(reg, 1);
@ -341,14 +389,17 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
_deleteGPRtoXMMreg(reg, 0); _deleteGPRtoXMMreg(reg, 0);
} }
} }
else { else
if( X86_ISVI(type) && reg < 16 ) { {
if (X86_ISVI(type) && reg < 16)
{
if (reg == 0) if (reg == 0)
xXOR(x86reg, x86reg); xXOR(x86reg, x86reg);
else else
xMOVZX(x86reg, ptr16[(u16*)(_x86GetAddr(type, reg))]); xMOVZX(x86reg, ptr16[(u16*)(_x86GetAddr(type, reg))]);
} }
else xMOV(x86reg, ptr[(void*)(_x86GetAddr(type, reg))]); else
xMOV(x86reg, ptr[(void*)(_x86GetAddr(type, reg))]);
} }
} }
} }
@ -362,10 +413,13 @@ int _checkX86reg(int type, int reg, int mode)
{ {
uint i; uint i;
for (i=0; i<iREGCNT_GPR; i++) { for (i = 0; i < iREGCNT_GPR; i++)
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) { {
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type)
{
if( !(x86regs[i].mode & MODE_READ) && (mode&MODE_READ) ) { if (!(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
{
if (X86_ISVI(type)) if (X86_ISVI(type))
xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]); xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]);
else else
@ -386,19 +440,24 @@ void _addNeededX86reg(int type, int reg)
{ {
uint i; uint i;
for (i=0; i<iREGCNT_GPR; i++) { for (i = 0; i < iREGCNT_GPR; i++)
if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type ) continue; {
if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type)
continue;
x86regs[i].counter = g_x86AllocCounter++; x86regs[i].counter = g_x86AllocCounter++;
x86regs[i].needed = 1; x86regs[i].needed = 1;
} }
} }
void _clearNeededX86regs() { void _clearNeededX86regs()
{
uint i; uint i;
for (i=0; i<iREGCNT_GPR; i++) { for (i = 0; i < iREGCNT_GPR; i++)
if (x86regs[i].needed ) { {
if (x86regs[i].needed)
{
if (x86regs[i].inuse && (x86regs[i].mode & MODE_WRITE)) if (x86regs[i].inuse && (x86regs[i].mode & MODE_WRITE))
x86regs[i].mode |= MODE_READ; x86regs[i].mode |= MODE_READ;
} }
@ -410,15 +469,19 @@ void _deleteX86reg(int type, int reg, int flush)
{ {
uint i; uint i;
for (i=0; i<iREGCNT_GPR; i++) { for (i = 0; i < iREGCNT_GPR; i++)
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) { {
switch(flush) { if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type)
{
switch (flush)
{
case 0: case 0:
_freeX86reg(i); _freeX86reg(i);
break; break;
case 1: case 1:
if( x86regs[i].mode & MODE_WRITE) { if (x86regs[i].mode & MODE_WRITE)
{
if (X86_ISVI(type) && x86regs[i].reg < 16) if (X86_ISVI(type) && x86regs[i].reg < 16)
xMOV(ptr[(void*)(_x86GetAddr(type, x86regs[i].reg))], xRegister16(i)); xMOV(ptr[(void*)(_x86GetAddr(type, x86regs[i].reg))], xRegister16(i));
@ -449,10 +512,12 @@ void _freeX86reg(int x86reg)
{ {
pxAssert(x86reg >= 0 && x86reg < (int)iREGCNT_GPR); pxAssert(x86reg >= 0 && x86reg < (int)iREGCNT_GPR);
if( x86regs[x86reg].inuse && (x86regs[x86reg].mode&MODE_WRITE) ) { if (x86regs[x86reg].inuse && (x86regs[x86reg].mode & MODE_WRITE))
{
x86regs[x86reg].mode &= ~MODE_WRITE; x86regs[x86reg].mode &= ~MODE_WRITE;
if( X86_ISVI(x86regs[x86reg].type) && x86regs[x86reg].reg < 16 ) { if (X86_ISVI(x86regs[x86reg].type) && x86regs[x86reg].reg < 16)
{
xMOV(ptr[(void*)(_x86GetAddr(x86regs[x86reg].type, x86regs[x86reg].reg))], xRegister16(x86reg)); xMOV(ptr[(void*)(_x86GetAddr(x86regs[x86reg].type, x86regs[x86reg].reg))], xRegister16(x86reg));
} }
else else

View File

@ -118,22 +118,25 @@ static void recExitExecution();
void _eeFlushAllUnused() void _eeFlushAllUnused()
{ {
u32 i; u32 i;
for(i = 0; i < 34; ++i) { for (i = 0; i < 34; ++i)
if( pc < s_nEndBlock ) { {
if (pc < s_nEndBlock)
{
if ((g_pCurInstInfo[1].regs[i] & EEINST_USED)) if ((g_pCurInstInfo[1].regs[i] & EEINST_USED))
continue; continue;
} }
else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED)) else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED))
continue; continue;
if( i < 32 && GPR_IS_CONST1(i) ) _flushConstReg(i); if (i < 32 && GPR_IS_CONST1(i))
else { _flushConstReg(i);
else
_deleteGPRtoXMMreg(i, 1); _deleteGPRtoXMMreg(i, 1);
} }
}
//TODO when used info is done for FPU and VU0 //TODO when used info is done for FPU and VU0
for(i = 0; i < iREGCNT_XMM; ++i) { for (i = 0; i < iREGCNT_XMM; ++i)
{
if (xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG) if (xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG)
_freeXMMreg(i); _freeXMMreg(i);
} }
@ -160,13 +163,16 @@ void _eeMoveGPRtoR(const xRegister32& to, int fromgpr)
xXOR(to, to); // zero register should use xor, thanks --air xXOR(to, to); // zero register should use xor, thanks --air
else if (GPR_IS_CONST1(fromgpr)) else if (GPR_IS_CONST1(fromgpr))
xMOV(to, g_cpuConstRegs[fromgpr].UL[0]); xMOV(to, g_cpuConstRegs[fromgpr].UL[0]);
else { else
{
int mmreg; int mmreg;
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 && (xmmregs[mmreg].mode&MODE_WRITE)) { if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 && (xmmregs[mmreg].mode & MODE_WRITE))
{
xMOVD(to, xRegisterSSE(mmreg)); xMOVD(to, xRegisterSSE(mmreg));
} }
else { else
{
xMOV(to, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); xMOV(to, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]);
} }
} }
@ -176,13 +182,16 @@ void _eeMoveGPRtoM(uptr to, int fromgpr)
{ {
if (GPR_IS_CONST1(fromgpr)) if (GPR_IS_CONST1(fromgpr))
xMOV(ptr32[(u32*)(to)], g_cpuConstRegs[fromgpr].UL[0]); xMOV(ptr32[(u32*)(to)], g_cpuConstRegs[fromgpr].UL[0]);
else { else
{
int mmreg; int mmreg;
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 ) { if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0)
{
xMOVSS(ptr[(void*)(to)], xRegisterSSE(mmreg)); xMOVSS(ptr[(void*)(to)], xRegisterSSE(mmreg));
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]);
xMOV(ptr[(void*)(to)], eax); xMOV(ptr[(void*)(to)], eax);
} }
@ -193,13 +202,16 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
{ {
if (GPR_IS_CONST1(fromgpr)) if (GPR_IS_CONST1(fromgpr))
xMOV(ptr32[xAddressReg(to)], g_cpuConstRegs[fromgpr].UL[0]); xMOV(ptr32[xAddressReg(to)], g_cpuConstRegs[fromgpr].UL[0]);
else { else
{
int mmreg; int mmreg;
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 ) { if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0)
{
xMOVSS(ptr[xAddressReg(to)], xRegisterSSE(mmreg)); xMOVSS(ptr[xAddressReg(to)], xRegisterSSE(mmreg));
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]);
xMOV(ptr[xAddressReg(to)], eax); xMOV(ptr[xAddressReg(to)], eax);
} }
@ -217,12 +229,16 @@ void eeSignExtendTo(int gpr, bool onlyupper)
int _flushXMMunused() int _flushXMMunused()
{ {
u32 i; u32 i;
for (i=0; i<iREGCNT_XMM; i++) { for (i = 0; i < iREGCNT_XMM; i++)
if (!xmmregs[i].inuse || xmmregs[i].needed || !(xmmregs[i].mode&MODE_WRITE) ) continue; {
if (!xmmregs[i].inuse || xmmregs[i].needed || !(xmmregs[i].mode & MODE_WRITE))
continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) { if (xmmregs[i].type == XMMTYPE_GPRREG)
{
//if( !(g_pCurInstInfo->regs[xmmregs[i].reg]&EEINST_USED) ) { //if( !(g_pCurInstInfo->regs[xmmregs[i].reg]&EEINST_USED) ) {
if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, xmmregs[i].reg) ) { if (!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, xmmregs[i].reg))
{
_freeXMMreg(i); _freeXMMreg(i);
xmmregs[i].inuse = 1; xmmregs[i].inuse = 1;
return 1; return 1;
@ -236,9 +252,11 @@ int _flushXMMunused()
int _flushUnusedConstReg() int _flushUnusedConstReg()
{ {
int i; int i;
for(i = 1; i < 32; ++i) { for (i = 1; i < 32; ++i)
{
if ((g_cpuHasConstReg & (1 << i)) && !(g_cpuFlushedConstReg & (1 << i)) && if ((g_cpuHasConstReg & (1 << i)) && !(g_cpuFlushedConstReg & (1 << i)) &&
!_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, i) ) { !_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, i))
{
// check if will be written in the future // check if will be written in the future
xMOV(ptr32[&cpuRegs.GPR.r[i].UL[0]], g_cpuConstRegs[i].UL[0]); xMOV(ptr32[&cpuRegs.GPR.r[i].UL[0]], g_cpuConstRegs[i].UL[0]);
@ -338,7 +356,8 @@ static void recEventTest()
{ {
_cpuEventTest_Shared(); _cpuEventTest_Shared();
if (iopBreakpoint) { if (iopBreakpoint)
{
iopBreakpoint = false; iopBreakpoint = false;
recExitExecution(); recExitExecution();
} }
@ -489,15 +508,18 @@ static void recThrowHardwareDeficiency( const wxChar* extFail )
static void recReserveCache() static void recReserveCache()
{ {
if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb); if (!recMem)
recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb);
recMem->SetProfilerName("EErec"); recMem->SetProfilerName("EErec");
while (!recMem->IsOk()) while (!recMem->IsOk())
{ {
if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::EErecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) break; if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::EErecOffset, m_ConfiguredCacheReserve * _1mb) != NULL)
break;
// If it failed, then try again (if possible): // If it failed, then try again (if possible):
if (m_ConfiguredCacheReserve < 16) break; if (m_ConfiguredCacheReserve < 16)
break;
m_ConfiguredCacheReserve /= 2; m_ConfiguredCacheReserve /= 2;
} }
@ -600,7 +622,8 @@ static void recResetRaw()
recAlloc(); recAlloc();
if( eeRecIsReset.exchange(true) ) return; if (eeRecIsReset.exchange(true))
return;
eeRecNeedsReset = false; eeRecNeedsReset = false;
Console.WriteLn(Color_StrongBlack, "EE/iR5900-32 Recompiler Reset"); Console.WriteLn(Color_StrongBlack, "EE/iR5900-32 Recompiler Reset");
@ -702,7 +725,8 @@ static void recExecute()
eeRecIsReset = false; eeRecIsReset = false;
ScopedBool executing(eeCpuExecuting); ScopedBool executing(eeCpuExecuting);
try { try
{
EnterRecompiledCode(); EnterRecompiledCode();
} }
catch (Exception::ExitCpuExecute&) catch (Exception::ExitCpuExecute&)
@ -738,8 +762,10 @@ static void recExecute()
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
} }
if(m_cpuException) m_cpuException->Rethrow(); if (m_cpuException)
if(m_Exception) m_Exception->Rethrow(); m_cpuException->Rethrow();
if (m_Exception)
m_Exception->Rethrow();
// FIXME Warning thread unsafe // FIXME Warning thread unsafe
Perf::dump(); Perf::dump();
@ -800,20 +826,24 @@ void recClear(u32 addr, u32 size)
int toRemoveLast = blockidx; int toRemoveLast = blockidx;
while (pexblock = recBlocks[blockidx]) { while (pexblock = recBlocks[blockidx])
{
u32 blockstart = pexblock->startpc; u32 blockstart = pexblock->startpc;
u32 blockend = pexblock->startpc + pexblock->size * 4; u32 blockend = pexblock->startpc + pexblock->size * 4;
BASEBLOCK* pblock = PC_GETBLOCK(blockstart); BASEBLOCK* pblock = PC_GETBLOCK(blockstart);
if (pblock == s_pCurBlock) { if (pblock == s_pCurBlock)
if(toRemoveLast != blockidx) { {
if (toRemoveLast != blockidx)
{
recBlocks.Remove((blockidx + 1), toRemoveLast); recBlocks.Remove((blockidx + 1), toRemoveLast);
} }
toRemoveLast = --blockidx; toRemoveLast = --blockidx;
continue; continue;
} }
if (blockend <= addr) { if (blockend <= addr)
{
lowerextent = std::max(lowerextent, blockend); lowerextent = std::max(lowerextent, blockend);
break; break;
} }
@ -827,18 +857,21 @@ void recClear(u32 addr, u32 size)
blockidx--; blockidx--;
} }
if(toRemoveLast != blockidx) { if (toRemoveLast != blockidx)
{
recBlocks.Remove((blockidx + 1), toRemoveLast); recBlocks.Remove((blockidx + 1), toRemoveLast);
} }
upperextent = std::min(upperextent, ceiling); upperextent = std::min(upperextent, ceiling);
for (int i = 0; pexblock = recBlocks[i]; i++) { for (int i = 0; pexblock = recBlocks[i]; i++)
{
if (s_pCurBlock == PC_GETBLOCK(pexblock->startpc)) if (s_pCurBlock == PC_GETBLOCK(pexblock->startpc))
continue; continue;
u32 blockend = pexblock->startpc + pexblock->size * 4; u32 blockend = pexblock->startpc + pexblock->size * 4;
if (pexblock->startpc >= addr && pexblock->startpc < addr + size * 4 if (pexblock->startpc >= addr && pexblock->startpc < addr + size * 4
|| pexblock->startpc < addr && blockend > addr) { || pexblock->startpc < addr && blockend > addr)
{
if (!IsDevBuild) if (!IsDevBuild)
Console.Error("[EE] Impossible block clearing failure"); Console.Error("[EE] Impossible block clearing failure");
else else
@ -857,16 +890,20 @@ void SetBranchReg( u32 reg )
{ {
g_branch = 1; g_branch = 1;
if( reg != 0xffffffff ) { if (reg != 0xffffffff)
{
// if (GPR_IS_CONST1(reg)) // if (GPR_IS_CONST1(reg))
// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[reg].UL[0]); // xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[reg].UL[0]);
// else { // else
// {
// int mmreg; // int mmreg;
// //
// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0 ) { // if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0)
// {
// xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg)); // xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg));
// } // }
// else { // else
// {
// xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[reg].UL[0])]); // xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[reg].UL[0])]);
// xMOV(ptr[&cpuRegs.pc], eax); // xMOV(ptr[&cpuRegs.pc], eax);
// } // }
@ -874,7 +911,8 @@ void SetBranchReg( u32 reg )
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(calleeSavedReg2d, reg); _eeMoveGPRtoR(calleeSavedReg2d, reg);
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack)
{
xMOV(ecx, calleeSavedReg2d); xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P(); vtlb_DynV2P();
xMOV(calleeSavedReg2d, eax); xMOV(calleeSavedReg2d, eax);
@ -882,12 +920,14 @@ void SetBranchReg( u32 reg )
recompileNextInstruction(1); recompileNextInstruction(1);
if( x86regs[calleeSavedReg2d.GetId()].inuse ) { if (x86regs[calleeSavedReg2d.GetId()].inuse)
{
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK);
xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
} }
else { else
{
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr[&g_recWriteback]);
xMOV(ptr[&cpuRegs.pc], eax); xMOV(ptr[&cpuRegs.pc], eax);
} }
@ -945,17 +985,20 @@ void iFlushCall(int flushtype)
_freeX86reg(ecx); _freeX86reg(ecx);
_freeX86reg(edx); _freeX86reg(edx);
if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) { if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC)
{
xMOV(ptr32[&cpuRegs.pc], pc); xMOV(ptr32[&cpuRegs.pc], pc);
g_cpuFlushedPC = true; g_cpuFlushedPC = true;
} }
if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) { if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode)
{
xMOV(ptr32[&cpuRegs.code], cpuRegs.code); xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
g_cpuFlushedCode = true; g_cpuFlushedCode = true;
} }
if ((flushtype == FLUSH_CAUSE) && !g_maySignalException) { if ((flushtype == FLUSH_CAUSE) && !g_maySignalException)
{
if (g_recompilingDelaySlot) if (g_recompilingDelaySlot)
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
g_maySignalException = true; g_maySignalException = true;
@ -1216,14 +1259,17 @@ void dynarecCheckBreakpoint()
int bpFlags = isBreakpointNeeded(pc); int bpFlags = isBreakpointNeeded(pc);
bool hit = false; bool hit = false;
//check breakpoint at current pc //check breakpoint at current pc
if (bpFlags & 1) { if (bpFlags & 1)
{
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc); auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc);
if (cond == NULL || cond->Evaluate()) { if (cond == NULL || cond->Evaluate())
{
hit = true; hit = true;
} }
} }
//check breakpoint in delay slot //check breakpoint in delay slot
if (bpFlags & 2) { if (bpFlags & 2)
{
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc + 4); auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc + 4);
if (cond == NULL || cond->Evaluate()) if (cond == NULL || cond->Evaluate())
hit = true; hit = true;
@ -1298,11 +1344,13 @@ void recMemcheck(u32 op, u32 bits, bool store)
xForwardJGE8 next2; // if start >= address+size then goto next2 xForwardJGE8 next2; // if start >= address+size then goto next2
// hit the breakpoint // hit the breakpoint
if (checks[i].result & MEMCHECK_LOG) { if (checks[i].result & MEMCHECK_LOG)
{
xMOV(edx, store); xMOV(edx, store);
xFastCall((void*)dynarecMemLogcheck, ecx, edx); xFastCall((void*)dynarecMemLogcheck, ecx, edx);
} }
if (checks[i].result & MEMCHECK_BREAK) { if (checks[i].result & MEMCHECK_BREAK)
{
xFastCall((void*)dynarecMemcheck); xFastCall((void*)dynarecMemcheck);
} }
@ -1373,22 +1421,29 @@ void recompileNextInstruction(int delayslot)
cpuRegs.code = *(int*)s_pCode; cpuRegs.code = *(int*)s_pCode;
if (!delayslot) { if (!delayslot)
{
pc += 4; pc += 4;
g_cpuFlushedPC = false; g_cpuFlushedPC = false;
g_cpuFlushedCode = false; g_cpuFlushedCode = false;
} else { }
else
{
// increment after recompiling so that pc points to the branch during recompilation // increment after recompiling so that pc points to the branch during recompilation
g_recompilingDelaySlot = true; g_recompilingDelaySlot = true;
} }
g_pCurInstInfo++; g_pCurInstInfo++;
for(i = 0; i < iREGCNT_XMM; ++i) { for (i = 0; i < iREGCNT_XMM; ++i)
if( xmmregs[i].inuse ) { {
if (xmmregs[i].inuse)
{
count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock - pc) / 4 + 1, xmmregs[i].type, xmmregs[i].reg); count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock - pc) / 4 + 1, xmmregs[i].type, xmmregs[i].reg);
if( count > 0 ) xmmregs[i].counter = 1000-count; if (count > 0)
else xmmregs[i].counter = 0; xmmregs[i].counter = 1000 - count;
else
xmmregs[i].counter = 0;
} }
} }
@ -1397,11 +1452,14 @@ void recompileNextInstruction(int delayslot)
//pxAssert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); //pxAssert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) );
//Console.Warning("opcode name = %s, it's cycles = %d\n",opcode.Name,opcode.cycles); //Console.Warning("opcode name = %s, it's cycles = %d\n",opcode.Name,opcode.cycles);
// if this instruction is a jump or a branch, exit right away // if this instruction is a jump or a branch, exit right away
if( delayslot ) { if (delayslot)
{
bool check_branch_delay = false; bool check_branch_delay = false;
switch(_Opcode_) { switch (_Opcode_)
{
case 1: case 1:
switch(_Rt_) { switch (_Rt_)
{
case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13: case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13:
check_branch_delay = true; check_branch_delay = true;
} }
@ -1413,7 +1471,8 @@ void recompileNextInstruction(int delayslot)
// Check for branch in delay slot, new code by FlatOut. // Check for branch in delay slot, new code by FlatOut.
// Gregory tested this in 2017 using the ps2autotests suite and remarked "So far we return 1 (even with this PR), and the HW 2. // Gregory tested this in 2017 using the ps2autotests suite and remarked "So far we return 1 (even with this PR), and the HW 2.
// Original PR and discussion at https://github.com/PCSX2/pcsx2/pull/1783 so we don't forget this information. // Original PR and discussion at https://github.com/PCSX2/pcsx2/pull/1783 so we don't forget this information.
if (check_branch_delay) { if (check_branch_delay)
{
DevCon.Warning("Branch %x in delay slot!", cpuRegs.code); DevCon.Warning("Branch %x in delay slot!", cpuRegs.code);
_clearNeededX86regs(); _clearNeededX86regs();
_clearNeededXMMregs(); _clearNeededXMMregs();
@ -1428,16 +1487,21 @@ void recompileNextInstruction(int delayslot)
} }
} }
// Check for NOP // Check for NOP
if (cpuRegs.code == 0x00000000) { if (cpuRegs.code == 0x00000000)
{
// Note: Tests on a ps2 suggested more like 5 cycles for a NOP. But there's many factors in this.. // Note: Tests on a ps2 suggested more like 5 cycles for a NOP. But there's many factors in this..
s_nBlockCycles += 9 * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); s_nBlockCycles += 9 * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1));
} }
else { else
{
//If the COP0 DIE bit is disabled, cycles should be doubled. //If the COP0 DIE bit is disabled, cycles should be doubled.
s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1));
try { try
{
opcode.recompile(); opcode.recompile();
} catch (Exception::FailedToAllocateRegister&) { }
catch (Exception::FailedToAllocateRegister&)
{
// Fall back to the interpreter // Fall back to the interpreter
recCall(opcode.interpret); recCall(opcode.interpret);
#if 0 #if 0
@ -1447,7 +1511,8 @@ void recompileNextInstruction(int delayslot)
} }
} }
if (!delayslot && (_getNumXMMwrite() > 2)) _flushXMMunused(); if (!delayslot && (_getNumXMMwrite() > 2))
_flushXMMunused();
//CHECK_XMMCHANGED(); //CHECK_XMMCHANGED();
_clearNeededX86regs(); _clearNeededX86regs();
@ -1457,7 +1522,8 @@ void recompileNextInstruction(int delayslot)
// _flushCachedRegs(); // _flushCachedRegs();
// g_cpuHasConstReg = 1; // g_cpuHasConstReg = 1;
if (delayslot) { if (delayslot)
{
pc += 4; pc += 4;
g_cpuFlushedPC = false; g_cpuFlushedPC = false;
g_cpuFlushedCode = false; g_cpuFlushedCode = false;
@ -1687,17 +1753,22 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size)
} }
// Skip MPEG Game-Fix // Skip MPEG Game-Fix
bool skipMPEG_By_Pattern(u32 sPC) { bool skipMPEG_By_Pattern(u32 sPC)
{
if (!CHECK_SKIPMPEGHACK) return 0; if (!CHECK_SKIPMPEGHACK)
return 0;
// sceMpegIsEnd: lw reg, 0x40(a0); jr ra; lw v0, 0(reg) // sceMpegIsEnd: lw reg, 0x40(a0); jr ra; lw v0, 0(reg)
if ((s_nEndBlock == sPC + 12) && (memRead32(sPC + 4) == 0x03e00008)) { if ((s_nEndBlock == sPC + 12) && (memRead32(sPC + 4) == 0x03e00008))
{
u32 code = memRead32(sPC); u32 code = memRead32(sPC);
u32 p1 = 0x8c800040; u32 p1 = 0x8c800040;
u32 p2 = 0x8c020000 | (code & 0x1f0000) << 5; u32 p2 = 0x8c020000 | (code & 0x1f0000) << 5;
if ((code & 0xffe0ffff) != p1) return 0; if ((code & 0xffe0ffff) != p1)
if (memRead32(sPC+8) != p2) return 0; return 0;
if (memRead32(sPC + 8) != p2)
return 0;
xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[0]], 1); xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[0]], 1);
xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[1]], 0);
xMOV(eax, ptr32[&cpuRegs.GPR.n.ra.UL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.n.ra.UL[0]]);
@ -1727,21 +1798,25 @@ static void __fastcall recRecompile( const u32 startpc )
u32 usecop2; u32 usecop2;
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
if (dumplog & 4) iDumpRegisters(startpc, 0); if (dumplog & 4)
iDumpRegisters(startpc, 0);
#endif #endif
pxAssert(startpc); pxAssert(startpc);
// if recPtr reached the mem limit reset whole mem // if recPtr reached the mem limit reset whole mem
if (recPtr >= (recMem->GetPtrEnd() - _64kb)) { if (recPtr >= (recMem->GetPtrEnd() - _64kb))
{
eeRecNeedsReset = true; eeRecNeedsReset = true;
} }
else if ((recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64) { else if ((recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64)
{
Console.WriteLn("EE recompiler stack reset"); Console.WriteLn("EE recompiler stack reset");
eeRecNeedsReset = true; eeRecNeedsReset = true;
} }
if (eeRecNeedsReset) recResetRaw(); if (eeRecNeedsReset)
recResetRaw();
xSetPtr(recPtr); xSetPtr(recPtr);
recPtr = xGetAlignedCallTarget(); recPtr = xGetAlignedCallTarget();
@ -1784,8 +1859,7 @@ static void __fastcall recRecompile( const u32 startpc )
g_eeloadExec = EELOAD_START + 0x2B8; g_eeloadExec = EELOAD_START + 0x2B8;
else if (typeAexecjump >> 26 == 3) // JAL to 0x82170 else if (typeAexecjump >> 26 == 3) // JAL to 0x82170
g_eeloadExec = EELOAD_START + 0x170; g_eeloadExec = EELOAD_START + 0x170;
else // There might be other types of EELOAD, because these models' BIOSs have not been examined: 18000, 3500x, 3700x, else // There might be other types of EELOAD, because these models' BIOSs have not been examined: 18000, 3500x, 3700x, 5500x, and 7900x. However, all BIOS versions have been examined except for v1.01 and v1.10.
// 5500x, and 7900x. However, all BIOS versions have been examined except for v1.01 and v1.10.
Console.WriteLn("recRecompile: Could not enable launch arguments for fast boot mode; unidentified BIOS version! Please report this to the PCSX2 developers."); Console.WriteLn("recRecompile: Could not enable launch arguments for fast boot mode; unidentified BIOS version! Please report this to the PCSX2 developers.");
} }
@ -1802,7 +1876,8 @@ static void __fastcall recRecompile( const u32 startpc )
xFastCall((void*)eeloadHook2); xFastCall((void*)eeloadHook2);
// this is the only way patches get applied, doesn't depend on a hack // this is the only way patches get applied, doesn't depend on a hack
if (g_GameLoading && HWADDR(startpc) == ElfEntry) { if (g_GameLoading && HWADDR(startpc) == ElfEntry)
{
Console.WriteLn(L"Elf entry point @ 0x%08x about to get recompiled. Load patches first.", startpc); Console.WriteLn(L"Elf entry point @ 0x%08x about to get recompiled. Load patches first.", startpc);
xFastCall((void*)eeGameStarting); xFastCall((void*)eeGameStarting);
@ -1831,11 +1906,15 @@ static void __fastcall recRecompile( const u32 startpc )
xFastCall((void*)PreBlockCheck, pc); xFastCall((void*)PreBlockCheck, pc);
} }
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack)
if (pc == 0x33ad48 || pc == 0x35060c) { {
if (pc == 0x33ad48 || pc == 0x35060c)
{
// 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache // 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache
xFastCall((void*)GoemonPreloadTlb); xFastCall((void*)GoemonPreloadTlb);
} else if (pc == 0x3563b8) { }
else if (pc == 0x3563b8)
{
// Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation. // Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
eeRecNeedsReset = true; eeRecNeedsReset = true;
// 0x3563b8 is the start address of the function that invalidate entry in TLB cache // 0x3563b8 is the start address of the function that invalidate entry in TLB cache
@ -1858,7 +1937,8 @@ static void __fastcall recRecompile( const u32 startpc )
goto StartRecomp; goto StartRecomp;
} }
while(1) { while (1)
{
BASEBLOCK* pblock = PC_GETBLOCK(i); BASEBLOCK* pblock = PC_GETBLOCK(i);
// stop before breakpoints // stop before breakpoints
@ -1890,9 +1970,11 @@ static void __fastcall recRecompile( const u32 startpc )
//HUH ? PSM ? whut ? THIS IS VIRTUAL ACCESS GOD DAMMIT //HUH ? PSM ? whut ? THIS IS VIRTUAL ACCESS GOD DAMMIT
cpuRegs.code = *(int*)PSM(i); cpuRegs.code = *(int*)PSM(i);
switch(cpuRegs.code >> 26) { switch (cpuRegs.code >> 26)
{
case 0: // special case 0: // special
if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR if (_Funct_ == 8 || _Funct_ == 9) // JR, JALR
{
s_nEndBlock = i + 8; s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
@ -1900,11 +1982,14 @@ static void __fastcall recRecompile( const u32 startpc )
case 1: // regimm case 1: // regimm
if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) { if (_Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20))
{
// branches // branches
s_branchTo = _Imm_ * 4 + i + 4; s_branchTo = _Imm_ * 4 + i + 4;
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; if (s_branchTo > startpc && s_branchTo < i)
else s_nEndBlock = i+8; s_nEndBlock = s_branchTo;
else
s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
@ -1920,14 +2005,18 @@ static void __fastcall recRecompile( const u32 startpc )
case 4: case 5: case 6: case 7: case 4: case 5: case 6: case 7:
case 20: case 21: case 22: case 23: case 20: case 21: case 22: case 23:
s_branchTo = _Imm_ * 4 + i + 4; s_branchTo = _Imm_ * 4 + i + 4;
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; if (s_branchTo > startpc && s_branchTo < i)
else s_nEndBlock = i+8; s_nEndBlock = s_branchTo;
else
s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
case 16: // cp0 case 16: // cp0
if( _Rs_ == 16 ) { if (_Rs_ == 16)
if( _Funct_ == 24 ) { // eret {
if (_Funct_ == 24) // eret
{
s_nEndBlock = i + 4; s_nEndBlock = i + 4;
goto StartRecomp; goto StartRecomp;
} }
@ -1937,12 +2026,15 @@ static void __fastcall recRecompile( const u32 startpc )
case 17: // cp1 case 17: // cp1
case 18: // cp2 case 18: // cp2
if( _Rs_ == 8 ) { if (_Rs_ == 8)
{
// BC1F, BC1T, BC1FL, BC1TL // BC1F, BC1T, BC1FL, BC1TL
// BC2F, BC2T, BC2FL, BC2TL // BC2F, BC2T, BC2FL, BC2TL
s_branchTo = _Imm_ * 4 + i + 4; s_branchTo = _Imm_ * 4 + i + 4;
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; if (s_branchTo > startpc && s_branchTo < i)
else s_nEndBlock = i+8; s_nEndBlock = s_branchTo;
else
s_nEndBlock = i + 8;
goto StartRecomp; goto StartRecomp;
} }
@ -1964,12 +2056,14 @@ StartRecomp:
// without a significant loss in cycle accuracy is with a division, but games would probably // without a significant loss in cycle accuracy is with a division, but games would probably
// be happy with time wasting loops completing in 0 cycles and timeouts waiting forever. // be happy with time wasting loops completing in 0 cycles and timeouts waiting forever.
s_nBlockFF = false; s_nBlockFF = false;
if (s_branchTo == startpc) { if (s_branchTo == startpc)
{
s_nBlockFF = true; s_nBlockFF = true;
u32 reads = 0, loads = 1; u32 reads = 0, loads = 1;
for (i = startpc; i < s_nEndBlock; i += 4) { for (i = startpc; i < s_nEndBlock; i += 4)
{
if (i == s_nEndBlock - 8) if (i == s_nEndBlock - 8)
continue; continue;
cpuRegs.code = *(u32*)PSM(i); cpuRegs.code = *(u32*)PSM(i);
@ -1982,13 +2076,15 @@ StartRecomp:
// imm arithmetic // imm arithmetic
else if ((_Opcode_ & 070) == 010 || (_Opcode_ & 076) == 030) else if ((_Opcode_ & 070) == 010 || (_Opcode_ & 076) == 030)
{ {
if (loads & 1 << _Rs_) { if (loads & 1 << _Rs_)
{
loads |= 1 << _Rt_; loads |= 1 << _Rt_;
continue; continue;
} }
else else
reads |= 1 << _Rs_; reads |= 1 << _Rs_;
if (reads & 1 << _Rt_) { if (reads & 1 << _Rt_)
{
s_nBlockFF = false; s_nBlockFF = false;
break; break;
} }
@ -1996,13 +2092,15 @@ StartRecomp:
// common register arithmetic instructions // common register arithmetic instructions
else if (_Opcode_ == 0 && (_Funct_ & 060) == 040 && (_Funct_ & 076) != 050) else if (_Opcode_ == 0 && (_Funct_ & 060) == 040 && (_Funct_ & 076) != 050)
{ {
if (loads & 1 << _Rs_ && loads & 1 << _Rt_) { if (loads & 1 << _Rs_ && loads & 1 << _Rt_)
{
loads |= 1 << _Rd_; loads |= 1 << _Rd_;
continue; continue;
} }
else else
reads |= 1 << _Rs_ | 1 << _Rt_; reads |= 1 << _Rs_ | 1 << _Rt_;
if (reads & 1 << _Rd_) { if (reads & 1 << _Rd_)
{
s_nBlockFF = false; s_nBlockFF = false;
break; break;
} }
@ -2010,13 +2108,15 @@ StartRecomp:
// loads // loads
else if ((_Opcode_ & 070) == 040 || (_Opcode_ & 076) == 032 || _Opcode_ == 067) else if ((_Opcode_ & 070) == 040 || (_Opcode_ & 076) == 032 || _Opcode_ == 067)
{ {
if (loads & 1 << _Rs_) { if (loads & 1 << _Rs_)
{
loads |= 1 << _Rt_; loads |= 1 << _Rt_;
continue; continue;
} }
else else
reads |= 1 << _Rs_; reads |= 1 << _Rs_;
if (reads & 1 << _Rt_) { if (reads & 1 << _Rt_)
{
s_nBlockFF = false; s_nBlockFF = false;
break; break;
} }
@ -2038,7 +2138,8 @@ StartRecomp:
{ {
EEINST* pcur; EEINST* pcur;
if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { if (s_nInstCacheSize < (s_nEndBlock - startpc) / 4 + 1)
{
free(s_pInstCache); free(s_pInstCache);
s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10; s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10;
s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize);
@ -2049,7 +2150,8 @@ StartRecomp:
_recClearInst(pcur); _recClearInst(pcur);
pcur->info = 0; pcur->info = 0;
for(i = s_nEndBlock; i > startpc; i -= 4 ) { for (i = s_nEndBlock; i > startpc; i -= 4)
{
cpuRegs.code = *(int*)PSM(i - 4); cpuRegs.code = *(int*)PSM(i - 4);
pcur[-1] = pcur[0]; pcur[-1] = pcur[0];
pcur--; pcur--;
@ -2061,14 +2163,17 @@ StartRecomp:
usecop2 = 0; usecop2 = 0;
g_pCurInstInfo = s_pInstCache; g_pCurInstInfo = s_pInstCache;
for(i = startpc; i < s_nEndBlock; i += 4) { for (i = startpc; i < s_nEndBlock; i += 4)
{
g_pCurInstInfo++; g_pCurInstInfo++;
cpuRegs.code = *(u32*)PSM(i); cpuRegs.code = *(u32*)PSM(i);
// cop2 // // cop2 //
if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { if (g_pCurInstInfo->info & EEINSTINFO_COP2)
{
if( !usecop2 ) { if (!usecop2)
{
// init // init
usecop2 = 1; usecop2 = 1;
} }
@ -2081,14 +2186,17 @@ StartRecomp:
// This *is* important because g_pCurInstInfo is checked a bit later on and // This *is* important because g_pCurInstInfo is checked a bit later on and
// if it's not equal to s_pInstCache it handles recompilation differently. // if it's not equal to s_pInstCache it handles recompilation differently.
// ... but the empty if() conditional inside the for loop is still amusing. >_< // ... but the empty if() conditional inside the for loop is still amusing. >_<
if( usecop2 ) { if (usecop2)
{
// add necessary mac writebacks // add necessary mac writebacks
g_pCurInstInfo = s_pInstCache; g_pCurInstInfo = s_pInstCache;
for(i = startpc; i < s_nEndBlock-4; i += 4) { for (i = startpc; i < s_nEndBlock - 4; i += 4)
{
g_pCurInstInfo++; g_pCurInstInfo++;
if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { if (g_pCurInstInfo->info & EEINSTINFO_COP2)
{
} }
} }
} }
@ -2104,7 +2212,8 @@ StartRecomp:
} }
} }
if (dumplog & 1) iDumpBlock(startpc, recPtr); if (dumplog & 1)
iDumpBlock(startpc, recPtr);
#endif #endif
// Detect and handle self-modified code // Detect and handle self-modified code
@ -2113,27 +2222,32 @@ StartRecomp:
// Skip Recompilation if sceMpegIsEnd Pattern detected // Skip Recompilation if sceMpegIsEnd Pattern detected
bool doRecompilation = !skipMPEG_By_Pattern(startpc); bool doRecompilation = !skipMPEG_By_Pattern(startpc);
if (doRecompilation) { if (doRecompilation)
{
// Finally: Generate x86 recompiled code! // Finally: Generate x86 recompiled code!
g_pCurInstInfo = s_pInstCache; g_pCurInstInfo = s_pInstCache;
while (!g_branch && pc < s_nEndBlock) { while (!g_branch && pc < s_nEndBlock)
{
recompileNextInstruction(0); // For the love of recursion, batman! recompileNextInstruction(0); // For the love of recursion, batman!
} }
} }
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
if (dumplog & 1) iDumpBlock(startpc, recPtr); if (dumplog & 1)
iDumpBlock(startpc, recPtr);
#endif #endif
pxAssert((pc - startpc) >> 2 <= 0xffff); pxAssert((pc - startpc) >> 2 <= 0xffff);
s_pCurBlockEx->size = (pc - startpc) >> 2; s_pCurBlockEx->size = (pc - startpc) >> 2;
if (HWADDR(pc) <= Ps2MemSize::MainRam) { if (HWADDR(pc) <= Ps2MemSize::MainRam)
{
BASEBLOCKEX* oldBlock; BASEBLOCKEX* oldBlock;
int i; int i;
i = recBlocks.LastIndex(HWADDR(pc) - 4); i = recBlocks.LastIndex(HWADDR(pc) - 4);
while (oldBlock = recBlocks[i--]) { while (oldBlock = recBlocks[i--])
{
if (oldBlock == s_pCurBlockEx) if (oldBlock == s_pCurBlockEx)
continue; continue;
if (oldBlock->startpc >= HWADDR(pc)) if (oldBlock->startpc >= HWADDR(pc))
@ -2156,7 +2270,8 @@ StartRecomp:
s_pCurBlock->SetFnptr((uptr)recPtr); s_pCurBlock->SetFnptr((uptr)recPtr);
for(i = 1; i < (u32)s_pCurBlockEx->size; i++) { for (i = 1; i < (u32)s_pCurBlockEx->size; i++)
{
if ((uptr)JITCompile == s_pCurBlock[i].GetFnptr()) if ((uptr)JITCompile == s_pCurBlock[i].GetFnptr())
s_pCurBlock[i].SetFnptr((uptr)JITCompileInBlock); s_pCurBlock[i].SetFnptr((uptr)JITCompileInBlock);
} }
@ -2180,7 +2295,8 @@ StartRecomp:
if (g_branch) if (g_branch)
pxAssert(!willbranch3); pxAssert(!willbranch3);
if( willbranch3 || !g_branch) { if (willbranch3 || !g_branch)
{
iFlushCall(FLUSH_EVERYTHING); iFlushCall(FLUSH_EVERYTHING);
@ -2232,7 +2348,8 @@ static void recThrowException( const BaseR5900Exception& ex )
#if PCSX2_SEH #if PCSX2_SEH
ex.Rethrow(); ex.Rethrow();
#else #else
if (!eeCpuExecuting) ex.Rethrow(); if (!eeCpuExecuting)
ex.Rethrow();
m_cpuException = std::unique_ptr<BaseR5900Exception>(ex.Clone()); m_cpuException = std::unique_ptr<BaseR5900Exception>(ex.Clone());
recExitExecution(); recExitExecution();
#endif #endif
@ -2243,7 +2360,8 @@ static void recThrowException( const BaseException& ex )
#if PCSX2_SEH #if PCSX2_SEH
ex.Rethrow(); ex.Rethrow();
#else #else
if (!eeCpuExecuting) ex.Rethrow(); if (!eeCpuExecuting)
ex.Rethrow();
m_Exception = ScopedExcept(ex.Clone()); m_Exception = ScopedExcept(ex.Clone());
recExitExecution(); recExitExecution();
#endif #endif

View File

@ -24,8 +24,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Register arithmetic * * Register arithmetic *
@ -118,15 +117,19 @@ void recDADD_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg]; GPR_reg64 cval = g_cpuConstRegs[creg];
if (_Rd_ == vreg) { if (_Rd_ == vreg)
if (!cval.SD[0]) {
return; // no-op if (!cval.SD[0]) // no-op
return;
xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]); xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].SL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[vreg].SL[1]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[vreg].SL[1]]);
if (cval.SD[0]) { if (cval.SD[0])
{
xADD(eax, cval.SL[0]); xADD(eax, cval.SL[0]);
xADC(edx, cval.SL[1]); xADC(edx, cval.SL[1]);
} }
@ -155,7 +158,8 @@ void recDADD_(int info)
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]);
if (_Rd_ == _Rs_ && _Rs_ == _Rt_) { if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
{
xSHLD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], eax, 1); xSHLD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], eax, 1);
xSHL(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 1); xSHL(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 1);
return; return;
@ -163,14 +167,19 @@ void recDADD_(int info)
xMOV(edx, ptr32[&cpuRegs.GPR.r[rt].SL[1]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[rt].SL[1]]);
if (_Rd_ == rs) { if (_Rd_ == rs)
{
xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
return; return;
} else if (rs == rt) { }
else if (rs == rt)
{
xADD(eax, eax); xADD(eax, eax);
xADC(edx, edx); xADC(edx, edx);
} else { }
else
{
xADD(eax, ptr32[&cpuRegs.GPR.r[rs].SL[0]]); xADD(eax, ptr32[&cpuRegs.GPR.r[rs].SL[0]]);
xADC(edx, ptr32[&cpuRegs.GPR.r[rs].SL[1]]); xADC(edx, ptr32[&cpuRegs.GPR.r[rs].SL[1]]);
} }
@ -221,7 +230,8 @@ void recSUB_(int info)
{ {
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
if (_Rs_ == _Rt_) { if (_Rs_ == _Rt_)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
return; return;
@ -252,7 +262,8 @@ void recDSUB_consts(int info)
GPR_reg64 sval = g_cpuConstRegs[_Rs_]; GPR_reg64 sval = g_cpuConstRegs[_Rs_];
if (!sval.SD[0] && _Rd_ == _Rt_) { if (!sval.SD[0] && _Rd_ == _Rt_)
{
/* To understand this 64-bit negate, consider that a negate in 2's complement /* To understand this 64-bit negate, consider that a negate in 2's complement
* is a NOT then an ADD 1. The upper word should only have the NOT stage unless * is a NOT then an ADD 1. The upper word should only have the NOT stage unless
* the ADD overflows. The ADD only overflows if the lower word is 0. * the ADD overflows. The ADD only overflows if the lower word is 0.
@ -263,7 +274,9 @@ void recDSUB_consts(int info)
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
xNEG(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]]); xNEG(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]]);
return; return;
} else { }
else
{
xMOV(eax, sval.SL[0]); xMOV(eax, sval.SL[0]);
xMOV(edx, sval.SL[1]); xMOV(edx, sval.SL[1]);
} }
@ -280,13 +293,17 @@ void recDSUB_constt(int info)
GPR_reg64 tval = g_cpuConstRegs[_Rt_]; GPR_reg64 tval = g_cpuConstRegs[_Rt_];
if (_Rd_ == _Rs_) { if (_Rd_ == _Rs_)
{
xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]); xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]);
xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], tval.SL[1]); xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], tval.SL[1]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]);
if (tval.SD[0]) { if (tval.SD[0])
{
xSUB(eax, tval.SL[0]); xSUB(eax, tval.SL[0]);
xSBB(edx, tval.SL[1]); xSBB(edx, tval.SL[1]);
} }
@ -299,15 +316,20 @@ void recDSUB_(int info)
{ {
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
if (_Rs_ == _Rt_) { if (_Rs_ == _Rt_)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
} else if (_Rd_ == _Rs_) { }
else if (_Rd_ == _Rs_)
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]);
xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]);
xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]); xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
@ -337,13 +359,19 @@ void recAND_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg]; GPR_reg64 cval = g_cpuConstRegs[creg];
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (!cval.UL[i]) { {
if (!cval.UL[i])
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0);
} else if (_Rd_ == vreg) { }
else if (_Rd_ == vreg)
{
if (cval.SL[i] != -1) if (cval.SL[i] != -1)
xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]);
if (cval.SL[i] != -1) if (cval.SL[i] != -1)
xAND(eax, cval.UL[i]); xAND(eax, cval.UL[i]);
@ -370,13 +398,17 @@ void recAND_(int info)
if (_Rd_ == _Rt_) if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_; rs = _Rt_, rt = _Rs_;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (_Rd_ == rs) { {
if (_Rd_ == rs)
{
if (rs == rt) if (rs == rt)
continue; continue;
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]);
if (rs != rt) if (rs != rt)
xAND(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xAND(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
@ -399,13 +431,19 @@ void recOR_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg]; GPR_reg64 cval = g_cpuConstRegs[creg];
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (cval.SL[i] == -1) { {
if (cval.SL[i] == -1)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], -1); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], -1);
} else if (_Rd_ == vreg) { }
else if (_Rd_ == vreg)
{
if (cval.UL[i]) if (cval.UL[i])
xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]);
if (cval.UL[i]) if (cval.UL[i])
xOR(eax, cval.UL[i]); xOR(eax, cval.UL[i]);
@ -432,13 +470,17 @@ void recOR_(int info)
if (_Rd_ == _Rt_) if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_; rs = _Rt_, rt = _Rs_;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (_Rd_ == rs) { {
if (_Rd_ == rs)
{
if (rs == rt) if (rs == rt)
continue; continue;
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]);
if (rs != rt) if (rs != rt)
xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
@ -461,11 +503,15 @@ void recXOR_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg]; GPR_reg64 cval = g_cpuConstRegs[creg];
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (_Rd_ == vreg) { {
if (_Rd_ == vreg)
{
if (cval.UL[i]) if (cval.UL[i])
xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]);
if (cval.UL[i]) if (cval.UL[i])
xXOR(eax, cval.UL[i]); xXOR(eax, cval.UL[i]);
@ -492,13 +538,19 @@ void recXOR_(int info)
if (_Rd_ == _Rt_) if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_; rs = _Rt_, rt = _Rs_;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (rs == rt) { {
if (rs == rt)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0);
} else if (_Rd_ == rs) { }
else if (_Rd_ == rs)
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]);
xXOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xXOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
@ -520,12 +572,16 @@ void recNOR_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg]; GPR_reg64 cval = g_cpuConstRegs[creg];
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (_Rd_ == vreg) { {
if (_Rd_ == vreg)
{
if (cval.UL[i]) if (cval.UL[i])
xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]);
xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]);
if (cval.UL[i]) if (cval.UL[i])
xOR(eax, cval.UL[i]); xOR(eax, cval.UL[i]);
@ -553,16 +609,21 @@ void recNOR_(int info)
if (_Rd_ == _Rt_) if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_; rs = _Rt_, rt = _Rs_;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
if (_Rd_ == rs) { {
if (rs == rt) { if (_Rd_ == rs)
{
if (rs == rt)
{
xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]);
continue; continue;
} }
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]);
} else { }
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]);
if (rs != rt) if (rs != rt)
xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
@ -672,4 +733,6 @@ EERECOMPILE_CODE0(SLTU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED);
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -24,8 +24,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Arithmetic with immediate operand * * Arithmetic with immediate operand *
@ -59,15 +58,18 @@ void recADDI_(int info)
{ {
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
if ( _Rt_ == _Rs_ ) { if (_Rt_ == _Rs_)
{
// must perform the ADD unconditionally, to maintain flags status: // must perform the ADD unconditionally, to maintain flags status:
xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_); xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_);
_signExtendSFtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[1]); _signExtendSFtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[1]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
if ( _Imm_ != 0 ) xADD(eax, _Imm_ ); if (_Imm_ != 0)
xADD(eax, _Imm_);
xCDQ(); xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
@ -93,11 +95,13 @@ void recDADDI_(int info)
{ {
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
if( _Rt_ == _Rs_ ) { if (_Rt_ == _Rs_)
{
xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_); xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_);
xADC(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], _Imm_ < 0 ? 0xffffffff : 0); xADC(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], _Imm_ < 0 ? 0xffffffff : 0);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
@ -194,20 +198,24 @@ void recLogicalOpI(int info, int op)
{ {
if (_ImmU_ != 0) if (_ImmU_ != 0)
{ {
if( _Rt_ == _Rs_ ) { if (_Rt_ == _Rs_)
switch(op) { {
switch (op)
{
case 0: xAND(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; case 0: xAND(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break;
case 1: xOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; case 1: xOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break;
case 2: xXOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; case 2: xXOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break;
default: pxAssert(0); default: pxAssert(0);
} }
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
if (op != 0) if (op != 0)
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
switch(op) { switch (op)
{
case 0: xAND(eax, _ImmU_); break; case 0: xAND(eax, _ImmU_); break;
case 1: xOR(eax, _ImmU_); break; case 1: xOR(eax, _ImmU_); break;
case 2: xXOR(eax, _ImmU_); break; case 2: xXOR(eax, _ImmU_); break;
@ -219,18 +227,22 @@ void recLogicalOpI(int info, int op)
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
} }
if( op == 0 ) { if (op == 0)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
} }
} }
else else
{ {
if( op == 0 ) { if (op == 0)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
} }
else { else
if( _Rt_ != _Rs_ ) { {
if (_Rt_ != _Rs_)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
@ -275,4 +287,6 @@ EERECOMPILE_CODEX(eeRecompileCode1, XORI);
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -26,8 +26,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Register branch logic * * Register branch logic *
@ -58,16 +57,20 @@ REC_SYS_DEL(BGEZALL, 31);
void recSetBranchEQ(int info, int bne, int process) void recSetBranchEQ(int info, int bne, int process)
{ {
if( info & PROCESS_EE_XMM ) { if (info & PROCESS_EE_XMM)
{
int t0reg; int t0reg;
if( process & PROCESS_CONSTS ) { if (process & PROCESS_CONSTS)
if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_) ) { {
if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))
{
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
xmmregs[EEREC_T].inuse = 0; xmmregs[EEREC_T].inuse = 0;
t0reg = EEREC_T; t0reg = EEREC_T;
} }
else { else
{
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
} }
@ -76,15 +79,19 @@ void recSetBranchEQ(int info, int bne, int process)
xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
if( t0reg != EEREC_T ) _freeXMMreg(t0reg); if (t0reg != EEREC_T)
_freeXMMreg(t0reg);
} }
else if( process & PROCESS_CONSTT ) { else if (process & PROCESS_CONSTT)
if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { {
if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
{
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
xmmregs[EEREC_S].inuse = 0; xmmregs[EEREC_S].inuse = 0;
t0reg = EEREC_S; t0reg = EEREC_S;
} }
else { else
{
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
} }
@ -92,29 +99,35 @@ void recSetBranchEQ(int info, int bne, int process)
_flushConstReg(_Rt_); _flushConstReg(_Rt_);
xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
if( t0reg != EEREC_S ) _freeXMMreg(t0reg); if (t0reg != EEREC_S)
_freeXMMreg(t0reg);
} }
else { else
{
if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
{
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
xmmregs[EEREC_S].inuse = 0; xmmregs[EEREC_S].inuse = 0;
t0reg = EEREC_S; t0reg = EEREC_S;
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
} }
else if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_) ) { else if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))
{
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
xmmregs[EEREC_T].inuse = 0; xmmregs[EEREC_T].inuse = 0;
t0reg = EEREC_T; t0reg = EEREC_T;
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
} }
else { else
{
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
} }
if( t0reg != EEREC_S && t0reg != EEREC_T ) _freeXMMreg(t0reg); if (t0reg != EEREC_S && t0reg != EEREC_T)
_freeXMMreg(t0reg);
} }
xMOVMSKPS(eax, xRegisterSSE(t0reg)); xMOVMSKPS(eax, xRegisterSSE(t0reg));
@ -124,29 +137,36 @@ void recSetBranchEQ(int info, int bne, int process)
xAND(al, 3); xAND(al, 3);
xCMP(al, 0x3); xCMP(al, 0x3);
if( bne ) j32Ptr[ 1 ] = JE32( 0 ); if (bne)
else j32Ptr[ 0 ] = j32Ptr[ 1 ] = JNE32( 0 ); j32Ptr[1] = JE32(0);
else
j32Ptr[0] = j32Ptr[1] = JNE32(0);
} }
else { else
{
_eeFlushAllUnused(); _eeFlushAllUnused();
if( bne ) { if (bne)
if( process & PROCESS_CONSTS ) { {
if (process & PROCESS_CONSTS)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
j8Ptr[0] = JNE8(0); j8Ptr[0] = JNE8(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
j32Ptr[1] = JE32(0); j32Ptr[1] = JE32(0);
} }
else if( process & PROCESS_CONSTT ) { else if (process & PROCESS_CONSTT)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
j8Ptr[0] = JNE8(0); j8Ptr[0] = JNE8(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
j32Ptr[1] = JE32(0); j32Ptr[1] = JE32(0);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
j8Ptr[0] = JNE8(0); j8Ptr[0] = JNE8(0);
@ -158,23 +178,27 @@ void recSetBranchEQ(int info, int bne, int process)
x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[0]);
} }
else { else
{
// beq // beq
if( process & PROCESS_CONSTS ) { if (process & PROCESS_CONSTS)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
j32Ptr[0] = JNE32(0); j32Ptr[0] = JNE32(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
j32Ptr[1] = JNE32(0); j32Ptr[1] = JNE32(0);
} }
else if( process & PROCESS_CONSTT ) { else if (process & PROCESS_CONSTT)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
j32Ptr[0] = JNE32(0); j32Ptr[0] = JNE32(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
j32Ptr[1] = JNE32(0); j32Ptr[1] = JNE32(0);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
j32Ptr[0] = JNE32(0); j32Ptr[0] = JNE32(0);
@ -193,22 +217,27 @@ void recSetBranchL(int ltz)
{ {
int regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); int regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
if( regs >= 0 ) { if (regs >= 0)
{
xMOVMSKPS(eax, xRegisterSSE(regs)); xMOVMSKPS(eax, xRegisterSSE(regs));
_eeFlushAllUnused(); _eeFlushAllUnused();
xTEST(al, 2); xTEST(al, 2);
if( ltz ) j32Ptr[ 0 ] = JZ32( 0 ); if (ltz)
else j32Ptr[ 0 ] = JNZ32( 0 ); j32Ptr[0] = JZ32(0);
else
j32Ptr[0] = JNZ32(0);
return; return;
} }
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
if( ltz ) j32Ptr[ 0 ] = JGE32( 0 ); if (ltz)
else j32Ptr[ 0 ] = JL32( 0 ); j32Ptr[0] = JGE32(0);
else
j32Ptr[0] = JL32(0);
_clearNeededXMMregs(); _clearNeededXMMregs();
} }
@ -314,12 +343,14 @@ EERECOMPILE_CODE0(BNE, XMMINFO_READS|XMMINFO_READT);
//// BEQL //// BEQL
void recBEQL_const() void recBEQL_const()
{ {
if( g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0] ) { if (g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0])
{
u32 branchTo = ((s32)_Imm_ * 4) + pc; u32 branchTo = ((s32)_Imm_ * 4) + pc;
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
else { else
{
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
} }
} }
@ -349,12 +380,14 @@ EERECOMPILE_CODE0(BEQL, XMMINFO_READS|XMMINFO_READT);
//// BNEL //// BNEL
void recBNEL_const() void recBNEL_const()
{ {
if( g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0] ) { if (g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0])
{
u32 branchTo = ((s32)_Imm_ * 4) + pc; u32 branchTo = ((s32)_Imm_ * 4) + pc;
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
else { else
{
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
} }
} }
@ -414,7 +447,8 @@ void recBLTZAL()
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -455,7 +489,8 @@ void recBGEZAL()
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -496,10 +531,12 @@ void recBLTZALL()
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
@ -532,10 +569,12 @@ void recBGEZALL()
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
@ -564,7 +603,8 @@ void recBLEZ()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -611,7 +651,8 @@ void recBGTZ()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] > 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] > 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -658,7 +699,8 @@ void recBLTZ()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -693,7 +735,8 @@ void recBGEZ()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
branchTo = pc + 4; branchTo = pc + 4;
@ -728,10 +771,12 @@ void recBLTZL()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
@ -760,10 +805,12 @@ void recBGEZL()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
} }
@ -784,7 +831,6 @@ void recBGEZL()
/********************************************************* /*********************************************************
* Register branch logic Likely * * Register branch logic Likely *
* Format: OP rs, offset * * Format: OP rs, offset *
@ -799,10 +845,12 @@ void recBLEZL()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
_clearNeededXMMregs(); _clearNeededXMMregs();
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
@ -843,10 +891,12 @@ void recBGTZL()
_eeFlushAllUnused(); _eeFlushAllUnused();
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
if (!(g_cpuConstRegs[_Rs_].SD[0] > 0)) if (!(g_cpuConstRegs[_Rs_].SD[0] > 0))
SetBranchImm(pc + 4); SetBranchImm(pc + 4);
else { else
{
_clearNeededXMMregs(); _clearNeededXMMregs();
recompileNextInstruction(1); recompileNextInstruction(1);
SetBranchImm(branchTo); SetBranchImm(branchTo);
@ -880,4 +930,6 @@ void recBGTZL()
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -26,8 +26,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Jump to target * * Jump to target *
@ -106,7 +105,8 @@ void recJALR()
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(calleeSavedReg2d, _Rs_); _eeMoveGPRtoR(calleeSavedReg2d, _Rs_);
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack)
{
xMOV(ecx, calleeSavedReg2d); xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P(); vtlb_DynV2P();
xMOV(calleeSavedReg2d, eax); xMOV(calleeSavedReg2d, eax);
@ -115,10 +115,12 @@ void recJALR()
// int mmreg; // int mmreg;
// if (GPR_IS_CONST1(_Rs_)) // if (GPR_IS_CONST1(_Rs_))
// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0]); // xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0]);
// else { // else
// {
// int mmreg; // int mmreg;
// //
// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { // if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
// {
// xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg)); // xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg));
// } // }
// else { // else {
@ -147,12 +149,14 @@ void recJALR()
_clearNeededXMMregs(); _clearNeededXMMregs();
recompileNextInstruction(1); recompileNextInstruction(1);
if( x86regs[calleeSavedReg2d.GetId()].inuse ) { if (x86regs[calleeSavedReg2d.GetId()].inuse)
{
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK);
xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
} }
else { else
{
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr[&g_recWriteback]);
xMOV(ptr[&cpuRegs.pc], eax); xMOV(ptr[&cpuRegs.pc], eax);
} }
@ -162,4 +166,6 @@ void recJALR()
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -72,18 +72,23 @@ void _eeOnLoadWrite(u32 reg)
{ {
int regt; int regt;
if( !reg ) return; if (!reg)
return;
_eeOnWriteReg(reg, 1); _eeOnWriteReg(reg, 1);
regt = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ); regt = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ);
if( regt >= 0 ) { if (regt >= 0)
if( xmmregs[regt].mode & MODE_WRITE ) { {
if( reg != _Rs_ ) { if (xmmregs[regt].mode & MODE_WRITE)
{
if (reg != _Rs_)
{
xPUNPCK.HQDQ(xRegisterSSE(regt), xRegisterSSE(regt)); xPUNPCK.HQDQ(xRegisterSSE(regt), xRegisterSSE(regt));
xMOVQ(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt)); xMOVQ(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt));
} }
else xMOVH.PS(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt)); else
xMOVH.PS(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt));
} }
xmmregs[regt].inuse = 0; xmmregs[regt].inuse = 0;
} }
@ -242,8 +247,8 @@ void recLQ() { recLoad64(128,false); EE::Profiler.EmitOp(eeOpcode::LQ);}
void recSB() { recStore( 8); EE::Profiler.EmitOp(eeOpcode::SB); } void recSB() { recStore( 8); EE::Profiler.EmitOp(eeOpcode::SB); }
void recSH() { recStore( 16); EE::Profiler.EmitOp(eeOpcode::SH); } void recSH() { recStore( 16); EE::Profiler.EmitOp(eeOpcode::SH); }
void recSW() { recStore( 32); EE::Profiler.EmitOp(eeOpcode::SW); } void recSW() { recStore( 32); EE::Profiler.EmitOp(eeOpcode::SW); }
void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ);}
void recSD() { recStore( 64); EE::Profiler.EmitOp(eeOpcode::SD); } void recSD() { recStore( 64); EE::Profiler.EmitOp(eeOpcode::SD); }
void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ); }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
@ -657,7 +662,6 @@ void recSQC2()
#endif #endif
} } } // end namespace R5900::Dynarec::OpcodeImpl } // namespace OpcodeImpl
} // namespace Dynarec
using namespace R5900::Dynarec; } // namespace R5900
using namespace R5900::Dynarec::OpcodeImpl;

View File

@ -24,8 +24,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Shift arithmetic with constant shift * * Shift arithmetic with constant shift *
@ -60,12 +59,15 @@ REC_FUNC_DEL(MOVN, _Rd_);
void recLUI() void recLUI()
{ {
int mmreg; int mmreg;
if(!_Rt_) return; if (!_Rt_)
return;
_eeOnWriteReg(_Rt_, 1); _eeOnWriteReg(_Rt_, 1);
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0 ) { if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0)
if( xmmregs[mmreg].mode & MODE_WRITE ) { {
if (xmmregs[mmreg].mode & MODE_WRITE)
{
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(mmreg)); xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(mmreg));
} }
xmmregs[mmreg].inuse = 0; xmmregs[mmreg].inuse = 0;
@ -103,29 +105,38 @@ void recMFHILO(int hi)
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE); regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
if( reghi >= 0 ) { if (reghi >= 0)
if( regd >= 0 ) { {
if (regd >= 0)
{
pxAssert(regd != reghi); pxAssert(regd != reghi);
xmmregs[regd].inuse = 0; xmmregs[regd].inuse = 0;
xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], xRegisterSSE(reghi)); xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], xRegisterSSE(reghi));
if( xmmregs[regd].mode & MODE_WRITE ) { if (xmmregs[regd].mode & MODE_WRITE)
{
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UL[2]], xRegisterSSE(regd)); xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UL[2]], xRegisterSSE(regd));
} }
} }
else { else
{
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi)); xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
} }
} }
else { else
if( regd >= 0 ) { {
if( EEINST_ISLIVE2(_Rd_) ) xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[ 0 ] : (uptr)&cpuRegs.LO.UD[ 0 ])]); if (regd >= 0)
else xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[ 0 ] : (uptr)&cpuRegs.LO.UD[ 0 ])]); {
if (EEINST_ISLIVE2(_Rd_))
xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
else
xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
} }
else { else
{
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[0] : (uptr)&cpuRegs.LO.UL[0])]); xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[0] : (uptr)&cpuRegs.LO.UL[0])]);
xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[1] : (uptr)&cpuRegs.LO.UL[1])]); xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[1] : (uptr)&cpuRegs.LO.UL[1])]);
@ -146,8 +157,10 @@ void recMTHILO(int hi)
regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ | MODE_WRITE); reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ | MODE_WRITE);
if( reghi >= 0 ) { if (reghi >= 0)
if( regs >= 0 ) { {
if (regs >= 0)
{
pxAssert(reghi != regs); pxAssert(reghi != regs);
_deleteGPRtoXMMreg(_Rs_, 0); _deleteGPRtoXMMreg(_Rs_, 0);
@ -158,24 +171,29 @@ void recMTHILO(int hi)
xmmregs[regs] = xmmregs[reghi]; xmmregs[regs] = xmmregs[reghi];
xmmregs[reghi].inuse = 0; xmmregs[reghi].inuse = 0;
xmmregs[regs].mode |= MODE_WRITE; xmmregs[regs].mode |= MODE_WRITE;
} }
else { else
{
_flushConstReg(_Rs_); _flushConstReg(_Rs_);
xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
xmmregs[reghi].mode |= MODE_WRITE; xmmregs[reghi].mode |= MODE_WRITE;
} }
} }
else { else
if( regs >= 0 ) { {
if (regs >= 0)
{
xMOVQ(ptr[(void*)(addrhilo)], xRegisterSSE(regs)); xMOVQ(ptr[(void*)(addrhilo)], xRegisterSSE(regs));
} }
else { else
if( GPR_IS_CONST1(_Rs_) ) { {
if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[(u32*)(addrhilo)], g_cpuConstRegs[_Rs_].UL[0]); xMOV(ptr32[(u32*)(addrhilo)], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[(u32*)(addrhilo + 4)], g_cpuConstRegs[_Rs_].UL[1]); xMOV(ptr32[(u32*)(addrhilo + 4)], g_cpuConstRegs[_Rs_].UL[1]);
} }
else { else
{
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(ecx, _Rs_);
_flushEEreg(_Rs_); _flushEEreg(_Rs_);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
@ -225,29 +243,37 @@ void recMFHILO1(int hi)
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE); regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
if( reghi >= 0 ) { if (reghi >= 0)
if( regd >= 0 ) { {
if (regd >= 0)
{
xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(reghi)); xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(reghi));
xmmregs[regd].mode |= MODE_WRITE; xmmregs[regd].mode |= MODE_WRITE;
} }
else { else
{
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi)); xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
} }
} }
else { else
if( regd >= 0 ) { {
if( EEINST_ISLIVE2(_Rd_) ) { if (regd >= 0)
{
if (EEINST_ISLIVE2(_Rd_))
{
xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]); xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e);
} }
else { else
{
xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[1] : (uptr)&cpuRegs.LO.UD[1])]); xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[1] : (uptr)&cpuRegs.LO.UD[1])]);
} }
xmmregs[regd].mode |= MODE_WRITE; xmmregs[regd].mode |= MODE_WRITE;
} }
else { else
{
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[2] : (uptr)&cpuRegs.LO.UL[2])]); xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[2] : (uptr)&cpuRegs.LO.UL[2])]);
xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[3] : (uptr)&cpuRegs.LO.UL[3])]); xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[3] : (uptr)&cpuRegs.LO.UL[3])]);
@ -268,25 +294,33 @@ void recMTHILO1(int hi)
regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE | MODE_READ); reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE | MODE_READ);
if( reghi >= 0 ) { if (reghi >= 0)
if( regs >= 0 ) { {
if (regs >= 0)
{
xPUNPCK.LQDQ(xRegisterSSE(reghi), xRegisterSSE(regs)); xPUNPCK.LQDQ(xRegisterSSE(reghi), xRegisterSSE(regs));
} }
else { else
{
_flushEEreg(_Rs_); _flushEEreg(_Rs_);
xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
} }
} }
else { else
if( regs >= 0 ) { {
if (regs >= 0)
{
xMOVQ(ptr[(void*)(addrhilo + 8)], xRegisterSSE(regs)); xMOVQ(ptr[(void*)(addrhilo + 8)], xRegisterSSE(regs));
} }
else { else
if( GPR_IS_CONST1(_Rs_) ) { {
if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[(u32*)(addrhilo + 8)], g_cpuConstRegs[_Rs_].UL[0]); xMOV(ptr32[(u32*)(addrhilo + 8)], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[(u32*)(addrhilo + 12)], g_cpuConstRegs[_Rs_].UL[1]); xMOV(ptr32[(u32*)(addrhilo + 12)], g_cpuConstRegs[_Rs_].UL[1]);
} }
else { else
{
_flushEEreg(_Rs_); _flushEEreg(_Rs_);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
@ -368,10 +402,12 @@ void recMOVZ()
if (_Rs_ == _Rd_) if (_Rs_ == _Rd_)
return; return;
if(GPR_IS_CONST1(_Rt_)) { if (GPR_IS_CONST1(_Rt_))
{
if (g_cpuConstRegs[_Rt_].UD[0] != 0) if (g_cpuConstRegs[_Rt_].UD[0] != 0)
return; return;
} else }
else
_deleteEEreg(_Rd_, 1); _deleteEEreg(_Rd_, 1);
recMOVZtemp(); recMOVZtemp();
@ -424,10 +460,12 @@ void recMOVN()
if (_Rs_ == _Rd_) if (_Rs_ == _Rd_)
return; return;
if (GPR_IS_CONST1(_Rt_)) { if (GPR_IS_CONST1(_Rt_))
{
if (g_cpuConstRegs[_Rt_].UD[0] == 0) if (g_cpuConstRegs[_Rt_].UD[0] == 0)
return; return;
} else }
else
_deleteEEreg(_Rd_, 1); _deleteEEreg(_Rd_, 1);
recMOVNtemp(); recMOVNtemp();
@ -435,4 +473,6 @@ void recMOVN()
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -26,8 +26,7 @@ namespace Interp = R5900::Interpreter::OpcodeImpl;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Register mult/div & Register trap logic * * Register mult/div & Register trap logic *
@ -63,12 +62,17 @@ void recWritebackHILO(int info, int writed, int upper)
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
xMOV(ecx, edx); xMOV(ecx, edx);
if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) { if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
{
if( (reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0 ) { if ((reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0)
if( xmmregs[reglo].mode & MODE_WRITE ) { {
if( upper ) xMOVQ(ptr[(void*)(loaddr-8)], xRegisterSSE(reglo)); if (xmmregs[reglo].mode & MODE_WRITE)
else xMOVH.PS(ptr[(void*)(loaddr+8)], xRegisterSSE(reglo)); {
if (upper)
xMOVQ(ptr[(void*)(loaddr - 8)], xRegisterSSE(reglo));
else
xMOVH.PS(ptr[(void*)(loaddr + 8)], xRegisterSSE(reglo));
} }
xmmregs[reglo].inuse = 0; xmmregs[reglo].inuse = 0;
@ -86,29 +90,39 @@ void recWritebackHILO(int info, int writed, int upper)
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
regd = -1; regd = -1;
if( g_pCurInstInfo->regs[_Rd_] & EEINST_XMM ) { if (g_pCurInstInfo->regs[_Rd_] & EEINST_XMM)
if( savedlo ) { {
if (savedlo)
{
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE | MODE_READ); regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE | MODE_READ);
if( regd >= 0 ) { if (regd >= 0)
{
xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(loaddr)]); xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(loaddr)]);
} }
} }
} }
if( regd < 0 ) { if (regd < 0)
{
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
if( !savedlo ) xCDQ(); if (!savedlo)
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
} }
} }
if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) { if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
if( (reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0 ) { {
if( xmmregs[reghi].mode & MODE_WRITE ) { if ((reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0)
if( upper ) xMOVQ(ptr[(void*)(hiaddr-8)], xRegisterSSE(reghi)); {
else xMOVH.PS(ptr[(void*)(hiaddr+8)], xRegisterSSE(reghi)); if (xmmregs[reghi].mode & MODE_WRITE)
{
if (upper)
xMOVQ(ptr[(void*)(hiaddr - 8)], xRegisterSSE(reghi));
else
xMOVH.PS(ptr[(void*)(hiaddr + 8)], xRegisterSSE(reghi));
} }
xmmregs[reghi].inuse = 0; xmmregs[reghi].inuse = 0;
@ -128,37 +142,48 @@ void recWritebackConstHILO(u64 res, int writed, int upper)
uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0]; uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0; u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) { if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
{
reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE | MODE_READ); reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE | MODE_READ);
if( reglo >= 0 ) { if (reglo >= 0)
{
u32* mem_ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res); u32* mem_ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res);
if( upper ) xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]); if (upper)
else xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]); xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
else
xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
} }
else { else
{
xMOV(ptr32[(u32*)(loaddr)], res & 0xffffffff); xMOV(ptr32[(u32*)(loaddr)], res & 0xffffffff);
xMOV(ptr32[(u32*)(loaddr + 4)], (res & 0x80000000) ? 0xffffffff : 0); xMOV(ptr32[(u32*)(loaddr + 4)], (res & 0x80000000) ? 0xffffffff : 0);
} }
} }
if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) { if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
{
reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE | MODE_READ); reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE | MODE_READ);
if( reghi >= 0 ) { if (reghi >= 0)
{
u32* mem_ptr = recGetImm64((res >> 63) ? -1 : 0, res >> 32); u32* mem_ptr = recGetImm64((res >> 63) ? -1 : 0, res >> 32);
if( upper ) xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]); if (upper)
else xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]); xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
else
xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
} }
else { else
{
_deleteEEreg(XMMGPR_HI, 0); _deleteEEreg(XMMGPR_HI, 0);
xMOV(ptr32[(u32*)(hiaddr)], res >> 32); xMOV(ptr32[(u32*)(hiaddr)], res >> 32);
xMOV(ptr32[(u32*)(hiaddr + 4)], (res >> 63) ? 0xffffffff : 0); xMOV(ptr32[(u32*)(hiaddr + 4)], (res >> 63) ? 0xffffffff : 0);
} }
} }
if (!writed || !_Rd_) return; if (!writed || !_Rd_)
return;
g_cpuConstRegs[_Rd_].SD[0] = (s32)(res & 0xffffffffULL); //that is the difference g_cpuConstRegs[_Rd_].SD[0] = (s32)(res & 0xffffffffULL); //that is the difference
} }
@ -173,15 +198,18 @@ void recMULT_const()
void recMULTUsuper(int info, int upper, int process); void recMULTUsuper(int info, int upper, int process);
void recMULTsuper(int info, int upper, int process) void recMULTsuper(int info, int upper, int process)
{ {
if( process & PROCESS_CONSTS ) { if (process & PROCESS_CONSTS)
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if( process & PROCESS_CONSTT) { else if (process & PROCESS_CONSTT)
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -217,15 +245,18 @@ void recMULTU_const()
void recMULTUsuper(int info, int upper, int process) void recMULTUsuper(int info, int upper, int process)
{ {
if( process & PROCESS_CONSTS ) { if (process & PROCESS_CONSTS)
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if( process & PROCESS_CONSTT) { else if (process & PROCESS_CONSTT)
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -371,16 +402,19 @@ void recDIVsuper(int info, int sign, int upper, int process)
u8* end2 = JMP8(0); u8* end2 = JMP8(0);
x86SetJ8(cont3); x86SetJ8(cont3);
if( sign ) { if (sign)
{
xCDQ(); xCDQ();
xDIV(ecx); xDIV(ecx);
} }
else { else
{
xXOR(edx, edx); xXOR(edx, edx);
xUDIV(ecx); xUDIV(ecx);
} }
if (sign) x86SetJ8( end1 ); if (sign)
x86SetJ8(end1);
x86SetJ8(end2); x86SetJ8(end2);
// need to execute regardless of bad divide // need to execute regardless of bad divide
@ -408,7 +442,8 @@ EERECOMPILE_CODE0(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITE
void recDIVUconst(int upper) void recDIVUconst(int upper)
{ {
u32 quot, rem; u32 quot, rem;
if (g_cpuConstRegs[_Rt_].UL[0] != 0) { if (g_cpuConstRegs[_Rt_].UL[0] != 0)
{
quot = g_cpuConstRegs[_Rs_].UL[0] / g_cpuConstRegs[_Rt_].UL[0]; quot = g_cpuConstRegs[_Rs_].UL[0] / g_cpuConstRegs[_Rt_].UL[0];
rem = g_cpuConstRegs[_Rs_].UL[0] % g_cpuConstRegs[_Rt_].UL[0]; rem = g_cpuConstRegs[_Rs_].UL[0] % g_cpuConstRegs[_Rt_].UL[0];
} }
@ -489,7 +524,8 @@ EERECOMPILE_CODE0(DIVU1, XMMINFO_READS|XMMINFO_READT);
void recMADD() void recMADD()
{ {
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]); u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]);
_deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1); _deleteEEreg(XMMGPR_HI, 1);
@ -500,7 +536,8 @@ void recMADD()
xADD(eax, (u32)result & 0xffffffff); xADD(eax, (u32)result & 0xffffffff);
xADC(ecx, (u32)(result >> 32)); xADC(ecx, (u32)(result >> 32));
xCDQ(); xCDQ();
if( _Rd_) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -522,15 +559,18 @@ void recMADD()
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if ( GPR_IS_CONST1(_Rt_) ) { else if (GPR_IS_CONST1(_Rt_))
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -539,7 +579,8 @@ void recMADD()
xADD(eax, ptr[&cpuRegs.LO.UL[0]]); xADD(eax, ptr[&cpuRegs.LO.UL[0]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[0]]); xADC(ecx, ptr[&cpuRegs.HI.UL[0]]);
xCDQ(); xCDQ();
if( _Rd_ ) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -557,7 +598,8 @@ void recMADD()
void recMADDU() void recMADDU()
{ {
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]); u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]);
_deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1); _deleteEEreg(XMMGPR_HI, 1);
@ -568,7 +610,8 @@ void recMADDU()
xADD(eax, (u32)result & 0xffffffff); xADD(eax, (u32)result & 0xffffffff);
xADC(ecx, (u32)(result >> 32)); xADC(ecx, (u32)(result >> 32));
xCDQ(); xCDQ();
if( _Rd_) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -590,15 +633,18 @@ void recMADDU()
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if ( GPR_IS_CONST1(_Rt_) ) { else if (GPR_IS_CONST1(_Rt_))
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -607,7 +653,8 @@ void recMADDU()
xADD(eax, ptr[&cpuRegs.LO.UL[0]]); xADD(eax, ptr[&cpuRegs.LO.UL[0]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[0]]); xADC(ecx, ptr[&cpuRegs.HI.UL[0]]);
xCDQ(); xCDQ();
if( _Rd_ ) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -625,7 +672,8 @@ void recMADDU()
void recMADD1() void recMADD1()
{ {
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]); u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]);
_deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1); _deleteEEreg(XMMGPR_HI, 1);
@ -636,7 +684,8 @@ void recMADD1()
xADD(eax, (u32)result & 0xffffffff); xADD(eax, (u32)result & 0xffffffff);
xADC(ecx, (u32)(result >> 32)); xADC(ecx, (u32)(result >> 32));
xCDQ(); xCDQ();
if( _Rd_) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -658,15 +707,18 @@ void recMADD1()
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if ( GPR_IS_CONST1(_Rt_) ) { else if (GPR_IS_CONST1(_Rt_))
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -675,7 +727,8 @@ void recMADD1()
xADD(eax, ptr[&cpuRegs.LO.UL[2]]); xADD(eax, ptr[&cpuRegs.LO.UL[2]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[2]]); xADC(ecx, ptr[&cpuRegs.HI.UL[2]]);
xCDQ(); xCDQ();
if( _Rd_ ) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -693,7 +746,8 @@ void recMADD1()
void recMADDU1() void recMADDU1()
{ {
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]); u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]);
_deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1); _deleteEEreg(XMMGPR_HI, 1);
@ -704,7 +758,8 @@ void recMADDU1()
xADD(eax, (u32)result & 0xffffffff); xADD(eax, (u32)result & 0xffffffff);
xADC(ecx, (u32)(result >> 32)); xADC(ecx, (u32)(result >> 32));
xCDQ(); xCDQ();
if( _Rd_) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -726,15 +781,18 @@ void recMADDU1()
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else if ( GPR_IS_CONST1(_Rt_) ) { else if (GPR_IS_CONST1(_Rt_))
{
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
else { else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
@ -743,7 +801,8 @@ void recMADDU1()
xADD(eax, ptr[&cpuRegs.LO.UL[2]]); xADD(eax, ptr[&cpuRegs.LO.UL[2]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[2]]); xADC(ecx, ptr[&cpuRegs.HI.UL[2]]);
xCDQ(); xCDQ();
if( _Rd_ ) { if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1); _eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -762,4 +821,6 @@ void recMADDU1()
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -24,8 +24,7 @@ using namespace x86Emitter;
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
namespace OpcodeImpl namespace OpcodeImpl {
{
/********************************************************* /*********************************************************
* Shift arithmetic with constant shift * * Shift arithmetic with constant shift *
@ -93,7 +92,8 @@ void recSRLs_(int info, int sa)
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
if ( sa != 0 ) xSHR(eax, sa); if (sa != 0)
xSHR(eax, sa);
xCDQ(); xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -118,7 +118,8 @@ void recSRAs_(int info, int sa)
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
if ( sa != 0 ) xSAR(eax, sa); if (sa != 0)
xSAR(eax, sa);
xCDQ(); xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
@ -148,7 +149,8 @@ void recDSLLs_(int info, int sa)
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
xPSLL.Q(xRegisterSSE(rdreg), sa); xPSLL.Q(xRegisterSSE(rdreg), sa);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
@ -182,7 +184,8 @@ void recDSRLs_(int info, int sa)
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
xPSRL.Q(xRegisterSSE(rdreg), sa); xPSRL.Q(xRegisterSSE(rdreg), sa);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
@ -216,9 +219,11 @@ void recDSRAs_(int info, int sa)
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
if ( sa ) { if (sa)
{
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -271,7 +276,6 @@ void recDSLL32s_(int info, int sa)
} }
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax);
} }
void recDSLL32_(int info) void recDSLL32_(int info)
@ -292,7 +296,8 @@ void recDSRL32s_(int info, int sa)
pxAssert(!(info & PROCESS_EE_XMM)); pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
if ( sa != 0 ) xSHR(eax, sa ); if (sa != 0)
xSHR(eax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
@ -317,11 +322,11 @@ void recDSRA32s_(int info, int sa)
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCDQ(); xCDQ();
if ( sa != 0 ) xSAR(eax, sa ); if (sa != 0)
xSAR(eax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
} }
void recDSRA32_(int info) void recDSRA32_(int info)
@ -354,7 +359,8 @@ void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp)
xMOVDZX(xRegisterSSE(*rstemp), eax); xMOVDZX(xRegisterSSE(*rstemp), eax);
*rsreg = *rstemp; *rsreg = *rstemp;
if( *rtreg != *rdreg ) xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg)); if (*rtreg != *rdreg)
xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg));
} }
void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp) void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp)
@ -499,8 +505,10 @@ void recDSLLV_const()
// DSLLV with a compile-time-known shift register (Rs).
// Reads the shift amount from the low 6 bits (& 0x3f) of the low 32-bit word
// of the constant recorded for Rs, then hands off to one of the fixed-amount
// emitters: recDSLLs_ for amounts 0..31, recDSLL32s_ (given amount - 32) for
// amounts 32..63. `info` is passed through to the chosen emitter unchanged.
// NOTE(review): presumably only reached when Rs is flagged constant - confirm
// against the EERECOMPILE dispatch that selects the *_consts variants.
void recDSLLV_consts(int info) void recDSLLV_consts(int info)
{ {
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
if( sa < 32 ) recDSLLs_(info, sa); if (sa < 32)
else recDSLL32s_(info, sa-32); recDSLLs_(info, sa);
else
recDSLL32s_(info, sa - 32);
} }
void recDSLLV_constt(int info) void recDSLLV_constt(int info)
@ -509,7 +517,8 @@ void recDSLLV_constt(int info)
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp); recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]); xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits // The others possibility could be a read back of the upper 64 bits
@ -525,7 +534,8 @@ void recDSLLV_(int info)
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits // The others possibility could be a read back of the upper 64 bits
@ -546,8 +556,10 @@ void recDSRLV_const()
// DSRLV with a compile-time-known shift register (Rs).
// Reads the shift amount from the low 6 bits (& 0x3f) of the low 32-bit word
// of the constant recorded for Rs, then hands off to one of the fixed-amount
// emitters: recDSRLs_ for amounts 0..31, recDSRL32s_ (given amount - 32) for
// amounts 32..63. `info` is passed through to the chosen emitter unchanged.
// NOTE(review): presumably only reached when Rs is flagged constant - confirm
// against the EERECOMPILE dispatch that selects the *_consts variants.
void recDSRLV_consts(int info) void recDSRLV_consts(int info)
{ {
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
if( sa < 32 ) recDSRLs_(info, sa); if (sa < 32)
else recDSRL32s_(info, sa-32); recDSRLs_(info, sa);
else
recDSRL32s_(info, sa - 32);
} }
void recDSRLV_constt(int info) void recDSRLV_constt(int info)
@ -557,7 +569,8 @@ void recDSRLV_constt(int info)
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]); xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits // The others possibility could be a read back of the upper 64 bits
@ -573,7 +586,8 @@ void recDSRLV_(int info)
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
// flush lower 64 bits (as upper is wrong) // flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits // The others possibility could be a read back of the upper 64 bits
@ -594,8 +608,10 @@ void recDSRAV_const()
// DSRAV with a compile-time-known shift register (Rs).
// Reads the shift amount from the low 6 bits (& 0x3f) of the low 32-bit word
// of the constant recorded for Rs, then hands off to one of the fixed-amount
// emitters: recDSRAs_ for amounts 0..31, recDSRA32s_ (given amount - 32) for
// amounts 32..63. `info` is passed through to the chosen emitter unchanged.
// NOTE(review): presumably only reached when Rs is flagged constant - confirm
// against the EERECOMPILE dispatch that selects the *_consts variants.
void recDSRAV_consts(int info) void recDSRAV_consts(int info)
{ {
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
if( sa < 32 ) recDSRAs_(info, sa); if (sa < 32)
else recDSRA32s_(info, sa-32); recDSRAs_(info, sa);
else
recDSRA32s_(info, sa - 32);
} }
void recDSRAV_constt(int info) void recDSRAV_constt(int info)
@ -633,7 +649,8 @@ void recDSRAV_constt(int info)
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
} }
void recDSRAV_(int info) void recDSRAV_(int info)
@ -670,11 +687,14 @@ void recDSRAV_(int info)
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
if( rstemp != -1 ) _freeXMMreg(rstemp); if (rstemp != -1)
_freeXMMreg(rstemp);
} }
EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
#endif #endif
} } } } // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900

View File

@ -40,8 +40,10 @@ void _eeOnWriteReg(int reg, int signext)
void _deleteEEreg(int reg, int flush) void _deleteEEreg(int reg, int flush)
{ {
if( !reg ) return; if (!reg)
if( flush && GPR_IS_CONST1(reg) ) { return;
if (flush && GPR_IS_CONST1(reg))
{
_flushConstReg(reg); _flushConstReg(reg);
} }
GPR_DEL_CONST(reg); GPR_DEL_CONST(reg);
@ -50,8 +52,10 @@ void _deleteEEreg(int reg, int flush)
void _flushEEreg(int reg) void _flushEEreg(int reg)
{ {
if (!reg) return; if (!reg)
if (GPR_IS_CONST1(reg)) { return;
if (GPR_IS_CONST1(reg))
{
_flushConstReg(reg); _flushConstReg(reg);
return; return;
} }
@ -63,7 +67,8 @@ int eeProcessHILO(int reg, int mode, int mmx)
{ {
// Fixme: MMX problem // Fixme: MMX problem
int usemmx = 0; int usemmx = 0;
if( (usemmx || _hasFreeXMMreg()) || !(g_pCurInstInfo->regs[reg]&EEINST_LASTUSE) ) { if ((usemmx || _hasFreeXMMreg()) || !(g_pCurInstInfo->regs[reg] & EEINST_LASTUSE))
{
return _allocGPRtoXMMreg(-1, reg, mode); return _allocGPRtoXMMreg(-1, reg, mode);
} }
@ -80,13 +85,17 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP
{ {
int mmreg1, mmreg2, mmreg3, mmtemp, moded; int mmreg1, mmreg2, mmreg3, mmtemp, moded;
if ( ! _Rd_ && (xmminfo&XMMINFO_WRITED) ) return; if (!_Rd_ && (xmminfo & XMMINFO_WRITED))
return;
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
if( xmminfo & XMMINFO_WRITED ) { {
if (xmminfo & XMMINFO_WRITED)
{
_deleteGPRtoXMMreg(_Rd_, 2); _deleteGPRtoXMMreg(_Rd_, 2);
} }
if( xmminfo&XMMINFO_WRITED ) GPR_SET_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
} }
@ -94,86 +103,113 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP
moded = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0); moded = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
// test if should write xmm, mirror to mmx code // test if should write xmm, mirror to mmx code
if( g_pCurInstInfo->info & EEINST_XMM ) { if (g_pCurInstInfo->info & EEINST_XMM)
{
pxAssert(0); pxAssert(0);
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededGPRtoXMMreg(XMMGPR_LO); if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededGPRtoXMMreg(XMMGPR_HI); _addNeededGPRtoXMMreg(XMMGPR_LO);
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
_addNeededGPRtoXMMreg(XMMGPR_HI);
_addNeededGPRtoXMMreg(_Rs_); _addNeededGPRtoXMMreg(_Rs_);
_addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rt_);
if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_))
{
u32 creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; u32 creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_;
int vreg = creg == _Rs_ ? _Rt_ : _Rs_; int vreg = creg == _Rs_ ? _Rt_ : _Rs_;
// if(g_pCurInstInfo->regs[vreg]&EEINST_XMM) { // if (g_pCurInstInfo->regs[vreg] & EEINST_XMM)
// {
// mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ); // mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ);
// _addNeededGPRtoXMMreg(vreg); // _addNeededGPRtoXMMreg(vreg);
// } // }
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ); mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ);
if( mmreg1 >= 0 ) { if (mmreg1 >= 0)
{
int info = PROCESS_EE_XMM; int info = PROCESS_EE_XMM;
if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); if (GPR_IS_CONST1(_Rs_))
else info |= PROCESS_EE_SETMODES(mmreg1); info |= PROCESS_EE_SETMODET(mmreg1);
else
info |= PROCESS_EE_SETMODES(mmreg1);
if( xmminfo & XMMINFO_WRITED ) { if (xmminfo & XMMINFO_WRITED)
{
_addNeededGPRtoXMMreg(_Rd_); _addNeededGPRtoXMMreg(_Rd_);
mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)) ) { if (!(xmminfo & XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)))
{
_freeXMMreg(mmreg1); _freeXMMreg(mmreg1);
if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; if (GPR_IS_CONST1(_Rs_))
else info &= ~PROCESS_EE_MODEWRITES; info &= ~PROCESS_EE_MODEWRITET;
else
info &= ~PROCESS_EE_MODEWRITES;
xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].inuse = 1;
xmmregs[mmreg1].reg = _Rd_; xmmregs[mmreg1].reg = _Rd_;
xmmregs[mmreg1].mode = moded; xmmregs[mmreg1].mode = moded;
mmreg3 = mmreg1; mmreg3 = mmreg1;
} }
else if( mmreg3 < 0 ) mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); else if (mmreg3 < 0)
mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
info |= PROCESS_EE_SET_D(mmreg3); info |= PROCESS_EE_SET_D(mmreg3);
} }
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
{
mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); if (mmtemp >= 0)
info |= PROCESS_EE_SET_LO(mmtemp);
} }
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
{
mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); if (mmtemp >= 0)
info |= PROCESS_EE_SET_HI(mmtemp);
} }
if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); if (creg == _Rs_)
else consttcode(info|PROCESS_EE_SET_S(mmreg1)); constscode(info | PROCESS_EE_SET_T(mmreg1));
else
consttcode(info | PROCESS_EE_SET_S(mmreg1));
_clearNeededXMMregs(); _clearNeededXMMregs();
if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_DEL_CONST(_Rd_);
return; return;
} }
} }
else { else
{
// no const regs // no const regs
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);
if( mmreg1 >= 0 || mmreg2 >= 0 ) { if (mmreg1 >= 0 || mmreg2 >= 0)
{
int info = PROCESS_EE_XMM; int info = PROCESS_EE_XMM;
// do it all in xmm // do it all in xmm
if( mmreg1 < 0 ) mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); if (mmreg1 < 0)
if( mmreg2 < 0 ) mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
if (mmreg2 < 0)
mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
info |= PROCESS_EE_SETMODES(mmreg1) | PROCESS_EE_SETMODET(mmreg2); info |= PROCESS_EE_SETMODES(mmreg1) | PROCESS_EE_SETMODET(mmreg2);
if( xmminfo & XMMINFO_WRITED ) { if (xmminfo & XMMINFO_WRITED)
{
// check for last used, if so don't alloc a new XMM reg // check for last used, if so don't alloc a new XMM reg
_addNeededGPRtoXMMreg(_Rd_); _addNeededGPRtoXMMreg(_Rd_);
mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded); mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded);
if( mmreg3 < 0 ) { if (mmreg3 < 0)
if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { {
if (!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)))
{
_freeXMMreg(mmreg2); _freeXMMreg(mmreg2);
info &= ~PROCESS_EE_MODEWRITET; info &= ~PROCESS_EE_MODEWRITET;
xmmregs[mmreg2].inuse = 1; xmmregs[mmreg2].inuse = 1;
@ -181,7 +217,8 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP
xmmregs[mmreg2].mode = moded; xmmregs[mmreg2].mode = moded;
mmreg3 = mmreg2; mmreg3 = mmreg2;
} }
else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { else if (!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)))
{
_freeXMMreg(mmreg1); _freeXMMreg(mmreg1);
info &= ~PROCESS_EE_MODEWRITES; info &= ~PROCESS_EE_MODEWRITES;
xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].inuse = 1;
@ -189,24 +226,30 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP
xmmregs[mmreg1].mode = moded; xmmregs[mmreg1].mode = moded;
mmreg3 = mmreg1; mmreg3 = mmreg1;
} }
else mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); else
mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
} }
info |= PROCESS_EE_SET_D(mmreg3); info |= PROCESS_EE_SET_D(mmreg3);
} }
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
{
mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); if (mmtemp >= 0)
info |= PROCESS_EE_SET_LO(mmtemp);
} }
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
{
mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); if (mmtemp >= 0)
info |= PROCESS_EE_SET_HI(mmtemp);
} }
noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2)); noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2));
_clearNeededXMMregs(); _clearNeededXMMregs();
if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_DEL_CONST(_Rd_);
return; return;
} }
} }
@ -221,36 +264,45 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP
_deleteGPRtoXMMreg(_Rd_, (xmminfo & XMMINFO_READD) ? 0 : 2); _deleteGPRtoXMMreg(_Rd_, (xmminfo & XMMINFO_READD) ? 0 : 2);
// don't delete, fn will take care of them // don't delete, fn will take care of them
// if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { // if (xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO))
// {
// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo & XMMINFO_READLO) ? 1 : 0); // _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo & XMMINFO_READLO) ? 1 : 0);
// } // }
// if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { // if (xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI))
// {
// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo & XMMINFO_READHI) ? 1 : 0); // _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo & XMMINFO_READHI) ? 1 : 0);
// } // }
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
constscode(0); constscode(0);
if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_DEL_CONST(_Rd_);
return; return;
} }
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
consttcode(0); consttcode(0);
if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_DEL_CONST(_Rd_);
return; return;
} }
noconstcode(0); noconstcode(0);
if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); if (xmminfo & XMMINFO_WRITED)
GPR_DEL_CONST(_Rd_);
} }
// rt = rs op imm16 // rt = rs op imm16
void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{ {
int mmreg1, mmreg2; int mmreg1, mmreg2;
if ( ! _Rt_ ) return; if (!_Rt_)
return;
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
_deleteGPRtoXMMreg(_Rt_, 2); _deleteGPRtoXMMreg(_Rt_, 2);
GPR_SET_CONST(_Rt_); GPR_SET_CONST(_Rt_);
constcode(); constcode();
@ -258,21 +310,25 @@ void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
} }
// test if should write xmm, mirror to mmx code // test if should write xmm, mirror to mmx code
if( g_pCurInstInfo->info & EEINST_XMM ) { if (g_pCurInstInfo->info & EEINST_XMM)
{
pxAssert(0); pxAssert(0);
// no const regs // no const regs
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
if( mmreg1 >= 0 ) { if (mmreg1 >= 0)
{
int info = PROCESS_EE_XMM | PROCESS_EE_SETMODES(mmreg1); int info = PROCESS_EE_XMM | PROCESS_EE_SETMODES(mmreg1);
// check for last used, if so don't alloc a new XMM reg // check for last used, if so don't alloc a new XMM reg
_addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rt_);
mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE); mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE);
if( mmreg2 < 0 ) { if (mmreg2 < 0)
if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { {
if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
{
_freeXMMreg(mmreg1); _freeXMMreg(mmreg1);
info &= ~PROCESS_EE_MODEWRITES; info &= ~PROCESS_EE_MODEWRITES;
xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].inuse = 1;
@ -280,7 +336,8 @@ void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ; xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ;
mmreg2 = mmreg1; mmreg2 = mmreg1;
} }
else mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE); else
mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE);
} }
noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2)); noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2));
@ -304,9 +361,11 @@ void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{ {
int mmreg1, mmreg2; int mmreg1, mmreg2;
if ( ! _Rd_ ) return; if (!_Rd_)
return;
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
_deleteGPRtoXMMreg(_Rd_, 2); _deleteGPRtoXMMreg(_Rd_, 2);
GPR_SET_CONST(_Rd_); GPR_SET_CONST(_Rd_);
constcode(); constcode();
@ -314,21 +373,25 @@ void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
} }
// test if should write xmm, mirror to mmx code // test if should write xmm, mirror to mmx code
if( g_pCurInstInfo->info & EEINST_XMM ) { if (g_pCurInstInfo->info & EEINST_XMM)
{
pxAssert(0); pxAssert(0);
// no const regs // no const regs
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);
if( mmreg1 >= 0 ) { if (mmreg1 >= 0)
{
int info = PROCESS_EE_XMM | PROCESS_EE_SETMODET(mmreg1); int info = PROCESS_EE_XMM | PROCESS_EE_SETMODET(mmreg1);
// check for last used, if so don't alloc a new XMM reg // check for last used, if so don't alloc a new XMM reg
_addNeededGPRtoXMMreg(_Rd_); _addNeededGPRtoXMMreg(_Rd_);
mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
if( mmreg2 < 0 ) { if (mmreg2 < 0)
if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { {
if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_))
{
_freeXMMreg(mmreg1); _freeXMMreg(mmreg1);
info &= ~PROCESS_EE_MODEWRITET; info &= ~PROCESS_EE_MODEWRITET;
xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].inuse = 1;
@ -336,7 +399,8 @@ void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ; xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ;
mmreg2 = mmreg1; mmreg2 = mmreg1;
} }
else mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); else
mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
} }
noconstcode(info | PROCESS_EE_SET_T(mmreg1) | PROCESS_EE_SET_D(mmreg2)); noconstcode(info | PROCESS_EE_SET_T(mmreg1) | PROCESS_EE_SET_D(mmreg2));
@ -365,17 +429,20 @@ void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode)
_deleteEEreg(_Rs_, 0); _deleteEEreg(_Rs_, 0);
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
constcode(); constcode();
return; return;
} }
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
//multicode(PROCESS_EE_CONSTT); //multicode(PROCESS_EE_CONSTT);
return; return;
} }
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
//multicode(PROCESS_EE_CONSTT); //multicode(PROCESS_EE_CONSTT);
return; return;
} }
@ -388,7 +455,8 @@ void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode)
// rd = rs op rt // rd = rs op rt
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode) void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode)
{ {
if ( ! _Rd_ ) return; if (!_Rd_)
return;
// for now, don't support xmm // for now, don't support xmm
@ -396,19 +464,22 @@ void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R59
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0); _deleteGPRtoXMMreg(_Rd_, 0);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
{
GPR_SET_CONST(_Rd_); GPR_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
} }
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
constscode(0); constscode(0);
GPR_DEL_CONST(_Rd_); GPR_DEL_CONST(_Rd_);
return; return;
} }
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
consttcode(0); consttcode(0);
GPR_DEL_CONST(_Rd_); GPR_DEL_CONST(_Rd_);
return; return;
@ -429,7 +500,8 @@ void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 0); _deleteGPRtoXMMreg(_Rt_, 0);
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
GPR_SET_CONST(_Rt_); GPR_SET_CONST(_Rt_);
constcode(); constcode();
return; return;
@ -442,14 +514,16 @@ void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
// rd = rt op sa // rd = rt op sa
void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{ {
if ( ! _Rd_ ) return; if (!_Rd_)
return;
// for now, don't support xmm // for now, don't support xmm
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0); _deleteGPRtoXMMreg(_Rd_, 0);
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
GPR_SET_CONST(_Rd_); GPR_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
@ -465,33 +539,41 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
pxFail("Unfinished code reached."); pxFail("Unfinished code reached.");
// for now, don't support xmm // for now, don't support xmm
if( MULT ) { if (MULT)
{
_deleteGPRtoXMMreg(_Rd_, 0); _deleteGPRtoXMMreg(_Rd_, 0);
} }
_deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rt_, 1);
if( GPR_IS_CONST2(_Rs_, _Rt_) ) { if (GPR_IS_CONST2(_Rs_, _Rt_))
if( MULT && _Rd_ ) GPR_SET_CONST(_Rd_); {
if (MULT && _Rd_)
GPR_SET_CONST(_Rd_);
constcode(); constcode();
return; return;
} }
if( GPR_IS_CONST1(_Rs_) ) { if (GPR_IS_CONST1(_Rs_))
{
//multicode(PROCESS_EE_CONSTS); //multicode(PROCESS_EE_CONSTS);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); if (MULT && _Rd_)
GPR_DEL_CONST(_Rd_);
return; return;
} }
if( GPR_IS_CONST1(_Rt_) ) { if (GPR_IS_CONST1(_Rt_))
{
//multicode(PROCESS_EE_CONSTT); //multicode(PROCESS_EE_CONSTT);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); if (MULT && _Rd_)
GPR_DEL_CONST(_Rd_);
return; return;
} }
multicode(0); multicode(0);
if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); if (MULT && _Rd_)
GPR_DEL_CONST(_Rd_);
} }
// EE XMM allocation code // EE XMM allocation code
@ -500,76 +582,95 @@ int eeRecompileCodeXMM(int xmminfo)
int info = PROCESS_EE_XMM; int info = PROCESS_EE_XMM;
// flush consts // flush consts
if( xmminfo & XMMINFO_READT ) { if (xmminfo & XMMINFO_READT)
if( GPR_IS_CONST1( _Rt_ ) && !(g_cpuFlushedConstReg&(1<<_Rt_)) ) { {
if (GPR_IS_CONST1(_Rt_) && !(g_cpuFlushedConstReg & (1 << _Rt_)))
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
g_cpuFlushedConstReg |= (1 << _Rt_); g_cpuFlushedConstReg |= (1 << _Rt_);
} }
} }
if( xmminfo & XMMINFO_READS) { if (xmminfo & XMMINFO_READS)
if( GPR_IS_CONST1( _Rs_ ) && !(g_cpuFlushedConstReg&(1<<_Rs_)) ) { {
if (GPR_IS_CONST1(_Rs_) && !(g_cpuFlushedConstReg & (1 << _Rs_)))
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
g_cpuFlushedConstReg |= (1 << _Rs_); g_cpuFlushedConstReg |= (1 << _Rs_);
} }
} }
if( xmminfo & XMMINFO_WRITED ) { if (xmminfo & XMMINFO_WRITED)
{
GPR_DEL_CONST(_Rd_); GPR_DEL_CONST(_Rd_);
} }
// add needed // add needed
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
{
_addNeededGPRtoXMMreg(XMMGPR_LO); _addNeededGPRtoXMMreg(XMMGPR_LO);
} }
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
{
_addNeededGPRtoXMMreg(XMMGPR_HI); _addNeededGPRtoXMMreg(XMMGPR_HI);
} }
if( xmminfo & XMMINFO_READS) _addNeededGPRtoXMMreg(_Rs_); if (xmminfo & XMMINFO_READS)
if( xmminfo & XMMINFO_READT) _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rs_);
if( xmminfo & XMMINFO_WRITED ) _addNeededGPRtoXMMreg(_Rd_); if (xmminfo & XMMINFO_READT)
_addNeededGPRtoXMMreg(_Rt_);
if (xmminfo & XMMINFO_WRITED)
_addNeededGPRtoXMMreg(_Rd_);
// allocate // allocate
if( xmminfo & XMMINFO_READS) { if (xmminfo & XMMINFO_READS)
{
int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
info |= PROCESS_EE_SET_S(reg) | PROCESS_EE_SETMODES(reg); info |= PROCESS_EE_SET_S(reg) | PROCESS_EE_SETMODES(reg);
} }
if( xmminfo & XMMINFO_READT) { if (xmminfo & XMMINFO_READT)
{
int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
info |= PROCESS_EE_SET_T(reg) | PROCESS_EE_SETMODET(reg); info |= PROCESS_EE_SET_T(reg) | PROCESS_EE_SETMODET(reg);
} }
if( xmminfo & XMMINFO_WRITED ) { if (xmminfo & XMMINFO_WRITED)
{
int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? ((xmminfo & XMMINFO_READD_LO) ? (MODE_READ | MODE_READHALF) : MODE_READ) : 0); int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? ((xmminfo & XMMINFO_READD_LO) ? (MODE_READ | MODE_READHALF) : MODE_READ) : 0);
int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd); int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd);
if( regd < 0 ) { if (regd < 0)
if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { {
if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)))
{
_freeXMMreg(EEREC_T); _freeXMMreg(EEREC_T);
xmmregs[EEREC_T].inuse = 1; xmmregs[EEREC_T].inuse = 1;
xmmregs[EEREC_T].reg = _Rd_; xmmregs[EEREC_T].reg = _Rd_;
xmmregs[EEREC_T].mode = readd; xmmregs[EEREC_T].mode = readd;
regd = EEREC_T; regd = EEREC_T;
} }
else if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { else if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)))
{
_freeXMMreg(EEREC_S); _freeXMMreg(EEREC_S);
xmmregs[EEREC_S].inuse = 1; xmmregs[EEREC_S].inuse = 1;
xmmregs[EEREC_S].reg = _Rd_; xmmregs[EEREC_S].reg = _Rd_;
xmmregs[EEREC_S].mode = readd; xmmregs[EEREC_S].mode = readd;
regd = EEREC_S; regd = EEREC_S;
} }
else regd = _allocGPRtoXMMreg(-1, _Rd_, readd); else
regd = _allocGPRtoXMMreg(-1, _Rd_, readd);
} }
info |= PROCESS_EE_SET_D(regd); info |= PROCESS_EE_SET_D(regd);
} }
if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
{
info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0))); info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0)));
info |= PROCESS_EE_LO; info |= PROCESS_EE_LO;
} }
if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
{
info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0))); info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0)));
info |= PROCESS_EE_HI; info |= PROCESS_EE_HI;
} }
@ -590,47 +691,66 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
int mmregs = -1, mmregt = -1, mmregd = -1, mmregacc = -1; int mmregs = -1, mmregt = -1, mmregd = -1, mmregacc = -1;
int info = PROCESS_EE_XMM; int info = PROCESS_EE_XMM;
if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); if (xmminfo & XMMINFO_READS)
if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); _addNeededFPtoXMMreg(_Fs_);
if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); if (xmminfo & XMMINFO_READT)
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); _addNeededFPtoXMMreg(_Ft_);
if (xmminfo & (XMMINFO_WRITED | XMMINFO_READD))
_addNeededFPtoXMMreg(_Fd_);
if (xmminfo & (XMMINFO_WRITEACC | XMMINFO_READACC))
_addNeededFPACCtoXMMreg();
if( xmminfo & XMMINFO_READT ) { if (xmminfo & XMMINFO_READT)
if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); {
else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); if (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE)
mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
else
mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ);
} }
if( xmminfo & XMMINFO_READS ) { if (xmminfo & XMMINFO_READS)
if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) { {
if ((!(xmminfo & XMMINFO_READT) || (mmregt >= 0)) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE))
{
mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
} }
else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); else
mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
} }
if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); if (mmregs >= 0)
if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); info |= PROCESS_EE_SETMODES_XMM(mmregs);
if (mmregt >= 0)
info |= PROCESS_EE_SETMODET_XMM(mmregt);
if( xmminfo & XMMINFO_READD ) { if (xmminfo & XMMINFO_READD)
{
pxAssert(xmminfo & XMMINFO_WRITED); pxAssert(xmminfo & XMMINFO_WRITED);
mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ); mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ);
} }
if( xmminfo & XMMINFO_READACC ) { if (xmminfo & XMMINFO_READACC)
{
if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE)) if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE))
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); else
mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ);
} }
if( xmminfo & XMMINFO_WRITEACC ) { if (xmminfo & XMMINFO_WRITEACC)
{
// check for last used, if so don't alloc a new XMM reg // check for last used, if so don't alloc a new XMM reg
int readacc = MODE_WRITE | ((xmminfo & XMMINFO_READACC) ? MODE_READ : 0); int readacc = MODE_WRITE | ((xmminfo & XMMINFO_READACC) ? MODE_READ : 0);
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc);
if( mmregacc < 0 ) { if (mmregacc < 0)
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { {
if( FPUINST_ISLIVE(_Ft_) ) { if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)))
{
if (FPUINST_ISLIVE(_Ft_))
{
_freeXMMreg(mmregt); _freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET; info &= ~PROCESS_EE_MODEWRITET;
} }
@ -640,8 +760,10 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
xmmregs[mmregt].type = XMMTYPE_FPACC; xmmregs[mmregt].type = XMMTYPE_FPACC;
mmregacc = mmregt; mmregacc = mmregt;
} }
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)))
if( FPUINST_ISLIVE(_Fs_) ) { {
if (FPUINST_ISLIVE(_Fs_))
{
_freeXMMreg(mmregs); _freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES; info &= ~PROCESS_EE_MODEWRITES;
} }
@ -651,20 +773,27 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
xmmregs[mmregs].type = XMMTYPE_FPACC; xmmregs[mmregs].type = XMMTYPE_FPACC;
mmregacc = mmregs; mmregacc = mmregs;
} }
else mmregacc = _allocFPACCtoXMMreg(-1, readacc); else
mmregacc = _allocFPACCtoXMMreg(-1, readacc);
} }
xmmregs[mmregacc].mode |= MODE_WRITE; xmmregs[mmregacc].mode |= MODE_WRITE;
} }
else if( xmminfo & XMMINFO_WRITED ) { else if (xmminfo & XMMINFO_WRITED)
{
// check for last used, if so don't alloc a new XMM reg // check for last used, if so don't alloc a new XMM reg
int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0); int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); if (xmminfo & XMMINFO_READD)
else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
else
mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);
if( mmregd < 0 ) { if (mmregd < 0)
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { {
if( FPUINST_ISLIVE(_Ft_) ) { if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)))
{
if (FPUINST_ISLIVE(_Ft_))
{
_freeXMMreg(mmregt); _freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET; info &= ~PROCESS_EE_MODEWRITET;
} }
@ -673,8 +802,10 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
xmmregs[mmregt].mode = readd; xmmregs[mmregt].mode = readd;
mmregd = mmregt; mmregd = mmregt;
} }
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)))
if( FPUINST_ISLIVE(_Fs_) ) { {
if (FPUINST_ISLIVE(_Fs_))
{
_freeXMMreg(mmregs); _freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES; info &= ~PROCESS_EE_MODEWRITES;
} }
@ -683,7 +814,8 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
xmmregs[mmregs].mode = readd; xmmregs[mmregs].mode = readd;
mmregd = mmregs; mmregd = mmregs;
} }
else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { else if ((xmminfo & XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)))
{
if (FPUINST_ISLIVE(XMMFPU_ACC)) if (FPUINST_ISLIVE(XMMFPU_ACC))
_freeXMMreg(mmregacc); _freeXMMreg(mmregacc);
xmmregs[mmregacc].inuse = 1; xmmregs[mmregacc].inuse = 1;
@ -692,30 +824,40 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
xmmregs[mmregacc].type = XMMTYPE_FPREG; xmmregs[mmregacc].type = XMMTYPE_FPREG;
mmregd = mmregacc; mmregd = mmregacc;
} }
else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); else
mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
} }
} }
pxAssert(mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0); pxAssert(mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0);
if( xmminfo & XMMINFO_WRITED ) { if (xmminfo & XMMINFO_WRITED)
{
pxAssert(mmregd >= 0); pxAssert(mmregd >= 0);
info |= PROCESS_EE_SET_D(mmregd); info |= PROCESS_EE_SET_D(mmregd);
} }
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { if (xmminfo & (XMMINFO_WRITEACC | XMMINFO_READACC))
if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; {
else pxAssert( !(xmminfo&XMMINFO_WRITEACC)); if (mmregacc >= 0)
info |= PROCESS_EE_SET_ACC(mmregacc) | PROCESS_EE_ACC;
else
pxAssert(!(xmminfo & XMMINFO_WRITEACC));
} }
if( xmminfo & XMMINFO_READS ) { if (xmminfo & XMMINFO_READS)
if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; {
if (mmregs >= 0)
info |= PROCESS_EE_SET_S(mmregs) | PROCESS_EE_S;
} }
if( xmminfo & XMMINFO_READT ) { if (xmminfo & XMMINFO_READT)
if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; {
if (mmregt >= 0)
info |= PROCESS_EE_SET_T(mmregt) | PROCESS_EE_T;
} }
// at least one must be in xmm // at least one must be in xmm
if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { if ((xmminfo & (XMMINFO_READS | XMMINFO_READT)) == (XMMINFO_READS | XMMINFO_READT))
{
pxAssert(mmregs >= 0 || mmregt >= 0); pxAssert(mmregs >= 0 || mmregt >= 0);
} }

View File

@ -37,9 +37,9 @@ protected:
bool m_free; bool m_free;
public: public:
iAllocRegSSE() : iAllocRegSSE()
m_reg( xmm0 ), : m_reg(xmm0)
m_free( !!_hasFreeXMMreg() ) , m_free(!!_hasFreeXMMreg())
{ {
if (m_free) if (m_free)
m_reg = xRegisterSSE(_allocTempXMMreg(XMMT_INT, -1)); m_reg = xRegisterSSE(_allocTempXMMreg(XMMT_INT, -1));
@ -73,7 +73,8 @@ static void iMOV128_SSE( const xIndirectVoid& destRm, const xIndirectVoid& srcRm
// //
static void iMOV64_Smart(const xIndirectVoid& destRm, const xIndirectVoid& srcRm) static void iMOV64_Smart(const xIndirectVoid& destRm, const xIndirectVoid& srcRm)
{ {
if (wordsize == 8) { if (wordsize == 8)
{
xMOV(rax, srcRm); xMOV(rax, srcRm);
xMOV(destRm, rax); xMOV(destRm, rax);
return; return;
@ -232,7 +233,7 @@ namespace vtlb_private
break; break;
} }
} }
} } // namespace vtlb_private
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// allocate one page for our naked indirect dispatcher function. // allocate one page for our naked indirect dispatcher function.
@ -288,15 +289,19 @@ static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false )
static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign) static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
{ {
xMOVZX(eax, al); xMOVZX(eax, al);
if (wordsize != 8) xSUB( arg1regd, 0x80000000 ); if (wordsize != 8)
xSUB(arg1regd, 0x80000000);
xSUB(arg1regd, eax); xSUB(arg1regd, eax);
// jump to the indirect handler, which is a __fastcall C++ function. // jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data] // [ecx is address, edx is data]
sptr table = (sptr)vtlbdata.RWFT[bits][mode]; sptr table = (sptr)vtlbdata.RWFT[bits][mode];
if (table == (s32)table) { if (table == (s32)table)
{
xFastCall(ptrNative[(rax * wordsize) + table], arg1reg, arg2reg); xFastCall(ptrNative[(rax * wordsize) + table], arg1reg, arg2reg);
} else { }
else
{
xLEA(arg3reg, ptr[(void*)table]); xLEA(arg3reg, ptr[(void*)table]);
xFastCall(ptrNative[(rax * wordsize) + arg3reg], arg1reg, arg2reg); xFastCall(ptrNative[(rax * wordsize) + arg3reg], arg1reg, arg2reg);
} }
@ -328,7 +333,8 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
void vtlb_dynarec_init() void vtlb_dynarec_init()
{ {
static bool hasBeenCalled = false; static bool hasBeenCalled = false;
if (hasBeenCalled) return; if (hasBeenCalled)
return;
hasBeenCalled = true; hasBeenCalled = true;
// In case init gets called multiple times: // In case init gets called multiple times:
@ -569,7 +575,6 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
iMOV128_SSE(ptr[(void*)ppf], ptr[arg2reg]); iMOV128_SSE(ptr[(void*)ppf], ptr[arg2reg]);
break; break;
} }
} }
else else
{ {

View File

@ -39,9 +39,9 @@ void mVUreserveCache(microVU& mVU)
mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb); mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb);
mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index)); mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index));
mVU.cache = mVU.index ? mVU.cache = mVU.index
(u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU1recOffset, mVU.cacheSize * _1mb) : ? (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU1recOffset, mVU.cacheSize * _1mb)
(u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU0recOffset, mVU.cacheSize * _1mb); : (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU0recOffset, mVU.cacheSize * _1mb);
mVU.cache_reserve->ThrowIfNotOk(); mVU.cache_reserve->ThrowIfNotOk();
} }
@ -294,15 +294,11 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg)
{ {
auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; auto cmpOffset = [&](void* x) { return (u8*)x + range.start; };
if ((range.start < 0) || (range.end < 0)) if ((range.start < 0) || (range.end < 0))
{
DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end); DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end);
}
if (memcmp_mmx(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) if (memcmp_mmx(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start)))
{
return false; return false;
} }
} }
}
mVU.prog.cleared = 0; mVU.prog.cleared = 0;
mVU.prog.cur = &prog; mVU.prog.cur = &prog;
mVU.prog.isSame = cmpWholeProg ? 1 : -1; mVU.prog.isSame = cmpWholeProg ? 1 : -1;
@ -316,8 +312,8 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState)
microProgramQuick& quick = mVU.prog.quick[mVU.regs().start_pc / 8]; microProgramQuick& quick = mVU.prog.quick[mVU.regs().start_pc / 8];
microProgramList* list = mVU.prog.prog [mVU.regs().start_pc / 8]; microProgramList* list = mVU.prog.prog [mVU.regs().start_pc / 8];
if (!quick.prog) if (!quick.prog) // If null, we need to search for new program
{ // If null, we need to search for new program {
std::deque<microProgram*>::iterator it(list->begin()); std::deque<microProgram*>::iterator it(list->begin());
for (; it != list->end(); ++it) for (; it != list->end(); ++it)
{ {
@ -371,16 +367,8 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState)
//------------------------------------------------------------------ //------------------------------------------------------------------
// recMicroVU0 / recMicroVU1 // recMicroVU0 / recMicroVU1
//------------------------------------------------------------------ //------------------------------------------------------------------
recMicroVU0::recMicroVU0() recMicroVU0::recMicroVU0() { m_Idx = 0; IsInterpreter = false; }
{ recMicroVU1::recMicroVU1() { m_Idx = 1; IsInterpreter = false; }
m_Idx = 0;
IsInterpreter = false;
}
recMicroVU1::recMicroVU1()
{
m_Idx = 1;
IsInterpreter = false;
}
void recMicroVU0::Vsync() noexcept { mVUvsyncUpdate(microVU0); } void recMicroVU0::Vsync() noexcept { mVUvsyncUpdate(microVU0); }
void recMicroVU1::Vsync() noexcept { mVUvsyncUpdate(microVU1); } void recMicroVU1::Vsync() noexcept { mVUvsyncUpdate(microVU1); }

View File

@ -37,12 +37,14 @@ using namespace x86Emitter;
#include "microVU_Profiler.h" #include "microVU_Profiler.h"
#include "common/Perf.h" #include "common/Perf.h"
struct microBlockLink { struct microBlockLink
{
microBlock block; microBlock block;
microBlockLink* next; microBlockLink* next;
}; };
class microBlockManager { class microBlockManager
{
private: private:
microBlockLink *qBlockList, *qBlockEnd; // Quick Search microBlockLink *qBlockList, *qBlockEnd; // Quick Search
microBlockLink *fBlockList, *fBlockEnd; // Full Search microBlockLink *fBlockList, *fBlockEnd; // Full Search
@ -50,20 +52,24 @@ private:
public: public:
inline int getFullListCount() const { return fListI; } inline int getFullListCount() const { return fListI; }
microBlockManager() { microBlockManager()
{
qListI = fListI = 0; qListI = fListI = 0;
qBlockEnd = qBlockList = NULL; qBlockEnd = qBlockList = NULL;
fBlockEnd = fBlockList = NULL; fBlockEnd = fBlockList = NULL;
} }
~microBlockManager() { reset(); } ~microBlockManager() { reset(); }
void reset() { void reset()
for(microBlockLink* linkI = qBlockList; linkI != NULL; ) { {
for (microBlockLink* linkI = qBlockList; linkI != NULL;)
{
microBlockLink* freeI = linkI; microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache); safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next; linkI = linkI->next;
_aligned_free(freeI); _aligned_free(freeI);
} }
for(microBlockLink* linkI = fBlockList; linkI != NULL; ) { for (microBlockLink* linkI = fBlockList; linkI != NULL;)
{
microBlockLink* freeI = linkI; microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache); safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next; linkI = linkI->next;
@ -73,11 +79,16 @@ public:
qBlockEnd = qBlockList = NULL; qBlockEnd = qBlockList = NULL;
fBlockEnd = fBlockList = NULL; fBlockEnd = fBlockList = NULL;
}; };
microBlock* add(microBlock* pBlock) { microBlock* add(microBlock* pBlock)
{
microBlock* thisBlock = search(&pBlock->pState); microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock) { if (!thisBlock)
{
u8 fullCmp = pBlock->pState.needExactMatch; u8 fullCmp = pBlock->pState.needExactMatch;
if (fullCmp) fListI++; else qListI++; if (fullCmp)
fListI++;
else
qListI++;
microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList; microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd; microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
@ -85,11 +96,13 @@ public:
newBlock->block.jumpCache = NULL; newBlock->block.jumpCache = NULL;
newBlock->next = NULL; newBlock->next = NULL;
if (blockEnd) { if (blockEnd)
{
blockEnd->next = newBlock; blockEnd->next = newBlock;
blockEnd = newBlock; blockEnd = newBlock;
} }
else { else
{
blockEnd = blockList = newBlock; blockEnd = blockList = newBlock;
} }
@ -98,15 +111,20 @@ public:
} }
return thisBlock; return thisBlock;
} }
__ri microBlock* search(microRegInfo* pState) { __ri microBlock* search(microRegInfo* pState)
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) {
for(microBlockLink* linkI = fBlockList; linkI != NULL; linkI = linkI->next) { if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State)
{
for (microBlockLink* linkI = fBlockList; linkI != NULL; linkI = linkI->next)
{
if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo))) if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo)))
return &linkI->block; return &linkI->block;
} }
} }
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) else // Can do Simple Search (Only Matches the Important Pipeline Stuff)
for(microBlockLink* linkI = qBlockList; linkI != NULL; linkI = linkI->next) { {
for (microBlockLink* linkI = qBlockList; linkI != NULL; linkI = linkI->next)
{
if (linkI->block.pState.quick32[0] != pState->quick32[0]) continue; if (linkI->block.pState.quick32[0] != pState->quick32[0]) continue;
if (linkI->block.pState.quick32[1] != pState->quick32[1]) continue; if (linkI->block.pState.quick32[1] != pState->quick32[1]) continue;
if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue; if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
@ -116,17 +134,22 @@ public:
} }
return NULL; return NULL;
} }
void printInfo(int pc, bool printQuick) { void printInfo(int pc, bool printQuick)
{
int listI = printQuick ? qListI : fListI; int listI = printQuick ? qListI : fListI;
if (listI < 7) return; if (listI < 7)
return;
microBlockLink* linkI = printQuick ? qBlockList : fBlockList; microBlockLink* linkI = printQuick ? qBlockList : fBlockList;
for (int i = 0; i <= listI; i++) { for (int i = 0; i <= listI; i++)
{
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4; u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j]; for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg; for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j]; for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]" DevCon.WriteLn(Color_Green,
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]",
pc, i, crc, linkI->block.pState.q,
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v, linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch, linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
linkI->block.pState.blockType, viCRC, vfCRC); linkI->block.pState.blockType, viCRC, vfCRC);
@ -135,13 +158,15 @@ public:
} }
}; };
struct microRange { struct microRange
{
s32 start; // Start PC (The opcode the block starts at) s32 start; // Start PC (The opcode the block starts at)
s32 end; // End PC (The opcode the block ends with) s32 end; // End PC (The opcode the block ends with)
}; };
#define mProgSize (0x4000 / 4) #define mProgSize (0x4000 / 4)
struct microProgram { struct microProgram
{
u32 data [mProgSize]; // Holds a copy of the VU microProgram u32 data [mProgSize]; // Holds a copy of the VU microProgram
microBlockManager* block[mProgSize / 2]; // Array of Block Managers microBlockManager* block[mProgSize / 2]; // Array of Block Managers
std::deque<microRange>* ranges; // The ranges of the microProgram that have already been recompiled std::deque<microRange>* ranges; // The ranges of the microProgram that have already been recompiled
@ -151,12 +176,14 @@ struct microProgram {
typedef std::deque<microProgram*> microProgramList; typedef std::deque<microProgram*> microProgramList;
struct microProgramQuick { struct microProgramQuick
{
microBlockManager* block; // Quick reference to valid microBlockManager for current startPC microBlockManager* block; // Quick reference to valid microBlockManager for current startPC
microProgram* prog; // The microProgram who is the owner of 'block' microProgram* prog; // The microProgram who is the owner of 'block'
}; };
struct microProgManager { struct microProgManager
{
microIR<mProgSize> IRinfo; // IR information microIR<mProgSize> IRinfo; // IR information
microProgramList* prog [mProgSize/2]; // List of microPrograms indexed by startPC values microProgramList* prog [mProgSize/2]; // List of microPrograms indexed by startPC values
microProgramQuick quick[mProgSize/2]; // Quick reference to valid microPrograms for current execution microProgramQuick quick[mProgSize/2]; // Quick reference to valid microPrograms for current execution
@ -176,7 +203,8 @@ static const uint mVUcacheSafeZone = 3; // Safe-Zone for program recompilatio
static const uint mVU0cacheReserve = 64; // mVU0 Reserve Cache Size (in megabytes) static const uint mVU0cacheReserve = 64; // mVU0 Reserve Cache Size (in megabytes)
static const uint mVU1cacheReserve = 64; // mVU1 Reserve Cache Size (in megabytes) static const uint mVU1cacheReserve = 64; // mVU1 Reserve Cache Size (in megabytes)
struct microVU { struct microVU
{
__aligned16 u32 statFlag[4]; // 4 instances of status flag (backup for xgkick) __aligned16 u32 statFlag[4]; // 4 instances of status flag (backup for xgkick)
__aligned16 u32 macFlag [4]; // 4 instances of mac flag (used in execution) __aligned16 u32 macFlag [4]; // 4 instances of mac flag (used in execution)
@ -221,7 +249,8 @@ struct microVU {
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
__fi VIFregisters& getVifRegs() const { __fi VIFregisters& getVifRegs() const
{
return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs(); return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs();
} }
}; };
@ -257,12 +286,14 @@ typedef void (__fastcall *mVUrecCall)(u32, u32);
typedef void (*mVUrecCallXG)(void); typedef void (*mVUrecCallXG)(void);
template <typename T> template <typename T>
void makeUnique(T& v) { // Removes Duplicates void makeUnique(T& v)
{ // Removes Duplicates
v.erase(unique(v.begin(), v.end()), v.end()); v.erase(unique(v.begin(), v.end()), v.end());
} }
template <typename T> template <typename T>
void sortVector(T& v) { void sortVector(T& v)
{
sort(v.begin(), v.end()); sort(v.begin(), v.end());
} }

View File

@ -71,7 +71,8 @@ __ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance)
} }
// Denormalizes Status Flag // Denormalizes Status Flag
__ri void mVUallocSFLAGd(u32* memAddr) { __ri void mVUallocSFLAGd(u32* memAddr)
{
xMOV(edx, ptr32[memAddr]); xMOV(edx, ptr32[memAddr]);
xMOV(eax, edx); xMOV(eax, edx);
xSHR(eax, 3); xSHR(eax, 3);
@ -135,15 +136,18 @@ __ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false)
__ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_) __ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
{ {
if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) if (mVUlow.backupVI) // Backs up reg to memory (used when VI is modified b4 a branch)
{
xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]); xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]);
xMOV (ptr32[&mVU.VIbackup], gprT3); xMOV (ptr32[&mVU.VIbackup], gprT3);
} }
if (_reg_ == 0) { if (_reg_ == 0)
{
return; return;
} }
else if (_reg_ < 16) { else if (_reg_ < 16)
{
xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id)); xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id));
} }
} }
@ -168,5 +172,6 @@ __ri void writeQreg(const xmm& reg, int qInstance)
{ {
if (qInstance) if (qInstance)
xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
else xMOVSS(xmmPQ, reg); else
xMOVSS(xmmPQ, reg);
} }

View File

@ -34,8 +34,10 @@ __ri void analyzeReg1(mV, int xReg, microVFreg& vfRead) {
} }
// Write to a VF reg // Write to a VF reg
__ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp) { __ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp)
if (xReg) { {
if (xReg)
{
#define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y] #define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y]
if (_X) { bReg(xReg, isLowOp).x = 4; vfWrite.reg = xReg; vfWrite.x = 4; } if (_X) { bReg(xReg, isLowOp).x = 4; vfWrite.reg = xReg; vfWrite.x = 4; }
if (_Y) { bReg(xReg, isLowOp).y = 4; vfWrite.reg = xReg; vfWrite.y = 4; } if (_Y) { bReg(xReg, isLowOp).y = 4; vfWrite.reg = xReg; vfWrite.y = 4; }
@ -45,24 +47,30 @@ __ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp) {
} }
// Read a VF reg (BC opcodes) // Read a VF reg (BC opcodes)
__ri void analyzeReg3(mV, int xReg, microVFreg& vfRead) { __ri void analyzeReg3(mV, int xReg, microVFreg& vfRead)
if (xReg) { {
if (_bc_x) { if (xReg)
{
if (_bc_x)
{
mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x); mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x);
vfRead.reg = xReg; vfRead.reg = xReg;
vfRead.x = 1; vfRead.x = 1;
} }
else if (_bc_y) { else if (_bc_y)
{
mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y);
vfRead.reg = xReg; vfRead.reg = xReg;
vfRead.y = 1; vfRead.y = 1;
} }
else if (_bc_z) { else if (_bc_z)
{
mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z);
vfRead.reg = xReg; vfRead.reg = xReg;
vfRead.z = 1; vfRead.z = 1;
} }
else { else
{
mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w);
vfRead.reg = xReg; vfRead.reg = xReg;
vfRead.w = 1; vfRead.w = 1;
@ -71,8 +79,10 @@ __ri void analyzeReg3(mV, int xReg, microVFreg& vfRead) {
} }
// For Clip Opcode // For Clip Opcode
__ri void analyzeReg4(mV, int xReg, microVFreg& vfRead) { __ri void analyzeReg4(mV, int xReg, microVFreg& vfRead)
if (xReg) { {
if (xReg)
{
mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w);
vfRead.reg = xReg; vfRead.reg = xReg;
vfRead.w = 1; vfRead.w = 1;
@ -80,9 +90,12 @@ __ri void analyzeReg4(mV, int xReg, microVFreg& vfRead) {
} }
// Read VF reg (FsF/FtF) // Read VF reg (FsF/FtF)
__ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead) { __ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead)
if (xReg) { {
switch (fxf) { if (xReg)
{
switch (fxf)
{
case 0: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x); vfRead.reg = xReg; vfRead.x = 1; break; case 0: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x); vfRead.reg = xReg; vfRead.x = 1; break;
case 1: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; break; case 1: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; break;
case 2: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; break; case 2: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; break;
@ -92,8 +105,10 @@ __ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead) {
} }
// Flips xyzw stalls to yzwx (MR32 Opcode) // Flips xyzw stalls to yzwx (MR32 Opcode)
__ri void analyzeReg6(mV, int xReg, microVFreg& vfRead) { __ri void analyzeReg6(mV, int xReg, microVFreg& vfRead)
if (xReg) { {
if (xReg)
{
if (_X) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; } if (_X) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; }
if (_Y) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; } if (_Y) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; }
if (_Z) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); vfRead.reg = xReg; vfRead.w = 1; } if (_Z) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); vfRead.reg = xReg; vfRead.w = 1; }
@ -102,8 +117,10 @@ __ri void analyzeReg6(mV, int xReg, microVFreg& vfRead) {
} }
// Reading a VI reg // Reading a VI reg
__ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead) { __ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead)
if (xReg) { {
if (xReg)
{
mVUstall = std::max(mVUstall, mVUregs.VI[xReg]); mVUstall = std::max(mVUstall, mVUregs.VI[xReg]);
viRead.reg = xReg; viRead.reg = xReg;
viRead.used = 1; viRead.used = 1;
@ -111,8 +128,10 @@ __ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead) {
} }
// Writing to a VI reg // Writing to a VI reg
__ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles) { __ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles)
if (xReg) { {
if (xReg)
{
mVUconstReg[xReg].isValid = 0; mVUconstReg[xReg].isValid = 0;
mVUregsTemp.VIreg = xReg; mVUregsTemp.VIreg = xReg;
mVUregsTemp.VI = aCycles; mVUregsTemp.VI = aCycles;
@ -121,18 +140,43 @@ __ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles) {
} }
} }
#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = std::max(mVUstall, mVUregs.q); } #define analyzeQreg(x) \
#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? (mVUregs.p - 1) : 0)); } { \
#define analyzeRreg() { mVUregsTemp.r = 1; } mVUregsTemp.q = x; \
#define analyzeXGkick1() { mVUstall = std::max(mVUstall, mVUregs.xgkick); } mVUstall = std::max(mVUstall, mVUregs.q); \
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; } }
#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } } #define analyzePreg(x) \
{ \
mVUregsTemp.p = x; \
mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? (mVUregs.p - 1) : 0)); \
}
#define analyzeRreg() \
{ \
mVUregsTemp.r = 1; \
}
#define analyzeXGkick1() \
{ \
mVUstall = std::max(mVUstall, mVUregs.xgkick); \
}
#define analyzeXGkick2(x) \
{ \
mVUregsTemp.xgkick = x; \
}
#define setConstReg(x, v) \
{ \
if (x) \
{ \
mVUconstReg[x].isValid = 1; \
mVUconstReg[x].regValue = v; \
} \
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// FMAC1 - Normal FMAC Opcodes // FMAC1 - Normal FMAC Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { __fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft)
{
sFLAG.doFlag = 1; sFLAG.doFlag = 1;
analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg1(mVU, Fs, mVUup.VF_read[0]);
analyzeReg1(mVU, Ft, mVUup.VF_read[1]); analyzeReg1(mVU, Ft, mVUup.VF_read[1]);
@ -143,7 +187,8 @@ __fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
// FMAC2 - ABS/FTOI/ITOF Opcodes // FMAC2 - ABS/FTOI/ITOF Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) { __fi void mVUanalyzeFMAC2(mV, int Fs, int Ft)
{
analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg1(mVU, Fs, mVUup.VF_read[0]);
analyzeReg2(mVU, Ft, mVUup.VF_write, 0); analyzeReg2(mVU, Ft, mVUup.VF_write, 0);
} }
@ -152,7 +197,8 @@ __fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) {
// FMAC3 - BC(xyzw) FMAC Opcodes // FMAC3 - BC(xyzw) FMAC Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { __fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft)
{
sFLAG.doFlag = 1; sFLAG.doFlag = 1;
analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg1(mVU, Fs, mVUup.VF_read[0]);
analyzeReg3(mVU, Ft, mVUup.VF_read[1]); analyzeReg3(mVU, Ft, mVUup.VF_read[1]);
@ -163,7 +209,8 @@ __fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
// FMAC4 - Clip FMAC Opcode // FMAC4 - Clip FMAC Opcode
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeFMAC4(mV, int Fs, int Ft) { __fi void mVUanalyzeFMAC4(mV, int Fs, int Ft)
{
cFLAG.doFlag = 1; cFLAG.doFlag = 1;
analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg1(mVU, Fs, mVUup.VF_read[0]);
analyzeReg4(mVU, Ft, mVUup.VF_read[1]); analyzeReg4(mVU, Ft, mVUup.VF_read[1]);
@ -173,30 +220,42 @@ __fi void mVUanalyzeFMAC4(mV, int Fs, int Ft) {
// IALU - IALU Opcodes // IALU - IALU Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeIALU1(mV, int Id, int Is, int It) { __fi void mVUanalyzeIALU1(mV, int Id, int Is, int It)
if (!Id) mVUlow.isNOP = 1; {
if (!Id)
mVUlow.isNOP = 1;
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, It, mVUlow.VI_read[1]); analyzeVIreg1(mVU, It, mVUlow.VI_read[1]);
analyzeVIreg2(mVU, Id, mVUlow.VI_write, 1); analyzeVIreg2(mVU, Id, mVUlow.VI_write, 1);
} }
__fi void mVUanalyzeIALU2(mV, int Is, int It) { __fi void mVUanalyzeIALU2(mV, int Is, int It)
if (!It) mVUlow.isNOP = 1; {
if (!It)
mVUlow.isNOP = 1;
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
} }
__fi void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) { __fi void mVUanalyzeIADDI(mV, int Is, int It, s16 imm)
{
mVUanalyzeIALU2(mVU, Is, It); mVUanalyzeIALU2(mVU, Is, It);
if (!Is) { setConstReg(It, imm); } if (!Is)
{
setConstReg(It, imm);
}
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
// MR32 - MR32 Opcode // MR32 - MR32 Opcode
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeMR32(mV, int Fs, int Ft) { __fi void mVUanalyzeMR32(mV, int Fs, int Ft)
if (!Ft) { mVUlow.isNOP = 1; } {
if (!Ft)
{
mVUlow.isNOP = 1;
}
analyzeReg6(mVU, Fs, mVUlow.VF_read[0]); analyzeReg6(mVU, Fs, mVUlow.VF_read[0]);
analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1);
} }
@ -205,7 +264,8 @@ __fi void mVUanalyzeMR32(mV, int Fs, int Ft) {
// FDIV - DIV/SQRT/RSQRT Opcodes // FDIV - DIV/SQRT/RSQRT Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { __fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles)
{
analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]);
analyzeReg5(mVU, Ft, Ftf, mVUlow.VF_read[1]); analyzeReg5(mVU, Ft, Ftf, mVUlow.VF_read[1]);
analyzeQreg(xCycles); analyzeQreg(xCycles);
@ -215,12 +275,14 @@ __fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) {
// EFU - EFU Opcodes // EFU - EFU Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) { __fi void mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles)
{
analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]);
analyzePreg(xCycles); analyzePreg(xCycles);
} }
__fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { __fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles)
{
analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeReg1(mVU, Fs, mVUlow.VF_read[0]);
analyzePreg(xCycles); analyzePreg(xCycles);
} }
@ -229,8 +291,10 @@ __fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) {
// MFP - MFP Opcode // MFP - MFP Opcode
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeMFP(mV, int Ft) { __fi void mVUanalyzeMFP(mV, int Ft)
if (!Ft) mVUlow.isNOP = 1; {
if (!Ft)
mVUlow.isNOP = 1;
analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1);
} }
@ -238,8 +302,10 @@ __fi void mVUanalyzeMFP(mV, int Ft) {
// MOVE - MOVE Opcode // MOVE - MOVE Opcode
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeMOVE(mV, int Fs, int Ft) { __fi void mVUanalyzeMOVE(mV, int Fs, int Ft)
if (!Ft||(Ft == Fs)) mVUlow.isNOP = 1; {
if (!Ft || (Ft == Fs))
mVUlow.isNOP = 1;
analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeReg1(mVU, Fs, mVUlow.VF_read[0]);
analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1);
} }
@ -248,36 +314,59 @@ __fi void mVUanalyzeMOVE(mV, int Fs, int Ft) {
// LQx - LQ/LQD/LQI Opcodes // LQx - LQ/LQD/LQI Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { __fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs)
{
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1);
if (!Ft) { if (writeIs && Is) { mVUlow.noWriteVF = 1; } else { mVUlow.isNOP = 1; } } if (!Ft)
if (writeIs) { analyzeVIreg2(mVU, Is, mVUlow.VI_write, 1); } {
if (writeIs && Is)
{
mVUlow.noWriteVF = 1;
}
else
{
mVUlow.isNOP = 1;
}
}
if (writeIs)
{
analyzeVIreg2(mVU, Is, mVUlow.VI_write, 1);
}
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
// SQx - SQ/SQD/SQI Opcodes // SQx - SQ/SQD/SQI Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { __fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt)
{
analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeReg1(mVU, Fs, mVUlow.VF_read[0]);
analyzeVIreg1(mVU, It, mVUlow.VI_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[0]);
if (writeIt) { analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); } if (writeIt)
{
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
}
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
// R*** - R Reg Opcodes // R*** - R Reg Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeR1(mV, int Fs, int Fsf) { __fi void mVUanalyzeR1(mV, int Fs, int Fsf)
{
analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]);
analyzeRreg(); analyzeRreg();
} }
__fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { __fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP)
if (!Ft) { {
if (canBeNOP) mVUlow.isNOP = 1; if (!Ft)
else mVUlow.noWriteVF = 1; {
if (canBeNOP)
mVUlow.isNOP = 1;
else
mVUlow.noWriteVF = 1;
} }
analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1);
analyzeRreg(); analyzeRreg();
@ -286,21 +375,25 @@ __fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// Sflag - Status Flag Opcodes // Sflag - Status Flag Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__ri void flagSet(mV, bool setMacFlag) { __ri void flagSet(mV, bool setMacFlag)
{
int curPC = iPC; int curPC = iPC;
int calcOPS = 0; int calcOPS = 0;
//Check which ops need to do the flag settings, also check for runs of ops as they can do multiple calculations to get the sticky status flags (VP2) //Check which ops need to do the flag settings, also check for runs of ops as they can do multiple calculations to get the sticky status flags (VP2)
//Make sure we get the last 4 calculations (Bloody Roar 3, possibly others) //Make sure we get the last 4 calculations (Bloody Roar 3, possibly others)
for (int i = mVUcount, j = 0; i > 0; i--, j++) { for (int i = mVUcount, j = 0; i > 0; i--, j++)
{
j += mVUstall; j += mVUstall;
incPC(-2); incPC(-2);
if (calcOPS >= 4 && mVUup.VF_write.reg) break; if (calcOPS >= 4 && mVUup.VF_write.reg)
break;
if (sFLAG.doFlag && (j >= 3)) if (sFLAG.doFlag && (j >= 3))
{ {
if (setMacFlag) mFLAG.doFlag = 1; if (setMacFlag)
mFLAG.doFlag = 1;
sFLAG.doNonSticky = 1; sFLAG.doNonSticky = 1;
calcOPS++; calcOPS++;
} }
@ -310,22 +403,29 @@ __ri void flagSet(mV, bool setMacFlag) {
setCode(); setCode();
} }
__ri void mVUanalyzeSflag(mV, int It) { __ri void mVUanalyzeSflag(mV, int It)
{
mVUlow.readFlags = true; mVUlow.readFlags = true;
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
if (!It) { mVUlow.isNOP = 1; } if (!It)
else { {
mVUlow.isNOP = 1;
}
else
{
//mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block //mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
mVUinfo.swapOps = 1; mVUinfo.swapOps = 1;
flagSet(mVU, 0); flagSet(mVU, 0);
if (mVUcount < 4) { if (mVUcount < 4)
{
if (!(mVUpBlock->pState.needExactMatch & 1)) // The only time this should happen is on the first program block if (!(mVUpBlock->pState.needExactMatch & 1)) // The only time this should happen is on the first program block
DevCon.WriteLn(Color_Green, "microVU%d: pState's sFlag Info was expected to be set [%04x]", getIndex, xPC); DevCon.WriteLn(Color_Green, "microVU%d: pState's sFlag Info was expected to be set [%04x]", getIndex, xPC);
} }
} }
} }
__ri void mVUanalyzeFSSET(mV) { __ri void mVUanalyzeFSSET(mV)
{
mVUlow.isFSSET = 1; mVUlow.isFSSET = 1;
mVUlow.readFlags = true; mVUlow.readFlags = true;
} }
@ -334,15 +434,21 @@ __ri void mVUanalyzeFSSET(mV) {
// Mflag - Mac Flag Opcodes // Mflag - Mac Flag Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__ri void mVUanalyzeMflag(mV, int Is, int It) { __ri void mVUanalyzeMflag(mV, int Is, int It)
{
mVUlow.readFlags = true; mVUlow.readFlags = true;
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
if (!It) { mVUlow.isNOP = 1; } if (!It)
else { {
mVUlow.isNOP = 1;
}
else
{
mVUinfo.swapOps = 1; mVUinfo.swapOps = 1;
flagSet(mVU, 1); flagSet(mVU, 1);
if (mVUcount < 4) { if (mVUcount < 4)
{
if (!(mVUpBlock->pState.needExactMatch & 2)) // The only time this should happen is on the first program block if (!(mVUpBlock->pState.needExactMatch & 2)) // The only time this should happen is on the first program block
DevCon.WriteLn(Color_Green, "microVU%d: pState's mFlag Info was expected to be set [%04x]", getIndex, xPC); DevCon.WriteLn(Color_Green, "microVU%d: pState's mFlag Info was expected to be set [%04x]", getIndex, xPC);
} }
@ -353,10 +459,12 @@ __ri void mVUanalyzeMflag(mV, int Is, int It) {
// Cflag - Clip Flag Opcodes // Cflag - Clip Flag Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeCflag(mV, int It) { __fi void mVUanalyzeCflag(mV, int It)
{
mVUinfo.swapOps = 1; mVUinfo.swapOps = 1;
mVUlow.readFlags = true; mVUlow.readFlags = true;
if (mVUcount < 4) { if (mVUcount < 4)
{
if (!(mVUpBlock->pState.needExactMatch & 4)) // The only time this should happen is on the first program block if (!(mVUpBlock->pState.needExactMatch & 4)) // The only time this should happen is on the first program block
DevCon.WriteLn(Color_Green, "microVU%d: pState's cFlag Info was expected to be set [%04x]", getIndex, xPC); DevCon.WriteLn(Color_Green, "microVU%d: pState's cFlag Info was expected to be set [%04x]", getIndex, xPC);
} }
@ -367,7 +475,8 @@ __fi void mVUanalyzeCflag(mV, int It) {
// XGkick // XGkick
//------------------------------------------------------------------ //------------------------------------------------------------------
__fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { __fi void mVUanalyzeXGkick(mV, int Fs, int xCycles)
{
analyzeVIreg1(mVU, Fs, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Fs, mVUlow.VI_read[0]);
analyzeXGkick1(); // Stall will cause mVUincCycles() to trigger pending xgkick analyzeXGkick1(); // Stall will cause mVUincCycles() to trigger pending xgkick
analyzeXGkick2(xCycles); analyzeXGkick2(xCycles);
@ -387,9 +496,12 @@ __fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) {
// value read by the branch is the value the VI reg had at the start // value read by the branch is the value the VI reg had at the start
// of the instruction 4 instructions ago (assuming no stalls). // of the instruction 4 instructions ago (assuming no stalls).
// See: https://forums.pcsx2.net/Thread-blog-PS2-VU-Vector-Unit-Documentation-Part-1 // See: https://forums.pcsx2.net/Thread-blog-PS2-VU-Vector-Unit-Documentation-Part-1
static void analyzeBranchVI(mV, int xReg, bool& infoVar) { static void analyzeBranchVI(mV, int xReg, bool& infoVar)
if (!xReg) return; {
if (mVUstall) { // I assume a stall on branch means the vi reg is not modified directly b4 the branch... if (!xReg)
return;
if (mVUstall) // I assume a stall on branch means the vi reg is not modified directly b4 the branch...
{
DevCon.Warning("microVU%d: %d cycle stall on branch instruction [%04x]", getIndex, mVUstall, xPC); DevCon.Warning("microVU%d: %d cycle stall on branch instruction [%04x]", getIndex, mVUstall, xPC);
return; return;
} }
@ -398,44 +510,56 @@ static void analyzeBranchVI(mV, int xReg, bool& infoVar) {
int iEnd = 4; int iEnd = 4;
int bPC = iPC; int bPC = iPC;
incPC2(-2); incPC2(-2);
for (i = 0; i < iEnd && cyc < iEnd; i++) { for (i = 0; i < iEnd && cyc < iEnd; i++)
if (i && mVUstall) { {
if (i && mVUstall)
{
DevCon.Warning("microVU%d: Branch VI-Delay with %d cycle stall (%d) [%04x]", getIndex, mVUstall, i, xPC); DevCon.Warning("microVU%d: Branch VI-Delay with %d cycle stall (%d) [%04x]", getIndex, mVUstall, i, xPC);
} }
if (i == (int)mVUcount) { if (i == (int)mVUcount)
{
bool warn = false; bool warn = false;
if (i == 1) if (i == 1)
warn = true; warn = true;
if (mVUpBlock->pState.viBackUp == xReg) { if (mVUpBlock->pState.viBackUp == xReg)
{
DevCon.WriteLn(Color_Green, "microVU%d: Loading Branch VI value from previous block", getIndex); DevCon.WriteLn(Color_Green, "microVU%d: Loading Branch VI value from previous block", getIndex);
if (i == 0) if (i == 0)
warn = true; warn = true;
infoVar = true; infoVar = true;
j = i; i++; j = i;
i++;
} }
if (warn) DevCon.Warning("microVU%d: Branch VI-Delay with small block (%d) [%04x]", getIndex, i, xPC); if (warn)
DevCon.Warning("microVU%d: Branch VI-Delay with small block (%d) [%04x]", getIndex, i, xPC);
break; // if (warn), we don't have enough information to always guarantee the correct result. break; // if (warn), we don't have enough information to always guarantee the correct result.
} }
if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) { if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used)
if (mVUlow.readFlags) { {
if (i) DevCon.Warning("microVU%d: Branch VI-Delay with Read Flags Set (%d) [%04x]", getIndex, i, xPC); if (mVUlow.readFlags)
{
if (i)
DevCon.Warning("microVU%d: Branch VI-Delay with Read Flags Set (%d) [%04x]", getIndex, i, xPC);
break; // Not sure if on the above "if (i)" case, if we need to "continue" or if we should "break" break; // Not sure if on the above "if (i)" case, if we need to "continue" or if we should "break"
} }
j = i; j = i;
} }
else if (i == 0) { else if (i == 0)
{
break; break;
} }
cyc += mVUstall + 1; cyc += mVUstall + 1;
incPC2(-2); incPC2(-2);
} }
if (i) { if (i)
if (!infoVar) { {
if (!infoVar)
{
iPC = bPC; iPC = bPC;
incPC2(-2 * (j + 1)); incPC2(-2 * (j + 1));
mVUlow.backupVI = true; mVUlow.backupVI = true;
@ -444,38 +568,54 @@ static void analyzeBranchVI(mV, int xReg, bool& infoVar) {
iPC = bPC; iPC = bPC;
DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x][%03d]", getIndex, j + 1, xPC, mVU.prog.cur->idx); DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x][%03d]", getIndex, j + 1, xPC, mVU.prog.cur->idx);
} }
else { else
{
iPC = bPC; iPC = bPC;
} }
} }
/* /*
// Dead Code... the old version of analyzeBranchVI() // Dead Code... the old version of analyzeBranchVI()
__fi void analyzeBranchVI(mV, int xReg, bool& infoVar) { __fi void analyzeBranchVI(mV, int xReg, bool& infoVar)
if (!xReg) return; {
if (!xReg)
return;
int i; int i;
int iEnd = std::min(5, mVUcount + 1); int iEnd = std::min(5, mVUcount + 1);
int bPC = iPC; int bPC = iPC;
incPC2(-2); incPC2(-2);
for (i = 0; i < iEnd; i++) { for (i = 0; i < iEnd; i++)
if ((i == mVUcount) && (i < 5)) { {
if (mVUpBlock->pState.viBackUp == xReg) { if ((i == mVUcount) && (i < 5))
{
if (mVUpBlock->pState.viBackUp == xReg)
{
infoVar = 1; infoVar = 1;
i++; i++;
} }
break; break;
} }
if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) { if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used)
{
if (mVUlow.readFlags || i == 5) break; if (mVUlow.readFlags || i == 5) break;
if (i == 0) { incPC2(-2); continue; } if (i == 0)
{
incPC2(-2);
continue;
}
if (((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used)) if (((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used))
|| ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used))) || ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used)))
{ incPC2(-2); continue; } {
incPC2(-2);
continue;
}
} }
break; break;
} }
if (i) { if (i)
if (!infoVar) { {
if (!infoVar)
{
incPC2(2); incPC2(2);
mVUlow.backupVI = 1; mVUlow.backupVI = 1;
infoVar = 1; infoVar = 1;
@ -488,23 +628,26 @@ __fi void analyzeBranchVI(mV, int xReg, bool& infoVar) {
*/ */
// Branch in Branch Delay-Slots // Branch in Branch Delay-Slots
__ri int mVUbranchCheck(mV) { __ri int mVUbranchCheck(mV)
{
if (!mVUcount) if (!mVUcount)
return 0; return 0;
incPC(-2); incPC(-2);
if (mVUlow.branch) { if (mVUlow.branch)
{
u32 branchType = mVUlow.branch; u32 branchType = mVUlow.branch;
if (doBranchInDelaySlot) { if (doBranchInDelaySlot)
{
mVUlow.badBranch = true; mVUlow.badBranch = true;
incPC(2); incPC(2);
mVUlow.evilBranch = true; mVUlow.evilBranch = true;
if (mVUlow.branch == 2 || mVUlow.branch == 10) // Needs linking, we can only guess this if the next is not conditional if (mVUlow.branch == 2 || mVUlow.branch == 10) // Needs linking, we can only guess this if the next is not conditional
{ {
if(branchType <= 2 || branchType >= 9) //First branch is not conditional so we know what the link will be if (branchType <= 2 || branchType >= 9) // First branch is not conditional so we know what the link will be so we can let the existing evil block do its thing! We know where to get the addr :)
{ //So we can let the existing evil block do its thing! We know where to get the addr :) {
DevCon.Warning("yo"); DevCon.Warning("yo");
DevCon.Warning("yo"); DevCon.Warning("yo");
DevCon.Warning("yo"); DevCon.Warning("yo");
@ -515,7 +658,8 @@ __ri int mVUbranchCheck(mV) {
mVUregs.blockType = 2; mVUregs.blockType = 2;
} //Else it is conditional, so we need to do some nasty processing later in microVU_Branch.inl } //Else it is conditional, so we need to do some nasty processing later in microVU_Branch.inl
} }
else { else
{
mVUregs.blockType = 2; //Second branch doesn't need linking, so can let it run its evil block course (MGS2 for testing) mVUregs.blockType = 2; //Second branch doesn't need linking, so can let it run its evil block course (MGS2 for testing)
} }
@ -525,7 +669,8 @@ __ri int mVUbranchCheck(mV) {
branchSTR[mVUlow.branch & 0xf], branchSTR[branchType & 0xf], xPC); branchSTR[mVUlow.branch & 0xf], branchSTR[branchType & 0xf], xPC);
return 1; return 1;
} }
else { else
{
incPC(2); incPC(2);
mVUlow.isNOP = true; mVUlow.isNOP = true;
DevCon.Warning("microVU%d: %s in %s delay slot! [%04x]", mVU.index, DevCon.Warning("microVU%d: %s in %s delay slot! [%04x]", mVU.index,
@ -537,40 +682,49 @@ __ri int mVUbranchCheck(mV) {
return 0; return 0;
} }
__fi void mVUanalyzeCondBranch1(mV, int Is) { __fi void mVUanalyzeCondBranch1(mV, int Is)
{
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
if (!mVUbranchCheck(mVU)) { if (!mVUbranchCheck(mVU))
{
analyzeBranchVI(mVU, Is, mVUlow.memReadIs); analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
} }
} }
__fi void mVUanalyzeCondBranch2(mV, int Is, int It) { __fi void mVUanalyzeCondBranch2(mV, int Is, int It)
{
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, It, mVUlow.VI_read[1]); analyzeVIreg1(mVU, It, mVUlow.VI_read[1]);
if (!mVUbranchCheck(mVU)) { if (!mVUbranchCheck(mVU))
{
analyzeBranchVI(mVU, Is, mVUlow.memReadIs); analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
analyzeBranchVI(mVU, It, mVUlow.memReadIt); analyzeBranchVI(mVU, It, mVUlow.memReadIt);
} }
} }
__fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) { __fi void mVUanalyzeNormBranch(mV, int It, bool isBAL)
{
mVUbranchCheck(mVU); mVUbranchCheck(mVU);
if (isBAL) { if (isBAL)
{
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
setConstReg(It, bSaveAddr); setConstReg(It, bSaveAddr);
} }
} }
__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) { __ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR)
{
mVUlow.branch = (isJALR) ? 10 : 9; mVUlow.branch = (isJALR) ? 10 : 9;
mVUbranchCheck(mVU); mVUbranchCheck(mVU);
if (mVUconstReg[Is].isValid && doConstProp) { if (mVUconstReg[Is].isValid && doConstProp)
{
mVUlow.constJump.isValid = 1; mVUlow.constJump.isValid = 1;
mVUlow.constJump.regValue = mVUconstReg[Is].regValue; mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU.index); //DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU.index);
} }
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
if (isJALR) { if (isJALR)
{
analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1);
setConstReg(It, bSaveAddr); setConstReg(It, bSaveAddr);
} }

View File

@ -27,16 +27,8 @@ __fi int getLastFlagInst(microRegInfo& pState, int* xFlag, int flagType, int isE
return (((pState.flagInfo >> (2 * flagType + 2)) & 3) - 1) & 3; return (((pState.flagInfo >> (2 * flagType + 2)) & 3) - 1) & 3;
} }
void mVU0clearlpStateJIT() void mVU0clearlpStateJIT() { if (!microVU0.prog.cleared) memzero(microVU0.prog.lpState); }
{ void mVU1clearlpStateJIT() { if (!microVU1.prog.cleared) memzero(microVU1.prog.lpState); }
if (!microVU0.prog.cleared)
memzero(microVU0.prog.lpState);
}
void mVU1clearlpStateJIT()
{
if (!microVU1.prog.cleared)
memzero(microVU1.prog.lpState);
}
void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
{ {
@ -78,9 +70,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
// Save P/Q Regs // Save P/Q Regs
if (qInst) if (qInst)
{
xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
}
xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ); xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ);
xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ); xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ);
@ -89,9 +79,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
if (isVU1) if (isVU1)
{ {
if (pInst) if (pInst)
{ xPSHUF.D(xmmPQ, xmmPQ, 0xb4); // Swap Pending/Active P
xPSHUF.D(xmmPQ, xmmPQ, 0xb4);
} // Swap Pending/Active P
xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2 xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2
xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ);
xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3 xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3
@ -107,8 +95,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1);
xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);
if (!isEbit) if (!isEbit) // Backup flag instances
{ // Backup flag instances {
xMOVAPS(xmmT1, ptr128[mVU.macFlag]); xMOVAPS(xmmT1, ptr128[mVU.macFlag]);
xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);
xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); xMOVAPS(xmmT1, ptr128[mVU.clipFlag]);
@ -119,8 +107,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2);
xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3);
} }
else else // Flush flag instances
{ // Flush flag instances {
xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0); xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);
@ -134,8 +122,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1); xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
} }
if (isEbit) if (isEbit) // Clear 'is busy' Flags
{ // Clear 'is busy' Flags {
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0); xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (!mVU.index || !THREAD_VU1) if (!mVU.index || !THREAD_VU1)
{ {
@ -147,8 +135,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
else else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles); xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
if (isEbit != 2) if (isEbit != 2) // Save PC, and Jump to Exit Point
{ // Save PC, and Jump to Exit Point {
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
} }
@ -196,9 +184,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
// Save P/Q Regs // Save P/Q Regs
if (qInst) if (qInst)
{
xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
}
xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ); xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ);
xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ); xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ);
@ -207,9 +193,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
if (isVU1) if (isVU1)
{ {
if (pInst) if (pInst)
{ xPSHUF.D(xmmPQ, xmmPQ, 0xb4); // Swap Pending/Active P
xPSHUF.D(xmmPQ, xmmPQ, 0xb4);
} // Swap Pending/Active P
xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2 xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2
xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ);
xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3 xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3
@ -225,8 +209,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1);
xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);
if (!isEbit || isEbit == 3) if (!isEbit || isEbit == 3) // Backup flag instances
{ // Backup flag instances {
xMOVAPS(xmmT1, ptr128[mVU.macFlag]); xMOVAPS(xmmT1, ptr128[mVU.macFlag]);
xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);
xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); xMOVAPS(xmmT1, ptr128[mVU.clipFlag]);
@ -237,8 +221,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2);
xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3);
} }
else else // Flush flag instances
{ // Flush flag instances {
xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0); xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);
@ -253,8 +237,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
} }
if ((isEbit && isEbit != 3)) if ((isEbit && isEbit != 3)) // Clear 'is busy' Flags
{ // Clear 'is busy' Flags {
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0); xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (!mVU.index || !THREAD_VU1) if (!mVU.index || !THREAD_VU1)
{ {
@ -266,8 +250,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
else else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles); xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
if (isEbit != 2 && isEbit != 3) if (isEbit != 2 && isEbit != 3) // Save PC, and Jump to Exit Point
{ // Save PC, and Jump to Exit Point {
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
} }
@ -277,15 +261,12 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings // Recompiles Code for Proper Flags and Q/P regs on Block Linkings
void mVUsetupBranch(mV, microFlagCycles& mFC) void mVUsetupBranch(mV, microFlagCycles& mFC)
{ {
mVU.regAlloc->flushAll(); // Flush Allocated Regs mVU.regAlloc->flushAll(); // Flush Allocated Regs
mVUsetupFlags(mVU, mFC); // Shuffle Flag Instances mVUsetupFlags(mVU, mFC); // Shuffle Flag Instances
// Shuffle P/Q regs since every block starts at instance #0 // Shuffle P/Q regs since every block starts at instance #0
if (mVU.p || mVU.q) if (mVU.p || mVU.q)
{
xPSHUF.D(xmmPQ, xmmPQ, shufflePQ); xPSHUF.D(xmmPQ, xmmPQ, shufflePQ);
}
mVU.p = 0, mVU.q = 0; mVU.p = 0, mVU.q = 0;
} }
@ -295,14 +276,10 @@ void normBranchCompile(microVU& mVU, u32 branchPC)
blockCreate(branchPC / 8); blockCreate(branchPC / 8);
pBlock = mVUblocks[branchPC / 8]->search((microRegInfo*)&mVUregs); pBlock = mVUblocks[branchPC / 8]->search((microRegInfo*)&mVUregs);
if (pBlock) if (pBlock)
{
xJMP(pBlock->x86ptrStart); xJMP(pBlock->x86ptrStart);
}
else else
{
mVUcompile(mVU, branchPC, (uptr)&mVUregs); mVUcompile(mVU, branchPC, (uptr)&mVUregs);
} }
}
void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump)
{ {
@ -310,8 +287,8 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump)
mVUsetupBranch(mVU, mFC); mVUsetupBranch(mVU, mFC);
mVUbackupRegs(mVU); mVUbackupRegs(mVU);
if (!mVUpBlock->jumpCache) if (!mVUpBlock->jumpCache) // Create the jump cache for this block
{ // Create the jump cache for this block {
mVUpBlock->jumpCache = new microJumpCache[mProgSize / 2]; mVUpBlock->jumpCache = new microJumpCache[mProgSize / 2];
} }
@ -537,8 +514,8 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
iPC = tempPC; iPC = tempPC;
} }
if (mVUup.eBit) if (mVUup.eBit) // Conditional Branch With E-Bit Set
{ // Conditional Branch With E-Bit Set {
if (mVUlow.evilBranch) if (mVUlow.evilBranch)
DevCon.Warning("End on evil branch! - Not implemented! - If game broken report to PCSX2 Team"); DevCon.Warning("End on evil branch! - Not implemented! - If game broken report to PCSX2 Team");
@ -558,14 +535,13 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
return; return;
} }
else else // Normal Conditional Branch
{ // Normal Conditional Branch {
xCMP(ptr16[&mVU.branch], 0); xCMP(ptr16[&mVU.branch], 0);
incPC(3); incPC(3);
if (mVUlow.evilBranch) // We are dealing with an evil evil block, so we need to process this slightly differently if (mVUlow.evilBranch) // We are dealing with an evil evil block, so we need to process this slightly differently
{ {
if (mVUlow.branch == 10 || mVUlow.branch == 2) // Evil branch is a jump of some measure if (mVUlow.branch == 10 || mVUlow.branch == 2) // Evil branch is a jump of some measure
{ {
//Because of how it is linked, we need to make sure the target is recompiled if taken //Because of how it is linked, we need to make sure the target is recompiled if taken
@ -578,8 +554,8 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
blockCreate(iPC / 2); blockCreate(iPC / 2);
bBlock = mVUblocks[iPC / 2]->search((microRegInfo*)&mVUregs); bBlock = mVUblocks[iPC / 2]->search((microRegInfo*)&mVUregs);
incPC2(-1); incPC2(-1);
if (bBlock) if (bBlock) // Branch non-taken has already been compiled
{ // Branch non-taken has already been compiled {
xJcc(xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart); xJcc(xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart);
incPC(-3); // Go back to branch opcode (to get branch imm addr) incPC(-3); // Go back to branch opcode (to get branch imm addr)
normBranchCompile(mVU, branchAddr(mVU)); normBranchCompile(mVU, branchAddr(mVU));
@ -608,10 +584,10 @@ void normJump(mV, microFlagCycles& mFC)
{ {
DevCon.Warning("M-Bit on Jump! Please report if broken"); DevCon.Warning("M-Bit on Jump! Please report if broken");
} }
if (mVUlow.constJump.isValid) if (mVUlow.constJump.isValid) // Jump Address is Constant
{ // Jump Address is Constant {
if (mVUup.eBit) if (mVUup.eBit) // E-bit Jump
{ // E-bit Jump {
iPC = (mVUlow.constJump.regValue * 2) & (mVU.progMemMask); iPC = (mVUlow.constJump.regValue * 2) & (mVU.progMemMask);
mVUendProgram(mVU, &mFC, 1); mVUendProgram(mVU, &mFC, 1);
return; return;
@ -666,8 +642,8 @@ void normJump(mV, microFlagCycles& mFC)
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
eJMP.SetTarget(); eJMP.SetTarget();
} }
if (mVUup.eBit) if (mVUup.eBit) // E-bit Jump
{ // E-bit Jump {
mVUendProgram(mVU, &mFC, 2); mVUendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]); xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);

View File

@ -34,9 +34,12 @@ const __aligned16 u32 sse4_maxvals[2][4] = {
// gotten a NaN value, then something went wrong; and the NaN's sign // gotten a NaN value, then something went wrong; and the NaN's sign
// is not to be trusted. Games like positive values better usually, // is not to be trusted. Games like positive values better usually,
// and its faster... so just always make NaNs into positive infinity. // and its faster... so just always make NaNs into positive infinity.
void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) { void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0)
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) { {
switch (xyzw) { if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE))
{
switch (xyzw)
{
case 1: case 2: case 4: case 8: case 1: case 2: case 4: case 8:
xMIN.SS(reg, ptr32[mVUglob.maxvals]); xMIN.SS(reg, ptr32[mVUglob.maxvals]);
xMAX.SS(reg, ptr32[mVUglob.minvals]); xMAX.SS(reg, ptr32[mVUglob.minvals]);
@ -54,19 +57,24 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) {
// Note 2: Using regalloc here seems to contaminate some regs in certain games. // Note 2: Using regalloc here seems to contaminate some regs in certain games.
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode) // Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
// so we just use a temporary mem location for our backup for now... (non-sse4 version only) // so we just use a temporary mem location for our backup for now... (non-sse4 version only)
void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) { void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0)
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) { {
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW))
{
int i = (xyzw == 1 || xyzw == 2 || xyzw == 4 || xyzw == 8) ? 0 : 1; int i = (xyzw == 1 || xyzw == 2 || xyzw == 4 || xyzw == 8) ? 0 : 1;
xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]);
xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
return; return;
} }
else mVUclamp1(reg, regT1in, xyzw, bClampE); else
mVUclamp1(reg, regT1in, xyzw, bClampE);
} }
// Used for operand clamping on every SSE instruction (add/sub/mul/div) // Used for operand clamping on every SSE instruction (add/sub/mul/div)
void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw) { void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw)
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1); {
if (clampE)
mVUclamp2(mVU, reg, regT1, xyzw, 1);
} }
// Used for result clamping on every SSE instruction (add/sub/mul/div) // Used for result clamping on every SSE instruction (add/sub/mul/div)
@ -75,6 +83,8 @@ void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw) {
// emulated opcodes (causing crashes). Since we're clamping the operands // emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never be getting a NaN result, // with mVUclamp3, we should almost never be getting a NaN result,
// but this clamp is just a precaution just-in-case. // but this clamp is just a precaution just-in-case.
void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) { void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw)
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1); {
if (clampE && !CHECK_VU_SIGN_OVERFLOW)
mVUclamp1(reg, regT1, xyzw, 1);
} }

View File

@ -31,24 +31,31 @@ static void __fc mVUprintPC2(u32 pc) { Console.WriteLn("Block End PC = 0x%04
//------------------------------------------------------------------ //------------------------------------------------------------------
// Used by mVUsetupRange // Used by mVUsetupRange
__fi void mVUcheckIsSame(mV) { __fi void mVUcheckIsSame(mV)
if (mVU.prog.isSame == -1) { {
if (mVU.prog.isSame == -1)
{
mVU.prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize); mVU.prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize);
} }
if (mVU.prog.isSame == 0) { if (mVU.prog.isSame == 0)
{
mVUcacheProg(mVU, *mVU.prog.cur); mVUcacheProg(mVU, *mVU.prog.cur);
mVU.prog.isSame = 1; mVU.prog.isSame = 1;
} }
} }
// Sets up microProgram PC ranges based on whats been recompiled // Sets up microProgram PC ranges based on whats been recompiled
void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) { void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC)
{
std::deque<microRange>*& ranges = mVUcurProg.ranges; std::deque<microRange>*& ranges = mVUcurProg.ranges;
pxAssertDev(pc <= (s64)mVU.microMemSize, pxsFmt("microVU%d: PC outside of VU memory PC=0x%04x", mVU.index, pc)); pxAssertDev(pc <= (s64)mVU.microMemSize, pxsFmt("microVU%d: PC outside of VU memory PC=0x%04x", mVU.index, pc));
if (isStartPC) { // Check if startPC is already within a block we've recompiled if (isStartPC) // Check if startPC is already within a block we've recompiled
{
std::deque<microRange>::const_iterator it(ranges->begin()); std::deque<microRange>::const_iterator it(ranges->begin());
for ( ; it != ranges->end(); ++it) { for (; it != ranges->end(); ++it)
if ((pc >= it[0].start) && (pc <= it[0].end)) { {
if ((pc >= it[0].start) && (pc <= it[0].end))
{
if (it[0].start != it[0].end) if (it[0].start != it[0].end)
{ {
microRange mRange = {it[0].start, it[0].end}; microRange mRange = {it[0].start, it[0].end};
@ -59,40 +66,48 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) {
} }
} }
} }
else if (mVUrange.end >= pc) { else if (mVUrange.end >= pc)
{
// existing range covers more area than current PC so no need to process it // existing range covers more area than current PC so no need to process it
return; return;
} }
mVUcheckIsSame(mVU); mVUcheckIsSame(mVU);
if (isStartPC) { if (isStartPC)
{
microRange mRange = {pc, -1}; microRange mRange = {pc, -1};
ranges->push_front(mRange); ranges->push_front(mRange);
return; return;
} }
if (mVUrange.start <= pc) { if (mVUrange.start <= pc)
{
mVUrange.end = pc; mVUrange.end = pc;
bool mergedRange = false; bool mergedRange = false;
s32 rStart = mVUrange.start; s32 rStart = mVUrange.start;
s32 rEnd = mVUrange.end; s32 rEnd = mVUrange.end;
std::deque<microRange>::iterator it(ranges->begin()); std::deque<microRange>::iterator it(ranges->begin());
for (++it; it != ranges->end(); ++it) { for (++it; it != ranges->end(); ++it)
if((it[0].start >= rStart) && (it[0].start <= rEnd)) { // Starts after this prog but starts before the end of current prog {
if ((it[0].start >= rStart) && (it[0].start <= rEnd)) // Starts after this prog but starts before the end of current prog
{
it[0].start = std::min(it[0].start, rStart); // Choose the earlier start it[0].start = std::min(it[0].start, rStart); // Choose the earlier start
mergedRange = true; mergedRange = true;
} }
// Make sure we check both as the start on the other one may be later, we don't want to delete that // Make sure we check both as the start on the other one may be later, we don't want to delete that
if ((it[0].end >= rStart) && (it[0].end <= rEnd)) { // Ends after this prog starts but ends before this one ends if ((it[0].end >= rStart) && (it[0].end <= rEnd)) // Ends after this prog starts but ends before this one ends
{
it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program
mergedRange = true; mergedRange = true;
} }
} }
if (mergedRange) { if (mergedRange)
{
ranges->erase(ranges->begin()); ranges->erase(ranges->begin());
} }
} }
else { else
{
mVUrange.end = mVU.microMemSize; mVUrange.end = mVU.microMemSize;
DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%d]", mVU.index, mVUrange.start, mVUrange.end); DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%d]", mVU.index, mVUrange.start, mVUrange.end);
microRange mRange = {0, pc}; microRange mRange = {0, pc};
@ -104,25 +119,44 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) {
// Execute VU Opcode/Instruction (Upper and Lower) // Execute VU Opcode/Instruction (Upper and Lower)
//------------------------------------------------------------------ //------------------------------------------------------------------
__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); } __ri void doUpperOp(mV)
__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); } {
__ri void flushRegs(mV) { if (!doRegAlloc) mVU.regAlloc->flushAll(); } mVUopU(mVU, 1);
mVUdivSet(mVU);
}
__ri void doLowerOp(mV)
{
incPC(-1);
mVUopL(mVU, 1);
incPC(1);
}
__ri void flushRegs(mV)
{
if (!doRegAlloc)
mVU.regAlloc->flushAll();
}
void doIbit(mV) { void doIbit(mV)
if (mVUup.iBit) { {
if (mVUup.iBit)
{
incPC(-1); incPC(-1);
mVU.regAlloc->clearRegVF(33); mVU.regAlloc->clearRegVF(33);
if (EmuConfig.Gamefixes.IbitHack) { if (EmuConfig.Gamefixes.IbitHack)
{
xMOV(gprT1, ptr32[&curI]); xMOV(gprT1, ptr32[&curI]);
xMOV(ptr32[&mVU.getVI(REG_I)], gprT1); xMOV(ptr32[&mVU.getVI(REG_I)], gprT1);
} }
else { else
{
u32 tempI; u32 tempI;
if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000))
{
DevCon.WriteLn(Color_Green, "microVU%d: Clamping I Reg", mVU.index); DevCon.WriteLn(Color_Green, "microVU%d: Clamping I Reg", mVU.index);
tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
} }
else tempI = curI; else
tempI = curI;
xMOV(ptr32[&mVU.getVI(REG_I)], tempI); xMOV(ptr32[&mVU.getVI(REG_I)], tempI);
} }
@ -130,8 +164,10 @@ void doIbit(mV) {
} }
} }
void doSwapOp(mV) { void doSwapOp(mV)
if (mVUinfo.backupVF && !mVUlow.noWriteVF) { {
if (mVUinfo.backupVF && !mVUlow.noWriteVF)
{
DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC); DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC);
// Allocate t1 first for better chance of reg-alloc // Allocate t1 first for better chance of reg-alloc
@ -156,23 +192,33 @@ void doSwapOp(mV) {
mVU.regAlloc->clearNeeded(t4); mVU.regAlloc->clearNeeded(t4);
mVU.regAlloc->clearNeeded(t2); mVU.regAlloc->clearNeeded(t2);
} }
else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); } else
{
mVUopL(mVU, 1);
incPC(1);
flushRegs(mVU);
doUpperOp(mVU);
}
} }
void mVUexecuteInstruction(mV) { void mVUexecuteInstruction(mV)
if (mVUlow.isNOP) { {
if (mVUlow.isNOP)
{
incPC(1); incPC(1);
doUpperOp(mVU); doUpperOp(mVU);
flushRegs(mVU); flushRegs(mVU);
doIbit(mVU); doIbit(mVU);
} }
else if (!mVUinfo.swapOps) { else if (!mVUinfo.swapOps)
{
incPC(1); incPC(1);
doUpperOp(mVU); doUpperOp(mVU);
flushRegs(mVU); flushRegs(mVU);
doLowerOp(mVU); doLowerOp(mVU);
} }
else { else
{
doSwapOp(mVU); doSwapOp(mVU);
} }
@ -184,24 +230,27 @@ void mVUexecuteInstruction(mV) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2) // If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2)
__fi void mVUcheckBadOp(mV) { __fi void mVUcheckBadOp(mV)
{
// The BIOS writes upper and lower NOPs in reversed slots (bug) // The BIOS writes upper and lower NOPs in reversed slots (bug)
//So to prevent spamming we ignore these, however its possible the real VU will bomb out if //So to prevent spamming we ignore these, however its possible the real VU will bomb out if
//this happens, so we will bomb out without warning. //this happens, so we will bomb out without warning.
if (mVUinfo.isBadOp && mVU.code != 0x8000033c) { if (mVUinfo.isBadOp && mVU.code != 0x8000033c)
{
mVUinfo.isEOB = true; mVUinfo.isEOB = true;
DevCon.Warning("microVU Warning: Block contains an illegal opcode..."); DevCon.Warning("microVU Warning: Block contains an illegal opcode...");
} }
} }
// Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2) // Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2)
// #ifdef PCSX2_DEVBUILD because starting with SVN R5586 we get log spam in releases (Shadow Hearts battles) // #ifdef PCSX2_DEVBUILD because starting with SVN R5586 we get log spam in releases (Shadow Hearts battles)
__fi void handleBadOp(mV, int count) { __fi void handleBadOp(mV, int count)
{
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
if (mVUinfo.isBadOp) { if (mVUinfo.isBadOp)
{
mVUbackupRegs(mVU, true); mVUbackupRegs(mVU, true);
if (!isVU1) xFastCall(mVUbadOp0, mVU.prog.cur->idx, xPC); if (!isVU1) xFastCall(mVUbadOp0, mVU.prog.cur->idx, xPC);
else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC); else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC);
@ -210,35 +259,46 @@ __fi void handleBadOp(mV, int count) {
#endif #endif
} }
__ri void branchWarning(mV) { __ri void branchWarning(mV)
{
incPC(-2); incPC(-2);
if (mVUup.eBit && mVUbranch) { if (mVUup.eBit && mVUbranch)
{
incPC(2); incPC(2);
DevCon.Warning("microVU%d Warning: Branch in E-bit delay slot! [%04x]", mVU.index, xPC); DevCon.Warning("microVU%d Warning: Branch in E-bit delay slot! [%04x]", mVU.index, xPC);
mVUlow.isNOP = true; mVUlow.isNOP = true;
} }
else incPC(2); else
incPC(2);
if (mVUinfo.isBdelay && !mVUlow.evilBranch) { // Check if VI Reg Written to on Branch Delay Slot Instruction if (mVUinfo.isBdelay && !mVUlow.evilBranch) // Check if VI Reg Written to on Branch Delay Slot Instruction
if (mVUlow.VI_write.reg && mVUlow.VI_write.used && !mVUlow.readFlags) { {
if (mVUlow.VI_write.reg && mVUlow.VI_write.used && !mVUlow.readFlags)
{
mVUlow.backupVI = true; mVUlow.backupVI = true;
mVUregs.viBackUp = mVUlow.VI_write.reg; mVUregs.viBackUp = mVUlow.VI_write.reg;
} }
} }
} }
__fi void eBitPass1(mV, int& branch) { __fi void eBitPass1(mV, int& branch)
if (mVUregs.blockType != 1) { {
if (mVUregs.blockType != 1)
{
branch = 1; branch = 1;
mVUup.eBit = true; mVUup.eBit = true;
} }
} }
__ri void eBitWarning(mV) { __ri void eBitWarning(mV)
if (mVUpBlock->pState.blockType == 1) Console.Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", mVU.index, xPC); {
if (mVUpBlock->pState.blockType == 2) Console.Error("microVU%d Warning: Branch, Branch, Branch! [%04x]", mVU.index, xPC); if (mVUpBlock->pState.blockType == 1)
Console.Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", mVU.index, xPC);
if (mVUpBlock->pState.blockType == 2)
Console.Error("microVU%d Warning: Branch, Branch, Branch! [%04x]", mVU.index, xPC);
incPC(2); incPC(2);
if (curI & _Ebit_) { if (curI & _Ebit_)
{
DevCon.Warning("microVU%d: E-bit in Branch delay slot! [%04x]", mVU.index, xPC); DevCon.Warning("microVU%d: E-bit in Branch delay slot! [%04x]", mVU.index, xPC);
mVUregs.blockType = 1; mVUregs.blockType = 1;
} }
@ -258,14 +318,17 @@ __fi void incQ(mV) { mVU.q ^= 1; }
// If the cycles remaining is just '1', we don't have to transfer it to the next block // If the cycles remaining is just '1', we don't have to transfer it to the next block
// because mVU automatically decrements this number at the start of its loop, // because mVU automatically decrements this number at the start of its loop,
// so essentially '1' will be the same as '0'... // so essentially '1' will be the same as '0'...
void mVUoptimizePipeState(mV) { void mVUoptimizePipeState(mV)
for (int i = 0; i < 32; i++) { {
for (int i = 0; i < 32; i++)
{
optimizeReg(mVUregs.VF[i].x); optimizeReg(mVUregs.VF[i].x);
optimizeReg(mVUregs.VF[i].y); optimizeReg(mVUregs.VF[i].y);
optimizeReg(mVUregs.VF[i].z); optimizeReg(mVUregs.VF[i].z);
optimizeReg(mVUregs.VF[i].w); optimizeReg(mVUregs.VF[i].w);
} }
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++)
{
optimizeReg(mVUregs.VI[i]); optimizeReg(mVUregs.VI[i]);
} }
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } } if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
@ -273,60 +336,90 @@ void mVUoptimizePipeState(mV) {
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
} }
void mVUincCycles(mV, int x) { void mVUincCycles(mV, int x)
{
mVUcycles += x; mVUcycles += x;
// VF[0] is a constant value (0.0 0.0 0.0 1.0) // VF[0] is a constant value (0.0 0.0 0.0 1.0)
for (int z = 31; z > 0; z--) { for (int z = 31; z > 0; z--)
{
calcCycles(mVUregs.VF[z].x, x); calcCycles(mVUregs.VF[z].x, x);
calcCycles(mVUregs.VF[z].y, x); calcCycles(mVUregs.VF[z].y, x);
calcCycles(mVUregs.VF[z].z, x); calcCycles(mVUregs.VF[z].z, x);
calcCycles(mVUregs.VF[z].w, x); calcCycles(mVUregs.VF[z].w, x);
} }
// VI[0] is a constant value (0) // VI[0] is a constant value (0)
for (int z = 15; z > 0; z--) { for (int z = 15; z > 0; z--)
{
calcCycles(mVUregs.VI[z], x); calcCycles(mVUregs.VI[z], x);
} }
if (mVUregs.q) { if (mVUregs.q)
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } } {
else { calcCycles(mVUregs.q, x); } if (mVUregs.q > 4)
if (!mVUregs.q) { incQ(mVU); } {
calcCycles(mVUregs.q, x);
if (mVUregs.q <= 4)
{
mVUinfo.doDivFlag = 1;
} }
if (mVUregs.p) { }
else
{
calcCycles(mVUregs.q, x);
}
if (!mVUregs.q)
incQ(mVU);
}
if (mVUregs.p)
{
calcCycles(mVUregs.p, x); calcCycles(mVUregs.p, x);
if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); } if (!mVUregs.p || mVUregsTemp.p)
incP(mVU);
} }
if (mVUregs.xgkick) { if (mVUregs.xgkick)
{
calcCycles(mVUregs.xgkick, x); calcCycles(mVUregs.xgkick, x);
if (!mVUregs.xgkick) { mVUinfo.doXGKICK = 1; mVUinfo.XGKICKPC = xPC;} if (!mVUregs.xgkick)
{
mVUinfo.doXGKICK = 1;
mVUinfo.XGKICKPC = xPC;
}
} }
calcCycles(mVUregs.r, x); calcCycles(mVUregs.r, x);
} }
// Helps check if upper/lower ops read/write to same regs... // Helps check if upper/lower ops read/write to same regs...
void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) { void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar)
if (VFreg1.reg == VFreg2.reg) { {
if (VFreg1.reg == VFreg2.reg)
{
if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y) if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y)
|| (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w)) || (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w))
{ xVar = 1; } {
xVar = 1;
}
} }
} }
void mVUsetCycles(mV) { void mVUsetCycles(mV)
{
mVUincCycles(mVU, mVUstall); mVUincCycles(mVU, mVUstall);
// If upper Op && lower Op write to same VF reg: // If upper Op && lower Op write to same VF reg:
if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) { if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0])
{
if (mVUregsTemp.r || mVUregsTemp.VI) if (mVUregsTemp.r || mVUregsTemp.VI)
mVUlow.noWriteVF = true; mVUlow.noWriteVF = true;
else else
mVUlow.isNOP = true; // If lower Op doesn't modify anything else, then make it a NOP mVUlow.isNOP = true; // If lower Op doesn't modify anything else, then make it a NOP
} }
// If lower op reads a VF reg that upper Op writes to: // If lower op reads a VF reg that upper Op writes to:
if ((mVUlow.VF_read[0].reg || mVUlow.VF_read[1].reg) && mVUup.VF_write.reg) { if ((mVUlow.VF_read[0].reg || mVUlow.VF_read[1].reg) && mVUup.VF_write.reg)
{
cmpVFregs(mVUup.VF_write, mVUlow.VF_read[0], mVUinfo.swapOps); cmpVFregs(mVUup.VF_write, mVUlow.VF_read[0], mVUinfo.swapOps);
cmpVFregs(mVUup.VF_write, mVUlow.VF_read[1], mVUinfo.swapOps); cmpVFregs(mVUup.VF_write, mVUlow.VF_read[1], mVUinfo.swapOps);
} }
// If above case is true, and upper op reads a VF reg that lower Op Writes to: // If above case is true, and upper op reads a VF reg that lower Op Writes to:
if (mVUinfo.swapOps && ((mVUup.VF_read[0].reg || mVUup.VF_read[1].reg) && mVUlow.VF_write.reg)) { if (mVUinfo.swapOps && ((mVUup.VF_read[0].reg || mVUup.VF_read[1].reg) && mVUlow.VF_write.reg))
{
cmpVFregs(mVUlow.VF_write, mVUup.VF_read[0], mVUinfo.backupVF); cmpVFregs(mVUlow.VF_write, mVUup.VF_read[0], mVUinfo.backupVF);
cmpVFregs(mVUlow.VF_write, mVUup.VF_read[1], mVUinfo.backupVF); cmpVFregs(mVUlow.VF_write, mVUup.VF_read[1], mVUinfo.backupVF);
} }
@ -349,8 +442,10 @@ void mVUsetCycles(mV) {
} }
// Prints Start/End PC of blocks executed, for debugging... // Prints Start/End PC of blocks executed, for debugging...
void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) { void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC)
if (mVUdebugNow) { {
if (mVUdebugNow)
{
mVUbackupRegs(mVU, true); mVUbackupRegs(mVU, true);
if (isEndPC) xFastCall(mVUprintPC2, xPC); if (isEndPC) xFastCall(mVUprintPC2, xPC);
else xFastCall(mVUprintPC1, xPC); else xFastCall(mVUprintPC1, xPC);
@ -359,15 +454,18 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) {
} }
// Saves Pipeline State for resuming from early exits // Saves Pipeline State for resuming from early exits
__fi void mVUsavePipelineState(microVU& mVU) { __fi void mVUsavePipelineState(microVU& mVU)
{
u32* lpS = (u32*)&mVU.prog.lpState; u32* lpS = (u32*)&mVU.prog.lpState;
for(size_t i = 0; i < (sizeof(microRegInfo)-4)/4; i++, lpS++) { for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++)
{
xMOV(ptr32[lpS], lpS[0]); xMOV(ptr32[lpS], lpS[0]);
} }
} }
// Test cycles to see if we need to exit-early... // Test cycles to see if we need to exit-early...
void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) { void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
{
iPC = mVUstartPC; iPC = mVUstartPC;
xMOV(eax, ptr32[&mVU.cycles]); xMOV(eax, ptr32[&mVU.cycles]);
@ -392,17 +490,23 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// This gets run at the start of every loop of mVU's first pass // This gets run at the start of every loop of mVU's first pass
__fi void startLoop(mV) { __fi void startLoop(mV)
if (curI & _Mbit_ && isVU0) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); } {
if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set! PC = %x", getIndex, xPC); } if (curI & _Mbit_ && isVU0)
if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC); } DevCon.WriteLn(Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC);
if (curI & _Dbit_)
DevCon.WriteLn(Color_Green, "microVU%d: D-bit set! PC = %x", getIndex, xPC);
if (curI & _Tbit_)
DevCon.WriteLn(Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC);
memzero(mVUinfo); memzero(mVUinfo);
memzero(mVUregsTemp); memzero(mVUregsTemp);
} }
// Initialize VI Constants (vi15 propagates through blocks) // Initialize VI Constants (vi15 propagates through blocks)
__fi void mVUinitConstValues(microVU& mVU) { __fi void mVUinitConstValues(microVU& mVU)
for (int i = 0; i < 16; i++) { {
for (int i = 0; i < 16; i++)
{
mVUconstReg[i].isValid = 0; mVUconstReg[i].isValid = 0;
mVUconstReg[i].regValue = 0; mVUconstReg[i].regValue = 0;
} }
@ -411,17 +515,20 @@ __fi void mVUinitConstValues(microVU& mVU) {
} }
// Initialize Variables // Initialize Variables
__fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) { __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
{
mVUstartPC = iPC; // Block Start PC mVUstartPC = iPC; // Block Start PC
mVUbranch = 0; // Branch Type mVUbranch = 0; // Branch Type
mVUcount = 0; // Number of instructions ran mVUcount = 0; // Number of instructions ran
mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage
mVU.p = 0; // All blocks start at p index #0 mVU.p = 0; // All blocks start at p index #0
mVU.q = 0; // All blocks start at q index #0 mVU.q = 0; // All blocks start at q index #0
if ((uptr)&mVUregs != pState) { // Loads up Pipeline State Info if ((uptr)&mVUregs != pState) // Loads up Pipeline State Info
{
memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo)); memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo));
} }
if (((uptr)&mVU.prog.lpState != pState)) { if (((uptr)&mVU.prog.lpState != pState))
{
memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo)); memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
} }
mVUblock.x86ptrStart = thisPtr; mVUblock.x86ptrStart = thisPtr;
@ -442,7 +549,8 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) {
//Unfortunately linking the reg manually and using the normal evil block method seems to suck at this :/ //Unfortunately linking the reg manually and using the normal evil block method seems to suck at this :/
//If this is removed, test Evil Dead: Fistful of Boomstick (hangs going ingame), Mark of Kri (collision detection) //If this is removed, test Evil Dead: Fistful of Boomstick (hangs going ingame), Mark of Kri (collision detection)
//and Tony Hawks Project 8 (graphics are half missing, requires Negative rounding when working) //and Tony Hawks Project 8 (graphics are half missing, requires Negative rounding when working)
void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microFlagCycles& mFC) { void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microFlagCycles& mFC)
{
u8* thisPtr = x86Ptr; u8* thisPtr = x86Ptr;
@ -456,12 +564,36 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF
mVUincCycles(mVU, 1); mVUincCycles(mVU, 1);
mVUopU(mVU, 0); mVUopU(mVU, 0);
mVUcheckBadOp(mVU); mVUcheckBadOp(mVU);
if (curI & _Ebit_) { eBitPass1(mVU, g_branch); DevCon.Warning("E Bit on single instruction");} if (curI & _Ebit_)
if (curI & _Dbit_) { mVUup.dBit = true; } {
if (curI & _Tbit_) { mVUup.tBit = true; } eBitPass1(mVU, g_branch);
if (curI & _Mbit_) { mVUup.mBit = true; DevCon.Warning("M Bit on single instruction");} DevCon.Warning("E Bit on single instruction");
if (curI & _Ibit_) { mVUlow.isNOP = true; mVUup.iBit = true; DevCon.Warning("I Bit on single instruction");} }
else { incPC(-1); mVUopL(mVU, 0); incPC(1); } if (curI & _Dbit_)
{
mVUup.dBit = true;
}
if (curI & _Tbit_)
{
mVUup.tBit = true;
}
if (curI & _Mbit_)
{
mVUup.mBit = true;
DevCon.Warning("M Bit on single instruction");
}
if (curI & _Ibit_)
{
mVUlow.isNOP = true;
mVUup.iBit = true;
DevCon.Warning("I Bit on single instruction");
}
else
{
incPC(-1);
mVUopL(mVU, 0);
incPC(1);
}
mVUsetCycles(mVU); mVUsetCycles(mVU);
mVUinfo.readQ = mVU.q; mVUinfo.readQ = mVU.q;
mVUinfo.writeQ = !mVU.q; mVUinfo.writeQ = !mVU.q;
@ -480,13 +612,15 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF
iPC = startPC / 4; iPC = startPC / 4;
setCode(); setCode();
if (mVUup.mBit) { if (mVUup.mBit)
{
xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET);
} }
mVUexecuteInstruction(mVU); mVUexecuteInstruction(mVU);
mVUincCycles(mVU, 1); //Just incase the is XGKick mVUincCycles(mVU, 1); //Just incase the is XGKick
if (mVUinfo.doXGKICK) { if (mVUinfo.doXGKICK)
{
mVU_XGKICK_DELAY(mVU); mVU_XGKICK_DELAY(mVU);
} }
@ -497,7 +631,8 @@ void mVUDoDBit(microVU& mVU, microFlagCycles* mFC)
{ {
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero); xForwardJump32 eJMP(Jcc_Zero);
if (!isVU1 || !THREAD_VU1) { if (!isVU1 || !THREAD_VU1)
{
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
} }
@ -511,7 +646,8 @@ void mVUDoTBit(microVU& mVU, microFlagCycles* mFC)
{ {
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero); xForwardJump32 eJMP(Jcc_Zero);
if (!isVU1 || !THREAD_VU1) { if (!isVU1 || !THREAD_VU1)
{
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
} }
@ -539,24 +675,29 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
mVU.regAlloc->reset(); // Reset regAlloc mVU.regAlloc->reset(); // Reset regAlloc
mVUinitFirstPass(mVU, pState, thisPtr); mVUinitFirstPass(mVU, pState, thisPtr);
mVUbranch = 0; mVUbranch = 0;
for (int branch = 0; mVUcount < endCount;) { for (int branch = 0; mVUcount < endCount;)
{
incPC(1); incPC(1);
startLoop(mVU); startLoop(mVU);
mVUincCycles(mVU, 1); mVUincCycles(mVU, 1);
mVUopU(mVU, 0); mVUopU(mVU, 0);
mVUcheckBadOp(mVU); mVUcheckBadOp(mVU);
if (curI & _Ebit_) { if (curI & _Ebit_)
{
eBitPass1(mVU, branch); eBitPass1(mVU, branch);
// VU0 end of program MAC results can be read by COP2, so best to make sure the last instance is valid // VU0 end of program MAC results can be read by COP2, so best to make sure the last instance is valid
// Needed for State of Emergency 2 and Driving Emotion Type-S // Needed for State of Emergency 2 and Driving Emotion Type-S
if(isVU0) mVUregs.needExactMatch |= 7; if (isVU0)
mVUregs.needExactMatch |= 7;
} }
if ((curI & _Mbit_) && isVU0) { if ((curI & _Mbit_) && isVU0)
{
if (xPC > 0) if (xPC > 0)
{ {
incPC(-2); incPC(-2);
if (!(curI & _Mbit_)) { //If the last instruction was also M-Bit we don't need to sync again if (!(curI & _Mbit_)) //If the last instruction was also M-Bit we don't need to sync again
{
incPC(2); incPC(2);
mVUup.mBit = true; mVUup.mBit = true;
} }
@ -567,24 +708,29 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
mVUup.mBit = true; mVUup.mBit = true;
} }
if (curI & _Ibit_) { if (curI & _Ibit_)
{
mVUlow.isNOP = true; mVUlow.isNOP = true;
mVUup.iBit = true; mVUup.iBit = true;
if (EmuConfig.Gamefixes.IbitHack) { if (EmuConfig.Gamefixes.IbitHack)
{
mVUsetupRange(mVU, xPC, false); mVUsetupRange(mVU, xPC, false);
if (branch < 2) if (branch < 2)
mVUsetupRange(mVU, xPC + 8, true); // Ideally we'd do +4 but the mmx compare only works in 64bits, this should be fine mVUsetupRange(mVU, xPC + 8, true); // Ideally we'd do +4 but the mmx compare only works in 64bits, this should be fine
} }
} }
else { else
{
incPC(-1); incPC(-1);
mVUopL(mVU, 0); mVUopL(mVU, 0);
incPC(1); incPC(1);
} }
if (curI & _Dbit_) { if (curI & _Dbit_)
{
mVUup.dBit = true; mVUup.dBit = true;
} }
if (curI & _Tbit_) { if (curI & _Tbit_)
{
mVUup.tBit = true; mVUup.tBit = true;
} }
mVUsetCycles(mVU); mVUsetCycles(mVU);
@ -594,21 +740,25 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
mVUinfo.writeP = !mVU.p && isVU1; mVUinfo.writeP = !mVU.p && isVU1;
mVUcount++; mVUcount++;
if (branch >= 2) { if (branch >= 2)
{
mVUinfo.isEOB = true; mVUinfo.isEOB = true;
if (branch == 3) { if (branch == 3)
{
mVUinfo.isBdelay = true; mVUinfo.isBdelay = true;
} }
branchWarning(mVU); branchWarning(mVU);
break; break;
} }
else if (branch == 1) { else if (branch == 1)
{
branch = 2; branch = 2;
} }
if (mVUbranch) { if (mVUbranch)
{
mVUsetFlagInfo(mVU); mVUsetFlagInfo(mVU);
eBitWarning(mVU); eBitWarning(mVU);
branch = 3; branch = 3;
@ -642,24 +792,30 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
mVUbranch = 0; mVUbranch = 0;
u32 x = 0; u32 x = 0;
for (; x < endCount; x++) { for (; x < endCount; x++)
if (mVUinfo.isEOB) { {
if (mVUinfo.isEOB)
{
handleBadOp(mVU, x); handleBadOp(mVU, x);
x = 0xffff; x = 0xffff;
} // handleBadOp currently just prints a warning } // handleBadOp currently just prints a warning
if (mVUup.mBit) { if (mVUup.mBit)
{
xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET);
} }
mVUexecuteInstruction(mVU); mVUexecuteInstruction(mVU);
if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed. if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed.
{ {
if (mVUup.tBit) { if (mVUup.tBit)
{
mVUDoTBit(mVU, &mFC); mVUDoTBit(mVU, &mFC);
} }
else if (mVUup.dBit && doDBitHandling) { else if (mVUup.dBit && doDBitHandling)
{
mVUDoDBit(mVU, &mFC); mVUDoDBit(mVU, &mFC);
} }
else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) { else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB)
{
// Need to make sure the flags are exact, Gungrave does FCAND with Mbit, then directly after FMAND with M-bit // Need to make sure the flags are exact, Gungrave does FCAND with Mbit, then directly after FMAND with M-bit
// Also call setupBranch to sort flag instances // Also call setupBranch to sort flag instances
@ -667,7 +823,8 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
// Make sure we save the current state so it can come back to it // Make sure we save the current state so it can come back to it
u32* cpS = (u32*)&mVUregs; u32* cpS = (u32*)&mVUregs;
u32* lpS = (u32*)&mVU.prog.lpState; u32* lpS = (u32*)&mVU.prog.lpState;
for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++, cpS++) { for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++, cpS++)
{
xMOV(ptr32[lpS], cpS[0]); xMOV(ptr32[lpS], cpS[0]);
} }
incPC(2); incPC(2);
@ -679,16 +836,19 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
} }
} }
if (mVUinfo.doXGKICK) { if (mVUinfo.doXGKICK)
{
mVU_XGKICK_DELAY(mVU); mVU_XGKICK_DELAY(mVU);
} }
if (isEvilBlock) { if (isEvilBlock)
{
mVUsetupRange(mVU, xPC + 8, false); mVUsetupRange(mVU, xPC + 8, false);
normJumpCompile(mVU, mFC, true); normJumpCompile(mVU, mFC, true);
goto perf_and_return; goto perf_and_return;
} }
else if (!mVUinfo.isBdelay) { else if (!mVUinfo.isBdelay)
{
// Handle range wrapping // Handle range wrapping
if ((xPC + 8) == mVU.microMemSize) if ((xPC + 8) == mVU.microMemSize)
{ {
@ -697,13 +857,15 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
} }
incPC(1); incPC(1);
} }
else { else
{
incPC(1); incPC(1);
mVUsetupRange(mVU, xPC, false); mVUsetupRange(mVU, xPC, false);
mVUdebugPrintBlocks(mVU, true); mVUdebugPrintBlocks(mVU, true);
incPC(-4); // Go back to branch opcode incPC(-4); // Go back to branch opcode
switch (mVUlow.branch) { switch (mVUlow.branch)
{
case 1: // B/BAL case 1: // B/BAL
case 2: case 2:
normBranch(mVU, mFC); normBranch(mVU, mFC);
@ -733,7 +895,8 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
} }
} }
} }
if ((x == endCount) && (x != 1)) { if ((x == endCount) && (x != 1))
{
Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index);
} }
@ -749,14 +912,18 @@ perf_and_return:
} }
// Returns the entry point of the block (compiles it if not found) // Returns the entry point of the block (compiles it if not found)
__fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState) { __fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState)
{
microBlock* pBlock = block->search((microRegInfo*)pState); microBlock* pBlock = block->search((microRegInfo*)pState);
if (pBlock) return pBlock->x86ptrStart; if (pBlock)
else { return mVUcompile(mVU, startPC, pState);} return pBlock->x86ptrStart;
else
return mVUcompile(mVU, startPC, pState);
} }
// Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr) // Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr)
__fi void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState) { __fi void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState)
{
pxAssertDev((startPC & 7) == 0, pxsFmt("microVU%d: unaligned startPC=0x%04x", mVU.index, startPC)); pxAssertDev((startPC & 7) == 0, pxsFmt("microVU%d: unaligned startPC=0x%04x", mVU.index, startPC));
pxAssertDev(startPC <= mVU.microMemSize - 8, pxsFmt("microVU%d: invalid startPC=0x%04x", mVU.index, startPC)); pxAssertDev(startPC <= mVU.microMemSize - 8, pxsFmt("microVU%d: invalid startPC=0x%04x", mVU.index, startPC));
@ -767,13 +934,17 @@ __fi void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState) {
} }
// mVUcompileJIT() - Called By JR/JALR during execution // mVUcompileJIT() - Called By JR/JALR during execution
_mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr) { _mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr)
if (doJumpAsSameProgram) { // Treat jump as part of same microProgram {
if (doJumpCaching) { // When doJumpCaching, ptr is a microBlock pointer if (doJumpAsSameProgram) // Treat jump as part of same microProgram
{
if (doJumpCaching) // When doJumpCaching, ptr is a microBlock pointer
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
microBlock* pBlock = (microBlock*)ptr; microBlock* pBlock = (microBlock*)ptr;
microJumpCache& jc = pBlock->jumpCache[startPC / 8]; microJumpCache& jc = pBlock->jumpCache[startPC / 8];
if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog) return jc.x86ptrStart; if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog)
return jc.x86ptrStart;
void* v = mVUblockFetch(mVUx, startPC, (uptr)&pBlock->pStateEnd); void* v = mVUblockFetch(mVUx, startPC, (uptr)&pBlock->pStateEnd);
jc.prog = mVU.prog.quick[startPC / 8].prog; jc.prog = mVU.prog.quick[startPC / 8].prog;
jc.x86ptrStart = v; jc.x86ptrStart = v;
@ -782,17 +953,20 @@ _mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr) {
return mVUblockFetch(mVUx, startPC, ptr); return mVUblockFetch(mVUx, startPC, ptr);
} }
mVUx.regs().start_pc = startPC; mVUx.regs().start_pc = startPC;
if (doJumpCaching) { // When doJumpCaching, ptr is a microBlock pointer if (doJumpCaching) // When doJumpCaching, ptr is a microBlock pointer
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
microBlock* pBlock = (microBlock*)ptr; microBlock* pBlock = (microBlock*)ptr;
microJumpCache& jc = pBlock->jumpCache[startPC / 8]; microJumpCache& jc = pBlock->jumpCache[startPC / 8];
if (jc.prog && jc.prog == mVU.prog.quick[startPC/8].prog) return jc.x86ptrStart; if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog)
return jc.x86ptrStart;
void* v = mVUsearchProg<vuIndex>(startPC, (uptr)&pBlock->pStateEnd); void* v = mVUsearchProg<vuIndex>(startPC, (uptr)&pBlock->pStateEnd);
jc.prog = mVU.prog.quick[startPC / 8].prog; jc.prog = mVU.prog.quick[startPC / 8].prog;
jc.x86ptrStart = v; jc.x86ptrStart = v;
return v; return v;
} }
else { // When !doJumpCaching, pBlock param is really a microRegInfo pointer else // When !doJumpCaching, pBlock param is really a microRegInfo pointer
{
return mVUsearchProg<vuIndex>(startPC, ptr); // Find and set correct program return mVUsearchProg<vuIndex>(startPC, ptr); // Find and set correct program
} }
} }

View File

@ -20,15 +20,16 @@
//------------------------------------------------------------------ //------------------------------------------------------------------
// Generates the code for entering/exit recompiled blocks // Generates the code for entering/exit recompiled blocks
void mVUdispatcherAB(mV) { void mVUdispatcherAB(mV)
{
mVU.startFunct = x86Ptr; mVU.startFunct = x86Ptr;
{ {
xScopedStackFrame frame(false, true); xScopedStackFrame frame(false, true);
// __fastcall = The caller has already put the needed parameters in ecx/edx: // __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); } if (!isVU1) xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg);
else { xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); } else xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg);
// Load VU's MXCSR state // Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR); xLDMXCSR(g_sseVUMXCSR);
@ -74,8 +75,8 @@ void mVUdispatcherAB(mV) {
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left. // all other arguments are passed right to left.
if (!isVU1) { xFastCall((void*)mVUcleanUpVU0); } if (!isVU1) xFastCall((void*)mVUcleanUpVU0);
else { xFastCall((void*)mVUcleanUpVU1); } else xFastCall((void*)mVUcleanUpVU1);
} }
xRET(); xRET();
@ -85,7 +86,8 @@ void mVUdispatcherAB(mV) {
} }
// Generates the code for resuming/exit xgkick // Generates the code for resuming/exit xgkick
void mVUdispatcherCD(mV) { void mVUdispatcherCD(mV)
{
mVU.startFunctXG = x86Ptr; mVU.startFunctXG = x86Ptr;
{ {
@ -116,7 +118,6 @@ void mVUdispatcherCD(mV) {
// Load EE's MXCSR state // Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR); xLDMXCSR(g_sseMXCSR);
} }
xRET(); xRET();
@ -130,11 +131,13 @@ void mVUdispatcherCD(mV) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// Executes for number of cycles // Executes for number of cycles
_mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles) { _mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles)
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
u32 vuLimit = vuIndex ? 0x3ff8 : 0xff8; u32 vuLimit = vuIndex ? 0x3ff8 : 0xff8;
if (startPC > vuLimit + 7) { if (startPC > vuLimit + 7)
{
DevCon.Warning("microVU%x Warning: startPC = 0x%x, cycles = 0x%x", vuIndex, startPC, cycles); DevCon.Warning("microVU%x Warning: startPC = 0x%x, cycles = 0x%x", vuIndex, startPC, cycles);
} }
@ -149,7 +152,8 @@ _mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles) {
// Cleanup Functions // Cleanup Functions
//------------------------------------------------------------------ //------------------------------------------------------------------
_mVUt void mVUcleanUp() { _mVUt void mVUcleanUp()
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
//mVUprint("microVU: Program exited successfully!"); //mVUprint("microVU: Program exited successfully!");
//mVUprint("microVU: VF0 = {%x,%x,%x,%x}", mVU.regs().VF[0].UL[0], mVU.regs().VF[0].UL[1], mVU.regs().VF[0].UL[2], mVU.regs().VF[0].UL[3]); //mVUprint("microVU: VF0 = {%x,%x,%x,%x}", mVU.regs().VF[0].UL[0], mVU.regs().VF[0].UL[1], mVU.regs().VF[0].UL[2], mVU.regs().VF[0].UL[3]);
@ -157,7 +161,8 @@ _mVUt void mVUcleanUp() {
mVU.prog.x86ptr = x86Ptr; mVU.prog.x86ptr = x86Ptr;
if ((xGetPtr() < mVU.prog.x86start) || (xGetPtr() >= mVU.prog.x86end)) { if ((xGetPtr() < mVU.prog.x86start) || (xGetPtr() >= mVU.prog.x86end))
{
Console.WriteLn(vuIndex ? Color_Orange : Color_Magenta, "microVU%d: Program cache limit reached.", mVU.index); Console.WriteLn(vuIndex ? Color_Orange : Color_Magenta, "microVU%d: Program cache limit reached.", mVU.index);
mVUreset(mVU, false); mVUreset(mVU, false);
} }
@ -165,9 +170,11 @@ _mVUt void mVUcleanUp() {
mVU.cycles = mVU.totalCycles - mVU.cycles; mVU.cycles = mVU.totalCycles - mVU.cycles;
mVU.regs().cycle += mVU.cycles; mVU.regs().cycle += mVU.cycles;
if (!vuIndex || !THREAD_VU1) { if (!vuIndex || !THREAD_VU1)
{
u32 cycles_passed = std::min(mVU.cycles, 3000u) * EmuConfig.Speedhacks.EECycleSkip; u32 cycles_passed = std::min(mVU.cycles, 3000u) * EmuConfig.Speedhacks.EECycleSkip;
if (cycles_passed > 0) { if (cycles_passed > 0)
{
s32 vu0_offset = VU0.cycle - cpuRegs.cycle; s32 vu0_offset = VU0.cycle - cpuRegs.cycle;
cpuRegs.cycle += cycles_passed; cpuRegs.cycle += cycles_passed;

View File

@ -16,9 +16,12 @@
#pragma once #pragma once
// Sets FDIV Flags at the proper time // Sets FDIV Flags at the proper time
__fi void mVUdivSet(mV) { __fi void mVUdivSet(mV)
if (mVUinfo.doDivFlag) { {
if (!sFLAG.doFlag) { xMOV(getFlagReg(sFLAG.write), getFlagReg(sFLAG.lastWrite)); } if (mVUinfo.doDivFlag)
{
if (!sFLAG.doFlag)
xMOV(getFlagReg(sFLAG.write), getFlagReg(sFLAG.lastWrite));
xAND(getFlagReg(sFLAG.write), 0xfff3ffff); xAND(getFlagReg(sFLAG.write), 0xfff3ffff);
xOR(getFlagReg(sFLAG.write), ptr32[&mVU.divFlag]); xOR(getFlagReg(sFLAG.write), ptr32[&mVU.divFlag]);
} }
@ -26,29 +29,37 @@ __fi void mVUdivSet(mV) {
// Optimizes out unneeded status flag updates // Optimizes out unneeded status flag updates
// This can safely be done when there is an FSSET opcode // This can safely be done when there is an FSSET opcode
__fi void mVUstatusFlagOp(mV) { __fi void mVUstatusFlagOp(mV)
{
int curPC = iPC; int curPC = iPC;
int i = mVUcount; int i = mVUcount;
bool runLoop = true; bool runLoop = true;
if (sFLAG.doFlag) { if (sFLAG.doFlag)
{
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
} }
else { else
for (; i > 0; i--) { {
for (; i > 0; i--)
{
incPC2(-2); incPC2(-2);
if (sFLAG.doNonSticky) { if (sFLAG.doNonSticky)
{
runLoop = false; runLoop = false;
break; break;
} }
else if (sFLAG.doFlag) { else if (sFLAG.doFlag)
{
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
break; break;
} }
} }
} }
if (runLoop) { if (runLoop)
for (; i > 0; i--) { {
for (; i > 0; i--)
{
incPC2(-2); incPC2(-2);
if (sFLAG.doNonSticky) if (sFLAG.doNonSticky)
@ -61,32 +72,41 @@ __fi void mVUstatusFlagOp(mV) {
DevCon.WriteLn(Color_Green, "microVU%d: FSSET Optimization", getIndex); DevCon.WriteLn(Color_Green, "microVU%d: FSSET Optimization", getIndex);
} }
// Selects one of the four flag-instance slots based on its cycle stamp.
//   fFlag  - array of four cycle stamps, one per flag instance
//   cycles - upper bound; only entries with fFlag[i] <= cycles are candidates
// Returns the index (0-3) of the candidate with the largest stamp. A candidate
// must also be greater than -1, so slots holding -1 are never selected.
// If no entry qualifies the result is 0. On equal stamps the lowest index wins,
// because the comparison against the current best is strict.
int findFlagInst(int* fFlag, int cycles)
{
	int j = 0, jValue = -1;
	for (int i = 0; i < 4; i++)
	{
		if ((fFlag[i] <= cycles) && (fFlag[i] > jValue))
		{
			j = i;
			jValue = fFlag[i];
		}
	}
	return j;
}
// Setup Last 4 instances of Status/Mac/Clip flags (needed for accurate block linking) // Setup Last 4 instances of Status/Mac/Clip flags (needed for accurate block linking)
int sortFlag(int* fFlag, int* bFlag, int cycles) { int sortFlag(int* fFlag, int* bFlag, int cycles)
{
int lFlag = -5; int lFlag = -5;
int x = 0; int x = 0;
for(int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++)
{
bFlag[i] = findFlagInst(fFlag, cycles); bFlag[i] = findFlagInst(fFlag, cycles);
if (lFlag != bFlag[i]) { x++; } if (lFlag != bFlag[i])
x++;
lFlag = bFlag[i]; lFlag = bFlag[i];
cycles++; cycles++;
} }
return x; // Returns the number of Valid Flag Instances return x; // Returns the number of Valid Flag Instances
} }
void sortFullFlag(int* fFlag, int* bFlag) { void sortFullFlag(int* fFlag, int* bFlag)
{
int m = std::max(std::max(fFlag[0], fFlag[1]), std::max(fFlag[2], fFlag[3])); int m = std::max(std::max(fFlag[0], fFlag[1]), std::max(fFlag[2], fFlag[3]));
for(int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++)
{
int t = 3 - (m - fFlag[i]); int t = 3 - (m - fFlag[i]);
bFlag[i] = (t < 0) ? 0 : t + 1; bFlag[i] = (t < 0) ? 0 : t + 1;
} }
@ -96,26 +116,32 @@ void sortFullFlag(int* fFlag, int* bFlag) {
// True when the status-flag hack is enabled (mVUsFlagHack) and the current
// instruction does not need its non-sticky status bits updated.
#define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky)
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
__fi void mVUsetFlags(mV, microFlagCycles& mFC) { __fi void mVUsetFlags(mV, microFlagCycles& mFC)
{
int endPC = iPC; int endPC = iPC;
u32 aCount = 0; // Amount of instructions needed to get valid mac flag instances for block linking u32 aCount = 0; // Amount of instructions needed to get valid mac flag instances for block linking
//bool writeProtect = false; //bool writeProtect = false;
// Ensure last ~4+ instructions update mac/status flags (if next block's first 4 instructions will read them) // Ensure last ~4+ instructions update mac/status flags (if next block's first 4 instructions will read them)
for(int i = mVUcount; i > 0; i--, aCount++) { for (int i = mVUcount; i > 0; i--, aCount++)
if (sFLAG.doFlag) { {
if (sFLAG.doFlag)
{
if (__Mac) { if (__Mac)
{
mFLAG.doFlag = true; mFLAG.doFlag = true;
//writeProtect = true; //writeProtect = true;
} }
if (__Status) { if (__Status)
{
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
//writeProtect = true; //writeProtect = true;
} }
if (aCount >= 3){ if (aCount >= 3)
{
break; break;
} }
} }
@ -125,42 +151,57 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
// Status/Mac Flags Setup Code // Status/Mac Flags Setup Code
int xS = 0, xM = 0, xC = 0; int xS = 0, xM = 0, xC = 0;
for(int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++)
{
mFC.xStatus[i] = i; mFC.xStatus[i] = i;
mFC.xMac [i] = i; mFC.xMac [i] = i;
mFC.xClip [i] = i; mFC.xClip [i] = i;
} }
if(!(mVUpBlock->pState.needExactMatch & 1)) { if (!(mVUpBlock->pState.needExactMatch & 1))
{
xS = (mVUpBlock->pState.flagInfo >> 2) & 3; xS = (mVUpBlock->pState.flagInfo >> 2) & 3;
mFC.xStatus[0] = -1; mFC.xStatus[1] = -1; mFC.xStatus[0] = -1;
mFC.xStatus[2] = -1; mFC.xStatus[3] = -1; mFC.xStatus[1] = -1;
mFC.xStatus[2] = -1;
mFC.xStatus[3] = -1;
mFC.xStatus[(xS - 1) & 3] = 0; mFC.xStatus[(xS - 1) & 3] = 0;
} }
if(!(mVUpBlock->pState.needExactMatch & 2)) { if (!(mVUpBlock->pState.needExactMatch & 2))
{
//xM = (mVUpBlock->pState.flagInfo >> 4) & 3; //xM = (mVUpBlock->pState.flagInfo >> 4) & 3;
mFC.xMac[0] = -1; mFC.xMac[1] = -1; mFC.xMac[0] = -1;
mFC.xMac[2] = -1; mFC.xMac[3] = -1; mFC.xMac[1] = -1;
mFC.xMac[2] = -1;
mFC.xMac[3] = -1;
//mFC.xMac[(xM-1)&3] = 0; //mFC.xMac[(xM-1)&3] = 0;
} }
if(!(mVUpBlock->pState.needExactMatch & 4)) { if (!(mVUpBlock->pState.needExactMatch & 4))
{
xC = (mVUpBlock->pState.flagInfo >> 6) & 3; xC = (mVUpBlock->pState.flagInfo >> 6) & 3;
mFC.xClip[0] = -1; mFC.xClip[1] = -1; mFC.xClip[0] = -1;
mFC.xClip[2] = -1; mFC.xClip[3] = -1; mFC.xClip[1] = -1;
mFC.xClip[2] = -1;
mFC.xClip[3] = -1;
mFC.xClip[(xC - 1) & 3] = 0; mFC.xClip[(xC - 1) & 3] = 0;
} }
mFC.cycles = 0; mFC.cycles = 0;
u32 xCount = mVUcount; // Backup count u32 xCount = mVUcount; // Backup count
iPC = mVUstartPC; iPC = mVUstartPC;
for(mVUcount = 0; mVUcount < xCount; mVUcount++) { for (mVUcount = 0; mVUcount < xCount; mVUcount++)
if (mVUlow.isFSSET && !noFlagOpts) { {
if (__Status) { // Don't Optimize out on the last ~4+ instructions if (mVUlow.isFSSET && !noFlagOpts)
if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); } {
if (__Status) // Don't Optimize out on the last ~4+ instructions
{
if ((xCount - mVUcount) > aCount)
mVUstatusFlagOp(mVU);
} }
else mVUstatusFlagOp(mVU); else
mVUstatusFlagOp(mVU);
} }
mFC.cycles += mVUstall; mFC.cycles += mVUstall;
@ -176,28 +217,34 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
mFLAG.lastWrite = doMFlagInsts ? (xM - 1) & 3 : 0; mFLAG.lastWrite = doMFlagInsts ? (xM - 1) & 3 : 0;
cFLAG.lastWrite = doCFlagInsts ? (xC - 1) & 3 : 0; cFLAG.lastWrite = doCFlagInsts ? (xC - 1) & 3 : 0;
if (sHackCond) { if (sHackCond)
{
sFLAG.doFlag = false; sFLAG.doFlag = false;
} }
if (sFLAG.doFlag) { if (sFLAG.doFlag)
if(noFlagOpts) { {
if (noFlagOpts)
{
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
mFLAG.doFlag = true; mFLAG.doFlag = true;
} }
} }
if (sFlagCond) { if (sFlagCond)
{
mFC.xStatus[xS] = mFC.cycles + 4; mFC.xStatus[xS] = mFC.cycles + 4;
xS = (xS + 1) & 3; xS = (xS + 1) & 3;
} }
if (mFLAG.doFlag) { if (mFLAG.doFlag)
{
mFC.xMac[xM] = mFC.cycles + 4; mFC.xMac[xM] = mFC.cycles + 4;
xM = (xM + 1) & 3; xM = (xM + 1) & 3;
} }
if (cFLAG.doFlag) { if (cFLAG.doFlag)
{
mFC.xClip[xC] = mFC.cycles + 4; mFC.xClip[xC] = mFC.cycles + 4;
xC = (xC + 1) & 3; xC = (xC + 1) & 3;
} }
@ -219,36 +266,45 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
// Packs the four entries of an in-scope array named `bClip` into a single value:
// bClip[0] lands in bits 0-1, bClip[1] in bits 2-3, bClip[2] in bits 4-5 and
// bClip[3] in bits 6-7. Entries are expected to be in the range 0-3; larger
// values would overlap the neighbouring field.
#define shuffleClip ((bClip[3] << 6) | (bClip[2] << 4) | (bClip[1] << 2) | bClip[0])
// Recompiles Code for Proper Flags on Block Linkings // Recompiles Code for Proper Flags on Block Linkings
__fi void mVUsetupFlags(mV, microFlagCycles& mFC) { __fi void mVUsetupFlags(mV, microFlagCycles& mFC)
{
if (mVUregs.flagInfo & 1) { if (mVUregs.flagInfo & 1)
if (mVUregs.needExactMatch) DevCon.Error("mVU ERROR!!!"); {
if (mVUregs.needExactMatch)
DevCon.Error("mVU ERROR!!!");
} }
const bool pf = false; // Print Flag Info const bool pf = false; // Print Flag Info
if (pf) DevCon.WriteLn("mVU%d - [#%d][sPC=%04x][bPC=%04x][mVUBranch=%d][branch=%d]", if (pf)
DevCon.WriteLn("mVU%d - [#%d][sPC=%04x][bPC=%04x][mVUBranch=%d][branch=%d]",
mVU.index, mVU.prog.cur->idx, mVUstartPC / 2 * 8, xPC, mVUbranch, mVUlow.branch); mVU.index, mVU.prog.cur->idx, mVUstartPC / 2 * 8, xPC, mVUbranch, mVUlow.branch);
if (doSFlagInsts && __Status) { if (doSFlagInsts && __Status)
if (pf) DevCon.WriteLn("mVU%d - Status Flag", mVU.index); {
if (pf)
DevCon.WriteLn("mVU%d - Status Flag", mVU.index);
int bStatus[4]; int bStatus[4];
int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles); int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles);
// DevCon::Status("sortRegs = %d", params sortRegs); // DevCon::Status("sortRegs = %d", params sortRegs);
// Note: Emitter will optimize out mov(reg1, reg1) cases... // Note: Emitter will optimize out mov(reg1, reg1) cases...
if (sortRegs == 1) { if (sortRegs == 1)
{
xMOV(gprF0, getFlagReg(bStatus[0])); xMOV(gprF0, getFlagReg(bStatus[0]));
xMOV(gprF1, getFlagReg(bStatus[1])); xMOV(gprF1, getFlagReg(bStatus[1]));
xMOV(gprF2, getFlagReg(bStatus[2])); xMOV(gprF2, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3])); xMOV(gprF3, getFlagReg(bStatus[3]));
} }
else if (sortRegs == 2) { else if (sortRegs == 2)
{
xMOV(gprT1, getFlagReg (bStatus[3])); xMOV(gprT1, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0])); xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg2(bStatus[1])); xMOV(gprF1, getFlagReg2(bStatus[1]));
xMOV(gprF2, getFlagReg2(bStatus[2])); xMOV(gprF2, getFlagReg2(bStatus[2]));
xMOV(gprF3, gprT1); xMOV(gprF3, gprT1);
} }
else if (sortRegs == 3) { else if (sortRegs == 3)
{
int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1]; int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1];
xMOV(gprT1, getFlagReg (gFlag)); xMOV(gprT1, getFlagReg (gFlag));
xMOV(gprT2, getFlagReg (bStatus[3])); xMOV(gprT2, getFlagReg (bStatus[3]));
@ -257,7 +313,8 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF2, getFlagReg4(bStatus[2])); xMOV(gprF2, getFlagReg4(bStatus[2]));
xMOV(gprF3, gprT2); xMOV(gprF3, gprT2);
} }
else { else
{
xMOV(gprT1, getFlagReg(bStatus[0])); xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1])); xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2])); xMOV(gprT3, getFlagReg(bStatus[2]));
@ -268,8 +325,10 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
} }
} }
if (doMFlagInsts && __Mac) { if (doMFlagInsts && __Mac)
if (pf) DevCon.WriteLn("mVU%d - Mac Flag", mVU.index); {
if (pf)
DevCon.WriteLn("mVU%d - Mac Flag", mVU.index);
int bMac[4]; int bMac[4];
sortFlag(mFC.xMac, bMac, mFC.cycles); sortFlag(mFC.xMac, bMac, mFC.cycles);
xMOVAPS(xmmT1, ptr128[mVU.macFlag]); xMOVAPS(xmmT1, ptr128[mVU.macFlag]);
@ -277,8 +336,10 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOVAPS(ptr128[mVU.macFlag], xmmT1); xMOVAPS(ptr128[mVU.macFlag], xmmT1);
} }
if (doCFlagInsts && __Clip) { if (doCFlagInsts && __Clip)
if (pf) DevCon.WriteLn("mVU%d - Clip Flag", mVU.index); {
if (pf)
DevCon.WriteLn("mVU%d - Clip Flag", mVU.index);
int bClip[4]; int bClip[4];
sortFlag(mFC.xClip, bClip, mFC.cycles); sortFlag(mFC.xClip, bClip, mFC.cycles);
xMOVAPS(xmmT2, ptr128[mVU.clipFlag]); xMOVAPS(xmmT2, ptr128[mVU.clipFlag]);
@ -287,26 +348,33 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
} }
} }
// Handles the instruction that follows a branch / E-bit while scanning for flag reads.
// Expands to a braced statement that relies on names from the expansion site:
// `branch`, `aBranchAddr`, `sCount`, `found`, `v`, `mVU` and `mVUregs`. Every
// `break` below leaves the loop that encloses the expansion.
//   branch 3 or 4 : recurse into the branch target with _mVUflagPass(); a
//                   non-conditional branch (3) then stops the scan, otherwise
//                   `branch` is reset to 0 and scanning continues
//   branch 5      : JR/JALR - if fewer than 4 instructions (sCount + found) were
//                   covered, require an exact match for all three flag types (|= 7)
//   anything else : E-bit end, stop the scan
#define shortBranch() \
	{ \
		if ((branch == 3) || (branch == 4)) /*Branches*/ \
		{ \
			_mVUflagPass(mVU, aBranchAddr, sCount + found, found, v); \
			if (branch == 3) /*Non-conditional Branch*/ \
				break; \
			branch = 0; \
		} \
		else if (branch == 5) /*JR/JALR*/ \
		{ \
			if (sCount + found < 4) \
				mVUregs.needExactMatch |= 7; \
			break; \
		} \
		else /*E-Bit End*/ \
			break; \
	}
// Scan through instructions and check if flags are read (FSxxx, FMxxx, FCxxx opcodes) // Scan through instructions and check if flags are read (FSxxx, FMxxx, FCxxx opcodes)
void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector<u32>& v) { void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector<u32>& v)
{
for (u32 i = 0; i < v.size(); i++) { for (u32 i = 0; i < v.size(); i++)
if (v[i] == startPC) return; // Prevent infinite recursion {
if (v[i] == startPC)
return; // Prevent infinite recursion
} }
v.push_back(startPC); v.push_back(startPC);
@ -315,24 +383,53 @@ void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector<u32>& v) {
int aBranchAddr = 0; int aBranchAddr = 0;
iPC = startPC / 4; iPC = startPC / 4;
mVUbranch = 0; mVUbranch = 0;
for(int branch = 0; sCount < 4; sCount += found) { for (int branch = 0; sCount < 4; sCount += found)
{
mVUregs.needExactMatch &= 7; mVUregs.needExactMatch &= 7;
incPC(1); incPC(1);
mVUopU(mVU, 3); mVUopU(mVU, 3);
found |= (mVUregs.needExactMatch & 8) >> 3; found |= (mVUregs.needExactMatch & 8) >> 3;
mVUregs.needExactMatch &= 7; mVUregs.needExactMatch &= 7;
if ( curI & _Ebit_ ) { branch = 1; } if (curI & _Ebit_)
if ( curI & _Tbit_ ) { branch = 6; } {
if ( (curI & _Dbit_) && doDBitHandling ) { branch = 6; } branch = 1;
if (!(curI & _Ibit_) ) { incPC(-1); mVUopL(mVU, 3); incPC(1); } }
if (curI & _Tbit_)
{
branch = 6;
}
if ((curI & _Dbit_) && doDBitHandling)
{
branch = 6;
}
if (!(curI & _Ibit_))
{
incPC(-1);
mVUopL(mVU, 3);
incPC(1);
}
// if (mVUbranch&&(branch>=3)&&(branch<=5)) { DevCon.Error("Double Branch [%x]", xPC); mVUregs.needExactMatch |= 7; break; } // if (mVUbranch&&(branch>=3)&&(branch<=5)) { DevCon.Error("Double Branch [%x]", xPC); mVUregs.needExactMatch |= 7; break; }
if (branch >= 2) { shortBranch(); } if (branch >= 2)
else if (branch == 1) { branch = 2; } {
if (mVUbranch) { branch = ((mVUbranch>8)?(5):((mVUbranch<3)?3:4)); incPC(-1); aBranchAddr = branchAddr(mVU); incPC(1); mVUbranch = 0; } shortBranch();
}
else if (branch == 1)
{
branch = 2;
}
if (mVUbranch)
{
branch = ((mVUbranch > 8) ? (5) : ((mVUbranch < 3) ? 3 : 4));
incPC(-1);
aBranchAddr = branchAddr(mVU);
incPC(1); incPC(1);
if ((mVUregs.needExactMatch&7)==7) break; mVUbranch = 0;
}
incPC(1);
if ((mVUregs.needExactMatch & 7) == 7)
break;
} }
iPC = oldPC; iPC = oldPC;
mVUbranch = oldBranch; mVUbranch = oldBranch;
@ -340,26 +437,31 @@ void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector<u32>& v) {
setCode(); setCode();
} }
// Entry point for the flag-read scan: runs _mVUflagPass() starting at startPC
// with a fresh, empty list of already-visited start addresses (the list is what
// _mVUflagPass uses to avoid infinite recursion).
//   startPC - address at which the scan starts
//   sCount  - number of instructions already accounted for (default 0)
//   found   - passed through to _mVUflagPass unchanged (default 0)
void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0)
{
	std::vector<u32> v;
	_mVUflagPass(mVU, startPC, sCount, found, v);
}
// Checks if the first ~4 instructions of a block will read flags // Checks if the first ~4 instructions of a block will read flags
void mVUsetFlagInfo(mV) { void mVUsetFlagInfo(mV)
if (noFlagOpts) { {
if (noFlagOpts)
{
mVUregs.needExactMatch = 0x7; mVUregs.needExactMatch = 0x7;
mVUregs.flagInfo = 0x0; mVUregs.flagInfo = 0x0;
return; return;
} }
if (mVUbranch <= 2) { // B/BAL if (mVUbranch <= 2) // B/BAL
{
incPC(-1); incPC(-1);
mVUflagPass(mVU, branchAddr(mVU)); mVUflagPass(mVU, branchAddr(mVU));
incPC(1); incPC(1);
mVUregs.needExactMatch &= 0x7; mVUregs.needExactMatch &= 0x7;
} }
else if (mVUbranch <= 8) { // Conditional Branch else if (mVUbranch <= 8) // Conditional Branch
{
incPC(-1); // Branch Taken incPC(-1); // Branch Taken
mVUflagPass(mVU, branchAddr(mVU)); mVUflagPass(mVU, branchAddr(mVU));
int backupFlagInfo = mVUregs.needExactMatch; int backupFlagInfo = mVUregs.needExactMatch;
@ -372,9 +474,16 @@ void mVUsetFlagInfo(mV) {
mVUregs.needExactMatch |= backupFlagInfo; mVUregs.needExactMatch |= backupFlagInfo;
mVUregs.needExactMatch &= 0x7; mVUregs.needExactMatch &= 0x7;
} }
else { // JR/JALR else // JR/JALR
if (!doConstProp || !mVUlow.constJump.isValid) { mVUregs.needExactMatch |= 0x7; } {
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU.microMemSize-8)); } if (!doConstProp || !mVUlow.constJump.isValid)
{
mVUregs.needExactMatch |= 0x7;
}
else
{
mVUflagPass(mVU, (mVUlow.constJump.regValue * 8) & (mVU.microMemSize - 8));
}
mVUregs.needExactMatch &= 0x7; mVUregs.needExactMatch &= 0x7;
} }
} }

View File

@ -15,9 +15,11 @@
#pragma once #pragma once
union regInfo { union regInfo
{
u32 reg; u32 reg;
struct { struct
{
u8 x; u8 x;
u8 y; u8 y;
u8 z; u8 z;
@ -31,10 +33,14 @@ union regInfo {
// vi15 is only used if microVU const-prop is enabled (it is *not* by default). When constprop // vi15 is only used if microVU const-prop is enabled (it is *not* by default). When constprop
// is disabled the vi15 field acts as additional padding that is required for 16 byte alignment // is disabled the vi15 field acts as additional padding that is required for 16 byte alignment
// needed by the xmm compare. // needed by the xmm compare.
union __aligned16 microRegInfo { union __aligned16 microRegInfo
struct { {
union { struct
struct { {
union
{
struct
{
u8 needExactMatch; // If set, block needs an exact match of pipeline state u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1 u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1
u8 q; u8 q;
@ -50,7 +56,8 @@ union __aligned16 microRegInfo {
u8 vi15v; // 'vi15' constant is valid u8 vi15v; // 'vi15' constant is valid
u16 vi15; // Constant Prop Info for vi15 u16 vi15; // Constant Prop Info for vi15
struct { struct
{
u8 VI[16]; u8 VI[16];
regInfo VF[32]; regInfo VF[32];
}; };
@ -64,20 +71,23 @@ union __aligned16 microRegInfo {
// Layout guard: fails the build if microRegInfo is not exactly 160 bytes.
static_assert(sizeof(microRegInfo) == 160, "microRegInfo was not 160 bytes");
struct microProgram;

// One cached jump target: the program that owns the entry point and the entry
// point itself. Both pointers start out null.
struct microJumpCache
{
	microJumpCache() : prog(nullptr), x86ptrStart(nullptr) {}
	microProgram* prog; // Program to which the entry point below is part of
	void* x86ptrStart; // Start of code (Entry point for block)
};
struct __aligned16 microBlock { struct __aligned16 microBlock
{
microRegInfo pState; // Detailed State of Pipeline microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code (Entry point for block) u8* x86ptrStart; // Start of code (Entry point for block)
microJumpCache* jumpCache; // Will point to an array of entry points of size [16k/8] if block ends in JR/JALR microJumpCache* jumpCache; // Will point to an array of entry points of size [16k/8] if block ends in JR/JALR
}; };
struct microTempRegInfo { struct microTempRegInfo
{
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
u8 VFreg[2]; // Index of the VF reg u8 VFreg[2]; // Index of the VF reg
u8 VI; // Holds cycle info for Id u8 VI; // Holds cycle info for Id
@ -88,7 +98,8 @@ struct microTempRegInfo {
u8 xgkick; // Holds the cycle info for XGkick u8 xgkick; // Holds the cycle info for XGkick
}; };
struct microVFreg { struct microVFreg
{
u8 reg; // Reg Index u8 reg; // Reg Index
u8 x; // X vector read/written to? u8 x; // X vector read/written to?
u8 y; // Y vector read/written to? u8 y; // Y vector read/written to?
@ -96,17 +107,20 @@ struct microVFreg {
u8 w; // W vector read/written to? u8 w; // W vector read/written to?
}; };
struct microVIreg { struct microVIreg
{
u8 reg; // Reg Index u8 reg; // Reg Index
u8 used; // Reg is Used? (Read/Written) u8 used; // Reg is Used? (Read/Written)
}; };
struct microConstInfo { struct microConstInfo
{
u8 isValid; // Is the constant in regValue valid? u8 isValid; // Is the constant in regValue valid?
u32 regValue; // Constant Value u32 regValue; // Constant Value
}; };
struct microUpperOp { struct microUpperOp
{
bool eBit; // Has E-bit set bool eBit; // Has E-bit set
bool iBit; // Has I-bit set bool iBit; // Has I-bit set
bool mBit; // Has M-bit set bool mBit; // Has M-bit set
@ -116,7 +130,8 @@ struct microUpperOp {
microVFreg VF_read[2]; // VF Vectors read by this instruction microVFreg VF_read[2]; // VF Vectors read by this instruction
}; };
struct microLowerOp { struct microLowerOp
{
microVFreg VF_write; // VF Vectors written to by this instruction microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction microVIreg VI_write; // VI reg written to by this instruction
@ -134,7 +149,8 @@ struct microLowerOp {
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
}; };
struct microFlagInst { struct microFlagInst
{
bool doFlag; // Update Flag on this Instruction bool doFlag; // Update Flag on this Instruction
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only) bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
u8 write; // Points to the instance that should be written to (s-stage write) u8 write; // Points to the instance that should be written to (s-stage write)
@ -142,14 +158,16 @@ struct microFlagInst {
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
}; };
// Per-block flag bookkeeping: one value for each of the four instances of the
// status, mac and clip flags, plus a cycle counter.
struct microFlagCycles
{
	int xStatus[4]; // Status flag instances
	int xMac[4]; // Mac flag instances
	int xClip[4]; // Clip flag instances
	int cycles; // Cycle counter
};
struct microOp { struct microOp
{
u8 stall; // Info on how much current instruction stalled u8 stall; // Info on how much current instruction stalled
bool isBadOp; // Cur Instruction is a bad opcode (not a legal instruction) bool isBadOp; // Cur Instruction is a bad opcode (not a legal instruction)
bool isEOB; // Cur Instruction is last instruction in block (End of Block) bool isEOB; // Cur Instruction is last instruction in block (End of Block)
@ -171,7 +189,8 @@ struct microOp {
}; };
template <u32 pSize> template <u32 pSize>
struct microIR { struct microIR
{
microBlock block; // Block/Pipeline info microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
@ -189,14 +208,16 @@ struct microIR {
// Reg Alloc // Reg Alloc
//------------------------------------------------------------------ //------------------------------------------------------------------
// Allocation state of one xmm register: which VF register it caches, which
// vectors still need writing back, and usage bookkeeping.
struct microMapXMM
{
	int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
	int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
	int count; // Count of when last used
	bool isNeeded; // Is needed for current instruction
};
class microRegAlloc { class microRegAlloc
{
protected: protected:
static const int xmmTotal = 7; // Don't allocate PQ? static const int xmmTotal = 7; // Don't allocate PQ?
microMapXMM xmmMap[xmmTotal]; microMapXMM xmmMap[xmmTotal];
@ -208,25 +229,34 @@ protected:
// Returns a reference to VI register number `reg` in this VU's register file.
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
// Returns a reference to VF register number `reg` in this VU's register file.
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
// Emits code that loads the I register (VI[REG_I]) into `reg`.
//   reg  - destination xmm register
//   xyzw - vector mask; unless _XYZWss(xyzw) is true, a shuffle with immediate 0
//          follows the load, copying lane 0 of `reg` into all four lanes
// NOTE(review): xMOVSSZX presumably zero-extends the upper lanes - confirm in the emitter.
__ri void loadIreg(const xmm& reg, int xyzw)
{
	xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
	if (!_XYZWss(xyzw))
		xSHUF.PS(reg, reg, 0);
}
int findFreeRegRec(int startIdx) { int findFreeRegRec(int startIdx)
for(int i = startIdx; i < xmmTotal; i++) { {
if (!xmmMap[i].isNeeded) { for (int i = startIdx; i < xmmTotal; i++)
{
if (!xmmMap[i].isNeeded)
{
int x = findFreeRegRec(i + 1); int x = findFreeRegRec(i + 1);
if (x == -1) return i; if (x == -1)
return i;
return ((xmmMap[i].count < xmmMap[x].count) ? i : x); return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
} }
} }
return -1; return -1;
} }
int findFreeReg() { int findFreeReg()
for(int i = 0; i < xmmTotal; i++) { {
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) { for (int i = 0; i < xmmTotal; i++)
{
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0))
{
return i; // Reg is not needed and was a temp reg return i; // Reg is not needed and was a temp reg
} }
} }
@ -236,14 +266,17 @@ protected:
} }
public: public:
microRegAlloc(int _index) { microRegAlloc(int _index)
{
index = _index; index = _index;
reset(); reset();
} }
// Fully resets the regalloc by clearing all cached data // Fully resets the regalloc by clearing all cached data
void reset() { void reset()
for(int i = 0; i < xmmTotal; i++) { {
for (int i = 0; i < xmmTotal; i++)
{
clearReg(i); clearReg(i);
} }
counter = 0; counter = 0;
@ -252,19 +285,24 @@ public:
// Flushes all allocated registers (i.e. writes-back to memory all modified registers). // Flushes all allocated registers (i.e. writes-back to memory all modified registers).
// If clearState is 0, then it keeps cached reg data valid // If clearState is 0, then it keeps cached reg data valid
// If clearState is 1, then it invalidates all cached reg data after write-back // If clearState is 1, then it invalidates all cached reg data after write-back
void flushAll(bool clearState = true) { void flushAll(bool clearState = true)
for(int i = 0; i < xmmTotal; i++) { {
for (int i = 0; i < xmmTotal; i++)
{
writeBackReg(xmm(i)); writeBackReg(xmm(i));
if (clearState) if (clearState)
clearReg(i); clearReg(i);
} }
} }
void TDwritebackAll(bool clearState = false) { void TDwritebackAll(bool clearState = false)
for(int i = 0; i < xmmTotal; i++) { {
for (int i = 0; i < xmmTotal; i++)
{
microMapXMM& mapX = xmmMap[xmm(i).Id]; microMapXMM& mapX = xmmMap[xmm(i).Id];
if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0 if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0
{
if (mapX.VFreg == 33) if (mapX.VFreg == 33)
xMOVSS(ptr32[&getVI(REG_I)], xmm(i)); xMOVSS(ptr32[&getVI(REG_I)], xmm(i));
else if (mapX.VFreg == 32) else if (mapX.VFreg == 32)
@ -276,7 +314,8 @@ public:
} }
// Convenience overload: clears the map entry for the given xmm register by its Id.
void clearReg(const xmm& reg) { clearReg(reg.Id); }
void clearReg(int regId) { void clearReg(int regId)
{
microMapXMM& clear = xmmMap[regId]; microMapXMM& clear = xmmMap[regId];
clear.VFreg = -1; clear.VFreg = -1;
clear.count = 0; clear.count = 0;
@ -284,19 +323,24 @@ public:
clear.isNeeded = 0; clear.isNeeded = 0;
} }
void clearRegVF(int VFreg) { void clearRegVF(int VFreg)
for(int i = 0; i < xmmTotal; i++) { {
if (xmmMap[i].VFreg == VFreg) clearReg(i); for (int i = 0; i < xmmTotal; i++)
{
if (xmmMap[i].VFreg == VFreg)
clearReg(i);
} }
} }
// Writes back modified reg to memory. // Writes back modified reg to memory.
// If all vectors modified, then keeps the VF reg cached in the xmm register. // If all vectors modified, then keeps the VF reg cached in the xmm register.
// If reg was not modified, then keeps the VF reg cached in the xmm register. // If reg was not modified, then keeps the VF reg cached in the xmm register.
void writeBackReg(const xmm& reg, bool invalidateRegs = true) { void writeBackReg(const xmm& reg, bool invalidateRegs = true)
{
microMapXMM& mapX = xmmMap[reg.Id]; microMapXMM& mapX = xmmMap[reg.Id];
if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0 if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0
{
if (mapX.VFreg == 33) if (mapX.VFreg == 33)
xMOVSS(ptr32[&getVI(REG_I)], reg); xMOVSS(ptr32[&getVI(REG_I)], reg);
else if (mapX.VFreg == 32) else if (mapX.VFreg == 32)
@ -304,20 +348,25 @@ public:
else else
mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, true); mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, true);
if (invalidateRegs) { if (invalidateRegs)
for(int i = 0; i < xmmTotal; i++) { {
for (int i = 0; i < xmmTotal; i++)
{
microMapXMM& mapI = xmmMap[i]; microMapXMM& mapI = xmmMap[i];
if ((i == reg.Id) || mapI.isNeeded) if ((i == reg.Id) || mapI.isNeeded)
continue; continue;
if (mapI.VFreg == mapX.VFreg) { if (mapI.VFreg == mapX.VFreg)
if (mapI.xyzw && mapI.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg); {
if (mapI.xyzw && mapI.xyzw < 0xf)
DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg clearReg(i); // Invalidate any Cached Regs of same vf Reg
} }
} }
} }
if (mapX.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified if (mapX.xyzw == 0xf) // Make Cached Reg if All Vectors were Modified
{
mapX.count = counter; mapX.count = counter;
mapX.xyzw = 0; mapX.xyzw = 0;
mapX.isNeeded = false; mapX.isNeeded = false;
@ -325,7 +374,8 @@ public:
} }
clearReg(reg); clearReg(reg);
} }
else if (mapX.xyzw) { // Clear reg if modified and is VF0 or temp reg... else if (mapX.xyzw) // Clear reg if modified and is VF0 or temp reg...
{
clearReg(reg); clearReg(reg);
} }
} }
@ -335,30 +385,41 @@ public:
// This is to guarantee proper merging between registers... When a written-to reg is cleared, // This is to guarantee proper merging between registers... When a written-to reg is cleared,
// it invalidates other cached registers of the same VF reg, and merges partial-vector // it invalidates other cached registers of the same VF reg, and merges partial-vector
// writes into them. // writes into them.
void clearNeeded(const xmm& reg) { void clearNeeded(const xmm& reg)
{
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; // Sometimes xmmPQ hits this if ((reg.Id < 0) || (reg.Id >= xmmTotal)) // Sometimes xmmPQ hits this
return;
microMapXMM& clear = xmmMap[reg.Id]; microMapXMM& clear = xmmMap[reg.Id];
clear.isNeeded = false; clear.isNeeded = false;
if (clear.xyzw) { // Reg was modified if (clear.xyzw) // Reg was modified
if (clear.VFreg > 0) { {
if (clear.VFreg > 0)
{
int mergeRegs = 0; int mergeRegs = 0;
if (clear.xyzw < 0xf) mergeRegs = 1; // Try to merge partial writes if (clear.xyzw < 0xf) // Try to merge partial writes
for(int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg mergeRegs = 1;
if (i == reg.Id) continue; for (int i = 0; i < xmmTotal; i++) // Invalidate any other read-only regs of same vfReg
{
if (i == reg.Id)
continue;
microMapXMM& mapI = xmmMap[i]; microMapXMM& mapI = xmmMap[i];
if (mapI.VFreg == clear.VFreg) { if (mapI.VFreg == clear.VFreg)
if (mapI.xyzw && mapI.xyzw < 0xf) { {
if (mapI.xyzw && mapI.xyzw < 0xf)
{
DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg); DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg);
} }
if (mergeRegs == 1) { if (mergeRegs == 1)
{
mVUmergeRegs(xmm(i), reg, clear.xyzw, true); mVUmergeRegs(xmm(i), reg, clear.xyzw, true);
mapI.xyzw = 0xf; mapI.xyzw = 0xf;
mapI.count = counter; mapI.count = counter;
mergeRegs = 2; mergeRegs = 2;
} }
else clearReg(i); // Clears when mergeRegs is 0 or 2 else
clearReg(i); // Clears when mergeRegs is 0 or 2
} }
} }
if (mergeRegs == 2) // Clear Current Reg if Merged if (mergeRegs == 2) // Clear Current Reg if Merged
@ -366,31 +427,38 @@ public:
else if (mergeRegs == 1) // Write Back Partial Writes if couldn't merge else if (mergeRegs == 1) // Write Back Partial Writes if couldn't merge
writeBackReg(reg); writeBackReg(reg);
} }
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself else
clearReg(reg); // If Reg was temp or vf0, then invalidate itself
} }
} }
// vfLoadReg = VF reg to be loaded to the xmm register // vfLoadReg = VF reg to be loaded to the xmm register
// vfWriteReg = VF reg that the returned xmm register will be considered as // vfWriteReg = VF reg that the returned xmm register will be considered as
// xyzw = XYZW vectors that will be modified (and loaded) // xyzw = XYZW vectors that will be modified (and loaded)
// cloneWrite = When loading a reg that will be written to, // cloneWrite = When loading a reg that will be written to, it copies it to its own xmm reg instead of overwriting the cached one...
// it copies it to its own xmm reg instead of overwriting the cached one...
// Notes: // Notes:
// To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1. // To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1.
// To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1 // To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1
// To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0 // To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) { const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1)
{
//DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite); //DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite);
counter++; counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs if (vfLoadReg >= 0) // Search For Cached Regs
for(int i = 0; i < xmmTotal; i++) { {
for (int i = 0; i < xmmTotal; i++)
{
const xmm& xmmI = xmm::GetInstance(i); const xmm& xmmI = xmm::GetInstance(i);
microMapXMM& mapI = xmmMap[i]; microMapXMM& mapI = xmmMap[i];
if ((mapI.VFreg == vfLoadReg) && (!mapI.xyzw // Reg Was Not Modified if ((mapI.VFreg == vfLoadReg)
|| (mapI.VFreg && (mapI.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 && (!mapI.xyzw // Reg Was Not Modified
|| (mapI.VFreg && (mapI.xyzw == 0xf)))) // Reg Had All Vectors Modified and != VF0
{
int z = i; int z = i;
if (vfWriteReg >= 0) { // Reg will be modified if (vfWriteReg >= 0) // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg {
if (cloneWrite) // Clone Reg so as not to use the same Cached Reg
{
z = findFreeReg(); z = findFreeReg();
const xmm& xmmZ = xmm::GetInstance(z); const xmm& xmmZ = xmm::GetInstance(z);
writeBackReg(xmmZ); writeBackReg(xmmZ);
@ -406,7 +474,8 @@ public:
mapI.count = counter; // Reg i was used, so update counter mapI.count = counter; // Reg i was used, so update counter
} }
else { // Don't clone reg, but shuffle to adjust for SS ops else // Don't clone reg, but shuffle to adjust for SS ops
{
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf))
writeBackReg(xmmI); writeBackReg(xmmI);
@ -430,7 +499,8 @@ public:
const xmm& xmmX = xmm::GetInstance(x); const xmm& xmmX = xmm::GetInstance(x);
writeBackReg(xmmX); writeBackReg(xmmX);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading) if (vfWriteReg >= 0) // Reg Will Be Modified (allow partial reg loading)
{
if ((vfLoadReg == 0) && !(xyzw & 1)) if ((vfLoadReg == 0) && !(xyzw & 1))
xPXOR(xmmX, xmmX); xPXOR(xmmX, xmmX);
else if (vfLoadReg == 33) else if (vfLoadReg == 33)
@ -443,7 +513,8 @@ public:
xmmMap[x].VFreg = vfWriteReg; xmmMap[x].VFreg = vfWriteReg;
xmmMap[x].xyzw = xyzw; xmmMap[x].xyzw = xyzw;
} }
else { // Reg Will Not Be Modified (always load full reg for caching) else // Reg Will Not Be Modified (always load full reg for caching)
{
if (vfLoadReg == 33) if (vfLoadReg == 33)
loadIreg(xmmX, 0xf); loadIreg(xmmX, 0xf);
else if (vfLoadReg == 32) else if (vfLoadReg == 32)

View File

@ -18,10 +18,12 @@
#include "Utilities/AsciiFile.h" #include "Utilities/AsciiFile.h"
// writes text directly to mVU.logFile, no newlines appended. // writes text directly to mVU.logFile, no newlines appended.
_mVUt void __mVULog(const char* fmt, ...) { _mVUt void __mVULog(const char* fmt, ...)
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
if (!mVU.logFile) return; if (!mVU.logFile)
return;
char tmp[2024]; char tmp[2024];
va_list list; va_list list;
@ -35,11 +37,19 @@ _mVUt void __mVULog(const char* fmt, ...) {
mVU.logFile->Flush(); mVU.logFile->Flush();
} }
#define commaIf() { if (bitX[6]) { mVUlog(","); bitX[6] = false; } } #define commaIf() \
{ \
if (bitX[6]) \
{ \
mVUlog(","); \
bitX[6] = false; \
} \
}
#include "AppConfig.h" #include "AppConfig.h"
void __mVUdumpProgram(microVU& mVU, microProgram& prog) { void __mVUdumpProgram(microVU& mVU, microProgram& prog)
{
bool bitX[7]; bool bitX[7];
int delay = 0; int delay = 0;
int bBranch = mVUbranch; int bBranch = mVUbranch;
@ -61,10 +71,21 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) {
mVUlog("*********************\n\n<br><br>", prog.idx); mVUlog("*********************\n\n<br><br>", prog.idx);
mVUlog("</font>"); mVUlog("</font>");
for (u32 i = 0; i < mVU.progSize; i+=2) { for (u32 i = 0; i < mVU.progSize; i += 2)
{
if (delay) { delay--; mVUlog("</font>"); if (!delay) mVUlog("<hr/>"); } if (delay)
if (mVUbranch) { delay = 1; mVUbranch = 0; } {
delay--;
mVUlog("</font>");
if (!delay)
mVUlog("<hr/>");
}
if (mVUbranch)
{
delay = 1;
mVUbranch = 0;
}
mVU.code = prog.data[i + 1]; mVU.code = prog.data[i + 1];
bitX[0] = false; bitX[0] = false;
@ -89,7 +110,8 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) {
mVUlog("[%04x] (%08x)</a> ", i * 4, mVU.code); mVUlog("[%04x] (%08x)</a> ", i * 4, mVU.code);
mVUopU(mVU, 2); mVUopU(mVU, 2);
if (bitX[5]) { if (bitX[5])
{
mVUlog(" ("); mVUlog(" (");
if (bitX[0]) { mVUlog("I"); bitX[6] = true; } if (bitX[0]) { mVUlog("I"); bitX[6] = true; }
if (bitX[1]) { commaIf(); mVUlog("E"); bitX[6] = true; } if (bitX[1]) { commaIf(); mVUlog("E"); bitX[6] = true; }
@ -99,19 +121,22 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) {
mVUlog(")"); mVUlog(")");
} }
if (mVUstall) { if (mVUstall)
{
mVUlog(" Stall %d Cycles", mVUstall); mVUlog(" Stall %d Cycles", mVUstall);
} }
iPC = i; iPC = i;
mVU.code = prog.data[i]; mVU.code = prog.data[i];
if(bitX[0]) { if (bitX[0])
{
mVUlog("<br>\n<font color=\"#FF7000\">"); mVUlog("<br>\n<font color=\"#FF7000\">");
mVUlog("[%04x] (%08x) %f", i * 4, mVU.code, *(float*)&mVU.code); mVUlog("[%04x] (%08x) %f", i * 4, mVU.code, *(float*)&mVU.code);
mVUlog("</font>\n\n<br><br>"); mVUlog("</font>\n\n<br><br>");
} }
else { else
{
mVUlog("<br>\n[%04x] (%08x) ", i * 4, mVU.code); mVUlog("<br>\n[%04x] (%08x) ", i * 4, mVU.code);
mVUopL(mVU, 2); mVUopL(mVU, 2);
mVUlog("\n\n<br><br>"); mVUlog("\n\n<br><br>");
@ -128,4 +153,3 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) {
mVU.logFile.reset(nullptr); mVU.logFile.reset(nullptr);
} }

File diff suppressed because it is too large Load Diff

View File

@ -38,17 +38,17 @@ void setupMacroOp(int mode, const char* opName)
memset(&microVU0.prog.IRinfo.info[0], 0, sizeof(microVU0.prog.IRinfo.info[0])); memset(&microVU0.prog.IRinfo.info[0], 0, sizeof(microVU0.prog.IRinfo.info[0]));
iFlushCall(FLUSH_EVERYTHING); iFlushCall(FLUSH_EVERYTHING);
microVU0.regAlloc->reset(); microVU0.regAlloc->reset();
if (mode & 0x01) if (mode & 0x01) // Q-Reg will be Read
{ // Q-Reg will be Read {
xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]); xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]);
} }
if (mode & 0x08) if (mode & 0x08) // Clip Instruction
{ // Clip Instruction {
microVU0.prog.IRinfo.info[0].cFlag.write = 0xff; microVU0.prog.IRinfo.info[0].cFlag.write = 0xff;
microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff; microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff;
} }
if (mode & 0x10) if (mode & 0x10) // Update Status/Mac Flags
{ // Update Status/Mac Flags {
microVU0.prog.IRinfo.info[0].sFlag.doFlag = true; microVU0.prog.IRinfo.info[0].sFlag.doFlag = true;
microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true; microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true;
microVU0.prog.IRinfo.info[0].sFlag.write = 0; microVU0.prog.IRinfo.info[0].sFlag.write = 0;
@ -64,20 +64,20 @@ void setupMacroOp(int mode, const char* opName)
void endMacroOp(int mode) void endMacroOp(int mode)
{ {
if (mode & 0x02) if (mode & 0x02) // Q-Reg was Written To
{ // Q-Reg was Written To {
xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ); xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ);
} }
if (mode & 0x10) if (mode & 0x10) // Status/Mac Flags were Updated
{ // Status/Mac Flags were Updated {
// Normalize // Normalize
mVUallocSFLAGc(eax, gprF0, 0); mVUallocSFLAGc(eax, gprF0, 0);
xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax); xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
} }
microVU0.regAlloc->flushAll(); microVU0.regAlloc->flushAll();
if (mode & 0x10) if (mode & 0x10) // Update VU0 Status/Mac instances after flush to avoid corrupting anything
{ // Update VU0 Status/Mac instances after flush to avoid corrupting anything {
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
xMOVDZX(xmmT1, eax); xMOVDZX(xmmT1, eax);
xSHUF.PS(xmmT1, xmmT1, 0); xSHUF.PS(xmmT1, xmmT1, 0);
@ -335,15 +335,13 @@ static void recCFC2()
skipvuidle.SetTarget(); skipvuidle.SetTarget();
} }
if (_Rd_ == REG_STATUS_FLAG) if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag
{ // Normalize Status Flag
xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]); xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]);
}
else else
xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]); xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
if (_Rd_ == REG_TPC) if (_Rd_ == REG_TPC) // Divide TPC register value by 8 during copying
{ // Divide TPC register value by 8 during copying {
// Ok, this deserves an explanation. // Ok, this deserves an explanation.
// According to the official PS2 VU0 coding manual there are 3 ways to execute a micro subroutine on VU0 // According to the official PS2 VU0 coding manual there are 3 ways to execute a micro subroutine on VU0
// one of which is using the VCALLMSR instruction. // one of which is using the VCALLMSR instruction.
@ -571,279 +569,52 @@ void _vuRegsCOP22(VURegs* VU, _VURegsNum* VUregsn) {}
// Recompilation // Recompilation
void (*recCOP2t[32])() = { void (*recCOP2t[32])() = {
rec_C2UNK, rec_C2UNK, recQMFC2, recCFC2, rec_C2UNK, rec_C2UNK, recQMTC2, recCTC2, rec_C2UNK,
recQMFC2, recCOP2_BC2, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recCFC2, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1,
rec_C2UNK, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1,
rec_C2UNK,
recQMTC2,
recCTC2,
rec_C2UNK,
recCOP2_BC2,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
recCOP2_SPEC1,
}; };
void (*recCOP2_BC2t[32])() = { void (*recCOP2_BC2t[32])() = {
recBC2F, recBC2F, recBC2T, recBC2FL, recBC2TL, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recBC2T, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recBC2FL, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recBC2TL, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
}; };
void (*recCOP2SPECIAL1t[64])() = { void (*recCOP2SPECIAL1t[64])() = {
recVADDx, recVADDx, recVADDy, recVADDz, recVADDw, recVSUBx, recVSUBy, recVSUBz, recVSUBw,
recVADDy, recVMADDx, recVMADDy, recVMADDz, recVMADDw, recVMSUBx, recVMSUBy, recVMSUBz, recVMSUBw,
recVADDz, recVMAXx, recVMAXy, recVMAXz, recVMAXw, recVMINIx, recVMINIy, recVMINIz, recVMINIw,
recVADDw, recVMULx, recVMULy, recVMULz, recVMULw, recVMULq, recVMAXi, recVMULi, recVMINIi,
recVSUBx, recVADDq, recVMADDq, recVADDi, recVMADDi, recVSUBq, recVMSUBq, recVSUBi, recVMSUBi,
recVSUBy, recVADD, recVMADD, recVMUL, recVMAX, recVSUB, recVMSUB, recVOPMSUB, recVMINI,
recVSUBz, recVIADD, recVISUB, recVIADDI, rec_C2UNK, recVIAND, recVIOR, rec_C2UNK, rec_C2UNK,
recVSUBw, recVCALLMS, recVCALLMSR,rec_C2UNK, rec_C2UNK, recCOP2_SPEC2, recCOP2_SPEC2, recCOP2_SPEC2, recCOP2_SPEC2,
recVMADDx,
recVMADDy,
recVMADDz,
recVMADDw,
recVMSUBx,
recVMSUBy,
recVMSUBz,
recVMSUBw,
recVMAXx,
recVMAXy,
recVMAXz,
recVMAXw,
recVMINIx,
recVMINIy,
recVMINIz,
recVMINIw,
recVMULx,
recVMULy,
recVMULz,
recVMULw,
recVMULq,
recVMAXi,
recVMULi,
recVMINIi,
recVADDq,
recVMADDq,
recVADDi,
recVMADDi,
recVSUBq,
recVMSUBq,
recVSUBi,
recVMSUBi,
recVADD,
recVMADD,
recVMUL,
recVMAX,
recVSUB,
recVMSUB,
recVOPMSUB,
recVMINI,
recVIADD,
recVISUB,
recVIADDI,
rec_C2UNK,
recVIAND,
recVIOR,
rec_C2UNK,
rec_C2UNK,
recVCALLMS,
recVCALLMSR,
rec_C2UNK,
rec_C2UNK,
recCOP2_SPEC2,
recCOP2_SPEC2,
recCOP2_SPEC2,
recCOP2_SPEC2,
}; };
void (*recCOP2SPECIAL2t[128])() = { void (*recCOP2SPECIAL2t[128])() = {
recVADDAx, recVADDAx, recVADDAy, recVADDAz, recVADDAw, recVSUBAx, recVSUBAy, recVSUBAz, recVSUBAw,
recVADDAy, recVMADDAx,recVMADDAy, recVMADDAz, recVMADDAw, recVMSUBAx, recVMSUBAy, recVMSUBAz, recVMSUBAw,
recVADDAz, recVITOF0, recVITOF4, recVITOF12, recVITOF15, recVFTOI0, recVFTOI4, recVFTOI12, recVFTOI15,
recVADDAw, recVMULAx, recVMULAy, recVMULAz, recVMULAw, recVMULAq, recVABS, recVMULAi, recVCLIP,
recVSUBAx, recVADDAq, recVMADDAq,recVADDAi, recVMADDAi, recVSUBAq, recVMSUBAq, recVSUBAi, recVMSUBAi,
recVSUBAy, recVADDA, recVMADDA, recVMULA, rec_C2UNK, recVSUBA, recVMSUBA, recVOPMULA, recVNOP,
recVSUBAz, recVMOVE, recVMR32, rec_C2UNK, rec_C2UNK, recVLQI, recVSQI, recVLQD, recVSQD,
recVSUBAw, recVDIV, recVSQRT, recVRSQRT, recVWAITQ, recVMTIR, recVMFIR, recVILWR, recVISWR,
recVMADDAx, recVRNEXT, recVRGET, recVRINIT, recVRXOR, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMADDAy, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMADDAz, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMADDAw, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMSUBAx, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMSUBAy, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMSUBAz, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVMSUBAw, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK,
recVITOF0,
recVITOF4,
recVITOF12,
recVITOF15,
recVFTOI0,
recVFTOI4,
recVFTOI12,
recVFTOI15,
recVMULAx,
recVMULAy,
recVMULAz,
recVMULAw,
recVMULAq,
recVABS,
recVMULAi,
recVCLIP,
recVADDAq,
recVMADDAq,
recVADDAi,
recVMADDAi,
recVSUBAq,
recVMSUBAq,
recVSUBAi,
recVMSUBAi,
recVADDA,
recVMADDA,
recVMULA,
rec_C2UNK,
recVSUBA,
recVMSUBA,
recVOPMULA,
recVNOP,
recVMOVE,
recVMR32,
rec_C2UNK,
rec_C2UNK,
recVLQI,
recVSQI,
recVLQD,
recVSQD,
recVDIV,
recVSQRT,
recVRSQRT,
recVWAITQ,
recVMTIR,
recVMFIR,
recVILWR,
recVISWR,
recVRNEXT,
recVRGET,
recVRINIT,
recVRXOR,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
rec_C2UNK,
}; };
namespace R5900 namespace R5900 {
{ namespace Dynarec {
namespace Dynarec namespace OpcodeImpl {
{
namespace OpcodeImpl
{
void recCOP2() { recCOP2t[_Rs_](); } void recCOP2() { recCOP2t[_Rs_](); }
} // namespace OpcodeImpl } // namespace OpcodeImpl
} // namespace Dynarec } // namespace Dynarec

View File

@ -26,7 +26,8 @@ struct microVU;
// Global Variables // Global Variables
//------------------------------------------------------------------ //------------------------------------------------------------------
struct mVU_Globals { struct mVU_Globals
{
u32 absclip[4], signbit[4], minvals[4], maxvals[4]; u32 absclip[4], signbit[4], minvals[4], maxvals[4];
u32 one[4]; u32 one[4];
u32 Pi4[4]; u32 Pi4[4];
@ -196,7 +197,10 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
extern __pagealigned u8 mVUsearchXMM[__pagesize]; extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32(__fastcall* mVUCall)(void*, void*); typedef u32(__fastcall* mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf) #define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#define mVUemitSearch() { mVUcustomSearch(); } #define mVUemitSearch() \
{ \
mVUcustomSearch(); \
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// Misc Macros... // Misc Macros...
@ -236,8 +240,10 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#define varPrint(x) DevCon.WriteLn(#x " = %d", (int)x) #define varPrint(x) DevCon.WriteLn(#x " = %d", (int)x)
#define islowerOP ((iPC & 1) == 0) #define islowerOP ((iPC & 1) == 0)
#define blockCreate(addr) { \ #define blockCreate(addr) \
if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); \ { \
if (!mVUblocks[addr]) \
mVUblocks[addr] = new microBlockManager(); \
} }
// Fetches the PC and instruction opcode relative to the current PC. Used to rewind and // Fetches the PC and instruction opcode relative to the current PC. Used to rewind and

View File

@ -21,7 +21,8 @@
void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw) void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw)
{ {
switch ( xyzw ) { switch (xyzw)
{
case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX
case 1: xPSHUF.D(dstreg, srcreg, 0x55); break; // YYYY case 1: xPSHUF.D(dstreg, srcreg, 0x55); break; // YYYY
case 2: xPSHUF.D(dstreg, srcreg, 0xaa); break; // ZZZZ case 2: xPSHUF.D(dstreg, srcreg, 0xaa); break; // ZZZZ
@ -31,7 +32,8 @@ void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw)
void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw) void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw)
{ {
switch( xyzw ) { switch (xyzw)
{
case 8: xMOVSSZX(reg, ptr32[ptr ]); break; // X case 8: xMOVSSZX(reg, ptr32[ptr ]); break; // X
case 4: xMOVSSZX(reg, ptr32[ptr + 4]); break; // Y case 4: xMOVSSZX(reg, ptr32[ptr + 4]); break; // Y
case 2: xMOVSSZX(reg, ptr32[ptr + 8]); break; // Z case 2: xMOVSSZX(reg, ptr32[ptr + 8]); break; // Z
@ -43,7 +45,8 @@ void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw)
void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs) void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs)
{ {
xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]); xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]);
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0); if (!_XYZWss(xyzw))
xSHUF.PS(reg, reg, 0);
} }
// Modifies the Source Reg! // Modifies the Source Reg!
@ -58,44 +61,67 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
xMOVAPS(ptr128[ptr], xmmT2); xMOVAPS(ptr128[ptr], xmmT2);
return;*/ return;*/
switch ( xyzw ) { switch (xyzw)
case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1); {
xEXTRACTPS(ptr32[ptr+12], reg, 3); case 5: // YW
break; // YW
case 6: xPSHUF.D(reg, reg, 0xc9);
xMOVL.PS(ptr64[ptr+4], reg);
break; // YZ
case 7: xMOVH.PS(ptr64[ptr+8], reg);
xEXTRACTPS(ptr32[ptr + 4], reg, 1); xEXTRACTPS(ptr32[ptr + 4], reg, 1);
break; // YZW
case 9: xMOVSS(ptr32[ptr], reg);
xEXTRACTPS(ptr32[ptr + 12], reg, 3); xEXTRACTPS(ptr32[ptr + 12], reg, 3);
break; // XW break;
case 10: xMOVSS(ptr32[ptr], reg); case 6: // YZ
xEXTRACTPS(ptr32[ptr+8], reg, 2); xPSHUF.D(reg, reg, 0xc9);
break; //XZ xMOVL.PS(ptr64[ptr + 4], reg);
case 11: xMOVSS(ptr32[ptr], reg); break;
case 7: // YZW
xMOVH.PS(ptr64[ptr + 8], reg); xMOVH.PS(ptr64[ptr + 8], reg);
break; //XZW xEXTRACTPS(ptr32[ptr + 4], reg, 1);
case 13: xMOVL.PS(ptr64[ptr], reg); break;
case 9: // XW
xMOVSS(ptr32[ptr], reg);
xEXTRACTPS(ptr32[ptr + 12], reg, 3); xEXTRACTPS(ptr32[ptr + 12], reg, 3);
break; // XYW break;
case 14: xMOVL.PS(ptr64[ptr], reg); case 10: // XZ
xMOVSS(ptr32[ptr], reg);
xEXTRACTPS(ptr32[ptr + 8], reg, 2); xEXTRACTPS(ptr32[ptr + 8], reg, 2);
break; // XYZ break;
case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1); case 11: // XZW
xMOVSS(ptr32[ptr], reg);
xMOVH.PS(ptr64[ptr + 8], reg);
break;
case 13: // XYW
xMOVL.PS(ptr64[ptr], reg);
xEXTRACTPS(ptr32[ptr + 12], reg, 3);
break;
case 14: // XYZ
xMOVL.PS(ptr64[ptr], reg);
xEXTRACTPS(ptr32[ptr + 8], reg, 2);
break;
case 4: // Y
if (!modXYZW)
mVUunpack_xyzw(reg, reg, 1);
xMOVSS(ptr32[ptr + 4], reg); xMOVSS(ptr32[ptr + 4], reg);
break; // Y break;
case 2: if (!modXYZW) mVUunpack_xyzw(reg, reg, 2); case 2: // Z
if (!modXYZW)
mVUunpack_xyzw(reg, reg, 2);
xMOVSS(ptr32[ptr + 8], reg); xMOVSS(ptr32[ptr + 8], reg);
break; // Z break;
case 1: if (!modXYZW) mVUunpack_xyzw(reg, reg, 3); case 1: // W
if (!modXYZW)
mVUunpack_xyzw(reg, reg, 3);
xMOVSS(ptr32[ptr + 12], reg); xMOVSS(ptr32[ptr + 12], reg);
break; // W break;
case 8: xMOVSS(ptr32[ptr], reg); break; // X case 8: // X
case 12: xMOVL.PS(ptr64[ptr], reg); break; // XY xMOVSS(ptr32[ptr], reg);
case 3: xMOVH.PS(ptr64[ptr+8], reg); break; // ZW break;
default: xMOVAPS(ptr128[ptr], reg); break; // XYZW case 12: // XY
xMOVL.PS(ptr64[ptr], reg);
break;
case 3: // ZW
xMOVH.PS(ptr64[ptr + 8], reg);
break;
default: // XYZW
xMOVAPS(ptr128[ptr], reg);
break;
} }
} }
@ -111,7 +137,8 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
xMOVAPS(dest, src); xMOVAPS(dest, src);
else else
{ {
if (modXYZW) { if (modXYZW)
{
if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; } if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; }
else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; } else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; }
else if (xyzw == 4) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 1, 0)); return; } else if (xyzw == 4) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 1, 0)); return; }
@ -127,48 +154,64 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
//------------------------------------------------------------------ //------------------------------------------------------------------
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI) // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) { __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false)
if (toMemory) { {
for(int i = 0; i < 8; i++) { if (toMemory)
{
for (int i = 0; i < 8; i++)
{
xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i));
} }
} }
else { else
{
mVU.regAlloc->flushAll(); // Flush Regalloc mVU.regAlloc->flushAll(); // Flush Regalloc
xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ); xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ);
} }
} }
// Restore Volatile Regs // Restore Volatile Regs
__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) { __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
if (fromMemory) { {
for(int i = 0; i < 8; i++) { if (fromMemory)
{
for (int i = 0; i < 8; i++)
{
xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]);
} }
} }
else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); else
xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
} }
class mVUScopedXMMBackup { class mVUScopedXMMBackup
{
microVU& mVU; microVU& mVU;
bool fromMemory; bool fromMemory;
public: public:
mVUScopedXMMBackup(microVU& mVU, bool fromMemory): mVU(mVU), fromMemory(fromMemory) { mVUScopedXMMBackup(microVU& mVU, bool fromMemory)
: mVU(mVU) , fromMemory(fromMemory)
{
mVUbackupRegs(mVU, fromMemory); mVUbackupRegs(mVU, fromMemory);
} }
~mVUScopedXMMBackup() { ~mVUScopedXMMBackup()
{
mVUrestoreRegs(mVU, fromMemory); mVUrestoreRegs(mVU, fromMemory);
} }
}; };
_mVUt void __fc mVUprintRegs() { _mVUt void __fc mVUprintRegs()
{
microVU& mVU = mVUx; microVU& mVU = mVUx;
for(int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++)
{
Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i, Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i,
mVU.xmmBackup[i][0], mVU.xmmBackup[i][1], mVU.xmmBackup[i][0], mVU.xmmBackup[i][1],
mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]); mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]);
} }
for(int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++)
{
Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i, Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i,
(float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1], (float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1],
(float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]); (float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]);
@ -176,17 +219,20 @@ _mVUt void __fc mVUprintRegs() {
} }
// Gets called by mVUaddrFix at execution-time // Gets called by mVUaddrFix at execution-time
static void __fc mVUwarningRegAccess(u32 prog, u32 pc) { static void __fc mVUwarningRegAccess(u32 prog, u32 pc)
{
Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
} }
static void __fc mVUTBit() { static void __fc mVUTBit()
{
u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUTBit, std::memory_order_release); u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUTBit, std::memory_order_release);
if (old & VU_Thread::InterruptFlagVUTBit) if (old & VU_Thread::InterruptFlagVUTBit)
DevCon.Warning("Old TBit not registered"); DevCon.Warning("Old TBit not registered");
} }
static void __fc mVUEBit() { static void __fc mVUEBit()
{
vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUEBit, std::memory_order_release); vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUEBit, std::memory_order_release);
} }
@ -203,29 +249,35 @@ static inline u32 branchAddr(const mV)
return ((((iPC + 2) + (_Imm11_ * 2)) & mVU.progMemMask) * 4); return ((((iPC + 2) + (_Imm11_ * 2)) & mVU.progMemMask) * 4);
} }
static void __fc mVUwaitMTVU() { static void __fc mVUwaitMTVU()
if (IsDevBuild) DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!"); {
if (IsDevBuild)
DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!");
vu1Thread.WaitVU(); vu1Thread.WaitVU();
} }
// Transforms the Address in gprReg to valid VU0/VU1 Address // Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const xAddressReg& gprReg) __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
{ {
if (isVU1) { if (isVU1)
{
xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around
xSHL(xRegister32(gprReg.Id), 4); xSHL(xRegister32(gprReg.Id), 4);
} }
else { else
{
xTEST(xRegister32(gprReg.Id), 0x400); xTEST(xRegister32(gprReg.Id), 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around
xForwardJump32 jmpB; xForwardJump32 jmpB;
jmpA.SetTarget(); jmpA.SetTarget();
if (THREAD_VU1) { if (THREAD_VU1)
{
{ {
mVUScopedXMMBackup mVUSave(mVU, true); mVUScopedXMMBackup mVUSave(mVU, true);
xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q}; xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q};
if (IsDevBuild && !isCOP2) { // Lets see which games do this! if (IsDevBuild && !isCOP2) // Lets see which games do this!
{
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1! xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode... xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode...
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd); xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
@ -244,13 +296,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
// Micro VU - Custom SSE Instructions // Micro VU - Custom SSE Instructions
//------------------------------------------------------------------ //------------------------------------------------------------------
// Constant bit masks used as 128-bit memory operands by the custom SSE helpers
// in this file (e.g. MIN_MAX_PS reads sseMasks.MIN_MAX_1 through ptr128).
// Each member is four u32 values, i.e. one 128-bit vector.
// ADD_SS is presumably consumed by the ADD_SS_* helpers — confirm at their use sites.
struct SSEMasks { u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4]; }; struct SSEMasks
{
u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4];
};
// The single instance is declared __aligned16. Initializer rows follow the
// member order: MIN_MAX_1, MIN_MAX_2, ADD_SS.
static const __aligned16 SSEMasks sseMasks = static const __aligned16 SSEMasks sseMasks =
{ {
{0xffffffff, 0x80000000, 0xffffffff, 0x80000000}, {0xffffffff, 0x80000000, 0xffffffff, 0x80000000},
{0x00000000, 0x40000000, 0x00000000, 0x40000000}, {0x00000000, 0x40000000, 0x00000000, 0x40000000},
{0x80000000, 0xffffffff, 0xffffffff, 0xffffffff} {0x80000000, 0xffffffff, 0xffffffff, 0xffffffff},
}; };
@ -260,7 +315,8 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c
const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in; const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in;
const xmm& t2 = t2in.IsEmpty() ? mVU.regAlloc->allocReg() : t2in; const xmm& t2 = t2in.IsEmpty() ? mVU.regAlloc->allocReg() : t2in;
if (0) { // use double comparison if (0) // use double comparison
{
// ZW // ZW
xPSHUF.D(t1, to, 0xfa); xPSHUF.D(t1, to, 0xfa);
xPAND (t1, ptr128[sseMasks.MIN_MAX_1]); xPAND (t1, ptr128[sseMasks.MIN_MAX_1]);
@ -283,7 +339,8 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c
xSHUF.PS(to, t1, 0x88); xSHUF.PS(to, t1, 0x88);
} }
else { // use integer comparison else // use integer comparison
{
const xmm& c1 = min ? t2 : t1; const xmm& c1 = min ? t2 : t1;
const xmm& c2 = min ? t1 : t2; const xmm& c2 = min ? t1 : t2;
@ -317,7 +374,8 @@ void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min)
xPSHUF.D(t1, to, 0xee); xPSHUF.D(t1, to, 0xee);
if (min) xMIN.PD(to, t1); if (min) xMIN.PD(to, t1);
else xMAX.PD(to, t1); else xMAX.PD(to, t1);
if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); if (t1 != t1in)
mVU.regAlloc->clearNeeded(t1);
} }
// Not Used! - TriAce games only need a portion of this code to boot (see function below) // Not Used! - TriAce games only need a portion of this code to boot (see function below)
@ -375,7 +433,8 @@ void ADD_SS_Single_Guard_Bit(microVU& mVU, const xmm& to, const xmm& from, const
case_end4.SetTarget(); case_end4.SetTarget();
xADD.SS(to, from); xADD.SS(to, from);
if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); if (t1 != t1in)
mVU.regAlloc->clearNeeded(t1);
} }
// Turns out only this is needed to get TriAce games booting with mVU // Turns out only this is needed to get TriAce games booting with mVU
@ -408,37 +467,48 @@ void ADD_SS_TriAceHack(microVU& mVU, const xmm& to, const xmm& from)
xADD.SS(to, from); xADD.SS(to, from);
} }
// clampOp(opX, isPS): clamps both operands with mVUclamp3, emits opX(to, from),
// then clamps the result with mVUclamp4. The mask argument is 0xf when isPS is
// true and 0x8 otherwise. The macro relies on `mVU`, `to`, `from` and `t1`
// being in scope at the expansion site.
// The new revision wraps the body in do { ... } while (0) so the expansion is a
// single statement and can be used in an unbraced if/else (as SSE_ADD2SS does).
#define clampOp(opX, isPS) { \ #define clampOp(opX, isPS) \
do { \
mVUclamp3(mVU, to, t1, (isPS) ? 0xf : 0x8); \ mVUclamp3(mVU, to, t1, (isPS) ? 0xf : 0x8); \
mVUclamp3(mVU, from, t1, (isPS) ? 0xf : 0x8); \ mVUclamp3(mVU, from, t1, (isPS) ? 0xf : 0x8); \
opX(to, from); \ opX(to, from); \
mVUclamp4(to, t1, (isPS) ? 0xf : 0x8); \ mVUclamp4(to, t1, (isPS) ? 0xf : 0x8); \
} } while (0)
// Packed-single MAX of `to` and `from`, result in `to`.
// With CHECK_VU_MINMAXHACK set, emits the plain xMAX.PS instruction; otherwise
// defers to MIN_MAX_PS with min=false, passing t1/t2 through as its temporaries.
void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (CHECK_VU_MINMAXHACK) { xMAX.PS(to, from); } if (CHECK_VU_MINMAXHACK)
else { MIN_MAX_PS(mVU, to, from, t1, t2, false); } xMAX.PS(to, from);
else
MIN_MAX_PS(mVU, to, from, t1, t2, false);
} }
// Packed-single MIN of `to` and `from`, result in `to`.
// With CHECK_VU_MINMAXHACK set, emits the plain xMIN.PS instruction; otherwise
// defers to MIN_MAX_PS with min=true, passing t1/t2 through as its temporaries.
void SSE_MINPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_MINPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (CHECK_VU_MINMAXHACK) { xMIN.PS(to, from); } if (CHECK_VU_MINMAXHACK)
else { MIN_MAX_PS(mVU, to, from, t1, t2, true); } xMIN.PS(to, from);
else
MIN_MAX_PS(mVU, to, from, t1, t2, true);
} }
// Scalar-single MAX of `to` and `from`, result in `to`.
// With CHECK_VU_MINMAXHACK set, emits the plain xMAX.SS instruction; otherwise
// defers to MIN_MAX_SS with min=false. `t2` is accepted but not used here
// (presumably kept so all SSE_* helpers share one signature — confirm against the SSE_SS table).
void SSE_MAXSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_MAXSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (CHECK_VU_MINMAXHACK) { xMAX.SS(to, from); } if (CHECK_VU_MINMAXHACK)
else { MIN_MAX_SS(mVU, to, from, t1, false); } xMAX.SS(to, from);
else
MIN_MAX_SS(mVU, to, from, t1, false);
} }
// Scalar-single MIN of `to` and `from`, result in `to`.
// With CHECK_VU_MINMAXHACK set, emits the plain xMIN.SS instruction; otherwise
// defers to MIN_MAX_SS with min=true. `t2` is accepted but not used here
// (presumably kept so all SSE_* helpers share one signature — confirm against the SSE_SS table).
void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (CHECK_VU_MINMAXHACK) { xMIN.SS(to, from); } if (CHECK_VU_MINMAXHACK)
else { MIN_MAX_SS(mVU, to, from, t1, true); } xMIN.SS(to, from);
else
MIN_MAX_SS(mVU, to, from, t1, true);
} }
// Scalar-single ADD variant selected by CHECK_VUADDSUBHACK.
// Hack disabled: expands clampOp(xADD.SS, false), i.e. clamp operands, add,
// clamp result (the macro uses `t1` from this scope as its temporary).
// Hack enabled: emits ADD_SS_TriAceHack instead. `t2` is not used here.
void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, false); } if (!CHECK_VUADDSUBHACK)
else { ADD_SS_TriAceHack(mVU, to, from); } clampOp(xADD.SS, false);
else
ADD_SS_TriAceHack(mVU, to, from);
} }
// Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK... // Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK...
@ -487,7 +557,8 @@ __pagealigned u8 mVUsearchXMM[__pagesize];
// Generates a custom optimized block-search function // Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this) // Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch() { void mVUcustomSearch()
{
HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ReadWrite()); HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ReadWrite());
memset(mVUsearchXMM, 0xcc, __pagesize); memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM); xSetPtr(mVUsearchXMM);

View File

@ -15,7 +15,8 @@
#pragma once #pragma once
enum microOpcode { enum microOpcode
{
// Upper Instructions // Upper Instructions
opABS, opCLIP, opOPMULA, opOPMSUB, opNOP, opABS, opCLIP, opOPMULA, opOPMSUB, opNOP,
opADD, opADDi, opADDq, opADDx, opADDy, opADDz, opADDw, opADD, opADDi, opADDq, opADDx, opADDy, opADDz, opADDw,
@ -99,22 +100,31 @@ static const char microOpcodeName[][16] = {
#include <string> #include <string>
#include <algorithm> #include <algorithm>
struct microProfiler { struct microProfiler
{
static const u32 progLimit = 10000; static const u32 progLimit = 10000;
u64 opStats[opLastOpcode]; u64 opStats[opLastOpcode];
u32 progCount; u32 progCount;
int index; int index;
void Reset(int _index) { memzero(*this); index = _index; } void Reset(int _index)
void EmitOp(microOpcode op) { {
memzero(*this);
index = _index;
}
void EmitOp(microOpcode op)
{
xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1); xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1);
xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0); xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0);
} }
void Print() { void Print()
{
progCount++; progCount++;
if ((progCount % progLimit) == 0) { if ((progCount % progLimit) == 0)
{
u64 total = 0; u64 total = 0;
std::vector<std::pair<u32, u32>> v; std::vector<std::pair<u32, u32>> v;
for(int i = 0; i < opLastOpcode; i++) { for (int i = 0; i < opLastOpcode; i++)
{
total += opStats[i]; total += opStats[i];
v.push_back(std::make_pair(opStats[i], i)); v.push_back(std::make_pair(opStats[i], i));
} }
@ -122,7 +132,8 @@ struct microProfiler {
std::reverse(v.begin(), v.end()); std::reverse(v.begin(), v.end());
double dTotal = (double)total; double dTotal = (double)total;
DevCon.WriteLn("microVU%d Profiler:", index); DevCon.WriteLn("microVU%d Profiler:", index);
for(u32 i = 0; i < v.size(); i++) { for (u32 i = 0; i < v.size(); i++)
{
u64 count = v[i].first; u64 count = v[i].first;
double stat = (double)count / dTotal * 100.0; double stat = (double)count / dTotal * 100.0;
std::string str = microOpcodeName[v[i].second]; std::string str = microOpcodeName[v[i].second];
@ -135,7 +146,8 @@ struct microProfiler {
} }
}; };
#else #else
struct microProfiler { struct microProfiler
{
__fi void Reset(int _index) {} __fi void Reset(int _index) {}
__fi void EmitOp(microOpcode op) {} __fi void EmitOp(microOpcode op) {}
__fi void Print() {} __fi void Print() {}

View File

@ -210,9 +210,17 @@ mVUop(mVULowerOP_T3_10) { mVULowerOP_T3_10_OPCODE [((mVU.code >> 6) & 0x1f)](mX)
// Table dispatchers: each indexes an opcode table with a bit-field of mVU.code
// and calls the selected handler with mX.
// T3_11 sub-table: index is bits 6..10 of the instruction ((code >> 6) & 0x1f).
mVUop(mVULowerOP_T3_11) { mVULowerOP_T3_11_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } mVUop(mVULowerOP_T3_11) { mVULowerOP_T3_11_OPCODE [((mVU.code >> 6) & 0x1f)](mX); }
// Upper table: index is the low 6 bits of the instruction (code & 0x3f).
mVUop(mVUopU) { mVU_UPPER_OPCODE [ (mVU.code & 0x3f) ](mX); } // Gets Upper Opcode mVUop(mVUopU) { mVU_UPPER_OPCODE [ (mVU.code & 0x3f) ](mX); } // Gets Upper Opcode
// Lower table: index is the bits above bit 24 of the instruction (code >> 25).
mVUop(mVUopL) { mVULOWER_OPCODE [ (mVU.code >> 25) ](mX); } // Gets Lower Opcode mVUop(mVUopL) { mVULOWER_OPCODE [ (mVU.code >> 25) ](mX); } // Gets Lower Opcode
// Handler for opcodes with no table entry.
// pass1: marks the instruction as a bad op (mVUinfo.isBadOp).
// pass2: reports the opcode and PC through Console.Error.
// pass3: writes "Unknown" plus the raw code to the micro-program log.
// The encoding 0x8000033c is exempt from both the flag and the error message;
// why that value is tolerated is not visible here — confirm before changing it.
mVUop(mVUunknown) { mVUop(mVUunknown)
pass1 { if (mVU.code != 0x8000033c) mVUinfo.isBadOp = true; } {
pass2 { if(mVU.code != 0x8000033c) Console.Error("microVU%d: Unknown Micro VU opcode called (%x) [%04x]\n", getIndex, mVU.code, xPC); } pass1
{
if (mVU.code != 0x8000033c)
mVUinfo.isBadOp = true;
}
pass2
{
if (mVU.code != 0x8000033c)
Console.Error("microVU%d: Unknown Micro VU opcode called (%x) [%04x]\n", getIndex, mVU.code, xPC);
}
pass3 { mVUlog("Unknown", mVU.code); } pass3 { mVUlog("Unknown", mVU.code); }
} }

View File

@ -21,7 +21,13 @@
// Helper macros for mVUupdateFlags; they expect `modXYZW`, `mFLAG` and
// `flipMask` to be in scope at the expansion site.
// AND_XYZW: mask applied to the gathered per-lane flag bits. It is 1 when a
// single-scalar op is being adjusted (_XYZW_SS && modXYZW); otherwise the raw
// _X_Y_Z_W field when the MAC flag is updated, or its flipMask[] lookup
// (the 4-bit value with bit order reversed) when it is not.
#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (mFLAG.doFlag ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) #define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (mFLAG.doFlag ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W])))
// ADD_XYZW: shift amount for an adjusted single-scalar op: 3 for X, 2 for Y,
// 1 for Z, 0 otherwise (and 0 whenever no adjustment applies).
#define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0) #define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0)
// SHIFT_XYZW(gprReg): emits xSHL(gprReg, ADD_XYZW) for an adjusted single-scalar
// op on any lane other than W. The new revision wraps the body in
// do { ... } while (0) so the expansion behaves as one statement.
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { xSHL(gprReg, ADD_XYZW); } } #define SHIFT_XYZW(gprReg) \
do { \
if (_XYZW_SS && modXYZW && !_W) \
{ \
xSHL(gprReg, ADD_XYZW); \
} \
} while (0)
const __aligned16 u32 sse4_compvals[2][4] = { const __aligned16 u32 sse4_compvals[2][4] = {
@ -30,31 +36,38 @@ const __aligned16 u32 sse4_compvals[2][4] = {
}; };
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1) { static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1)
{
const x32& mReg = gprT1; const x32& mReg = gprT1;
const x32& sReg = getFlagReg(sFLAG.write); const x32& sReg = getFlagReg(sFLAG.write);
bool regT1b = regT1in.IsEmpty(), regT2b = false; bool regT1b = regT1in.IsEmpty(), regT2b = false;
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } if (!sFLAG.doFlag && !mFLAG.doFlag)
return;
const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in; const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in;
xmm regT2 = reg; xmm regT2 = reg;
if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) { if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW)))
{
regT2 = regT2in; regT2 = regT2in;
if (regT2.IsEmpty()) { if (regT2.IsEmpty())
{
regT2 = mVU.regAlloc->allocReg(); regT2 = mVU.regAlloc->allocReg();
regT2b = true; regT2b = true;
} }
xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw
} }
else regT2 = reg; else
regT2 = reg;
if (sFLAG.doFlag) { if (sFLAG.doFlag)
{
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
if (sFLAG.doNonSticky) xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags if (sFLAG.doNonSticky)
xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
} }
//-------------------------Check for Signed flags------------------------------ //-------------------------Check for Signed flags------------------------------
@ -70,11 +83,13 @@ static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, c
//-------------------------Check for Zero flags------------------------------ //-------------------------Check for Zero flags------------------------------
xAND(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation xAND(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); } if (mFLAG.doFlag)
SHIFT_XYZW(gprT2);
xOR(mReg, gprT2); xOR(mReg, gprT2);
//-------------------------Overflow Flags----------------------------------- //-------------------------Overflow Flags-----------------------------------
if (sFLAG.doFlag) { if (sFLAG.doFlag)
{
//Calculate overflow //Calculate overflow
xMOVAPS(regT1, regT2); xMOVAPS(regT1, regT2);
xAND.PS(regT1, ptr128[&sse4_compvals[1][0]]); // Remove sign flags (we don't care) xAND.PS(regT1, ptr128[&sse4_compvals[1][0]]); // Remove sign flags (we don't care)
@ -90,17 +105,22 @@ static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, c
} }
//-------------------------Write back flags------------------------------ //-------------------------Write back flags------------------------------
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag if (mFLAG.doFlag)
if (sFLAG.doFlag) { mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
if (sFLAG.doFlag)
{
xAND(mReg, 0xFF); // Ignore overflow bits, they're handled separately xAND(mReg, 0xFF); // Ignore overflow bits, they're handled separately
xOR(sReg, mReg); xOR(sReg, mReg);
if (sFLAG.doNonSticky) { if (sFLAG.doNonSticky)
{
xSHL(mReg, 8); xSHL(mReg, 8);
xOR(sReg, mReg); xOR(sReg, mReg);
} }
} }
if (regT1b) mVU.regAlloc->clearNeeded(regT1); if (regT1b)
if (regT2b) mVU.regAlloc->clearNeeded(regT2); mVU.regAlloc->clearNeeded(regT1);
if (regT2b)
mVU.regAlloc->clearNeeded(regT2);
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -125,14 +145,16 @@ static void (*const SSE_SS[]) (microVU&, const xmm&, const xmm&, const xmm&, con
SSE_ADD2SS // 5 SSE_ADD2SS // 5
}; };
// Bit flags selecting which operands an FMAC helper clamps before the
// operation. Values are distinct bits so they can be OR-ed into the
// `clampType` argument and tested with `clampType & cXX` (see mVU_FMACa/b/c).
enum clampModes { enum clampModes
{
cFt = 0x01, // Clamp Ft / I-reg / Q-reg cFt = 0x01, // Clamp Ft / I-reg / Q-reg
cFs = 0x02, // Clamp Fs cFs = 0x02, // Clamp Fs
cACC = 0x04, // Clamp ACC cACC = 0x04, // Clamp ACC
}; };
// Prints Opcode to MicroProgram Logs // Prints Opcode to MicroProgram Logs
static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC) { static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC)
{
mVUlog(microOpcodeName[opEnum]); mVUlog(microOpcodeName[opEnum]);
opCase1 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogFt(); } opCase1 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogFt(); }
opCase2 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogBC(); } opCase2 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogBC(); }
@ -141,21 +163,24 @@ static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC
} }
// Sets Up Pass1 Info for Normal, BC, I, and Q Cases // Sets Up Pass1 Info for Normal, BC, I, and Q Cases
// Pass-1 analysis shared by the FMAC helpers.
//   opCase       - selects the operand form (1..4: normal, BC, I, Q per the header comment above).
//   isACC        - when true the destination register argument is passed as 0 instead of _Fd_.
//   noFlagUpdate - when true, clears sFLAG.doFlag after analysis (callers pass this for the Max/Min ops).
// Case 2 uses mVUanalyzeFMAC3; the other cases use mVUanalyzeFMAC1, with cases
// 3 and 4 passing 0 in place of _Ft_.
static void setupPass1(microVU& mVU, int opCase, bool isACC, bool noFlagUpdate) { static void setupPass1(microVU& mVU, int opCase, bool isACC, bool noFlagUpdate)
{
opCase1 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase1 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); }
opCase2 { mVUanalyzeFMAC3(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase2 { mVUanalyzeFMAC3(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); }
opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); }
opCase4 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } opCase4 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); }
if (noFlagUpdate) { //Max/Min Ops if (noFlagUpdate) //Max/Min Ops
sFLAG.doFlag = false; sFLAG.doFlag = false;
} }
}
// Safer to force 0 as the result for X minus X than to do actual subtraction // Safer to force 0 as the result for X minus X than to do actual subtraction
static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC) { static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC)
opCase1 { {
if ((opType == 1) && (_Ft_ == _Fs_)) { opCase1
{
if ((opType == 1) && (_Ft_ == _Fs_))
{
const xmm& Fs = mVU.regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W);
xPXOR(Fs, Fs); // Set to Positive 0 xPXOR(Fs, Fs); // Set to Positive 0
mVUupdateFlags(mVU, Fs); mVUupdateFlags(mVU, Fs);
@ -167,41 +192,66 @@ static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC) {
} }
// Sets Up Ft Reg for Normal, BC, I, and Q Cases // Sets Up Ft Reg for Normal, BC, I, and Q Cases
// Allocates the second-operand register for an FMAC op and reports, through
// `tempFt`, a register the caller may hand on as a scratch temp (or xEmptyReg
// when there is none; callers pass tempFt to the SSE_* helpers / mVUupdateFlags).
//   Case 1 (normal): Ft is allocated from VF[_Ft_]. When _XYZW_SS2 or clampE is
//           set, the allocation names no destination and Ft doubles as tempFt;
//           otherwise Ft is allocated with allocReg(_Ft_) alone and tempFt is empty.
//   Case 2 (BC):     loads VF[_Ft_], expands component _bc_ into a fresh register
//           with mVUunpack_xyzw, releases the source, and uses the fresh
//           register as both Ft and tempFt.
//   Case 3 (I):      allocates register index 33 (presumably the I register — confirm in the allocator).
//   Case 4 (Q):      uses xmmPQ directly when no clamping is needed, the op is
//           single-scalar, and mVUinfo.readQ is 0; otherwise copies Q into a
//           fresh register via getQreg.
static void setupFtReg(microVU& mVU, xmm& Ft, xmm& tempFt, int opCase) { static void setupFtReg(microVU& mVU, xmm& Ft, xmm& tempFt, int opCase)
opCase1 { {
opCase1
{
if (_XYZW_SS2) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } if (_XYZW_SS2) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
else if (clampE) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; } else if (clampE) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; }
else { Ft = mVU.regAlloc->allocReg(_Ft_); tempFt = xEmptyReg; } else { Ft = mVU.regAlloc->allocReg(_Ft_); tempFt = xEmptyReg; }
} }
opCase2 { opCase2
{
tempFt = mVU.regAlloc->allocReg(_Ft_); tempFt = mVU.regAlloc->allocReg(_Ft_);
Ft = mVU.regAlloc->allocReg(); Ft = mVU.regAlloc->allocReg();
mVUunpack_xyzw(Ft, tempFt, _bc_); mVUunpack_xyzw(Ft, tempFt, _bc_);
mVU.regAlloc->clearNeeded(tempFt); mVU.regAlloc->clearNeeded(tempFt);
tempFt = Ft; tempFt = Ft;
} }
opCase3 { Ft = mVU.regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; } opCase3
opCase4 { {
if (!clampE && _XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = xEmptyReg; } Ft = mVU.regAlloc->allocReg(33, 0, _X_Y_Z_W);
else { Ft = mVU.regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); } tempFt = Ft;
}
opCase4
{
if (!clampE && _XYZW_SS && !mVUinfo.readQ)
{
Ft = xmmPQ;
tempFt = xEmptyReg;
}
else
{
Ft = mVU.regAlloc->allocReg();
tempFt = Ft;
getQreg(Ft, mVUinfo.readQ);
}
} }
} }
// Normal FMAC Opcodes // Normal FMAC Opcodes
static void mVU_FMACa(microVU& mVU, int recPass, int opCase, int opType, bool isACC, microOpcode opEnum, int clampType) { static void mVU_FMACa(microVU& mVU, int recPass, int opCase, int opType, bool isACC, microOpcode opEnum, int clampType)
{
pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); }
pass2 { pass2
if (doSafeSub(mVU, opCase, opType, isACC)) return; {
if (doSafeSub(mVU, opCase, opType, isACC))
return;
xmm Fs, Ft, ACC, tempFt; xmm Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase); setupFtReg(mVU, Ft, tempFt, opCase);
if (isACC) { if (isACC)
{
Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
ACC = mVU.regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0); ACC = mVU.regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0);
if (_XYZW_SS2) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); if (_XYZW_SS2)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
}
else
{
Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W);
} }
else { Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); }
if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W); if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W);
if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W);
@ -209,34 +259,46 @@ static void mVU_FMACa(microVU& mVU, int recPass, int opCase, int opType, bool is
if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg); if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg);
else SSE_PS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg); else SSE_PS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg);
if (isACC) { if (isACC)
if (_XYZW_SS) xMOVSS(ACC, Fs); {
else mVUmergeRegs(ACC, Fs, _X_Y_Z_W); if (_XYZW_SS)
xMOVSS(ACC, Fs);
else
mVUmergeRegs(ACC, Fs, _X_Y_Z_W);
mVUupdateFlags(mVU, ACC, Fs, tempFt); mVUupdateFlags(mVU, ACC, Fs, tempFt);
if (_XYZW_SS2) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); if (_XYZW_SS2)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
mVU.regAlloc->clearNeeded(ACC); mVU.regAlloc->clearNeeded(ACC);
} }
else mVUupdateFlags(mVU, Fs, tempFt); else
mVUupdateFlags(mVU, Fs, tempFt);
mVU.regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU.regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opEnum); mVU.profiler.EmitOp(opEnum);
} }
pass3 { mVU_printOP(mVU, opCase, opEnum, isACC); } pass3 { mVU_printOP(mVU, opCase, opEnum, isACC); }
pass4 { if ((opType != 3) && (opType != 4)) mVUregs.needExactMatch |= 8; } pass4
{
if ((opType != 3) && (opType != 4))
mVUregs.needExactMatch |= 8;
}
} }
// MADDA/MSUBA Opcodes // MADDA/MSUBA Opcodes
static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOpcode opEnum, int clampType) { static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOpcode opEnum, int clampType)
{
pass1 { setupPass1(mVU, opCase, true, false); } pass1 { setupPass1(mVU, opCase, true, false); }
pass2 { pass2
{
xmm Fs, Ft, ACC, tempFt; xmm Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase); setupFtReg(mVU, Ft, tempFt, opCase);
Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
ACC = mVU.regAlloc->allocReg(32, 32, 0xf, false); ACC = mVU.regAlloc->allocReg(32, 32, 0xf, false);
if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } if (_XYZW_SS2)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W); if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W);
if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W);
@ -244,13 +306,16 @@ static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOp
if (_XYZW_SS) SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); if (_XYZW_SS) SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg);
else SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); else SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg);
if (_XYZW_SS || _X_Y_Z_W == 0xf) { if (_XYZW_SS || _X_Y_Z_W == 0xf)
{
if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, xEmptyReg); if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, xEmptyReg);
else SSE_PS[opType](mVU, ACC, Fs, tempFt, xEmptyReg); else SSE_PS[opType](mVU, ACC, Fs, tempFt, xEmptyReg);
mVUupdateFlags(mVU, ACC, Fs, tempFt); mVUupdateFlags(mVU, ACC, Fs, tempFt);
if (_XYZW_SS && _X_Y_Z_W != 8) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); if (_XYZW_SS && _X_Y_Z_W != 8)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
} }
else { else
{
const xmm& tempACC = mVU.regAlloc->allocReg(); const xmm& tempACC = mVU.regAlloc->allocReg();
xMOVAPS(tempACC, ACC); xMOVAPS(tempACC, ACC);
SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg); SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg);
@ -269,25 +334,30 @@ static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOp
} }
// MADD Opcodes // MADD Opcodes
static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) { static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType)
{
pass1 { setupPass1(mVU, opCase, false, false); } pass1 { setupPass1(mVU, opCase, false, false); }
pass2 { pass2
{
xmm Fs, Ft, ACC, tempFt; xmm Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase); setupFtReg(mVU, Ft, tempFt, opCase);
ACC = mVU.regAlloc->allocReg(32); ACC = mVU.regAlloc->allocReg(32);
Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W);
if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } if (_XYZW_SS2)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W); if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W);
if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W);
if (clampType & cACC) mVUclamp2(mVU, ACC, xEmptyReg, _X_Y_Z_W); if (clampType & cACC) mVUclamp2(mVU, ACC, xEmptyReg, _X_Y_Z_W);
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_SS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_SS[0](mVU, Fs, ACC, tempFt, xEmptyReg); }
else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[0](mVU, Fs, ACC, tempFt, xEmptyReg); }
if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } if (_XYZW_SS2)
xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
mVUupdateFlags(mVU, Fs, tempFt); mVUupdateFlags(mVU, Fs, tempFt);
@ -301,9 +371,11 @@ static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum,
} }
// MSUB Opcodes // MSUB Opcodes
static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) { static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType)
{
pass1 { setupPass1(mVU, opCase, false, false); } pass1 { setupPass1(mVU, opCase, false, false); }
pass2 { pass2
{
xmm Fs, Ft, Fd, tempFt; xmm Fs, Ft, Fd, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase); setupFtReg(mVU, Ft, tempFt, opCase);
@ -329,22 +401,31 @@ static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum,
} }
// ABS Opcode // ABS Opcode
// ABS: writes a masked copy of VF[_Fs_] to VF[_Ft_].
// pass1: registers the Fs -> Ft dependency via mVUanalyzeFMAC2.
// pass2: emits nothing when the destination index _Ft_ is 0. Otherwise it
//        allocates Fs with _Ft_ as the write target (the last allocReg argument
//        is false only when source and destination are the same register and
//        all four lanes are written), ANDs it with mVUglob.absclip (presumably
//        a sign-clearing mask — confirm its definition), and releases it.
// pass3: logs the mnemonic and the Ft/Fs operands.
mVUop(mVU_ABS) { mVUop(mVU_ABS)
{
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
pass2 { pass2
if (!_Ft_) return; {
if (!_Ft_)
return;
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
xAND.PS(Fs, ptr128[mVUglob.absclip]); xAND.PS(Fs, ptr128[mVUglob.absclip]);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opABS); mVU.profiler.EmitOp(opABS);
} }
pass3 { mVUlog("ABS"); mVUlogFtFs(); } pass3
{
mVUlog("ABS");
mVUlogFtFs();
}
} }
// OPMULA Opcode // OPMULA Opcode
mVUop(mVU_OPMULA) { mVUop(mVU_OPMULA)
{
pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
pass2 { pass2
{
const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
@ -356,14 +437,21 @@ mVUop(mVU_OPMULA) {
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opOPMULA); mVU.profiler.EmitOp(opOPMULA);
} }
pass3 { mVUlog("OPMULA"); mVUlogACC(); mVUlogFt(); } pass3
{
mVUlog("OPMULA");
mVUlogACC();
mVUlogFt();
}
pass4 { mVUregs.needExactMatch |= 8; } pass4 { mVUregs.needExactMatch |= 8; }
} }
// OPMSUB Opcode // OPMSUB Opcode
mVUop(mVU_OPMSUB) { mVUop(mVU_OPMSUB)
{
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
pass2 { pass2
{
const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf);
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
const xmm& ACC = mVU.regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); const xmm& ACC = mVU.regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
@ -378,22 +466,31 @@ mVUop(mVU_OPMSUB) {
mVU.regAlloc->clearNeeded(ACC); mVU.regAlloc->clearNeeded(ACC);
mVU.profiler.EmitOp(opOPMSUB); mVU.profiler.EmitOp(opOPMSUB);
} }
pass3 { mVUlog("OPMSUB"); mVUlogFd(); mVUlogFt(); } pass3
{
mVUlog("OPMSUB");
mVUlogFd();
mVUlogFt();
}
pass4 { mVUregs.needExactMatch |= 8; } pass4 { mVUregs.needExactMatch |= 8; }
} }
// FTOI0/FTOI4/FTOI12/FTOI15 Opcodes // FTOI0/FTOI4/FTOI12/FTOI15 Opcodes
static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) { static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum)
{
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
pass2 { pass2
if (!_Ft_) return; {
if (!_Ft_)
return;
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t1 = mVU.regAlloc->allocReg();
const xmm& t2 = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg();
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
xMOVAPS(t1, Fs); xMOVAPS(t1, Fs);
if (addr) { xMUL.PS(Fs, ptr128[addr]); } if (addr)
xMUL.PS(Fs, ptr128[addr]);
xCVTTPS2DQ(Fs, Fs); xCVTTPS2DQ(Fs, Fs);
xPXOR(t1, ptr128[mVUglob.signbit]); xPXOR(t1, ptr128[mVUglob.signbit]);
xPSRA.D(t1, 31); xPSRA.D(t1, 31);
@ -407,30 +504,44 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) {
mVU.regAlloc->clearNeeded(t2); mVU.regAlloc->clearNeeded(t2);
mVU.profiler.EmitOp(opEnum); mVU.profiler.EmitOp(opEnum);
} }
pass3 { mVUlog(microOpcodeName[opEnum]); mVUlogFtFs(); } pass3
{
mVUlog(microOpcodeName[opEnum]);
mVUlogFtFs();
}
} }
// ITOF0/ITOF4/ITOF12/ITOF15 Opcodes // ITOF0/ITOF4/ITOF12/ITOF15 Opcodes
// Shared implementation of the ITOF* opcodes (integer -> float conversion).
//   addr   - optional 128-bit constant the converted value is multiplied by;
//            when null the multiply is not emitted. Presumably the per-variant
//            fixed-point scale factor — confirm at the call sites.
//   opEnum - opcode id used for profiling and for the log mnemonic.
// pass1: registers the Fs -> Ft dependency via mVUanalyzeFMAC2.
// pass2: emits nothing when the destination index _Ft_ is 0; otherwise converts
//        packed dwords to floats in place with xCVTDQ2PS, applies the optional
//        multiply, and releases the register (which was allocated with _Ft_ as
//        its write target).
// pass3: logs the mnemonic and the Ft/Fs operands.
static void mVU_ITOFx(mP, const float* addr, microOpcode opEnum) { static void mVU_ITOFx(mP, const float* addr, microOpcode opEnum)
{
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
pass2 { pass2
if (!_Ft_) return; {
if (!_Ft_)
return;
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
xCVTDQ2PS(Fs, Fs); xCVTDQ2PS(Fs, Fs);
if (addr) { xMUL.PS(Fs, ptr128[addr]); } if (addr)
xMUL.PS(Fs, ptr128[addr]);
//mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed) //mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed)
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opEnum); mVU.profiler.EmitOp(opEnum);
} }
pass3 { mVUlog(microOpcodeName[opEnum]); mVUlogFtFs(); } pass3
{
mVUlog(microOpcodeName[opEnum]);
mVUlogFtFs();
}
} }
// Clip Opcode // Clip Opcode
mVUop(mVU_CLIP) { mVUop(mVU_CLIP)
{
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); } pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
pass2 { pass2
{
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1);
const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t1 = mVU.regAlloc->allocReg();
@ -466,7 +577,11 @@ mVUop(mVU_CLIP) {
mVU.regAlloc->clearNeeded(t1); mVU.regAlloc->clearNeeded(t1);
mVU.profiler.EmitOp(opCLIP); mVU.profiler.EmitOp(opCLIP);
} }
pass3 { mVUlog("CLIP"); mVUlogCLIP(); } pass3
{
mVUlog("CLIP");
mVUlogCLIP();
}
} }
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -52,7 +52,8 @@ _vifT extern void dVifUnpack (const u8* data, bool isFill);
#define xmmRow xmm6 #define xmmRow xmm6
#define xmmTemp xmm7 #define xmmTemp xmm7
struct nVifStruct { struct nVifStruct
{
// Buffer for partial transfers (should always be first to ensure alignment) // Buffer for partial transfers (should always be first to ensure alignment)
// Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword) // Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword)
__aligned16 u8 buffer[256*16]; __aligned16 u8 buffer[256*16];
@ -67,6 +68,7 @@ struct nVifStruct {
HashBucket vifBlocks; // Vif Blocks HashBucket vifBlocks; // Vif Blocks
nVifStruct() = default; nVifStruct() = default;
}; };

View File

@ -22,7 +22,8 @@
#include "MTVU.h" #include "MTVU.h"
#include "common/Perf.h" #include "common/Perf.h"
static void recReset(int idx) { static void recReset(int idx)
{
nVif[idx].vifBlocks.reset(); nVif[idx].vifBlocks.reset();
nVif[idx].recReserve->Reset(); nVif[idx].recReserve->Reset();
@ -30,7 +31,8 @@ static void recReset(int idx) {
nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr(); nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr();
} }
void dVifReserve(int idx) { void dVifReserve(int idx)
{
if (!nVif[idx].recReserve) if (!nVif[idx].recReserve)
nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb); nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb);
@ -38,18 +40,21 @@ void dVifReserve(int idx) {
nVif[idx].recReserve->Reserve(GetVmMemory().MainMemory(), offset, 8 * _1mb); nVif[idx].recReserve->Reserve(GetVmMemory().MainMemory(), offset, 8 * _1mb);
} }
void dVifReset(int idx) { void dVifReset(int idx)
{
pxAssertDev(nVif[idx].recReserve, "Dynamic VIF recompiler reserve must be created prior to VIF use or reset!"); pxAssertDev(nVif[idx].recReserve, "Dynamic VIF recompiler reserve must be created prior to VIF use or reset!");
recReset(idx); recReset(idx);
} }
void dVifClose(int idx) { void dVifClose(int idx)
{
if (nVif[idx].recReserve) if (nVif[idx].recReserve)
nVif[idx].recReserve->Reset(); nVif[idx].recReserve->Reset();
} }
void dVifRelease(int idx) { void dVifRelease(int idx)
{
dVifClose(idx); dVifClose(idx);
safe_delete(nVif[idx].recReserve); safe_delete(nVif[idx].recReserve);
} }
@ -72,7 +77,8 @@ __fi void makeMergeMask(u32& x)
x = ((x & 0x40) >> 6) | ((x & 0x10) >> 3) | (x & 4) | ((x & 1) << 3); x = ((x & 0x40) >> 6) | ((x & 0x10) >> 3) | (x & 4) | ((x & 1) << 3);
} }
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
{
const int idx = v.idx; const int idx = v.idx;
const vifStruct& vif = MTVU_VifX; const vifStruct& vif = MTVU_VifX;
@ -81,8 +87,13 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge) u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
if((m2&&doMask)||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); MSKPATH3_LOG("Moving row");} if ((m2 && doMask) || doMode)
if (m3&&doMask) { {
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
MSKPATH3_LOG("Moving row");
}
if (m3 && doMask)
{
MSKPATH3_LOG("Merging Cols"); MSKPATH3_LOG("Merging Cols");
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]); xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
@ -93,7 +104,8 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
//if (doMask||doMode) loadRowCol((nVifStruct&)v); //if (doMask||doMode) loadRowCol((nVifStruct&)v);
} }
void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
{
pxAssertDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking."); pxAssertDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
int cc = std::min(vCL, 3); int cc = std::min(vCL, 3);
@ -106,14 +118,25 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
makeMergeMask(m3); makeMergeMask(m3);
makeMergeMask(m4); makeMergeMask(m4);
if (doMask&&m2) { mergeVectors(regX, xmmRow, xmmTemp, m2); } // Merge MaskRow if (doMask && m2) // Merge MaskRow
if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), xmmTemp, m3); } // Merge MaskCol {
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); mergeVectors(regX, xmmRow, xmmTemp, m2);
mergeVectors(regX, xmmTemp, xmmTemp, m4); } // Merge Write Protect }
if (doMode) { if (doMask && m3) // Merge MaskCol
{
mergeVectors(regX, xRegisterSSE(xmmCol0.Id + cc), xmmTemp, m3);
}
if (doMask && m4) // Merge Write Protect
{
xMOVAPS(xmmTemp, ptr[dstIndirect]);
mergeVectors(regX, xmmTemp, xmmTemp, m4);
}
if (doMode)
{
u32 m5 = ~(m2 | m3 | m4) & 0xf; u32 m5 = ~(m2 | m3 | m4) & 0xf;
if (!doMask) m5 = 0xf; if (!doMask)
m5 = 0xf;
if (m5 < 0xf) if (m5 < 0xf)
{ {
@ -126,9 +149,9 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
{ {
mergeVectors(xmmTemp, xmmRow, xmmTemp, m5); mergeVectors(xmmTemp, xmmRow, xmmTemp, m5);
xPADD.D(regX, xmmTemp); xPADD.D(regX, xmmTemp);
if (doMode == 2) mergeVectors(xmmRow, regX, xmmTemp, m5); if (doMode == 2)
mergeVectors(xmmRow, regX, xmmTemp, m5);
} }
} }
else else
{ {
@ -139,14 +162,16 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
else else
{ {
xPADD.D(regX, xmmRow); xPADD.D(regX, xmmRow);
if (doMode == 2) { xMOVAPS(xmmRow, regX); } if (doMode == 2)
xMOVAPS(xmmRow, regX);
} }
} }
} }
xMOVAPS(ptr32[dstIndirect], regX); xMOVAPS(ptr32[dstIndirect], regX);
} }
void VifUnpackSSE_Dynarec::writeBackRow() const { void VifUnpackSSE_Dynarec::writeBackRow() const
{
const int idx = v.idx; const int idx = v.idx;
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow); xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
@ -166,7 +191,8 @@ static void ShiftDisplacementWindow( xAddressVoid& addr, const xRegisterLong& mo
addImm += 0xf0; addImm += 0xf0;
addr -= 0xf0; addr -= 0xf0;
} }
if(addImm) { xADD(modReg, addImm); } if (addImm)
xADD(modReg, addImm);
} }
void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp) void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp)
@ -176,20 +202,49 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
{ {
case 0: case 0:
case 1: case 1:
case 2: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break; case 2:
if (PostOp)
{
UnpkLoopIteration++;
UnpkLoopIteration = UnpkLoopIteration & 0x3;
}
break;
case 4: case 4:
case 5: case 5:
case 6: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; case 6:
if (PostOp)
{
UnpkLoopIteration++;
UnpkLoopIteration = UnpkLoopIteration & 0x1;
}
break;
case 8: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; case 8:
case 9: if (!PostOp) { UnpkLoopIteration++; } break; if (PostOp)
case 10:if (!PostOp) { UnpkLoopIteration++; } break; {
UnpkLoopIteration++;
UnpkLoopIteration = UnpkLoopIteration & 0x1;
}
break;
case 9:
if (!PostOp)
{
UnpkLoopIteration++;
}
break;
case 10:
if (!PostOp)
{
UnpkLoopIteration++;
}
break;
case 12: break; case 12:
case 13: break; case 13:
case 14: break; case 14:
case 15: break; case 15:
break;
case 3: case 3:
case 7: case 7:
@ -197,10 +252,10 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp )
pxFailRel(wxsFormat(L"Vpu/Vif - Invalid Unpack! [%d]", upknum)); pxFailRel(wxsFormat(L"Vpu/Vif - Invalid Unpack! [%d]", upknum));
break; break;
} }
} }
void VifUnpackSSE_Dynarec::CompileRoutine() { void VifUnpackSSE_Dynarec::CompileRoutine()
{
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2) const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
const int upkNum = vB.upkType & 0xf; const int upkNum = vB.upkType & 0xf;
const u8& vift = nVifT[upkNum]; const u8& vift = nVifT[upkNum];
@ -218,7 +273,8 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
// Value passed determines # of col regs we need to load // Value passed determines # of col regs we need to load
SetMasks(isFill ? blockSize : cycleSize); SetMasks(isFill ? blockSize : cycleSize);
while (vNum) { while (vNum)
{
ShiftDisplacementWindow(dstIndirect, arg1reg); ShiftDisplacementWindow(dstIndirect, arg1reg);
@ -227,7 +283,8 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source. ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.
if (vCL < cycleSize) { if (vCL < cycleSize)
{
ModUnpack(upkNum, false); ModUnpack(upkNum, false);
xUnpack(upkNum); xUnpack(upkNum);
xMovDest(); xMovDest();
@ -238,9 +295,11 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
srcIndirect += vift; srcIndirect += vift;
vNum--; vNum--;
if (++vCL == blockSize) vCL = 0; if (++vCL == blockSize)
vCL = 0;
} }
else if (isFill) { else if (isFill)
{
//Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source. //Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source.
//DevCon.WriteLn("filling mode!"); //DevCon.WriteLn("filling mode!");
xUnpack(upkNum); xUnpack(upkNum);
@ -249,22 +308,27 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
dstIndirect += 16; dstIndirect += 16;
vNum--; vNum--;
if (++vCL == blockSize) vCL = 0; if (++vCL == blockSize)
vCL = 0;
} }
else { else
{
dstIndirect += (16 * skipSize); dstIndirect += (16 * skipSize);
vCL = 0; vCL = 0;
} }
} }
if (doMode>=2) writeBackRow(); if (doMode >= 2)
writeBackRow();
xRET(); xRET();
} }
static u16 dVifComputeLength(uint cl, uint wl, u8 num, bool isFill) { static u16 dVifComputeLength(uint cl, uint wl, u8 num, bool isFill)
{
uint length = (num > 0) ? (num * 16) : 4096; // 0 = 256 uint length = (num > 0) ? (num * 16) : 4096; // 0 = 256
if (!isFill) { if (!isFill)
{
uint skipSize = (cl - wl) * 16; uint skipSize = (cl - wl) * 16;
uint blocks = (num + (wl - 1)) / wl; //Need to round up num's to calculate skip size correctly. uint blocks = (num + (wl - 1)) / wl; //Need to round up num's to calculate skip size correctly.
length += (blocks - 1) * skipSize; length += (blocks - 1) * skipSize;
@ -273,14 +337,15 @@ static u16 dVifComputeLength(uint cl, uint wl, u8 num, bool isFill) {
return std::min(length, 0xFFFFu); return std::min(length, 0xFFFFu);
} }
_vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) { _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
{
nVifStruct& v = nVif[idx]; nVifStruct& v = nVif[idx];
// Check size before the compilation // Check size before the compilation
if (v.recWritePtr > (v.recReserve->GetPtrEnd() - _256kb)) { if (v.recWritePtr > (v.recReserve->GetPtrEnd() - _256kb))
{
DevCon.WriteLn(L"nVif Recompiler Cache Reset! [%ls > %ls]", DevCon.WriteLn(L"nVif Recompiler Cache Reset! [%ls > %ls]",
pxsPtr(v.recWritePtr), pxsPtr(v.recReserve->GetPtrEnd()) pxsPtr(v.recWritePtr), pxsPtr(v.recReserve->GetPtrEnd()));
);
recReset(idx); recReset(idx);
} }
@ -299,7 +364,8 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) {
return &block; return &block;
} }
_vifT __fi void dVifUnpack(const u8* data, bool isFill) { _vifT __fi void dVifUnpack(const u8* data, bool isFill)
{
nVifStruct& v = nVif[idx]; nVifStruct& v = nVif[idx];
vifStruct& vif = MTVU_VifX; vifStruct& vif = MTVU_VifX;
@ -338,7 +404,8 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) {
// Search in cache before trying to compile the block // Search in cache before trying to compile the block
nVifBlock* b = v.vifBlocks.find(block); nVifBlock* b = v.vifBlocks.find(block);
if (unlikely(b == nullptr)) { if (unlikely(b == nullptr))
{
b = dVifCompile<idx>(block, isFill); b = dVifCompile<idx>(block, isFill);
} }
@ -349,10 +416,13 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) {
u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit - 0x10)); u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit - 0x10));
u8* endmem = VU.Mem + vuMemLimit; u8* endmem = VU.Mem + vuMemLimit;
if (likely((startmem + b->length) <= endmem)) { if (likely((startmem + b->length) <= endmem))
{
// No wrapping, you can run the fast dynarec // No wrapping, you can run the fast dynarec
((nVifrecCall)b->startPtr)((uptr)startmem, (uptr)data); ((nVifrecCall)b->startPtr)((uptr)startmem, (uptr)data);
} else { }
else
{
VIF_LOG("Running Interpreter Block: nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", VIF_LOG("Running Interpreter Block: nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x",
v.idx, vif.tag.addr, vif.tag.addr + (block.num * 16), block.num, block.wl, block.cl); v.idx, vif.tag.addr, vif.tag.addr + (block.num * 16), block.num, block.wl, block.cl);
_nVifUnpack(idx, data, vifRegs.mode, isFill); _nVifUnpack(idx, data, vifRegs.mode, isFill);

View File

@ -19,9 +19,11 @@
// nVifBlock - Ordered for Hashing; the 'num' and 'upkType' fields are // nVifBlock - Ordered for Hashing; the 'num' and 'upkType' fields are
// used as the hash bucket selector. // used as the hash bucket selector.
union nVifBlock { union nVifBlock
{
// Warning: order depends on the newVifDynaRec code // Warning: order depends on the newVifDynaRec code
struct { struct
{
u8 num; // [00] Num Field u8 num; // [00] Num Field
u8 upkType; // [01] Unpack Type [usn1:mask1:upk*4] u8 upkType; // [01] Unpack Type [usn1:mask1:upk*4]
u16 length; // [02] Extra: pre computed Length u16 length; // [02] Extra: pre computed Length
@ -33,7 +35,8 @@ union nVifBlock {
uptr startPtr; // [12] Start Ptr of RecGen Code uptr startPtr; // [12] Start Ptr of RecGen Code
}; };
struct { struct
{
u16 hash_key; u16 hash_key;
u16 _pad0; u16 _pad0;
u32 key0; u32 key0;
@ -54,21 +57,25 @@ union nVifBlock {
// The hash function is determined by taking the first bytes of data and // The hash function is determined by taking the first bytes of data and
// performing a modulus the size of hSize. So the most diverse-data should // performing a modulus the size of hSize. So the most diverse-data should
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted) // be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
class HashBucket { class HashBucket
{
protected: protected:
std::array<nVifBlock*, hSize> m_bucket; std::array<nVifBlock*, hSize> m_bucket;
public: public:
HashBucket() { HashBucket()
{
m_bucket.fill(nullptr); m_bucket.fill(nullptr);
} }
~HashBucket() { clear(); } ~HashBucket() { clear(); }
__fi nVifBlock* find(const nVifBlock& dataPtr) { __fi nVifBlock* find(const nVifBlock& dataPtr)
{
nVifBlock* chainpos = m_bucket[dataPtr.hash_key]; nVifBlock* chainpos = m_bucket[dataPtr.hash_key];
while (true) { while (true)
{
if (chainpos->key0 == dataPtr.key0 && chainpos->key1 == dataPtr.key1) if (chainpos->key0 == dataPtr.key0 && chainpos->key1 == dataPtr.key1)
return chainpos; return chainpos;
@ -79,32 +86,36 @@ public:
} }
} }
void add(const nVifBlock& dataPtr) { void add(const nVifBlock& dataPtr)
{
u32 b = dataPtr.hash_key; u32 b = dataPtr.hash_key;
u32 size = bucket_size(dataPtr); u32 size = bucket_size(dataPtr);
// Warning there is an extra +1 due to the empty cell // Warning there is an extra +1 due to the empty cell
// Performance note: 64B align to reduce cache miss penalty in `find` // Performance note: 64B align to reduce cache miss penalty in `find`
if( (m_bucket[b] = (nVifBlock*)pcsx2_aligned_realloc( m_bucket[b], sizeof(nVifBlock)*(size+2), 64, sizeof(nVifBlock)*(size+1) )) == NULL ) { if ((m_bucket[b] = (nVifBlock*)pcsx2_aligned_realloc(m_bucket[b], sizeof(nVifBlock) * (size + 2), 64, sizeof(nVifBlock) * (size + 1))) == NULL)
{
throw Exception::OutOfMemory( throw Exception::OutOfMemory(
wxsFormat(L"HashBucket Chain (bucket size=%d)", size+2) wxsFormat(L"HashBucket Chain (bucket size=%d)", size + 2));
);
} }
// Replace the empty cell by the new block and create a new empty cell // Replace the empty cell by the new block and create a new empty cell
memcpy(&m_bucket[b][size++], &dataPtr, sizeof(nVifBlock)); memcpy(&m_bucket[b][size++], &dataPtr, sizeof(nVifBlock));
memset(&m_bucket[b][size], 0, sizeof(nVifBlock)); memset(&m_bucket[b][size], 0, sizeof(nVifBlock));
if( size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", b, size ); if (size > 3)
DevCon.Warning("recVifUnpk: Bucket 0x%04x has %d micro-programs", b, size);
} }
u32 bucket_size(const nVifBlock& dataPtr) { u32 bucket_size(const nVifBlock& dataPtr)
{
nVifBlock* chainpos = m_bucket[dataPtr.hash_key]; nVifBlock* chainpos = m_bucket[dataPtr.hash_key];
u32 size = 0; u32 size = 0;
while (chainpos->startPtr != 0) { while (chainpos->startPtr != 0)
{
size++; size++;
chainpos++; chainpos++;
} }
@ -112,20 +123,22 @@ public:
return size; return size;
} }
void clear() { void clear()
{
for (auto& bucket : m_bucket) for (auto& bucket : m_bucket)
safe_aligned_free(bucket); safe_aligned_free(bucket);
} }
void reset() { void reset()
{
clear(); clear();
// Allocate an empty cell for all buckets // Allocate an empty cell for all buckets
for (auto& bucket : m_bucket) { for (auto& bucket : m_bucket)
if( (bucket = (nVifBlock*)_aligned_malloc( sizeof(nVifBlock), 64 )) == nullptr ) { {
throw Exception::OutOfMemory( if ((bucket = (nVifBlock*)_aligned_malloc(sizeof(nVifBlock), 64)) == nullptr)
wxsFormat(L"HashBucket Chain (bucket size=%d)", 1) {
); throw Exception::OutOfMemory(wxsFormat(L"HashBucket Chain (bucket size=%d)", 1));
} }
memset(bucket, 0, sizeof(nVifBlock)); memset(bucket, 0, sizeof(nVifBlock));

View File

@ -64,10 +64,14 @@ typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
// Unpacks Until 'Num' is 0
// Dispatch table of interpreter unpack loops, indexed as
// [idx][doMode][isFill] to match the template arguments of _nVifUnpackLoop.
static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
	// idx == 0
	{
		{_nVifUnpackLoop<0, 0, 0>, _nVifUnpackLoop<0, 0, 1>}, // doMode == 0
		{_nVifUnpackLoop<0, 1, 0>, _nVifUnpackLoop<0, 1, 1>}, // doMode == 1
	},
	// idx == 1
	{
		{_nVifUnpackLoop<1, 0, 0>, _nVifUnpackLoop<1, 0, 1>}, // doMode == 0
		{_nVifUnpackLoop<1, 1, 0>, _nVifUnpackLoop<1, 1, 1>}, // doMode == 1
	},
};
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@ -80,21 +84,26 @@ void resetNewVif(int idx)
nVif[idx].bSize = 0; nVif[idx].bSize = 0;
memzero(nVif[idx].buffer); memzero(nVif[idx].buffer);
if (newVifDynaRec) dVifReset(idx); if (newVifDynaRec)
dVifReset(idx);
} }
// Close hook for the new VIF unpacker of unit `idx`.
void closeNewVif(int idx)
{
	// No work is performed here.
}
// Release hook for the new VIF unpacker of unit `idx`.
void releaseNewVif(int idx)
{
	// No work is performed here.
}
// Returns a pointer into the data memory of VU `idx` for the given offset.
// The offset is masked so it stays 16-byte aligned and wraps inside the
// unit's window: 0x3ff0 for a non-zero idx, 0xff0 for idx 0.
static __fi u8* getVUptr(uint idx, int offset)
{
	const int wrapMask = idx ? 0x3ff0 : 0xff0;
	const int wrapped = offset & wrapMask;
	return (u8*)(vuRegs[idx].Mem + wrapped);
}
_vifT int nVifUnpack(const u8* data) { _vifT int nVifUnpack(const u8* data)
{
nVifStruct& v = nVif[idx]; nVifStruct& v = nVif[idx];
vifStruct& vif = GetVifX; vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs; VIFregisters& vifRegs = vifXRegs;
@ -104,8 +113,10 @@ _vifT int nVifUnpack(const u8* data) {
const bool isFill = (vifRegs.cycle.cl < wl); const bool isFill = (vifRegs.cycle.cl < wl);
s32 size = ret << 2; s32 size = ret << 2;
if (ret == vif.tag.size) { // Full Transfer if (ret == vif.tag.size) // Full Transfer
if (v.bSize) { // Last transfer was partial {
if (v.bSize) // Last transfer was partial
{
memcpy(&v.buffer[v.bSize], data, size); memcpy(&v.buffer[v.bSize], data, size);
v.bSize += size; v.bSize += size;
size = v.bSize; size = v.bSize;
@ -113,14 +124,19 @@ _vifT int nVifUnpack(const u8* data) {
vif.cl = 0; vif.cl = 0;
vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input. vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input.
if (!vifRegs.num) vifRegs.num = 256; if (!vifRegs.num)
vifRegs.num = 256;
} }
if (!idx || !THREAD_VU1) { if (!idx || !THREAD_VU1)
if (newVifDynaRec) dVifUnpack<idx>(data, isFill); {
else _nVifUnpack(idx, data, vifRegs.mode, isFill); if (newVifDynaRec)
dVifUnpack<idx>(data, isFill);
else
_nVifUnpack(idx, data, vifRegs.mode, isFill);
} }
else vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3); else
vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3);
vif.pass = 0; vif.pass = 0;
vif.tag.size = 0; vif.tag.size = 0;
@ -128,7 +144,8 @@ _vifT int nVifUnpack(const u8* data) {
vifRegs.num = 0; vifRegs.num = 0;
v.bSize = 0; v.bSize = 0;
} }
else { // Partial Transfer else // Partial Transfer
{
memcpy(&v.buffer[v.bSize], data, size); memcpy(&v.buffer[v.bSize], data, size);
v.bSize += size; v.bSize += size;
vif.tag.size -= ret; vif.tag.size -= ret;
@ -139,10 +156,12 @@ _vifT int nVifUnpack(const u8* data) {
// to read back from it mid-transfer. Since so few games actually use partial transfers // to read back from it mid-transfer. Since so few games actually use partial transfers
// of VIF unpacks, this code should not be any bottleneck. // of VIF unpacks, this code should not be any bottleneck.
if (!isFill) { if (!isFill)
{
vifRegs.num -= (size / vSize); vifRegs.num -= (size / vSize);
} }
else { else
{
int dataSize = (size / vSize); int dataSize = (size / vSize);
vifRegs.num = vifRegs.num - (((dataSize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + dataSize); vifRegs.num = vifRegs.num - (((dataSize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + dataSize);
} }
@ -156,10 +175,13 @@ template int nVifUnpack<1>(const u8* data);
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks // This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly. // and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
static void setMasks(const vifStruct& vif, const VIFregisters& v) { static void setMasks(const vifStruct& vif, const VIFregisters& v)
for (int i = 0; i < 16; i++) { {
for (int i = 0; i < 16; i++)
{
int m = (v.mask >> (i * 2)) & 3; int m = (v.mask >> (i * 2)) & 3;
switch (m) { switch (m)
{
case 0: // Data case 0: // Data
nVifMask[0][i / 4][i % 4] = 0xffffffff; nVifMask[0][i / 4][i % 4] = 0xffffffff;
nVifMask[1][i / 4][i % 4] = 0; nVifMask[1][i / 4][i % 4] = 0;
@ -206,7 +228,8 @@ static void setMasks(const vifStruct& vif, const VIFregisters& v) {
// size - size of the packet fragment incoming from DMAC. // size - size of the packet fragment incoming from DMAC.
template <int idx, bool doMode, bool isFill> template <int idx, bool doMode, bool isFill>
__ri void __fastcall _nVifUnpackLoop(const u8* data) { __ri void __fastcall _nVifUnpackLoop(const u8* data)
{
vifStruct& vif = MTVU_VifX; vifStruct& vif = MTVU_VifX;
VIFregisters& vifRegs = MTVU_VifXRegs; VIFregisters& vifRegs = MTVU_VifXRegs;
@ -216,7 +239,8 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
//DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize); //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize);
if (!doMode && (vif.cmd & 0x10)) setMasks(vif, vifRegs); if (!doMode && (vif.cmd & 0x10))
setMasks(vif, vifRegs);
const int usn = !!vif.usn; const int usn = !!vif.usn;
const int upkNum = vif.cmd & 0x1f; const int upkNum = vif.cmd & 0x1f;
@ -231,14 +255,17 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
pxAssume(vif.cl == 0); pxAssume(vif.cl == 0);
//pxAssume (vifRegs.cycle.wl > 0); //pxAssume (vifRegs.cycle.wl > 0);
do { do
{
u8* dest = getVUptr(idx, vif.tag.addr); u8* dest = getVUptr(idx, vif.tag.addr);
if (doMode) { if (doMode)
{
//if (1) { //if (1) {
ft(dest, data); ft(dest, data);
} }
else { else
{
//DevCon.WriteLn("SSE Unpack!"); //DevCon.WriteLn("SSE Unpack!");
uint cl3 = std::min(vif.cl, 3); uint cl3 = std::min(vif.cl, 3);
fnbase[cl3](dest, data); fnbase[cl3](dest, data);
@ -248,16 +275,20 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
--vifRegs.num; --vifRegs.num;
++vif.cl; ++vif.cl;
if (isFill) { if (isFill)
{
//DevCon.WriteLn("isFill!"); //DevCon.WriteLn("isFill!");
if (vif.cl <= vifRegs.cycle.cl) data += vSize; if (vif.cl <= vifRegs.cycle.cl)
else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0; data += vSize;
else if (vif.cl == vifRegs.cycle.wl)
vif.cl = 0;
} }
else else
{ {
data += vSize; data += vSize;
if (vif.cl >= vifRegs.cycle.wl) { if (vif.cl >= vifRegs.cycle.wl)
{
vif.tag.addr += skipSize; vif.tag.addr += skipSize;
vif.cl = 0; vif.cl = 0;
} }
@ -265,8 +296,8 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
} while (vifRegs.num); } while (vifRegs.num);
} }
// Runs the interpreter unpack loop for VIF unit `idx` on `data`.
// The loop variant is selected from UnpackLoopTable by unit index, by
// whether `mode` is non-zero, and by the fill flag.
__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill)
{
	const bool modeActive = (mode != 0);
	const Fnptr_VifUnpackLoop unpackLoop = UnpackLoopTable[idx][modeActive][isFill];
	unpackLoop(data);
}

View File

@ -34,7 +34,8 @@ static const __aligned16 u32 SSEXYZWMask[4][4] =
static RecompiledCodeReserve* nVifUpkExec = NULL; static RecompiledCodeReserve* nVifUpkExec = NULL;
// Merges xmm vectors without modifying source reg
// Forwards to mVUmergeRegs with the `xyzw` field selection; the `temp`
// register is accepted for interface compatibility but is not used here.
void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw)
{
	(void)temp;
	mVUmergeRegs(dest, src, xyzw);
}
@ -54,27 +55,32 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
{ {
} }
void VifUnpackSSE_Base::xMovDest() const { void VifUnpackSSE_Base::xMovDest() const
{
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
else { doMaskWrite(destReg); } else { doMaskWrite(destReg); }
} }
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const { void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const
{
if (usn) { xPSRL.D(regX, n); } if (usn) { xPSRL.D(regX, n); }
else { xPSRA.D(regX, n); } else { xPSRA.D(regX, n); }
} }
void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const { void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const
{
if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]); if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
else xPMOVSX.BD(regX, ptr32[srcIndirect]); else xPMOVSX.BD(regX, ptr32[srcIndirect]);
} }
void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const { void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const
{
if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]); if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]);
else xPMOVSX.WD(regX, ptr64[srcIndirect]); else xPMOVSX.WD(regX, ptr64[srcIndirect]);
} }
void VifUnpackSSE_Base::xUPK_S_32() const { void VifUnpackSSE_Base::xUPK_S_32() const
{
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
@ -92,10 +98,10 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
xPSHUF.D(destReg, workReg, _v3); xPSHUF.D(destReg, workReg, _v3);
break; break;
} }
} }
void VifUnpackSSE_Base::xUPK_S_16() const { void VifUnpackSSE_Base::xUPK_S_16() const
{
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
@ -113,10 +119,10 @@ void VifUnpackSSE_Base::xUPK_S_16() const {
xPSHUF.D(destReg, workReg, _v3); xPSHUF.D(destReg, workReg, _v3);
break; break;
} }
} }
void VifUnpackSSE_Base::xUPK_S_8() const { void VifUnpackSSE_Base::xUPK_S_8() const
{
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
@ -134,7 +140,6 @@ void VifUnpackSSE_Base::xUPK_S_8() const {
xPSHUF.D(destReg, workReg, _v3); xPSHUF.D(destReg, workReg, _v3);
break; break;
} }
} }
// The V2 + V3 unpacks have freaky behaviour, the manual claims "indeterminate". // The V2 + V3 unpacks have freaky behaviour, the manual claims "indeterminate".
@ -142,24 +147,26 @@ void VifUnpackSSE_Base::xUPK_S_8() const {
// and games like Lemmings, And1 Streetball rely on this data to be like this! // and games like Lemmings, And1 Streetball rely on this data to be like this!
// I have commented after each shuffle to show what data is going where - Ref // I have commented after each shuffle to show what data is going where - Ref
void VifUnpackSSE_Base::xUPK_V2_32() const { void VifUnpackSSE_Base::xUPK_V2_32() const
{
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
xMOV128(workReg, ptr32[srcIndirect]); xMOV128(workReg, ptr32[srcIndirect]);
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
if(IsAligned)xAND.PS( destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 if (IsAligned)
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2
} }
else else
{ {
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
if(IsAligned)xAND.PS( destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 if (IsAligned)
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2
}
} }
} void VifUnpackSSE_Base::xUPK_V2_16() const
{
void VifUnpackSSE_Base::xUPK_V2_16() const {
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
@ -170,11 +177,10 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
{ {
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
} }
} }
void VifUnpackSSE_Base::xUPK_V2_8() const { void VifUnpackSSE_Base::xUPK_V2_8() const
{
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
@ -185,17 +191,18 @@ void VifUnpackSSE_Base::xUPK_V2_8() const {
{ {
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
} }
} }
void VifUnpackSSE_Base::xUPK_V3_32() const { void VifUnpackSSE_Base::xUPK_V3_32() const
{
xMOV128(destReg, ptr128[srcIndirect]); xMOV128(destReg, ptr128[srcIndirect]);
if (UnpkLoopIteration != IsAligned) if (UnpkLoopIteration != IsAligned)
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
} }
void VifUnpackSSE_Base::xUPK_V3_16() const { void VifUnpackSSE_Base::xUPK_V3_16() const
{
xPMOVXX16(destReg); xPMOVXX16(destReg);
@ -205,34 +212,37 @@ void VifUnpackSSE_Base::xUPK_V3_16() const {
//Iteration counts where we are in the packet. //Iteration counts where we are in the packet.
int result = (((UnpkLoopIteration / 4) + 1 + (4 - IsAligned)) & 0x3); int result = (((UnpkLoopIteration / 4) + 1 + (4 - IsAligned)) & 0x3);
if ((UnpkLoopIteration & 0x1) == 0 && result == 0){ if ((UnpkLoopIteration & 0x1) == 0 && result == 0)
{
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word on QW boundary if whole 32bit word is used - tested on ps2 xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word on QW boundary if whole 32bit word is used - tested on ps2
} }
} }
void VifUnpackSSE_Base::xUPK_V3_8() const { void VifUnpackSSE_Base::xUPK_V3_8() const
{
xPMOVXX8(destReg); xPMOVXX8(destReg);
if (UnpkLoopIteration != IsAligned) if (UnpkLoopIteration != IsAligned)
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
} }
void VifUnpackSSE_Base::xUPK_V4_32() const { void VifUnpackSSE_Base::xUPK_V4_32() const
{
xMOV128(destReg, ptr32[srcIndirect]); xMOV128(destReg, ptr32[srcIndirect]);
} }
void VifUnpackSSE_Base::xUPK_V4_16() const { void VifUnpackSSE_Base::xUPK_V4_16() const
{
xPMOVXX16(destReg); xPMOVXX16(destReg);
} }
// V4-8 unpack step: delegates entirely to xPMOVXX8 on destReg; no masking is
// applied afterwards.
// NOTE(review): xPMOVXX8 presumably widens packed 8-bit source elements to
// 32 bits - confirm against its definition, which is not visible here.
void VifUnpackSSE_Base::xUPK_V4_8() const { void VifUnpackSSE_Base::xUPK_V4_8() const
{
xPMOVXX8(destReg); xPMOVXX8(destReg);
} }
void VifUnpackSSE_Base::xUPK_V4_5() const { void VifUnpackSSE_Base::xUPK_V4_5() const
{
xMOV16 (workReg, ptr32[srcIndirect]); xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0); xPSHUF.D (workReg, workReg, _v0);
@ -272,6 +282,7 @@ void VifUnpackSSE_Base::xUnpack( int upknum ) const
case 14: xUPK_V4_8(); break; case 14: xUPK_V4_8(); break;
case 15: xUPK_V4_5(); break; case 15: xUPK_V4_5(); break;
case 3: case 3:
case 7: case 7:
case 11: case 11:
@ -292,7 +303,8 @@ VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
IsAligned = true; IsAligned = true;
} }
void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
{
xMOVAPS(xmm7, ptr[dstIndirect]); xMOVAPS(xmm7, ptr[dstIndirect]);
int offX = std::min(curCycle, 3); int offX = std::min(curCycle, 3);
xPAND(regX, ptr32[nVifMask[0][offX]]); xPAND(regX, ptr32[nVifMask[0][offX]]);
@ -303,7 +315,8 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
} }
// ecx = dest, edx = src // ecx = dest, edx = src
static void nVifGen(int usn, int mask, int curCycle) { static void nVifGen(int usn, int mask, int curCycle)
{
int usnpart = usn * 2 * 16; int usnpart = usn * 2 * 16;
int maskpart = mask * 16; int maskpart = mask * 16;
@ -314,7 +327,8 @@ static void nVifGen(int usn, int mask, int curCycle) {
{ {
nVifCall& ucall(nVifUpk[((usnpart + maskpart + i) * 4) + curCycle]); nVifCall& ucall(nVifUpk[((usnpart + maskpart + i) * 4) + curCycle]);
ucall = NULL; ucall = NULL;
if( nVifT[i] == 0 ) continue; if (nVifT[i] == 0)
continue;
ucall = (nVifCall)xGetAlignedCallTarget(); ucall = (nVifCall)xGetAlignedCallTarget();
vpugen.xUnpack(i); vpugen.xUnpack(i);
@ -325,7 +339,8 @@ static void nVifGen(int usn, int mask, int curCycle) {
void VifUnpackSSE_Init() void VifUnpackSSE_Init()
{ {
if (nVifUpkExec) return; if (nVifUpkExec)
return;
DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters..."); DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
@ -337,11 +352,10 @@ void VifUnpackSSE_Init()
xSetPtr(*nVifUpkExec); xSetPtr(*nVifUpkExec);
for (int a = 0; a < 2; a++) { for (int a = 0; a < 2; a++)
for (int b = 0; b < 2; b++) { for (int b = 0; b < 2; b++)
for (int c = 0; c < 4; c++) { for (int c = 0; c < 4; c++)
nVifGen(a, b, c); nVifGen(a, b, c);
}}}
nVifUpkExec->ForbidModification(); nVifUpkExec->ForbidModification();

View File

@ -75,7 +75,6 @@ protected:
virtual void xUPK_V4_16() const; virtual void xUPK_V4_16() const;
virtual void xUPK_V4_8() const; virtual void xUPK_V4_8() const;
virtual void xUPK_V4_5() const; virtual void xUPK_V4_5() const;
}; };
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
@ -146,4 +145,3 @@ protected:
return fillingWrite; return fillingWrite;
} }
}; };