diff --git a/pcsx2/x86/BaseblockEx.cpp b/pcsx2/x86/BaseblockEx.cpp index 840b11369c..002bc54fc0 100644 --- a/pcsx2/x86/BaseblockEx.cpp +++ b/pcsx2/x86/BaseblockEx.cpp @@ -22,8 +22,8 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc, uptr fnptr) std::pair range = links.equal_range(startpc); for (linkiter_t i = range.first; i != range.second; ++i) *(u32*)i->second = fnptr - (i->second + 4); - - return blocks.insert(startpc, fnptr);; + + return blocks.insert(startpc, fnptr); } int BaseBlocks::LastIndex(u32 startpc) const @@ -33,8 +33,9 @@ int BaseBlocks::LastIndex(u32 startpc) const int imin = 0, imax = blocks.size() - 1, imid; - while(imin != imax) { - imid = (imin+imax+1)>>1; + while (imin != imax) + { + imid = (imin + imax + 1) >> 1; if (blocks[imid].startpc > startpc) imax = imid - 1; @@ -72,11 +73,10 @@ BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) void BaseBlocks::Link(u32 pc, s32* jumpptr) { - BASEBLOCKEX *targetblock = Get(pc); + BASEBLOCKEX* targetblock = Get(pc); if (targetblock && targetblock->startpc == pc) *jumpptr = (s32)(targetblock->fnptr - (sptr)(jumpptr + 1)); else *jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1)); links.insert(std::pair(pc, (uptr)jumpptr)); } - diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index d61b78a84e..e0fc1704ba 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -15,7 +15,7 @@ #pragma once -#include // used by BaseBlockEx +#include // used by BaseBlockEx // Every potential jump point in the PS2's addressable memory has a BASEBLOCK // associated with it. 
So that means a BASEBLOCK for every 4 bytes of PS2 @@ -25,35 +25,36 @@ struct BASEBLOCK uptr m_pFnptr; __inline uptr GetFnptr() const { return m_pFnptr; } - void __inline SetFnptr( uptr ptr ) { m_pFnptr = ptr; } + void __inline SetFnptr(uptr ptr) { m_pFnptr = ptr; } }; // extra block info (only valid for start of fn) struct BASEBLOCKEX { - u32 startpc; + u32 startpc; uptr fnptr; - u16 size; // The size in dwords (equivalent to the number of instructions) - u16 x86size; // The size in byte of the translated x86 instructions + u16 size; // The size in dwords (equivalent to the number of instructions) + u16 x86size; // The size in byte of the translated x86 instructions #ifdef PCSX2_DEVBUILD // Could be useful to instrument the block //u32 visited; // number of times called //u64 ltime; // regs it assumes to have set already #endif - }; -class BaseBlockArray { +class BaseBlockArray +{ s32 _Reserved; s32 _Size; - BASEBLOCKEX *blocks; + BASEBLOCKEX* blocks; __fi void resize(s32 size) { pxAssert(size > 0); - BASEBLOCKEX *newMem = new BASEBLOCKEX[size]; - if(blocks) { + BASEBLOCKEX* newMem = new BASEBLOCKEX[size]; + if (blocks) + { memcpy(newMem, blocks, _Reserved * sizeof(BASEBLOCKEX)); delete[] blocks; } @@ -66,41 +67,46 @@ class BaseBlockArray { resize(size); _Reserved = size; } + public: ~BaseBlockArray() { - if(blocks) { + if (blocks) delete[] blocks; - } } - BaseBlockArray (s32 size) : _Reserved(0), - _Size(0), blocks(NULL) + BaseBlockArray(s32 size) + : _Reserved(0) + , _Size(0) + , blocks(NULL) { reserve(size); } - BASEBLOCKEX *insert(u32 startpc, uptr fnptr) + BASEBLOCKEX* insert(u32 startpc, uptr fnptr) { - if(_Size + 1 >= _Reserved) { + if (_Size + 1 >= _Reserved) + { reserve(_Reserved + 0x2000); // some games requires even more! 
} // Insert the the new BASEBLOCKEX by startpc order int imin = 0, imax = _Size, imid; - while (imin < imax) { - imid = (imin+imax)>>1; + while (imin < imax) + { + imid = (imin + imax) >> 1; if (blocks[imid].startpc > startpc) imax = imid; else imin = imid + 1; } - + pxAssert(imin == _Size || blocks[imin].startpc > startpc); - if(imin < _Size) { + if (imin < _Size) + { // make a hole for a new block. memmove(blocks + imin + 1, blocks + imin, (_Size - imin) * sizeof(BASEBLOCKEX)); } @@ -113,7 +119,7 @@ public: return &blocks[imin]; } - __fi BASEBLOCKEX &operator[](int idx) const + __fi BASEBLOCKEX& operator[](int idx) const { return *(blocks + idx); } @@ -132,7 +138,8 @@ public: { int range = last - first; - if(last < _Size) { + if (last < _Size) + { memmove(blocks + first, blocks + last, (_Size - last) * sizeof(BASEBLOCKEX)); } @@ -151,22 +158,22 @@ protected: BaseBlockArray blocks; public: - BaseBlocks() : - recompiler(0) - , blocks(0x4000) + BaseBlocks() + : recompiler(0) + , blocks(0x4000) { } - void SetJITCompile( void (*recompiler_)() ) + void SetJITCompile(void (*recompiler_)()) { recompiler = (uptr)recompiler_; } BASEBLOCKEX* New(u32 startpc, uptr fnptr); - int LastIndex (u32 startpc) const; + int LastIndex(u32 startpc) const; //BASEBLOCKEX* GetByX86(uptr ip); - __fi int Index (u32 startpc) const + __fi int Index(u32 startpc) const { int idx = LastIndex(startpc); @@ -194,7 +201,8 @@ public: { pxAssert(first <= last); int idx = first; - do{ + do + { pxAssert(idx <= last); //u32 startpc = blocks[idx].startpc; @@ -202,18 +210,17 @@ public: for (linkiter_t i = range.first; i != range.second; ++i) *(u32*)i->second = recompiler - (i->second + 4); - if( IsDevBuild ) + if (IsDevBuild) { // Clear the first instruction to 0xcc (breakpoint), as a way to assert if some // static jumps get left behind to this block. 
Note: Do not clear more than the // first byte, since this code is called during exception handlers and event handlers // both of which expect to be able to return to the recompiled code. - BASEBLOCKEX effu( blocks[idx] ); - memset( (void*)effu.fnptr, 0xcc, 1 ); + BASEBLOCKEX effu(blocks[idx]); + memset((void*)effu.fnptr, 0xcc, 1); } - } - while(idx++ < last); + } while (idx++ < last); // TODO: remove links from this block? blocks.erase(first, last + 1); @@ -228,7 +235,7 @@ public: } }; -#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4))) +#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x) * (sizeof(BASEBLOCK) / 4))) /** * Add a page to the recompiler lookup table @@ -237,19 +244,19 @@ public: * Will associate `hwlut[pagebase + pageidx]` with `pageidx << 16` */ static void recLUT_SetPage(uptr reclut[0x10000], u32 hwlut[0x10000], - BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage) + BASEBLOCK* mapbase, uint pagebase, uint pageidx, uint mappage) { // this value is in 64k pages! 
uint page = pagebase + pageidx; - pxAssert( page < 0x10000 ); + pxAssert(page < 0x10000); reclut[page] = (uptr)&mapbase[((s32)mappage - (s32)page) << 14]; if (hwlut) hwlut[page] = 0u - (pagebase << 16); } #if defined(_M_X86_64) -static_assert( sizeof(BASEBLOCK) == 8, "BASEBLOCK is not 8 bytes" ); +static_assert(sizeof(BASEBLOCK) == 8, "BASEBLOCK is not 8 bytes"); #else -static_assert( sizeof(BASEBLOCK) == 4, "BASEBLOCK is not 4 bytes" ); +static_assert(sizeof(BASEBLOCK) == 4, "BASEBLOCK is not 4 bytes"); #endif diff --git a/pcsx2/x86/R5900_Profiler.h b/pcsx2/x86/R5900_Profiler.h index d9755ecbb3..e3f10dd63e 100644 --- a/pcsx2/x86/R5900_Profiler.h +++ b/pcsx2/x86/R5900_Profiler.h @@ -20,7 +20,8 @@ #define MOVZ MOVZtemp #define MOVN MOVNtemp -enum class eeOpcode { +enum class eeOpcode +{ // Core special , regimm , J , JAL , BEQ , BNE , BLEZ , BGTZ , ADDI , ADDIU , SLTI , SLTIU , ANDI , ORI , XORI , LUI , @@ -100,22 +101,22 @@ enum class eeOpcode { // ADD COP0 ?? // "COP1" - MFC1, /* , */ CFC1, /* , */ MTC1, /* , */ CTC1 , /* , */ + MFC1 , /*,*/ CFC1 , /*,*/ MTC1 , /*,*/ CTC1 , /*,*/ // "COP1 BC1" - BC1F, BC1T, BC1FL, BC1TL, /* , */ /* , */ /* , */ /* , */ + BC1F , BC1T , BC1FL , BC1TL , /*,*/ /*,*/ /*,*/ /*,*/ // "COP1 S" - ADD_F, SUB_F, MUL_F, DIV_F, SQRT_F, ABS_F, MOV_F, NEG_F, - /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ - /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ RSQRT_F, /* , */ - ADDA_F, SUBA_F, MULA_F, /* , */ MADD_F, MSUB_F, MADDA_F, MSUBA_F, - /* , */ /* , */ /* , */ /* , */ CVTW, /* , */ /* , */ /* , */ - MAX_F, MIN_F, /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ - CF_F, /* , */ CEQ_F, /* , */ CLT_F, /* , */ CLE_F, /* , */ + ADD_F , SUB_F , MUL_F , DIV_F , SQRT_F , ABS_F , MOV_F , NEG_F , + /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ RSQRT_F , /*,*/ + ADDA_F , SUBA_F , MULA_F , /*,*/ MADD_F , MSUB_F , MADDA_F , MSUBA_F , + /*,*/ /*,*/ /*,*/ /*,*/ CVTW , /*,*/ /*,*/ /*,*/ + MAX_F , MIN_F 
, /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + CF_F, /*,*/ CEQ_F , /*,*/ CLT_F , /*,*/ CLE_F , /*,*/ // "COP1 W" - CVTS_F, /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + CVTS_F, /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ LAST }; @@ -201,22 +202,22 @@ static const char eeOpcodeName[][16] = { /* , */ /* , */ "PEXCW" , /* , */ // "COP1" - "MFC1" , /* , */ "CFC1" , /* , */ "MTC1" , /* , */ "CTC1" , /* , */ + "MFC1" , /* , */ "CFC1" , /* , */ "MTC1" , /* , */ "CTC1" , /* , */ // "COP1 BC1" - "BC1F" , "BC1T" , "BC1FL" , "BC1TL" , /* , */ /* , */ /* , */ /* , */ + "BC1F" , "BC1T" , "BC1FL" , "BC1TL" , /* , */ /* , */ /* , */ /* , */ // "COP1 S" - "ADD_F" , "SUB_F" , "MUL_F" , "DIV_F" , "SQRT_F" , "ABS_F" , "MOV_F" , "NEG_F" , - /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ - /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ "RSQRT_F" , /* , */ - "ADDA_F" , "SUBA_F" , "MULA_F" , /* , */ "MADD_F" , "MSUB_F" , "MADDA_F" , "MSUBA_F" , - /* , */ /* , */ /* , */ /* , */ "CVTW" , /* , */ /* , */ /* , */ - "MAX_F" , "MIN_F" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ - "C.F" , /* , */ "C.EQ" , /* , */ "C.LT" , /* , */ "C.LE" , /* , */ + "ADD_F" , "SUB_F" , "MUL_F" , "DIV_F" , "SQRT_F" , "ABS_F" , "MOV_F" , "NEG_F" , + /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ "RSQRT_F" , /* , */ + "ADDA_F" , "SUBA_F" , "MULA_F" , /* , */ "MADD_F" , "MSUB_F" , "MADDA_F" , "MSUBA_F" , + /* , */ /* , */ /* , */ /* , */ "CVTW" , /* , */ /* , */ /* , */ + "MAX_F" , "MIN_F" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + "C.F" , /* , */ "C.EQ" , /* , */ "C.LT" , /* , */ "C.LE" , /* , */ // "COP1 W" - "CVTS_F" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + "CVTS_F" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ "!" 
}; @@ -229,7 +230,8 @@ static const char eeOpcodeName[][16] = { using namespace x86Emitter; -struct eeProfiler { +struct eeProfiler +{ static const u32 memSpace = 1 << 19; u64 opStats[static_cast(eeOpcode::LAST)]; @@ -239,7 +241,8 @@ struct eeProfiler { u64 memStatsFast; u32 memMask; - void Reset() { + void Reset() + { memzero(opStats); memzero(memStats); memzero(memStatsConst); @@ -249,34 +252,39 @@ struct eeProfiler { pxAssert(eeOpcodeName[static_cast(eeOpcode::LAST)][0] == '!'); } - void EmitOp(eeOpcode opcode) { + void EmitOp(eeOpcode opcode) + { int op = static_cast(opcode); - xADD(ptr32[&(((u32*)opStats)[op*2+0])], 1); - xADC(ptr32[&(((u32*)opStats)[op*2+1])], 0); + xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1); + xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0); } - double per(u64 part, u64 total) { - return (double) part / (double) total * 100.0; + double per(u64 part, u64 total) + { + return (double)part / (double)total * 100.0; } - void Print() { + void Print() + { // Compute opcode stat u64 total = 0; - std::vector< std::pair > v; - std::vector< std::pair > vc; - for(int i = 0; i < static_cast(eeOpcode::LAST); i++) { + std::vector> v; + std::vector> vc; + for (int i = 0; i < static_cast(eeOpcode::LAST); i++) + { total += opStats[i]; v.push_back(std::make_pair(opStats[i], i)); } - std::sort (v.begin(), v.end()); + std::sort(v.begin(), v.end()); std::reverse(v.begin(), v.end()); DevCon.WriteLn("EE Profiler:"); - for(u32 i = 0; i < v.size(); i++) { - u64 count = v[i].first; - double stat = (double)count / (double)total * 100.0; + for (u32 i = 0; i < v.size(); i++) + { + u64 count = v[i].first; + double stat = (double)count / (double)total * 100.0; DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]", - eeOpcodeName[v[i].second], stat, (u32)count); + eeOpcodeName[v[i].second], stat, (u32)count); if (stat < 0.01) break; } @@ -288,10 +296,10 @@ struct eeProfiler { u64 gs = 0; u64 vu = 0; // FIXME: MAYBE count the scratch pad - for (size_t i = 0; i < memSpace ; i++) + 
for (size_t i = 0; i < memSpace; i++) total += memStats[i]; - int ou = 32 * _1kb; // user segment (0x10000000) + int ou = 32 * _1kb; // user segment (0x10000000) int ok = 352 * _1kb; // kernel segment (0xB0000000) for (int i = 0; i < 4 * _1kb; i++) reg += memStats[ou + 0 * _1kb + i] + memStats[ok + 0 * _1kb + i]; for (int i = 0; i < 4 * _1kb; i++) gs += memStats[ou + 4 * _1kb + i] + memStats[ok + 4 * _1kb + i]; @@ -301,35 +309,37 @@ struct eeProfiler { u64 ram = total - reg - gs - vu; double ram_p = per(ram, total); double reg_p = per(reg, total); - double gs_p = per(gs , total); - double vu_p = per(vu , total); + double gs_p = per(gs, total); + double vu_p = per(vu, total); // Compute const memory stat u64 total_const = 0; u64 reg_const = 0; - for (size_t i = 0; i < memSpace ; i++) + for (size_t i = 0; i < memSpace; i++) total_const += memStatsConst[i]; - for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i]; + for (int i = 0; i < 4 * _1kb; i++) + reg_const += memStatsConst[ou + i] + memStatsConst[ok + i]; u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough double ram_const_p = per(ram_const, ram); double reg_const_p = per(reg_const, reg); DevCon.WriteLn("\nEE Memory Profiler:"); - DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total>>32),(u32)total); - DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram>>32),(u32)ram, ram_p, ram_const_p); - DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(reg>>32),(u32)reg, reg_p, reg_const_p); - DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)( gs>>32),(u32) gs, gs_p); - DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64) (vu>>32),(u32) vu, vu_p); + DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total >> 32), (u32)total); + DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram >> 32), (u32)ram, ram_p, ram_const_p); + DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] 
Const[%3.4f%%]", (u32)(u64)(reg >> 32), (u32)reg, reg_p, reg_const_p); + DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)(gs >> 32), (u32)gs, gs_p); + DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64)(vu >> 32), (u32)vu, vu_p); u64 total_ram = memStatsSlow + memStatsFast; DevCon.WriteLn("\n RAM Fast [%3.4f%%] RAM Slow [%3.4f%%]. Total 0x%08x_%08x [%3.4f%%]", - per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram>>32),(u32)total_ram, per(total_ram, total)); + per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram >> 32), (u32)total_ram, per(total_ram, total)); v.clear(); vc.clear(); - for (int i = 0; i < 4 * _1kb; i++) { + for (int i = 0; i < 4 * _1kb; i++) + { u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i]; u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c; if (reg) @@ -337,63 +347,71 @@ struct eeProfiler { if (reg_c) vc.push_back(std::make_pair(reg_c, i * 16)); } - std::sort (v.begin(), v.end()); + std::sort(v.begin(), v.end()); std::reverse(v.begin(), v.end()); - std::sort (vc.begin(), vc.end()); + std::sort(vc.begin(), vc.end()); std::reverse(vc.begin(), vc.end()); DevCon.WriteLn("\nEE Reg Profiler:"); - for(u32 i = 0; i < v.size(); i++) { + for (u32 i = 0; i < v.size(); i++) + { u64 count = v[i].first; double stat = (double)count / (double)(reg - reg_const) * 100.0; DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", - v[i].second, stat, (u32)count); + v[i].second, stat, (u32)count); if (stat < 0.01) break; } DevCon.WriteLn("\nEE Const Reg Profiler:"); - for(u32 i = 0; i < vc.size(); i++) { + for (u32 i = 0; i < vc.size(); i++) + { u64 count = vc[i].first; double stat = (double)count / (double)reg_const * 100.0; DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", - vc[i].second, stat, (u32)count); + vc[i].second, stat, (u32)count); if (stat < 0.01) break; } - } // Warning dirty ebx - void EmitMem() { + void EmitMem() + { // Compact the 4GB virtual address to a 512KB virtual address - if 
(x86caps.hasBMI2) { + if (x86caps.hasBMI2) + { xPEXT(ebx, ecx, ptr[&memMask]); - xADD(ptr32[(rbx*4) + memStats], 1); + xADD(ptr32[(rbx * 4) + memStats], 1); } } - void EmitConstMem(u32 add) { - if (x86caps.hasBMI2) { + void EmitConstMem(u32 add) + { + if (x86caps.hasBMI2) + { u32 a = _pext_u32(add, memMask); xADD(ptr32[a + memStats], 1); xADD(ptr32[a + memStatsConst], 1); } } - void EmitSlowMem() { + void EmitSlowMem() + { xADD(ptr32[(u32*)&memStatsSlow], 1); xADC(ptr32[(u32*)&memStatsSlow + 1], 0); } - void EmitFastMem() { + void EmitFastMem() + { xADD(ptr32[(u32*)&memStatsFast], 1); xADC(ptr32[(u32*)&memStatsFast + 1], 0); } }; #else -struct eeProfiler { +struct eeProfiler +{ __fi void Reset() {} __fi void EmitOp(eeOpcode op) {} __fi void Print() {} @@ -404,6 +422,7 @@ struct eeProfiler { }; #endif -namespace EE { +namespace EE +{ extern eeProfiler Profiler; } diff --git a/pcsx2/x86/iCOP0.cpp b/pcsx2/x86/iCOP0.cpp index 6c0faaac66..913eb5ce3d 100644 --- a/pcsx2/x86/iCOP0.cpp +++ b/pcsx2/x86/iCOP0.cpp @@ -54,10 +54,10 @@ static void _setupBranchTest() // But using 32-bit loads here is ok (and faster), because we mask off // everything except the lower 10 bits away. - xMOV(eax, ptr[(&psHu32(DMAC_PCR) )]); - xMOV(ecx, 0x3ff ); // ECX is our 10-bit mask var + xMOV(eax, ptr[(&psHu32(DMAC_PCR))]); + xMOV(ecx, 0x3ff); // ECX is our 10-bit mask var xNOT(eax); - xOR(eax, ptr[(&psHu32(DMAC_STAT) )]); + xOR(eax, ptr[(&psHu32(DMAC_STAT))]); xAND(eax, ecx); xCMP(eax, ecx); } @@ -93,14 +93,14 @@ void recTLBWR() { recCall(Interp::TLBWR); } void recERET() { - recBranchCall( Interp::ERET ); + recBranchCall(Interp::ERET); } void recEI() { // must branch after enabling interrupts, so that anything // pending gets triggered properly. 
- recBranchCall( Interp::EI ); + recBranchCall(Interp::EI); } void recDI() @@ -117,7 +117,7 @@ void recDI() // Fixes booting issues in the following games: // Jak X, Namco 50th anniversary, Spongebob the Movie, Spongebob Battle for Bikini Bottom, // The Incredibles, The Incredibles rize of the underminer, Soukou kihei armodyne, Garfield Saving Arlene, Tales of Fandom Vol. 2. - if(!g_recompilingDelaySlot) + if (!g_recompilingDelaySlot) recompileNextInstruction(0); // DI execution is delayed by one instruction xMOV(eax, ptr[&cpuRegs.CP0.n.Status]); @@ -134,27 +134,28 @@ void recDI() #ifndef CP0_RECOMPILE -REC_SYS( MFC0 ); -REC_SYS( MTC0 ); +REC_SYS(MFC0); +REC_SYS(MTC0); #else void recMFC0() { - if( _Rd_ == 9 ) + if (_Rd_ == 9) { // This case needs to be handled even if the write-back is ignored (_Rt_ == 0 ) - xMOV(ecx, ptr[&cpuRegs.cycle]); - xMOV(eax, ecx); + xMOV(ecx, ptr[&cpuRegs.cycle]); + xMOV(eax, ecx); xSUB(eax, ptr[&s_iLastCOP0Cycle]); - u8* skipInc = JNZ8( 0 ); + u8* skipInc = JNZ8(0); xINC(eax); - x86SetJ8( skipInc ); - xADD(ptr[&cpuRegs.CP0.n.Count], eax); + x86SetJ8(skipInc); + xADD(ptr[&cpuRegs.CP0.n.Count], eax); xMOV(ptr[&s_iLastCOP0Cycle], ecx); - xMOV(eax, ptr[&cpuRegs.CP0.r[ _Rd_ ] ]); + xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]); - if( !_Rt_ ) return; + if (!_Rt_) + return; _deleteEEreg(_Rt_, 0); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); @@ -164,9 +165,10 @@ void recMFC0() return; } - if ( !_Rt_ ) return; + if (!_Rt_) + return; - if( _Rd_ == 25 ) + if (_Rd_ == 25) { if (0 == (_Imm_ & 1)) // MFPS, register value ignored { @@ -192,13 +194,14 @@ void recMFC0() return; } - else if(_Rd_ == 24){ + else if (_Rd_ == 24) + { COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); - return; + return; } _eeOnWriteReg(_Rt_, 1); _deleteEEreg(_Rt_, 0); - xMOV(eax, ptr[&cpuRegs.CP0.r[ _Rd_ ]]); + xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]); xCDQ(); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); @@ -206,25 
+209,25 @@ void recMFC0() void recMTC0() { - if( GPR_IS_CONST1(_Rt_) ) + if (GPR_IS_CONST1(_Rt_)) { switch (_Rd_) { case 12: iFlushCall(FLUSH_INTERPRETER); xFastCall((void*)WriteCP0Status, g_cpuConstRegs[_Rt_].UL[0]); - break; + break; case 16: iFlushCall(FLUSH_INTERPRETER); xFastCall((void*)WriteCP0Config, g_cpuConstRegs[_Rt_].UL[0]); - break; + break; case 9: xMOV(ecx, ptr[&cpuRegs.cycle]); xMOV(ptr[&s_iLastCOP0Cycle], ecx); xMOV(ptr32[&cpuRegs.CP0.r[9]], g_cpuConstRegs[_Rt_].UL[0]); - break; + break; case 25: if (0 == (_Imm_ & 1)) // MTPS @@ -249,15 +252,15 @@ void recMTC0() xMOV(ptr32[&cpuRegs.PERF.n.pcr1], g_cpuConstRegs[_Rt_].UL[0]); xMOV(ptr[&s_iLastPERFCycle[1]], eax); } - break; + break; case 24: COP0_LOG("MTC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); - break; + break; default: xMOV(ptr32[&cpuRegs.CP0.r[_Rd_]], g_cpuConstRegs[_Rt_].UL[0]); - break; + break; } } else @@ -267,20 +270,20 @@ void recMTC0() case 12: iFlushCall(FLUSH_INTERPRETER); _eeMoveGPRtoR(ecx, _Rt_); - xFastCall((void*)WriteCP0Status, ecx ); - break; + xFastCall((void*)WriteCP0Status, ecx); + break; case 16: iFlushCall(FLUSH_INTERPRETER); _eeMoveGPRtoR(ecx, _Rt_); xFastCall((void*)WriteCP0Config, ecx); - break; + break; case 9: xMOV(ecx, ptr[&cpuRegs.cycle]); _eeMoveGPRtoM((uptr)&cpuRegs.CP0.r[9], _Rt_); xMOV(ptr[&s_iLastCOP0Cycle], ecx); - break; + break; case 25: if (0 == (_Imm_ & 1)) // MTPS @@ -304,15 +307,15 @@ void recMTC0() _eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pcr1, _Rt_); xMOV(ptr[&s_iLastPERFCycle[1]], ecx); } - break; + break; case 24: COP0_LOG("MTC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); - break; + break; default: _eeMoveGPRtoM((uptr)&cpuRegs.CP0.r[_Rd_], _Rt_); - break; + break; } } } @@ -346,4 +349,7 @@ void rec(TLBWR) { void rec(TLBP) { }*/ -}}}} +} // namespace COP0 +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/iCOP0.h b/pcsx2/x86/iCOP0.h index e08e59df6e..97c10442f5 100644 --- 
a/pcsx2/x86/iCOP0.h +++ b/pcsx2/x86/iCOP0.h @@ -26,8 +26,8 @@ namespace R5900 { namespace Dynarec { namespace OpcodeImpl { -namespace COP0 -{ +namespace COP0 { + void recMFC0(); void recMTC0(); void recBC0F(); @@ -42,5 +42,8 @@ namespace COP0 void recDI(); void recEI(); -}}}} +} // namespace COP0 +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 94a0ec3921..e7e9a26b85 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -24,8 +24,8 @@ using namespace x86Emitter; -__tls_emit u8 *j8Ptr[32]; -__tls_emit u32 *j32Ptr[32]; +__tls_emit u8* j8Ptr[32]; +__tls_emit u32* j32Ptr[32]; u16 g_x86AllocCounter = 0; u16 g_xmmAllocCounter = 0; @@ -42,23 +42,25 @@ _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM]; _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR]; // XMM Caching -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] +#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] +#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] static int s_xmmchecknext = 0; // Clear current register mapping structure // Clear allocation counter -void _initXMMregs() { - memzero( xmmregs ); +void _initXMMregs() +{ + memzero(xmmregs); g_xmmAllocCounter = 0; s_xmmchecknext = 0; } // Get a pointer to the physical register (GPR / FPU / VU etc..) 
-__fi void* _XMMGetAddr(int type, int reg, VURegs *VU) +__fi void* _XMMGetAddr(int type, int reg, VURegs* VU) { - switch (type) { + switch (type) + { case XMMTYPE_VFREG: return (void*)VU_VFx_ADDR(reg); @@ -66,8 +68,8 @@ __fi void* _XMMGetAddr(int type, int reg, VURegs *VU) return (void*)VU_ACCx_ADDR; case XMMTYPE_GPRREG: - if( reg < 32 ) - pxAssert( !(g_cpuHasConstReg & (1<regs[xmmregs[i].reg] & EEINST_XMM) ) { + for (i = 0; (uint)i < iREGCNT_XMM; i++) + { + if (xmmregs[i].needed) + continue; + if (xmmregs[i].type == XMMTYPE_GPRREG) + { + if (!(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_XMM)) + { _freeXMMreg(i); return i; } @@ -127,11 +139,15 @@ int _getFreeXMMreg() tempi = -1; bestcount = 0xffff; - for (i=0; (uint)iregs[xmmregs[i].reg]&EEINST_USED) ) { + for (i = 0; (uint)i < iREGCNT_XMM; i++) + { + if (xmmregs[i].needed) + continue; + if (xmmregs[i].type == XMMTYPE_GPRREG) + { + if (!(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_USED)) + { return 1; } } @@ -670,11 +752,13 @@ void _flushXMMregs() { int i; - for (i=0; (uint)iregs[gprreg] & EEINST_XMM ) return _allocGPRtoXMMreg(-1, gprreg, mode); + if (pinst->regs[gprreg] & EEINST_XMM) + return _allocGPRtoXMMreg(-1, gprreg, mode); return _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode); } @@ -752,33 +845,36 @@ int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode) // But it is quite obscure !!! 
int _allocCheckFPUtoXMM(EEINST* pinst, int fpureg, int mode) { - if( pinst->fpuregs[fpureg] & EEINST_XMM ) return _allocFPtoXMMreg(-1, fpureg, mode); + if (pinst->fpuregs[fpureg] & EEINST_XMM) + return _allocFPtoXMMreg(-1, fpureg, mode); return _checkXMMreg(XMMTYPE_FPREG, fpureg, mode); } int _allocCheckGPRtoX86(EEINST* pinst, int gprreg, int mode) { - if( pinst->regs[gprreg] & EEINST_USED ) - return _allocX86reg(xEmptyReg, X86TYPE_GPR, gprreg, mode); + if (pinst->regs[gprreg] & EEINST_USED) + return _allocX86reg(xEmptyReg, X86TYPE_GPR, gprreg, mode); return _checkX86reg(X86TYPE_GPR, gprreg, mode); } void _recClearInst(EEINST* pinst) { - memzero( *pinst ); - memset8( pinst->regs ); - memset8( pinst->fpuregs ); + memzero(*pinst); + memset8(pinst->regs); + memset8(pinst->fpuregs); } // returns nonzero value if reg has been written between [startpc, endpc-4] u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg) { - u32 i, inst = 1; + u32 i, inst = 1; - while(size-- > 0) { - for(i = 0; i < ArraySize(pinst->writeType); ++i) { + while (size-- > 0) + { + for (i = 0; i < ArraySize(pinst->writeType); ++i) + { if ((pinst->writeType[i] == xmmtype) && (pinst->writeReg[i] == reg)) return inst; } @@ -792,24 +888,30 @@ u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg) void _recFillRegister(EEINST& pinst, int type, int reg, int write) { u32 i = 0; - if (write ) { - for(i = 0; i < ArraySize(pinst.writeType); ++i) { - if( pinst.writeType[i] == XMMTYPE_TEMP ) { + if (write) + { + for (i = 0; i < ArraySize(pinst.writeType); ++i) + { + if (pinst.writeType[i] == XMMTYPE_TEMP) + { pinst.writeType[i] = type; pinst.writeReg[i] = reg; return; } } - pxAssume( false ); + pxAssume(false); } - else { - for(i = 0; i < ArraySize(pinst.readType); ++i) { - if( pinst.readType[i] == XMMTYPE_TEMP ) { + else + { + for (i = 0; i < ArraySize(pinst.readType); ++i) + { + if (pinst.readType[i] == XMMTYPE_TEMP) + { pinst.readType[i] = type; pinst.readReg[i] = reg; return; } } - 
pxAssume( false ); + pxAssume(false); } } diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index c5e9c7c2da..ad80294a7f 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -25,51 +25,51 @@ //////////////////////////////////////////////////////////////////////////////// // Shared Register allocation flags (apply to X86, XMM, MMX, etc). -#define MODE_READ 1 -#define MODE_WRITE 2 -#define MODE_READHALF 4 // read only low 64 bits -#define MODE_VUXY 0x8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF -#define MODE_VUZ 0x10 // z only doesn't work for now -#define MODE_VUXYZ (MODE_VUZ|MODE_VUXY) // vector only has xyz valid (real w is in memory) -#define MODE_NOFLUSH 0x20 // can't flush reg to mem -#define MODE_NOFRAME 0x40 // when allocating x86regs, don't use ebp reg -#define MODE_8BITREG 0x80 // when allocating x86regs, use only eax, ecx, edx, and ebx +#define MODE_READ 1 +#define MODE_WRITE 2 +#define MODE_READHALF 4 // read only low 64 bits +#define MODE_VUXY 8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF +#define MODE_VUZ 0x10 // z only doesn't work for now +#define MODE_VUXYZ (MODE_VUZ | MODE_VUXY) // vector only has xyz valid (real w is in memory) +#define MODE_NOFLUSH 0x20 // can't flush reg to mem +#define MODE_NOFRAME 0x40 // when allocating x86regs, don't use ebp reg +#define MODE_8BITREG 0x80 // when allocating x86regs, use only eax, ecx, edx, and ebx #define PROCESS_EE_XMM 0x02 // currently only used in FPU -#define PROCESS_EE_S 0x04 // S is valid, otherwise take from mem -#define PROCESS_EE_T 0x08 // T is valid, otherwise take from mem +#define PROCESS_EE_S 0x04 // S is valid, otherwise take from mem +#define PROCESS_EE_T 0x08 // T is valid, otherwise take from mem // not used in VU recs #define PROCESS_EE_MODEWRITES 0x10 // if s is a reg, set if not in cpuRegs #define PROCESS_EE_MODEWRITET 0x20 // if t is a reg, set if not in cpuRegs -#define PROCESS_EE_LO 0x40 // lo reg is valid 
-#define PROCESS_EE_HI 0x80 // hi reg is valid -#define PROCESS_EE_ACC 0x40 // acc reg is valid +#define PROCESS_EE_LO 0x40 // lo reg is valid +#define PROCESS_EE_HI 0x80 // hi reg is valid +#define PROCESS_EE_ACC 0x40 // acc reg is valid // used in VU recs #define PROCESS_VU_UPDATEFLAGS 0x10 -#define PROCESS_VU_COP2 0x80 // simple cop2 +#define PROCESS_VU_COP2 0x80 // simple cop2 -#define EEREC_S (((info)>>8)&0xf) -#define EEREC_T (((info)>>12)&0xf) -#define EEREC_D (((info)>>16)&0xf) -#define EEREC_LO (((info)>>20)&0xf) -#define EEREC_HI (((info)>>24)&0xf) -#define EEREC_ACC (((info)>>20)&0xf) -#define EEREC_TEMP (((info)>>24)&0xf) +#define EEREC_S (((info) >> 8) & 0xf) +#define EEREC_T (((info) >> 12) & 0xf) +#define EEREC_D (((info) >> 16) & 0xf) +#define EEREC_LO (((info) >> 20) & 0xf) +#define EEREC_HI (((info) >> 24) & 0xf) +#define EEREC_ACC (((info) >> 20) & 0xf) +#define EEREC_TEMP (((info) >> 24) & 0xf) #define VUREC_FMAC ((info)&0x80000000) -#define PROCESS_EE_SET_S(reg) ((reg)<<8) -#define PROCESS_EE_SET_T(reg) ((reg)<<12) -#define PROCESS_EE_SET_D(reg) ((reg)<<16) -#define PROCESS_EE_SET_LO(reg) ((reg)<<20) -#define PROCESS_EE_SET_HI(reg) ((reg)<<24) -#define PROCESS_EE_SET_ACC(reg) ((reg)<<20) +#define PROCESS_EE_SET_S(reg) ((reg) << 8) +#define PROCESS_EE_SET_T(reg) ((reg) << 12) +#define PROCESS_EE_SET_D(reg) ((reg) << 16) +#define PROCESS_EE_SET_LO(reg) ((reg) << 20) +#define PROCESS_EE_SET_HI(reg) ((reg) << 24) +#define PROCESS_EE_SET_ACC(reg) ((reg) << 20) #define PROCESS_VU_SET_ACC(reg) PROCESS_EE_SET_ACC(reg) -#define PROCESS_VU_SET_TEMP(reg) ((reg)<<24) +#define PROCESS_VU_SET_TEMP(reg) ((reg) << 24) #define PROCESS_VU_SET_FMAC() 0x80000000 @@ -91,19 +91,20 @@ #define X86TYPE_VUPWRITE 8 #define X86TYPE_PSX 9 #define X86TYPE_PCWRITEBACK 10 -#define X86TYPE_VUJUMP 12 // jump from random mem (g_recWriteback) +#define X86TYPE_VUJUMP 12 // jump from random mem (g_recWriteback) #define X86TYPE_VITEMP 13 -#define X86TYPE_FNARG 14 // function 
parameter, max is 4 +#define X86TYPE_FNARG 14 // function parameter, max is 4 #define X86TYPE_VU1 0x80 //#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) static __fi int X86_ISVI(int type) { - return ((type&~X86TYPE_VU1) == X86TYPE_VI); + return ((type & ~X86TYPE_VU1) == X86TYPE_VI); } -struct _x86regs { +struct _x86regs +{ u8 inuse; u8 reg; // value of 0 - not used u8 mode; @@ -117,8 +118,8 @@ extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR]; uptr _x86GetAddr(int type, int reg); void _initX86regs(); -int _getFreeX86reg(int mode); -int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode); +int _getFreeX86reg(int mode); +int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode); void _deleteX86reg(int type, int reg, int flush); int _checkX86reg(int type, int reg, int mode); void _addNeededX86reg(int type, int reg); @@ -133,21 +134,22 @@ void _flushConstReg(int reg); //////////////////////////////////////////////////////////////////////////////// // XMM (128-bit) Register Allocation Tools -#define XMM_CONV_VU(VU) (VU==&VU1) +#define XMM_CONV_VU(VU) (VU == &VU1) -#define XMMTYPE_TEMP 0 // has to be 0 -#define XMMTYPE_VFREG 1 -#define XMMTYPE_ACC 2 -#define XMMTYPE_FPREG 3 -#define XMMTYPE_FPACC 4 -#define XMMTYPE_GPRREG 5 +#define XMMTYPE_TEMP 0 // has to be 0 +#define XMMTYPE_VFREG 1 +#define XMMTYPE_ACC 2 +#define XMMTYPE_FPREG 3 +#define XMMTYPE_FPACC 4 +#define XMMTYPE_GPRREG 5 // lo and hi regs -#define XMMGPR_LO 33 -#define XMMGPR_HI 32 -#define XMMFPU_ACC 32 +#define XMMGPR_LO 33 +#define XMMGPR_HI 32 +#define XMMFPU_ACC 32 -struct _xmmregs { +struct _xmmregs +{ u8 inuse; u8 reg; u8 type; @@ -158,12 +160,12 @@ struct _xmmregs { }; void _initXMMregs(); -int _getFreeXMMreg(); -int _allocTempXMMreg(XMMSSEType type, int xmmreg); -int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode); -int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode); -int _allocFPACCtoXMMreg(int xmmreg, int mode); -int 
_checkXMMreg(int type, int reg, int mode); +int _getFreeXMMreg(); +int _allocTempXMMreg(XMMSSEType type, int xmmreg); +int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode); +int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode); +int _allocFPACCtoXMMreg(int xmmreg, int mode); +int _checkXMMreg(int type, int reg, int mode); void _addNeededFPtoXMMreg(int fpreg); void _addNeededFPACCtoXMMreg(); void _addNeededGPRtoXMMreg(int gprreg); @@ -199,15 +201,15 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t // 3/ EEINST_LIVE* is cleared when register is written. And set again when register is read. // My guess: the purpose is to detect the usage hole in the flow -#define EEINST_LIVE0 1 // if var is ever used (read or write) -#define EEINST_LIVE2 4 // if cur var's next 64 bits are needed -#define EEINST_LASTUSE 8 // if var isn't written/read anymore -//#define EEINST_MMX 0x10 // removed -#define EEINST_XMM 0x20 // var will be used in xmm ops -#define EEINST_USED 0x40 +#define EEINST_LIVE0 1 // if var is ever used (read or write) +#define EEINST_LIVE2 4 // if cur var's next 64 bits are needed +#define EEINST_LASTUSE 8 // if var isn't written/read anymore +//#define EEINST_MMX 0x10 // removed +#define EEINST_XMM 0x20 // var will be used in xmm ops +#define EEINST_USED 0x40 -#define EEINSTINFO_COP1 1 -#define EEINSTINFO_COP2 2 +#define EEINSTINFO_COP1 1 +#define EEINSTINFO_COP2 2 struct EEINST { @@ -233,19 +235,19 @@ extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg); //extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern void _recFillRegister(EEINST& pinst, int type, int reg, int write); -static __fi bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); } -static __fi bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE2)); } -static __fi bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & 
EEINST_LIVE2); } +static __fi bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); } +static __fi bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0 | EEINST_LIVE2)); } +static __fi bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } -static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } -static __fi bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } +static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } +static __fi bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } extern u32 g_recWriteback; // used for jumps (VUrec mess!) extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM]; -extern __tls_emit u8 *j8Ptr[32]; // depreciated item. use local u8* vars instead. -extern __tls_emit u32 *j32Ptr[32]; // depreciated item. use local u32* vars instead. +extern __tls_emit u8* j8Ptr[32]; // depreciated item. use local u8* vars instead. +extern __tls_emit u32* j32Ptr[32]; // depreciated item. use local u32* vars instead. extern u16 g_x86AllocCounter; extern u16 g_xmmAllocCounter; @@ -272,25 +274,25 @@ int _allocCheckGPRtoX86(EEINST* pinst, int gprreg, int mode); // the code being called is going to modify register allocations -- ie, be doing // some kind of recompiling of its own. 
-#define FLUSH_CACHED_REGS 0x001 -#define FLUSH_FLUSH_XMM 0x002 -#define FLUSH_FREE_XMM 0x004 // both flushes and frees -#define FLUSH_FLUSH_ALLX86 0x020 // flush x86 -#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs -#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs -#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs -#define FLUSH_PC 0x200 // program counter -#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit -#define FLUSH_CODE 0x800 // opcode for interpreter +#define FLUSH_CACHED_REGS 0x001 +#define FLUSH_FLUSH_XMM 0x002 +#define FLUSH_FREE_XMM 0x004 // both flushes and frees +#define FLUSH_FLUSH_ALLX86 0x020 // flush x86 +#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs +#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs +#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs +#define FLUSH_PC 0x200 // program counter +#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit +#define FLUSH_CODE 0x800 // opcode for interpreter -#define FLUSH_EVERYTHING 0x1ff +#define FLUSH_EVERYTHING 0x1ff //#define FLUSH_EXCEPTION 0x1ff // will probably do this totally differently actually -#define FLUSH_INTERPRETER 0xfff +#define FLUSH_INTERPRETER 0xfff #define FLUSH_FULLVTLB FLUSH_NOCONST // no freeing, used when callee won't destroy xmm regs -#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_ALLX86) +#define FLUSH_NODESTROY (FLUSH_CACHED_REGS | FLUSH_FLUSH_XMM | FLUSH_FLUSH_ALLX86) // used when regs aren't going to be changed be callee -#define FLUSH_NOCONST (FLUSH_FREE_XMM|FLUSH_FREE_TEMPX86) +#define FLUSH_NOCONST (FLUSH_FREE_XMM | FLUSH_FREE_TEMPX86) #endif diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 3a0f867f85..72422ca6ca 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -23,8 +23,8 @@ using namespace x86Emitter; -const __aligned16 u32 g_minvals[4] = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; 
-const __aligned16 u32 g_maxvals[4] = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; +const __aligned16 u32 g_minvals[4] = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; +const __aligned16 u32 g_maxvals[4] = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; //------------------------------------------------------------------ namespace R5900 { @@ -32,33 +32,34 @@ namespace Dynarec { namespace OpcodeImpl { namespace COP1 { -namespace DOUBLE { +namespace DOUBLE +{ -void recABS_S_xmm(int info); -void recADD_S_xmm(int info); -void recADDA_S_xmm(int info); -void recC_EQ_xmm(int info); -void recC_LE_xmm(int info); -void recC_LT_xmm(int info); -void recCVT_S_xmm(int info); -void recCVT_W(); -void recDIV_S_xmm(int info); -void recMADD_S_xmm(int info); -void recMADDA_S_xmm(int info); -void recMAX_S_xmm(int info); -void recMIN_S_xmm(int info); -void recMOV_S_xmm(int info); -void recMSUB_S_xmm(int info); -void recMSUBA_S_xmm(int info); -void recMUL_S_xmm(int info); -void recMULA_S_xmm(int info); -void recNEG_S_xmm(int info); -void recSUB_S_xmm(int info); -void recSUBA_S_xmm(int info); -void recSQRT_S_xmm(int info); -void recRSQRT_S_xmm(int info); + void recABS_S_xmm(int info); + void recADD_S_xmm(int info); + void recADDA_S_xmm(int info); + void recC_EQ_xmm(int info); + void recC_LE_xmm(int info); + void recC_LT_xmm(int info); + void recCVT_S_xmm(int info); + void recCVT_W(); + void recDIV_S_xmm(int info); + void recMADD_S_xmm(int info); + void recMADDA_S_xmm(int info); + void recMAX_S_xmm(int info); + void recMIN_S_xmm(int info); + void recMOV_S_xmm(int info); + void recMSUB_S_xmm(int info); + void recMSUBA_S_xmm(int info); + void recMUL_S_xmm(int info); + void recMULA_S_xmm(int info); + void recNEG_S_xmm(int info); + void recSUB_S_xmm(int info); + void recSUBA_S_xmm(int info); + void recSQRT_S_xmm(int info); + void recRSQRT_S_xmm(int info); -}; +}; // namespace DOUBLE //------------------------------------------------------------------ // Helper Macros @@ -68,36 +69,38 
@@ void recRSQRT_S_xmm(int info); #define _Fd_ _Sa_ // FCR31 Flags -#define FPUflagC 0X00800000 -#define FPUflagI 0X00020000 -#define FPUflagD 0X00010000 -#define FPUflagO 0X00008000 -#define FPUflagU 0X00004000 -#define FPUflagSI 0X00000040 -#define FPUflagSD 0X00000020 -#define FPUflagSO 0X00000010 -#define FPUflagSU 0X00000008 +#define FPUflagC 0x00800000 +#define FPUflagI 0x00020000 +#define FPUflagD 0x00010000 +#define FPUflagO 0x00008000 +#define FPUflagU 0x00004000 +#define FPUflagSI 0x00000040 +#define FPUflagSD 0x00000020 +#define FPUflagSO 0x00000010 +#define FPUflagSU 0x00000008 // Add/Sub opcodes produce the same results as the ps2 #define FPU_CORRECT_ADD_SUB 1 -static const __aligned16 u32 s_neg[4] = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }; -static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +static const __aligned16 u32 s_neg[4] = {0x80000000, 0xffffffff, 0xffffffff, 0xffffffff}; +static const __aligned16 u32 s_pos[4] = {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff}; #define REC_FPUBRANCH(f) \ void f(); \ - void rec##f() { \ - iFlushCall(FLUSH_INTERPRETER); \ - xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ - g_branch = 2; \ -} + void rec##f() \ + { \ + iFlushCall(FLUSH_INTERPRETER); \ + xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ + g_branch = 2; \ + } #define REC_FPUFUNC(f) \ void f(); \ - void rec##f() { \ - iFlushCall(FLUSH_INTERPRETER); \ - xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ -} + void rec##f() \ + { \ + iFlushCall(FLUSH_INTERPRETER); \ + xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ + } //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -119,15 +122,16 @@ REC_FPUFUNC(MTC1); //------------------------------------------------------------------ void recCFC1(void) { - if ( !_Rt_ ) return; + if (!_Rt_) + return; 
EE::Profiler.EmitOp(eeOpcode::CFC1); _eeOnWriteReg(_Rt_, 1); if (_Fs_ >= 16) - xMOV(eax, ptr[&fpuRegs.fprc[31] ]); + xMOV(eax, ptr[&fpuRegs.fprc[31]]); else - xMOV(eax, ptr[&fpuRegs.fprc[0] ]); + xMOV(eax, ptr[&fpuRegs.fprc[0]]); _deleteEEreg(_Rt_, 0); if (_Fs_ >= 16) @@ -136,35 +140,36 @@ void recCFC1(void) xOR(eax, 0x01000001); //set always-one bits } - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } void recCTC1() { - if ( _Fs_ != 31 ) return; + if (_Fs_ != 31) + return; EE::Profiler.EmitOp(eeOpcode::CTC1); - if ( GPR_IS_CONST1(_Rt_) ) + if (GPR_IS_CONST1(_Rt_)) { - xMOV(ptr32[&fpuRegs.fprc[ _Fs_ ]], g_cpuConstRegs[_Rt_].UL[0]); + xMOV(ptr32[&fpuRegs.fprc[_Fs_]], g_cpuConstRegs[_Rt_].UL[0]); } else { int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - if( mmreg >= 0 ) + if (mmreg >= 0) { - xMOVSS(ptr[&fpuRegs.fprc[ _Fs_ ]], xRegisterSSE(mmreg)); + xMOVSS(ptr[&fpuRegs.fprc[_Fs_]], xRegisterSSE(mmreg)); } else { _deleteGPRtoXMMreg(_Rt_, 1); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - xMOV(ptr[&fpuRegs.fprc[ _Fs_ ]], eax); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xMOV(ptr[&fpuRegs.fprc[_Fs_]], eax); } } } @@ -178,25 +183,26 @@ void recCTC1() void recMFC1() { int regt, regs; - if ( ! 
_Rt_ ) return; + if (!_Rt_) + return; EE::Profiler.EmitOp(eeOpcode::MFC1); _eeOnWriteReg(_Rt_, 1); regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - if( regs >= 0 ) + if (regs >= 0) { _deleteGPRtoXMMreg(_Rt_, 2); - _signExtendXMMtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs, 0); + _signExtendXMMtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[0], regs, 0); } else { regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - if( regt >= 0 ) + if (regt >= 0) { - if( xmmregs[regt].mode & MODE_WRITE ) + if (xmmregs[regt].mode & MODE_WRITE) { xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(regt)); } @@ -204,11 +210,11 @@ void recMFC1() } _deleteEEreg(_Rt_, 0); - xMOV(eax, ptr[&fpuRegs.fpr[ _Fs_ ].UL ]); + xMOV(eax, ptr[&fpuRegs.fpr[_Fs_].UL]); - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } } @@ -221,18 +227,18 @@ void recMFC1() void recMTC1() { EE::Profiler.EmitOp(eeOpcode::MTC1); - if( GPR_IS_CONST1(_Rt_) ) + if (GPR_IS_CONST1(_Rt_)) { _deleteFPtoXMMreg(_Fs_, 0); - xMOV(ptr32[&fpuRegs.fpr[ _Fs_ ].UL], g_cpuConstRegs[_Rt_].UL[0]); + xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], g_cpuConstRegs[_Rt_].UL[0]); } else { int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - if( mmreg >= 0 ) + if (mmreg >= 0) { - if( g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE ) + if (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) { // transfer the reg directly _deleteGPRtoXMMreg(_Rt_, 2); @@ -243,24 +249,24 @@ void recMTC1() { int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - if( mmreg2 >= 0 ) + if (mmreg2 >= 0) xMOVSS(xRegisterSSE(mmreg2), xRegisterSSE(mmreg)); else - xMOVSS(ptr[&fpuRegs.fpr[ _Fs_ ].UL], xRegisterSSE(mmreg)); + xMOVSS(ptr[&fpuRegs.fpr[_Fs_].UL], xRegisterSSE(mmreg)); } } else { int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - if( mmreg2 >= 0 ) + if (mmreg2 >= 0) { - 
xMOVSSZX(xRegisterSSE(mmreg2), ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]]); + xMOVSSZX(xRegisterSSE(mmreg2), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); } else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]]); - xMOV(ptr[&fpuRegs.fpr[ _Fs_ ].UL], eax); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xMOV(ptr[&fpuRegs.fpr[_Fs_].UL], eax); } } } @@ -307,9 +313,11 @@ REC_FPUFUNC(RSQRT_S); //------------------------------------------------------------------ static __aligned16 u64 FPU_FLOAT_TEMP[2]; -__fi void fpuFloat3(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax +__fi void fpuFloat3(int regd) // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax +{ int t1reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t1reg >= 0) { + if (t1reg >= 0) + { xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]); xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]); @@ -317,33 +325,39 @@ __fi void fpuFloat3(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg)); _freeXMMreg(t1reg); } - else { + else + { Console.Error("fpuFloat2() allocation error"); t1reg = (regd == 0) ? 
1 : 0; // get a temp reg thats not regd - xMOVAPS(ptr[&FPU_FLOAT_TEMP[0]], xRegisterSSE(t1reg )); // backup data in t1reg to a temp address + xMOVAPS(ptr[&FPU_FLOAT_TEMP[0]], xRegisterSSE(t1reg)); // backup data in t1reg to a temp address xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]); xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]); xMAX.SS(xRegisterSSE(regd), ptr[&g_minvals[0]]); xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg)); - xMOVAPS(xRegisterSSE(t1reg), ptr[&FPU_FLOAT_TEMP[0] ]); // restore t1reg data + xMOVAPS(xRegisterSSE(t1reg), ptr[&FPU_FLOAT_TEMP[0]]); // restore t1reg data } } -__fi void fpuFloat(int regd) { // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax - if (CHECK_FPU_OVERFLOW) { +__fi void fpuFloat(int regd) // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax +{ + if (CHECK_FPU_OVERFLOW) + { xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]); // MIN() must be before MAX()! So that NaN's become +Maximum xMAX.SS(xRegisterSSE(regd), ptr[&g_minvals[0]]); } } -__fi void fpuFloat2(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax - if (CHECK_FPU_OVERFLOW) { +__fi void fpuFloat2(int regd) // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax +{ + if (CHECK_FPU_OVERFLOW) + { fpuFloat3(regd); } } -void ClampValues(int regd) { +void ClampValues(int regd) +{ fpuFloat(regd); } //------------------------------------------------------------------ @@ -355,8 +369,10 @@ void ClampValues(int regd) { void recABS_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::ABS_F); - if( info & PROCESS_EE_S ) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); + if (info & PROCESS_EE_S) + xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags @@ -365,7 
+381,7 @@ void recABS_S_xmm(int info) xMIN.SS(xRegisterSSE(EEREC_D), ptr[&g_maxvals[0]]); } -FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS); //------------------------------------------------------------------ @@ -465,14 +481,20 @@ void FPU_ADD_SUB(int regd, int regt, int issub) _freeX86reg(tempecx); } -void FPU_ADD(int regd, int regt) { - if (FPU_CORRECT_ADD_SUB) FPU_ADD_SUB(regd, regt, 0); - else xADD.SS(xRegisterSSE(regd), xRegisterSSE(regt)); +void FPU_ADD(int regd, int regt) +{ + if (FPU_CORRECT_ADD_SUB) + FPU_ADD_SUB(regd, regt, 0); + else + xADD.SS(xRegisterSSE(regd), xRegisterSSE(regt)); } -void FPU_SUB(int regd, int regt) { - if (FPU_CORRECT_ADD_SUB) FPU_ADD_SUB(regd, regt, 1); - else xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt)); +void FPU_SUB(int regd, int regt) +{ + if (FPU_CORRECT_ADD_SUB) + FPU_ADD_SUB(regd, regt, 1); + else + xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt)); } //------------------------------------------------------------------ @@ -518,11 +540,11 @@ void FPU_MUL_REV(int regd, int regt) { FPU_MUL(regd, regt, true); } //reversed o //------------------------------------------------------------------ // CommutativeOp XMM (used for ADD, MUL, MAX, and MIN opcodes) //------------------------------------------------------------------ -static void (*recComOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { - FPU_ADD, FPU_MUL, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM }; +static void (*recComOpXMM_to_XMM[])(x86SSERegType, x86SSERegType) = { + FPU_ADD, FPU_MUL, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM}; -static void (*recComOpXMM_to_XMM_REV[] )(x86SSERegType, x86SSERegType) = { //reversed operands - FPU_ADD, FPU_MUL_REV, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM }; +static void (*recComOpXMM_to_XMM_REV[])(x86SSERegType, x86SSERegType) = { //reversed operands + FPU_ADD, FPU_MUL_REV, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM}; //static void (*recComOpM32_to_XMM[] 
)(x86SSERegType, uptr) = { // SSE_ADDSS_M32_to_XMM, SSE_MULSS_M32_to_XMM, SSE_MAXSS_M32_to_XMM, SSE_MINSS_M32_to_XMM }; @@ -531,39 +553,70 @@ int recCommutativeOp(int info, int regd, int op) { int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: - if (regd == EEREC_S) { + if (regd == EEREC_S) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_OVERFLOW /*&& !CHECK_FPUCLAMPHACK */ || (op >= 2)) { fpuFloat2(regd); fpuFloat2(t0reg); } + if (CHECK_FPU_EXTRA_OVERFLOW /*&& !CHECK_FPUCLAMPHACK */ || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(t0reg); + } recComOpXMM_to_XMM[op](regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_S); } + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(EEREC_S); + } recComOpXMM_to_XMM_REV[op](regd, EEREC_S); } break; case PROCESS_EE_T: - if (regd == EEREC_T) { + if (regd == EEREC_T) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(t0reg); } + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(t0reg); + } recComOpXMM_to_XMM_REV[op](regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_T); } + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(EEREC_T); + } recComOpXMM_to_XMM[op](regd, EEREC_T); } break; - case (PROCESS_EE_S|PROCESS_EE_T): - if (regd == EEREC_T) { - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_S); } + case (PROCESS_EE_S | PROCESS_EE_T): + if (regd == EEREC_T) + { + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(EEREC_S); + } 
recComOpXMM_to_XMM_REV[op](regd, EEREC_S); } - else { + else + { xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_T); } + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(EEREC_T); + } recComOpXMM_to_XMM[op](regd, EEREC_T); } break; @@ -571,7 +624,11 @@ int recCommutativeOp(int info, int regd, int op) Console.WriteLn(Color_Magenta, "FPU: recCommutativeOp case 4"); xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(t0reg); } + if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) + { + fpuFloat2(regd); + fpuFloat2(t0reg); + } recComOpXMM_to_XMM[op](regd, t0reg); break; } @@ -589,20 +646,20 @@ void recADD_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::ADD_F); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags - ClampValues(recCommutativeOp(info, EEREC_D, 0)); + ClampValues(recCommutativeOp(info, EEREC_D, 0)); //REC_FPUOP(ADD_S); } -FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recADDA_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::ADDA_F); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags - ClampValues(recCommutativeOp(info, EEREC_ACC, 0)); + ClampValues(recCommutativeOp(info, EEREC_ACC, 0)); } -FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -662,30 +719,35 @@ void recC_EQ_xmm(int info) //Console.WriteLn("recC_EQ_xmm()"); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + 
{ case PROCESS_EE_S: fpuFloat3(EEREC_S); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); + else + xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); break; case PROCESS_EE_T: fpuFloat3(EEREC_T); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); _freeXMMreg(t0reg); } - else xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]); + else + xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]); break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): fpuFloat3(EEREC_S); fpuFloat3(EEREC_T); xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T)); @@ -697,35 +759,36 @@ void recC_EQ_xmm(int info) xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]); j8Ptr[0] = JZ8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); - if (tempReg >= 0) _freeX86reg(tempReg); + if (tempReg >= 0) + _freeX86reg(tempReg); return; } j8Ptr[0] = JZ8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS | XMMINFO_READT); //REC_FPUFUNC(C_EQ); void recC_F() { EE::Profiler.EmitOp(eeOpcode::CF_F); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); } //REC_FPUFUNC(C_F); -void recC_LE_xmm(int info ) +void recC_LE_xmm(int info) { 
EE::Profiler.EmitOp(eeOpcode::CLE_F); int tempReg; //tempX86reg @@ -733,32 +796,37 @@ void recC_LE_xmm(int info ) //Console.WriteLn("recC_LE_xmm()"); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: fpuFloat3(EEREC_S); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); + else + xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); break; case PROCESS_EE_T: fpuFloat3(EEREC_T); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); _freeXMMreg(t0reg); } - else { + else + { xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]); j8Ptr[0] = JAE8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); @@ -766,7 +834,7 @@ void recC_LE_xmm(int info ) return; } break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): fpuFloat3(EEREC_S); fpuFloat3(EEREC_T); xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T)); @@ -778,25 +846,26 @@ void recC_LE_xmm(int info ) xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]); j8Ptr[0] = JLE8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); - if (tempReg >= 0) _freeX86reg(tempReg); + if (tempReg >= 0) + _freeX86reg(tempReg); return; } j8Ptr[0] = JBE8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); 
x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS | XMMINFO_READT); //REC_FPUFUNC(C_LE); void recC_LT_xmm(int info) @@ -807,32 +876,37 @@ void recC_LT_xmm(int info) //Console.WriteLn("recC_LT_xmm()"); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: fpuFloat3(EEREC_S); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); + else + xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]); break; case PROCESS_EE_T: fpuFloat3(EEREC_T); t0reg = _allocTempXMMreg(XMMT_FPS, -1); - if (t0reg >= 0) { + if (t0reg >= 0) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); fpuFloat3(t0reg); xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); _freeXMMreg(t0reg); } - else { + else + { xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]); j8Ptr[0] = JA8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); @@ -840,7 +914,7 @@ void recC_LT_xmm(int info) return; } break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): // Clamp NaNs // Note: This fixes a crash in Rule of Rose. 
fpuFloat3(EEREC_S); @@ -854,25 +928,26 @@ void recC_LT_xmm(int info) xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]); j8Ptr[0] = JL8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); - if (tempReg >= 0) _freeX86reg(tempReg); + if (tempReg >= 0) + _freeX86reg(tempReg); return; } j8Ptr[0] = JB8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT); //REC_FPUFUNC(C_LT); //------------------------------------------------------------------ @@ -883,15 +958,17 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); void recCVT_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::CVTS_F); - if( !(info&PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info&PROCESS_EE_MODEWRITES)) ) { + if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES))) + { xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]); } - else { + else + { xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } } -FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); void recCVT_W() { @@ -906,27 +983,28 @@ void recCVT_W() int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - if( regs >= 0 ) + if (regs >= 0) { - if (CHECK_FPU_EXTRA_OVERFLOW) fpuFloat2(regs); + if (CHECK_FPU_EXTRA_OVERFLOW) + fpuFloat2(regs); xCVTTSS2SI(eax, xRegisterSSE(regs)); - xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs - xAND(edx, 1); //keep only LSB + xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs + xAND(edx, 1); // keep only LSB } else { - xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[ _Fs_ ]]); - xMOV(edx, 
ptr[&fpuRegs.fpr[ _Fs_ ]]); - xSHR(edx, 31); //mov sign to lsb + xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]); + xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]); + xSHR(edx, 31); // mov sign to lsb } //kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_] _deleteFPtoXMMreg(_Fd_, 2); - xADD(edx, 0x7FFFFFFF); //0x7FFFFFFF if positive, 0x8000 0000 if negative + xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative - xCMP(eax, 0x80000000); //If the result is indefinitive - xCMOVE(eax, edx); //Saturate it + xCMP(eax, 0x80000000); // If the result is indefinitive + xCMOVE(eax, edx); // Saturate it //Write the result xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax); @@ -944,25 +1022,25 @@ void recDIVhelper1(int regd, int regt) // Sets flags int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags /*--- Check for divide by zero ---*/ xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) ajmp32 = JZ32(0); //Skip if not set /*--- Check for 0/0 ---*/ xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) pjmp1 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) pjmp2 = JMP8(0); 
x86SetJ8(pjmp1); //x/0 but not 0/0 - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD | FPUflagSD); // Set D and SD flags ( x/0 ) x86SetJ8(pjmp2); /*--- Make regd +/- Maximum ---*/ @@ -999,9 +1077,9 @@ void recDIV_S_xmm(int info) EE::Profiler.EmitOp(eeOpcode::DIV_F); bool roundmodeFlag = false; int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - //Console.WriteLn("DIV"); + //Console.WriteLn("DIV"); - if( CHECK_FPUNEGDIVHACK ) + if (CHECK_FPUNEGDIVHACK) { if (g_sseMXCSR.GetRoundMode() != SSEround_NegInf) { @@ -1009,8 +1087,8 @@ void recDIV_S_xmm(int info) //Console.WriteLn("div to negative inf"); roundmode_neg = g_sseMXCSR; - roundmode_neg.SetRoundMode( SSEround_NegInf ); - xLDMXCSR( roundmode_neg ); + roundmode_neg.SetRoundMode(SSEround_NegInf); + xLDMXCSR(roundmode_neg); roundmodeFlag = true; } } @@ -1022,61 +1100,79 @@ void recDIV_S_xmm(int info) //Console.WriteLn("div to nearest"); roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode( SSEround_Nearest ); - xLDMXCSR( roundmode_nearest ); + roundmode_nearest.SetRoundMode(SSEround_Nearest); + xLDMXCSR(roundmode_nearest); roundmodeFlag = true; } } - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: //Console.WriteLn("FPU: DIV case 1"); xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, t0reg); - else recDIVhelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, t0reg); + else + recDIVhelper2(EEREC_D, t0reg); break; case PROCESS_EE_T: //Console.WriteLn("FPU: DIV case 2"); - if (EEREC_D == EEREC_T) { + if (EEREC_D == EEREC_T) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, t0reg); - else recDIVhelper2(EEREC_D, t0reg); + if 
(CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, t0reg); + else + recDIVhelper2(EEREC_D, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, EEREC_T); - else recDIVhelper2(EEREC_D, EEREC_T); + if (CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, EEREC_T); + else + recDIVhelper2(EEREC_D, EEREC_T); } break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): //Console.WriteLn("FPU: DIV case 3"); - if (EEREC_D == EEREC_T) { + if (EEREC_D == EEREC_T) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, t0reg); - else recDIVhelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, t0reg); + else + recDIVhelper2(EEREC_D, t0reg); } - else { + else + { xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, EEREC_T); - else recDIVhelper2(EEREC_D, EEREC_T); + if (CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, EEREC_T); + else + recDIVhelper2(EEREC_D, EEREC_T); } break; default: //Console.WriteLn("FPU: DIV case 4"); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_FLAGS) recDIVhelper1(EEREC_D, t0reg); - else recDIVhelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recDIVhelper1(EEREC_D, t0reg); + else + recDIVhelper2(EEREC_D, t0reg); break; } - if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); + if (roundmodeFlag) + xLDMXCSR(g_sseMXCSR); _freeXMMreg(t0reg); } -FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -1089,38 +1185,46 @@ void recMADDtemp(int info, int regd) int t1reg; int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - switch(info 
& (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: - if(regd == EEREC_S) { + if (regd == EEREC_S) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } } - else if (regd == EEREC_ACC){ + else if (regd == EEREC_ACC) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); @@ -1128,85 +1232,102 @@ void recMADDtemp(int info, int regd) } break; case PROCESS_EE_T: - if(regd == EEREC_T) { + if (regd == EEREC_T) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); } FPU_ADD(regd, 
EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } } - else if (regd == EEREC_ACC){ + else if (regd == EEREC_ACC) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } } break; - case (PROCESS_EE_S|PROCESS_EE_T): - if(regd == EEREC_S) { + case (PROCESS_EE_S | PROCESS_EE_T): + if (regd == EEREC_S) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } } - else if(regd == EEREC_T) { + else if (regd == EEREC_T) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { 
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } } - else if(regd == EEREC_ACC) { + else if (regd == EEREC_ACC) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); } - else { + else + { xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); @@ -1214,7 +1335,8 @@ void recMADDtemp(int info, int regd) } break; default: - if(regd == EEREC_ACC){ + if (regd == EEREC_ACC) + { t1reg = _allocTempXMMreg(XMMT_FPS, -1); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]); @@ -1230,11 +1352,13 @@ void recMADDtemp(int info, int regd) xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { + if (info & PROCESS_EE_ACC) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); } FPU_ADD(regd, EEREC_ACC); } - else { + else + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_ADD(regd, t0reg); @@ -1243,8 +1367,8 @@ void recMADDtemp(int info, int regd) break; } - ClampValues(regd); - _freeXMMreg(t0reg); + ClampValues(regd); + _freeXMMreg(t0reg); } void 
recMADD_S_xmm(int info) @@ -1254,7 +1378,7 @@ void recMADD_S_xmm(int info) recMADDtemp(info, EEREC_D); } -FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); void recMADDA_S_xmm(int info) { @@ -1263,7 +1387,7 @@ void recMADDA_S_xmm(int info) recMADDtemp(info, EEREC_ACC); } -FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -1274,19 +1398,19 @@ void recMAX_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::MAX_F); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags - recCommutativeOp(info, EEREC_D, 2); + recCommutativeOp(info, EEREC_D, 2); } -FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recMIN_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::MIN_F); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags - recCommutativeOp(info, EEREC_D, 3); + recCommutativeOp(info, EEREC_D, 3); } -FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -1296,11 +1420,13 @@ FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recMOV_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::MOV_F); - if( info & PROCESS_EE_S ) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); + if (info & PROCESS_EE_S) + xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); 
} -FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED | XMMINFO_READS); //------------------------------------------------------------------ @@ -1309,107 +1435,133 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); //------------------------------------------------------------------ void recMSUBtemp(int info, int regd) { -int t1reg; + int t1reg; int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: - if(regd == EEREC_S) { + if (regd == EEREC_S) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } - else if (regd == EEREC_ACC){ + else if (regd == EEREC_ACC) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), 
xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } break; case PROCESS_EE_T: - if(regd == EEREC_T) { + if (regd == EEREC_T) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } - else if (regd == EEREC_ACC){ + else if (regd == EEREC_ACC) + { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } break; - case (PROCESS_EE_S|PROCESS_EE_T): - if(regd == EEREC_S) { + case (PROCESS_EE_S | PROCESS_EE_T): + if (regd == 
EEREC_S) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } - else if(regd == EEREC_T) { + else if (regd == EEREC_T) + { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } - else if(regd == EEREC_ACC) { + else if (regd == EEREC_ACC) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(regd, t0reg); } - else { + else + { xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), 
ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); } break; default: - if(regd == EEREC_ACC){ + if (regd == EEREC_ACC) + { t1reg = _allocTempXMMreg(XMMT_FPS, -1); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]); @@ -1425,8 +1577,10 @@ int t1reg; xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); } xMUL.SS(xRegisterSSE(regd), xRegisterSSE(t0reg)); - if (info & PROCESS_EE_ACC) { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); } - else { xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); } + if (info & PROCESS_EE_ACC) + xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_ACC)); + else + xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.ACC]); if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); } FPU_SUB(t0reg, regd); xMOVSS(xRegisterSSE(regd), xRegisterSSE(t0reg)); @@ -1434,9 +1588,8 @@ int t1reg; break; } - ClampValues(regd); - _freeXMMreg(t0reg); - + ClampValues(regd); + _freeXMMreg(t0reg); } void recMSUB_S_xmm(int info) @@ -1446,7 +1599,7 @@ void recMSUB_S_xmm(int info) recMSUBtemp(info, EEREC_D); } -FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); void recMSUBA_S_xmm(int info) { @@ -1455,7 +1608,7 @@ void recMSUBA_S_xmm(int info) recMSUBtemp(info, EEREC_ACC); } -FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -1466,10 +1619,10 @@ void recMUL_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::MUL_F); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O 
and U flags - ClampValues(recCommutativeOp(info, EEREC_D, 1)); + ClampValues(recCommutativeOp(info, EEREC_D, 1)); } -FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recMULA_S_xmm(int info) { @@ -1478,24 +1631,27 @@ void recMULA_S_xmm(int info) ClampValues(recCommutativeOp(info, EEREC_ACC, 1)); } -FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ //------------------------------------------------------------------ // NEG XMM //------------------------------------------------------------------ -void recNEG_S_xmm(int info) { +void recNEG_S_xmm(int info) +{ EE::Profiler.EmitOp(eeOpcode::NEG_F); - if( info & PROCESS_EE_S ) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); + if (info & PROCESS_EE_S) + xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags xXOR.PS(xRegisterSSE(EEREC_D), ptr[&s_neg[0]]); ClampValues(EEREC_D); } -FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED | XMMINFO_READS); //------------------------------------------------------------------ @@ -1514,33 +1670,39 @@ void recSUBop(int info, int regd) //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: //Console.WriteLn("FPU: SUB case 1"); - if (regd != EEREC_S) xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); + if (regd != EEREC_S) + xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); 
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); recSUBhelper(regd, t0reg); break; case PROCESS_EE_T: //Console.WriteLn("FPU: SUB case 2"); - if (regd == EEREC_T) { + if (regd == EEREC_T) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); recSUBhelper(regd, t0reg); } - else { + else + { xMOVSSZX(xRegisterSSE(regd), ptr[&fpuRegs.fpr[_Fs_]]); recSUBhelper(regd, EEREC_T); } break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): //Console.WriteLn("FPU: SUB case 3"); - if (regd == EEREC_T) { + if (regd == EEREC_T) + { xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); recSUBhelper(regd, t0reg); } - else { + else + { xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); recSUBhelper(regd, EEREC_T); } @@ -1563,7 +1725,7 @@ void recSUB_S_xmm(int info) recSUBop(info, EEREC_D); } -FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recSUBA_S_xmm(int info) @@ -1572,7 +1734,7 @@ void recSUBA_S_xmm(int info) recSUBop(info, EEREC_ACC); } -FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -1591,39 +1753,46 @@ void recSQRT_S_xmm(int info) // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode( SSEround_Nearest ); - xLDMXCSR (roundmode_nearest); + roundmode_nearest.SetRoundMode(SSEround_Nearest); + xLDMXCSR(roundmode_nearest); roundmodeFlag = true; } - if( info & PROCESS_EE_T ) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Ft_]]); + if (info & PROCESS_EE_T) + xMOVSS(xRegisterSSE(EEREC_D), 
xRegisterSSE(EEREC_T)); + else + xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_FLAGS) { + if (CHECK_FPU_EXTRA_FLAGS) + { int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags /*--- Check for negative SQRT ---*/ xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D)); - xAND(xRegister32(tempReg), 1); //Check sign + xAND(xRegister32(tempReg), 1); //Check sign pjmp = JZ8(0); //Skip if none are - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive x86SetJ8(pjmp); _freeX86reg(tempReg); } - else xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive + else + xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive - if (CHECK_FPU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_D), ptr[&g_maxvals[0]]);// Only need to do positive clamp, since EEREC_D is positive + if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive + xMIN.SS(xRegisterSSE(EEREC_D), ptr[&g_maxvals[0]]); xSQRT.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/ + if (CHECK_FPU_EXTRA_OVERFLOW) // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/ + ClampValues(EEREC_D); - if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); + if (roundmodeFlag) + xLDMXCSR(g_sseMXCSR); } -FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED | XMMINFO_READT); 
//------------------------------------------------------------------ @@ -1638,13 +1807,13 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags /*--- (first) Check for negative SQRT ---*/ xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t0reg)); - xAND(xRegister32(tempReg), 1); //Check sign + xAND(xRegister32(tempReg), 1); //Check sign pjmp2 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xAND.PS(xRegisterSSE(t0reg), ptr[&s_pos[0]]); // Make t0reg Positive x86SetJ8(pjmp2); @@ -1652,18 +1821,18 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if t0reg == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if t0reg == zero, sign will be set) pjmp1 = JZ8(0); //Skip if not set /*--- Check for 0/0 ---*/ xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) qjmp1 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) qjmp2 = JMP8(0); x86SetJ8(qjmp1); //x/0 but not 0/0 - xOR(ptr32[&fpuRegs.fprc[31]], 
FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD | FPUflagSD); // Set D and SD flags ( x/0 ) x86SetJ8(qjmp2); /*--- Make regd +/- Maximum ---*/ @@ -1672,7 +1841,8 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re pjmp32 = JMP32(0); x86SetJ8(pjmp1); - if (CHECK_FPU_EXTRA_OVERFLOW) { + if (CHECK_FPU_EXTRA_OVERFLOW) + { xMIN.SS(xRegisterSSE(t0reg), ptr[&g_maxvals[0]]); // Only need to do positive clamp, since t0reg is positive fpuFloat2(regd); } @@ -1690,7 +1860,8 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re void recRSQRThelper2(int regd, int t0reg) // Preforms the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags) { xAND.PS(xRegisterSSE(t0reg), ptr[&s_pos[0]]); // Make t0reg Positive - if (CHECK_FPU_EXTRA_OVERFLOW) { + if (CHECK_FPU_EXTRA_OVERFLOW) + { xMIN.SS(xRegisterSSE(t0reg), ptr[&g_maxvals[0]]); // Only need to do positive clamp, since t0reg is positive fpuFloat2(regd); } @@ -1708,41 +1879,53 @@ void recRSQRT_S_xmm(int info) int t0reg = _allocTempXMMreg(XMMT_FPS, -1); //Console.WriteLn("FPU: RSQRT"); - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + switch (info & (PROCESS_EE_S | PROCESS_EE_T)) + { case PROCESS_EE_S: //Console.WriteLn("FPU: RSQRT case 1"); xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recRSQRThelper1(EEREC_D, t0reg); + else + recRSQRThelper2(EEREC_D, t0reg); break; case PROCESS_EE_T: //Console.WriteLn("FPU: RSQRT case 2"); xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recRSQRThelper1(EEREC_D, t0reg); + else + 
recRSQRThelper2(EEREC_D, t0reg); break; - case (PROCESS_EE_S|PROCESS_EE_T): + case (PROCESS_EE_S | PROCESS_EE_T): //Console.WriteLn("FPU: RSQRT case 3"); xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recRSQRThelper1(EEREC_D, t0reg); + else + recRSQRThelper2(EEREC_D, t0reg); break; default: //Console.WriteLn("FPU: RSQRT case 4"); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]); xMOVSSZX(xRegisterSSE(EEREC_D), ptr[&fpuRegs.fpr[_Fs_]]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); + if (CHECK_FPU_EXTRA_FLAGS) + recRSQRThelper1(EEREC_D, t0reg); + else + recRSQRThelper2(EEREC_D, t0reg); break; } _freeXMMreg(t0reg); } -FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); #endif // FPU_RECOMPILE -} } } } +} // namespace COP1 +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/iFPU.h b/pcsx2/x86/iFPU.h index bd5a0ae882..9ba95378dc 100644 --- a/pcsx2/x86/iFPU.h +++ b/pcsx2/x86/iFPU.h @@ -21,48 +21,48 @@ extern const __aligned16 u32 g_maxvals[4]; namespace R5900 { namespace Dynarec { +namespace OpcodeImpl { +namespace COP1 { - namespace OpcodeImpl { - namespace COP1 - { - void recMFC1(); - void recCFC1(); - void recMTC1(); - void recCTC1(); - void recCOP1_BC1(); - void recCOP1_S(); - void recCOP1_W(); - void recC_EQ(); - void recC_F(); - void recC_LT(); - void recC_LE(); - void recADD_S(); - void recSUB_S(); - void recMUL_S(); - void recDIV_S(); - void recSQRT_S(); - void recABS_S(); - void recMOV_S(); - void recNEG_S(); - void recRSQRT_S(); - void recADDA_S(); - void recSUBA_S(); - void recMULA_S(); - void recMADD_S(); - void recMSUB_S(); - void recMADDA_S(); - void 
recMSUBA_S(); - void recCVT_S(); - void recCVT_W(); - void recMAX_S(); - void recMIN_S(); - void recBC1F(); - void recBC1T(); - void recBC1FL(); - void recBC1TL(); - } } -} } + void recMFC1(); + void recCFC1(); + void recMTC1(); + void recCTC1(); + void recCOP1_BC1(); + void recCOP1_S(); + void recCOP1_W(); + void recC_EQ(); + void recC_F(); + void recC_LT(); + void recC_LE(); + void recADD_S(); + void recSUB_S(); + void recMUL_S(); + void recDIV_S(); + void recSQRT_S(); + void recABS_S(); + void recMOV_S(); + void recNEG_S(); + void recRSQRT_S(); + void recADDA_S(); + void recSUBA_S(); + void recMULA_S(); + void recMADD_S(); + void recMSUB_S(); + void recMADDA_S(); + void recMSUBA_S(); + void recCVT_S(); + void recCVT_W(); + void recMAX_S(); + void recMIN_S(); + void recBC1F(); + void recBC1T(); + void recBC1FL(); + void recBC1TL(); + +} // namespace COP1 +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif - - diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index 1410eb5f03..31f288cd4c 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -75,15 +75,15 @@ namespace DOUBLE { #define _Fd_ _Sa_ // FCR31 Flags -#define FPUflagC 0X00800000 -#define FPUflagI 0X00020000 -#define FPUflagD 0X00010000 -#define FPUflagO 0X00008000 -#define FPUflagU 0X00004000 -#define FPUflagSI 0X00000040 -#define FPUflagSD 0X00000020 -#define FPUflagSO 0X00000010 -#define FPUflagSU 0X00000008 +#define FPUflagC 0x00800000 +#define FPUflagI 0x00020000 +#define FPUflagD 0x00010000 +#define FPUflagO 0x00008000 +#define FPUflagU 0x00004000 +#define FPUflagSI 0x00000040 +#define FPUflagSD 0x00000020 +#define FPUflagSO 0x00000010 +#define FPUflagSU 0x00000008 //------------------------------------------------------------------ @@ -95,44 +95,44 @@ namespace DOUBLE { // PS2 -> DOUBLE //------------------------------------------------------------------ -#define SINGLE(sign, exp, mant) (((u32)(sign)<<31) | ((u32)(exp)<<23) | (u32)(mant)) -#define DOUBLE(sign, 
exp, mant) (((sign ## ULL)<<63) | ((exp ## ULL)<<52) | (mant ## ULL)) +#define SINGLE(sign, exp, mant) (((u32)(sign) << 31) | ((u32)(exp) << 23) | (u32)(mant)) +#define DOUBLE(sign, exp, mant) (((sign##ULL) << 63) | ((exp##ULL) << 52) | (mant##ULL)) struct FPUd_Globals { - u32 neg[4], pos[4]; + u32 neg[4], pos[4]; - u32 pos_inf[4], neg_inf[4], - one_exp[4]; + u32 pos_inf[4], neg_inf[4], + one_exp[4]; - u64 dbl_one_exp[2]; + u64 dbl_one_exp[2]; - u64 dbl_cvt_overflow, // needs special code if above or equal - dbl_ps2_overflow, // overflow & clamp if above or equal - dbl_underflow; // underflow if below + u64 dbl_cvt_overflow, // needs special code if above or equal + dbl_ps2_overflow, // overflow & clamp if above or equal + dbl_underflow; // underflow if below - u64 padding; + u64 padding; - u64 dbl_s_pos[2]; + u64 dbl_s_pos[2]; //u64 dlb_s_neg[2]; }; static const __aligned(32) FPUd_Globals s_const = { - { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }, - { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, + {0x80000000, 0xffffffff, 0xffffffff, 0xffffffff}, + {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff}, - {SINGLE(0,0xff,0), 0, 0, 0}, - {SINGLE(1,0xff,0), 0, 0, 0}, - {SINGLE(0,1,0), 0, 0, 0}, + {SINGLE(0, 0xff, 0), 0, 0, 0}, + {SINGLE(1, 0xff, 0), 0, 0, 0}, + {SINGLE(0, 1, 0), 0, 0, 0}, - {DOUBLE(0,1,0), 0}, + {DOUBLE(0, 1, 0), 0}, - DOUBLE(0,1151,0), // cvt_overflow - DOUBLE(0,1152,0), // ps2_overflow - DOUBLE(0,897,0), // underflow + DOUBLE(0, 1151, 0), // cvt_overflow + DOUBLE(0, 1152, 0), // ps2_overflow + DOUBLE(0, 897, 0), // underflow - 0, // Padding!! + 0, // Padding!! 
{0x7fffffffffffffffULL, 0}, //{0x8000000000000000ULL, 0}, @@ -144,12 +144,12 @@ static const __aligned(32) FPUd_Globals s_const = void ToDouble(int reg) { xUCOMI.SS(xRegisterSSE(reg), ptr[s_const.pos_inf]); // Sets ZF if reg is equal or incomparable to pos_inf - u8 *to_complex = JE8(0); // Complex conversion if positive infinity or NaN + u8* to_complex = JE8(0); // Complex conversion if positive infinity or NaN xUCOMI.SS(xRegisterSSE(reg), ptr[s_const.neg_inf]); - u8 *to_complex2 = JE8(0); // Complex conversion if negative infinity + u8* to_complex2 = JE8(0); // Complex conversion if negative infinity xCVTSS2SD(xRegisterSSE(reg), xRegisterSSE(reg)); // Simply convert - u8 *end = JMP8(0); + u8* end = JMP8(0); x86SetJ8(to_complex); x86SetJ8(to_complex2); @@ -166,12 +166,10 @@ void ToDouble(int reg) // DOUBLE -> PS2 //------------------------------------------------------------------ -/* - if FPU_RESULT is defined, results are more like the real PS2's FPU. But new issues may happen if - the VU isn't clamping all operands since games may transfer FPU results into the VU. - Ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs) - otherwise, results are still usually better than iFPU.cpp. -*/ +// If FPU_RESULT is defined, results are more like the real PS2's FPU. +// But new issues may happen if the VU isn't clamping all operands since games may transfer FPU results into the VU. +// Ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs). +// Otherwise, results are still usually better than iFPU.cpp. 
// ToPS2FPU_Full - converts double-precision IEEE float to single-precision PS2 float @@ -192,27 +190,27 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub) xAND.PD(xRegisterSSE(absreg), ptr[&s_const.dbl_s_pos]); xUCOMI.SD(xRegisterSSE(absreg), ptr[&s_const.dbl_cvt_overflow]); - u8 *to_complex = JAE8(0); + u8* to_complex = JAE8(0); xUCOMI.SD(xRegisterSSE(absreg), ptr[&s_const.dbl_underflow]); - u8 *to_underflow = JB8(0); + u8* to_underflow = JB8(0); xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg)); //simply convert #ifdef __M_X86_64 u32* end = JMP32(0); #else - u8 *end = JMP8(0); + u8* end = JMP8(0); #endif x86SetJ8(to_complex); xUCOMI.SD(xRegisterSSE(absreg), ptr[&s_const.dbl_ps2_overflow]); - u8 *to_overflow = JAE8(0); + u8* to_overflow = JAE8(0); xPSUB.Q(xRegisterSSE(reg), ptr[&s_const.dbl_one_exp]); //lower exponent xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg)); //convert xPADD.D(xRegisterSSE(reg), ptr[s_const.one_exp]); //raise exponent #ifdef __M_X86_64 - u32 *end2 = JMP32(0); + u32* end2 = JMP32(0); #else u8* end2 = JMP8(0); #endif @@ -224,15 +222,15 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub) xOR(ptr32[&fpuRegs.fprc[31]], (FPUflagO | FPUflagSO)); if (flags && FPU_FLAGS_OVERFLOW && acc) xOR(ptr32[&fpuRegs.ACCflag], 1); - u8 *end3 = JMP8(0); + u8* end3 = JMP8(0); x86SetJ8(to_underflow); - u8 *end4 = nullptr; + u8* end4 = nullptr; if (flags && FPU_FLAGS_UNDERFLOW) //set underflow flags if not zero { xXOR.PD(xRegisterSSE(absreg), xRegisterSSE(absreg)); xUCOMI.SD(xRegisterSSE(reg), xRegisterSSE(absreg)); - u8 *is_zero = JE8(0); + u8* is_zero = JE8(0); xOR(ptr32[&fpuRegs.fprc[31]], (FPUflagU | FPUflagSU)); if (addsub) @@ -290,25 +288,52 @@ void SetMaxValue(int regd) } } -#define GET_S(sreg) { \ - if( info & PROCESS_EE_S ) xMOVSS(xRegisterSSE(sreg), xRegisterSSE(EEREC_S)); \ - else xMOVSSZX(xRegisterSSE(sreg), ptr[&fpuRegs.fpr[_Fs_]]); } +#define GET_S(sreg) \ + do { \ + if (info & PROCESS_EE_S) \ + 
xMOVSS(xRegisterSSE(sreg), xRegisterSSE(EEREC_S)); \ + else \ + xMOVSSZX(xRegisterSSE(sreg), ptr[&fpuRegs.fpr[_Fs_]]); \ + } while (0) -#define ALLOC_S(sreg) { (sreg) = _allocTempXMMreg(XMMT_FPS, -1); GET_S(sreg); } +#define ALLOC_S(sreg) \ + do { \ + (sreg) = _allocTempXMMreg(XMMT_FPS, -1); \ + GET_S(sreg); \ + } while (0) -#define GET_T(treg) { \ - if( info & PROCESS_EE_T ) xMOVSS(xRegisterSSE(treg), xRegisterSSE(EEREC_T)); \ - else xMOVSSZX(xRegisterSSE(treg), ptr[&fpuRegs.fpr[_Ft_]]); } +#define GET_T(treg) \ + do { \ + if (info & PROCESS_EE_T) \ + xMOVSS(xRegisterSSE(treg), xRegisterSSE(EEREC_T)); \ + else \ + xMOVSSZX(xRegisterSSE(treg), ptr[&fpuRegs.fpr[_Ft_]]); \ + } while (0) -#define ALLOC_T(treg) { (treg) = _allocTempXMMreg(XMMT_FPS, -1); GET_T(treg); } +#define ALLOC_T(treg) \ + do { \ + (treg) = _allocTempXMMreg(XMMT_FPS, -1); \ + GET_T(treg); \ + } while (0) -#define GET_ACC(areg) { \ - if( info & PROCESS_EE_ACC ) xMOVSS(xRegisterSSE(areg), xRegisterSSE(EEREC_ACC)); \ - else xMOVSSZX(xRegisterSSE(areg), ptr[&fpuRegs.ACC]); } +#define GET_ACC(areg) \ + do { \ + if (info & PROCESS_EE_ACC) \ + xMOVSS(xRegisterSSE(areg), xRegisterSSE(EEREC_ACC)); \ + else \ + xMOVSSZX(xRegisterSSE(areg), ptr[&fpuRegs.ACC]); \ + } while (0) -#define ALLOC_ACC(areg) { (areg) = _allocTempXMMreg(XMMT_FPS, -1); GET_ACC(areg); } +#define ALLOC_ACC(areg) \ + do { \ + (areg) = _allocTempXMMreg(XMMT_FPS, -1); \ + GET_ACC(areg); \ + } while (0) -#define CLEAR_OU_FLAGS { xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO | FPUflagU)); } +#define CLEAR_OU_FLAGS \ + do { \ + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO | FPUflagU)); \ + } while (0) //------------------------------------------------------------------ @@ -324,7 +349,7 @@ void recABS_S_xmm(int info) xAND.PS(xRegisterSSE(EEREC_D), ptr[s_const.pos]); } -FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS); 
//------------------------------------------------------------------ @@ -405,8 +430,8 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a void FPU_MUL(int info, int regd, int sreg, int treg, bool acc) { - u8 *noHack; - u32 *endMul = nullptr; + u8* noHack; + u32* endMul = nullptr; if (CHECK_FPUMULHACK) { @@ -432,8 +457,8 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc) //------------------------------------------------------------------ // CommutativeOp XMM (used for ADD and SUB opcodes. that's it.) //------------------------------------------------------------------ -static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { - SSE2_ADDSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM }; +static void (*recFPUOpXMM_to_XMM[])(x86SSERegType, x86SSERegType) = { + SSE2_ADDSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM}; void recFPUOp(int info, int regd, int op, bool acc) { @@ -460,19 +485,19 @@ void recFPUOp(int info, int regd, int op, bool acc) //------------------------------------------------------------------ void recADD_S_xmm(int info) { - EE::Profiler.EmitOp(eeOpcode::ADD_F); - recFPUOp(info, EEREC_D, 0, false); + EE::Profiler.EmitOp(eeOpcode::ADD_F); + recFPUOp(info, EEREC_D, 0, false); } -FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recADDA_S_xmm(int info) { - EE::Profiler.EmitOp(eeOpcode::ADDA_F); - recFPUOp(info, EEREC_ACC, 0, true); + EE::Profiler.EmitOp(eeOpcode::ADDA_F); + recFPUOp(info, EEREC_ACC, 0, true); } -FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ void recCMP(int info) @@ -495,29 +520,29 @@ void recC_EQ_xmm(int info) recCMP(info); j8Ptr[0] = JZ8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + 
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS | XMMINFO_READT); -void recC_LE_xmm(int info ) +void recC_LE_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::CLE_F); recCMP(info); j8Ptr[0] = JBE8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS | XMMINFO_READT); void recC_LT_xmm(int info) { @@ -525,14 +550,14 @@ void recC_LT_xmm(int info) recCMP(info); j8Ptr[0] = JB8(0); - xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC ); + xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC); j8Ptr[1] = JMP8(0); x86SetJ8(j8Ptr[0]); xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC); x86SetJ8(j8Ptr[1]); } -FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -542,41 +567,39 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); void recCVT_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::CVTS_F); - if( !(info&PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info&PROCESS_EE_MODEWRITES)) ) { + if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES))) xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]); - } - else { + else xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } } -FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); void recCVT_W() //called from iFPU.cpp's recCVT_W { EE::Profiler.EmitOp(eeOpcode::CVTW); int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - if( regs >= 0 ) + if (regs >= 0) { 
xCVTTSS2SI(eax, xRegisterSSE(regs)); - xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs - xAND(edx, 1); //keep only LSB + xMOVMSKPS(edx, xRegisterSSE(regs)); // extract the signs + xAND(edx, 1); // keep only LSB } else { - xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[ _Fs_ ]]); - xMOV(edx, ptr[&fpuRegs.fpr[ _Fs_ ]]); - xSHR(edx, 31); //mov sign to lsb + xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]); + xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]); + xSHR(edx, 31); //mov sign to lsb } //kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_] _deleteFPtoXMMreg(_Fd_, 2); - xADD(edx, 0x7FFFFFFF); //0x7FFFFFFF if positive, 0x8000 0000 if negative + xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative - xCMP(eax, 0x80000000); //If the result is indefinitive - xCMOVE(eax, edx); //Saturate it + xCMP(eax, 0x80000000); // If the result is indefinitive + xCMOVE(eax, edx); // Saturate it //Write the result xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax); @@ -594,25 +617,25 @@ void recDIVhelper1(int regd, int regt) // Sets flags int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags //--- Check for divide by zero --- xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) ajmp32 = JZ32(0); //Skip if not set //--- Check for 0/0 --- xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) + xAND(xRegister32(tempReg), 1); 
//Check sign (if regd == zero, sign will be set) pjmp1 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) pjmp2 = JMP8(0); x86SetJ8(pjmp1); //x/0 but not 0/0 - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD | FPUflagSD); // Set D and SD flags ( x/0 ) x86SetJ8(pjmp2); //--- Make regd +/- Maximum --- @@ -650,9 +673,9 @@ void recDIV_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::DIV_F); bool roundmodeFlag = false; - //Console.WriteLn("DIV"); + //Console.WriteLn("DIV"); - if( CHECK_FPUNEGDIVHACK ) + if (CHECK_FPUNEGDIVHACK) { if (g_sseMXCSR.GetRoundMode() != SSEround_NegInf) { @@ -660,8 +683,8 @@ void recDIV_S_xmm(int info) //Console.WriteLn("div to negative inf"); roundmode_neg = g_sseMXCSR; - roundmode_neg.SetRoundMode( SSEround_NegInf ); - xLDMXCSR( roundmode_neg ); + roundmode_neg.SetRoundMode(SSEround_NegInf); + xLDMXCSR(roundmode_neg); roundmodeFlag = true; } } @@ -673,8 +696,8 @@ void recDIV_S_xmm(int info) //Console.WriteLn("div to nearest"); roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode( SSEround_Nearest ); - xLDMXCSR( roundmode_nearest ); + roundmode_nearest.SetRoundMode(SSEround_Nearest); + xLDMXCSR(roundmode_nearest); roundmodeFlag = true; } } @@ -690,11 +713,12 @@ void recDIV_S_xmm(int info) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(sreg)); - if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); + if (roundmodeFlag) + xLDMXCSR(g_sseMXCSR); _freeXMMreg(sreg); _freeXMMreg(treg); } -FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -723,18 +747,18 @@ void recMaddsub(int info, int regd, int op, bool acc) // TEST FOR ACC/MUL OVERFLOWS, PROPOGATE THEM IF THEY 
OCCUR xTEST(ptr32[&fpuRegs.fprc[31]], FPUflagO); - u8 *mulovf = JNZ8(0); + u8* mulovf = JNZ8(0); ToDouble(sreg); //else, convert xTEST(ptr32[&fpuRegs.ACCflag], 1); - u8 *accovf = JNZ8(0); + u8* accovf = JNZ8(0); ToDouble(treg); //else, convert - u8 *operation = JMP8(0); + u8* operation = JMP8(0); x86SetJ8(mulovf); if (op == 1) //sub xXOR.PS(xRegisterSSE(sreg), ptr[s_const.neg]); - xMOVAPS(xRegisterSSE(treg), xRegisterSSE(sreg)); //fall through below + xMOVAPS(xRegisterSSE(treg), xRegisterSSE(sreg)); //fall through below x86SetJ8(accovf); SetMaxValue(treg); //just in case... I think it has to be a MaxValue already here @@ -743,7 +767,7 @@ void recMaddsub(int info, int regd, int op, bool acc) xOR(ptr32[&fpuRegs.fprc[31]], FPUflagO | FPUflagSO); if (FPU_FLAGS_OVERFLOW && acc) xOR(ptr32[&fpuRegs.ACCflag], 1); - u32 *skipall = JMP32(0); + u32* skipall = JMP32(0); // PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE @@ -767,7 +791,7 @@ void recMADD_S_xmm(int info) recMaddsub(info, EEREC_D, 0, false); } -FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); void recMADDA_S_xmm(int info) { @@ -775,7 +799,7 @@ void recMADDA_S_xmm(int info) recMaddsub(info, EEREC_ACC, 0, true); } -FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -785,8 +809,8 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X static const __aligned16 u32 minmax_mask[8] = { - 0xffffffff, 0x80000000, 0, 0, - 0, 0x40000000, 0, 0 + 0xffffffff, 0x80000000, 0, 0, + 0, 0x40000000, 0, 0, }; // FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info. 
void recMINMAX(int info, bool ismin) @@ -818,7 +842,7 @@ void recMAX_S_xmm(int info) recMINMAX(info, false); } -FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recMIN_S_xmm(int info) { @@ -826,7 +850,7 @@ void recMIN_S_xmm(int info) recMINMAX(info, true); } -FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -839,7 +863,7 @@ void recMOV_S_xmm(int info) GET_S(EEREC_D); } -FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED | XMMINFO_READS); //------------------------------------------------------------------ @@ -853,7 +877,7 @@ void recMSUB_S_xmm(int info) recMaddsub(info, EEREC_D, 1, false); } -FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); void recMSUBA_S_xmm(int info) { @@ -861,7 +885,7 @@ void recMSUBA_S_xmm(int info) recMaddsub(info, EEREC_ACC, 1, true); } -FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC | XMMINFO_READACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -877,7 +901,7 @@ void recMUL_S_xmm(int info) _freeXMMreg(sreg); _freeXMMreg(treg); } -FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recMULA_S_xmm(int info) { @@ -889,7 +913,7 @@ void recMULA_S_xmm(int info) _freeXMMreg(sreg); _freeXMMreg(treg); } -FPURECOMPILE_CONSTCODE(MULA_S, 
XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -906,7 +930,7 @@ void recNEG_S_xmm(int info) xXOR.PS(xRegisterSSE(EEREC_D), ptr[&s_const.neg[0]]); } -FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); +FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED | XMMINFO_READS); //------------------------------------------------------------------ @@ -920,7 +944,7 @@ void recSUB_S_xmm(int info) recFPUOp(info, EEREC_D, 1, false); } -FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); void recSUBA_S_xmm(int info) @@ -929,7 +953,7 @@ void recSUBA_S_xmm(int info) recFPUOp(info, EEREC_ACC, 1, true); } -FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ @@ -939,7 +963,7 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); void recSQRT_S_xmm(int info) { EE::Profiler.EmitOp(eeOpcode::SQRT_F); - u8 *pjmp; + u8* pjmp; int roundmodeFlag = 0; int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); int t1reg = _allocTempXMMreg(XMMT_FPS, -1); @@ -950,21 +974,22 @@ void recSQRT_S_xmm(int info) // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode( SSEround_Nearest ); - xLDMXCSR (roundmode_nearest); + roundmode_nearest.SetRoundMode(SSEround_Nearest); + xLDMXCSR(roundmode_nearest); roundmodeFlag = 1; } GET_T(EEREC_D); - if (FPU_FLAGS_ID) { - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + if (FPU_FLAGS_ID) + { + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags //--- Check 
for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say) xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D)); - xAND(xRegister32(tempReg), 1); //Check sign + xAND(xRegister32(tempReg), 1); //Check sign pjmp = JZ8(0); //Skip if none are - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_const.pos[0]]); // Make EEREC_D Positive x86SetJ8(pjmp); } @@ -980,15 +1005,14 @@ void recSQRT_S_xmm(int info) ToPS2FPU(EEREC_D, false, t1reg, false); - if (roundmodeFlag == 1) { - xLDMXCSR (g_sseMXCSR); - } + if (roundmodeFlag == 1) + xLDMXCSR(g_sseMXCSR); _freeX86reg(tempReg); _freeXMMreg(t1reg); } -FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED | XMMINFO_READT); //------------------------------------------------------------------ @@ -999,17 +1023,17 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg { u8 *pjmp1, *pjmp2; u8 *qjmp1, *qjmp2; - u32 *pjmp32; + u32* pjmp32; int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI|FPUflagD)); // Clear I and D flags + xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags //--- (first) Check for negative SQRT --- xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(regt)); - xAND(xRegister32(tempReg), 1); //Check sign + xAND(xRegister32(tempReg), 1); //Check sign pjmp2 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xAND.PS(xRegisterSSE(regt), ptr[&s_const.pos[0]]); // Make regt Positive x86SetJ8(pjmp2); @@ -1017,19 +1041,19 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); 
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) pjmp1 = JZ8(0); //Skip if not set //--- Check for 0/0 --- xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); - xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) + xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) qjmp1 = JZ8(0); //Skip if not set - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) qjmp2 = JMP8(0); x86SetJ8(qjmp1); //x/0 but not 0/0 - xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 ) + xOR(ptr32[&fpuRegs.fprc[31]], FPUflagD | FPUflagSD); // Set D and SD flags ( x/0 ) x86SetJ8(qjmp2); SetMaxValue(regd); //clamp to max @@ -1075,8 +1099,8 @@ void recRSQRT_S_xmm(int info) // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode( SSEround_Nearest ); - xLDMXCSR (roundmode_nearest); + roundmode_nearest.SetRoundMode(SSEround_Nearest); + xLDMXCSR(roundmode_nearest); roundmodeFlag = true; } @@ -1091,11 +1115,16 @@ void recRSQRT_S_xmm(int info) _freeXMMreg(treg); _freeXMMreg(sreg); - if (roundmodeFlag) xLDMXCSR (g_sseMXCSR); + if (roundmodeFlag) + xLDMXCSR(g_sseMXCSR); } -FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); -} } } } } +} // namespace DOUBLE +} // namespace COP1 +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iMMI.cpp 
b/pcsx2/x86/iMMI.cpp index 4e93aafc85..b337f45795 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -34,24 +34,23 @@ namespace Interp = R5900::Interpreter::OpcodeImpl::MMI; namespace R5900 { namespace Dynarec { namespace OpcodeImpl { -namespace MMI -{ +namespace MMI { #ifndef MMI_RECOMPILE -REC_FUNC_DEL( PLZCW, _Rd_ ); +REC_FUNC_DEL(PLZCW, _Rd_); -REC_FUNC_DEL( PMFHL, _Rd_ ); -REC_FUNC_DEL( PMTHL, _Rd_ ); +REC_FUNC_DEL(PMFHL, _Rd_); +REC_FUNC_DEL(PMTHL, _Rd_); -REC_FUNC_DEL( PSRLW, _Rd_ ); -REC_FUNC_DEL( PSRLH, _Rd_ ); +REC_FUNC_DEL(PSRLW, _Rd_); +REC_FUNC_DEL(PSRLH, _Rd_); -REC_FUNC_DEL( PSRAH, _Rd_ ); -REC_FUNC_DEL( PSRAW, _Rd_ ); +REC_FUNC_DEL(PSRAH, _Rd_); +REC_FUNC_DEL(PSRAW, _Rd_); -REC_FUNC_DEL( PSLLH, _Rd_ ); -REC_FUNC_DEL( PSLLW, _Rd_ ); +REC_FUNC_DEL(PSLLH, _Rd_); +REC_FUNC_DEL(PSLLW, _Rd_); #else @@ -59,11 +58,13 @@ void recPLZCW() { int regs = -1; - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PLZCW); - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { _eeOnWriteReg(_Rd_, 0); _deleteEEreg(_Rd_, 0); GPR_SET_CONST(_Rd_); @@ -77,11 +78,13 @@ void recPLZCW() _eeOnWriteReg(_Rd_, 0); - if( (regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { + if ((regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0) + { xMOVD(eax, xRegisterSSE(regs)); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); } _deleteEEreg(_Rd_, 0); @@ -97,58 +100,62 @@ void recPLZCW() // --- first word --- xMOV(ecx, 31); - xTEST(eax, eax); // TEST sets the sign flag accordingly. + xTEST(eax, eax); // TEST sets the sign flag accordingly. 
u8* label_notSigned = JNS8(0); xNOT(eax); x86SetJ8(label_notSigned); xBSR(eax, eax); - u8* label_Zeroed = JZ8(0); // If BSR sets the ZF, eax is "trash" + u8* label_Zeroed = JZ8(0); // If BSR sets the ZF, eax is "trash" xSUB(ecx, eax); - xDEC(ecx); // PS2 doesn't count the first bit + xDEC(ecx); // PS2 doesn't count the first bit x86SetJ8(label_Zeroed); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], ecx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], ecx); // second word - if( regs >= 0) { - xPSHUF.D(xRegisterSSE(regs&0xf), xRegisterSSE(regs&0xf), 0xe1); - xMOVD(eax, xRegisterSSE(regs&0xf)); - xPSHUF.D(xRegisterSSE(regs&0xf), xRegisterSSE(regs&0xf), 0xe1); + if (regs >= 0) + { + xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1); + xMOVD(eax, xRegisterSSE(regs & 0xf)); + xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); } xMOV(ecx, 31); - xTEST(eax, eax); // TEST sets the sign flag accordingly. + xTEST(eax, eax); // TEST sets the sign flag accordingly. label_notSigned = JNS8(0); xNOT(eax); x86SetJ8(label_notSigned); xBSR(eax, eax); - label_Zeroed = JZ8(0); // If BSR sets the ZF, eax is "trash" + label_Zeroed = JZ8(0); // If BSR sets the ZF, eax is "trash" xSUB(ecx, eax); - xDEC(ecx); // PS2 doesn't count the first bit + xDEC(ecx); // PS2 doesn't count the first bit x86SetJ8(label_Zeroed); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], ecx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], ecx); GPR_DEL_CONST(_Rd_); } void recPMFHL() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMFHL); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI ); + int info = eeRecompileCodeXMM(XMMINFO_WRITED | XMMINFO_READLO | XMMINFO_READHI); int t0reg; - switch (_Sa_) { + switch (_Sa_) + { case 0x00: // LW t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -171,7 +178,7 @@ void recPMFHL() // fall to interp _deleteEEreg(_Rd_, 0); iFlushCall(FLUSH_INTERPRETER); // since calling CALLFunc - xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL ); + xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL); break; case 0x03: // LH @@ -187,11 +194,13 @@ void recPMFHL() break; case 0x04: // SH - if( EEREC_D == EEREC_HI ) { + if (EEREC_D == EEREC_HI) + { xPACK.SSDW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x72); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); xPACK.SSDW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_HI)); @@ -209,11 +218,12 @@ void recPMFHL() void recPMTHL() { - if ( _Sa_ != 0 ) return; + if (_Sa_ != 0) + return; EE::Profiler.EmitOp(eeOpcode::PMTHL); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI); xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5); xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); @@ -225,17 +235,20 @@ void recPMTHL() //////////////////////////////////////////////////// void recPSRLH() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRLH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( (_Sa_&0xf) == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if ((_Sa_ & 0xf) == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { 
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRL.W(xRegisterSSE(EEREC_D), _Sa_&0xf ); + xPSRL.W(xRegisterSSE(EEREC_D), _Sa_ & 0xf); } _clearNeededXMMregs(); } @@ -243,17 +256,20 @@ void recPSRLH() //////////////////////////////////////////////////// void recPSRLW() { - if( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRLW); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( _Sa_ == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if (_Sa_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRL.D(xRegisterSSE(EEREC_D), _Sa_ ); + xPSRL.D(xRegisterSSE(EEREC_D), _Sa_); } _clearNeededXMMregs(); } @@ -261,17 +277,20 @@ void recPSRLW() //////////////////////////////////////////////////// void recPSRAH() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRAH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( (_Sa_&0xf) == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if ((_Sa_ & 0xf) == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.W(xRegisterSSE(EEREC_D), _Sa_&0xf ); + xPSRA.W(xRegisterSSE(EEREC_D), _Sa_ & 0xf); } _clearNeededXMMregs(); } @@ -279,17 +298,20 @@ void recPSRAH() //////////////////////////////////////////////////// void recPSRAW() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRAW); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( _Sa_ == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if (_Sa_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.D(xRegisterSSE(EEREC_D), _Sa_ ); + xPSRA.D(xRegisterSSE(EEREC_D), _Sa_); } 
_clearNeededXMMregs(); } @@ -297,17 +319,20 @@ void recPSRAW() //////////////////////////////////////////////////// void recPSLLH() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSLLH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( (_Sa_&0xf) == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if ((_Sa_ & 0xf) == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSLL.W(xRegisterSSE(EEREC_D), _Sa_&0xf ); + xPSLL.W(xRegisterSSE(EEREC_D), _Sa_ & 0xf); } _clearNeededXMMregs(); } @@ -315,17 +340,20 @@ void recPSLLH() //////////////////////////////////////////////////// void recPSLLW() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSLLW); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); - if( _Sa_ == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); + if (_Sa_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSLL.D(xRegisterSSE(EEREC_D), _Sa_ ); + xPSLL.D(xRegisterSSE(EEREC_D), _Sa_); } _clearNeededXMMregs(); } @@ -352,50 +380,55 @@ void recPLZCW() *********************************************************/ #ifndef MMI0_RECOMPILE -REC_FUNC_DEL( PADDB, _Rd_); -REC_FUNC_DEL( PADDH, _Rd_); -REC_FUNC_DEL( PADDW, _Rd_); -REC_FUNC_DEL( PADDSB, _Rd_); -REC_FUNC_DEL( PADDSH, _Rd_); -REC_FUNC_DEL( PADDSW, _Rd_); -REC_FUNC_DEL( PSUBB, _Rd_); -REC_FUNC_DEL( PSUBH, _Rd_); -REC_FUNC_DEL( PSUBW, _Rd_); -REC_FUNC_DEL( PSUBSB, _Rd_); -REC_FUNC_DEL( PSUBSH, _Rd_); -REC_FUNC_DEL( PSUBSW, _Rd_); +REC_FUNC_DEL(PADDB, _Rd_); +REC_FUNC_DEL(PADDH, _Rd_); +REC_FUNC_DEL(PADDW, _Rd_); +REC_FUNC_DEL(PADDSB, _Rd_); +REC_FUNC_DEL(PADDSH, _Rd_); +REC_FUNC_DEL(PADDSW, _Rd_); +REC_FUNC_DEL(PSUBB, _Rd_); +REC_FUNC_DEL(PSUBH, _Rd_); +REC_FUNC_DEL(PSUBW, _Rd_); 
+REC_FUNC_DEL(PSUBSB, _Rd_); +REC_FUNC_DEL(PSUBSH, _Rd_); +REC_FUNC_DEL(PSUBSW, _Rd_); -REC_FUNC_DEL( PMAXW, _Rd_); -REC_FUNC_DEL( PMAXH, _Rd_); +REC_FUNC_DEL(PMAXW, _Rd_); +REC_FUNC_DEL(PMAXH, _Rd_); -REC_FUNC_DEL( PCGTW, _Rd_); -REC_FUNC_DEL( PCGTH, _Rd_); -REC_FUNC_DEL( PCGTB, _Rd_); +REC_FUNC_DEL(PCGTW, _Rd_); +REC_FUNC_DEL(PCGTH, _Rd_); +REC_FUNC_DEL(PCGTB, _Rd_); -REC_FUNC_DEL( PEXTLW, _Rd_); +REC_FUNC_DEL(PEXTLW, _Rd_); -REC_FUNC_DEL( PPACW, _Rd_); -REC_FUNC_DEL( PEXTLH, _Rd_); -REC_FUNC_DEL( PPACH, _Rd_); -REC_FUNC_DEL( PEXTLB, _Rd_); -REC_FUNC_DEL( PPACB, _Rd_); -REC_FUNC_DEL( PEXT5, _Rd_); -REC_FUNC_DEL( PPAC5, _Rd_); +REC_FUNC_DEL(PPACW, _Rd_); +REC_FUNC_DEL(PEXTLH, _Rd_); +REC_FUNC_DEL(PPACH, _Rd_); +REC_FUNC_DEL(PEXTLB, _Rd_); +REC_FUNC_DEL(PPACB, _Rd_); +REC_FUNC_DEL(PEXT5, _Rd_); +REC_FUNC_DEL(PPAC5, _Rd_); #else //////////////////////////////////////////////////// void recPMAXW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMAXW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_S == EEREC_T) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -405,25 +438,30 @@ void recPMAXW() //////////////////////////////////////////////////// void recPPACW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PPACW); - int info = eeRecompileCodeXMM( ((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(((_Rs_ != 0) ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); - if( _Rs_ == 0 ) { + if (_Rs_ == 0) + { xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPSRL.DQ(xRegisterSSE(EEREC_D), 8); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); - if( EEREC_D == EEREC_T ) { + if (EEREC_D == EEREC_T) + { xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPUNPCK.LQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T), 0x88); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S), 0x88); xPUNPCK.LQDQ(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); @@ -439,18 +477,21 @@ void recPPACW() void recPPACH() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PPACH); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x88); xPSLL.DQ(xRegisterSSE(EEREC_D), 4); xPSRL.DQ(xRegisterSSE(EEREC_D), 8); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); @@ -469,13 +510,16 @@ void recPPACH() //////////////////////////////////////////////////// void recPPACB() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PPACB); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _hasFreeXMMreg() ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_hasFreeXMMreg()) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSLL.W(xRegisterSSE(EEREC_D), 8); @@ -484,7 +528,8 @@ void recPPACB() xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSLL.W(xRegisterSSE(EEREC_D), 8); xPSRL.W(xRegisterSSE(EEREC_D), 8); @@ -492,7 +537,8 @@ void recPPACB() xPSRL.DQ(xRegisterSSE(EEREC_D), 8); } } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); @@ -511,11 +557,12 @@ void recPPACB() //////////////////////////////////////////////////// void recPEXT5() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXT5); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -549,11 +596,12 @@ void recPEXT5() //////////////////////////////////////////////////// void recPPAC5() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PPAC5); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -589,14 +637,18 @@ void recPPAC5() //////////////////////////////////////////////////// void recPMAXH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMAXH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -606,16 +658,19 @@ void recPMAXH() //////////////////////////////////////////////////// void recPCGTB() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCGTB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D != EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D != EEREC_T) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.GTB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -628,16 +683,19 @@ void recPCGTB() //////////////////////////////////////////////////// void recPCGTH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCGTH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D != EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D != EEREC_T) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.GTW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -651,16 +709,19 @@ void recPCGTH() void recPCGTW() { //TODO:optimize RS | RT== 0 - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCGTW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D != EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D != EEREC_T) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.GTD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -673,14 +734,18 @@ void recPCGTW() //////////////////////////////////////////////////// void recPADDSB() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDSB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -690,14 +755,18 @@ void recPADDSB() //////////////////////////////////////////////////// void recPADDSH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDSH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -708,11 +777,12 @@ void recPADDSH() //NOTE: check kh2 movies if changing this void recPADDSW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDSW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); @@ -727,9 +797,12 @@ void recPADDSW() xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // normal addition - if( EEREC_D == EEREC_S ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (EEREC_D == EEREC_S) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -760,20 +833,24 @@ void recPADDSW() //////////////////////////////////////////////////// void recPSUBSB() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBSB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -783,20 +860,24 @@ void recPSUBSB() //////////////////////////////////////////////////// void recPSUBSH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBSH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -807,11 +888,12 @@ void recPSUBSH() //NOTE: check kh2 movies if changing this void recPSUBSW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBSW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); @@ -828,13 +910,16 @@ void recPSUBSW() xPSRL.D(xRegisterSSE(t1reg), 31); // normal subtraction - if( EEREC_D == EEREC_S ) xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + if (EEREC_D == EEREC_S) + xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { xMOVDQA(xRegisterSSE(t2reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t2reg)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -864,14 +949,18 @@ void recPSUBSW() //////////////////////////////////////////////////// void recPADDB() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -881,22 +970,31 @@ void recPADDB() //////////////////////////////////////////////////// void recPADDH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDH); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - else xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | (_Rt_ != 0 ? XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + else + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else { - if( EEREC_D == EEREC_S ) xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + else + { + if (EEREC_D == EEREC_S) + xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -907,22 +1005,31 @@ void recPADDH() //////////////////////////////////////////////////// void recPADDW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDW); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - else xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | (_Rt_ != 0 ? 
XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + else + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else { - if( EEREC_D == EEREC_S ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + else + { + if (EEREC_D == EEREC_S) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -933,20 +1040,24 @@ void recPADDW() //////////////////////////////////////////////////// void recPSUBB() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -956,20 +1067,24 @@ void recPSUBB() //////////////////////////////////////////////////// void recPSUBH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -979,20 +1094,24 @@ void recPSUBH() //////////////////////////////////////////////////// void recPSUBW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1002,25 +1121,31 @@ void recPSUBW() //////////////////////////////////////////////////// void recPEXTLW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTLW); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.Q(xRegisterSSE(EEREC_D), 32); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1030,25 +1155,31 @@ void recPEXTLW() void recPEXTLB() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTLB); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? 
XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.W(xRegisterSSE(EEREC_D), 8); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1058,25 +1189,31 @@ void recPEXTLB() void recPEXTLH() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTLH); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? 
XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.D(xRegisterSSE(EEREC_D), 16); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1092,28 +1229,28 @@ void recPEXTLH() *********************************************************/ #ifndef MMI1_RECOMPILE -REC_FUNC_DEL( PABSW, _Rd_); -REC_FUNC_DEL( PABSH, _Rd_); +REC_FUNC_DEL(PABSW, _Rd_); +REC_FUNC_DEL(PABSH, _Rd_); -REC_FUNC_DEL( PMINW, _Rd_); -REC_FUNC_DEL( PADSBH, _Rd_); -REC_FUNC_DEL( PMINH, _Rd_); -REC_FUNC_DEL( PCEQB, _Rd_); -REC_FUNC_DEL( PCEQH, _Rd_); -REC_FUNC_DEL( PCEQW, _Rd_); +REC_FUNC_DEL(PMINW, _Rd_); +REC_FUNC_DEL(PADSBH, _Rd_); +REC_FUNC_DEL(PMINH, _Rd_); +REC_FUNC_DEL(PCEQB, _Rd_); +REC_FUNC_DEL(PCEQH, _Rd_); +REC_FUNC_DEL(PCEQW, _Rd_); -REC_FUNC_DEL( PADDUB, _Rd_); -REC_FUNC_DEL( PADDUH, _Rd_); -REC_FUNC_DEL( PADDUW, _Rd_); +REC_FUNC_DEL(PADDUB, _Rd_); +REC_FUNC_DEL(PADDUH, _Rd_); +REC_FUNC_DEL(PADDUW, _Rd_); -REC_FUNC_DEL( PSUBUB, _Rd_); -REC_FUNC_DEL( PSUBUH, _Rd_); -REC_FUNC_DEL( PSUBUW, _Rd_); +REC_FUNC_DEL(PSUBUB, _Rd_); +REC_FUNC_DEL(PSUBUH, _Rd_); +REC_FUNC_DEL(PSUBUW, _Rd_); -REC_FUNC_DEL( PEXTUW, _Rd_); -REC_FUNC_DEL( PEXTUH, _Rd_); -REC_FUNC_DEL( PEXTUB, _Rd_); -REC_FUNC_DEL( QFSRV, _Rd_); +REC_FUNC_DEL(PEXTUW, _Rd_); +REC_FUNC_DEL(PEXTUH, _Rd_); +REC_FUNC_DEL(PEXTUB, _Rd_); +REC_FUNC_DEL(QFSRV, _Rd_); #else @@ -1121,11 +1258,12 @@ 
REC_FUNC_DEL( QFSRV, _Rd_); void recPABSW() //needs clamping { - if( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PABSW); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.D(xRegisterSSE(t0reg), 31); @@ -1140,11 +1278,12 @@ void recPABSW() //needs clamping //////////////////////////////////////////////////// void recPABSH() { - if( !_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PABSH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.W(xRegisterSSE(t0reg), 15); @@ -1158,15 +1297,20 @@ void recPABSH() //////////////////////////////////////////////////// void recPMINW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMINW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_S == EEREC_T) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1176,30 +1320,35 @@ void recPMINW() //////////////////////////////////////////////////// void recPADSBH() { 
- if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADSBH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int t0reg; - if( EEREC_S == EEREC_T ) { + if (EEREC_S == EEREC_T) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); // reset lower bits to 0s xPSRL.DQ(xRegisterSSE(EEREC_D), 8); xPSLL.DQ(xRegisterSSE(EEREC_D), 8); } - else { + else + { t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - if( EEREC_D == EEREC_S ) { + if (EEREC_D == EEREC_S) + { xPADD.W(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPADD.W(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); @@ -1217,22 +1366,28 @@ void recPADSBH() //////////////////////////////////////////////////// void recPADDUW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDUW); - int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM((_Rs_ ? XMMINFO_READS : 0) | (_Rt_ ? 
XMMINFO_READT : 0) | XMMINFO_WRITED); - if( _Rt_ == 0 ) { - if( _Rs_ == 0 ) { + if (_Rt_ == 0) + { + if (_Rs_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else if( _Rs_ == 0 ) { + else if (_Rs_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1242,9 +1397,12 @@ void recPADDUW() xPXOR(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); // invert MSB of Rs (for unsigned comparison) // normal 32-bit addition - if( EEREC_D == EEREC_S ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (EEREC_D == EEREC_S) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1265,20 +1423,24 @@ void recPADDUW() //////////////////////////////////////////////////// void recPSUBUB() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBUB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1288,20 +1450,24 @@ void recPSUBUB() //////////////////////////////////////////////////// void recPSUBUH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBUH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1311,11 +1477,12 @@ void recPSUBUH() //////////////////////////////////////////////////// void recPSUBUW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSUBUW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1324,20 +1491,23 @@ void recPSUBUW() // normal 32-bit subtraction // and invert MSB of Rs and Rt (for unsigned comparison) - if( EEREC_D == EEREC_S ) { + if (EEREC_D == EEREC_S) + { xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xPXOR(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( EEREC_D == EEREC_T ) { + else if (EEREC_D == EEREC_T) + { xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); xPXOR(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); @@ -1359,25 +1529,31 @@ void recPSUBUW() //////////////////////////////////////////////////// void recPEXTUH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTUH); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? 
XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.D(xRegisterSSE(EEREC_D), 16); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1389,12 +1565,14 @@ static __aligned16 u32 tempqw[8]; void recQFSRV() { - if ( !_Rd_ ) return; + if (!_Rd_) + return; //Console.WriteLn("recQFSRV()"); EE::Profiler.EmitOp(eeOpcode::QFSRV); - if (_Rs_ == _Rt_ + 1) { + if (_Rs_ == _Rt_ + 1) + { _flushEEreg(_Rs_); _flushEEreg(_Rt_); int info = eeRecompileCodeXMM(XMMINFO_WRITED); @@ -1404,13 +1582,13 @@ void recQFSRV() xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]); return; } - - int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED ); + + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); xMOV(eax, ptr32[&cpuRegs.sa]); xLEA(rcx, ptr[tempqw]); xMOVDQA(ptr32[rcx], xRegisterSSE(EEREC_T)); - xMOVDQA(ptr32[rcx+16], xRegisterSSE(EEREC_S)); + xMOVDQA(ptr32[rcx + 16], xRegisterSSE(EEREC_S)); xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]); _clearNeededXMMregs(); @@ -1419,26 +1597,32 @@ void recQFSRV() void recPEXTUB() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTUB); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM((_Rs_ != 0 ? 
XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); - if( _Rs_ == 0 ) { + if (_Rs_ == 0) + { xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.W(xRegisterSSE(EEREC_D), 8); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1449,25 +1633,31 @@ void recPEXTUB() //////////////////////////////////////////////////// void recPEXTUW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXTUW); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ != 0 ? 
XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); + if (_Rs_ == 0) + { xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSRL.Q(xRegisterSSE(EEREC_D), 32); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -1478,14 +1668,18 @@ void recPEXTUW() //////////////////////////////////////////////////// void recPMINH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMINH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1495,14 +1689,18 @@ void recPMINH() //////////////////////////////////////////////////// void recPCEQB() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCEQB); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1512,14 +1710,18 @@ void recPCEQB() //////////////////////////////////////////////////// void recPCEQH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCEQH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1529,14 +1731,18 @@ void recPCEQH() //////////////////////////////////////////////////// void recPCEQW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCEQW); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1546,34 +1752,44 @@ void recPCEQW() //////////////////////////////////////////////////// void recPADDUB() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDUB); - int info = eeRecompileCodeXMM( XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rt_ ) { - if( EEREC_D == EEREC_S ) xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | (_Rt_ ? XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rt_) + { + if (EEREC_D == EEREC_S) + xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } - else xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); _clearNeededXMMregs(); } //////////////////////////////////////////////////// void recPADDUH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PADDUH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -1587,29 +1803,29 @@ void recPADDUH() *********************************************************/ #ifndef MMI2_RECOMPILE -REC_FUNC_DEL( PMFHI, _Rd_); -REC_FUNC_DEL( PMFLO, _Rd_); -REC_FUNC_DEL( PCPYLD, _Rd_); -REC_FUNC_DEL( PAND, _Rd_); -REC_FUNC_DEL( PXOR, _Rd_); +REC_FUNC_DEL(PMFHI, _Rd_); +REC_FUNC_DEL(PMFLO, _Rd_); +REC_FUNC_DEL(PCPYLD, _Rd_); +REC_FUNC_DEL(PAND, _Rd_); +REC_FUNC_DEL(PXOR, _Rd_); -REC_FUNC_DEL( PMADDW, _Rd_); -REC_FUNC_DEL( PSLLVW, _Rd_); -REC_FUNC_DEL( PSRLVW, _Rd_); -REC_FUNC_DEL( PMSUBW, _Rd_); -REC_FUNC_DEL( PINTH, _Rd_); -REC_FUNC_DEL( PMULTW, _Rd_); -REC_FUNC_DEL( PDIVW, _Rd_); -REC_FUNC_DEL( PMADDH, _Rd_); -REC_FUNC_DEL( PHMADH, _Rd_); -REC_FUNC_DEL( PMSUBH, _Rd_); -REC_FUNC_DEL( PHMSBH, _Rd_); -REC_FUNC_DEL( PEXEH, _Rd_); -REC_FUNC_DEL( PREVH, _Rd_); -REC_FUNC_DEL( PMULTH, _Rd_); -REC_FUNC_DEL( PDIVBW, _Rd_); -REC_FUNC_DEL( PEXEW, _Rd_); -REC_FUNC_DEL( PROT3W, _Rd_ ); +REC_FUNC_DEL(PMADDW, _Rd_); +REC_FUNC_DEL(PSLLVW, _Rd_); +REC_FUNC_DEL(PSRLVW, _Rd_); +REC_FUNC_DEL(PMSUBW, _Rd_); +REC_FUNC_DEL(PINTH, _Rd_); +REC_FUNC_DEL(PMULTW, _Rd_); +REC_FUNC_DEL(PDIVW, _Rd_); +REC_FUNC_DEL(PMADDH, _Rd_); +REC_FUNC_DEL(PHMADH, _Rd_); +REC_FUNC_DEL(PMSUBH, _Rd_); +REC_FUNC_DEL(PHMSBH, _Rd_); +REC_FUNC_DEL(PEXEH, _Rd_); +REC_FUNC_DEL(PREVH, 
_Rd_); +REC_FUNC_DEL(PMULTH, _Rd_); +REC_FUNC_DEL(PDIVBW, _Rd_); +REC_FUNC_DEL(PEXEW, _Rd_); +REC_FUNC_DEL(PROT3W, _Rd_); #else @@ -1618,36 +1834,48 @@ void recPMADDW() { EE::Profiler.EmitOp(eeOpcode::PMADDW); - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); + int info = eeRecompileCodeXMM((((_Rs_) && (_Rt_)) ? XMMINFO_READS : 0) | (((_Rs_) && (_Rt_)) ? XMMINFO_READT : 0) | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI | XMMINFO_READLO | XMMINFO_READHI); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} - if( _Rd_ ) { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - else if( EEREC_D == EEREC_S ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (_Rd_) + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + else if (EEREC_D == EEREC_S) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } - else { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); - else { + else + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); + else + { xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_T)); } } // add from LO/HI - if ( _Rd_ ) xPADD.Q(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); - else xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); + if (_Rd_) + xPADD.Q(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); + 
else + xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); // interleave & sign extend - if ( _Rd_ ) { + if (_Rd_) + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_D), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_D), 0xdd); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); } @@ -1659,35 +1887,42 @@ void recPMADDW() //////////////////////////////////////////////////// void recPSLLVW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSLLVW); - int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ ? XMMINFO_READS : 0) | (_Rt_ ? XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); // shamt is 5-bit xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - xPSLL.Q(xRegisterSSE(t0reg), 27+32); - xPSRL.Q(xRegisterSSE(t0reg), 27+32); + xPSLL.Q(xRegisterSSE(t0reg), 27 + 32); + xPSRL.Q(xRegisterSSE(t0reg), 27 + 32); // EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2] xMOVHL.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - if( EEREC_D != EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + if (EEREC_D != EEREC_T) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); // shift (left) Rt[0] xPSLL.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); @@ -1709,35 +1944,42 @@ void recPSLLVW() //////////////////////////////////////////////////// void recPSRLVW() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRLVW); - int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ ? XMMINFO_READS : 0) | (_Rt_ ? XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); // shamt is 5-bit xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - xPSLL.Q(xRegisterSSE(t0reg), 27+32); - xPSRL.Q(xRegisterSSE(t0reg), 27+32); + xPSLL.Q(xRegisterSSE(t0reg), 27 + 32); + xPSRL.Q(xRegisterSSE(t0reg), 27 + 32); // EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2] xMOVHL.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - if( EEREC_D != EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + if (EEREC_D != EEREC_T) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); // shift (right logical) Rt[0] xPSRL.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); @@ -1761,42 +2003,54 @@ void recPMSUBW() { EE::Profiler.EmitOp(eeOpcode::PMSUBW); - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); + int info = eeRecompileCodeXMM((((_Rs_) && (_Rt_)) ? XMMINFO_READS : 0) | (((_Rs_) && (_Rt_)) ? XMMINFO_READT : 0) | (_Rd_ ? 
XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI | XMMINFO_READLO | XMMINFO_READHI); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} - if( _Rd_ ) { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - else if( EEREC_D == EEREC_S ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (_Rd_) + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + else if (EEREC_D == EEREC_S) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } - else { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); - else { + else + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); + else + { xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_T)); } } // sub from LO/HI - if ( _Rd_ ) { + if (_Rd_) + { xPSUB.Q(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_D)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); } - else { + else + { xPSUB.Q(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI)); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); } // interleave & sign extend - if ( _Rd_ ) { + if (_Rd_) + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_D), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_D), 0xdd); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); } @@ -1810,32 +2064,42 @@ void recPMULTW() { EE::Profiler.EmitOp(eeOpcode::PMULTW); - int info = eeRecompileCodeXMM( 
(((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); - if( !_Rs_ || !_Rt_ ) { - if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + int info = eeRecompileCodeXMM((((_Rs_) && (_Rt_)) ? XMMINFO_READS : 0) | (((_Rs_) && (_Rt_)) ? XMMINFO_READT : 0) | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI); + if (!_Rs_ || !_Rt_) + { + if (_Rd_) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); xPXOR(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); } - else { - if( _Rd_ ) { - if( EEREC_D == EEREC_S ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + else + { + if (_Rd_) + { + if (EEREC_D == EEREC_S) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xPMUL.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_T)); } // interleave & sign extend - if ( _Rd_ ) { + if (_Rd_) + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_D), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_D), 0xdd); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); } @@ -1870,7 +2134,7 @@ void recPHMADH() { EE::Profiler.EmitOp(eeOpcode::PHMADH); - int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM((_Rd_ ? 
XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); @@ -1878,20 +2142,25 @@ void recPHMADH() xPSLL.D(xRegisterSSE(t0reg), 16); xPMADD.WD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - if( _Rd_ ) { - if( EEREC_D == EEREC_S ) { + if (_Rd_) + { + if (EEREC_D == EEREC_S) + { xPMADD.WD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( EEREC_D == EEREC_T ) { + else if (EEREC_D == EEREC_T) + { xPMADD.WD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPMADD.WD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_D)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_T)); xPMADD.WD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S)); } @@ -1912,11 +2181,12 @@ void recPMSUBH() { EE::Profiler.EmitOp(eeOpcode::PMSUBH); - int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM((_Rd_ ? 
XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); - if( !_Rd_ ) { + if (!_Rd_) + { xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S), 0xd8); //S0, S1, S4, S5, S2, S3, S6, S7 xPUNPCK.LWD(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); //S0, 0, S1, 0, S4, 0, S5, 0 @@ -1935,7 +2205,8 @@ void recPMSUBH() xPSUB.D(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg)); } - else { + else + { xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); @@ -1976,7 +2247,7 @@ void recPHMSBH() { EE::Profiler.EmitOp(eeOpcode::PHMSBH); - int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); @@ -1989,7 +2260,8 @@ void recPHMSBH() xPMADD.WD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_LO)); xPSUB.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI)); - if( _Rd_ ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); + if (_Rd_) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); xPCMP.EQD(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); xPXOR(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI)); @@ -2009,11 +2281,12 @@ void recPHMSBH() //////////////////////////////////////////////////// void recPEXEH() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXEH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc6); 
xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0xc6); _clearNeededXMMregs(); @@ -2022,11 +2295,12 @@ void recPEXEH() //////////////////////////////////////////////////// void recPREVH() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PREVH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x1B); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x1B); _clearNeededXMMregs(); @@ -2035,19 +2309,23 @@ void recPREVH() //////////////////////////////////////////////////// void recPINTH() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PINTH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if( EEREC_D == EEREC_S ) { + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); + if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVHL.PS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - if( EEREC_D != EEREC_T ) xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + if (EEREC_D != EEREC_T) + xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { + else + { xMOVLH.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -2056,22 +2334,24 @@ void recPINTH() void recPEXEW() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PEXEW); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc6); _clearNeededXMMregs(); } void recPROT3W() { - if (!_Rd_) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PROT3W); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = 
eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc9); _clearNeededXMMregs(); } @@ -2080,7 +2360,7 @@ void recPMULTH() { EE::Profiler.EmitOp(eeOpcode::PMULTH); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S)); @@ -2095,7 +2375,8 @@ void recPMULTH() // 4-7 xPUNPCK.HWD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI)); - if( _Rd_ ) { + if (_Rd_) + { // 0,2,4,6, L->H xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg), 0x88); @@ -2115,11 +2396,12 @@ void recPMULTH() void recPMFHI() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMFHI); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READHI ); + int info = eeRecompileCodeXMM(XMMINFO_WRITED | XMMINFO_READHI); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_HI)); _clearNeededXMMregs(); } @@ -2127,11 +2409,12 @@ void recPMFHI() //////////////////////////////////////////////////// void recPMFLO() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PMFLO); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO ); + int info = eeRecompileCodeXMM(XMMINFO_WRITED | XMMINFO_READLO); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); _clearNeededXMMregs(); } @@ -2139,18 +2422,22 @@ void recPMFLO() //////////////////////////////////////////////////// void recPAND() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PAND); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); - if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); + if (EEREC_D == EEREC_T) + { xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else if( EEREC_D == EEREC_S ) { + else if (EEREC_D == EEREC_S) + { xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -2160,18 +2447,22 @@ void recPAND() //////////////////////////////////////////////////// void recPXOR() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PXOR); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); - if( EEREC_D == EEREC_T ) { + int info = eeRecompileCodeXMM(XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); + if (EEREC_D == EEREC_T) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else if( EEREC_D == EEREC_S ) { + else if (EEREC_D == EEREC_S) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -2181,22 +2472,29 @@ void recPXOR() //////////////////////////////////////////////////// void recPCPYLD() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCPYLD); - int info = eeRecompileCodeXMM( XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT ); - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM(XMMINFO_WRITED | ((_Rs_ == 0) ? 
0 : XMMINFO_READS) | XMMINFO_READT); + if (_Rs_ == 0) + { xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else { - if( EEREC_D == EEREC_T ) xPUNPCK.LQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_S == EEREC_T ) xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S), 0x44); - else if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_T) + xPUNPCK.LQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if (EEREC_S == EEREC_T) + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S), 0x44); + else if (EEREC_D == EEREC_S) + { xPUNPCK.LQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x4e); } - else { + else + { xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPUNPCK.LQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } @@ -2208,11 +2506,12 @@ void recPMADDH() { EE::Profiler.EmitOp(eeOpcode::PMADDH); - int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM((_Rd_ ? 
XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); - if( !_Rd_ ) { + if (!_Rd_) + { xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S), 0xd8); //S0, S1, S4, S5, S2, S3, S6, S7 xPUNPCK.LWD(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); //S0, 0, S1, 0, S4, 0, S5, 0 @@ -2231,7 +2530,8 @@ void recPMADDH() xPADD.D(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg)); } - else { + else + { xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); @@ -2272,19 +2572,19 @@ void recPMADDH() *********************************************************/ #ifndef MMI3_RECOMPILE -REC_FUNC_DEL( PMADDUW, _Rd_); -REC_FUNC_DEL( PSRAVW, _Rd_); -REC_FUNC_DEL( PMTHI, _Rd_); -REC_FUNC_DEL( PMTLO, _Rd_); -REC_FUNC_DEL( PINTEH, _Rd_); -REC_FUNC_DEL( PMULTUW, _Rd_); -REC_FUNC_DEL( PDIVUW, _Rd_); -REC_FUNC_DEL( PCPYUD, _Rd_); -REC_FUNC_DEL( POR, _Rd_); -REC_FUNC_DEL( PNOR, _Rd_); -REC_FUNC_DEL( PCPYH, _Rd_); -REC_FUNC_DEL( PEXCW, _Rd_); -REC_FUNC_DEL( PEXCH, _Rd_); +REC_FUNC_DEL(PMADDUW, _Rd_); +REC_FUNC_DEL(PSRAVW, _Rd_); +REC_FUNC_DEL(PMTHI, _Rd_); +REC_FUNC_DEL(PMTLO, _Rd_); +REC_FUNC_DEL(PINTEH, _Rd_); +REC_FUNC_DEL(PMULTUW, _Rd_); +REC_FUNC_DEL(PDIVUW, _Rd_); +REC_FUNC_DEL(PCPYUD, _Rd_); +REC_FUNC_DEL(POR, _Rd_); +REC_FUNC_DEL(PNOR, _Rd_); +REC_FUNC_DEL(PCPYH, _Rd_); +REC_FUNC_DEL(PEXCW, _Rd_); +REC_FUNC_DEL(PEXCH, _Rd_); #else @@ -2293,35 +2593,42 @@ REC_FUNC_DEL( PEXCH, _Rd_); void recPSRAVW() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PSRAVW); - int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { + int info = eeRecompileCodeXMM((_Rs_ ? XMMINFO_READS : 0) | (_Rt_ ? 
XMMINFO_READT : 0) | XMMINFO_WRITED); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); // shamt is 5-bit xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - xPSLL.Q(xRegisterSSE(t0reg), 27+32); - xPSRL.Q(xRegisterSSE(t0reg), 27+32); + xPSLL.Q(xRegisterSSE(t0reg), 27 + 32); + xPSRL.Q(xRegisterSSE(t0reg), 27 + 32); // EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2] xMOVHL.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - if( EEREC_D != EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + if (EEREC_D != EEREC_T) + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); // shift (right arithmetic) Rt[0] xPSRA.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); @@ -2331,11 +2638,13 @@ void recPSRAVW() xPSRA.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); // merge & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { + if (x86caps.hasStreamingSIMD4Extensions) + { xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs @@ -2351,38 +2660,46 @@ void recPSRAVW() //////////////////////////////////////////////////// -static const __aligned16 u32 s_tempPINTEH[4] = {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; +static const __aligned16 u32 s_tempPINTEH[4] = {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff}; void recPINTEH() { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PINTEH); - int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM((_Rs_ ? XMMINFO_READS : 0) | (_Rt_ ? XMMINFO_READT : 0) | XMMINFO_WRITED); int t0reg = -1; - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPAND(xRegisterSSE(EEREC_D), ptr[s_tempPINTEH]); } } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSLL.D(xRegisterSSE(EEREC_D), 16); } - else { - if( EEREC_S == EEREC_T ) { + else + { + if (EEREC_S == EEREC_T) + { xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S), 0xa0); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0xa0); } - else if( EEREC_D == EEREC_T ) { - pxAssert( EEREC_D != EEREC_S ); + else if (EEREC_D == EEREC_T) + { + pxAssert(EEREC_D != EEREC_S); t0reg = _allocTempXMMreg(XMMT_INT, -1); xPSLL.D(xRegisterSSE(EEREC_D), 16); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); @@ -2390,7 +2707,8 @@ void recPINTEH() xPSLL.D(xRegisterSSE(t0reg), 16); xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); } - else { + else + { t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); @@ -2401,7 +2719,8 @@ void recPINTEH() } } - if( t0reg >= 0 ) _freeXMMreg(t0reg); + if (t0reg >= 0) + _freeXMMreg(t0reg); _clearNeededXMMregs(); } @@ -2410,35 +2729,45 @@ void recPMULTUW() { EE::Profiler.EmitOp(eeOpcode::PMULTUW); - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); - if( !_Rs_ || !_Rt_ ) { - if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + int info = eeRecompileCodeXMM((((_Rs_) && 
(_Rt_)) ? XMMINFO_READS : 0) | (((_Rs_) && (_Rt_)) ? XMMINFO_READT : 0) | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI); + if (!_Rs_ || !_Rt_) + { + if (_Rd_) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); xPXOR(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); } - else { - if( _Rd_ ) { - if( EEREC_D == EEREC_S ) xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + else + { + if (_Rd_) + { + if (EEREC_D == EEREC_S) + xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_D)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xPMUL.UDQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_T)); } // interleave & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { + if (x86caps.hasStreamingSIMD4Extensions) + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg)); @@ -2458,41 +2787,53 @@ void recPMADDUW() { EE::Profiler.EmitOp(eeOpcode::PMADDUW); - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); + int info = eeRecompileCodeXMM((((_Rs_) && (_Rt_)) ? XMMINFO_READS : 0) | (((_Rs_) && (_Rt_)) ? 
XMMINFO_READT : 0) | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI | XMMINFO_READLO | XMMINFO_READHI); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} - if( _Rd_ ) { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - else if( EEREC_D == EEREC_S ) xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (_Rd_) + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + else if (EEREC_D == EEREC_S) + xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPMUL.UDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } - else { - if( !_Rs_ || !_Rt_ ) xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); - else { + else + { + if (!_Rs_ || !_Rt_) + xPXOR(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); + else + { xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xPMUL.UDQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_T)); } } // add from LO/HI - if ( _Rd_ ) { + if (_Rd_) + { xPADD.Q(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_D)); } - else xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); + else + xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); // interleave & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { + if (x86caps.hasStreamingSIMD4Extensions) + { xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); } - else { + else + { int t0reg = 
_allocTempXMMreg(XMMT_INT, -1); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg)); @@ -2520,9 +2861,10 @@ void recPEXCW() { EE::Profiler.EmitOp(eeOpcode::PEXCW); - if (!_Rd_) return; + if (!_Rd_) + return; - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xd8); _clearNeededXMMregs(); } @@ -2532,9 +2874,10 @@ void recPEXCH() { EE::Profiler.EmitOp(eeOpcode::PEXCH); - if (!_Rd_) return; + if (!_Rd_) + return; - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xd8); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0xd8); _clearNeededXMMregs(); @@ -2543,53 +2886,68 @@ void recPEXCH() //////////////////////////////////////////////////// void recPNOR() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PNOR); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | (_Rt_ != 0 ? 
XMMINFO_READT : 0) | XMMINFO_WRITED); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { - xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D )); + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { + xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { - if( EEREC_D == EEREC_T ) { + else + { + if (EEREC_D == EEREC_T) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { - xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D )); - if( _Rt_ != 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else + { + xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); + if (_Rt_ != 0) + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } } } - else if( _Rt_ == 0 ) { - if( EEREC_D == EEREC_S ) { + else if (_Rt_ == 0) + { + if (EEREC_D == EEREC_S) + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } - else { - xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D )); + else + { + xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } } - else { + else + { int t0reg = _allocTempXMMreg(XMMT_INT, -1); - if( EEREC_D == EEREC_S ) xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { + if (EEREC_D == EEREC_S) + xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if( EEREC_S != EEREC_T ) xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + if (EEREC_S != EEREC_T) + xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg )); - xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg )); + 
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); + xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); _freeXMMreg(t0reg); } _clearNeededXMMregs(); @@ -2600,7 +2958,7 @@ void recPMTHI() { EE::Profiler.EmitOp(eeOpcode::PMTHI); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITEHI ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_WRITEHI); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); _clearNeededXMMregs(); } @@ -2610,7 +2968,7 @@ void recPMTLO() { EE::Profiler.EmitOp(eeOpcode::PMTLO); - int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITELO ); + int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_WRITELO); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S)); _clearNeededXMMregs(); } @@ -2618,34 +2976,44 @@ void recPMTLO() //////////////////////////////////////////////////// void recPCPYUD() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCPYUD); - int info = eeRecompileCodeXMM( XMMINFO_READS|(( _Rt_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READS | ((_Rt_ == 0) ? 
0 : XMMINFO_READT) | XMMINFO_WRITED); - if( _Rt_ == 0 ) { - if( EEREC_D == EEREC_S ) { + if (_Rt_ == 0) + { + if (EEREC_D == EEREC_S) + { xPUNPCK.HQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else { + else + { xMOVHL.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } - else { - if( EEREC_D == EEREC_S ) xPUNPCK.HQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) { + else + { + if (EEREC_D == EEREC_S) + xPUNPCK.HQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if (EEREC_D == EEREC_T) + { //TODO xPUNPCK.HQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x4e); } - else { - if( EEREC_S == EEREC_T ) { + else + { + if (EEREC_S == EEREC_T) + { xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S), 0xee); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.HQDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } @@ -2657,31 +3025,41 @@ void recPCPYUD() //////////////////////////////////////////////////// void recPOR() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::POR); - int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | (_Rt_ != 0 ? 
XMMINFO_READT : 0) | XMMINFO_WRITED); - if( _Rs_ == 0 ) { - if( _Rt_ == 0 ) { + if (_Rs_ == 0) + { + if (_Rt_ == 0) + { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } - else xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( _Rt_ == 0 ) { + else if (_Rt_ == 0) + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else { - if( EEREC_D == EEREC_S ) { + else + { + if (EEREC_D == EEREC_S) + { xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } - else if( EEREC_D == EEREC_T ) { + else if (EEREC_D == EEREC_T) + { xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else { + else + { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - if( EEREC_S != EEREC_T ) { + if (EEREC_S != EEREC_T) + { xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } } @@ -2692,16 +3070,20 @@ void recPOR() //////////////////////////////////////////////////// void recPCPYH() { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; EE::Profiler.EmitOp(eeOpcode::PCPYH); - int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0); _clearNeededXMMregs(); } -#endif // else MMI3_RECOMPILE +#endif // else MMI3_RECOMPILE -} } } } +} // namespace MMI +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/iMMI.h b/pcsx2/x86/iMMI.h index 18a1694b9f..45c06b751c 100644 --- a/pcsx2/x86/iMMI.h +++ b/pcsx2/x86/iMMI.h @@ -43,8 +43,8 @@ namespace OpcodeImpl { void recDIV1(); void recDIVU1(); -namespace MMI -{ +namespace MMI { + void recPLZCW(); void recMMI0(); void recMMI1(); @@ -138,7 +138,9 @@ namespace MMI void recPOR(); void recPCPYH(); -} } } } +} // namespace MMI +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif - diff --git a/pcsx2/x86/iMisc.cpp 
b/pcsx2/x86/iMisc.cpp index ec3e240bdc..ce0849b810 100644 --- a/pcsx2/x86/iMisc.cpp +++ b/pcsx2/x86/iMisc.cpp @@ -17,8 +17,8 @@ #include "PrecompiledHeader.h" #include "common/emitter/x86_intrin.h" -SSE_MXCSR g_sseMXCSR = { DEFAULT_sseMXCSR }; -SSE_MXCSR g_sseVUMXCSR = { DEFAULT_sseVUMXCSR }; +SSE_MXCSR g_sseMXCSR = {DEFAULT_sseMXCSR}; +SSE_MXCSR g_sseVUMXCSR = {DEFAULT_sseVUMXCSR}; // SetCPUState -- for assignment of SSE roundmodes and clampmodes. // @@ -26,9 +26,8 @@ void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR) { //Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR); - g_sseMXCSR = sseMXCSR.ApplyReserveMask(); - g_sseVUMXCSR = sseVUMXCSR.ApplyReserveMask(); + g_sseMXCSR = sseMXCSR.ApplyReserveMask(); + g_sseVUMXCSR = sseVUMXCSR.ApplyReserveMask(); - _mm_setcsr( g_sseMXCSR.bitmask ); + _mm_setcsr(g_sseMXCSR.bitmask); } - diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index e2aa8ceb97..00c49b8251 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -55,14 +55,14 @@ static __fi u32 HWADDR(u32 mem) { return psxhwLUT[mem >> 16] + mem; } static RecompiledCodeReserve* recMem = NULL; -static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here -static BASEBLOCK *recROM = NULL; // and here -static BASEBLOCK *recROM1 = NULL; // also here -static BASEBLOCK *recROM2 = NULL; // also here +static BASEBLOCK* recRAM = NULL; // and the ptr to the blocks here +static BASEBLOCK* recROM = NULL; // and here +static BASEBLOCK* recROM1 = NULL; // also here +static BASEBLOCK* recROM2 = NULL; // also here static BaseBlocks recBlocks; -static u8 *recPtr = NULL; -u32 psxpc; // recompiler psxpc -int psxbranch; // set for branch +static u8* recPtr = NULL; +u32 psxpc; // recompiler psxpc +int psxbranch; // set for branch u32 g_iopCyclePenalty; static EEINST* s_pInstCache = NULL; @@ -102,19 +102,19 @@ static u32 psxdump = 0; // Dynamically Compiled Dispatchers - R3000A style // 
===================================================================================================== -static void __fastcall iopRecRecompile( const u32 startpc ); +static void __fastcall iopRecRecompile(const u32 startpc); // Recompiled code buffer for EE recompiler dispatchers! static u8 __pagealigned iopRecDispatchers[__pagesize]; typedef void DynGenFunc(); -static DynGenFunc* iopDispatcherEvent = NULL; -static DynGenFunc* iopDispatcherReg = NULL; -static DynGenFunc* iopJITCompile = NULL; -static DynGenFunc* iopJITCompileInBlock = NULL; -static DynGenFunc* iopEnterRecompiledCode = NULL; -static DynGenFunc* iopExitRecompiledCode = NULL; +static DynGenFunc* iopDispatcherEvent = NULL; +static DynGenFunc* iopDispatcherReg = NULL; +static DynGenFunc* iopJITCompile = NULL; +static DynGenFunc* iopJITCompileInBlock = NULL; +static DynGenFunc* iopEnterRecompiledCode = NULL; +static DynGenFunc* iopExitRecompiledCode = NULL; static void recEventTest() { @@ -125,17 +125,17 @@ static void recEventTest() // dispatches to the recompiled block address. static DynGenFunc* _DynGen_JITCompile() { - pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." ); + pxAssertMsg(iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. 
Thanks."); u8* retval = xGetPtr(); - xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc] ); + xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]); - xMOV( eax, ptr[&psxRegs.pc] ); - xMOV( ebx, eax ); - xSHR( eax, 16 ); - xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] ); - xJMP( ptrNative[rbx*(wordsize/4) + rcx] ); + xMOV(eax, ptr[&psxRegs.pc]); + xMOV(ebx, eax); + xSHR(eax, 16); + xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); + xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); return (DynGenFunc*)retval; } @@ -143,7 +143,7 @@ static DynGenFunc* _DynGen_JITCompile() static DynGenFunc* _DynGen_JITCompileInBlock() { u8* retval = xGetPtr(); - xJMP( (void*)iopJITCompile ); + xJMP((void*)iopJITCompile); return (DynGenFunc*)retval; } @@ -152,11 +152,11 @@ static DynGenFunc* _DynGen_DispatcherReg() { u8* retval = xGetPtr(); - xMOV( eax, ptr[&psxRegs.pc] ); - xMOV( ebx, eax ); - xSHR( eax, 16 ); - xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] ); - xJMP( ptrNative[rbx*(wordsize/4) + rcx] ); + xMOV(eax, ptr[&psxRegs.pc]); + xMOV(ebx, eax); + xSHR(eax, 16); + xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); + xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); return (DynGenFunc*)retval; } @@ -193,26 +193,26 @@ static DynGenFunc* _DynGen_EnterRecompiledCode() static void _DynGen_Dispatchers() { // In case init gets called multiple times: - HostSys::MemProtectStatic( iopRecDispatchers, PageAccess_ReadWrite() ); + HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ReadWrite()); // clear the buffer to 0xcc (easier debugging). - memset( iopRecDispatchers, 0xcc, __pagesize); + memset(iopRecDispatchers, 0xcc, __pagesize); - xSetPtr( iopRecDispatchers ); + xSetPtr(iopRecDispatchers); // Place the EventTest and DispatcherReg stuff at the top, because they get called the // most and stand to benefit from strong alignment and direct referencing. 
iopDispatcherEvent = (DynGenFunc*)xGetPtr(); - xFastCall((void*)recEventTest ); - iopDispatcherReg = _DynGen_DispatcherReg(); + xFastCall((void*)recEventTest); + iopDispatcherReg = _DynGen_DispatcherReg(); - iopJITCompile = _DynGen_JITCompile(); - iopJITCompileInBlock = _DynGen_JITCompileInBlock(); - iopEnterRecompiledCode = _DynGen_EnterRecompiledCode(); + iopJITCompile = _DynGen_JITCompile(); + iopJITCompileInBlock = _DynGen_JITCompileInBlock(); + iopEnterRecompiledCode = _DynGen_EnterRecompiledCode(); - HostSys::MemProtectStatic( iopRecDispatchers, PageAccess_ExecOnly() ); + HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ExecOnly()); - recBlocks.SetJITCompile( iopJITCompile ); + recBlocks.SetJITCompile(iopJITCompile); Perf::any.map((uptr)&iopRecDispatchers, 4096, "IOP Dispatcher"); } @@ -221,23 +221,24 @@ static void _DynGen_Dispatchers() using namespace R3000A; #include "Utilities/AsciiFile.h" -static void iIopDumpBlock( int startpc, u8 * ptr ) +static void iIopDumpBlock(int startpc, u8* ptr) { u32 i, j; EEINST* pcur; u8 used[34]; int numused, count; - Console.WriteLn( "dump1 %x:%x, %x", startpc, psxpc, psxRegs.cycle ); + Console.WriteLn("dump1 %x:%x, %x", startpc, psxpc, psxRegs.cycle); g_Conf->Folders.Logs.Mkdir(); - wxString filename( Path::Combine( g_Conf->Folders.Logs, wxsFormat( L"psxdump%.8X.txt", startpc ) ) ); - AsciiFile f( filename, L"w" ); + wxString filename(Path::Combine(g_Conf->Folders.Logs, wxsFormat(L"psxdump%.8X.txt", startpc))); + AsciiFile f(filename, L"w"); f.Printf("Dump PSX register data: 0x%x\n\n", (uptr)&psxRegs); - for ( i = startpc; i < s_nEndBlock; i += 4 ) { - f.Printf("%s\n", disR3000AF( iopMemRead32( i ), i ) ); + for (i = startpc; i < s_nEndBlock; i += 4) + { + f.Printf("%s\n", disR3000AF(iopMemRead32(i), i)); } // write the instruction info @@ -245,33 +246,42 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) memzero(used); numused = 0; - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( 
s_pInstCache->regs[i] & EEINST_USED ) { + for (i = 0; i < ArraySize(s_pInstCache->regs); ++i) + { + if (s_pInstCache->regs[i] & EEINST_USED) + { used[i] = 1; numused++; } } f.Printf(" "); - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( used[i] ) f.Printf("%2d ", i); + for (i = 0; i < ArraySize(s_pInstCache->regs); ++i) + { + if (used[i]) + f.Printf("%2d ", i); } f.Printf("\n"); f.Printf(" "); - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( used[i] ) f.Printf("%s ", disRNameGPR[i]); + for (i = 0; i < ArraySize(s_pInstCache->regs); ++i) + { + if (used[i]) + f.Printf("%s ", disRNameGPR[i]); } f.Printf("\n"); - pcur = s_pInstCache+1; - for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { - f.Printf("%2d: %2.2x ", i+1, pcur->info); + pcur = s_pInstCache + 1; + for (i = 0; i < (s_nEndBlock - startpc) / 4; ++i, ++pcur) + { + f.Printf("%2d: %2.2x ", i + 1, pcur->info); count = 1; - for(j = 0; j < ArraySize(s_pInstCache->regs); j++) { - if( used[j] ) { - f.Printf("%2.2x%s", pcur->regs[j], ((count%8)&&countregs); j++) + { + if (used[j]) + { + f.Printf("%2.2x%s", pcur->regs[j], ((count % 8) && count < numused) ? 
"_" : " "); ++count; } } @@ -282,12 +292,12 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) #ifdef __linux__ // dump the asm { - AsciiFile f2( L"mydump1", L"w" ); - f2.Write( ptr, (uptr)x86Ptr - (uptr)ptr ); + AsciiFile f2(L"mydump1", L"w"); + f2.Write(ptr, (uptr)x86Ptr - (uptr)ptr); } - int status = std::system( wxsFormat( L"objdump -D -b binary -mi386 -M intel --no-show-raw-insn %s >> %s; rm %s", - "mydump1", WX_STR(filename), "mydump1").mb_str() ); + int status = std::system(wxsFormat(L"objdump -D -b binary -mi386 -M intel --no-show-raw-insn %s >> %s; rm %s", + "mydump1", WX_STR(filename), "mydump1").mb_str()); if (!WIFEXITED(status)) Console.Error("IOP dump didn't terminate normally"); @@ -296,16 +306,18 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) u8 _psxLoadWritesRs(u32 tempcode) { - switch(tempcode>>26) { + switch (tempcode >> 26) + { case 32: case 33: case 34: case 35: case 36: case 37: case 38: - return ((tempcode>>21)&0x1f)==((tempcode>>16)&0x1f); // rs==rt + return ((tempcode >> 21) & 0x1f) == ((tempcode >> 16) & 0x1f); // rs==rt } return 0; } u8 _psxIsLoadStore(u32 tempcode) { - switch(tempcode>>26) { + switch (tempcode >> 26) + { case 32: case 33: case 34: case 35: case 36: case 37: case 38: // 4 byte stores case 40: case 41: case 42: case 43: case 46: @@ -317,16 +329,24 @@ u8 _psxIsLoadStore(u32 tempcode) void _psxFlushAllUnused() { int i; - for(i = 0; i < 34; ++i) { - if( psxpc < s_nEndBlock ) { - if( (g_pCurInstInfo[1].regs[i]&EEINST_USED) ) + for (i = 0; i < 34; ++i) + { + if (psxpc < s_nEndBlock) + { + if ((g_pCurInstInfo[1].regs[i] & EEINST_USED)) continue; } - else if( (g_pCurInstInfo[0].regs[i]&EEINST_USED) ) + else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED)) + { continue; + } - if( i < 32 && PSX_IS_CONST1(i) ) _psxFlushConstReg(i); - else { + if (i < 32 && PSX_IS_CONST1(i)) + { + _psxFlushConstReg(i); + } + else + { _deleteX86reg(X86TYPE_PSX, i, 1); } } @@ -335,13 +355,15 @@ void _psxFlushAllUnused() int 
_psxFlushUnusedConstReg() { int i; - for(i = 1; i < 32; ++i) { - if( (g_psxHasConstReg & (1<> 16 == 0x2400) psxRecompileIrxImport(); - return; - } + return; + } // for now, don't support xmm _deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 0); - if( PSX_IS_CONST1(_Rs_) ) { + if (PSX_IS_CONST1(_Rs_)) + { PSX_SET_CONST(_Rt_); constcode(); return; @@ -581,14 +619,16 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode) // rd = rt op sa void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode) { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; // for now, don't support xmm _deleteX86reg(X86TYPE_PSX, _Rt_, 1); _deleteX86reg(X86TYPE_PSX, _Rd_, 0); - if( PSX_IS_CONST1(_Rt_) ) { + if (PSX_IS_CONST1(_Rt_)) + { PSX_SET_CONST(_Rd_); constcode(); return; @@ -604,22 +644,26 @@ void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, _deleteX86reg(X86TYPE_PSX, _Rs_, 1); _deleteX86reg(X86TYPE_PSX, _Rt_, 1); - if( LOHI ) { + if (LOHI) + { _deleteX86reg(X86TYPE_PSX, PSX_HI, 1); _deleteX86reg(X86TYPE_PSX, PSX_LO, 1); } - if( PSX_IS_CONST2(_Rs_, _Rt_) ) { + if (PSX_IS_CONST2(_Rs_, _Rt_)) + { constcode(); return; } - if( PSX_IS_CONST1(_Rs_) ) { + if (PSX_IS_CONST1(_Rs_)) + { constscode(0); return; } - if( PSX_IS_CONST1(_Rt_) ) { + if (PSX_IS_CONST1(_Rt_)) + { consttcode(0); return; } @@ -635,15 +679,18 @@ static const uint m_recBlockAllocSize = static void recReserveCache() { - if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb); + if (!recMem) + recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb); recMem->SetProfilerName("IOPrec"); while (!recMem->IsOk()) { - if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::IOPrecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) break; + if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::IOPrecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) + break; // If it failed, then try again 
(if possible): - if (m_ConfiguredCacheReserve < 4) break; + if (m_ConfiguredCacheReserve < 4) + break; m_ConfiguredCacheReserve /= 2; } @@ -663,34 +710,34 @@ static void recAlloc() // Any 4-byte aligned address makes a valid branch target as per MIPS design (all instructions are // always 4 bytes long). - if( m_recBlockAlloc == NULL ) - m_recBlockAlloc = (u8*)_aligned_malloc( m_recBlockAllocSize, 4096 ); + if (m_recBlockAlloc == NULL) + m_recBlockAlloc = (u8*)_aligned_malloc(m_recBlockAllocSize, 4096); - if( m_recBlockAlloc == NULL ) - throw Exception::OutOfMemory( L"R3000A BASEBLOCK lookup tables" ); + if (m_recBlockAlloc == NULL) + throw Exception::OutOfMemory(L"R3000A BASEBLOCK lookup tables"); u8* curpos = m_recBlockAlloc; - recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK); - recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK); - recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK); - recROM2 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK); + recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK); + recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK); + recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK); + recROM2 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK); - if( s_pInstCache == NULL ) + if (s_pInstCache == NULL) { s_nInstCacheSize = 128; - s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize ); + s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); } - if( s_pInstCache == NULL ) - throw Exception::OutOfMemory( L"R3000 InstCache." ); + if (s_pInstCache == NULL) + throw Exception::OutOfMemory(L"R3000 InstCache."); _DynGen_Dispatchers(); } void recResetIOP() { - DevCon.WriteLn( "iR3000A Recompiler reset." 
); + DevCon.WriteLn("iR3000A Recompiler reset."); Perf::iop.reset(); @@ -711,21 +758,21 @@ void recResetIOP() // We're only mapping 20 pages here in 4 places. // 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4 - for (int i=0; i<0x80; i++) + for (int i = 0; i < 0x80; i++) { recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x0000, i, i & 0x1f); recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x8000, i, i & 0x1f); recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0xa000, i, i & 0x1f); } - for (int i=0x1fc0; i<0x2000; i++) + for (int i = 0x1fc0; i < 0x2000; i++) { recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x0000, i, i - 0x1fc0); recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x8000, i, i - 0x1fc0); recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0xa000, i, i - 0x1fc0); } - for (int i=0x1e00; i<0x1e04; i++) + for (int i = 0x1e00; i < 0x1e04; i++) { recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x0000, i, i - 0x1e00); recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x8000, i, i - 0x1e00); @@ -739,8 +786,8 @@ void recResetIOP() recLUT_SetPage(psxRecLUT, psxhwLUT, recROM2, 0xa000, i, i - 0x1e40); } - if( s_pInstCache ) - memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize ); + if (s_pInstCache) + memset(s_pInstCache, 0, sizeof(EEINST) * s_nInstCacheSize); recBlocks.Reset(); g_psxMaxRecMem = 0; @@ -751,11 +798,11 @@ void recResetIOP() static void recShutdown() { - safe_delete( recMem ); + safe_delete(recMem); - safe_aligned_free( m_recBlockAlloc ); + safe_aligned_free(m_recBlockAlloc); - safe_free( s_pInstCache ); + safe_free(s_pInstCache); s_nInstCacheSize = 0; // FIXME Warning thread unsafe @@ -774,7 +821,7 @@ static void recExecute() //for (;;) R3000AExecute(); } -static __noinline s32 recExecuteBlock( s32 eeCycles ) +static __noinline s32 recExecuteBlock(s32 eeCycles) { iopBreak = 0; iopCycleEE = eeCycles; @@ -790,15 +837,15 @@ static __noinline s32 recExecuteBlock( s32 eeCycles ) // Likely Disasm, as borrowed from MSVC: -// Entry: -// mov eax,dword ptr [esp+4] -// mov dword ptr 
[iopBreak (0E88DCCh)],0 -// mov dword ptr [iopCycleEE (832A84h)],eax + // Entry: + // mov eax,dword ptr [esp+4] + // mov dword ptr [iopBreak (0E88DCCh)],0 + // mov dword ptr [iopCycleEE (832A84h)],eax -// Exit: -// mov ecx,dword ptr [iopBreak (0E88DCCh)] -// mov edx,dword ptr [iopCycleEE (832A84h)] -// lea eax,[edx+ecx] + // Exit: + // mov ecx,dword ptr [iopBreak (0E88DCCh)] + // mov edx,dword ptr [iopCycleEE (832A84h)] + // lea eax,[edx+ecx] iopEnterRecompiledCode(); @@ -821,7 +868,8 @@ static __fi u32 psxRecClearMem(u32 pc) int blockidx = recBlocks.Index(pc); pxAssert(blockidx != -1); - while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1]) { + while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1]) + { if (pexblock->startpc + pexblock->size * 4 <= lowerextent) break; @@ -831,7 +879,8 @@ static __fi u32 psxRecClearMem(u32 pc) int toRemoveFirst = blockidx; - while (BASEBLOCKEX* pexblock = recBlocks[blockidx]) { + while (BASEBLOCKEX* pexblock = recBlocks[blockidx]) + { if (pexblock->startpc >= upperextent) break; @@ -841,16 +890,18 @@ static __fi u32 psxRecClearMem(u32 pc) blockidx++; } - if(toRemoveFirst != blockidx) { + if (toRemoveFirst != blockidx) + { recBlocks.Remove(toRemoveFirst, (blockidx - 1)); } - blockidx=0; - while(BASEBLOCKEX* pexblock = recBlocks[blockidx++]) + blockidx = 0; + while (BASEBLOCKEX* pexblock = recBlocks[blockidx++]) { - if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) { + if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) + { DevCon.Error("[IOP] Impossible block clearing failure"); - pxFailDev( "[IOP] Impossible block clearing failure" ); + pxFailDev("[IOP] Impossible block clearing failure"); } } @@ -862,7 +913,7 @@ static __fi u32 psxRecClearMem(u32 pc) static __fi void recClearIOP(u32 Addr, u32 Size) { u32 pc = Addr; - while (pc < Addr + Size*4) + while (pc < Addr + Size * 4) pc += PSXREC_CLEARM(pc); } @@ -870,49 +921,52 @@ void psxSetBranchReg(u32 reg) { psxbranch = 1; - if( reg 
!= 0xffffffff ) { + if (reg != 0xffffffff) + { _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _psxMoveGPRtoR(calleeSavedReg2d, reg); psxRecompileNextInstruction(1); - if( x86regs[calleeSavedReg2d.GetId()].inuse ) { - pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK ); + if (x86regs[calleeSavedReg2d.GetId()].inuse) + { + pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d); x86regs[calleeSavedReg2d.GetId()].inuse = 0; - #ifdef PCSX2_DEBUG - xOR( calleeSavedReg2d, calleeSavedReg2d ); - #endif +#ifdef PCSX2_DEBUG + xOR(calleeSavedReg2d, calleeSavedReg2d); +#endif } - else { + else + { xMOV(eax, ptr32[&g_recWriteback]); xMOV(ptr32[&psxRegs.pc], eax); - #ifdef PCSX2_DEBUG - xOR( eax, eax ); - #endif +#ifdef PCSX2_DEBUG + xOR(eax, eax); +#endif } - #ifdef PCSX2_DEBUG +#ifdef PCSX2_DEBUG xForwardJNZ8 skipAssert; - xWrite8( 0xcc ); + xWrite8(0xcc); skipAssert.SetTarget(); - #endif +#endif } _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); + JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5)); } -void psxSetBranchImm( u32 imm ) +void psxSetBranchImm(u32 imm) { psxbranch = 1; - pxAssert( imm ); + pxAssert(imm); // end the current block - xMOV(ptr32[&psxRegs.pc], imm ); + xMOV(ptr32[&psxRegs.pc], imm); _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(imm, imm <= psxpc); @@ -946,7 +1000,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) xFastCall((void*)iopEventTest); - if( newpc != 0xffffffff ) + if (newpc != 0xffffffff) { xCMP(ptr32[&psxRegs.pc], newpc); xJNE(iopDispatcherReg); @@ -959,7 +1013,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) xMOV(ptr32[&psxRegs.cycle], eax); // update cycles // jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE) - xSUB(ptr32[&iopCycleEE], blockCycles*8); + xSUB(ptr32[&iopCycleEE], blockCycles * 8); 
xJLE(iopExitRecompiledCode); // check if an event is pending @@ -968,7 +1022,8 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) xFastCall((void*)iopEventTest); - if( newpc != 0xffffffff ) { + if (newpc != 0xffffffff) + { xCMP(ptr32[&psxRegs.pc], newpc); xJNE(iopDispatcherReg); } @@ -997,20 +1052,20 @@ static void checkcodefn() void rpsxSYSCALL() { - xMOV(ptr32[&psxRegs.code], psxRegs.code ); + xMOV(ptr32[&psxRegs.code], psxRegs.code); xMOV(ptr32[&psxRegs.pc], psxpc - 4); _psxFlushCall(FLUSH_NODESTROY); //xMOV( ecx, 0x20 ); // exception code //xMOV( edx, psxbranch==1 ); // branch delay slot? - xFastCall((void*)psxException, 0x20, psxbranch == 1 ); + xFastCall((void*)psxException, 0x20, psxbranch == 1); - xCMP(ptr32[&psxRegs.pc], psxpc-4); + xCMP(ptr32[&psxRegs.pc], psxpc - 4); j8Ptr[0] = JE8(0); - xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles() ); - xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); + xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles()); + xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles() * 8); + JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5)); // jump target for skipping blockCycle updates x86SetJ8(j8Ptr[0]); @@ -1020,19 +1075,19 @@ void rpsxSYSCALL() void rpsxBREAK() { - xMOV(ptr32[&psxRegs.code], psxRegs.code ); + xMOV(ptr32[&psxRegs.code], psxRegs.code); xMOV(ptr32[&psxRegs.pc], psxpc - 4); _psxFlushCall(FLUSH_NODESTROY); //xMOV( ecx, 0x24 ); // exception code //xMOV( edx, psxbranch==1 ); // branch delay slot? 
- xFastCall((void*)psxException, 0x24, psxbranch == 1 ); + xFastCall((void*)psxException, 0x24, psxbranch == 1); - xCMP(ptr32[&psxRegs.pc], psxpc-4); + xCMP(ptr32[&psxRegs.pc], psxpc - 4); j8Ptr[0] = JE8(0); - xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles() ); - xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); + xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles()); + xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles() * 8); + JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5)); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; @@ -1047,14 +1102,17 @@ void psxDynarecCheckBreakpoint() int bpFlags = psxIsBreakpointNeeded(pc); bool hit = false; //check breakpoint at current pc - if (bpFlags & 1) { + if (bpFlags & 1) + { auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc); - if (cond == NULL || cond->Evaluate()) { + if (cond == NULL || cond->Evaluate()) + { hit = true; } } //check breakpoint in delay slot - if (bpFlags & 2) { + if (bpFlags & 2) + { auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc + 4); if (cond == NULL || cond->Evaluate()) hit = true; @@ -1121,19 +1179,21 @@ void psxRecMemcheck(u32 op, u32 bits, bool store) // logic: memAddress < bpEnd && bpStart < memAddress+memSize xMOV(eax, standardizeBreakpointAddress(BREAKPOINT_IOP, checks[i].end)); - xCMP(ecx, eax); // address < end - xForwardJGE8 next1; // if address >= end then goto next1 + xCMP(ecx, eax); // address < end + xForwardJGE8 next1; // if address >= end then goto next1 xMOV(eax, standardizeBreakpointAddress(BREAKPOINT_IOP, checks[i].start)); - xCMP(eax, edx); // start < address+size - xForwardJGE8 next2; // if start >= address+size then goto next2 + xCMP(eax, edx); // start < address+size + xForwardJGE8 next2; // if start >= address+size then goto next2 - // hit the breakpoint - if (checks[i].result & MEMCHECK_LOG) { + // hit the breakpoint + if (checks[i].result & MEMCHECK_LOG) + { xMOV(edx, store); 
xFastCall((void*)psxDynarecMemLogcheck, ecx, edx); } - if (checks[i].result & MEMCHECK_BREAK) { + if (checks[i].result & MEMCHECK_BREAK) + { xFastCall((void*)psxDynarecMemcheck); } @@ -1169,21 +1229,11 @@ void psxEncodeMemcheck() bool store = (opcode.flags & IS_STORE) != 0; switch (opcode.flags & MEMTYPE_MASK) { - case MEMTYPE_BYTE: - psxRecMemcheck(op, 8, store); - break; - case MEMTYPE_HALF: - psxRecMemcheck(op, 16, store); - break; - case MEMTYPE_WORD: - psxRecMemcheck(op, 32, store); - break; - case MEMTYPE_DWORD: - psxRecMemcheck(op, 64, store); - break; - case MEMTYPE_QWORD: - psxRecMemcheck(op, 128, store); - break; + case MEMTYPE_BYTE: psxRecMemcheck(op, 8, store); break; + case MEMTYPE_HALF: psxRecMemcheck(op, 16, store); break; + case MEMTYPE_WORD: psxRecMemcheck(op, 32, store); break; + case MEMTYPE_DWORD: psxRecMemcheck(op, 64, store); break; + case MEMTYPE_QWORD: psxRecMemcheck(op, 128, store); break; } } @@ -1199,25 +1249,26 @@ void psxRecompileNextInstruction(int delayslot) psxEncodeMemcheck(); } - if( IsDebugBuild ) { + if (IsDebugBuild) + { xNOP(); xMOV(eax, psxpc); } - psxRegs.code = iopMemRead32( psxpc ); + psxRegs.code = iopMemRead32(psxpc); s_psxBlockCycles++; psxpc += 4; g_pCurInstInfo++; g_iopCyclePenalty = 0; - rpsxBSC[ psxRegs.code >> 26 ](); + rpsxBSC[psxRegs.code >> 26](); s_psxBlockCycles += g_iopCyclePenalty; _clearNeededX86regs(); } -static void __fastcall PreBlockCheck( u32 blockpc ) +static void __fastcall PreBlockCheck(u32 blockpc) { #ifdef PCSX2_DEBUG extern void iDumpPsxRegisters(u32 startpc, u32 temp); @@ -1226,13 +1277,14 @@ static void __fastcall PreBlockCheck( u32 blockpc ) static int curcount = 0; const int skip = 0; - //*(int*)PSXM(0x27990) = 1; // enables cdvd bios output for scph10000 + //*(int*)PSXM(0x27990) = 1; // enables cdvd bios output for scph10000 - if( (psxdump&2) && lastrec != blockpc ) - { + if ((psxdump & 2) && lastrec != blockpc) + { curcount++; - if( curcount > skip ) { + if (curcount > skip) + { 
iDumpPsxRegisters(blockpc, 1); curcount = 0; } @@ -1242,33 +1294,36 @@ static void __fastcall PreBlockCheck( u32 blockpc ) #endif } -static void __fastcall iopRecRecompile( const u32 startpc ) +static void __fastcall iopRecRecompile(const u32 startpc) { u32 i; u32 willbranch3 = 0; // Inject IRX hack - if (startpc == 0x1630 && g_Conf->CurrentIRX.Length() > 3) { - if (iopMemRead32(0x20018) == 0x1F) { + if (startpc == 0x1630 && g_Conf->CurrentIRX.Length() > 3) + { + if (iopMemRead32(0x20018) == 0x1F) + { // FIXME do I need to increase the module count (0x1F -> 0x20) iopMemWrite32(0x20094, 0xbffc0000); } } - if( IsDebugBuild && (psxdump & 4) ) + if (IsDebugBuild && (psxdump & 4)) { extern void iDumpPsxRegisters(u32 startpc, u32 temp); iDumpPsxRegisters(startpc, 0); } - pxAssert( startpc ); + pxAssert(startpc); // if recPtr reached the mem limit reset whole mem - if (recPtr >= (recMem->GetPtrEnd() - _64kb)) { + if (recPtr >= (recMem->GetPtrEnd() - _64kb)) + { recResetIOP(); } - x86SetPtr( recPtr ); + x86SetPtr(recPtr); x86Align(16); recPtr = x86Ptr; @@ -1279,12 +1334,12 @@ static void __fastcall iopRecRecompile( const u32 startpc ) s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); - if(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) + if (!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr); psxbranch = 0; - s_pCurBlock->SetFnptr( (uptr)x86Ptr ); + s_pCurBlock->SetFnptr((uptr)x86Ptr); s_psxBlockCycles = 0; // reset recomp state variables @@ -1293,13 +1348,14 @@ static void __fastcall iopRecRecompile( const u32 startpc ) _initX86regs(); - if ((psxHu32(HW_ICFG) & 8) && (HWADDR(startpc) == 0xa0 || HWADDR(startpc) == 0xb0 || HWADDR(startpc) == 0xc0)) { + if ((psxHu32(HW_ICFG) & 8) && (HWADDR(startpc) == 0xa0 || HWADDR(startpc) == 0xb0 || HWADDR(startpc) == 0xc0)) + { xFastCall((void*)psxBiosCall); xTEST(al, al); xJNZ(iopDispatcherReg); } - if( IsDebugBuild ) + if (IsDebugBuild) { 
xFastCall((void*)PreBlockCheck, psxpc); } @@ -1309,11 +1365,13 @@ static void __fastcall iopRecRecompile( const u32 startpc ) s_nEndBlock = 0xffffffff; s_branchTo = -1; - while(1) { + while (1) + { BASEBLOCK* pblock = PSX_GETBLOCK(i); if (i != startpc && pblock->GetFnptr() != (uptr)iopJITCompile - && pblock->GetFnptr() != (uptr)iopJITCompileInBlock) { + && pblock->GetFnptr() != (uptr)iopJITCompileInBlock) + { // branch = 3 willbranch3 = 1; s_nEndBlock = i; @@ -1322,26 +1380,26 @@ static void __fastcall iopRecRecompile( const u32 startpc ) psxRegs.code = iopMemRead32(i); - switch(psxRegs.code >> 26) { + switch (psxRegs.code >> 26) + { case 0: // special - - if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR + if (_Funct_ == 8 || _Funct_ == 9) + { // JR, JALR s_nEndBlock = i + 8; goto StartRecomp; } - break; + case 1: // regimm - - if( _Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17 ) { - + if (_Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17) + { s_branchTo = _Imm_ * 4 + i + 4; - if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; - else s_nEndBlock = i+8; - + if (s_branchTo > startpc && s_branchTo < i) + s_nEndBlock = s_branchTo; + else + s_nEndBlock = i + 8; goto StartRecomp; } - break; case 2: // J @@ -1352,11 +1410,11 @@ static void __fastcall iopRecRecompile( const u32 startpc ) // branches case 4: case 5: case 6: case 7: - s_branchTo = _Imm_ * 4 + i + 4; - if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; - else s_nEndBlock = i+8; - + if (s_branchTo > startpc && s_branchTo < i) + s_nEndBlock = s_branchTo; + else + s_nEndBlock = i + 8; goto StartRecomp; } @@ -1366,11 +1424,15 @@ static void __fastcall iopRecRecompile( const u32 startpc ) StartRecomp: s_nBlockFF = false; - if (s_branchTo == startpc) { + if (s_branchTo == startpc) + { s_nBlockFF = true; - for (i = startpc; i < s_nEndBlock; i += 4) { - if (i != s_nEndBlock - 8) { - switch (iopMemRead32(i)) { + for (i = startpc; i < s_nEndBlock; i += 4) + { + if (i != 
s_nEndBlock - 8) + { + switch (iopMemRead32(i)) + { case 0: // nop break; default: @@ -1384,82 +1446,92 @@ StartRecomp: { EEINST* pcur; - if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { + if (s_nInstCacheSize < (s_nEndBlock - startpc) / 4 + 1) + { free(s_pInstCache); - s_nInstCacheSize = (s_nEndBlock-startpc)/4+10; - s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize); - pxAssert( s_pInstCache != NULL ); + s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10; + s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); + pxAssert(s_pInstCache != NULL); } - pcur = s_pInstCache + (s_nEndBlock-startpc)/4; + pcur = s_pInstCache + (s_nEndBlock - startpc) / 4; _recClearInst(pcur); pcur->info = 0; - for(i = s_nEndBlock; i > startpc; i -= 4 ) { - psxRegs.code = iopMemRead32(i-4); + for (i = s_nEndBlock; i > startpc; i -= 4) + { + psxRegs.code = iopMemRead32(i - 4); pcur[-1] = pcur[0]; - rpsxpropBSC(pcur-1, pcur); + rpsxpropBSC(pcur - 1, pcur); pcur--; } } // dump code - if( IsDebugBuild ) + if (IsDebugBuild) { - for(i = 0; i < ArraySize(s_psxrecblocks); ++i) { - if( startpc == s_psxrecblocks[i] ) { + for (i = 0; i < ArraySize(s_psxrecblocks); ++i) + { + if (startpc == s_psxrecblocks[i]) + { iIopDumpBlock(startpc, recPtr); } } - if( (psxdump & 1) ) + if ((psxdump & 1)) iIopDumpBlock(startpc, recPtr); } g_pCurInstInfo = s_pInstCache; - while (!psxbranch && psxpc < s_nEndBlock) { + while (!psxbranch && psxpc < s_nEndBlock) + { psxRecompileNextInstruction(0); } - if( IsDebugBuild && (psxdump & 1) ) + if (IsDebugBuild && (psxdump & 1)) iIopDumpBlock(startpc, recPtr); - pxAssert( (psxpc-startpc)>>2 <= 0xffff ); - s_pCurBlockEx->size = (psxpc-startpc)>>2; + pxAssert((psxpc - startpc) >> 2 <= 0xffff); + s_pCurBlockEx->size = (psxpc - startpc) >> 2; - for(i = 1; i < (u32)s_pCurBlockEx->size; ++i) { + for (i = 1; i < (u32)s_pCurBlockEx->size; ++i) + { if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile) s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock); } 
- if( !(psxpc&0x10000000) ) - g_psxMaxRecMem = std::max( (psxpc&~0xa0000000), g_psxMaxRecMem ); + if (!(psxpc & 0x10000000)) + g_psxMaxRecMem = std::max((psxpc & ~0xa0000000), g_psxMaxRecMem); - if( psxbranch == 2 ) { + if (psxbranch == 2) + { _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); + JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5)); } - else { - if( psxbranch ) pxAssert( !willbranch3 ); + else + { + if (psxbranch) + pxAssert(!willbranch3); else { - xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles() ); - xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles()*8 ); + xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles()); + xSUB(ptr32[&iopCycleEE], psxScaleBlockCycles() * 8); } - if (willbranch3 || !psxbranch) { - pxAssert( psxpc == s_nEndBlock ); + if (willbranch3 || !psxbranch) + { + pxAssert(psxpc == s_nEndBlock); _psxFlushCall(FLUSH_EVERYTHING); xMOV(ptr32[&psxRegs.pc], psxpc); - recBlocks.Link(HWADDR(s_nEndBlock), xJcc32() ); + recBlocks.Link(HWADDR(s_nEndBlock), xJcc32()); psxbranch = 3; } } - pxAssert( xGetPtr() < recMem->GetPtrEnd() ); + pxAssert(xGetPtr() < recMem->GetPtrEnd()); pxAssert(xGetPtr() - recPtr < _64kb); s_pCurBlockEx->x86size = xGetPtr() - recPtr; @@ -1468,13 +1540,13 @@ StartRecomp: recPtr = xGetPtr(); - pxAssert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); + pxAssert((g_psxHasConstReg & g_psxFlushedConstReg) == g_psxHasConstReg); s_pCurBlock = NULL; s_pCurBlockEx = NULL; } -static void recSetCacheReserve( uint reserveInMegs ) +static void recSetCacheReserve(uint reserveInMegs) { m_ConfiguredCacheReserve = reserveInMegs; } @@ -1491,8 +1563,7 @@ R3000Acpu psxRec = { recExecuteBlock, recClearIOP, recShutdown, - + recGetCacheReserve, recSetCacheReserve }; - diff --git a/pcsx2/x86/iR3000A.h b/pcsx2/x86/iR3000A.h index 10b915a3a0..3e1bf4c23f 100644 --- a/pcsx2/x86/iR3000A.h +++ b/pcsx2/x86/iR3000A.h @@ -54,32 +54,36 @@ void _psxMoveGPRtoM(uptr to, int fromgpr); 
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr); #endif -extern u32 psxpc; // recompiler pc -extern int psxbranch; // set for branch +extern u32 psxpc; // recompiler pc +extern int psxbranch; // set for branch extern u32 g_iopCyclePenalty; void psxSaveBranchState(); void psxLoadBranchState(); extern void psxSetBranchReg(u32 reg); -extern void psxSetBranchImm( u32 imm ); +extern void psxSetBranchImm(u32 imm); extern void psxRecompileNextInstruction(int delayslot); //////////////////////////////////////////////////////////////////// // IOP Constant Propagation Defines, Vars, and API - From here down! -#define PSX_IS_CONST1(reg) ((reg)<32 && (g_psxHasConstReg&(1<<(reg)))) -#define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg&(1<<(reg1)))&&(g_psxHasConstReg&(1<<(reg2)))) -#define PSX_SET_CONST(reg) { \ - if( (reg) < 32 ) { \ - g_psxHasConstReg |= (1<<(reg)); \ - g_psxFlushedConstReg &= ~(1<<(reg)); \ - } \ -} +#define PSX_IS_CONST1(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg)))) +#define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg & (1 << (reg1))) && (g_psxHasConstReg & (1 << (reg2)))) +#define PSX_SET_CONST(reg) \ + { \ + if ((reg) < 32) \ + { \ + g_psxHasConstReg |= (1 << (reg)); \ + g_psxFlushedConstReg &= ~(1 << (reg)); \ + } \ + } -#define PSX_DEL_CONST(reg) { \ - if( (reg) < 32 ) g_psxHasConstReg &= ~(1<<(reg)); \ -} +#define PSX_DEL_CONST(reg) \ + { \ + if ((reg) < 32) \ + g_psxHasConstReg &= ~(1 << (reg)); \ + } extern u32 g_psxConstRegs[32]; extern u32 g_psxHasConstReg, g_psxFlushedConstReg; @@ -92,38 +96,38 @@ typedef void (*R3000AFNPTR_INFO)(int info); // // rd = rs op rt #define PSXRECOMPILE_CONSTCODE0(fn) \ -void rpsx##fn(void) \ -{ \ - psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_); \ -} + void rpsx##fn(void) \ + { \ + psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_); \ + } // rt = rs op imm16 #define PSXRECOMPILE_CONSTCODE1(fn) \ -void rpsx##fn(void) \ -{ 
\ - psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_); \ -} + void rpsx##fn(void) \ + { \ + psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_); \ + } // rd = rt op sa #define PSXRECOMPILE_CONSTCODE2(fn) \ -void rpsx##fn(void) \ -{ \ - psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_); \ -} + void rpsx##fn(void) \ + { \ + psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_); \ + } // [lo,hi] = rt op rs #define PSXRECOMPILE_CONSTCODE3(fn, LOHI) \ -void rpsx##fn(void) \ -{ \ - psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ -} + void rpsx##fn(void) \ + { \ + psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ + } #define PSXRECOMPILE_CONSTCODE3_PENALTY(fn, LOHI, cycles) \ -void rpsx##fn(void) \ -{ \ - psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ - g_iopCyclePenalty = cycles; \ -} + void rpsx##fn(void) \ + { \ + psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ + g_iopCyclePenalty = cycles; \ + } // rd = rs op rt void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode); diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index fab357bb68..85115d3043 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -29,23 +29,25 @@ extern u32 g_psxMaxRecMem; // R3000A instruction implementation #define REC_FUNC(f) \ -static void rpsx##f() { \ - xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ - _psxFlushCall(FLUSH_EVERYTHING); \ - xFastCall((void*)(uptr)psx##f); \ - PSX_DEL_CONST(_Rt_); \ -/* branch = 2; */\ -} + static void rpsx##f() \ + { \ + xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ + _psxFlushCall(FLUSH_EVERYTHING); \ + xFastCall((void*)(uptr)psx##f); \ + PSX_DEL_CONST(_Rt_); \ + /* branch = 2; */ \ + } // Same as above but with a 
different naming convension (to avoid various rename) #define REC_GTE_FUNC(f) \ -static void rgte##f() { \ - xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ - _psxFlushCall(FLUSH_EVERYTHING); \ - xFastCall((void*)(uptr)gte##f); \ - PSX_DEL_CONST(_Rt_); \ -/* branch = 2; */\ -} + static void rgte##f() \ + { \ + xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ + _psxFlushCall(FLUSH_EVERYTHING); \ + xFastCall((void*)(uptr)gte##f); \ + PSX_DEL_CONST(_Rt_); \ + /* branch = 2; */ \ + } extern void psxLWL(); extern void psxLWR(); @@ -61,16 +63,22 @@ void rpsxADDIU_const() // adds a constant to sreg and puts into dreg void rpsxADDconst(int dreg, int sreg, u32 off, int info) { - if (sreg) { - if (sreg == dreg) { + if (sreg) + { + if (sreg == dreg) + { xADD(ptr32[&psxRegs.GPR.r[dreg]], off); - } else { + } + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); - if (off) xADD(eax, off); + if (off) + xADD(eax, off); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } } - else { + else + { xMOV(ptr32[&psxRegs.GPR.r[dreg]], off); } } @@ -78,7 +86,8 @@ void rpsxADDconst(int dreg, int sreg, u32 off, int info) void rpsxADDIU_(int info) { // Rt = Rs + Im - if (!_Rt_) return; + if (!_Rt_) + return; rpsxADDconst(_Rt_, _Rs_, _Imm_, info); } @@ -95,8 +104,8 @@ void rpsxSLTI_const() void rpsxSLTconst(int info, int dreg, int sreg, int imm) { xXOR(eax, eax); - xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm); - xSETL(al); + xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm); + xSETL(al); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } @@ -114,7 +123,7 @@ void rpsxSLTUconst(int info, int dreg, int sreg, int imm) { xXOR(eax, eax); xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm); - xSETB(al); + xSETB(al); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } @@ -130,15 +139,21 @@ void rpsxANDI_const() void rpsxANDconst(int info, int dreg, int sreg, u32 imm) { - if (imm) { - if (sreg == dreg) { + if (imm) + { + if (sreg == dreg) + { xAND(ptr32[&psxRegs.GPR.r[dreg]], imm); - } else { + } + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xAND(eax, imm); 
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } - } else { + } + else + { xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0); } } @@ -155,18 +170,23 @@ void rpsxORI_const() void rpsxORconst(int info, int dreg, int sreg, u32 imm) { - if (imm) { - if (sreg == dreg) { + if (imm) + { + if (sreg == dreg) + { xOR(ptr32[&psxRegs.GPR.r[dreg]], imm); } - else { + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xOR(eax, imm); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } } - else { - if( dreg != sreg ) { + else + { + if (dreg != sreg) + { xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); } @@ -184,29 +204,37 @@ void rpsxXORI_const() void rpsxXORconst(int info, int dreg, int sreg, u32 imm) { - if( imm == 0xffffffff ) { - if( dreg == sreg ) { + if (imm == 0xffffffff) + { + if (dreg == sreg) + { xNOT(ptr32[&psxRegs.GPR.r[dreg]]); } - else { + else + { xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xNOT(ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); } } - else if (imm) { + else if (imm) + { - if (sreg == dreg) { + if (sreg == dreg) + { xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm); } - else { + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]); xXOR(eax, imm); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax); } } - else { - if( dreg != sreg ) { + else + { + if (dreg != sreg) + { xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); } @@ -219,7 +247,8 @@ PSXRECOMPILE_CONSTCODE1(XORI); void rpsxLUI() { - if(!_Rt_) return; + if (!_Rt_) + return; _psxOnWriteReg(_Rt_); _psxDeleteReg(_Rt_, 0); PSX_SET_CONST(_Rt_); @@ -240,14 +269,21 @@ void rpsxADDU_constt(int info) void rpsxADDU_(int info) { - if (_Rs_ && _Rt_) { + if (_Rs_ && _Rt_) + { xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); - } else if (_Rs_) { + } + else if (_Rs_) + { xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); - } else if (_Rt_) { + } + else if (_Rt_) + { xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); - } else { + } + else + { xXOR(eax, eax); } xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax); @@ -275,13 
+311,16 @@ void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_ void rpsxSUBU_(int info) { // Rd = Rs - Rt - if (!_Rd_) return; + if (!_Rd_) + return; - if( _Rd_ == _Rs_ ) { + if (_Rd_ == _Rs_) + { xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax); } - else { + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax); @@ -294,33 +333,35 @@ void rpsxSUB() { rpsxSUBU(); } void rpsxLogicalOp(int info, int op) { - if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) { + if (_Rd_ == _Rs_ || _Rd_ == _Rt_) + { int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_; xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]); - switch(op) { + switch (op) { case 0: xAND(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; - case 1: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; + case 1: xOR (ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; case 2: xXOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; - case 3: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; + case 3: xOR (ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break; default: pxAssert(0); } - if( op == 3 ) + if (op == 3) xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]); } - else { + else + { xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - switch(op) { + switch (op) { case 0: xAND(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; - case 1: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; + case 1: xOR (ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; case 2: xXOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; - case 3: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; + case 3: xOR (ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break; default: pxAssert(0); } - if( op == 3 ) + if (op == 3) xNOT(ecx); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); } @@ -368,23 +409,29 @@ void rpsxNOR_const() void rpsxNORconst(int info, int dreg, int sreg, u32 imm) { - if( imm ) { - if( dreg == sreg ) { + if (imm) + { + if (dreg == sreg) + { xOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xNOT(ptr32[&psxRegs.GPR.r[dreg]]); } - else { + else + { xMOV(ecx, 
ptr32[&psxRegs.GPR.r[sreg]]); xOR(ecx, imm); xNOT(ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); } } - else { - if( dreg == sreg ) { + else + { + if (dreg == sreg) + { xNOT(ptr32[&psxRegs.GPR.r[dreg]]); } - else { + else + { xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]); xNOT(ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx); @@ -442,7 +489,8 @@ void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[ void rpsxSLTU_(int info) { // Rd = Rs < Rt (unsigned) - if (!_Rd_) return; + if (!_Rd_) + return; xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); @@ -466,8 +514,10 @@ void rpsxMULTsuperconst(int info, int sreg, int imm, int sign) { // Lo/Hi = Rs * Rt (signed) xMOV(eax, imm); - if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]); - else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]); + if (sign) + xMUL(ptr32[&psxRegs.GPR.r[sreg]]); + else + xUMUL(ptr32[&psxRegs.GPR.r[sreg]]); xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.hi], edx); } @@ -476,8 +526,10 @@ void rpsxMULTsuper(int info, int sign) { // Lo/Hi = Rs * Rt (signed) xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); - if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); - else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); + if (sign) + xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); + else + xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.hi], edx); } @@ -515,22 +567,29 @@ void rpsxDIV_const() * Quotient: 0x80000000 (-2147483648), and remainder: 0x00000000 (0) */ // Of course x86 cpu does overflow ! 
- if (g_psxConstRegs[_Rs_] == 0x80000000u && g_psxConstRegs[_Rt_] == 0xFFFFFFFFu) { + if (g_psxConstRegs[_Rs_] == 0x80000000u && g_psxConstRegs[_Rt_] == 0xFFFFFFFFu) + { xMOV(ptr32[&psxRegs.GPR.n.hi], 0); xMOV(ptr32[&psxRegs.GPR.n.lo], 0x80000000); return; } - if (g_psxConstRegs[_Rt_] != 0) { + if (g_psxConstRegs[_Rt_] != 0) + { lo = *(int*)&g_psxConstRegs[_Rs_] / *(int*)&g_psxConstRegs[_Rt_]; hi = *(int*)&g_psxConstRegs[_Rs_] % *(int*)&g_psxConstRegs[_Rt_]; xMOV(ptr32[&psxRegs.GPR.n.hi], hi); xMOV(ptr32[&psxRegs.GPR.n.lo], lo); - } else { + } + else + { xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); - if (g_psxConstRegs[_Rs_] & 0x80000000u) { + if (g_psxConstRegs[_Rs_] & 0x80000000u) + { xMOV(ptr32[&psxRegs.GPR.n.lo], 0x1); - } else { + } + else + { xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu); } } @@ -539,23 +598,23 @@ void rpsxDIV_const() void rpsxDIVsuper(int info, int sign, int process = 0) { // Lo/Hi = Rs / Rt (signed) - if( process & PROCESS_CONSTT ) + if (process & PROCESS_CONSTT) xMOV(ecx, g_psxConstRegs[_Rt_]); else xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); - if( process & PROCESS_CONSTS ) + if (process & PROCESS_CONSTS) xMOV(eax, g_psxConstRegs[_Rs_]); else xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); - u8 *end1; - if (sign) //test for overflow (x86 will just throw an exception) + u8* end1; + if (sign) //test for overflow (x86 will just throw an exception) { - xCMP(eax, 0x80000000 ); - u8 *cont1 = JNE8(0); - xCMP(ecx, 0xffffffff ); - u8 *cont2 = JNE8(0); + xCMP(eax, 0x80000000); + u8* cont1 = JNE8(0); + xCMP(ecx, 0xffffffff); + u8* cont2 = JNE8(0); //overflow case: xXOR(edx, edx); //EAX remains 0x80000000 end1 = JMP8(0); @@ -564,34 +623,37 @@ void rpsxDIVsuper(int info, int sign, int process = 0) x86SetJ8(cont2); } - xCMP(ecx, 0 ); - u8 *cont3 = JNE8(0); + xCMP(ecx, 0); + u8* cont3 = JNE8(0); //divide by zero xMOV(edx, eax); if (sign) //set EAX to (EAX < 0)?1:-1 { - xSAR(eax, 31 ); //(EAX < 0)?-1:0 - xSHL(eax, 1 ); //(EAX < 0)?-2:0 + xSAR(eax, 31); //(EAX 
< 0)?-1:0 + xSHL(eax, 1); //(EAX < 0)?-2:0 xNOT(eax); //(EAX < 0)?1:-1 } else - xMOV(eax, 0xffffffff ); - u8 *end2 = JMP8(0); + xMOV(eax, 0xffffffff); + u8* end2 = JMP8(0); // Normal division x86SetJ8(cont3); - if( sign ) { + if (sign) + { xCDQ(); xDIV(ecx); } - else { + else + { xXOR(edx, edx); xUDIV(ecx); } - if (sign) x86SetJ8( end1 ); - x86SetJ8( end2 ); + if (sign) + x86SetJ8(end1); + x86SetJ8(end2); xMOV(ptr32[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.hi], edx); @@ -608,12 +670,15 @@ void rpsxDIVU_const() { u32 lo, hi; - if (g_psxConstRegs[_Rt_] != 0) { + if (g_psxConstRegs[_Rt_] != 0) + { lo = g_psxConstRegs[_Rs_] / g_psxConstRegs[_Rt_]; hi = g_psxConstRegs[_Rs_] % g_psxConstRegs[_Rt_]; xMOV(ptr32[&psxRegs.GPR.n.hi], hi); xMOV(ptr32[&psxRegs.GPR.n.lo], lo); - } else { + } + else + { xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); xMOV(ptr32[&psxRegs.GPR.n.lo], 0xFFFFFFFFu); } @@ -640,9 +705,11 @@ static void rpsxLB() _psxDeleteReg(_Rt_, 0); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(ecx, _Imm_); - xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX - if (_Rt_) { + if (_Imm_) + xADD(ecx, _Imm_); + xFastCall((void*)iopMemRead8, ecx); // returns value in EAX + if (_Rt_) + { xMOVSX(eax, al); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); } @@ -656,9 +723,11 @@ static void rpsxLBU() _psxDeleteReg(_Rt_, 0); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(ecx, _Imm_); - xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX - if (_Rt_) { + if (_Imm_) + xADD(ecx, _Imm_); + xFastCall((void*)iopMemRead8, ecx); // returns value in EAX + if (_Rt_) + { xMOVZX(eax, al); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); } @@ -672,9 +741,11 @@ static void rpsxLH() _psxDeleteReg(_Rt_, 0); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(ecx, _Imm_); - xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX - if (_Rt_) { + if (_Imm_) + xADD(ecx, _Imm_); + xFastCall((void*)iopMemRead16, ecx); // returns value in EAX + if 
(_Rt_) + { xMOVSX(eax, ax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); } @@ -688,9 +759,11 @@ static void rpsxLHU() _psxDeleteReg(_Rt_, 0); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(ecx, _Imm_); - xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX - if (_Rt_) { + if (_Imm_) + xADD(ecx, _Imm_); + xFastCall((void*)iopMemRead16, ecx); // returns value in EAX + if (_Rt_) + { xMOVZX(eax, ax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); } @@ -705,13 +778,15 @@ static void rpsxLW() _psxFlushCall(FLUSH_EVERYTHING); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(ecx, _Imm_); + if (_Imm_) + xADD(ecx, _Imm_); xTEST(ecx, 0x10000000); j8Ptr[0] = JZ8(0); - xFastCall((void*)iopMemRead32, ecx ); // returns value in EAX - if (_Rt_) { + xFastCall((void*)iopMemRead32, ecx); // returns value in EAX + if (_Rt_) + { xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax); } j8Ptr[1] = JMP8(0); @@ -721,7 +796,8 @@ static void rpsxLW() xAND(ecx, 0x1fffff); xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]); - if (_Rt_) { + if (_Rt_) + { xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx); } @@ -735,9 +811,10 @@ static void rpsxSB() _psxDeleteReg(_Rt_, 1); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(arg1regd, _Imm_); - xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] ); - xFastCall((void*)iopMemWrite8, arg1regd, arg2regd ); + if (_Imm_) + xADD(arg1regd, _Imm_); + xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); + xFastCall((void*)iopMemWrite8, arg1regd, arg2regd); } static void rpsxSH() @@ -746,9 +823,10 @@ static void rpsxSH() _psxDeleteReg(_Rt_, 1); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(arg1regd, _Imm_); - xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] ); - xFastCall((void*)iopMemWrite16, arg1regd, arg2regd ); + if (_Imm_) + xADD(arg1regd, _Imm_); + xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); + xFastCall((void*)iopMemWrite16, arg1regd, arg2regd); } static void rpsxSW() @@ -757,9 +835,10 @@ static void rpsxSW() _psxDeleteReg(_Rt_, 1); 
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]); - if (_Imm_) xADD(arg1regd, _Imm_); - xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] ); - xFastCall((void*)iopMemWrite32, arg1regd, arg2regd ); + if (_Imm_) + xADD(arg1regd, _Imm_); + xMOV(arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]]); + xFastCall((void*)iopMemWrite32, arg1regd, arg2regd); } //// SLL @@ -772,17 +851,22 @@ void rpsxSLL_const() void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype) { imm &= 0x1f; - if (imm) { - if( rdreg == rtreg ) { - switch(shifttype) { + if (imm) + { + if (rdreg == rtreg) + { + switch (shifttype) + { case 0: xSHL(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; case 1: xSHR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; case 2: xSAR(ptr32[&psxRegs.GPR.r[rdreg]], imm); break; } } - else { + else + { xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]); - switch(shifttype) { + switch (shifttype) + { case 0: xSHL(eax, imm); break; case 1: xSHR(eax, imm); break; case 2: xSAR(eax, imm); break; @@ -790,8 +874,10 @@ void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype) xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax); } } - else { - if( rdreg != rtreg ) { + else + { + if (rdreg != rtreg) + { xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]); xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax); } @@ -822,7 +908,7 @@ PSXRECOMPILE_CONSTCODE2(SRA); //// SLLV void rpsxSLLV_const() { - g_psxConstRegs[_Rd_] = g_psxConstRegs[_Rt_] << (g_psxConstRegs[_Rs_]&0x1f); + g_psxConstRegs[_Rd_] = g_psxConstRegs[_Rt_] << (g_psxConstRegs[_Rs_] & 0x1f); } void rpsxShiftVconsts(int info, int shifttype) @@ -834,7 +920,8 @@ void rpsxShiftVconstt(int info, int shifttype) { xMOV(eax, g_psxConstRegs[_Rt_]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]); - switch(shifttype) { + switch (shifttype) + { case 0: xSHL(eax, cl); break; case 1: xSHR(eax, cl); break; case 2: xSAR(eax, cl); break; @@ -857,7 +944,7 @@ PSXRECOMPILE_CONSTCODE0(SLLV); //// SRLV void rpsxSRLV_const() { - g_psxConstRegs[_Rd_] = g_psxConstRegs[_Rt_] >> (g_psxConstRegs[_Rs_]&0x1f); 
+ g_psxConstRegs[_Rd_] = g_psxConstRegs[_Rt_] >> (g_psxConstRegs[_Rs_] & 0x1f); } void rpsxSRLV_consts(int info) { rpsxShiftVconsts(info, 1); } @@ -875,7 +962,7 @@ PSXRECOMPILE_CONSTCODE0(SRLV); //// SRAV void rpsxSRAV_const() { - g_psxConstRegs[_Rd_] = *(int*)&g_psxConstRegs[_Rt_] >> (g_psxConstRegs[_Rs_]&0x1f); + g_psxConstRegs[_Rd_] = *(int*)&g_psxConstRegs[_Rt_] >> (g_psxConstRegs[_Rs_] & 0x1f); } void rpsxSRAV_consts(int info) { rpsxShiftVconsts(info, 2); } @@ -895,7 +982,8 @@ extern void rpsxBREAK(); void rpsxMFHI() { - if (!_Rd_) return; + if (!_Rd_) + return; _psxOnWriteReg(_Rd_); _psxDeleteReg(_Rd_, 0); @@ -905,10 +993,12 @@ void rpsxMFHI() void rpsxMTHI() { - if( PSX_IS_CONST1(_Rs_) ) { + if (PSX_IS_CONST1(_Rs_)) + { xMOV(ptr32[&psxRegs.GPR.n.hi], g_psxConstRegs[_Rs_]); } - else { + else + { _psxDeleteReg(_Rs_, 1); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ptr32[&psxRegs.GPR.n.hi], eax); @@ -917,7 +1007,8 @@ void rpsxMTHI() void rpsxMFLO() { - if (!_Rd_) return; + if (!_Rd_) + return; _psxOnWriteReg(_Rd_); _psxDeleteReg(_Rd_, 0); @@ -927,10 +1018,12 @@ void rpsxMFLO() void rpsxMTLO() { - if( PSX_IS_CONST1(_Rs_) ) { + if (PSX_IS_CONST1(_Rs_)) + { xMOV(ptr32[&psxRegs.GPR.n.lo], g_psxConstRegs[_Rs_]); } - else { + else + { _psxDeleteReg(_Rs_, 1); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); xMOV(ptr32[&psxRegs.GPR.n.lo], eax); @@ -947,7 +1040,7 @@ void rpsxJ() void rpsxJAL() { - u32 newpc = (_InstrucTarget_ << 2) + ( psxpc & 0xf0000000 ); + u32 newpc = (_InstrucTarget_ << 2) + (psxpc & 0xf0000000); _psxDeleteReg(31, 0); PSX_SET_CONST(31); g_psxConstRegs[31] = psxpc + 4; @@ -967,7 +1060,7 @@ void rpsxJALR() _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _psxMoveGPRtoR(calleeSavedReg2d, _Rs_); - if ( _Rd_ ) + if (_Rd_) { _psxDeleteReg(_Rd_, 0); PSX_SET_CONST(_Rd_); @@ -976,27 +1069,28 @@ void rpsxJALR() psxRecompileNextInstruction(1); - if( x86regs[calleeSavedReg2d.GetId()].inuse ) { - pxAssert( x86regs[calleeSavedReg2d.GetId()].type == 
X86TYPE_PCWRITEBACK ); + if (x86regs[calleeSavedReg2d.GetId()].inuse) + { + pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d); x86regs[calleeSavedReg2d.GetId()].inuse = 0; - #ifdef PCSX2_DEBUG - xOR( calleeSavedReg2d, calleeSavedReg2d ); - #endif - +#ifdef PCSX2_DEBUG + xOR(calleeSavedReg2d, calleeSavedReg2d); +#endif } - else { + else + { xMOV(eax, ptr32[&g_recWriteback]); xMOV(ptr32[&psxRegs.pc], eax); - #ifdef PCSX2_DEBUG - xOR( eax, eax ); - #endif +#ifdef PCSX2_DEBUG + xOR(eax, eax); +#endif } - #ifdef PCSX2_DEBUG +#ifdef PCSX2_DEBUG xForwardJNZ8 skipAssert; - xWrite8( 0xcc ); + xWrite8(0xcc); skipAssert.SetTarget(); - #endif +#endif psxSetBranchReg(0xffffffff); } @@ -1006,18 +1100,21 @@ static u32* s_pbranchjmp; void rpsxSetBranchEQ(int info, int process) { - if( process & PROCESS_CONSTS ) { - xCMP(ptr32[&psxRegs.GPR.r[ _Rt_ ]], g_psxConstRegs[_Rs_] ); - s_pbranchjmp = JNE32( 0 ); + if (process & PROCESS_CONSTS) + { + xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]); + s_pbranchjmp = JNE32(0); } - else if( process & PROCESS_CONSTT ) { - xCMP(ptr32[&psxRegs.GPR.r[ _Rs_ ]], g_psxConstRegs[_Rt_] ); - s_pbranchjmp = JNE32( 0 ); + else if (process & PROCESS_CONSTT) + { + xCMP(ptr32[&psxRegs.GPR.r[_Rs_]], g_psxConstRegs[_Rt_]); + s_pbranchjmp = JNE32(0); } - else { - xMOV(eax, ptr32[&psxRegs.GPR.r[ _Rs_ ] ]); - xCMP(eax, ptr32[&psxRegs.GPR.r[ _Rt_ ] ]); - s_pbranchjmp = JNE32( 0 ); + else + { + xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]); + xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); + s_pbranchjmp = JNE32(0); } } @@ -1025,23 +1122,23 @@ void rpsxBEQ_const() { u32 branchTo; - if( g_psxConstRegs[_Rs_] == g_psxConstRegs[_Rt_] ) + if (g_psxConstRegs[_Rs_] == g_psxConstRegs[_Rt_]) branchTo = ((s32)_Imm_ * 4) + psxpc; else - branchTo = psxpc+4; + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); } void rpsxBEQ_process(int info, int process) { 
u32 branchTo = ((s32)_Imm_ * 4) + psxpc; - if ( _Rs_ == _Rt_ ) + if (_Rs_ == _Rt_) { psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); } else { @@ -1053,7 +1150,7 @@ void rpsxBEQ_process(int info, int process) psxRecompileNextInstruction(1); psxSetBranchImm(branchTo); - x86SetJ32A( s_pbranchjmp ); + x86SetJ32A(s_pbranchjmp); // recopy the next inst psxpc -= 4; @@ -1074,20 +1171,20 @@ void rpsxBNE_const() { u32 branchTo; - if( g_psxConstRegs[_Rs_] != g_psxConstRegs[_Rt_] ) + if (g_psxConstRegs[_Rs_] != g_psxConstRegs[_Rt_]) branchTo = ((s32)_Imm_ * 4) + psxpc; else - branchTo = psxpc+4; + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); } void rpsxBNE_process(int info, int process) { u32 branchTo = ((s32)_Imm_ * 4) + psxpc; - if ( _Rs_ == _Rt_ ) + if (_Rs_ == _Rt_) { psxRecompileNextInstruction(1); psxSetBranchImm(psxpc); @@ -1101,7 +1198,7 @@ void rpsxBNE_process(int info, int process) psxRecompileNextInstruction(1); psxSetBranchImm(psxpc); - x86SetJ32A( s_pbranchjmp ); + x86SetJ32A(s_pbranchjmp); // recopy the next inst psxpc -= 4; @@ -1124,12 +1221,13 @@ void rpsxBLTZ() _psxFlushAllUnused(); - if( PSX_IS_CONST1(_Rs_) ) { - if( (int)g_psxConstRegs[_Rs_] >= 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] >= 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); return; } @@ -1141,7 +1239,7 @@ void rpsxBLTZ() psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); // recopy the next inst psxpc -= 4; @@ -1158,12 +1256,13 @@ void rpsxBGEZ() _psxFlushAllUnused(); - if( PSX_IS_CONST1(_Rs_) ) { - if ( (int)g_psxConstRegs[_Rs_] < 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] < 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); return; } @@ 
-1175,7 +1274,7 @@ void rpsxBGEZ() psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); // recopy the next inst psxpc -= 4; @@ -1198,12 +1297,13 @@ void rpsxBLTZAL() PSX_SET_CONST(31); g_psxConstRegs[31] = psxpc + 4; - if( PSX_IS_CONST1(_Rs_) ) { - if( (int)g_psxConstRegs[_Rs_] >= 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] >= 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); return; } @@ -1216,7 +1316,7 @@ void rpsxBLTZAL() psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); // recopy the next inst psxpc -= 4; @@ -1238,12 +1338,13 @@ void rpsxBGEZAL() PSX_SET_CONST(31); g_psxConstRegs[31] = psxpc + 4; - if( PSX_IS_CONST1(_Rs_) ) { - if( (int)g_psxConstRegs[_Rs_] < 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] < 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); return; } @@ -1255,7 +1356,7 @@ void rpsxBGEZAL() psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); // recopy the next inst psxpc -= 4; @@ -1273,12 +1374,13 @@ void rpsxBLEZ() _psxFlushAllUnused(); - if( PSX_IS_CONST1(_Rs_) ) { - if( (int)g_psxConstRegs[_Rs_] > 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] > 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo ); + psxSetBranchImm(branchTo); return; } @@ -1292,7 +1394,7 @@ void rpsxBLEZ() psxRecompileNextInstruction(1); psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); psxpc -= 4; psxLoadBranchState(); @@ -1308,12 +1410,13 @@ void rpsxBGTZ() _psxFlushAllUnused(); - if( PSX_IS_CONST1(_Rs_) ) { - if( (int)g_psxConstRegs[_Rs_] <= 0 ) - branchTo = psxpc+4; + if (PSX_IS_CONST1(_Rs_)) + { + if ((int)g_psxConstRegs[_Rs_] <= 0) + branchTo = psxpc + 4; psxRecompileNextInstruction(1); - psxSetBranchImm( branchTo 
); + psxSetBranchImm(branchTo); return; } @@ -1327,7 +1430,7 @@ void rpsxBGTZ() psxRecompileNextInstruction(1); psxSetBranchImm(psxpc); - x86SetJ32A( pjmp ); + x86SetJ32A(pjmp); psxpc -= 4; psxLoadBranchState(); @@ -1338,7 +1441,8 @@ void rpsxBGTZ() void rpsxMFC0() { // Rt = Cop0->Rd - if (!_Rt_) return; + if (!_Rt_) + return; _psxOnWriteReg(_Rt_); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]); @@ -1348,7 +1452,8 @@ void rpsxMFC0() void rpsxCFC0() { // Rt = Cop0->Rd - if (!_Rt_) return; + if (!_Rt_) + return; _psxOnWriteReg(_Rt_); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]); @@ -1358,10 +1463,12 @@ void rpsxCFC0() void rpsxMTC0() { // Cop0->Rd = Rt - if( PSX_IS_CONST1(_Rt_) ) { + if (PSX_IS_CONST1(_Rt_)) + { xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], g_psxConstRegs[_Rt_]); } - else { + else + { _psxDeleteReg(_Rt_, 1); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]); xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax); @@ -1387,7 +1494,7 @@ void rpsxRFE() // Test the IOP's INTC status, so that any pending ints get raised. _psxFlushCall(0); - xFastCall((void*)(uptr)&iopTestIntc ); + xFastCall((void*)(uptr)&iopTestIntc); } //// COP2 @@ -1437,7 +1544,8 @@ static void rpsxCOP0() { rpsxCP0[_Rs_](); } static void rpsxCOP2() { rpsxCP2[_Funct_](); } static void rpsxBASIC() { rpsxCP2BSC[_Rs_](); } -static void rpsxNULL() { +static void rpsxNULL() +{ Console.WriteLn("psxUNK: %8.8x", psxRegs.code); } @@ -1449,7 +1557,7 @@ void (*rpsxBSC[64])() = { rpsxLB , rpsxLH , rpsxLWL , rpsxLW , rpsxLBU , rpsxLHU , rpsxLWR , rpsxNULL, rpsxSB , rpsxSH , rpsxSWL , rpsxSW , rpsxNULL, rpsxNULL, rpsxSWR , rpsxNULL, rpsxNULL , rpsxNULL , rgteLWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, - rpsxNULL , rpsxNULL , rgteSWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL + rpsxNULL , rpsxNULL , rgteSWC2, rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, }; void (*rpsxSPC[64])() = { @@ -1460,21 +1568,21 @@ void (*rpsxSPC[64])() = { rpsxADD , rpsxADDU, rpsxSUB , rpsxSUBU, rpsxAND , rpsxOR , rpsxXOR , rpsxNOR , rpsxNULL, 
rpsxNULL, rpsxSLT , rpsxSLTU, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, - rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL + rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, }; void (*rpsxREG[32])() = { rpsxBLTZ , rpsxBGEZ , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxBLTZAL, rpsxBGEZAL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, - rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL + rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, }; void (*rpsxCP0[32])() = { rpsxMFC0, rpsxNULL, rpsxCFC0, rpsxNULL, rpsxMTC0, rpsxNULL, rpsxCTC0, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxRFE , rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, - rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL + rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, }; void (*rpsxCP2[64])() = { @@ -1485,35 +1593,37 @@ void (*rpsxCP2[64])() = { rgteNCT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 20 rgteSQR , rgteDCPL , rgteDPCT , rpsxNULL, rpsxNULL, rgteAVSZ3, rgteAVSZ4, rpsxNULL, // 28 rgteRTPT , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rpsxNULL , rpsxNULL , rpsxNULL, // 30 - rpsxNULL , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rgteGPF , rgteGPL , rgteNCCT // 38 + rpsxNULL , rpsxNULL , rpsxNULL , rpsxNULL, rpsxNULL, rgteGPF , rgteGPL , rgteNCCT, // 38 }; -void(*rpsxCP2BSC[32])() = { +void (*rpsxCP2BSC[32])() = { rgteMFC2, rpsxNULL, rgteCFC2, rpsxNULL, rgteMTC2, rpsxNULL, rgteCTC2, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, 
rpsxNULL, rpsxNULL, rpsxNULL, - rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL + rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, rpsxNULL, }; //////////////////////////////////////////////// // Back-Prob Function Tables - Gathering Info // //////////////////////////////////////////////// -#define rpsxpropSetRead(reg) { \ - if( !(pinst->regs[reg] & EEINST_USED) ) \ - pinst->regs[reg] |= EEINST_LASTUSE; \ - prev->regs[reg] |= EEINST_LIVE0|EEINST_USED; \ - pinst->regs[reg] |= EEINST_USED; \ - _recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 0); \ -} \ +#define rpsxpropSetRead(reg) \ + { \ + if (!(pinst->regs[reg] & EEINST_USED)) \ + pinst->regs[reg] |= EEINST_LASTUSE; \ + prev->regs[reg] |= EEINST_LIVE0 | EEINST_USED; \ + pinst->regs[reg] |= EEINST_USED; \ + _recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 0); \ + } -#define rpsxpropSetWrite(reg) { \ - prev->regs[reg] &= ~EEINST_LIVE0; \ - if( !(pinst->regs[reg] & EEINST_USED) ) \ - pinst->regs[reg] |= EEINST_LASTUSE; \ - pinst->regs[reg] |= EEINST_USED; \ - prev->regs[reg] |= EEINST_USED; \ - _recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 1); \ -} +#define rpsxpropSetWrite(reg) \ + { \ + prev->regs[reg] &= ~EEINST_LIVE0; \ + if (!(pinst->regs[reg] & EEINST_USED)) \ + pinst->regs[reg] |= EEINST_LASTUSE; \ + pinst->regs[reg] |= EEINST_USED; \ + prev->regs[reg] |= EEINST_USED; \ + _recFillRegister(*pinst, XMMTYPE_GPRREG, reg, 1); \ + } void rpsxpropBSC(EEINST* prev, EEINST* pinst); void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst); @@ -1531,9 +1641,14 @@ void rpsxpropCP2(EEINST* prev, EEINST* pinst); //NULL , NULL , NULL, NULL , NULL, NULL, NULL, NULL void rpsxpropBSC(EEINST* prev, EEINST* pinst) { - switch(psxRegs.code >> 26) { - case 0: rpsxpropSPECIAL(prev, pinst); break; - case 1: rpsxpropREGIMM(prev, pinst); break; + switch (psxRegs.code >> 26) + { + case 0: + rpsxpropSPECIAL(prev, pinst); + break; + case 1: + rpsxpropREGIMM(prev, pinst); + break; case 2: // j break; 
case 3: // jal @@ -1554,8 +1669,12 @@ void rpsxpropBSC(EEINST* prev, EEINST* pinst) rpsxpropSetWrite(_Rt_); break; - case 16: rpsxpropCP0(prev, pinst); break; - case 18: rpsxpropCP2(prev, pinst); break; + case 16: + rpsxpropCP0(prev, pinst); + break; + case 18: + rpsxpropCP2(prev, pinst); + break; // stores case 40: case 41: case 42: case 43: case 46: @@ -1582,10 +1701,11 @@ void rpsxpropBSC(EEINST* prev, EEINST* pinst) //ADD , ADDU, SUB , SUBU, AND , OR , XOR , NOR , //NULL, NULL, SLT , SLTU, NULL , NULL , NULL, NULL, //NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL, -//NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL +//NULL, NULL, NULL, NULL, NULL , NULL , NULL, NULL, void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst) { - switch(_Funct_) { + switch (_Funct_) + { case 0: // SLL case 2: // SRL case 3: // SRA @@ -1641,8 +1761,10 @@ void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst) case 34: // sub case 35: // subu rpsxpropSetWrite(_Rd_); - if( _Rs_ ) rpsxpropSetRead(_Rs_); - if( _Rt_ ) rpsxpropSetRead(_Rt_); + if (_Rs_) + rpsxpropSetRead(_Rs_); + if (_Rt_) + rpsxpropSetRead(_Rt_); break; default: @@ -1659,7 +1781,8 @@ void rpsxpropSPECIAL(EEINST* prev, EEINST* pinst) //NULL , NULL , NULL, NULL, NULL, NULL, NULL, NULL void rpsxpropREGIMM(EEINST* prev, EEINST* pinst) { - switch(_Rt_) { + switch (_Rt_) + { case 0: // bltz case 1: // bgez rpsxpropSetRead(_Rs_); @@ -1678,10 +1801,11 @@ void rpsxpropREGIMM(EEINST* prev, EEINST* pinst) //MFC0, NULL, CFC0, NULL, MTC0, NULL, CTC0, NULL, //NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, //RFE , NULL, NULL, NULL, NULL, NULL, NULL, NULL, -//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, void rpsxpropCP0(EEINST* prev, EEINST* pinst) { - switch(_Rs_) { + switch (_Rs_) + { case 0: // mfc0 case 2: // cfc0 rpsxpropSetWrite(_Rt_); @@ -1703,10 +1827,11 @@ void rpsxpropCP0(EEINST* prev, EEINST* pinst) // gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL, // 
psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, -// psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL +// psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst) { - switch(_Rs_) { + switch (_Rs_) + { case 0: // mfc2 case 2: // cfc2 rpsxpropSetWrite(_Rt_); @@ -1718,7 +1843,7 @@ void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst) break; default: - pxFailDev( "iop invalid opcode in const propagation (rpsxpropCP2/BASIC)" ); + pxFailDev("iop invalid opcode in const propagation (rpsxpropCP2/BASIC)"); break; } } @@ -1732,10 +1857,11 @@ void rpsxpropCP2_basic(EEINST* prev, EEINST* pinst) // gteNCT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20 // gteSQR , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28 // gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30 -// psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT // 38 +// psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT, // 38 void rpsxpropCP2(EEINST* prev, EEINST* pinst) { - switch(_Funct_) { + switch (_Funct_) + { case 0: // Basic opcode rpsxpropCP2_basic(prev, pinst); break; diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 6ac6f7de8c..0c7d9a614e 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -23,44 +23,46 @@ #include "R5900_Profiler.h" extern u32 maxrecmem; -extern u32 pc; // recompiler pc -extern int g_branch; // set for branch -extern u32 target; // branch target -extern u32 s_nBlockCycles; // cycles of current block recompiling +extern u32 pc; // recompiler pc +extern int g_branch; // set for branch +extern u32 target; // branch target +extern u32 s_nBlockCycles; // cycles of current block recompiling 
////////////////////////////////////////////////////////////////////////////////////////// // -#define REC_FUNC( f ) \ - void rec##f() \ - { \ - recCall(Interp::f); \ - } - -#define REC_FUNC_DEL( f, delreg ) \ +#define REC_FUNC(f) \ void rec##f() \ -{ \ - if( (delreg) > 0 ) _deleteEEreg(delreg, 1); \ - recCall(Interp::f); \ -} + { \ + recCall(Interp::f); \ + } -#define REC_SYS( f ) \ - void rec##f() \ - { \ - recBranchCall(Interp::f); \ - } +#define REC_FUNC_DEL(f, delreg) \ + void rec##f() \ + { \ + if ((delreg) > 0) \ + _deleteEEreg(delreg, 1); \ + recCall(Interp::f); \ + } -#define REC_SYS_DEL( f, delreg ) \ - void rec##f() \ - { \ - if( (delreg) > 0 ) _deleteEEreg(delreg, 1); \ - recBranchCall(Interp::f); \ - } +#define REC_SYS(f) \ + void rec##f() \ + { \ + recBranchCall(Interp::f); \ + } + +#define REC_SYS_DEL(f, delreg) \ + void rec##f() \ + { \ + if ((delreg) > 0) \ + _deleteEEreg(delreg, 1); \ + recBranchCall(Interp::f); \ + } // Used to clear recompiled code blocks during memory/dma write operations. 
u32 recClearMem(u32 pc); -u32 REC_CLEARM( u32 mem ); +u32 REC_CLEARM(u32 mem); extern bool g_recompilingDelaySlot; // used when processing branches @@ -68,35 +70,42 @@ void SaveBranchState(); void LoadBranchState(); void recompileNextInstruction(int delayslot); -void SetBranchReg( u32 reg ); -void SetBranchImm( u32 imm ); +void SetBranchReg(u32 reg); +void SetBranchImm(u32 imm); void iFlushCall(int flushtype); -void recBranchCall( void (*func)() ); -void recCall( void (*func)() ); +void recBranchCall(void (*func)()); +void recCall(void (*func)()); u32 scaleblockcycles_clear(); -namespace R5900{ -namespace Dynarec { -extern void recDoBranchImm( u32* jmpSkip, bool isLikely = false ); -extern void recDoBranchImm_Likely( u32* jmpSkip ); -} } +namespace R5900 +{ + namespace Dynarec + { + extern void recDoBranchImm(u32* jmpSkip, bool isLikely = false); + extern void recDoBranchImm_Likely(u32* jmpSkip); + } // namespace Dynarec +} // namespace R5900 //////////////////////////////////////////////////////////////////// // Constant Propagation - From here to the end of the header! 
-#define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg)<32 && (g_cpuHasConstReg&(1<<(reg)))) -#define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg&(1<<(reg1)))&&(g_cpuHasConstReg&(1<<(reg2)))) -#define GPR_SET_CONST(reg) { \ - if( (reg) < 32 ) { \ - g_cpuHasConstReg |= (1<<(reg)); \ - g_cpuFlushedConstReg &= ~(1<<(reg)); \ - } \ -} +#define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg)))) +#define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg & (1 << (reg1))) && (g_cpuHasConstReg & (1 << (reg2)))) +#define GPR_SET_CONST(reg) \ + { \ + if ((reg) < 32) \ + { \ + g_cpuHasConstReg |= (1 << (reg)); \ + g_cpuFlushedConstReg &= ~(1 << (reg)); \ + } \ + } -#define GPR_DEL_CONST(reg) { \ - if( (reg) < 32 ) g_cpuHasConstReg &= ~(1<<(reg)); \ -} +#define GPR_DEL_CONST(reg) \ + { \ + if ((reg) < 32) \ + g_cpuHasConstReg &= ~(1 << (reg)); \ + } extern __aligned16 GPR_reg64 g_cpuConstRegs[32]; extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg; @@ -108,7 +117,7 @@ u32* _eeGetConstReg(int reg); void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr); void _eeMoveGPRtoM(uptr to, int fromgpr); void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr); -void eeSignExtendTo(int gpr, bool onlyupper=false); +void eeSignExtendTo(int gpr, bool onlyupper = false); void _eeFlushAllUnused(); void _eeOnWriteReg(int reg, int signext); @@ -123,7 +132,7 @@ void _flushEEreg(int reg); // allocates memory on the instruction size and returns the pointer u32* recGetImm64(u32 hi, u32 lo); -void _vuRegsCOP22(VURegs * VU, _VURegsNum *VUregsn); +void _vuRegsCOP22(VURegs* VU, _VURegsNum* VUregsn); ////////////////////////////////////// // Templates for code recompilation // @@ -133,18 +142,18 @@ typedef void (*R5900FNPTR)(); typedef void (*R5900FNPTR_INFO)(int info); #define EERECOMPILE_CODE0(fn, xmminfo) \ -void rec##fn(void) \ -{ \ - EE::Profiler.EmitOp(eeOpcode::fn); \ - eeRecompileCode0(rec##fn##_const, rec##fn##_consts, 
rec##fn##_constt, rec##fn##_, xmminfo); \ -} + void rec##fn(void) \ + { \ + EE::Profiler.EmitOp(eeOpcode::fn); \ + eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \ + } #define EERECOMPILE_CODEX(codename, fn) \ -void rec##fn(void) \ -{ \ - EE::Profiler.EmitOp(eeOpcode::fn); \ - codename(rec##fn##_const, rec##fn##_); \ -} + void rec##fn(void) \ + { \ + EE::Profiler.EmitOp(eeOpcode::fn); \ + codename(rec##fn##_const, rec##fn##_); \ + } // // MMX/XMM caching helpers @@ -164,31 +173,31 @@ void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode); // // rd = rs op rt #define EERECOMPILE_CONSTCODE0(fn) \ -void rec##fn(void) \ -{ \ - eeRecompileCodeConst0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_); \ -} \ + void rec##fn(void) \ + { \ + eeRecompileCodeConst0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_); \ + } // rt = rs op imm16 #define EERECOMPILE_CONSTCODE1(fn) \ -void rec##fn(void) \ -{ \ - eeRecompileCodeConst1(rec##fn##_const, rec##fn##_); \ -} \ + void rec##fn(void) \ + { \ + eeRecompileCodeConst1(rec##fn##_const, rec##fn##_); \ + } // rd = rt op sa #define EERECOMPILE_CONSTCODE2(fn) \ -void rec##fn(void) \ -{ \ - eeRecompileCodeConst2(rec##fn##_const, rec##fn##_); \ -} \ + void rec##fn(void) \ + { \ + eeRecompileCodeConst2(rec##fn##_const, rec##fn##_); \ + } // rd = rt op rs #define EERECOMPILE_CONSTCODESPECIAL(fn, mult) \ -void rec##fn(void) \ -{ \ - eeRecompileCodeConstSPECIAL(rec##fn##_const, rec##fn##_, mult); \ -} \ + void rec##fn(void) \ + { \ + eeRecompileCodeConstSPECIAL(rec##fn##_const, rec##fn##_, mult); \ + } // rd = rs op rt void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode); @@ -200,26 +209,26 @@ void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode); void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT); // XMM 
caching helpers -#define XMMINFO_READLO 0x01 -#define XMMINFO_READHI 0x02 -#define XMMINFO_WRITELO 0x04 -#define XMMINFO_WRITEHI 0x08 -#define XMMINFO_WRITED 0x10 -#define XMMINFO_READD 0x20 -#define XMMINFO_READS 0x40 -#define XMMINFO_READT 0x80 -#define XMMINFO_READD_LO 0x100 // if set and XMMINFO_READD is set, reads only low 64 bits of D -#define XMMINFO_READACC 0x200 -#define XMMINFO_WRITEACC 0x400 +#define XMMINFO_READLO 0x001 +#define XMMINFO_READHI 0x002 +#define XMMINFO_WRITELO 0x004 +#define XMMINFO_WRITEHI 0x008 +#define XMMINFO_WRITED 0x010 +#define XMMINFO_READD 0x020 +#define XMMINFO_READS 0x040 +#define XMMINFO_READT 0x080 +#define XMMINFO_READD_LO 0x100 // if set and XMMINFO_READD is set, reads only low 64 bits of D +#define XMMINFO_READACC 0x200 +#define XMMINFO_WRITEACC 0x400 #define FPURECOMPILE_CONSTCODE(fn, xmminfo) \ -void rec##fn(void) \ -{ \ - if (CHECK_FPU_FULL) \ - eeFPURecompileCode(DOUBLE::rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ - else \ - eeFPURecompileCode(rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ -} + void rec##fn(void) \ + { \ + if (CHECK_FPU_FULL) \ + eeFPURecompileCode(DOUBLE::rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ + else \ + eeFPURecompileCode(rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ + } // rd = rs op rt (all regs need to be in xmm) int eeRecompileCodeXMM(int xmminfo); diff --git a/pcsx2/x86/iR5900Arit.h b/pcsx2/x86/iR5900Arit.h index bf35054249..307bd51c67 100644 --- a/pcsx2/x86/iR5900Arit.h +++ b/pcsx2/x86/iR5900Arit.h @@ -23,8 +23,8 @@ namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { + void recADD(); void recADDU(); void recDADD(); @@ -39,5 +39,8 @@ namespace OpcodeImpl void recNOR(); void recSLT(); void recSLTU(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900AritImm.h b/pcsx2/x86/iR5900AritImm.h index 
73fd135c3f..18fe48c40a 100644 --- a/pcsx2/x86/iR5900AritImm.h +++ b/pcsx2/x86/iR5900AritImm.h @@ -34,6 +34,9 @@ namespace OpcodeImpl { void recSLTI(); void recSLTIU(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900Branch.h b/pcsx2/x86/iR5900Branch.h index 6543ecd3bc..619599b90b 100644 --- a/pcsx2/x86/iR5900Branch.h +++ b/pcsx2/x86/iR5900Branch.h @@ -41,6 +41,9 @@ namespace OpcodeImpl { void recBGEZL(); void recBGEZAL(); void recBGEZALL(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900Jump.h b/pcsx2/x86/iR5900Jump.h index 1ccdbba964..ff0e40278d 100644 --- a/pcsx2/x86/iR5900Jump.h +++ b/pcsx2/x86/iR5900Jump.h @@ -29,6 +29,9 @@ namespace OpcodeImpl { void recJAL(); void recJR(); void recJALR(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900LoadStore.h b/pcsx2/x86/iR5900LoadStore.h index 692d9ce76f..817d1f8920 100644 --- a/pcsx2/x86/iR5900LoadStore.h +++ b/pcsx2/x86/iR5900LoadStore.h @@ -50,6 +50,8 @@ namespace OpcodeImpl { void recLQC2(); void recSQC2(); -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900Misc.cpp b/pcsx2/x86/iR5900Misc.cpp index fbb8250cb2..8a9191e307 100644 --- a/pcsx2/x86/iR5900Misc.cpp +++ b/pcsx2/x86/iR5900Misc.cpp @@ -31,7 +31,7 @@ namespace Dynarec { // Parameters: // jmpSkip - This parameter is the result of the appropriate J32 instruction // (usually JZ32 or JNZ32). -void recDoBranchImm( u32* jmpSkip, bool isLikely ) +void recDoBranchImm(u32* jmpSkip, bool isLikely) { // All R5900 branches use this format: const u32 branchTo = ((s32)_Imm_ * 4) + pc; @@ -51,17 +51,17 @@ void recDoBranchImm( u32* jmpSkip, bool isLikely ) // if it's a likely branch then we'll need to skip the delay slot here, since // MIPS cancels the delay slot instruction when branches aren't taken. 
LoadBranchState(); - if( !isLikely ) + if (!isLikely) { - pc -= 4; // instruction rewinder for delay slot, if non-likely. + pc -= 4; // instruction rewinder for delay slot, if non-likely. recompileNextInstruction(1); } - SetBranchImm(pc); // start a new recompiled block. + SetBranchImm(pc); // start a new recompiled block. } -void recDoBranchImm_Likely( u32* jmpSkip ) +void recDoBranchImm_Likely(u32* jmpSkip) { - recDoBranchImm( jmpSkip, true ); + recDoBranchImm(jmpSkip, true); } namespace OpcodeImpl { @@ -92,13 +92,16 @@ void recSYNC() void recMFSA() { int mmreg; - if (!_Rd_) return; + if (!_Rd_) + return; mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - if( mmreg >= 0 ) { + if (mmreg >= 0) + { xMOVL.PS(xRegisterSSE(mmreg), ptr[&cpuRegs.sa]); } - else { + else + { xMOV(eax, ptr[&cpuRegs.sa]); _deleteEEreg(_Rd_, 0); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); @@ -109,16 +112,20 @@ void recMFSA() // SA is 4-bit and contains the amount of bytes to shift void recMTSA() { - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(ptr32[&cpuRegs.sa], g_cpuConstRegs[_Rs_].UL[0] & 0xf ); + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(ptr32[&cpuRegs.sa], g_cpuConstRegs[_Rs_].UL[0] & 0xf); } - else { + else + { int mmreg; - if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { + if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0) + { xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg)); } - else { + else + { xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(ptr[&cpuRegs.sa], eax); } @@ -128,135 +135,141 @@ void recMTSA() void recMTSAB() { - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF))); } - else { + else + { _eeMoveGPRtoR(eax, _Rs_); xAND(eax, 0xF); - xXOR(eax, _Imm_&0xf); + xXOR(eax, _Imm_ & 0xf); xMOV(ptr[&cpuRegs.sa], eax); } - } +} void recMTSAH() { - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0x7) ^ (_Imm_ & 
0x7)) << 1); } - else { + else + { _eeMoveGPRtoR(eax, _Rs_); xAND(eax, 0x7); - xXOR(eax, _Imm_&0x7); + xXOR(eax, _Imm_ & 0x7); xSHL(eax, 1); xMOV(ptr[&cpuRegs.sa], eax); } } - //////////////////////////////////////////////////// - void recNULL() - { - Console.Error("EE: Unimplemented op %x", cpuRegs.code); - } +//////////////////////////////////////////////////// +void recNULL() +{ + Console.Error("EE: Unimplemented op %x", cpuRegs.code); +} - //////////////////////////////////////////////////// - void recUnknown() - { - // TODO : Unknown ops should throw an exception. - Console.Error("EE: Unrecognized op %x", cpuRegs.code); - } +//////////////////////////////////////////////////// +void recUnknown() +{ + // TODO : Unknown ops should throw an exception. + Console.Error("EE: Unrecognized op %x", cpuRegs.code); +} - void recMMI_Unknown() - { - // TODO : Unknown ops should throw an exception. - Console.Error("EE: Unrecognized MMI op %x", cpuRegs.code); - } +void recMMI_Unknown() +{ + // TODO : Unknown ops should throw an exception. + Console.Error("EE: Unrecognized MMI op %x", cpuRegs.code); +} - void recCOP0_Unknown() - { - // TODO : Unknown ops should throw an exception. - Console.Error("EE: Unrecognized COP0 op %x", cpuRegs.code); - } +void recCOP0_Unknown() +{ + // TODO : Unknown ops should throw an exception. + Console.Error("EE: Unrecognized COP0 op %x", cpuRegs.code); +} - void recCOP1_Unknown() - { - // TODO : Unknown ops should throw an exception. - Console.Error("EE: Unrecognized FPU/COP1 op %x", cpuRegs.code); - } +void recCOP1_Unknown() +{ + // TODO : Unknown ops should throw an exception. 
+ Console.Error("EE: Unrecognized FPU/COP1 op %x", cpuRegs.code); +} - /********************************************************** - * UNHANDLED YET OPCODES - * - **********************************************************/ +/********************************************************** +* UNHANDLED YET OPCODES +* +**********************************************************/ - // Suikoden 3 uses it a lot - void recCACHE() //Interpreter only! - { - //xMOV(ptr32[&cpuRegs.code], (u32)cpuRegs.code ); - //xMOV(ptr32[&cpuRegs.pc], (u32)pc ); - //iFlushCall(FLUSH_EVERYTHING); - //xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE ); - //branch = 2; - } +// Suikoden 3 uses it a lot +void recCACHE() //Interpreter only! +{ + //xMOV(ptr32[&cpuRegs.code], (u32)cpuRegs.code ); + //xMOV(ptr32[&cpuRegs.pc], (u32)pc ); + //iFlushCall(FLUSH_EVERYTHING); + //xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE ); + //branch = 2; +} - void recTGE() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TGE ); - } +void recTGE() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TGE); +} - void recTGEU() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TGEU ); - } +void recTGEU() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TGEU); +} - void recTLT() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TLT ); - } +void recTLT() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TLT); +} - void recTLTU() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TLTU ); - } +void recTLTU() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TLTU); +} - void recTEQ() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TEQ ); - } +void recTEQ() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TEQ); +} - void recTNE() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TNE ); - } +void recTNE() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TNE); +} - void recTGEI() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TGEI ); - } +void recTGEI() +{ + 
recBranchCall(R5900::Interpreter::OpcodeImpl::TGEI); +} - void recTGEIU() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TGEIU ); - } +void recTGEIU() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TGEIU); +} - void recTLTI() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TLTI ); - } +void recTLTI() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TLTI); +} - void recTLTIU() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TLTIU ); - } +void recTLTIU() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TLTIU); +} - void recTEQI() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TEQI ); - } +void recTEQI() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TEQI); +} - void recTNEI() - { - recBranchCall( R5900::Interpreter::OpcodeImpl::TNEI ); - } +void recTNEI() +{ + recBranchCall(R5900::Interpreter::OpcodeImpl::TNEI); +} -} }} // end Namespace R5900::Dynarec::OpcodeImpl +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/iR5900Move.h b/pcsx2/x86/iR5900Move.h index e4a6a9d8c2..cf05434020 100644 --- a/pcsx2/x86/iR5900Move.h +++ b/pcsx2/x86/iR5900Move.h @@ -27,6 +27,9 @@ namespace OpcodeImpl { void recMTHI(); void recMOVN(); void recMOVZ(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900MultDiv.h b/pcsx2/x86/iR5900MultDiv.h index 3637026702..9c75a6f333 100644 --- a/pcsx2/x86/iR5900MultDiv.h +++ b/pcsx2/x86/iR5900MultDiv.h @@ -29,6 +29,9 @@ namespace OpcodeImpl { void recMULTU(); void recDIV(); void recDIVU(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif diff --git a/pcsx2/x86/iR5900Shift.h b/pcsx2/x86/iR5900Shift.h index bf5b6ac5c4..5b7a9ab9ea 100644 --- a/pcsx2/x86/iR5900Shift.h +++ b/pcsx2/x86/iR5900Shift.h @@ -41,6 +41,9 @@ namespace OpcodeImpl { void recDSLLV(); void recDSRLV(); void recDSRAV(); -} } } + +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 #endif 
diff --git a/pcsx2/x86/ir5900tables.cpp b/pcsx2/x86/ir5900tables.cpp index af85154c74..fe693dc1fd 100644 --- a/pcsx2/x86/ir5900tables.cpp +++ b/pcsx2/x86/ir5900tables.cpp @@ -33,4 +33,3 @@ #include "iMMI.h" #include "iFPU.h" #include "iCOP0.h" - diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 4b35afe599..812960623a 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -33,7 +33,8 @@ static int g_x86checknext; // use special x86 register allocation for ia32 -void _initX86regs() { +void _initX86regs() +{ memzero(x86regs); g_x86AllocCounter = 0; g_x86checknext = 0; @@ -43,7 +44,7 @@ uptr _x86GetAddr(int type, int reg) { uptr ret = 0; - switch(type&~X86TYPE_VU1) + switch (type & ~X86TYPE_VU1) { case X86TYPE_GPR: ret = (uptr)&cpuRegs.GPR.r[reg]; @@ -65,28 +66,28 @@ uptr _x86GetAddr(int type, int reg) break; case X86TYPE_VUQREAD: - if (type & X86TYPE_VU1) + if (type & X86TYPE_VU1) ret = (uptr)&VU1.VI[REG_Q]; else ret = (uptr)&VU0.VI[REG_Q]; break; case X86TYPE_VUPREAD: - if (type & X86TYPE_VU1) + if (type & X86TYPE_VU1) ret = (uptr)&VU1.VI[REG_P]; else ret = (uptr)&VU0.VI[REG_P]; break; case X86TYPE_VUQWRITE: - if (type & X86TYPE_VU1) + if (type & X86TYPE_VU1) ret = (uptr)&VU1.q; else ret = (uptr)&VU0.q; break; case X86TYPE_VUPWRITE: - if (type & X86TYPE_VU1) + if (type & X86TYPE_VU1) ret = (uptr)&VU1.p; else ret = (uptr)&VU0.p; @@ -115,28 +116,37 @@ int _getFreeX86reg(int mode) int tempi = -1; u32 bestcount = 0x10000; - int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR; + int maxreg = (mode & MODE_8BITREG) ? 
4 : iREGCNT_GPR; - for (uint i=0; i= maxreg ) continue; + for (uint i = 0; i < iREGCNT_GPR; i++) + { + int reg = (g_x86checknext + i) % iREGCNT_GPR; + if (reg == 0 || reg == esp.GetId() || reg == ebp.GetId()) + continue; + if (reg >= maxreg) + continue; //if( (mode&MODE_NOFRAME) && reg==EBP ) continue; - if (x86regs[reg].inuse == 0) { - g_x86checknext = (reg+1)%iREGCNT_GPR; + if (x86regs[reg].inuse == 0) + { + g_x86checknext = (reg + 1) % iREGCNT_GPR; return reg; } } - for (int i=1; i 0) { + if (eaxval > 0) + { xXOR(eax, eax); eaxval = 0; } - if (eaxval == 0) { + if (eaxval == 0) + { xNOT(eax); eaxval = -1; } xMOV(ptr[&cpuRegs.GPR.r[i].SL[j]], eax); - done[j + 2] |= 1<= 0 && reg < 32, "Register index out of bounds." ); - pxAssertDev( x86reg != esp && x86reg != ebp, "Allocation of ESP/EBP is not allowed!" ); + pxAssertDev(reg >= 0 && reg < 32, "Register index out of bounds."); + pxAssertDev(x86reg != esp && x86reg != ebp, "Allocation of ESP/EBP is not allowed!"); // don't alloc EAX and ESP,EBP if MODE_NOFRAME int oldmode = mode; //int noframe = mode & MODE_NOFRAME; uint maxreg = (mode & MODE_8BITREG) ? 
4 : iREGCNT_GPR; - mode &= ~(MODE_NOFRAME|MODE_8BITREG); + mode &= ~(MODE_NOFRAME | MODE_8BITREG); int readfromreg = -1; - if ( type != X86TYPE_TEMP ) { - if ( maxreg < iREGCNT_GPR ) { + if (type != X86TYPE_TEMP) + { + if (maxreg < iREGCNT_GPR) + { // make sure reg isn't in the higher regs - for(i = maxreg; i < iREGCNT_GPR; ++i) { - if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue; + for (i = maxreg; i < iREGCNT_GPR; ++i) + { + if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) + continue; - if( mode & MODE_READ ) { + if (mode & MODE_READ) + { readfromreg = i; x86regs[i].inuse = 0; break; } - else if( mode & MODE_WRITE ) { + else if (mode & MODE_WRITE) + { x86regs[i].inuse = 0; break; } } } - for (i=1; i= maxreg ) { @@ -284,21 +322,26 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode) break; }*/ - if( !x86reg.IsEmpty() ) { + if (!x86reg.IsEmpty()) + { // requested specific reg, so return that instead - if( i != (uint)x86reg.GetId() ) { - if( x86regs[i].mode & MODE_READ ) readfromreg = i; - mode |= x86regs[i].mode&MODE_WRITE; + if (i != (uint)x86reg.GetId()) + { + if (x86regs[i].mode & MODE_READ) + readfromreg = i; + mode |= x86regs[i].mode & MODE_WRITE; x86regs[i].inuse = 0; break; } } - if( type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode&MODE_READ)) { + if (type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ)) + { - if( type == X86TYPE_GPR ) _flushConstReg(reg); + if (type == X86TYPE_GPR) + _flushConstReg(reg); - if( X86_ISVI(type) && reg < 16 ) + if (X86_ISVI(type) && reg < 16) xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]); else xMOV(xRegister32(i), ptr[(void*)(_x86GetAddr(type, reg))]); @@ -307,7 +350,7 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode) } x86regs[i].needed = 1; - x86regs[i].mode|= mode; + x86regs[i].mode |= mode; return i; } } @@ -323,16 +366,21 @@ int _allocX86reg(xRegister32 x86reg, int type, 
int reg, int mode) x86regs[x86reg.GetId()].needed = 1; x86regs[x86reg.GetId()].inuse = 1; - if( mode & MODE_READ ) { - if( readfromreg >= 0 ) + if (mode & MODE_READ) + { + if (readfromreg >= 0) xMOV(x86reg, xRegister32(readfromreg)); - else { - if( type == X86TYPE_GPR ) { + else + { + if (type == X86TYPE_GPR) + { - if( reg == 0 ) { + if (reg == 0) + { xXOR(x86reg, x86reg); } - else { + else + { _flushConstReg(reg); _deleteGPRtoXMMreg(reg, 1); @@ -341,14 +389,17 @@ int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode) _deleteGPRtoXMMreg(reg, 0); } } - else { - if( X86_ISVI(type) && reg < 16 ) { - if( reg == 0 ) + else + { + if (X86_ISVI(type) && reg < 16) + { + if (reg == 0) xXOR(x86reg, x86reg); else xMOVZX(x86reg, ptr16[(u16*)(_x86GetAddr(type, reg))]); } - else xMOV(x86reg, ptr[(void*)(_x86GetAddr(type, reg))]); + else + xMOV(x86reg, ptr[(void*)(_x86GetAddr(type, reg))]); } } } @@ -362,11 +413,14 @@ int _checkX86reg(int type, int reg, int mode) { uint i; - for (i=0; i= 0 && x86reg < (int)iREGCNT_GPR ); + pxAssert(x86reg >= 0 && x86reg < (int)iREGCNT_GPR); - if( x86regs[x86reg].inuse && (x86regs[x86reg].mode&MODE_WRITE) ) { + if (x86regs[x86reg].inuse && (x86regs[x86reg].mode & MODE_WRITE)) + { x86regs[x86reg].mode &= ~MODE_WRITE; - if( X86_ISVI(x86regs[x86reg].type) && x86regs[x86reg].reg < 16 ) { + if (X86_ISVI(x86regs[x86reg].type) && x86regs[x86reg].reg < 16) + { xMOV(ptr[(void*)(_x86GetAddr(x86regs[x86reg].type, x86regs[x86reg].reg))], xRegister16(x86reg)); } else @@ -464,7 +529,7 @@ void _freeX86reg(int x86reg) void _freeX86regs() { - for (uint i=0; i + #include #endif @@ -57,8 +57,8 @@ static __fi u32 HWADDR(u32 mem) { return hwLUT[mem >> 16] + mem; } u32 s_nBlockCycles = 0; // cycles of current block recompiling -u32 pc; // recompiler pc -int g_branch; // set for branch +u32 pc; // recompiler pc +int g_branch; // set for branch __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0}; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; @@ -80,14 +80,14 
@@ static const size_t recLutSize = (Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2Mem static uptr m_ConfiguredCacheReserve = 64; alignas(16) static u32 recConstBuf[RECCONSTBUF_SIZE]; // 64-bit pseudo-immediates -static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here -static BASEBLOCK *recROM = NULL; // and here -static BASEBLOCK *recROM1 = NULL; // also here -static BASEBLOCK *recROM2 = NULL; // also here +static BASEBLOCK* recRAM = NULL; // and the ptr to the blocks here +static BASEBLOCK* recROM = NULL; // and here +static BASEBLOCK* recROM1 = NULL; // also here +static BASEBLOCK* recROM2 = NULL; // also here static BaseBlocks recBlocks; static u8* recPtr = NULL; -static u32 *recConstBufPtr = NULL; +static u32* recConstBufPtr = NULL; EEINST* s_pInstCache = NULL; static u32 s_nInstCacheSize = 0; @@ -118,72 +118,81 @@ static void recExitExecution(); void _eeFlushAllUnused() { u32 i; - for(i = 0; i < 34; ++i) { - if( pc < s_nEndBlock ) { - if( (g_pCurInstInfo[1].regs[i]&EEINST_USED) ) + for (i = 0; i < 34; ++i) + { + if (pc < s_nEndBlock) + { + if ((g_pCurInstInfo[1].regs[i] & EEINST_USED)) continue; } - else if( (g_pCurInstInfo[0].regs[i]&EEINST_USED) ) + else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED)) continue; - if( i < 32 && GPR_IS_CONST1(i) ) _flushConstReg(i); - else { + if (i < 32 && GPR_IS_CONST1(i)) + _flushConstReg(i); + else _deleteGPRtoXMMreg(i, 1); - } } //TODO when used info is done for FPU and VU0 - for(i = 0; i < iREGCNT_XMM; ++i) { - if( xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG ) + for (i = 0; i < iREGCNT_XMM; ++i) + { + if (xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG) _freeXMMreg(i); } } u32* _eeGetConstReg(int reg) { - pxAssert( GPR_IS_CONST1( reg ) ); + pxAssert(GPR_IS_CONST1(reg)); - if( g_cpuFlushedConstReg & (1<= 0 && (xmmregs[mmreg].mode&MODE_WRITE)) { + if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 && (xmmregs[mmreg].mode & MODE_WRITE)) + { xMOVD(to, xRegisterSSE(mmreg)); } - else { 
- xMOV(to, ptr[&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ]); + else + { + xMOV(to, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); } } } void _eeMoveGPRtoM(uptr to, int fromgpr) { - if( GPR_IS_CONST1(fromgpr) ) - xMOV(ptr32[(u32*)(to)], g_cpuConstRegs[fromgpr].UL[0] ); - else { + if (GPR_IS_CONST1(fromgpr)) + xMOV(ptr32[(u32*)(to)], g_cpuConstRegs[fromgpr].UL[0]); + else + { int mmreg; - if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 ) { + if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0) + { xMOVSS(ptr[(void*)(to)], xRegisterSSE(mmreg)); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); xMOV(ptr[(void*)(to)], eax); } } @@ -191,16 +200,19 @@ void _eeMoveGPRtoM(uptr to, int fromgpr) void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) { - if( GPR_IS_CONST1(fromgpr) ) - xMOV(ptr32[xAddressReg(to)], g_cpuConstRegs[fromgpr].UL[0] ); - else { + if (GPR_IS_CONST1(fromgpr)) + xMOV(ptr32[xAddressReg(to)], g_cpuConstRegs[fromgpr].UL[0]); + else + { int mmreg; - if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0 ) { + if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, fromgpr, MODE_READ)) >= 0) + { xMOVSS(ptr[xAddressReg(to)], xRegisterSSE(mmreg)); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[fromgpr].UL[0]]); xMOV(ptr[xAddressReg(to)], eax); } } @@ -217,12 +229,16 @@ void eeSignExtendTo(int gpr, bool onlyupper) int _flushXMMunused() { u32 i; - for (i=0; iregs[xmmregs[i].reg]&EEINST_USED) ) { - if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, xmmregs[i].reg) ) { + if (!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - pc) / 4, XMMTYPE_GPRREG, xmmregs[i].reg)) + { _freeXMMreg(i); xmmregs[i].inuse = 1; return 1; @@ -236,14 +252,16 @@ int _flushXMMunused() int _flushUnusedConstReg() { int i; - for(i = 1; i < 32; ++i) { - if( (g_cpuHasConstReg & (1<= recConstBuf + RECCONSTBUF_SIZE) { - 
Console.WriteLn( "EErec const buffer filled; Resetting..." ); + Console.WriteLn("EErec const buffer filled; Resetting..."); throw Exception::ExitCpuExecute(); /*for (u32 *p = recConstBuf; p < recConstBuf + RECCONSTBUF_SIZE; p += 2) @@ -294,19 +312,19 @@ u32* recGetImm64(u32 hi, u32 lo) // Use this to call into interpreter functions that require an immediate branchtest // to be done afterward (anything that throws an exception or enables interrupts, etc). -void recBranchCall( void (*func)() ) +void recBranchCall(void (*func)()) { // In order to make sure a branch test is performed, the nextBranchCycle is set // to the current cpu cycle. - xMOV(eax, ptr[&cpuRegs.cycle ]); + xMOV(eax, ptr[&cpuRegs.cycle]); xMOV(ptr[&g_nextEventCycle], eax); recCall(func); g_branch = 2; } -void recCall( void (*func)() ) +void recCall(void (*func)()) { iFlushCall(FLUSH_INTERPRETER); xFastCall((void*)func); @@ -316,21 +334,21 @@ void recCall( void (*func)() ) // R5900 Dispatchers // ===================================================================================================== -static void __fastcall recRecompile( const u32 startpc ); -static void __fastcall dyna_block_discard(u32 start,u32 sz); -static void __fastcall dyna_page_reset(u32 start,u32 sz); +static void __fastcall recRecompile(const u32 startpc); +static void __fastcall dyna_block_discard(u32 start, u32 sz); +static void __fastcall dyna_page_reset(u32 start, u32 sz); // Recompiled code buffer for EE recompiler dispatchers! 
static u8 __pagealigned eeRecDispatchers[__pagesize]; typedef void DynGenFunc(); -static DynGenFunc* DispatcherEvent = NULL; -static DynGenFunc* DispatcherReg = NULL; -static DynGenFunc* JITCompile = NULL; -static DynGenFunc* JITCompileInBlock = NULL; -static DynGenFunc* EnterRecompiledCode = NULL; -static DynGenFunc* ExitRecompiledCode = NULL; +static DynGenFunc* DispatcherEvent = NULL; +static DynGenFunc* DispatcherReg = NULL; +static DynGenFunc* JITCompile = NULL; +static DynGenFunc* JITCompileInBlock = NULL; +static DynGenFunc* EnterRecompiledCode = NULL; +static DynGenFunc* ExitRecompiledCode = NULL; static DynGenFunc* DispatchBlockDiscard = NULL; static DynGenFunc* DispatchPageReset = NULL; @@ -338,7 +356,8 @@ static void recEventTest() { _cpuEventTest_Shared(); - if (iopBreakpoint) { + if (iopBreakpoint) + { iopBreakpoint = false; recExitExecution(); } @@ -348,21 +367,21 @@ static void recEventTest() // dispatches to the recompiled block address. static DynGenFunc* _DynGen_JITCompile() { - pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." ); + pxAssertMsg(DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. 
Thanks."); u8* retval = xGetAlignedCallTarget(); - xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc] ); + xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc]); // C equivalent: // u32 addr = cpuRegs.pc; // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); - xMOV( eax, ptr[&cpuRegs.pc] ); - xMOV( ebx, eax ); - xSHR( eax, 16 ); - xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] ); - xJMP( ptrNative[rbx*(wordsize/4) + rcx] ); + xMOV(eax, ptr[&cpuRegs.pc]); + xMOV(ebx, eax); + xSHR(eax, 16); + xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); + xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); return (DynGenFunc*)retval; } @@ -370,24 +389,24 @@ static DynGenFunc* _DynGen_JITCompile() static DynGenFunc* _DynGen_JITCompileInBlock() { u8* retval = xGetAlignedCallTarget(); - xJMP( (void*)JITCompile ); + xJMP((void*)JITCompile); return (DynGenFunc*)retval; } // called when jumping to variable pc address static DynGenFunc* _DynGen_DispatcherReg() { - u8* retval = xGetPtr(); // fallthrough target, can't align it! + u8* retval = xGetPtr(); // fallthrough target, can't align it! 
// C equivalent: // u32 addr = cpuRegs.pc; // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); - xMOV( eax, ptr[&cpuRegs.pc] ); - xMOV( ebx, eax ); - xSHR( eax, 16 ); - xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] ); - xJMP( ptrNative[rbx*(wordsize/4) + rcx] ); + xMOV(eax, ptr[&cpuRegs.pc]); + xMOV(ebx, eax); + xSHR(eax, 16); + xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); + xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); return (DynGenFunc*)retval; } @@ -396,14 +415,14 @@ static DynGenFunc* _DynGen_DispatcherEvent() { u8* retval = xGetPtr(); - xFastCall((void*)recEventTest ); + xFastCall((void*)recEventTest); return (DynGenFunc*)retval; } static DynGenFunc* _DynGen_EnterRecompiledCode() { - pxAssertDev( DispatcherReg != NULL, "Dynamically generated dispatchers are required prior to generating EnterRecompiledCode!" ); + pxAssertDev(DispatcherReg != NULL, "Dynamically generated dispatchers are required prior to generating EnterRecompiledCode!"); u8* retval = xGetAlignedCallTarget(); @@ -444,17 +463,17 @@ static DynGenFunc* _DynGen_DispatchPageReset() static void _DynGen_Dispatchers() { // In case init gets called multiple times: - HostSys::MemProtectStatic( eeRecDispatchers, PageAccess_ReadWrite() ); + HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ReadWrite()); // clear the buffer to 0xcc (easier debugging). - memset( eeRecDispatchers, 0xcc, __pagesize); + memset(eeRecDispatchers, 0xcc, __pagesize); - xSetPtr( eeRecDispatchers ); + xSetPtr(eeRecDispatchers); // Place the EventTest and DispatcherReg stuff at the top, because they get called the // most and stand to benefit from strong alignment and direct referencing. 
DispatcherEvent = _DynGen_DispatcherEvent(); - DispatcherReg = _DynGen_DispatcherReg(); + DispatcherReg = _DynGen_DispatcherReg(); JITCompile = _DynGen_JITCompile(); JITCompileInBlock = _DynGen_JITCompileInBlock(); @@ -462,9 +481,9 @@ static void _DynGen_Dispatchers() DispatchBlockDiscard = _DynGen_DispatchBlockDiscard(); DispatchPageReset = _DynGen_DispatchPageReset(); - HostSys::MemProtectStatic( eeRecDispatchers, PageAccess_ExecOnly() ); + HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ExecOnly()); - recBlocks.SetJITCompile( JITCompile ); + recBlocks.SetJITCompile(JITCompile); Perf::any.map((uptr)&eeRecDispatchers, 4096, "EE Dispatcher"); } @@ -475,29 +494,32 @@ static void _DynGen_Dispatchers() static __ri void ClearRecLUT(BASEBLOCK* base, int memsize) { - for (int i = 0; i < memsize/(int)sizeof(uptr); i++) + for (int i = 0; i < memsize / (int)sizeof(uptr); i++) base[i].SetFnptr((uptr)JITCompile); } -static void recThrowHardwareDeficiency( const wxChar* extFail ) +static void recThrowHardwareDeficiency(const wxChar* extFail) { throw Exception::HardwareDeficiency() - .SetDiagMsg(pxsFmt( L"R5900-32 recompiler init failed: %s is not available.", extFail)) - .SetUserMsg(pxsFmt(_("%s Extensions not found. The R5900-32 recompiler requires a host CPU with SSE2 extensions."), extFail )); + .SetDiagMsg(pxsFmt(L"R5900-32 recompiler init failed: %s is not available.", extFail)) + .SetUserMsg(pxsFmt(_("%s Extensions not found. 
The R5900-32 recompiler requires a host CPU with SSE2 extensions."), extFail)); } static void recReserveCache() { - if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb); + if (!recMem) + recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb); recMem->SetProfilerName("EErec"); while (!recMem->IsOk()) { - if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::EErecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) break; + if (recMem->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::EErecOffset, m_ConfiguredCacheReserve * _1mb) != NULL) + break; // If it failed, then try again (if possible): - if (m_ConfiguredCacheReserve < 16) break; + if (m_ConfiguredCacheReserve < 16) + break; m_ConfiguredCacheReserve /= 2; } @@ -508,8 +530,8 @@ static void recReserve() { // Hardware Requirements Check... - if ( !x86caps.hasStreamingSIMD4Extensions ) - recThrowHardwareDeficiency( L"SSE4" ); + if (!x86caps.hasStreamingSIMD4Extensions) + recThrowHardwareDeficiency(L"SSE4"); recReserveCache(); } @@ -527,15 +549,15 @@ static void recAlloc() } BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM; - recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4); - recROM = basepos; basepos += (Ps2MemSize::Rom / 4); - recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4); - recROM2 = basepos; basepos += (Ps2MemSize::Rom2 / 4); + recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4); + recROM = basepos; basepos += (Ps2MemSize::Rom / 4); + recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4); + recROM2 = basepos; basepos += (Ps2MemSize::Rom2 / 4); for (int i = 0; i < 0x10000; i++) recLUT_SetPage(recLUT, 0, 0, 0, i, 0); - for ( int i = 0x0000; i < (int)(Ps2MemSize::MainRam / 0x10000); i++ ) + for (int i = 0x0000; i < (int)(Ps2MemSize::MainRam / 0x10000); i++) { recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i); recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i); @@ -547,35 +569,35 @@ static void recAlloc() recLUT_SetPage(recLUT, hwLUT, 
recRAM, 0xd000, i, i); } - for ( int i = 0x1fc0; i < 0x2000; i++ ) + for (int i = 0x1fc0; i < 0x2000; i++) { recLUT_SetPage(recLUT, hwLUT, recROM, 0x0000, i, i - 0x1fc0); recLUT_SetPage(recLUT, hwLUT, recROM, 0x8000, i, i - 0x1fc0); recLUT_SetPage(recLUT, hwLUT, recROM, 0xa000, i, i - 0x1fc0); } - for ( int i = 0x1e00; i < 0x1e04; i++ ) + for (int i = 0x1e00; i < 0x1e04; i++) { recLUT_SetPage(recLUT, hwLUT, recROM1, 0x0000, i, i - 0x1e00); recLUT_SetPage(recLUT, hwLUT, recROM1, 0x8000, i, i - 0x1e00); recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00); } - for (int i = 0x1e40; i < 0x1e48; i++) + for (int i = 0x1e40; i < 0x1e48; i++) { recLUT_SetPage(recLUT, hwLUT, recROM2, 0x0000, i, i - 0x1e40); recLUT_SetPage(recLUT, hwLUT, recROM2, 0x8000, i, i - 0x1e40); recLUT_SetPage(recLUT, hwLUT, recROM2, 0xa000, i, i - 0x1e40); } - if( s_pInstCache == NULL ) + if (s_pInstCache == NULL) { s_nInstCacheSize = 128; - s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize ); + s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); } - if( s_pInstCache == NULL ) - throw Exception::OutOfMemory( L"R5900-32 InstCache" ); + if (s_pInstCache == NULL) + throw Exception::OutOfMemory(L"R5900-32 InstCache"); // No errors.. 
Proceed with initialization: @@ -600,10 +622,11 @@ static void recResetRaw() recAlloc(); - if( eeRecIsReset.exchange(true) ) return; + if (eeRecIsReset.exchange(true)) + return; eeRecNeedsReset = false; - Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" ); + Console.WriteLn(Color_StrongBlack, "EE/iR5900-32 Recompiler Reset"); recMem->Reset(); ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize); @@ -613,8 +636,8 @@ static void recResetRaw() memset(recConstBuf, 0, RECCONSTBUF_SIZE * sizeof(*recConstBuf)); - if( s_pInstCache ) - memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize ); + if (s_pInstCache) + memset(s_pInstCache, 0, sizeof(EEINST) * s_nInstCacheSize); recBlocks.Reset(); mmap_ResetBlockTracking(); @@ -631,15 +654,15 @@ static void recResetRaw() static void recShutdown() { - safe_delete( recMem ); - safe_aligned_free( recRAMCopy ); - safe_aligned_free( recLutReserve_RAM ); + safe_delete(recMem); + safe_aligned_free(recRAMCopy); + safe_aligned_free(recLutReserve_RAM); recBlocks.Reset(); recRAM = recROM = recROM1 = recROM2 = NULL; - safe_free( s_pInstCache ); + safe_free(s_pInstCache); s_nInstCacheSize = 0; // FIXME Warning thread unsafe @@ -662,12 +685,12 @@ void recStep() } #if !PCSX2_SEH -# define SETJMP_CODE(x) x + #define SETJMP_CODE(x) x static jmp_buf m_SetJmp_StateCheck; static std::unique_ptr m_cpuException; static ScopedExcept m_Exception; #else -# define SETJMP_CODE(x) + #define SETJMP_CODE(x) #endif @@ -681,13 +704,13 @@ static void recExitExecution() // creates. 
However, the longjump is slow so we only want to do one when absolutely // necessary: - longjmp( m_SetJmp_StateCheck, 1 ); + longjmp(m_SetJmp_StateCheck, 1); #endif } static void recCheckExecutionState() { - if( SETJMP_CODE(m_cpuException || m_Exception ||) eeRecIsReset || GetCoreThread().HasPendingStateChangeRequest() ) + if (SETJMP_CODE(m_cpuException || m_Exception ||) eeRecIsReset || GetCoreThread().HasPendingStateChangeRequest()) { recExitExecution(); } @@ -702,23 +725,24 @@ static void recExecute() eeRecIsReset = false; ScopedBool executing(eeCpuExecuting); - try { + try + { EnterRecompiledCode(); } - catch( Exception::ExitCpuExecute& ) + catch (Exception::ExitCpuExecute&) { } #else int oldstate; - m_cpuException = NULL; - m_Exception = NULL; + m_cpuException = NULL; + m_Exception = NULL; // setjmp will save the register context and will return 0 // A call to longjmp will restore the context (included the eip/rip) // but will return the longjmp 2nd parameter (here 1) - if( !setjmp( m_SetJmp_StateCheck ) ) + if (!setjmp(m_SetJmp_StateCheck)) { eeRecIsReset = false; ScopedBool executing(eeCpuExecuting); @@ -728,18 +752,20 @@ static void recExecute() // in Linux, which cannot have a C++ exception cross the recompiler. Hence the changing // of the cancelstate here! - pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate ); + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); EnterRecompiledCode(); // Generally unreachable code here ... 
} else { - pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, &oldstate ); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); } - if(m_cpuException) m_cpuException->Rethrow(); - if(m_Exception) m_Exception->Rethrow(); + if (m_cpuException) + m_cpuException->Rethrow(); + if (m_Exception) + m_Exception->Rethrow(); // FIXME Warning thread unsafe Perf::dump(); @@ -760,7 +786,7 @@ void R5900::Dynarec::OpcodeImpl::recSYSCALL() xADD(ptr32[&cpuRegs.cycle], scaleblockcycles()); // Note: technically the address is 0x8000_0180 (or 0x180) // (if CPU is booted) - xJMP( (void*)DispatcherReg ); + xJMP((void*)DispatcherReg); x86SetJ8(j8Ptr[0]); //g_branch = 2; } @@ -775,7 +801,7 @@ void R5900::Dynarec::OpcodeImpl::recBREAK() xCMP(ptr32[&cpuRegs.pc], pc); j8Ptr[0] = JE8(0); xADD(ptr32[&cpuRegs.cycle], scaleblockcycles()); - xJMP( (void*)DispatcherEvent ); + xJMP((void*)DispatcherEvent); x86SetJ8(j8Ptr[0]); //g_branch = 2; } @@ -800,20 +826,24 @@ void recClear(u32 addr, u32 size) int toRemoveLast = blockidx; - while (pexblock = recBlocks[blockidx]) { + while (pexblock = recBlocks[blockidx]) + { u32 blockstart = pexblock->startpc; u32 blockend = pexblock->startpc + pexblock->size * 4; BASEBLOCK* pblock = PC_GETBLOCK(blockstart); - if (pblock == s_pCurBlock) { - if(toRemoveLast != blockidx) { + if (pblock == s_pCurBlock) + { + if (toRemoveLast != blockidx) + { recBlocks.Remove((blockidx + 1), toRemoveLast); } toRemoveLast = --blockidx; continue; } - if (blockend <= addr) { + if (blockend <= addr) + { lowerextent = std::max(lowerextent, blockend); break; } @@ -827,22 +857,25 @@ void recClear(u32 addr, u32 size) blockidx--; } - if(toRemoveLast != blockidx) { + if (toRemoveLast != blockidx) + { recBlocks.Remove((blockidx + 1), toRemoveLast); } upperextent = std::min(upperextent, ceiling); - for (int i = 0; pexblock = recBlocks[i]; i++) { + for (int i = 0; pexblock = recBlocks[i]; i++) + { if (s_pCurBlock == PC_GETBLOCK(pexblock->startpc)) continue; u32 blockend = pexblock->startpc + 
pexblock->size * 4; if (pexblock->startpc >= addr && pexblock->startpc < addr + size * 4 - || pexblock->startpc < addr && blockend > addr) { - if( !IsDevBuild ) - Console.Error( "[EE] Impossible block clearing failure" ); + || pexblock->startpc < addr && blockend > addr) + { + if (!IsDevBuild) + Console.Error("[EE] Impossible block clearing failure"); else - pxFailDev( "[EE] Impossible block clearing failure" ); + pxFailDev("[EE] Impossible block clearing failure"); } } @@ -851,30 +884,35 @@ void recClear(u32 addr, u32 size) } -static int *s_pCode; +static int* s_pCode; -void SetBranchReg( u32 reg ) +void SetBranchReg(u32 reg) { g_branch = 1; - if( reg != 0xffffffff ) { -// if( GPR_IS_CONST1(reg) ) -// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[reg].UL[0] ); -// else { + if (reg != 0xffffffff) + { +// if (GPR_IS_CONST1(reg)) +// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[reg].UL[0]); +// else +// { // int mmreg; // -// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0 ) { +// if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0) +// { // xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg)); // } -// else { -// xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[ reg ].UL[ 0 ] )]); +// else +// { +// xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[reg].UL[0])]); // xMOV(ptr[&cpuRegs.pc], eax); // } // } _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _eeMoveGPRtoR(calleeSavedReg2d, reg); - if (EmuConfig.Gamefixes.GoemonTlbHack) { + if (EmuConfig.Gamefixes.GoemonTlbHack) + { xMOV(ecx, calleeSavedReg2d); vtlb_DynV2P(); xMOV(calleeSavedReg2d, eax); @@ -882,12 +920,14 @@ void SetBranchReg( u32 reg ) recompileNextInstruction(1); - if( x86regs[calleeSavedReg2d.GetId()].inuse ) { - pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK ); + if (x86regs[calleeSavedReg2d.GetId()].inuse) + { + pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d); 
x86regs[calleeSavedReg2d.GetId()].inuse = 0; } - else { + else + { xMOV(eax, ptr[&g_recWriteback]); xMOV(ptr[&cpuRegs.pc], eax); } @@ -896,18 +936,18 @@ void SetBranchReg( u32 reg ) // xCMP(ptr32[&cpuRegs.pc], 0); // j8Ptr[5] = JNE8(0); // xFastCall((void*)(uptr)tempfn); -// x86SetJ8( j8Ptr[5] ); +// x86SetJ8(j8Ptr[5]); iFlushCall(FLUSH_EVERYTHING); iBranchTest(); } -void SetBranchImm( u32 imm ) +void SetBranchImm(u32 imm) { g_branch = 1; - pxAssert( imm ); + pxAssert(imm); // end the current block iFlushCall(FLUSH_EVERYTHING); @@ -945,28 +985,31 @@ void iFlushCall(int flushtype) _freeX86reg(ecx); _freeX86reg(edx); - if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) { + if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) + { xMOV(ptr32[&cpuRegs.pc], pc); g_cpuFlushedPC = true; } - if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) { + if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) + { xMOV(ptr32[&cpuRegs.code], cpuRegs.code); g_cpuFlushedCode = true; } - if ((flushtype == FLUSH_CAUSE) && !g_maySignalException) { + if ((flushtype == FLUSH_CAUSE) && !g_maySignalException) + { if (g_recompilingDelaySlot) xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD g_maySignalException = true; } - if( flushtype & FLUSH_FREE_XMM ) + if (flushtype & FLUSH_FREE_XMM) _freeXMMregs(); - else if( flushtype & FLUSH_FLUSH_XMM) + else if (flushtype & FLUSH_FLUSH_XMM) _flushXMMregs(); - if( flushtype & FLUSH_CACHED_REGS ) + if (flushtype & FLUSH_CACHED_REGS) _flushConstRegs(); } @@ -991,7 +1034,7 @@ static u32 scaleblockcycles_calculation() else if (cyclerate == 1) scale_cycles = DEFAULT_SCALED_BLOCKS() / 1.3f; // Adds a mild 30% increase in clockspeed for value 1. - else if (cyclerate == -1) // the mildest value which is also used by the "balanced" preset. + else if (cyclerate == -1) // the mildest value which is also used by the "balanced" preset. // These values were manually tuned to yield mild speedup with high compatibility scale_cycles = (s_nBlockCycles <= 80 || s_nBlockCycles > 168 ? 
5 : 7) * s_nBlockCycles / 32; @@ -1077,7 +1120,7 @@ static void iBranchTest(u32 newpc) xCMOVS(eax, ptr32[&cpuRegs.cycle]); xMOV(ptr32[&cpuRegs.cycle], eax); - xJMP( (void*)DispatcherEvent ); + xJMP((void*)DispatcherEvent); } else { @@ -1087,11 +1130,11 @@ static void iBranchTest(u32 newpc) xSUB(eax, ptr[&g_nextEventCycle]); if (newpc == 0xffffffff) - xJS( DispatcherReg ); + xJS(DispatcherReg); else recBlocks.Link(HWADDR(newpc), xJcc32(Jcc_Signed)); - xJMP( (void*)DispatcherEvent ); + xJMP((void*)DispatcherEvent); } } @@ -1178,7 +1221,7 @@ int COP2DivUnitTimings(u32 code) bool COP2IsQOP(u32 code) { - if(_Opcode_ != 022) // Not COP2 operation + if (_Opcode_ != 022) // Not COP2 operation return false; if ((code & 0x3f) == 0x20) // VADDq @@ -1210,20 +1253,23 @@ bool COP2IsQOP(u32 code) void dynarecCheckBreakpoint() { u32 pc = cpuRegs.pc; - if (CBreakPoints::CheckSkipFirst(BREAKPOINT_EE, pc) != 0) + if (CBreakPoints::CheckSkipFirst(BREAKPOINT_EE, pc) != 0) return; int bpFlags = isBreakpointNeeded(pc); bool hit = false; //check breakpoint at current pc - if (bpFlags & 1) { + if (bpFlags & 1) + { auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc); - if (cond == NULL || cond->Evaluate()) { + if (cond == NULL || cond->Evaluate()) + { hit = true; } } //check breakpoint in delay slot - if (bpFlags & 2) { + if (bpFlags & 2) + { auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_EE, pc + 4); if (cond == NULL || cond->Evaluate()) hit = true; @@ -1240,7 +1286,7 @@ void dynarecCheckBreakpoint() void dynarecMemcheck() { u32 pc = cpuRegs.pc; - if (CBreakPoints::CheckSkipFirst(BREAKPOINT_EE, pc) != 0) + if (CBreakPoints::CheckSkipFirst(BREAKPOINT_EE, pc) != 0) return; CBreakPoints::SetBreakpointTriggered(true); @@ -1258,7 +1304,7 @@ void __fastcall dynarecMemLogcheck(u32 start, bool store) void recMemcheck(u32 op, u32 bits, bool store) { - iFlushCall(FLUSH_EVERYTHING|FLUSH_PC); + iFlushCall(FLUSH_EVERYTHING | FLUSH_PC); // compute accessed address 
_eeMoveGPRtoR(ecx, (op >> 21) & 0x1F); @@ -1268,9 +1314,9 @@ void recMemcheck(u32 op, u32 bits, bool store) xAND(ecx, ~0x0F); xFastCall((void*)standardizeBreakpointAddressEE, ecx); - xMOV(ecx,eax); - xMOV(edx,eax); - xADD(edx,bits/8); + xMOV(ecx, eax); + xMOV(edx, eax); + xADD(edx, bits / 8); // ecx = access address // edx = access address+size @@ -1289,20 +1335,22 @@ void recMemcheck(u32 op, u32 bits, bool store) // logic: memAddress < bpEnd && bpStart < memAddress+memSize - xMOV(eax,standardizeBreakpointAddress(BREAKPOINT_EE, checks[i].end)); - xCMP(ecx,eax); // address < end - xForwardJGE8 next1; // if address >= end then goto next1 + xMOV(eax, standardizeBreakpointAddress(BREAKPOINT_EE, checks[i].end)); + xCMP(ecx, eax); // address < end + xForwardJGE8 next1; // if address >= end then goto next1 - xMOV(eax,standardizeBreakpointAddress(BREAKPOINT_EE, checks[i].start)); - xCMP(eax,edx); // start < address+size - xForwardJGE8 next2; // if start >= address+size then goto next2 + xMOV(eax, standardizeBreakpointAddress(BREAKPOINT_EE, checks[i].start)); + xCMP(eax, edx); // start < address+size + xForwardJGE8 next2; // if start >= address+size then goto next2 // hit the breakpoint - if (checks[i].result & MEMCHECK_LOG) { + if (checks[i].result & MEMCHECK_LOG) + { xMOV(edx, store); xFastCall((void*)dynarecMemLogcheck, ecx, edx); } - if (checks[i].result & MEMCHECK_BREAK) { + if (checks[i].result & MEMCHECK_BREAK) + { xFastCall((void*)dynarecMemcheck); } @@ -1315,7 +1363,7 @@ void encodeBreakpoint() { if (isBreakpointNeeded(pc) != 0) { - iFlushCall(FLUSH_EVERYTHING|FLUSH_PC); + iFlushCall(FLUSH_EVERYTHING | FLUSH_PC); xFastCall((void*)dynarecCheckBreakpoint); } } @@ -1326,27 +1374,27 @@ void encodeMemcheck() if (needed == 0) return; - u32 op = memRead32(needed == 2 ? pc+4 : pc); + u32 op = memRead32(needed == 2 ? 
pc + 4 : pc); const OPCODE& opcode = GetInstruction(op); bool store = (opcode.flags & IS_STORE) != 0; switch (opcode.flags & MEMTYPE_MASK) { - case MEMTYPE_BYTE: - recMemcheck(op,8,store); - break; - case MEMTYPE_HALF: - recMemcheck(op,16,store); - break; - case MEMTYPE_WORD: - recMemcheck(op,32,store); - break; - case MEMTYPE_DWORD: - recMemcheck(op,64,store); - break; - case MEMTYPE_QWORD: - recMemcheck(op,128,store); - break; + case MEMTYPE_BYTE: + recMemcheck(op, 8, store); + break; + case MEMTYPE_HALF: + recMemcheck(op, 16, store); + break; + case MEMTYPE_WORD: + recMemcheck(op, 32, store); + break; + case MEMTYPE_DWORD: + recMemcheck(op, 64, store); + break; + case MEMTYPE_QWORD: + recMemcheck(op, 128, store); + break; } } @@ -1362,46 +1410,56 @@ void recompileNextInstruction(int delayslot) encodeMemcheck(); } - s_pCode = (int *)PSM( pc ); + s_pCode = (int*)PSM(pc); pxAssert(s_pCode); // acts as a tag for delimiting recompiled instructions when viewing x86 disasm. - if( IsDevBuild ) + if (IsDevBuild) xNOP(); - if( IsDebugBuild ) + if (IsDebugBuild) xMOV(eax, pc); - cpuRegs.code = *(int *)s_pCode; + cpuRegs.code = *(int*)s_pCode; - if (!delayslot) { + if (!delayslot) + { pc += 4; g_cpuFlushedPC = false; g_cpuFlushedCode = false; - } else { + } + else + { // increment after recompiling so that pc points to the branch during recompilation g_recompilingDelaySlot = true; } g_pCurInstInfo++; - for(i = 0; i < iREGCNT_XMM; ++i) { - if( xmmregs[i].inuse ) { - count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg); - if( count > 0 ) xmmregs[i].counter = 1000-count; - else xmmregs[i].counter = 0; + for (i = 0; i < iREGCNT_XMM; ++i) + { + if (xmmregs[i].inuse) + { + count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock - pc) / 4 + 1, xmmregs[i].type, xmmregs[i].reg); + if (count > 0) + xmmregs[i].counter = 1000 - count; + else + xmmregs[i].counter = 0; } } const OPCODE& opcode = GetCurrentInstruction(); - //pxAssert( 
!(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); + //pxAssert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); //Console.Warning("opcode name = %s, it's cycles = %d\n",opcode.Name,opcode.cycles); // if this instruction is a jump or a branch, exit right away - if( delayslot ) { + if (delayslot) + { bool check_branch_delay = false; - switch(_Opcode_) { + switch (_Opcode_) + { case 1: - switch(_Rt_) { + switch (_Rt_) + { case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13: check_branch_delay = true; } @@ -1413,7 +1471,8 @@ void recompileNextInstruction(int delayslot) // Check for branch in delay slot, new code by FlatOut. // Gregory tested this in 2017 using the ps2autotests suite and remarked "So far we return 1 (even with this PR), and the HW 2. // Original PR and discussion at https://github.com/PCSX2/pcsx2/pull/1783 so we don't forget this information. - if (check_branch_delay) { + if (check_branch_delay) + { DevCon.Warning("Branch %x in delay slot!", cpuRegs.code); _clearNeededX86regs(); _clearNeededXMMregs(); @@ -1428,16 +1487,21 @@ void recompileNextInstruction(int delayslot) } } // Check for NOP - if (cpuRegs.code == 0x00000000) { + if (cpuRegs.code == 0x00000000) + { // Note: Tests on a ps2 suggested more like 5 cycles for a NOP. But there's many factors in this.. - s_nBlockCycles +=9 * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); + s_nBlockCycles += 9 * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); } - else { + else + { //If the COP0 DIE bit is disabled, cycles should be doubled. 
s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); - try { + try + { opcode.recompile(); - } catch (Exception::FailedToAllocateRegister&) { + } + catch (Exception::FailedToAllocateRegister&) + { // Fall back to the interpreter recCall(opcode.interpret); #if 0 @@ -1447,7 +1511,8 @@ void recompileNextInstruction(int delayslot) } } - if (!delayslot && (_getNumXMMwrite() > 2)) _flushXMMunused(); + if (!delayslot && (_getNumXMMwrite() > 2)) + _flushXMMunused(); //CHECK_XMMCHANGED(); _clearNeededX86regs(); @@ -1457,7 +1522,8 @@ void recompileNextInstruction(int delayslot) // _flushCachedRegs(); // g_cpuHasConstReg = 1; - if (delayslot) { + if (delayslot) + { pc += 4; g_cpuFlushedPC = false; g_cpuFlushedCode = false; @@ -1500,7 +1566,7 @@ void recompileNextInstruction(int delayslot) { cpuRegs.code = memRead32(p); - if((_Opcode_ == 022) && (cpuRegs.code & 0x7FC) == 0x3BC) // WaitQ or another DIV op hit (stalled), we're safe + if ((_Opcode_ == 022) && (cpuRegs.code & 0x7FC) == 0x3BC) // WaitQ or another DIV op hit (stalled), we're safe break; else if (COP2IsQOP(cpuRegs.code)) @@ -1553,14 +1619,14 @@ void recompileNextInstruction(int delayslot) cpuRegs.code = *s_pCode; #endif - if (!delayslot && (xGetPtr() - recPtr > 0x1000) ) + if (!delayslot && (xGetPtr() - recPtr > 0x1000)) s_nEndBlock = pc; } // (Called from recompiled code)] // This function is called from the recompiler prior to starting execution of *every* recompiled block. // Calling of this function can be enabled or disabled through the use of EmuConfig.Recompiler.PreBlockChecks -static void __fastcall PreBlockCheck( u32 blockpc ) +static void __fastcall PreBlockCheck(u32 blockpc) { /*static int lastrec = 0; static int curcount = 0; @@ -1587,56 +1653,56 @@ static u32 s_recblocks[] = {0}; // Called when a block under manual protection fails it's pre-execution integrity check. 
// (meaning the actual code area has been modified -- ie dynamic modules being loaded or, // less likely, self-modifying code) -void __fastcall dyna_block_discard(u32 start,u32 sz) +void __fastcall dyna_block_discard(u32 start, u32 sz) { - eeRecPerfLog.Write( Color_StrongGray, "Clearing Manual Block @ 0x%08X [size=%d]", start, sz*4); + eeRecPerfLog.Write(Color_StrongGray, "Clearing Manual Block @ 0x%08X [size=%d]", start, sz * 4); recClear(start, sz); } // called when a page under manual protection has been run enough times to be a candidate // for being reset under the faster vtlb write protection. All blocks in the page are cleared // and the block is re-assigned for write protection. -void __fastcall dyna_page_reset(u32 start,u32 sz) +void __fastcall dyna_page_reset(u32 start, u32 sz) { recClear(start & ~0xfffUL, 0x400); manual_counter[start >> 12]++; - mmap_MarkCountedRamPage( start ); + mmap_MarkCountedRamPage(start); } static void memory_protect_recompiled_code(u32 startpc, u32 size) { u32 inpage_ptr = HWADDR(startpc); - u32 inpage_sz = size*4; + u32 inpage_sz = size * 4; // The kernel context register is stored @ 0x800010C0-0x80001300 // The EENULL thread context register is stored @ 0x81000-.... bool contains_thread_stack = ((startpc >> 12) == 0x81) || ((startpc >> 12) == 0x80001); // note: blocks are guaranteed to reside within the confines of a single page. - const vtlb_ProtectionMode PageType = contains_thread_stack ? ProtMode_Manual : mmap_GetRamPageInfo( inpage_ptr ); + const vtlb_ProtectionMode PageType = contains_thread_stack ? 
ProtMode_Manual : mmap_GetRamPageInfo(inpage_ptr); - switch (PageType) - { - case ProtMode_NotRequired: - break; + switch (PageType) + { + case ProtMode_NotRequired: + break; case ProtMode_None: - case ProtMode_Write: - mmap_MarkCountedRamPage( inpage_ptr ); + case ProtMode_Write: + mmap_MarkCountedRamPage(inpage_ptr); manual_page[inpage_ptr >> 12] = 0; break; - case ProtMode_Manual: - xMOV( arg1regd, inpage_ptr ); - xMOV( arg2regd, inpage_sz / 4 ); + case ProtMode_Manual: + xMOV(arg1regd, inpage_ptr); + xMOV(arg2regd, inpage_sz / 4); //xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard u32 lpc = inpage_ptr; u32 stg = inpage_sz; - while(stg>0) + while (stg > 0) { - xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) ); + xCMP(ptr32[PSM(lpc)], *(u32*)PSM(lpc)); xJNE(DispatchBlockDiscard); stg -= 4; @@ -1673,31 +1739,36 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size) xJC(DispatchPageReset); // note: clearcnt is measured per-page, not per-block! 
- ConsoleColorScope cs( Color_Gray ); - eeRecPerfLog.Write( "Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d", - startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] ); + ConsoleColorScope cs(Color_Gray); + eeRecPerfLog.Write("Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d", + startpc, size, inpage_ptr >> 12, inpage_ptr & 0xfff, inpage_sz, manual_counter[inpage_ptr >> 12]); } else { - eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d", - startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz ); + eeRecPerfLog.Write("Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d", + startpc, size, inpage_ptr >> 12, inpage_ptr & 0xfff, inpage_sz); } - break; + break; } } // Skip MPEG Game-Fix -bool skipMPEG_By_Pattern(u32 sPC) { +bool skipMPEG_By_Pattern(u32 sPC) +{ - if (!CHECK_SKIPMPEGHACK) return 0; + if (!CHECK_SKIPMPEGHACK) + return 0; // sceMpegIsEnd: lw reg, 0x40(a0); jr ra; lw v0, 0(reg) - if ((s_nEndBlock == sPC + 12) && (memRead32(sPC + 4) == 0x03e00008)) { + if ((s_nEndBlock == sPC + 12) && (memRead32(sPC + 4) == 0x03e00008)) + { u32 code = memRead32(sPC); - u32 p1 = 0x8c800040; - u32 p2 = 0x8c020000 | (code & 0x1f0000) << 5; - if ((code & 0xffe0ffff) != p1) return 0; - if (memRead32(sPC+8) != p2) return 0; + u32 p1 = 0x8c800040; + u32 p2 = 0x8c020000 | (code & 0x1f0000) << 5; + if ((code & 0xffe0ffff) != p1) + return 0; + if (memRead32(sPC + 8) != p2) + return 0; xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[0]], 1); xMOV(ptr32[&cpuRegs.GPR.n.v0.UL[1]], 0); xMOV(eax, ptr32[&cpuRegs.GPR.n.ra.UL[0]]); @@ -1716,34 +1787,38 @@ bool skipMPEG_By_Pattern(u32 sPC) { void LoadAllPatchesAndStuff(const Pcsx2Config&); void doPlace0Patches() { - LoadAllPatchesAndStuff(EmuConfig); - ApplyLoadedPatches(PPT_ONCE_ON_LOAD); + LoadAllPatchesAndStuff(EmuConfig); + ApplyLoadedPatches(PPT_ONCE_ON_LOAD); } 
-static void __fastcall recRecompile( const u32 startpc ) +static void __fastcall recRecompile(const u32 startpc) { u32 i = 0; u32 willbranch3 = 0; u32 usecop2; #ifdef PCSX2_DEBUG - if (dumplog & 4) iDumpRegisters(startpc, 0); + if (dumplog & 4) + iDumpRegisters(startpc, 0); #endif - pxAssert( startpc ); + pxAssert(startpc); // if recPtr reached the mem limit reset whole mem - if (recPtr >= (recMem->GetPtrEnd() - _64kb)) { + if (recPtr >= (recMem->GetPtrEnd() - _64kb)) + { eeRecNeedsReset = true; } - else if ((recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64) { + else if ((recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64) + { Console.WriteLn("EE recompiler stack reset"); eeRecNeedsReset = true; } - if (eeRecNeedsReset) recResetRaw(); + if (eeRecNeedsReset) + recResetRaw(); - xSetPtr( recPtr ); + xSetPtr(recPtr); recPtr = xGetAlignedCallTarget(); if (0x8000d618 == startpc) @@ -1752,7 +1827,7 @@ static void __fastcall recRecompile( const u32 startpc ) s_pCurBlock = PC_GETBLOCK(startpc); pxAssert(s_pCurBlock->GetFnptr() == (uptr)JITCompile - || s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock); + || s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock); s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); pxAssert(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)); @@ -1784,8 +1859,7 @@ static void __fastcall recRecompile( const u32 startpc ) g_eeloadExec = EELOAD_START + 0x2B8; else if (typeAexecjump >> 26 == 3) // JAL to 0x82170 g_eeloadExec = EELOAD_START + 0x170; - else // There might be other types of EELOAD, because these models' BIOSs have not been examined: 18000, 3500x, 3700x, - // 5500x, and 7900x. However, all BIOS versions have been examined except for v1.01 and v1.10. + else // There might be other types of EELOAD, because these models' BIOSs have not been examined: 18000, 3500x, 3700x, 5500x, and 7900x. However, all BIOS versions have been examined except for v1.01 and v1.10. 
Console.WriteLn("recRecompile: Could not enable launch arguments for fast boot mode; unidentified BIOS version! Please report this to the PCSX2 developers."); } @@ -1797,12 +1871,13 @@ static void __fastcall recRecompile( const u32 startpc ) doPlace0Patches(); g_patchesNeedRedo = 0; } - + if (g_eeloadExec && HWADDR(startpc) == HWADDR(g_eeloadExec)) xFastCall((void*)eeloadHook2); // this is the only way patches get applied, doesn't depend on a hack - if (g_GameLoading && HWADDR(startpc) == ElfEntry) { + if (g_GameLoading && HWADDR(startpc) == ElfEntry) + { Console.WriteLn(L"Elf entry point @ 0x%08x about to get recompiled. Load patches first.", startpc); xFastCall((void*)eeGameStarting); @@ -1817,12 +1892,12 @@ static void __fastcall recRecompile( const u32 startpc ) s_nBlockCycles = 0; pc = startpc; g_cpuHasConstReg = g_cpuFlushedConstReg = 1; - pxAssert( g_cpuConstRegs[0].UD[0] == 0 ); + pxAssert(g_cpuConstRegs[0].UD[0] == 0); _initX86regs(); _initXMMregs(); - if( EmuConfig.Cpu.Recompiler.PreBlockCheckEE ) + if (EmuConfig.Cpu.Recompiler.PreBlockCheckEE) { // per-block dump checks, for debugging purposes. // [TODO] : These must be enabled from the GUI or INI to be used, otherwise the @@ -1831,11 +1906,15 @@ static void __fastcall recRecompile( const u32 startpc ) xFastCall((void*)PreBlockCheck, pc); } - if (EmuConfig.Gamefixes.GoemonTlbHack) { - if (pc == 0x33ad48 || pc == 0x35060c) { + if (EmuConfig.Gamefixes.GoemonTlbHack) + { + if (pc == 0x33ad48 || pc == 0x35060c) + { // 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache xFastCall((void*)GoemonPreloadTlb); - } else if (pc == 0x3563b8) { + } + else if (pc == 0x3563b8) + { // Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation. 
eeRecNeedsReset = true; // 0x3563b8 is the start address of the function that invalidate entry in TLB cache @@ -1851,14 +1930,15 @@ static void __fastcall recRecompile( const u32 startpc ) // compile breakpoints as individual blocks int n1 = isBreakpointNeeded(i); int n2 = isMemcheckNeeded(i); - int n = std::max(n1,n2); + int n = std::max(n1, n2); if (n != 0) { - s_nEndBlock = i + n*4; + s_nEndBlock = i + n * 4; goto StartRecomp; } - while(1) { + while (1) + { BASEBLOCK* pblock = PC_GETBLOCK(i); // stop before breakpoints @@ -1868,14 +1948,14 @@ static void __fastcall recRecompile( const u32 startpc ) break; } - if(i != startpc) // Block size truncation checks. + if (i != startpc) // Block size truncation checks. { - if( (i & 0xffc) == 0x0 ) // breaks blocks at 4k page boundaries + if ((i & 0xffc) == 0x0) // breaks blocks at 4k page boundaries { willbranch3 = 1; s_nEndBlock = i; - eeRecPerfLog.Write( "Pagesplit @ %08X : size=%d insts", startpc, (i-startpc) / 4 ); + eeRecPerfLog.Write("Pagesplit @ %08X : size=%d insts", startpc, (i - startpc) / 4); break; } @@ -1888,11 +1968,13 @@ static void __fastcall recRecompile( const u32 startpc ) } //HUH ? PSM ? whut ? 
THIS IS VIRTUAL ACCESS GOD DAMMIT - cpuRegs.code = *(int *)PSM(i); + cpuRegs.code = *(int*)PSM(i); - switch(cpuRegs.code >> 26) { + switch (cpuRegs.code >> 26) + { case 0: // special - if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR + if (_Funct_ == 8 || _Funct_ == 9) // JR, JALR + { s_nEndBlock = i + 8; goto StartRecomp; } @@ -1900,11 +1982,14 @@ static void __fastcall recRecompile( const u32 startpc ) case 1: // regimm - if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) { + if (_Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20)) + { // branches s_branchTo = _Imm_ * 4 + i + 4; - if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; - else s_nEndBlock = i+8; + if (s_branchTo > startpc && s_branchTo < i) + s_nEndBlock = s_branchTo; + else + s_nEndBlock = i + 8; goto StartRecomp; } @@ -1920,15 +2005,19 @@ static void __fastcall recRecompile( const u32 startpc ) case 4: case 5: case 6: case 7: case 20: case 21: case 22: case 23: s_branchTo = _Imm_ * 4 + i + 4; - if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; - else s_nEndBlock = i+8; + if (s_branchTo > startpc && s_branchTo < i) + s_nEndBlock = s_branchTo; + else + s_nEndBlock = i + 8; goto StartRecomp; case 16: // cp0 - if( _Rs_ == 16 ) { - if( _Funct_ == 24 ) { // eret - s_nEndBlock = i+4; + if (_Rs_ == 16) + { + if (_Funct_ == 24) // eret + { + s_nEndBlock = i + 4; goto StartRecomp; } } @@ -1937,12 +2026,15 @@ static void __fastcall recRecompile( const u32 startpc ) case 17: // cp1 case 18: // cp2 - if( _Rs_ == 8 ) { + if (_Rs_ == 8) + { // BC1F, BC1T, BC1FL, BC1TL // BC2F, BC2T, BC2FL, BC2TL s_branchTo = _Imm_ * 4 + i + 4; - if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo; - else s_nEndBlock = i+8; + if (s_branchTo > startpc && s_branchTo < i) + s_nEndBlock = s_branchTo; + else + s_nEndBlock = i + 8; goto StartRecomp; } @@ -1964,12 +2056,14 @@ StartRecomp: // without a significant loss in cycle accuracy is with a division, but games would probably // be happy with time 
wasting loops completing in 0 cycles and timeouts waiting forever. s_nBlockFF = false; - if (s_branchTo == startpc) { + if (s_branchTo == startpc) + { s_nBlockFF = true; u32 reads = 0, loads = 1; - for (i = startpc; i < s_nEndBlock; i += 4) { + for (i = startpc; i < s_nEndBlock; i += 4) + { if (i == s_nEndBlock - 8) continue; cpuRegs.code = *(u32*)PSM(i); @@ -1982,13 +2076,15 @@ StartRecomp: // imm arithmetic else if ((_Opcode_ & 070) == 010 || (_Opcode_ & 076) == 030) { - if (loads & 1 << _Rs_) { + if (loads & 1 << _Rs_) + { loads |= 1 << _Rt_; continue; } else reads |= 1 << _Rs_; - if (reads & 1 << _Rt_) { + if (reads & 1 << _Rt_) + { s_nBlockFF = false; break; } @@ -1996,13 +2092,15 @@ StartRecomp: // common register arithmetic instructions else if (_Opcode_ == 0 && (_Funct_ & 060) == 040 && (_Funct_ & 076) != 050) { - if (loads & 1 << _Rs_ && loads & 1 << _Rt_) { + if (loads & 1 << _Rs_ && loads & 1 << _Rt_) + { loads |= 1 << _Rd_; continue; } else reads |= 1 << _Rs_ | 1 << _Rt_; - if (reads & 1 << _Rd_) { + if (reads & 1 << _Rd_) + { s_nBlockFF = false; break; } @@ -2010,13 +2108,15 @@ StartRecomp: // loads else if ((_Opcode_ & 070) == 040 || (_Opcode_ & 076) == 032 || _Opcode_ == 067) { - if (loads & 1 << _Rs_) { + if (loads & 1 << _Rs_) + { loads |= 1 << _Rt_; continue; } else reads |= 1 << _Rs_; - if (reads & 1 << _Rt_) { + if (reads & 1 << _Rt_) + { s_nBlockFF = false; break; } @@ -2038,19 +2138,21 @@ StartRecomp: { EEINST* pcur; - if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { + if (s_nInstCacheSize < (s_nEndBlock - startpc) / 4 + 1) + { free(s_pInstCache); - s_nInstCacheSize = (s_nEndBlock-startpc)/4+10; - s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize); - pxAssert( s_pInstCache != NULL ); + s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10; + s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize); + pxAssert(s_pInstCache != NULL); } - pcur = s_pInstCache + (s_nEndBlock-startpc)/4; + pcur = s_pInstCache + (s_nEndBlock - 
startpc) / 4; _recClearInst(pcur); pcur->info = 0; - for(i = s_nEndBlock; i > startpc; i -= 4 ) { - cpuRegs.code = *(int *)PSM(i-4); + for (i = s_nEndBlock; i > startpc; i -= 4) + { + cpuRegs.code = *(int*)PSM(i - 4); pcur[-1] = pcur[0]; pcur--; } @@ -2061,34 +2163,40 @@ StartRecomp: usecop2 = 0; g_pCurInstInfo = s_pInstCache; - for(i = startpc; i < s_nEndBlock; i += 4) { + for (i = startpc; i < s_nEndBlock; i += 4) + { g_pCurInstInfo++; cpuRegs.code = *(u32*)PSM(i); // cop2 // - if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { + if (g_pCurInstInfo->info & EEINSTINFO_COP2) + { - if( !usecop2 ) { + if (!usecop2) + { // init usecop2 = 1; } VU0.code = cpuRegs.code; - _vuRegsCOP22( &VU0, &g_pCurInstInfo->vuregs ); + _vuRegsCOP22(&VU0, &g_pCurInstInfo->vuregs); continue; } } // This *is* important because g_pCurInstInfo is checked a bit later on and // if it's not equal to s_pInstCache it handles recompilation differently. // ... but the empty if() conditional inside the for loop is still amusing. 
>_< - if( usecop2 ) { + if (usecop2) + { // add necessary mac writebacks g_pCurInstInfo = s_pInstCache; - for(i = startpc; i < s_nEndBlock-4; i += 4) { + for (i = startpc; i < s_nEndBlock - 4; i += 4) + { g_pCurInstInfo++; - if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { + if (g_pCurInstInfo->info & EEINSTINFO_COP2) + { } } } @@ -2096,7 +2204,7 @@ StartRecomp: #ifdef PCSX2_DEBUG // dump code - for(i = 0; i < ArraySize(s_recblocks); ++i) + for (i = 0; i < ArraySize(s_recblocks); ++i) { if (startpc == s_recblocks[i]) { @@ -2104,36 +2212,42 @@ StartRecomp: } } - if (dumplog & 1) iDumpBlock(startpc, recPtr); + if (dumplog & 1) + iDumpBlock(startpc, recPtr); #endif // Detect and handle self-modified code - memory_protect_recompiled_code(startpc, (s_nEndBlock-startpc) >> 2); + memory_protect_recompiled_code(startpc, (s_nEndBlock - startpc) >> 2); // Skip Recompilation if sceMpegIsEnd Pattern detected bool doRecompilation = !skipMPEG_By_Pattern(startpc); - if (doRecompilation) { + if (doRecompilation) + { // Finally: Generate x86 recompiled code! g_pCurInstInfo = s_pInstCache; - while (!g_branch && pc < s_nEndBlock) { - recompileNextInstruction(0); // For the love of recursion, batman! + while (!g_branch && pc < s_nEndBlock) + { + recompileNextInstruction(0); // For the love of recursion, batman! 
} } #ifdef PCSX2_DEBUG - if (dumplog & 1) iDumpBlock(startpc, recPtr); + if (dumplog & 1) + iDumpBlock(startpc, recPtr); #endif - pxAssert( (pc-startpc)>>2 <= 0xffff ); - s_pCurBlockEx->size = (pc-startpc)>>2; + pxAssert((pc - startpc) >> 2 <= 0xffff); + s_pCurBlockEx->size = (pc - startpc) >> 2; - if (HWADDR(pc) <= Ps2MemSize::MainRam) { - BASEBLOCKEX *oldBlock; + if (HWADDR(pc) <= Ps2MemSize::MainRam) + { + BASEBLOCKEX* oldBlock; int i; i = recBlocks.LastIndex(HWADDR(pc) - 4); - while (oldBlock = recBlocks[i--]) { + while (oldBlock = recBlocks[i--]) + { if (oldBlock == s_pCurBlockEx) continue; if (oldBlock->startpc >= HWADDR(pc)) @@ -2142,7 +2256,7 @@ StartRecomp: break; if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc), - oldBlock->size * 4)) + oldBlock->size * 4)) { recClear(startpc, (pc - startpc) / 4); s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); @@ -2156,15 +2270,16 @@ StartRecomp: s_pCurBlock->SetFnptr((uptr)recPtr); - for(i = 1; i < (u32)s_pCurBlockEx->size; i++) { + for (i = 1; i < (u32)s_pCurBlockEx->size; i++) + { if ((uptr)JITCompile == s_pCurBlock[i].GetFnptr()) s_pCurBlock[i].SetFnptr((uptr)JITCompileInBlock); } - if( !(pc&0x10000000) ) - maxrecmem = std::max( (pc&~0xa0000000), maxrecmem ); + if (!(pc & 0x10000000)) + maxrecmem = std::max((pc & ~0xa0000000), maxrecmem); - if( g_branch == 2 ) + if (g_branch == 2) { // Branch type 2 - This is how I "think" this works (air): // Performs a branch/event test but does not actually "break" the block. @@ -2177,10 +2292,11 @@ StartRecomp: } else { - if( g_branch ) - pxAssert( !willbranch3 ); + if (g_branch) + pxAssert(!willbranch3); - if( willbranch3 || !g_branch) { + if (willbranch3 || !g_branch) + { iFlushCall(FLUSH_EVERYTHING); @@ -2191,19 +2307,19 @@ StartRecomp: // performance reasons. 
int numinsts = (pc - startpc) / 4; - if( numinsts > 6 ) + if (numinsts > 6) SetBranchImm(pc); else { - xMOV( ptr32[&cpuRegs.pc], pc ); - xADD( ptr32[&cpuRegs.cycle], scaleblockcycles() ); - recBlocks.Link( HWADDR(pc), xJcc32() ); + xMOV(ptr32[&cpuRegs.pc], pc); + xADD(ptr32[&cpuRegs.cycle], scaleblockcycles()); + recBlocks.Link(HWADDR(pc), xJcc32()); } } } - pxAssert( xGetPtr() < recMem->GetPtrEnd() ); - pxAssert( recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE ); + pxAssert(xGetPtr() < recMem->GetPtrEnd()); + pxAssert(recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE); pxAssert(xGetPtr() - recPtr < _64kb); s_pCurBlockEx->x86size = xGetPtr() - recPtr; @@ -2218,7 +2334,7 @@ StartRecomp: recPtr = xGetPtr(); - pxAssert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); + pxAssert((g_cpuHasConstReg & g_cpuFlushedConstReg) == g_cpuHasConstReg); s_pCurBlock = NULL; s_pCurBlockEx = NULL; @@ -2227,29 +2343,31 @@ StartRecomp: // The only *safe* way to throw exceptions from the context of recompiled code. // The exception is cached and the recompiler is exited safely using either an // SEH unwind (MSW) or setjmp/longjmp (GCC). 
-static void recThrowException( const BaseR5900Exception& ex ) +static void recThrowException(const BaseR5900Exception& ex) { #if PCSX2_SEH ex.Rethrow(); #else - if (!eeCpuExecuting) ex.Rethrow(); + if (!eeCpuExecuting) + ex.Rethrow(); m_cpuException = std::unique_ptr(ex.Clone()); recExitExecution(); #endif } -static void recThrowException( const BaseException& ex ) +static void recThrowException(const BaseException& ex) { #if PCSX2_SEH ex.Rethrow(); #else - if (!eeCpuExecuting) ex.Rethrow(); + if (!eeCpuExecuting) + ex.Rethrow(); m_Exception = ScopedExcept(ex.Clone()); recExitExecution(); #endif } -static void recSetCacheReserve( uint reserveInMegs ) +static void recSetCacheReserve(uint reserveInMegs) { m_ConfiguredCacheReserve = reserveInMegs; } diff --git a/pcsx2/x86/ix86-32/iR5900Arit.cpp b/pcsx2/x86/ix86-32/iR5900Arit.cpp index 7061453f69..a1d52dfcaa 100644 --- a/pcsx2/x86/ix86-32/iR5900Arit.cpp +++ b/pcsx2/x86/ix86-32/iR5900Arit.cpp @@ -24,8 +24,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Register arithmetic * @@ -38,20 +37,20 @@ namespace OpcodeImpl namespace Interp = R5900::Interpreter::OpcodeImpl; -REC_FUNC_DEL(ADD, _Rd_); -REC_FUNC_DEL(ADDU, _Rd_); -REC_FUNC_DEL(DADD, _Rd_); +REC_FUNC_DEL(ADD, _Rd_); +REC_FUNC_DEL(ADDU, _Rd_); +REC_FUNC_DEL(DADD, _Rd_); REC_FUNC_DEL(DADDU, _Rd_); -REC_FUNC_DEL(SUB, _Rd_); -REC_FUNC_DEL(SUBU, _Rd_); -REC_FUNC_DEL(DSUB, _Rd_); +REC_FUNC_DEL(SUB, _Rd_); +REC_FUNC_DEL(SUBU, _Rd_); +REC_FUNC_DEL(DSUB, _Rd_); REC_FUNC_DEL(DSUBU, _Rd_); -REC_FUNC_DEL(AND, _Rd_); -REC_FUNC_DEL(OR, _Rd_); -REC_FUNC_DEL(XOR, _Rd_); -REC_FUNC_DEL(NOR, _Rd_); -REC_FUNC_DEL(SLT, _Rd_); -REC_FUNC_DEL(SLTU, _Rd_); +REC_FUNC_DEL(AND, _Rd_); +REC_FUNC_DEL(OR, _Rd_); +REC_FUNC_DEL(XOR, _Rd_); +REC_FUNC_DEL(NOR, _Rd_); +REC_FUNC_DEL(SLT, _Rd_); +REC_FUNC_DEL(SLTU, _Rd_); #else @@ -63,7 +62,7 @@ void recADD_const() void 
recADD_constv(int info, int creg, u32 vreg) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); s32 cval = g_cpuConstRegs[creg].SL[0]; @@ -88,7 +87,7 @@ void recADD_constt(int info) // nothing is constant void recADD_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]); if (_Rs_ == _Rt_) @@ -98,7 +97,7 @@ void recADD_(int info) eeSignExtendTo(_Rd_); } -EERECOMPILE_CODE0(ADD, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(ADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //// ADDU void recADDU(void) @@ -114,19 +113,23 @@ void recDADD_const(void) void recDADD_constv(int info, int creg, u32 vreg) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[creg]; - if (_Rd_ == vreg) { - if (!cval.SD[0]) - return; // no-op + if (_Rd_ == vreg) + { + if (!cval.SD[0]) // no-op + return; xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].SL[0]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[vreg].SL[1]]); - if (cval.SD[0]) { + if (cval.SD[0]) + { xADD(eax, cval.SL[0]); xADC(edx, cval.SL[1]); } @@ -147,7 +150,7 @@ void recDADD_constt(int info) void recDADD_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); u32 rs = _Rs_, rt = _Rt_; if (_Rd_ == _Rt_) @@ -155,7 +158,8 @@ void recDADD_(int info) xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]); - if (_Rd_ == _Rs_ && _Rs_ == _Rt_) { + if (_Rd_ == _Rs_ && _Rs_ == _Rt_) + { xSHLD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], eax, 1); xSHL(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 1); return; @@ -163,14 +167,19 @@ void recDADD_(int info) xMOV(edx, ptr32[&cpuRegs.GPR.r[rt].SL[1]]); - if (_Rd_ == rs) { + if (_Rd_ == rs) + { xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); return; - } else if (rs == 
rt) { + } + else if (rs == rt) + { xADD(eax, eax); xADC(edx, edx); - } else { + } + else + { xADD(eax, ptr32[&cpuRegs.GPR.r[rs].SL[0]]); xADC(edx, ptr32[&cpuRegs.GPR.r[rs].SL[1]]); } @@ -179,7 +188,7 @@ void recDADD_(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); } -EERECOMPILE_CODE0(DADD, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); //// DADDU void recDADDU(void) @@ -196,7 +205,7 @@ void recSUB_const() void recSUB_consts(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); s32 sval = g_cpuConstRegs[_Rs_].SL[0]; @@ -207,7 +216,7 @@ void recSUB_consts(int info) void recSUB_constt(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); s32 tval = g_cpuConstRegs[_Rt_].SL[0]; @@ -219,9 +228,10 @@ void recSUB_constt(int info) void recSUB_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - if (_Rs_ == _Rt_) { + if (_Rs_ == _Rt_) + { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); return; @@ -232,7 +242,7 @@ void recSUB_(int info) eeSignExtendTo(_Rd_); } -EERECOMPILE_CODE0(SUB, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// SUBU void recSUBU(void) @@ -248,11 +258,12 @@ void recDSUB_const() void recDSUB_consts(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 sval = g_cpuConstRegs[_Rs_]; - if (!sval.SD[0] && _Rd_ == _Rt_) { + if (!sval.SD[0] && _Rd_ == _Rt_) + { /* To understand this 64-bit negate, consider that a negate in 2's complement * is a NOT then an ADD 1. The upper word should only have the NOT stage unless * the ADD overflows. The ADD only overflows if the lower word is 0. 
@@ -263,7 +274,9 @@ void recDSUB_consts(int info) xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); xNEG(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]]); return; - } else { + } + else + { xMOV(eax, sval.SL[0]); xMOV(edx, sval.SL[1]); } @@ -276,17 +289,21 @@ void recDSUB_consts(int info) void recDSUB_constt(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 tval = g_cpuConstRegs[_Rt_]; - if (_Rd_ == _Rs_) { + if (_Rd_ == _Rs_) + { xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]); xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], tval.SL[1]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]); - if (tval.SD[0]) { + if (tval.SD[0]) + { xSUB(eax, tval.SL[0]); xSBB(edx, tval.SL[1]); } @@ -297,17 +314,22 @@ void recDSUB_constt(int info) void recDSUB_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - if (_Rs_ == _Rt_) { + if (_Rs_ == _Rt_) + { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); - } else if (_Rd_ == _Rs_) { + } + else if (_Rd_ == _Rs_) + { xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]); xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]); xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]); @@ -317,7 +339,7 @@ void recDSUB_(int info) } } -EERECOMPILE_CODE0(DSUB, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// DSUBU void recDSUBU(void) @@ -333,17 +355,23 @@ void recAND_const() void recAND_constv(int info, int creg, u32 vreg) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[creg]; - for (int i = 0; i < 2; i++) { - if (!cval.UL[i]) { + for (int i = 0; i < 2; i++) + { + if 
(!cval.UL[i]) + { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0); - } else if (_Rd_ == vreg) { + } + else if (_Rd_ == vreg) + { if (cval.SL[i] != -1) xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); if (cval.SL[i] != -1) xAND(eax, cval.UL[i]); @@ -364,19 +392,23 @@ void recAND_constt(int info) void recAND_(int info) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); u32 rs = _Rs_, rt = _Rt_; if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; - for (int i = 0; i < 2; i++) { - if (_Rd_ == rs) { + for (int i = 0; i < 2; i++) + { + if (_Rd_ == rs) + { if (rs == rt) continue; xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xAND(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); if (rs != rt) xAND(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); @@ -385,7 +417,7 @@ void recAND_(int info) } } -EERECOMPILE_CODE0(AND, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(AND, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// OR void recOR_const() @@ -395,17 +427,23 @@ void recOR_const() void recOR_constv(int info, int creg, u32 vreg) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[creg]; - for (int i = 0; i < 2; i++) { - if (cval.SL[i] == -1) { + for (int i = 0; i < 2; i++) + { + if (cval.SL[i] == -1) + { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], -1); - } else if (_Rd_ == vreg) { + } + else if (_Rd_ == vreg) + { if (cval.UL[i]) xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); if (cval.UL[i]) xOR(eax, cval.UL[i]); @@ -426,19 +464,23 @@ void recOR_constt(int info) void recOR_(int info) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); u32 rs = _Rs_, rt = _Rt_; if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; - for (int i = 0; i < 2; i++) { - if (_Rd_ == rs) { + for (int i = 
0; i < 2; i++) + { + if (_Rd_ == rs) + { if (rs == rt) continue; xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); if (rs != rt) xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); @@ -447,7 +489,7 @@ void recOR_(int info) } } -EERECOMPILE_CODE0(OR, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(OR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// XOR void recXOR_const() @@ -457,15 +499,19 @@ void recXOR_const() void recXOR_constv(int info, int creg, u32 vreg) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[creg]; - for (int i = 0; i < 2; i++) { - if (_Rd_ == vreg) { + for (int i = 0; i < 2; i++) + { + if (_Rd_ == vreg) + { if (cval.UL[i]) xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); if (cval.UL[i]) xXOR(eax, cval.UL[i]); @@ -486,19 +532,25 @@ void recXOR_constt(int info) void recXOR_(int info) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); u32 rs = _Rs_, rt = _Rt_; if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; - for (int i = 0; i < 2; i++) { - if (rs == rt) { + for (int i = 0; i < 2; i++) + { + if (rs == rt) + { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0); - } else if (_Rd_ == rs) { + } + else if (_Rd_ == rs) + { xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xXOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); xXOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); @@ -506,26 +558,30 @@ void recXOR_(int info) } } -EERECOMPILE_CODE0(XOR, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(XOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// NOR void recNOR_const() { - g_cpuConstRegs[_Rd_].UD[0] =~(g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0]); + 
g_cpuConstRegs[_Rd_].UD[0] = ~(g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0]); } void recNOR_constv(int info, int creg, u32 vreg) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[creg]; - for (int i = 0; i < 2; i++) { - if (_Rd_ == vreg) { + for (int i = 0; i < 2; i++) + { + if (_Rd_ == vreg) + { if (cval.UL[i]) xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]); xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]); if (cval.UL[i]) xOR(eax, cval.UL[i]); @@ -547,22 +603,27 @@ void recNOR_constt(int info) void recNOR_(int info) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); u32 rs = _Rs_, rt = _Rt_; if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; - for (int i = 0; i < 2; i++) { - if (_Rd_ == rs) { - if (rs == rt) { + for (int i = 0; i < 2; i++) + { + if (_Rd_ == rs) + { + if (rs == rt) + { xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); continue; } xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); xOR(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]); - } else { + } + else + { xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]); if (rs != rt) xOR(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]); @@ -572,7 +633,7 @@ void recNOR_(int info) } } -EERECOMPILE_CODE0(NOR, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(NOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// SLT - test with silent hill, lemans void recSLT_const() @@ -582,7 +643,7 @@ void recSLT_const() void recSLTs_const(int info, int sign, int st) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); GPR_reg64 cval = g_cpuConstRegs[st ? 
_Rt_ : _Rs_]; @@ -607,7 +668,7 @@ void recSLTs_const(int info, int sign, int st) void recSLTs_(int info, int sign) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); xMOV(eax, 1); @@ -645,7 +706,7 @@ void recSLT_(int info) recSLTs_(info, 1); } -EERECOMPILE_CODE0(SLT, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SLT, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); // SLTU - test with silent hill, lemans void recSLTU_const() @@ -668,8 +729,10 @@ void recSLTU_(int info) recSLTs_(info, 0); } -EERECOMPILE_CODE0(SLTU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SLTU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900AritImm.cpp b/pcsx2/x86/ix86-32/iR5900AritImm.cpp index b38cb202eb..bbf3b484f2 100644 --- a/pcsx2/x86/ix86-32/iR5900AritImm.cpp +++ b/pcsx2/x86/ix86-32/iR5900AritImm.cpp @@ -24,8 +24,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Arithmetic with immediate operand * @@ -36,42 +35,45 @@ namespace OpcodeImpl namespace Interp = R5900::Interpreter::OpcodeImpl; -REC_FUNC_DEL(ADDI, _Rt_); -REC_FUNC_DEL(ADDIU, _Rt_); -REC_FUNC_DEL(DADDI, _Rt_); +REC_FUNC_DEL(ADDI, _Rt_); +REC_FUNC_DEL(ADDIU, _Rt_); +REC_FUNC_DEL(DADDI, _Rt_); REC_FUNC_DEL(DADDIU, _Rt_); -REC_FUNC_DEL(ANDI, _Rt_); -REC_FUNC_DEL(ORI, _Rt_); -REC_FUNC_DEL(XORI, _Rt_); +REC_FUNC_DEL(ANDI, _Rt_); +REC_FUNC_DEL(ORI, _Rt_); +REC_FUNC_DEL(XORI, _Rt_); -REC_FUNC_DEL(SLTI, _Rt_); -REC_FUNC_DEL(SLTIU, _Rt_); +REC_FUNC_DEL(SLTI, _Rt_); +REC_FUNC_DEL(SLTIU, _Rt_); #else //// ADDI -void recADDI_const( void ) +void recADDI_const(void) { g_cpuConstRegs[_Rt_].SD[0] = (s64)(g_cpuConstRegs[_Rs_].SL[0] + (s32)_Imm_); } void recADDI_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info 
& PROCESS_EE_XMM)); - if ( _Rt_ == _Rs_ ) { + if (_Rt_ == _Rs_) + { // must perform the ADD unconditionally, to maintain flags status: - xADD(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _Imm_); - _signExtendSFtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]); + xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_); + _signExtendSFtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[1]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - if ( _Imm_ != 0 ) xADD(eax, _Imm_ ); + if (_Imm_ != 0) + xADD(eax, _Imm_); - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } } @@ -91,26 +93,28 @@ void recDADDI_const() void recDADDI_(int info) { - pxAssert( !(info&PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - if( _Rt_ == _Rs_ ) { - xADD(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _Imm_); - xADC(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], _Imm_<0?0xffffffff:0); + if (_Rt_ == _Rs_) + { + xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_); + xADC(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], _Imm_ < 0 ? 0xffffffff : 0); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - if ( _Imm_ != 0 ) + if (_Imm_ != 0) { - xADD(eax, _Imm_ ); - xADC(edx, _Imm_ < 0?0xffffffff:0); + xADD(eax, _Imm_); + xADC(edx, _Imm_ < 0 ? 0xffffffff : 0); } - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } } @@ -135,11 +139,11 @@ void recSLTIU_(int info) { xMOV(eax, 1); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], _Imm_ >= 0 ? 
0 : 0xffffffff); - j8Ptr[0] = JB8( 0 ); - j8Ptr[2] = JA8( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff); + j8Ptr[0] = JB8(0); + j8Ptr[2] = JA8(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], (s32)_Imm_ ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], (s32)_Imm_); j8Ptr[1] = JB8(0); x86SetJ8(j8Ptr[2]); @@ -148,8 +152,8 @@ void recSLTIU_(int info) x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[1]); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); } EERECOMPILE_CODEX(eeRecompileCode1, SLTIU); @@ -165,11 +169,11 @@ void recSLTI_(int info) // test silent hill if modding xMOV(eax, 1); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], _Imm_ >= 0 ? 0 : 0xffffffff); - j8Ptr[0] = JL8( 0 ); - j8Ptr[2] = JG8( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff); + j8Ptr[0] = JL8(0); + j8Ptr[2] = JG8(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], (s32)_Imm_ ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], (s32)_Imm_); j8Ptr[1] = JB8(0); x86SetJ8(j8Ptr[2]); @@ -178,8 +182,8 @@ void recSLTI_(int info) x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[1]); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); } EERECOMPILE_CODEX(eeRecompileCode1, SLTI); @@ -192,49 +196,57 @@ void recANDI_const() void recLogicalOpI(int info, int op) { - if ( _ImmU_ != 0 ) + if (_ImmU_ != 0) { - if( _Rt_ == _Rs_ ) { - switch(op) { + if (_Rt_ == _Rs_) + { + switch (op) + { case 0: xAND(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; case 1: xOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; case 2: xXOR(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], _ImmU_); break; default: pxAssert(0); } } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - if( op != 0 ) - xMOV(edx, 
ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + if (op != 0) + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - switch(op) { + switch (op) + { case 0: xAND(eax, _ImmU_); break; case 1: xOR(eax, _ImmU_); break; case 2: xXOR(eax, _ImmU_); break; default: pxAssert(0); } - if( op != 0 ) - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); + if (op != 0) + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); } - if( op == 0 ) { - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); + if (op == 0) + { + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); } } else { - if( op == 0 ) { - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], 0 ); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); + if (op == 0) + { + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], 0); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); } - else { - if( _Rt_ != _Rs_ ) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], edx); + else + { + if (_Rt_ != _Rs_) + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } } } @@ -275,4 +287,6 @@ EERECOMPILE_CODEX(eeRecompileCode1, XORI); #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900Branch.cpp b/pcsx2/x86/ix86-32/iR5900Branch.cpp index 2877ad5bf8..424d6aeaec 100644 --- a/pcsx2/x86/ix86-32/iR5900Branch.cpp +++ b/pcsx2/x86/ix86-32/iR5900Branch.cpp @@ -26,8 +26,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Register branch logic * @@ -47,27 +46,31 @@ REC_SYS(BLEZ); REC_SYS(BGEZ); 
REC_SYS(BGTZL); REC_SYS(BLTZL); -REC_SYS_DEL(BLTZAL, 31); +REC_SYS_DEL(BLTZAL, 31); REC_SYS_DEL(BLTZALL, 31); REC_SYS(BLEZL); REC_SYS(BGEZL); -REC_SYS_DEL(BGEZAL, 31); +REC_SYS_DEL(BGEZAL, 31); REC_SYS_DEL(BGEZALL, 31); #else void recSetBranchEQ(int info, int bne, int process) { - if( info & PROCESS_EE_XMM ) { + if (info & PROCESS_EE_XMM) + { int t0reg; - if( process & PROCESS_CONSTS ) { - if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_) ) { + if (process & PROCESS_CONSTS) + { + if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) + { _deleteGPRtoXMMreg(_Rt_, 1); xmmregs[EEREC_T].inuse = 0; t0reg = EEREC_T; } - else { + else + { t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); } @@ -76,15 +79,19 @@ void recSetBranchEQ(int info, int bne, int process) xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - if( t0reg != EEREC_T ) _freeXMMreg(t0reg); + if (t0reg != EEREC_T) + _freeXMMreg(t0reg); } - else if( process & PROCESS_CONSTT ) { - if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { + else if (process & PROCESS_CONSTT) + { + if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) + { _deleteGPRtoXMMreg(_Rs_, 1); xmmregs[EEREC_S].inuse = 0; t0reg = EEREC_S; } - else { + else + { t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); } @@ -92,29 +99,35 @@ void recSetBranchEQ(int info, int bne, int process) _flushConstReg(_Rt_); xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - if( t0reg != EEREC_S ) _freeXMMreg(t0reg); + if (t0reg != EEREC_S) + _freeXMMreg(t0reg); } - else { + else + { - if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { + if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) + { _deleteGPRtoXMMreg(_Rs_, 1); xmmregs[EEREC_S].inuse = 0; t0reg = EEREC_S; xPCMP.EQD(xRegisterSSE(t0reg), 
xRegisterSSE(EEREC_T)); } - else if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_) ) { + else if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) + { _deleteGPRtoXMMreg(_Rt_, 1); xmmregs[EEREC_T].inuse = 0; t0reg = EEREC_T; xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); } - else { + else + { t0reg = _allocTempXMMreg(XMMT_INT, -1); xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); } - if( t0reg != EEREC_S && t0reg != EEREC_T ) _freeXMMreg(t0reg); + if (t0reg != EEREC_S && t0reg != EEREC_T) + _freeXMMreg(t0reg); } xMOVMSKPS(eax, xRegisterSSE(t0reg)); @@ -122,66 +135,77 @@ void recSetBranchEQ(int info, int bne, int process) _eeFlushAllUnused(); xAND(al, 3); - xCMP(al, 0x3 ); + xCMP(al, 0x3); - if( bne ) j32Ptr[ 1 ] = JE32( 0 ); - else j32Ptr[ 0 ] = j32Ptr[ 1 ] = JNE32( 0 ); + if (bne) + j32Ptr[1] = JE32(0); + else + j32Ptr[0] = j32Ptr[1] = JNE32(0); } - else { + else + { _eeFlushAllUnused(); - if( bne ) { - if( process & PROCESS_CONSTS ) { - xCMP(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], g_cpuConstRegs[_Rs_].UL[0] ); - j8Ptr[ 0 ] = JNE8( 0 ); + if (bne) + { + if (process & PROCESS_CONSTS) + { + xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); + j8Ptr[0] = JNE8(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], g_cpuConstRegs[_Rs_].UL[1] ); - j32Ptr[ 1 ] = JE32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); + j32Ptr[1] = JE32(0); } - else if( process & PROCESS_CONSTT ) { - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], g_cpuConstRegs[_Rt_].UL[0] ); - j8Ptr[ 0 ] = JNE8( 0 ); + else if (process & PROCESS_CONSTT) + { + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); + j8Ptr[0] = JNE8(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], g_cpuConstRegs[_Rt_].UL[1] ); - j32Ptr[ 1 ] = JE32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]); + j32Ptr[1] = JE32(0); } - else { 
- xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xCMP(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - j8Ptr[ 0 ] = JNE8( 0 ); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + j8Ptr[0] = JNE8(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ]); - xCMP(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j32Ptr[ 1 ] = JE32( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j32Ptr[1] = JE32(0); } - x86SetJ8( j8Ptr[0] ); + x86SetJ8(j8Ptr[0]); } - else { + else + { // beq - if( process & PROCESS_CONSTS ) { - xCMP(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], g_cpuConstRegs[_Rs_].UL[0] ); - j32Ptr[ 0 ] = JNE32( 0 ); + if (process & PROCESS_CONSTS) + { + xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); + j32Ptr[0] = JNE32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], g_cpuConstRegs[_Rs_].UL[1] ); - j32Ptr[ 1 ] = JNE32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); + j32Ptr[1] = JNE32(0); } - else if( process & PROCESS_CONSTT ) { - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], g_cpuConstRegs[_Rt_].UL[0] ); - j32Ptr[ 0 ] = JNE32( 0 ); + else if (process & PROCESS_CONSTT) + { + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); + j32Ptr[0] = JNE32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], g_cpuConstRegs[_Rt_].UL[1] ); - j32Ptr[ 1 ] = JNE32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]); + j32Ptr[1] = JNE32(0); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xCMP(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - j32Ptr[ 0 ] = JNE32( 0 ); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + j32Ptr[0] = JNE32(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ]); - xCMP(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j32Ptr[ 1 ] = JNE32( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xCMP(eax, 
ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j32Ptr[1] = JNE32(0); } } } @@ -193,22 +217,27 @@ void recSetBranchL(int ltz) { int regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); - if( regs >= 0 ) { + if (regs >= 0) + { xMOVMSKPS(eax, xRegisterSSE(regs)); _eeFlushAllUnused(); - xTEST(al, 2 ); + xTEST(al, 2); - if( ltz ) j32Ptr[ 0 ] = JZ32( 0 ); - else j32Ptr[ 0 ] = JNZ32( 0 ); + if (ltz) + j32Ptr[0] = JZ32(0); + else + j32Ptr[0] = JNZ32(0); return; } - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], 0 ); - if( ltz ) j32Ptr[ 0 ] = JGE32( 0 ); - else j32Ptr[ 0 ] = JL32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); + if (ltz) + j32Ptr[0] = JGE32(0); + else + j32Ptr[0] = JL32(0); _clearNeededXMMregs(); } @@ -218,23 +247,23 @@ void recBEQ_const() { u32 branchTo; - if( g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0] ) + if (g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0]) branchTo = ((s32)_Imm_ * 4) + pc; else - branchTo = pc+4; + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } void recBEQ_process(int info, int process) { u32 branchTo = ((s32)_Imm_ * 4) + pc; - if ( _Rs_ == _Rt_ ) + if (_Rs_ == _Rt_) { recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } else { @@ -245,8 +274,8 @@ void recBEQ_process(int info, int process) SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); - x86SetJ32( j32Ptr[ 1 ] ); + x86SetJ32(j32Ptr[0]); + x86SetJ32(j32Ptr[1]); // recopy the next inst pc -= 4; @@ -261,27 +290,27 @@ void recBEQ_(int info) { recBEQ_process(info, 0); } void recBEQ_consts(int info) { recBEQ_process(info, PROCESS_CONSTS); } void recBEQ_constt(int info) { recBEQ_process(info, PROCESS_CONSTT); } -EERECOMPILE_CODE0(BEQ, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(BEQ, XMMINFO_READS | XMMINFO_READT); //// BNE void recBNE_const() { u32 branchTo; - if( g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0] ) + if (g_cpuConstRegs[_Rs_].SD[0] != 
g_cpuConstRegs[_Rt_].SD[0]) branchTo = ((s32)_Imm_ * 4) + pc; else - branchTo = pc+4; + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } void recBNE_process(int info, int process) { u32 branchTo = ((s32)_Imm_ * 4) + pc; - if ( _Rs_ == _Rt_ ) + if (_Rs_ == _Rt_) { recompileNextInstruction(1); SetBranchImm(pc); @@ -295,7 +324,7 @@ void recBNE_process(int info, int process) SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 1 ] ); + x86SetJ32(j32Ptr[1]); // recopy the next inst pc -= 4; @@ -309,18 +338,20 @@ void recBNE_(int info) { recBNE_process(info, 0); } void recBNE_consts(int info) { recBNE_process(info, PROCESS_CONSTS); } void recBNE_constt(int info) { recBNE_process(info, PROCESS_CONSTT); } -EERECOMPILE_CODE0(BNE, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(BNE, XMMINFO_READS | XMMINFO_READT); //// BEQL void recBEQL_const() { - if( g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0] ) { + if (g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0]) + { u32 branchTo = ((s32)_Imm_ * 4) + pc; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } - else { - SetBranchImm( pc+4 ); + else + { + SetBranchImm(pc + 4); } } @@ -333,8 +364,8 @@ void recBEQL_process(int info, int process) recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); - x86SetJ32( j32Ptr[ 1 ] ); + x86SetJ32(j32Ptr[0]); + x86SetJ32(j32Ptr[1]); LoadBranchState(); SetBranchImm(pc); @@ -344,18 +375,20 @@ void recBEQL_(int info) { recBEQL_process(info, 0); } void recBEQL_consts(int info) { recBEQL_process(info, PROCESS_CONSTS); } void recBEQL_constt(int info) { recBEQL_process(info, PROCESS_CONSTT); } -EERECOMPILE_CODE0(BEQL, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(BEQL, XMMINFO_READS | XMMINFO_READT); //// BNEL void recBNEL_const() { - if( g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0] ) { + if (g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0]) + { u32 branchTo 
= ((s32)_Imm_ * 4) + pc; recompileNextInstruction(1); SetBranchImm(branchTo); } - else { - SetBranchImm( pc+4 ); + else + { + SetBranchImm(pc + 4); } } @@ -366,10 +399,10 @@ void recBNEL_process(int info, int process) recSetBranchEQ(info, 0, process); SaveBranchState(); - SetBranchImm(pc+4); + SetBranchImm(pc + 4); - x86SetJ32( j32Ptr[ 0 ] ); - x86SetJ32( j32Ptr[ 1 ] ); + x86SetJ32(j32Ptr[0]); + x86SetJ32(j32Ptr[1]); // recopy the next inst LoadBranchState(); @@ -381,7 +414,7 @@ void recBNEL_(int info) { recBNEL_process(info, 0); } void recBNEL_consts(int info) { recBNEL_process(info, PROCESS_CONSTS); } void recBNEL_constt(int info) { recBNEL_process(info, PROCESS_CONSTT); } -EERECOMPILE_CODE0(BNEL, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(BNEL, XMMINFO_READS | XMMINFO_READT); /********************************************************* * Register branch logic * @@ -411,15 +444,16 @@ void recBLTZAL() _eeFlushAllUnused(); _deleteEEreg(31, 0); - xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc+4); + xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] < 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); return; } @@ -431,7 +465,7 @@ void recBLTZAL() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); // recopy the next inst pc -= 4; @@ -452,15 +486,16 @@ void recBGEZAL() _eeFlushAllUnused(); _deleteEEreg(31, 0); - xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc+4); + xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] >= 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + 
SetBranchImm(branchTo); return; } @@ -472,7 +507,7 @@ void recBGEZAL() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); // recopy the next inst pc -= 4; @@ -493,15 +528,17 @@ void recBLTZALL() _eeFlushAllUnused(); _deleteEEreg(31, 0); - xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc+4); + xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] < 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) + SetBranchImm(pc + 4); + else + { recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } @@ -512,7 +549,7 @@ void recBLTZALL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); LoadBranchState(); SetBranchImm(pc); @@ -529,15 +566,17 @@ void recBGEZALL() _eeFlushAllUnused(); _deleteEEreg(31, 0); - xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc+4); + xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4); xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] >= 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) + SetBranchImm(pc + 4); + else + { recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } @@ -548,7 +587,7 @@ void recBGEZALL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); LoadBranchState(); SetBranchImm(pc); @@ -564,25 +603,26 @@ void recBLEZ() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] <= 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); return; } _flushEEreg(_Rs_); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], 
0 ); - j8Ptr[ 0 ] = JL8( 0 ); - j32Ptr[ 1 ] = JG32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); + j8Ptr[0] = JL8(0); + j32Ptr[1] = JG32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], 0 ); - j32Ptr[ 2 ] = JNZ32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0); + j32Ptr[2] = JNZ32(0); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); _clearNeededXMMregs(); @@ -591,8 +631,8 @@ void recBLEZ() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 1 ] ); - x86SetJ32( j32Ptr[ 2 ] ); + x86SetJ32(j32Ptr[1]); + x86SetJ32(j32Ptr[2]); // recopy the next inst pc -= 4; @@ -611,25 +651,26 @@ void recBGTZ() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] > 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] > 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); return; } _flushEEreg(_Rs_); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], 0 ); - j8Ptr[ 0 ] = JG8( 0 ); - j32Ptr[ 1 ] = JL32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); + j8Ptr[0] = JG8(0); + j32Ptr[1] = JL32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], 0 ); - j32Ptr[ 2 ] = JZ32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0); + j32Ptr[2] = JZ32(0); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); _clearNeededXMMregs(); @@ -638,8 +679,8 @@ void recBGTZ() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 1 ] ); - x86SetJ32( j32Ptr[ 2 ] ); + x86SetJ32(j32Ptr[1]); + x86SetJ32(j32Ptr[2]); // recopy the next inst pc -= 4; @@ -658,12 +699,13 @@ void recBLTZ() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] < 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); return; } @@ -674,7 +716,7 @@ void recBLTZ() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); // recopy the next inst pc -= 
4; @@ -693,12 +735,13 @@ void recBGEZ() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] >= 0) ) - branchTo = pc+4; + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) + branchTo = pc + 4; recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); return; } @@ -709,7 +752,7 @@ void recBGEZ() SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); // recopy the next inst pc -= 4; @@ -728,12 +771,14 @@ void recBLTZL() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] < 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] < 0)) + SetBranchImm(pc + 4); + else + { recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } @@ -744,7 +789,7 @@ void recBLTZL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); LoadBranchState(); SetBranchImm(pc); @@ -760,12 +805,14 @@ void recBGEZL() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] >= 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0)) + SetBranchImm(pc + 4); + else + { recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } @@ -776,7 +823,7 @@ void recBGEZL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); LoadBranchState(); SetBranchImm(pc); @@ -784,7 +831,6 @@ void recBGEZL() - /********************************************************* * Register branch logic Likely * * Format: OP rs, offset * @@ -799,27 +845,29 @@ void recBLEZL() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] <= 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0)) + SetBranchImm(pc + 4); + 
else + { _clearNeededXMMregs(); recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } _flushEEreg(_Rs_); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], 0 ); - j32Ptr[ 0 ] = JL32( 0 ); - j32Ptr[ 1 ] = JG32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); + j32Ptr[0] = JL32(0); + j32Ptr[1] = JG32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], 0 ); - j32Ptr[ 2 ] = JNZ32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0); + j32Ptr[2] = JNZ32(0); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); _clearNeededXMMregs(); @@ -827,8 +875,8 @@ void recBLEZL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 1 ] ); - x86SetJ32( j32Ptr[ 2 ] ); + x86SetJ32(j32Ptr[1]); + x86SetJ32(j32Ptr[2]); LoadBranchState(); SetBranchImm(pc); @@ -843,27 +891,29 @@ void recBGTZL() _eeFlushAllUnused(); - if( GPR_IS_CONST1(_Rs_) ) { - if( !(g_cpuConstRegs[_Rs_].SD[0] > 0) ) - SetBranchImm( pc + 4); - else { + if (GPR_IS_CONST1(_Rs_)) + { + if (!(g_cpuConstRegs[_Rs_].SD[0] > 0)) + SetBranchImm(pc + 4); + else + { _clearNeededXMMregs(); recompileNextInstruction(1); - SetBranchImm( branchTo ); + SetBranchImm(branchTo); } return; } _flushEEreg(_Rs_); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], 0 ); - j32Ptr[ 0 ] = JG32( 0 ); - j32Ptr[ 1 ] = JL32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0); + j32Ptr[0] = JG32(0); + j32Ptr[1] = JL32(0); - xCMP(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], 0 ); - j32Ptr[ 2 ] = JZ32( 0 ); + xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0); + j32Ptr[2] = JZ32(0); - x86SetJ32( j32Ptr[ 0 ] ); + x86SetJ32(j32Ptr[0]); _clearNeededXMMregs(); @@ -871,8 +921,8 @@ void recBGTZL() recompileNextInstruction(1); SetBranchImm(branchTo); - x86SetJ32( j32Ptr[ 1 ] ); - x86SetJ32( j32Ptr[ 2 ] ); + x86SetJ32(j32Ptr[1]); + x86SetJ32(j32Ptr[2]); LoadBranchState(); SetBranchImm(pc); @@ -880,4 +930,6 @@ void recBGTZL() #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git 
a/pcsx2/x86/ix86-32/iR5900Jump.cpp b/pcsx2/x86/ix86-32/iR5900Jump.cpp index a1693b6903..49597899da 100644 --- a/pcsx2/x86/ix86-32/iR5900Jump.cpp +++ b/pcsx2/x86/ix86-32/iR5900Jump.cpp @@ -26,8 +26,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Jump to target * @@ -50,7 +49,7 @@ void recJ() EE::Profiler.EmitOp(eeOpcode::J); // SET_FPUSTATE; - u32 newpc = (_InstrucTarget_ << 2) + ( pc & 0xf0000000 ); + u32 newpc = (_InstrucTarget_ << 2) + (pc & 0xf0000000); recompileNextInstruction(1); if (EmuConfig.Gamefixes.GoemonTlbHack) SetBranchImm(vtlb_V2P(newpc)); @@ -63,9 +62,9 @@ void recJAL() { EE::Profiler.EmitOp(eeOpcode::JAL); - u32 newpc = (_InstrucTarget_ << 2) + ( pc & 0xf0000000 ); + u32 newpc = (_InstrucTarget_ << 2) + (pc & 0xf0000000); _deleteEEreg(31, 0); - if(EE_CONST_PROP) + if (EE_CONST_PROP) { GPR_SET_CONST(31); g_cpuConstRegs[31].UL[0] = pc + 4; @@ -94,7 +93,7 @@ void recJR() { EE::Profiler.EmitOp(eeOpcode::JR); - SetBranchReg( _Rs_); + SetBranchReg(_Rs_); } //////////////////////////////////////////////////// @@ -106,32 +105,35 @@ void recJALR() _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _eeMoveGPRtoR(calleeSavedReg2d, _Rs_); - if (EmuConfig.Gamefixes.GoemonTlbHack) { + if (EmuConfig.Gamefixes.GoemonTlbHack) + { xMOV(ecx, calleeSavedReg2d); vtlb_DynV2P(); xMOV(calleeSavedReg2d, eax); } // uncomment when there are NO instructions that need to call interpreter // int mmreg; -// if( GPR_IS_CONST1(_Rs_) ) -// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0] ); -// else { +// if (GPR_IS_CONST1(_Rs_)) +// xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0]); +// else +// { // int mmreg; // -// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { +// if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0) +// { // xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg)); // } // else { -// 
xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] )]); +// xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[_Rs_].UL[0])]); // xMOV(ptr[&cpuRegs.pc], eax); // } // } - - if ( _Rd_ ) + + if (_Rd_) { _deleteEEreg(_Rd_, 0); - if(EE_CONST_PROP) + if (EE_CONST_PROP) { GPR_SET_CONST(_Rd_); g_cpuConstRegs[_Rd_].UL[0] = newpc; @@ -147,12 +149,14 @@ void recJALR() _clearNeededXMMregs(); recompileNextInstruction(1); - if( x86regs[calleeSavedReg2d.GetId()].inuse ) { - pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK ); + if (x86regs[calleeSavedReg2d.GetId()].inuse) + { + pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d); x86regs[calleeSavedReg2d.GetId()].inuse = 0; } - else { + else + { xMOV(eax, ptr[&g_recWriteback]); xMOV(ptr[&cpuRegs.pc], eax); } @@ -162,4 +166,6 @@ void recJALR() #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 09b3f65052..27ac059790 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -38,18 +38,18 @@ namespace OpcodeImpl { namespace Interp = R5900::Interpreter::OpcodeImpl; -REC_FUNC_DEL(LB, _Rt_); +REC_FUNC_DEL(LB, _Rt_); REC_FUNC_DEL(LBU, _Rt_); -REC_FUNC_DEL(LH, _Rt_); +REC_FUNC_DEL(LH, _Rt_); REC_FUNC_DEL(LHU, _Rt_); -REC_FUNC_DEL(LW, _Rt_); +REC_FUNC_DEL(LW, _Rt_); REC_FUNC_DEL(LWU, _Rt_); REC_FUNC_DEL(LWL, _Rt_); REC_FUNC_DEL(LWR, _Rt_); -REC_FUNC_DEL(LD, _Rt_); +REC_FUNC_DEL(LD, _Rt_); REC_FUNC_DEL(LDR, _Rt_); REC_FUNC_DEL(LDL, _Rt_); -REC_FUNC_DEL(LQ, _Rt_); +REC_FUNC_DEL(LQ, _Rt_); REC_FUNC(SB); REC_FUNC(SH); REC_FUNC(SW); @@ -72,18 +72,23 @@ void _eeOnLoadWrite(u32 reg) { int regt; - if( !reg ) return; + if (!reg) + return; _eeOnWriteReg(reg, 1); regt = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ); - if( regt >= 0 ) { - if( xmmregs[regt].mode & MODE_WRITE ) { - if( reg != _Rs_ ) { 
+ if (regt >= 0) + { + if (xmmregs[regt].mode & MODE_WRITE) + { + if (reg != _Rs_) + { xPUNPCK.HQDQ(xRegisterSSE(regt), xRegisterSSE(regt)); xMOVQ(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt)); } - else xMOVH.PS(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt)); + else + xMOVH.PS(ptr[&cpuRegs.GPR.r[reg].UL[2]], xRegisterSSE(regt)); } xmmregs[regt].inuse = 0; } @@ -95,9 +100,9 @@ __aligned16 u32 dummyValue[4]; ////////////////////////////////////////////////////////////////////////////////////////// // -void recLoad64( u32 bits, bool sign ) +void recLoad64(u32 bits, bool sign) { - pxAssume( bits == 64 || bits == 128 ); + pxAssume(bits == 64 || bits == 128); // Load arg2 with the destination. // 64/128 bit modes load the result directly into the cpuRegs.GPR struct. @@ -124,7 +129,7 @@ void recLoad64( u32 bits, bool sign ) _eeMoveGPRtoR(arg1regd, _Rs_); if (_Imm_ != 0) xADD(arg1regd, _Imm_); - if (bits == 128) // force 16 byte alignment on 128 bit reads + if (bits == 128) // force 16 byte alignment on 128 bit reads xAND(arg1regd, ~0x0F); _eeOnLoadWrite(_Rt_); @@ -137,9 +142,9 @@ void recLoad64( u32 bits, bool sign ) ////////////////////////////////////////////////////////////////////////////////////////// // -void recLoad32( u32 bits, bool sign ) +void recLoad32(u32 bits, bool sign) { - pxAssume( bits <= 32 ); + pxAssume(bits <= 32); // 8/16/32 bit modes return the loaded value in EAX. @@ -157,7 +162,7 @@ void recLoad32( u32 bits, bool sign ) // Load arg1 with the source memory address that we're reading from. _eeMoveGPRtoR(arg1regd, _Rs_); if (_Imm_ != 0) - xADD(arg1regd, _Imm_ ); + xADD(arg1regd, _Imm_); _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 0); @@ -185,65 +190,65 @@ void recLoad32( u32 bits, bool sign ) void recStore(u32 bits) { - // Performance note: Const prop for the store address is good, always. 
- // Constprop for the value being stored is not really worthwhile (better to use register - // allocation -- simpler code and just as fast) + // Performance note: Const prop for the store address is good, always. + // Constprop for the value being stored is not really worthwhile (better to use register + // allocation -- simpler code and just as fast) - // Load EDX first with the value being written, or the address of the value - // being written (64/128 bit modes). + // Load EDX first with the value being written, or the address of the value + // being written (64/128 bit modes). - if (bits < 64) - { - _eeMoveGPRtoR(arg2regd, _Rt_); - } - else if (bits == 128 || bits == 64) - { - _flushEEreg(_Rt_); // flush register to mem - xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - } + if (bits < 64) + { + _eeMoveGPRtoR(arg2regd, _Rt_); + } + else if (bits == 128 || bits == 64) + { + _flushEEreg(_Rt_); // flush register to mem + xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + } - // Load ECX with the destination address, or issue a direct optimized write - // if the address is a constant propagation. + // Load ECX with the destination address, or issue a direct optimized write + // if the address is a constant propagation. 
- if (GPR_IS_CONST1(_Rs_)) - { - u32 dstadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; - if (bits == 128) - dstadr &= ~0x0f; + if (GPR_IS_CONST1(_Rs_)) + { + u32 dstadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + if (bits == 128) + dstadr &= ~0x0f; - vtlb_DynGenWrite_Const( bits, dstadr ); - } - else - { - _eeMoveGPRtoR(arg1regd, _Rs_); - if (_Imm_ != 0) - xADD(arg1regd, _Imm_); - if (bits == 128) - xAND(arg1regd, ~0x0F); + vtlb_DynGenWrite_Const(bits, dstadr); + } + else + { + _eeMoveGPRtoR(arg1regd, _Rs_); + if (_Imm_ != 0) + xADD(arg1regd, _Imm_); + if (bits == 128) + xAND(arg1regd, ~0x0F); - iFlushCall(FLUSH_FULLVTLB); + iFlushCall(FLUSH_FULLVTLB); - vtlb_DynGenWrite(bits); - } + vtlb_DynGenWrite(bits); + } } ////////////////////////////////////////////////////////////////////////////////////////// // -void recLB() { recLoad32(8,true); EE::Profiler.EmitOp(eeOpcode::LB);} -void recLBU() { recLoad32(8,false); EE::Profiler.EmitOp(eeOpcode::LBU);} -void recLH() { recLoad32(16,true); EE::Profiler.EmitOp(eeOpcode::LH);} -void recLHU() { recLoad32(16,false); EE::Profiler.EmitOp(eeOpcode::LHU);} -void recLW() { recLoad32(32,true); EE::Profiler.EmitOp(eeOpcode::LW);} -void recLWU() { recLoad32(32,false); EE::Profiler.EmitOp(eeOpcode::LWU);} -void recLD() { recLoad64(64,false); EE::Profiler.EmitOp(eeOpcode::LD);} -void recLQ() { recLoad64(128,false); EE::Profiler.EmitOp(eeOpcode::LQ);} +void recLB() { recLoad32( 8, true); EE::Profiler.EmitOp(eeOpcode::LB); } +void recLBU() { recLoad32( 8, false); EE::Profiler.EmitOp(eeOpcode::LBU); } +void recLH() { recLoad32( 16, true); EE::Profiler.EmitOp(eeOpcode::LH); } +void recLHU() { recLoad32( 16, false); EE::Profiler.EmitOp(eeOpcode::LHU); } +void recLW() { recLoad32( 32, true); EE::Profiler.EmitOp(eeOpcode::LW); } +void recLWU() { recLoad32( 32, false); EE::Profiler.EmitOp(eeOpcode::LWU); } +void recLD() { recLoad64( 64, false); EE::Profiler.EmitOp(eeOpcode::LD); } +void recLQ() { recLoad64(128, false); 
EE::Profiler.EmitOp(eeOpcode::LQ); } -void recSB() { recStore(8); EE::Profiler.EmitOp(eeOpcode::SB);} -void recSH() { recStore(16); EE::Profiler.EmitOp(eeOpcode::SH);} -void recSW() { recStore(32); EE::Profiler.EmitOp(eeOpcode::SW);} -void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ);} -void recSD() { recStore(64); EE::Profiler.EmitOp(eeOpcode::SD);} +void recSB() { recStore( 8); EE::Profiler.EmitOp(eeOpcode::SB); } +void recSH() { recStore( 16); EE::Profiler.EmitOp(eeOpcode::SH); } +void recSW() { recStore( 32); EE::Profiler.EmitOp(eeOpcode::SW); } +void recSD() { recStore( 64); EE::Profiler.EmitOp(eeOpcode::SD); } +void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ); } //////////////////////////////////////////////////// @@ -536,9 +541,9 @@ void recSWC1() #else _deleteFPtoXMMreg(_Rt_, 1); - xMOV(arg2regd, ptr32[&fpuRegs.fpr[_Rt_].UL] ); + xMOV(arg2regd, ptr32[&fpuRegs.fpr[_Rt_].UL]); - if( GPR_IS_CONST1( _Rs_ ) ) + if (GPR_IS_CONST1(_Rs_)) { int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; vtlb_DynGenWrite_Const(32, addr); @@ -657,7 +662,6 @@ void recSQC2() #endif -} } } // end namespace R5900::Dynarec::OpcodeImpl - -using namespace R5900::Dynarec; -using namespace R5900::Dynarec::OpcodeImpl; \ No newline at end of file +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index bda4411524..fa7ffe0e25 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -24,8 +24,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Shift arithmetic with constant shift * @@ -35,7 +34,7 @@ namespace OpcodeImpl namespace Interp = R5900::Interpreter::OpcodeImpl; -REC_FUNC_DEL(LUI,_Rt_); +REC_FUNC_DEL(LUI, _Rt_); REC_FUNC_DEL(MFLO, _Rd_); REC_FUNC_DEL(MFHI, _Rd_); REC_FUNC(MTLO); @@ -43,8 +42,8 @@ 
REC_FUNC(MTHI); REC_FUNC_DEL(MFLO1, _Rd_); REC_FUNC_DEL(MFHI1, _Rd_); -REC_FUNC( MTHI1 ); -REC_FUNC( MTLO1 ); +REC_FUNC(MTHI1); +REC_FUNC(MTLO1); REC_FUNC_DEL(MOVZ, _Rd_); REC_FUNC_DEL(MOVN, _Rd_); @@ -60,12 +59,15 @@ REC_FUNC_DEL(MOVN, _Rd_); void recLUI() { int mmreg; - if(!_Rt_) return; + if (!_Rt_) + return; _eeOnWriteReg(_Rt_, 1); - if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0 ) { - if( xmmregs[mmreg].mode & MODE_WRITE ) { + if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0) + { + if (xmmregs[mmreg].mode & MODE_WRITE) + { xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(mmreg)); } xmmregs[mmreg].inuse = 0; @@ -73,7 +75,7 @@ void recLUI() _deleteEEreg(_Rt_, 0); - if(EE_CONST_PROP) + if (EE_CONST_PROP) { GPR_SET_CONST(_Rt_); g_cpuConstRegs[_Rt_].UD[0] = (s32)(cpuRegs.code << 16); @@ -93,7 +95,7 @@ void recLUI() void recMFHILO(int hi) { int reghi, regd, xmmhilo; - if ( ! _Rd_ ) + if (!_Rd_) return; xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO; @@ -101,36 +103,45 @@ void recMFHILO(int hi) _eeOnWriteReg(_Rd_, 0); - regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ|MODE_WRITE); + regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE); - if( reghi >= 0 ) { - if( regd >= 0 ) { - pxAssert( regd != reghi ); + if (reghi >= 0) + { + if (regd >= 0) + { + pxAssert(regd != reghi); xmmregs[regd].inuse = 0; xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], xRegisterSSE(reghi)); - if( xmmregs[regd].mode & MODE_WRITE ) { + if (xmmregs[regd].mode & MODE_WRITE) + { xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UL[2]], xRegisterSSE(regd)); } } - else { + else + { _deleteEEreg(_Rd_, 0); - xMOVQ(ptr[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(reghi)); + xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi)); } } - else { - if( regd >= 0 ) { - if( EEINST_ISLIVE2(_Rd_) ) xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[ 0 ] : (uptr)&cpuRegs.LO.UD[ 0 ])]); - else xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? 
(uptr)&cpuRegs.HI.UD[ 0 ] : (uptr)&cpuRegs.LO.UD[ 0 ])]); + else + { + if (regd >= 0) + { + if (EEINST_ISLIVE2(_Rd_)) + xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]); + else + xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]); } - else { + else + { _deleteEEreg(_Rd_, 0); - xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[ 0 ] : (uptr)&cpuRegs.LO.UL[ 0 ])]); - xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[ 1 ] : (uptr)&cpuRegs.LO.UL[ 1 ])]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[0] : (uptr)&cpuRegs.LO.UL[0])]); + xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[1] : (uptr)&cpuRegs.LO.UL[1])]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } } } @@ -144,11 +155,13 @@ void recMTHILO(int hi) addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0]; regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); - reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ|MODE_WRITE); + reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ | MODE_WRITE); - if( reghi >= 0 ) { - if( regs >= 0 ) { - pxAssert( reghi != regs ); + if (reghi >= 0) + { + if (regs >= 0) + { + pxAssert(reghi != regs); _deleteGPRtoXMMreg(_Rs_, 0); xPUNPCK.HQDQ(xRegisterSSE(reghi), xRegisterSSE(reghi)); @@ -158,30 +171,35 @@ void recMTHILO(int hi) xmmregs[regs] = xmmregs[reghi]; xmmregs[reghi].inuse = 0; xmmregs[regs].mode |= MODE_WRITE; - } - else { + else + { _flushConstReg(_Rs_); - xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]]); + xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); xmmregs[reghi].mode |= MODE_WRITE; } } - else { - if( regs >= 0 ) { + else + { + if (regs >= 0) + { xMOVQ(ptr[(void*)(addrhilo)], xRegisterSSE(regs)); } - else { - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(ptr32[(u32*)(addrhilo)], 
g_cpuConstRegs[_Rs_].UL[0] ); - xMOV(ptr32[(u32*)(addrhilo+4)], g_cpuConstRegs[_Rs_].UL[1] ); + else + { + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(ptr32[(u32*)(addrhilo)], g_cpuConstRegs[_Rs_].UL[0]); + xMOV(ptr32[(u32*)(addrhilo + 4)], g_cpuConstRegs[_Rs_].UL[1]); } - else { + else + { _eeMoveGPRtoR(ecx, _Rs_); _flushEEreg(_Rs_); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); xMOV(ptr[(void*)(addrhilo)], eax); - xMOV(ptr[(void*)(addrhilo+4)], edx); + xMOV(ptr[(void*)(addrhilo + 4)], edx); } } } @@ -215,7 +233,7 @@ void recMTLO() void recMFHILO1(int hi) { int reghi, regd, xmmhilo; - if ( ! _Rd_ ) + if (!_Rd_) return; xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO; @@ -223,36 +241,44 @@ void recMFHILO1(int hi) _eeOnWriteReg(_Rd_, 0); - regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ|MODE_WRITE); + regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE); - if( reghi >= 0 ) { - if( regd >= 0 ) { + if (reghi >= 0) + { + if (regd >= 0) + { xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(reghi)); xmmregs[regd].mode |= MODE_WRITE; } - else { + else + { _deleteEEreg(_Rd_, 0); - xMOVH.PS(ptr[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(reghi)); + xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi)); } } - else { - if( regd >= 0 ) { - if( EEINST_ISLIVE2(_Rd_) ) { - xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[ 0 ] : (uptr)&cpuRegs.LO.UD[ 0 ])]); + else + { + if (regd >= 0) + { + if (EEINST_ISLIVE2(_Rd_)) + { + xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]); xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); } - else { - xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[ 1 ] : (uptr)&cpuRegs.LO.UD[ 1 ])]); + else + { + xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? 
(uptr)&cpuRegs.HI.UD[1] : (uptr)&cpuRegs.LO.UD[1])]); } xmmregs[regd].mode |= MODE_WRITE; } - else { + else + { _deleteEEreg(_Rd_, 0); - xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[ 2 ] : (uptr)&cpuRegs.LO.UL[ 2 ])]); - xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[ 3 ] : (uptr)&cpuRegs.LO.UL[ 3 ])]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[2] : (uptr)&cpuRegs.LO.UL[2])]); + xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[3] : (uptr)&cpuRegs.LO.UL[3])]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } } } @@ -266,32 +292,40 @@ void recMTHILO1(int hi) addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0]; regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ); - reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE|MODE_READ); + reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE | MODE_READ); - if( reghi >= 0 ) { - if( regs >= 0 ) { + if (reghi >= 0) + { + if (regs >= 0) + { xPUNPCK.LQDQ(xRegisterSSE(reghi), xRegisterSSE(regs)); } - else { + else + { _flushEEreg(_Rs_); - xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]]); + xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); } } - else { - if( regs >= 0 ) { - xMOVQ(ptr[(void*)(addrhilo+8)], xRegisterSSE(regs)); + else + { + if (regs >= 0) + { + xMOVQ(ptr[(void*)(addrhilo + 8)], xRegisterSSE(regs)); } - else { - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(ptr32[(u32*)(addrhilo+8)], g_cpuConstRegs[_Rs_].UL[0] ); - xMOV(ptr32[(u32*)(addrhilo+12)], g_cpuConstRegs[_Rs_].UL[1] ); + else + { + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(ptr32[(u32*)(addrhilo + 8)], g_cpuConstRegs[_Rs_].UL[0]); + xMOV(ptr32[(u32*)(addrhilo + 12)], g_cpuConstRegs[_Rs_].UL[1]); } - else { + else + { _flushEEreg(_Rs_); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); - 
xMOV(ptr[(void*)(addrhilo+8)], eax); - xMOV(ptr[(void*)(addrhilo+12)], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[(void*)(addrhilo + 8)], eax); + xMOV(ptr[(void*)(addrhilo + 12)], edx); } } } @@ -329,49 +363,51 @@ void recMOVZtemp_const() void recMOVZtemp_consts(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - xOR(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j8Ptr[ 0 ] = JNZ8( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j8Ptr[0] = JNZ8(0); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], g_cpuConstRegs[_Rs_].UL[0] ); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], g_cpuConstRegs[_Rs_].UL[1] ); + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); } void recMOVZtemp_constt(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recMOVZtemp_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - xOR(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j8Ptr[ 0 ] = JNZ8( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j8Ptr[0] = JNZ8(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], 
edx); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); } -EERECOMPILE_CODE0(MOVZtemp, XMMINFO_READS|XMMINFO_READD|XMMINFO_READD|XMMINFO_WRITED); +EERECOMPILE_CODE0(MOVZtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED); void recMOVZ() { - if( _Rs_ == _Rd_ ) + if (_Rs_ == _Rd_) return; - if(GPR_IS_CONST1(_Rt_)) { + if (GPR_IS_CONST1(_Rt_)) + { if (g_cpuConstRegs[_Rt_].UD[0] != 0) return; - } else + } + else _deleteEEreg(_Rd_, 1); recMOVZtemp(); @@ -385,49 +421,51 @@ void recMOVNtemp_const() void recMOVNtemp_consts(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - xOR(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j8Ptr[ 0 ] = JZ8( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j8Ptr[0] = JZ8(0); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], g_cpuConstRegs[_Rs_].UL[0] ); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], g_cpuConstRegs[_Rs_].UL[1] ); + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); } void recMOVNtemp_constt(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recMOVNtemp_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - xOR(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - j8Ptr[ 0 ] = JZ8( 0 ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + j8Ptr[0] = JZ8(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]]); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ 
_Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); - x86SetJ8( j8Ptr[ 0 ] ); + x86SetJ8(j8Ptr[0]); } -EERECOMPILE_CODE0(MOVNtemp, XMMINFO_READS|XMMINFO_READD|XMMINFO_READD|XMMINFO_WRITED); +EERECOMPILE_CODE0(MOVNtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED); void recMOVN() { - if( _Rs_ == _Rd_ ) + if (_Rs_ == _Rd_) return; - if (GPR_IS_CONST1(_Rt_)) { + if (GPR_IS_CONST1(_Rt_)) + { if (g_cpuConstRegs[_Rt_].UD[0] == 0) return; - } else + } + else _deleteEEreg(_Rd_, 1); recMOVNtemp(); @@ -435,4 +473,6 @@ void recMOVN() #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp index b89583704c..2d05c68c12 100644 --- a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp +++ b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp @@ -26,8 +26,7 @@ namespace Interp = R5900::Interpreter::OpcodeImpl; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Register mult/div & Register trap logic * @@ -35,20 +34,20 @@ namespace OpcodeImpl *********************************************************/ #ifndef MULTDIV_RECOMPILE -REC_FUNC_DEL(MULT , _Rd_); -REC_FUNC_DEL(MULTU , _Rd_); -REC_FUNC_DEL( MULT1 , _Rd_); -REC_FUNC_DEL( MULTU1 , _Rd_); +REC_FUNC_DEL(MULT, _Rd_); +REC_FUNC_DEL(MULTU, _Rd_); +REC_FUNC_DEL(MULT1, _Rd_); +REC_FUNC_DEL(MULTU1, _Rd_); REC_FUNC(DIV); REC_FUNC(DIVU); -REC_FUNC( DIV1 ); -REC_FUNC( DIVU1 ); +REC_FUNC(DIV1); +REC_FUNC(DIVU1); -REC_FUNC_DEL( MADD , _Rd_ ); -REC_FUNC_DEL( MADDU , _Rd_); -REC_FUNC_DEL( MADD1 , _Rd_); -REC_FUNC_DEL( MADDU1 , _Rd_ ); +REC_FUNC_DEL(MADD, _Rd_); +REC_FUNC_DEL(MADDU, _Rd_); +REC_FUNC_DEL(MADD1, _Rd_); +REC_FUNC_DEL(MADDU1, _Rd_); #else @@ -56,19 +55,24 @@ REC_FUNC_DEL( MADDU1 , 
_Rd_ ); void recWritebackHILO(int info, int writed, int upper) { int regd, reglo = -1, reghi, savedlo = 0; - uptr loaddr = (uptr)&cpuRegs.LO.UL[ upper ? 2 : 0 ]; - uptr hiaddr = (uptr)&cpuRegs.HI.UL[ upper ? 2 : 0 ]; - u8 testlive = upper?EEINST_LIVE2:EEINST_LIVE0; + uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0]; + uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0]; + u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0; - if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) + if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) xMOV(ecx, edx); - if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) { + if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive) + { - if( (reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0 ) { - if( xmmregs[reglo].mode & MODE_WRITE ) { - if( upper ) xMOVQ(ptr[(void*)(loaddr-8)], xRegisterSSE(reglo)); - else xMOVH.PS(ptr[(void*)(loaddr+8)], xRegisterSSE(reglo)); + if ((reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0) + { + if (xmmregs[reglo].mode & MODE_WRITE) + { + if (upper) + xMOVQ(ptr[(void*)(loaddr - 8)], xRegisterSSE(reglo)); + else + xMOVH.PS(ptr[(void*)(loaddr + 8)], xRegisterSSE(reglo)); } xmmregs[reglo].inuse = 0; @@ -77,38 +81,48 @@ void recWritebackHILO(int info, int writed, int upper) xCDQ(); xMOV(ptr[(void*)(loaddr)], eax); - xMOV(ptr[(void*)(loaddr+4)], edx); + xMOV(ptr[(void*)(loaddr + 4)], edx); savedlo = 1; } - if ( writed && _Rd_ ) + if (writed && _Rd_) { _eeOnWriteReg(_Rd_, 1); regd = -1; - if( g_pCurInstInfo->regs[_Rd_] & EEINST_XMM ) { - if( savedlo ) { - regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE|MODE_READ); - if( regd >= 0 ) { + if (g_pCurInstInfo->regs[_Rd_] & EEINST_XMM) + { + if (savedlo) + { + regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE | MODE_READ); + if (regd >= 0) + { xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(loaddr)]); } } } - if( regd < 0 ) { + if (regd < 0) + { _deleteEEreg(_Rd_, 0); - if( !savedlo ) xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - 
xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + if (!savedlo) + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } } - if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) { - if( (reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0 ) { - if( xmmregs[reghi].mode & MODE_WRITE ) { - if( upper ) xMOVQ(ptr[(void*)(hiaddr-8)], xRegisterSSE(reghi)); - else xMOVH.PS(ptr[(void*)(hiaddr+8)], xRegisterSSE(reghi)); + if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) + { + if ((reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0) + { + if (xmmregs[reghi].mode & MODE_WRITE) + { + if (upper) + xMOVQ(ptr[(void*)(hiaddr - 8)], xRegisterSSE(reghi)); + else + xMOVH.PS(ptr[(void*)(hiaddr + 8)], xRegisterSSE(reghi)); } xmmregs[reghi].inuse = 0; @@ -117,48 +131,59 @@ void recWritebackHILO(int info, int writed, int upper) xMOV(ptr[(void*)(hiaddr)], ecx); xSAR(ecx, 31); - xMOV(ptr[(void*)(hiaddr+4)], ecx); + xMOV(ptr[(void*)(hiaddr + 4)], ecx); } } void recWritebackConstHILO(u64 res, int writed, int upper) { int reglo, reghi; - uptr loaddr = (uptr)&cpuRegs.LO.UL[ upper ? 2 : 0 ]; - uptr hiaddr = (uptr)&cpuRegs.HI.UL[ upper ? 2 : 0 ]; - u8 testlive = upper?EEINST_LIVE2:EEINST_LIVE0; + uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0]; + uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0]; + u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0; - if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) { - reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE|MODE_READ); + if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive) + { + reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE | MODE_READ); - if( reglo >= 0 ) { + if (reglo >= 0) + { u32* mem_ptr = recGetImm64(res & 0x80000000 ? 
-1 : 0, (u32)res); - if( upper ) xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]); - else xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]); + if (upper) + xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]); + else + xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]); } - else { + else + { xMOV(ptr32[(u32*)(loaddr)], res & 0xffffffff); - xMOV(ptr32[(u32*)(loaddr+4)], (res&0x80000000)?0xffffffff:0); + xMOV(ptr32[(u32*)(loaddr + 4)], (res & 0x80000000) ? 0xffffffff : 0); } } - if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) { + if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) + { - reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE|MODE_READ); + reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE | MODE_READ); - if( reghi >= 0 ) { + if (reghi >= 0) + { u32* mem_ptr = recGetImm64((res >> 63) ? -1 : 0, res >> 32); - if( upper ) xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]); - else xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]); + if (upper) + xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]); + else + xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]); } - else { + else + { _deleteEEreg(XMMGPR_HI, 0); xMOV(ptr32[(u32*)(hiaddr)], res >> 32); - xMOV(ptr32[(u32*)(hiaddr+4)], (res>>63)?0xffffffff:0); + xMOV(ptr32[(u32*)(hiaddr + 4)], (res >> 63) ? 
0xffffffff : 0); } } - if (!writed || !_Rd_) return; + if (!writed || !_Rd_) + return; g_cpuConstRegs[_Rd_].SD[0] = (s32)(res & 0xffffffffULL); //that is the difference } @@ -173,17 +198,20 @@ void recMULT_const() void recMULTUsuper(int info, int upper, int process); void recMULTsuper(int info, int upper, int process) { - if( process & PROCESS_CONSTS ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (process & PROCESS_CONSTS) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if( process & PROCESS_CONSTT) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (process & PROCESS_CONSTT) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } recWritebackHILO(info, 1, upper); @@ -205,7 +233,7 @@ void recMULT_constt(int info) } // don't set XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI -EERECOMPILE_CODE0(MULT, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0) ); +EERECOMPILE_CODE0(MULT, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? 
XMMINFO_WRITED : 0)); //// MULTU void recMULTU_const() @@ -217,17 +245,20 @@ void recMULTU_const() void recMULTUsuper(int info, int upper, int process) { - if( process & PROCESS_CONSTS ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (process & PROCESS_CONSTS) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if( process & PROCESS_CONSTT) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (process & PROCESS_CONSTT) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } recWritebackHILO(info, 1, upper); @@ -249,7 +280,7 @@ void recMULTU_constt(int info) } // don't specify XMMINFO_WRITELO or XMMINFO_WRITEHI, that is taken care of -EERECOMPILE_CODE0(MULTU, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); +EERECOMPILE_CODE0(MULTU, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0)); //////////////////////////////////////////////////// void recMULT1_const() @@ -274,7 +305,7 @@ void recMULT1_constt(int info) recMULTsuper(info, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(MULT1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0) ); +EERECOMPILE_CODE0(MULT1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0)); //////////////////////////////////////////////////// void recMULTU1_const() @@ -299,7 +330,7 @@ void recMULTU1_constt(int info) recMULTUsuper(info, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(MULTU1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); +EERECOMPILE_CODE0(MULTU1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? 
XMMINFO_WRITED : 0)); //// DIV @@ -313,15 +344,15 @@ void recDIVconst(int upper) } else if (g_cpuConstRegs[_Rt_].SL[0] != 0) { - quot = g_cpuConstRegs[_Rs_].SL[0] / g_cpuConstRegs[_Rt_].SL[0]; - rem = g_cpuConstRegs[_Rs_].SL[0] % g_cpuConstRegs[_Rt_].SL[0]; - } + quot = g_cpuConstRegs[_Rs_].SL[0] / g_cpuConstRegs[_Rt_].SL[0]; + rem = g_cpuConstRegs[_Rs_].SL[0] % g_cpuConstRegs[_Rt_].SL[0]; + } else { quot = (g_cpuConstRegs[_Rs_].SL[0] < 0) ? 1 : -1; rem = g_cpuConstRegs[_Rs_].SL[0]; } - recWritebackConstHILO((u64)quot|((u64)rem<<32), 0, upper); + recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper); } void recDIV_const() @@ -331,23 +362,23 @@ void recDIV_const() void recDIVsuper(int info, int sign, int upper, int process) { - if( process & PROCESS_CONSTT ) - xMOV(ecx, g_cpuConstRegs[_Rt_].UL[0] ); + if (process & PROCESS_CONSTT) + xMOV(ecx, g_cpuConstRegs[_Rt_].UL[0]); else - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - if( process & PROCESS_CONSTS ) - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); + if (process & PROCESS_CONSTS) + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); else - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - u8 *end1; - if (sign) //test for overflow (x86 will just throw an exception) + u8* end1; + if (sign) //test for overflow (x86 will just throw an exception) { - xCMP(eax, 0x80000000 ); - u8 *cont1 = JNE8(0); - xCMP(ecx, 0xffffffff ); - u8 *cont2 = JNE8(0); + xCMP(eax, 0x80000000); + u8* cont1 = JNE8(0); + xCMP(ecx, 0xffffffff); + u8* cont2 = JNE8(0); //overflow case: xXOR(edx, edx); //EAX remains 0x80000000 end1 = JMP8(0); @@ -356,32 +387,35 @@ void recDIVsuper(int info, int sign, int upper, int process) x86SetJ8(cont2); } - xCMP(ecx, 0 ); - u8 *cont3 = JNE8(0); + xCMP(ecx, 0); + u8* cont3 = JNE8(0); //divide by zero xMOV(edx, eax); if (sign) //set EAX to (EAX < 0)?1:-1 { - xSAR(eax, 31 ); //(EAX < 0)?-1:0 - xSHL(eax, 1 ); //(EAX < 0)?-2:0 + 
xSAR(eax, 31); //(EAX < 0)?-1:0 + xSHL(eax, 1); //(EAX < 0)?-2:0 xNOT(eax); //(EAX < 0)?1:-1 } else - xMOV(eax, 0xffffffff ); - u8 *end2 = JMP8(0); + xMOV(eax, 0xffffffff); + u8* end2 = JMP8(0); x86SetJ8(cont3); - if( sign ) { + if (sign) + { xCDQ(); xDIV(ecx); } - else { + else + { xXOR(edx, edx); xUDIV(ecx); } - if (sign) x86SetJ8( end1 ); - x86SetJ8( end2 ); + if (sign) + x86SetJ8(end1); + x86SetJ8(end2); // need to execute regardless of bad divide recWritebackHILO(info, 0, upper); @@ -402,13 +436,14 @@ void recDIV_constt(int info) recDIVsuper(info, 1, 0, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI); +EERECOMPILE_CODE0(DIV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); //// DIVU void recDIVUconst(int upper) { u32 quot, rem; - if (g_cpuConstRegs[_Rt_].UL[0] != 0) { + if (g_cpuConstRegs[_Rt_].UL[0] != 0) + { quot = g_cpuConstRegs[_Rs_].UL[0] / g_cpuConstRegs[_Rt_].UL[0]; rem = g_cpuConstRegs[_Rs_].UL[0] % g_cpuConstRegs[_Rt_].UL[0]; } @@ -418,7 +453,7 @@ void recDIVUconst(int upper) rem = g_cpuConstRegs[_Rs_].UL[0]; } - recWritebackConstHILO((u64)quot|((u64)rem<<32), 0, upper); + recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper); } void recDIVU_const() @@ -441,7 +476,7 @@ void recDIVU_constt(int info) recDIVsuper(info, 0, 0, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIVU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI); +EERECOMPILE_CODE0(DIVU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); void recDIV1_const() { @@ -463,7 +498,7 @@ void recDIV1_constt(int info) recDIVsuper(info, 1, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIV1, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(DIV1, XMMINFO_READS | XMMINFO_READT); void recDIVU1_const() { @@ -485,26 +520,28 @@ void recDIVU1_constt(int info) recDIVsuper(info, 0, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIVU1, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT); 
void recMADD() { - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]); _deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_HI, 1); // dadd - xMOV(eax, ptr[&cpuRegs.LO.UL[ 0 ] ]); - xMOV(ecx, ptr[&cpuRegs.HI.UL[ 0 ] ]); - xADD(eax, (u32)result&0xffffffff ); - xADC(ecx, (u32)(result>>32) ); + xMOV(eax, ptr[&cpuRegs.LO.UL[0]]); + xMOV(ecx, ptr[&cpuRegs.HI.UL[0]]); + xADD(eax, (u32)result & 0xffffffff); + xADC(ecx, (u32)(result >> 32)); xCDQ(); - if( _Rd_) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[0]], eax); @@ -522,28 +559,32 @@ void recMADD() _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if ( GPR_IS_CONST1(_Rt_) ) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (GPR_IS_CONST1(_Rt_)) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } xMOV(ecx, edx); - xADD(eax, ptr[&cpuRegs.LO.UL[0] ]); - xADC(ecx, ptr[&cpuRegs.HI.UL[0] ]); + xADD(eax, ptr[&cpuRegs.LO.UL[0]]); + xADC(ecx, ptr[&cpuRegs.HI.UL[0]]); xCDQ(); - if( _Rd_ ) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + 
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[0]], eax); @@ -557,22 +598,24 @@ void recMADD() void recMADDU() { - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]); _deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_HI, 1); // dadd - xMOV(eax, ptr[&cpuRegs.LO.UL[ 0 ] ]); - xMOV(ecx, ptr[&cpuRegs.HI.UL[ 0 ] ]); - xADD(eax, (u32)result&0xffffffff ); - xADC(ecx, (u32)(result>>32) ); + xMOV(eax, ptr[&cpuRegs.LO.UL[0]]); + xMOV(ecx, ptr[&cpuRegs.HI.UL[0]]); + xADD(eax, (u32)result & 0xffffffff); + xADC(ecx, (u32)(result >> 32)); xCDQ(); - if( _Rd_) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[0]], eax); @@ -590,28 +633,32 @@ void recMADDU() _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if ( GPR_IS_CONST1(_Rt_) ) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (GPR_IS_CONST1(_Rt_)) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } xMOV(ecx, edx); - xADD(eax, ptr[&cpuRegs.LO.UL[0] ]); - xADC(ecx, ptr[&cpuRegs.HI.UL[0] ]); + xADD(eax, ptr[&cpuRegs.LO.UL[0]]); + xADC(ecx, ptr[&cpuRegs.HI.UL[0]]); xCDQ(); - if( _Rd_ ) { 
+ if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[0]], eax); @@ -625,22 +672,24 @@ void recMADDU() void recMADD1() { - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { u64 result = ((s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0]); _deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_HI, 1); // dadd - xMOV(eax, ptr[&cpuRegs.LO.UL[ 2 ] ]); - xMOV(ecx, ptr[&cpuRegs.HI.UL[ 2 ] ]); - xADD(eax, (u32)result&0xffffffff ); - xADC(ecx, (u32)(result>>32) ); + xMOV(eax, ptr[&cpuRegs.LO.UL[2]]); + xMOV(ecx, ptr[&cpuRegs.HI.UL[2]]); + xADD(eax, (u32)result & 0xffffffff); + xADC(ecx, (u32)(result >> 32)); xCDQ(); - if( _Rd_) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[2]], eax); @@ -658,28 +707,32 @@ void recMADD1() _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if ( GPR_IS_CONST1(_Rt_) ) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (GPR_IS_CONST1(_Rt_)) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } xMOV(ecx, edx); - xADD(eax, 
ptr[&cpuRegs.LO.UL[2] ]); - xADC(ecx, ptr[&cpuRegs.HI.UL[2] ]); + xADD(eax, ptr[&cpuRegs.LO.UL[2]]); + xADC(ecx, ptr[&cpuRegs.HI.UL[2]]); xCDQ(); - if( _Rd_ ) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[2]], eax); @@ -693,22 +746,24 @@ void recMADD1() void recMADDU1() { - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { u64 result = ((u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0]); _deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_HI, 1); // dadd - xMOV(eax, ptr[&cpuRegs.LO.UL[ 2 ] ]); - xMOV(ecx, ptr[&cpuRegs.HI.UL[ 2 ] ]); - xADD(eax, (u32)result&0xffffffff ); - xADC(ecx, (u32)(result>>32) ); + xMOV(eax, ptr[&cpuRegs.LO.UL[2]]); + xMOV(ecx, ptr[&cpuRegs.HI.UL[2]]); + xADD(eax, (u32)result & 0xffffffff); + xADC(ecx, (u32)(result >> 32)); xCDQ(); - if( _Rd_) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[2]], eax); @@ -726,28 +781,32 @@ void recMADDU1() _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( GPR_IS_CONST1(_Rs_) ) { - xMOV(eax, g_cpuConstRegs[_Rs_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + if (GPR_IS_CONST1(_Rs_)) + { + xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } - else if ( GPR_IS_CONST1(_Rt_) ) { - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xUMUL(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + else if (GPR_IS_CONST1(_Rt_)) + { + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]); } - else { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - 
xUMUL(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); + else + { + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); } xMOV(ecx, edx); - xADD(eax, ptr[&cpuRegs.LO.UL[2] ]); - xADC(ecx, ptr[&cpuRegs.HI.UL[2] ]); + xADD(eax, ptr[&cpuRegs.LO.UL[2]]); + xADC(ecx, ptr[&cpuRegs.HI.UL[2]]); xCDQ(); - if( _Rd_ ) { + if (_Rd_) + { _eeOnWriteReg(_Rd_, 1); _deleteEEreg(_Rd_, 0); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } xMOV(ptr[&cpuRegs.LO.UL[2]], eax); @@ -762,4 +821,6 @@ void recMADDU1() #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900Shift.cpp b/pcsx2/x86/ix86-32/iR5900Shift.cpp index 92dbcb47c9..4bafddf413 100644 --- a/pcsx2/x86/ix86-32/iR5900Shift.cpp +++ b/pcsx2/x86/ix86-32/iR5900Shift.cpp @@ -24,8 +24,7 @@ using namespace x86Emitter; namespace R5900 { namespace Dynarec { -namespace OpcodeImpl -{ +namespace OpcodeImpl { /********************************************************* * Shift arithmetic with constant shift * @@ -35,22 +34,22 @@ namespace OpcodeImpl namespace Interp = R5900::Interpreter::OpcodeImpl; -REC_FUNC_DEL(SLL, _Rd_); -REC_FUNC_DEL(SRL, _Rd_); -REC_FUNC_DEL(SRA, _Rd_); -REC_FUNC_DEL(DSLL, _Rd_); -REC_FUNC_DEL(DSRL, _Rd_); -REC_FUNC_DEL(DSRA, _Rd_); +REC_FUNC_DEL(SLL, _Rd_); +REC_FUNC_DEL(SRL, _Rd_); +REC_FUNC_DEL(SRA, _Rd_); +REC_FUNC_DEL(DSLL, _Rd_); +REC_FUNC_DEL(DSRL, _Rd_); +REC_FUNC_DEL(DSRA, _Rd_); REC_FUNC_DEL(DSLL32, _Rd_); REC_FUNC_DEL(DSRL32, _Rd_); REC_FUNC_DEL(DSRA32, _Rd_); -REC_FUNC_DEL(SLLV, _Rd_); -REC_FUNC_DEL(SRLV, _Rd_); -REC_FUNC_DEL(SRAV, _Rd_); -REC_FUNC_DEL(DSLLV, _Rd_); -REC_FUNC_DEL(DSRLV, _Rd_); -REC_FUNC_DEL(DSRAV, _Rd_); +REC_FUNC_DEL(SLLV, _Rd_); +REC_FUNC_DEL(SRLV, _Rd_); +REC_FUNC_DEL(SRAV, _Rd_); +REC_FUNC_DEL(DSLLV, _Rd_); +REC_FUNC_DEL(DSRLV, _Rd_); 
+REC_FUNC_DEL(DSRAV, _Rd_); #else @@ -62,17 +61,17 @@ void recSLL_const() void recSLLs_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( sa != 0 ) + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (sa != 0) { - xSHL(eax, sa ); + xSHL(eax, sa); } - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recSLL_(int info) @@ -90,14 +89,15 @@ void recSRL_const() void recSRLs_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( sa != 0 ) xSHR(eax, sa); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (sa != 0) + xSHR(eax, sa); - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recSRL_(int info) @@ -115,14 +115,15 @@ void recSRA_const() void recSRAs_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( sa != 0 ) xSAR(eax, sa); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (sa != 0) + xSAR(eax, sa); xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recSRA_(int info) @@ -141,20 +142,21 @@ void recDSLL_const() void recDSLLs_(int info, int sa) { int rtreg, rdreg; - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rdreg = _allocGPRtoXMMreg(-1, _Rd_, 
MODE_WRITE); - if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); + if (rtreg != rdreg) + xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); xPSLL.Q(xRegisterSSE(rdreg), sa); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } @@ -175,20 +177,21 @@ void recDSRL_const() void recDSRLs_(int info, int sa) { int rtreg, rdreg; - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); - if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); + if (rtreg != rdreg) + xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); xPSRL.Q(xRegisterSSE(rdreg), sa); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } @@ -209,16 +212,18 @@ void recDSRA_const() void recDSRAs_(int info, int sa) { int rtreg, rdreg, t0reg; - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); - if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); + if (rtreg != rdreg) + xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg)); - if ( sa ) { + if (sa) + { t0reg = 
_allocTempXMMreg(XMMT_INT, -1); @@ -242,7 +247,7 @@ void recDSRAs_(int info, int sa) // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } @@ -257,21 +262,20 @@ EERECOMPILE_CODEX(eeRecompileCode2, DSRA); ///// DSLL32 void recDSLL32_const() { - g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (_Sa_+32)); + g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (_Sa_ + 32)); } void recDSLL32s_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( sa != 0 ) + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (sa != 0) { - xSHL(eax, sa ); + xSHL(eax, sa); } - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], 0 ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], eax); - + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax); } void recDSLL32_(int info) @@ -284,18 +288,19 @@ EERECOMPILE_CODEX(eeRecompileCode2, DSLL32); //// DSRL32 void recDSRL32_const() { - g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (_Sa_+32)); + g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (_Sa_ + 32)); } void recDSRL32s_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - if ( sa != 0 ) xSHR(eax, sa ); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + if (sa != 0) + xSHR(eax, sa); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], 0 ); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0); } void recDSRL32_(int 
info) @@ -308,20 +313,20 @@ EERECOMPILE_CODEX(eeRecompileCode2, DSRL32); //// DSRA32 void recDSRA32_const() { - g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> (_Sa_+32)); + g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> (_Sa_ + 32)); } void recDSRA32s_(int info, int sa) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ]); - xCDQ( ); - if ( sa != 0 ) xSAR(eax, sa ); - - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + xCDQ(); + if (sa != 0) + xSAR(eax, sa); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } void recDSRA32_(int info) @@ -340,7 +345,7 @@ __aligned16 u32 s_sa[4] = {0x1f, 0, 0x3f, 0}; void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp) { - pxAssert( !(info & PROCESS_EE_XMM) ); + pxAssert(!(info & PROCESS_EE_XMM)); _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); @@ -354,7 +359,8 @@ void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp) xMOVDZX(xRegisterSSE(*rstemp), eax); *rsreg = *rstemp; - if( *rtreg != *rdreg ) xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg)); + if (*rtreg != *rdreg) + xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg)); } void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp) @@ -379,20 +385,20 @@ void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp) //// SLLV void recSLLV_const() { - g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << (g_cpuConstRegs[_Rs_].UL[0] &0x1f)); + g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x1f)); } void recSLLV_consts(int info) { - recSLLs_(info, g_cpuConstRegs[_Rs_].UL[0]&0x1f); + recSLLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f); } void recSLLV_constt(int info) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ 
].UL[ 0 ] ]); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xAND(ecx, 0x1f ); + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xAND(ecx, 0x1f); xSHL(eax, cl); eeSignExtendTo(_Rd_); @@ -400,37 +406,37 @@ void recSLLV_constt(int info) void recSLLV_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( _Rs_ != 0 ) + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (_Rs_ != 0) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xAND(ecx, 0x1f ); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xAND(ecx, 0x1f); xSHL(eax, cl); } xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } -EERECOMPILE_CODE0(SLLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// SRLV void recSRLV_const() { - g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> (g_cpuConstRegs[_Rs_].UL[0] &0x1f)); + g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f)); } void recSRLV_consts(int info) { - recSRLs_(info, g_cpuConstRegs[_Rs_].UL[0]&0x1f); + recSRLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f); } void recSRLV_constt(int info) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xAND(ecx, 0x1f ); + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xAND(ecx, 0x1f); xSHR(eax, cl); eeSignExtendTo(_Rd_); @@ -438,37 +444,37 @@ void recSRLV_constt(int info) void recSRLV_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( _Rs_ != 0 ) + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (_Rs_ != 0) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xAND(ecx, 0x1f ); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xAND(ecx, 0x1f); xSHR(eax, cl); } - xCDQ( ); - 
xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } -EERECOMPILE_CODE0(SRLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// SRAV void recSRAV_const() { - g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> (g_cpuConstRegs[_Rs_].UL[0] &0x1f)); + g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f)); } void recSRAV_consts(int info) { - recSRAs_(info, g_cpuConstRegs[_Rs_].UL[0]&0x1f); + recSRAs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f); } void recSRAV_constt(int info) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(eax, g_cpuConstRegs[_Rt_].UL[0] ); - xAND(ecx, 0x1f ); + xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]); + xAND(ecx, 0x1f); xSAR(eax, cl); eeSignExtendTo(_Rd_); @@ -476,31 +482,33 @@ void recSRAV_constt(int info) void recSRAV_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ]); - if ( _Rs_ != 0 ) + xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); + if (_Rs_ != 0) { - xMOV(ecx, ptr[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ]); - xAND(ecx, 0x1f ); + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + xAND(ecx, 0x1f); xSAR(eax, cl); } - xCDQ( ); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ]], eax); - xMOV(ptr[&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]], edx); + xCDQ(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); } -EERECOMPILE_CODE0(SRAV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(SRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// DSLLV void recDSLLV_const() { - g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (g_cpuConstRegs[_Rs_].UL[0] &0x3f)); + g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x3f)); } void 
recDSLLV_consts(int info) { - int sa = g_cpuConstRegs[_Rs_].UL[0]&0x3f; - if( sa < 32 ) recDSLLs_(info, sa); - else recDSLL32s_(info, sa-32); + int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; + if (sa < 32) + recDSLLs_(info, sa); + else + recDSLL32s_(info, sa - 32); } void recDSLLV_constt(int info) @@ -509,12 +517,13 @@ void recDSLLV_constt(int info) recSetConstShiftV(info, &rsreg, &rdreg, &rstemp); xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]); xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); //_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } @@ -525,29 +534,32 @@ void recDSLLV_(int info) recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } -EERECOMPILE_CODE0(DSLLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// DSRLV void recDSRLV_const() { - g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (g_cpuConstRegs[_Rs_].UL[0] &0x3f)); + g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f)); } void 
recDSRLV_consts(int info) { - int sa = g_cpuConstRegs[_Rs_].UL[0]&0x3f; - if( sa < 32 ) recDSRLs_(info, sa); - else recDSRL32s_(info, sa-32); + int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; + if (sa < 32) + recDSRLs_(info, sa); + else + recDSRL32s_(info, sa - 32); } void recDSRLV_constt(int info) @@ -557,12 +569,13 @@ void recDSRLV_constt(int info) xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]); xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); //_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } @@ -573,29 +586,32 @@ void recDSRLV_(int info) recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg)); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); } -EERECOMPILE_CODE0(DSRLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); //// DSRAV void recDSRAV_const() { - g_cpuConstRegs[_Rd_].SD[0] = (s64)(g_cpuConstRegs[_Rt_].SD[0] >> (g_cpuConstRegs[_Rs_].UL[0] &0x3f)); + g_cpuConstRegs[_Rd_].SD[0] = (s64)(g_cpuConstRegs[_Rt_].SD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f)); } void recDSRAV_consts(int info) { - int sa = 
g_cpuConstRegs[_Rs_].UL[0]&0x3f; - if( sa < 32 ) recDSRAs_(info, sa); - else recDSRA32s_(info, sa-32); + int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f; + if (sa < 32) + recDSRAs_(info, sa); + else + recDSRA32s_(info, sa - 32); } void recDSRAV_constt(int info) @@ -628,12 +644,13 @@ void recDSRAV_constt(int info) // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rd_, 3); _freeXMMreg(t0reg); _freeXMMreg(t1reg); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); } void recDSRAV_(int info) @@ -664,17 +681,20 @@ void recDSRAV_(int info) // flush lower 64 bits (as upper is wrong) // The others possibility could be a read back of the upper 64 bits // (better use of register but code will likely be flushed after anyway) - xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]] , xRegisterSSE(rdreg)); + xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); _freeXMMreg(t0reg); _freeXMMreg(t1reg); - if( rstemp != -1 ) _freeXMMreg(rstemp); + if (rstemp != -1) + _freeXMMreg(rstemp); } -EERECOMPILE_CODE0(DSRAV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED); +EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); #endif -} } } +} // namespace OpcodeImpl +} // namespace Dynarec +} // namespace R5900 diff --git a/pcsx2/x86/ix86-32/iR5900Templates.cpp b/pcsx2/x86/ix86-32/iR5900Templates.cpp index f31c8eb9ef..274ded308b 100644 --- a/pcsx2/x86/ix86-32/iR5900Templates.cpp +++ b/pcsx2/x86/ix86-32/iR5900Templates.cpp @@ -40,8 +40,10 @@ void _eeOnWriteReg(int reg, int signext) void _deleteEEreg(int reg, int flush) { - if( !reg ) return; - if( flush && GPR_IS_CONST1(reg) ) { + if (!reg) + 
return; + if (flush && GPR_IS_CONST1(reg)) + { _flushConstReg(reg); } GPR_DEL_CONST(reg); @@ -50,8 +52,10 @@ void _deleteEEreg(int reg, int flush) void _flushEEreg(int reg) { - if (!reg) return; - if (GPR_IS_CONST1(reg)) { + if (!reg) + return; + if (GPR_IS_CONST1(reg)) + { _flushConstReg(reg); return; } @@ -62,8 +66,9 @@ void _flushEEreg(int reg) int eeProcessHILO(int reg, int mode, int mmx) { // Fixme: MMX problem - int usemmx = 0; - if( (usemmx || _hasFreeXMMreg()) || !(g_pCurInstInfo->regs[reg]&EEINST_LASTUSE) ) { + int usemmx = 0; + if ((usemmx || _hasFreeXMMreg()) || !(g_pCurInstInfo->regs[reg] & EEINST_LASTUSE)) + { return _allocGPRtoXMMreg(-1, reg, mode); } @@ -71,8 +76,8 @@ int eeProcessHILO(int reg, int mode, int mmx) } // Strangely this code is used on NOT-MMX path ... -#define PROCESS_EE_SETMODES(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ?PROCESS_EE_MODEWRITES:0) -#define PROCESS_EE_SETMODET(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ?PROCESS_EE_MODEWRITET:0) +#define PROCESS_EE_SETMODES(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITES : 0) +#define PROCESS_EE_SETMODET(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITET : 0) // ignores XMMINFO_READS, XMMINFO_READT, and XMMINFO_READD_LO from xmminfo // core of reg caching @@ -80,100 +85,131 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP { int mmreg1, mmreg2, mmreg3, mmtemp, moded; - if ( ! _Rd_ && (xmminfo&XMMINFO_WRITED) ) return; + if (!_Rd_ && (xmminfo & XMMINFO_WRITED)) + return; - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - if( xmminfo & XMMINFO_WRITED ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { + if (xmminfo & XMMINFO_WRITED) + { _deleteGPRtoXMMreg(_Rd_, 2); } - if( xmminfo&XMMINFO_WRITED ) GPR_SET_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_SET_CONST(_Rd_); constcode(); return; } - moded = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); + moded = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? 
MODE_READ : 0); // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { + if (g_pCurInstInfo->info & EEINST_XMM) + { pxAssert(0); - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededGPRtoXMMreg(XMMGPR_LO); - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededGPRtoXMMreg(XMMGPR_HI); + if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO)) + _addNeededGPRtoXMMreg(XMMGPR_LO); + if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI)) + _addNeededGPRtoXMMreg(XMMGPR_HI); _addNeededGPRtoXMMreg(_Rs_); _addNeededGPRtoXMMreg(_Rt_); - if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_)) + { u32 creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; int vreg = creg == _Rs_ ? _Rt_ : _Rs_; -// if(g_pCurInstInfo->regs[vreg]&EEINST_XMM) { +// if (g_pCurInstInfo->regs[vreg] & EEINST_XMM) +// { // mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ); // _addNeededGPRtoXMMreg(vreg); // } mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ); - if( mmreg1 >= 0 ) { + if (mmreg1 >= 0) + { int info = PROCESS_EE_XMM; - if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); - else info |= PROCESS_EE_SETMODES(mmreg1); + if (GPR_IS_CONST1(_Rs_)) + info |= PROCESS_EE_SETMODET(mmreg1); + else + info |= PROCESS_EE_SETMODES(mmreg1); - if( xmminfo & XMMINFO_WRITED ) { + if (xmminfo & XMMINFO_WRITED) + { _addNeededGPRtoXMMreg(_Rd_); mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)) ) { + if (!(xmminfo & XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg))) + { _freeXMMreg(mmreg1); - if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; - else info &= ~PROCESS_EE_MODEWRITES; + if (GPR_IS_CONST1(_Rs_)) + info &= ~PROCESS_EE_MODEWRITET; + else + info &= ~PROCESS_EE_MODEWRITES; xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].reg = 
_Rd_; xmmregs[mmreg1].mode = moded; mmreg3 = mmreg1; } - else if( mmreg3 < 0 ) mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); + else if (mmreg3 < 0) + mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); info |= PROCESS_EE_SET_D(mmreg3); } - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO)) + { + mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); + if (mmtemp >= 0) + info |= PROCESS_EE_SET_LO(mmtemp); } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI)) + { + mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? 
MODE_WRITE : 0), 0); + if (mmtemp >= 0) + info |= PROCESS_EE_SET_HI(mmtemp); } - if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); - else consttcode(info|PROCESS_EE_SET_S(mmreg1)); + if (creg == _Rs_) + constscode(info | PROCESS_EE_SET_T(mmreg1)); + else + consttcode(info | PROCESS_EE_SET_S(mmreg1)); _clearNeededXMMregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_DEL_CONST(_Rd_); return; } } - else { + else + { // no const regs mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); - if( mmreg1 >= 0 || mmreg2 >= 0 ) { + if (mmreg1 >= 0 || mmreg2 >= 0) + { int info = PROCESS_EE_XMM; // do it all in xmm - if( mmreg1 < 0 ) mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); - if( mmreg2 < 0 ) mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); + if (mmreg1 < 0) + mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); + if (mmreg2 < 0) + mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); - info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2); + info |= PROCESS_EE_SETMODES(mmreg1) | PROCESS_EE_SETMODET(mmreg2); - if( xmminfo & XMMINFO_WRITED ) { + if (xmminfo & XMMINFO_WRITED) + { // check for last used, if so don't alloc a new XMM reg _addNeededGPRtoXMMreg(_Rd_); mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded); - if( mmreg3 < 0 ) { - if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { + if (mmreg3 < 0) + { + if (!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))) + { _freeXMMreg(mmreg2); info &= ~PROCESS_EE_MODEWRITET; xmmregs[mmreg2].inuse = 1; @@ -181,7 +217,8 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP xmmregs[mmreg2].mode = moded; mmreg3 = mmreg2; } - else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { + else if 
(!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))) + { _freeXMMreg(mmreg1); info &= ~PROCESS_EE_MODEWRITES; xmmregs[mmreg1].inuse = 1; @@ -189,24 +226,30 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP xmmregs[mmreg1].mode = moded; mmreg3 = mmreg1; } - else mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); + else + mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); } info |= PROCESS_EE_SET_D(mmreg3); } - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO)) + { + mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0); + if (mmtemp >= 0) + info |= PROCESS_EE_SET_LO(mmtemp); } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI)) + { + mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? 
MODE_WRITE : 0), 0); + if (mmtemp >= 0) + info |= PROCESS_EE_SET_HI(mmtemp); } - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2)); _clearNeededXMMregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_DEL_CONST(_Rd_); return; } } @@ -217,40 +260,49 @@ void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNP // regular x86 _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( xmminfo&XMMINFO_WRITED ) - _deleteGPRtoXMMreg(_Rd_, (xmminfo&XMMINFO_READD)?0:2); + if (xmminfo & XMMINFO_WRITED) + _deleteGPRtoXMMreg(_Rd_, (xmminfo & XMMINFO_READD) ? 0 : 2); // don't delete, fn will take care of them -// if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { -// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo&XMMINFO_READLO)?1:0); +// if (xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO)) +// { +// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo & XMMINFO_READLO) ? 1 : 0); // } -// if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { -// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo&XMMINFO_READHI)?1:0); +// if (xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI)) +// { +// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo & XMMINFO_READHI) ? 1 : 0); // } - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { constscode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_DEL_CONST(_Rd_); return; } - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { consttcode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_DEL_CONST(_Rd_); return; } noconstcode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + if (xmminfo & XMMINFO_WRITED) + GPR_DEL_CONST(_Rd_); } // rt = rs op imm16 void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) { int mmreg1, mmreg2; - if ( ! 
_Rt_ ) return; + if (!_Rt_) + return; - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { _deleteGPRtoXMMreg(_Rt_, 2); GPR_SET_CONST(_Rt_); constcode(); @@ -258,32 +310,37 @@ void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) } // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { + if (g_pCurInstInfo->info & EEINST_XMM) + { pxAssert(0); // no const regs mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_XMM|PROCESS_EE_SETMODES(mmreg1); + if (mmreg1 >= 0) + { + int info = PROCESS_EE_XMM | PROCESS_EE_SETMODES(mmreg1); // check for last used, if so don't alloc a new XMM reg _addNeededGPRtoXMMreg(_Rt_); mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE); - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { + if (mmreg2 < 0) + { + if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) + { _freeXMMreg(mmreg1); info &= ~PROCESS_EE_MODEWRITES; xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].reg = _Rt_; - xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; + xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ; mmreg2 = mmreg1; } - else mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE); + else + mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE); } - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2)); _clearNeededXMMregs(); GPR_DEL_CONST(_Rt_); return; @@ -304,9 +361,11 @@ void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) { int mmreg1, mmreg2; - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { _deleteGPRtoXMMreg(_Rd_, 2); GPR_SET_CONST(_Rd_); constcode(); @@ -314,32 +373,37 @@ void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) } // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { + if (g_pCurInstInfo->info & EEINST_XMM) + { pxAssert(0); // no const regs mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_XMM|PROCESS_EE_SETMODET(mmreg1); + if (mmreg1 >= 0) + { + int info = PROCESS_EE_XMM | PROCESS_EE_SETMODET(mmreg1); // check for last used, if so don't alloc a new XMM reg _addNeededGPRtoXMMreg(_Rd_); mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { + if (mmreg2 < 0) + { + if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_)) + { _freeXMMreg(mmreg1); info &= ~PROCESS_EE_MODEWRITET; xmmregs[mmreg1].inuse = 1; xmmregs[mmreg1].reg = _Rd_; - xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; + xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ; mmreg2 = mmreg1; } - else mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); + else + mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); } - noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2)); + noconstcode(info | PROCESS_EE_SET_T(mmreg1) | PROCESS_EE_SET_D(mmreg2)); _clearNeededXMMregs(); GPR_DEL_CONST(_Rd_); return; @@ -359,23 +423,26 @@ void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) // rt op rs void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode) { - pxFail( "Unfinished code reached." 
); + pxFail("Unfinished code reached."); // for now, don't support xmm _deleteEEreg(_Rs_, 0); _deleteEEreg(_Rt_, 1); - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { constcode(); return; } - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { //multicode(PROCESS_EE_CONSTT); return; } - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { //multicode(PROCESS_EE_CONSTT); return; } @@ -388,7 +455,8 @@ void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode) // rd = rs op rt void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode) { - if ( ! _Rd_ ) return; + if (!_Rd_) + return; // for now, don't support xmm @@ -396,19 +464,22 @@ void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R59 _deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rd_, 0); - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { GPR_SET_CONST(_Rd_); constcode(); return; } - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { constscode(0); GPR_DEL_CONST(_Rd_); return; } - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { consttcode(0); GPR_DEL_CONST(_Rd_); return; @@ -421,7 +492,7 @@ void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R59 // rt = rs op imm16 void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) { - if ( ! _Rt_ ) + if (!_Rt_) return; // for now, don't support xmm @@ -429,7 +500,8 @@ void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 0); - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { GPR_SET_CONST(_Rt_); constcode(); return; @@ -442,14 +514,16 @@ void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) // rd = rt op sa void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) { - if ( ! 
_Rd_ ) return; + if (!_Rd_) + return; // for now, don't support xmm _deleteGPRtoXMMreg(_Rt_, 1); _deleteGPRtoXMMreg(_Rd_, 0); - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { GPR_SET_CONST(_Rd_); constcode(); return; @@ -462,36 +536,44 @@ void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) // rd = rt MULT rs (SPECIAL) void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT) { - pxFail( "Unfinished code reached." ); + pxFail("Unfinished code reached."); // for now, don't support xmm - if( MULT ) { + if (MULT) + { _deleteGPRtoXMMreg(_Rd_, 0); } _deleteGPRtoXMMreg(_Rs_, 1); _deleteGPRtoXMMreg(_Rt_, 1); - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - if( MULT && _Rd_ ) GPR_SET_CONST(_Rd_); + if (GPR_IS_CONST2(_Rs_, _Rt_)) + { + if (MULT && _Rd_) + GPR_SET_CONST(_Rd_); constcode(); return; } - if( GPR_IS_CONST1(_Rs_) ) { + if (GPR_IS_CONST1(_Rs_)) + { //multicode(PROCESS_EE_CONSTS); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); + if (MULT && _Rd_) + GPR_DEL_CONST(_Rd_); return; } - if( GPR_IS_CONST1(_Rt_) ) { + if (GPR_IS_CONST1(_Rt_)) + { //multicode(PROCESS_EE_CONSTT); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); + if (MULT && _Rd_) + GPR_DEL_CONST(_Rd_); return; } multicode(0); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); + if (MULT && _Rd_) + GPR_DEL_CONST(_Rd_); } // EE XMM allocation code @@ -500,77 +582,96 @@ int eeRecompileCodeXMM(int xmminfo) int info = PROCESS_EE_XMM; // flush consts - if( xmminfo & XMMINFO_READT ) { - if( GPR_IS_CONST1( _Rt_ ) && !(g_cpuFlushedConstReg&(1<<_Rt_)) ) { - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], g_cpuConstRegs[_Rt_].UL[0]); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], g_cpuConstRegs[_Rt_].UL[1]); - g_cpuFlushedConstReg |= (1<<_Rt_); + if (xmminfo & XMMINFO_READT) + { + if (GPR_IS_CONST1(_Rt_) && !(g_cpuFlushedConstReg & (1 << _Rt_))) + { + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]); + xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 
g_cpuConstRegs[_Rt_].UL[1]); + g_cpuFlushedConstReg |= (1 << _Rt_); } } - if( xmminfo & XMMINFO_READS) { - if( GPR_IS_CONST1( _Rs_ ) && !(g_cpuFlushedConstReg&(1<<_Rs_)) ) { - xMOV(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]], g_cpuConstRegs[_Rs_].UL[0]); - xMOV(ptr32[&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ]], g_cpuConstRegs[_Rs_].UL[1]); - g_cpuFlushedConstReg |= (1<<_Rs_); + if (xmminfo & XMMINFO_READS) + { + if (GPR_IS_CONST1(_Rs_) && !(g_cpuFlushedConstReg & (1 << _Rs_))) + { + xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); + xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); + g_cpuFlushedConstReg |= (1 << _Rs_); } } - if( xmminfo & XMMINFO_WRITED ) { + if (xmminfo & XMMINFO_WRITED) + { GPR_DEL_CONST(_Rd_); } // add needed - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO)) + { _addNeededGPRtoXMMreg(XMMGPR_LO); } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI)) + { _addNeededGPRtoXMMreg(XMMGPR_HI); } - if( xmminfo & XMMINFO_READS) _addNeededGPRtoXMMreg(_Rs_); - if( xmminfo & XMMINFO_READT) _addNeededGPRtoXMMreg(_Rt_); - if( xmminfo & XMMINFO_WRITED ) _addNeededGPRtoXMMreg(_Rd_); + if (xmminfo & XMMINFO_READS) + _addNeededGPRtoXMMreg(_Rs_); + if (xmminfo & XMMINFO_READT) + _addNeededGPRtoXMMreg(_Rt_); + if (xmminfo & XMMINFO_WRITED) + _addNeededGPRtoXMMreg(_Rd_); // allocate - if( xmminfo & XMMINFO_READS) { + if (xmminfo & XMMINFO_READS) + { int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); - info |= PROCESS_EE_SET_S(reg)|PROCESS_EE_SETMODES(reg); + info |= PROCESS_EE_SET_S(reg) | PROCESS_EE_SETMODES(reg); } - if( xmminfo & XMMINFO_READT) { + if (xmminfo & XMMINFO_READT) + { int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); - info |= PROCESS_EE_SET_T(reg)|PROCESS_EE_SETMODET(reg); + info |= PROCESS_EE_SET_T(reg) | PROCESS_EE_SETMODET(reg); } - if( xmminfo & XMMINFO_WRITED ) { - int readd = 
MODE_WRITE|((xmminfo&XMMINFO_READD)?((xmminfo&XMMINFO_READD_LO)?(MODE_READ|MODE_READHALF):MODE_READ):0); + if (xmminfo & XMMINFO_WRITED) + { + int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? ((xmminfo & XMMINFO_READD_LO) ? (MODE_READ | MODE_READHALF) : MODE_READ) : 0); int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd); - if( regd < 0 ) { - if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { + if (regd < 0) + { + if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))) + { _freeXMMreg(EEREC_T); xmmregs[EEREC_T].inuse = 1; xmmregs[EEREC_T].reg = _Rd_; xmmregs[EEREC_T].mode = readd; regd = EEREC_T; } - else if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { + else if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))) + { _freeXMMreg(EEREC_S); xmmregs[EEREC_S].inuse = 1; xmmregs[EEREC_S].reg = _Rd_; xmmregs[EEREC_S].mode = readd; regd = EEREC_S; } - else regd = _allocGPRtoXMMreg(-1, _Rd_, readd); + else + regd = _allocGPRtoXMMreg(-1, _Rd_, readd); } info |= PROCESS_EE_SET_D(regd); } - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0))); + if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO)) + { + info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? 
MODE_WRITE : 0))); info |= PROCESS_EE_LO; } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0))); + if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI)) + { + info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0))); info |= PROCESS_EE_HI; } return info; @@ -581,56 +682,75 @@ int eeRecompileCodeXMM(int xmminfo) #define _Fs_ _Rd_ #define _Fd_ _Sa_ -#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0) -#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0) +#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITES : 0) +#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITET : 0) // rd = rs op rt void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo) { - int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1; + int mmregs = -1, mmregt = -1, mmregd = -1, mmregacc = -1; int info = PROCESS_EE_XMM; - if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); - if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); - if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); + if (xmminfo & XMMINFO_READS) + _addNeededFPtoXMMreg(_Fs_); + if (xmminfo & XMMINFO_READT) + _addNeededFPtoXMMreg(_Ft_); + if (xmminfo & (XMMINFO_WRITED | XMMINFO_READD)) + _addNeededFPtoXMMreg(_Fd_); + if (xmminfo & (XMMINFO_WRITEACC | XMMINFO_READACC)) + _addNeededFPACCtoXMMreg(); - if( xmminfo & XMMINFO_READT ) { - if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); - else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); + 
if (xmminfo & XMMINFO_READT) + { + if (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) + mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); + else + mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); } - if( xmminfo & XMMINFO_READS ) { - if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) { + if (xmminfo & XMMINFO_READS) + { + if ((!(xmminfo & XMMINFO_READT) || (mmregt >= 0)) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE)) + { mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); } - else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); + else + mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); } - if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); - if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); + if (mmregs >= 0) + info |= PROCESS_EE_SETMODES_XMM(mmregs); + if (mmregt >= 0) + info |= PROCESS_EE_SETMODET_XMM(mmregt); - if( xmminfo & XMMINFO_READD ) { - pxAssert( xmminfo & XMMINFO_WRITED ); + if (xmminfo & XMMINFO_READD) + { + pxAssert(xmminfo & XMMINFO_WRITED); mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ); } - if( xmminfo & XMMINFO_READACC ) { - if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) ) + if (xmminfo & XMMINFO_READACC) + { + if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE)) mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); - else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); + else + mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); } - if( xmminfo & XMMINFO_WRITEACC ) { + if (xmminfo & XMMINFO_WRITEACC) + { // check for last used, if so don't alloc a new XMM reg - int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0); + int readacc = MODE_WRITE | ((xmminfo & XMMINFO_READACC) ? 
MODE_READ : 0); mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); - if( mmregacc < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { + if (mmregacc < 0) + { + if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_))) + { + if (FPUINST_ISLIVE(_Ft_)) + { _freeXMMreg(mmregt); info &= ~PROCESS_EE_MODEWRITET; } @@ -640,8 +760,10 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo xmmregs[mmregt].type = XMMTYPE_FPACC; mmregacc = mmregt; } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { + else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_))) + { + if (FPUINST_ISLIVE(_Fs_)) + { _freeXMMreg(mmregs); info &= ~PROCESS_EE_MODEWRITES; } @@ -651,20 +773,27 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo xmmregs[mmregs].type = XMMTYPE_FPACC; mmregacc = mmregs; } - else mmregacc = _allocFPACCtoXMMreg(-1, readacc); + else + mmregacc = _allocFPACCtoXMMreg(-1, readacc); } xmmregs[mmregacc].mode |= MODE_WRITE; } - else if( xmminfo & XMMINFO_WRITED ) { + else if (xmminfo & XMMINFO_WRITED) + { // check for last used, if so don't alloc a new XMM reg - int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); - if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); - else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); + int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? 
MODE_READ : 0); + if (xmminfo & XMMINFO_READD) + mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + else + mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); - if( mmregd < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { + if (mmregd < 0) + { + if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_))) + { + if (FPUINST_ISLIVE(_Ft_)) + { _freeXMMreg(mmregt); info &= ~PROCESS_EE_MODEWRITET; } @@ -673,8 +802,10 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo xmmregs[mmregt].mode = readd; mmregd = mmregt; } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { + else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_))) + { + if (FPUINST_ISLIVE(_Fs_)) + { _freeXMMreg(mmregs); info &= ~PROCESS_EE_MODEWRITES; } @@ -683,8 +814,9 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo xmmregs[mmregs].mode = readd; mmregd = mmregs; } - else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { - if( FPUINST_ISLIVE(XMMFPU_ACC) ) + else if ((xmminfo & XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC))) + { + if (FPUINST_ISLIVE(XMMFPU_ACC)) _freeXMMreg(mmregacc); xmmregs[mmregacc].inuse = 1; xmmregs[mmregacc].reg = _Fd_; @@ -692,31 +824,41 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo xmmregs[mmregacc].type = XMMTYPE_FPREG; mmregd = mmregacc; } - else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + else + mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); } } - pxAssert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 ); + pxAssert(mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0); - if( xmminfo & XMMINFO_WRITED 
) { - pxAssert( mmregd >= 0 ); + if (xmminfo & XMMINFO_WRITED) + { + pxAssert(mmregd >= 0); info |= PROCESS_EE_SET_D(mmregd); } - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { - if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; - else pxAssert( !(xmminfo&XMMINFO_WRITEACC)); + if (xmminfo & (XMMINFO_WRITEACC | XMMINFO_READACC)) + { + if (mmregacc >= 0) + info |= PROCESS_EE_SET_ACC(mmregacc) | PROCESS_EE_ACC; + else + pxAssert(!(xmminfo & XMMINFO_WRITEACC)); } - if( xmminfo & XMMINFO_READS ) { - if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; + if (xmminfo & XMMINFO_READS) + { + if (mmregs >= 0) + info |= PROCESS_EE_SET_S(mmregs) | PROCESS_EE_S; } - if( xmminfo & XMMINFO_READT ) { - if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; + if (xmminfo & XMMINFO_READT) + { + if (mmregt >= 0) + info |= PROCESS_EE_SET_T(mmregt) | PROCESS_EE_T; } // at least one must be in xmm - if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { - pxAssert( mmregs >= 0 || mmregt >= 0 ); + if ((xmminfo & (XMMINFO_READS | XMMINFO_READT)) == (XMMINFO_READS | XMMINFO_READT)) + { + pxAssert(mmregs >= 0 || mmregt >= 0); } xmmcode(info); diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 3f880ee6cf..87480d1460 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -37,22 +37,22 @@ protected: bool m_free; public: - iAllocRegSSE() : - m_reg( xmm0 ), - m_free( !!_hasFreeXMMreg() ) + iAllocRegSSE() + : m_reg(xmm0) + , m_free(!!_hasFreeXMMreg()) { - if( m_free ) - m_reg = xRegisterSSE( _allocTempXMMreg( XMMT_INT, -1 ) ); + if (m_free) + m_reg = xRegisterSSE(_allocTempXMMreg(XMMT_INT, -1)); else - xStoreReg( m_reg ); + xStoreReg(m_reg); } ~iAllocRegSSE() { - if( m_free ) - _freeXMMreg( m_reg.Id ); + if (m_free) + _freeXMMreg(m_reg.Id); else - xRestoreReg( m_reg ); + xRestoreReg(m_reg); } operator xRegisterSSE() const { return m_reg; } @@ -62,37 +62,38 @@ 
public: // This instruction always uses an SSE register, even if all registers are allocated! It // saves an SSE register to memory first, performs the copy, and restores the register. // -static void iMOV128_SSE( const xIndirectVoid& destRm, const xIndirectVoid& srcRm ) +static void iMOV128_SSE(const xIndirectVoid& destRm, const xIndirectVoid& srcRm) { iAllocRegSSE reg; - xMOVDQA( reg, srcRm ); - xMOVDQA( destRm, reg ); + xMOVDQA(reg, srcRm); + xMOVDQA(destRm, reg); } // Moves 64 bits of data from point B to point A, using either SSE, or x86 registers // -static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm ) +static void iMOV64_Smart(const xIndirectVoid& destRm, const xIndirectVoid& srcRm) { - if (wordsize == 8) { + if (wordsize == 8) + { xMOV(rax, srcRm); xMOV(destRm, rax); return; } - if( _hasFreeXMMreg() ) + if (_hasFreeXMMreg()) { // Move things using MOVLPS: - xRegisterSSE reg( _allocTempXMMreg( XMMT_INT, -1 ) ); - xMOVL.PS( reg, srcRm ); - xMOVL.PS( destRm, reg ); - _freeXMMreg( reg.Id ); + xRegisterSSE reg(_allocTempXMMreg(XMMT_INT, -1)); + xMOVL.PS(reg, srcRm); + xMOVL.PS(destRm, reg); + _freeXMMreg(reg.Id); return; } - xMOV( eax, srcRm ); - xMOV( destRm, eax ); - xMOV( eax, srcRm+4 ); - xMOV( destRm+4, eax ); + xMOV(eax, srcRm); + xMOV(destRm, eax); + xMOV(eax, srcRm + 4); + xMOV(destRm + 4, eax); } /* @@ -159,80 +160,80 @@ namespace vtlb_private // Warning dirty ebx (in case someone got the very bad idea to move this code) EE::Profiler.EmitMem(); - xMOV( eax, arg1regd ); - xSHR( eax, VTLB_PAGE_BITS ); - xMOV( rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax*wordsize)] ); - u32* writeback = xLEA_Writeback( rbx ); - xADD( arg1reg, rax ); + xMOV(eax, arg1regd); + xSHR(eax, VTLB_PAGE_BITS); + xMOV(rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax * wordsize)]); + u32* writeback = xLEA_Writeback(rbx); + xADD(arg1reg, rax); return writeback; } // ------------------------------------------------------------------------ - 
static void DynGen_DirectRead( u32 bits, bool sign ) + static void DynGen_DirectRead(u32 bits, bool sign) { - switch( bits ) + switch (bits) { case 8: - if( sign ) - xMOVSX( eax, ptr8[arg1reg] ); + if (sign) + xMOVSX(eax, ptr8[arg1reg]); else - xMOVZX( eax, ptr8[arg1reg] ); - break; + xMOVZX(eax, ptr8[arg1reg]); + break; case 16: - if( sign ) - xMOVSX( eax, ptr16[arg1reg] ); + if (sign) + xMOVSX(eax, ptr16[arg1reg]); else - xMOVZX( eax, ptr16[arg1reg] ); - break; + xMOVZX(eax, ptr16[arg1reg]); + break; case 32: - xMOV( eax, ptr[arg1reg] ); - break; + xMOV(eax, ptr[arg1reg]); + break; case 64: - iMOV64_Smart( ptr[arg2reg], ptr[arg1reg] ); - break; + iMOV64_Smart(ptr[arg2reg], ptr[arg1reg]); + break; case 128: - iMOV128_SSE( ptr[arg2reg], ptr[arg1reg] ); - break; + iMOV128_SSE(ptr[arg2reg], ptr[arg1reg]); + break; - jNO_DEFAULT + jNO_DEFAULT } } // ------------------------------------------------------------------------ - static void DynGen_DirectWrite( u32 bits ) + static void DynGen_DirectWrite(u32 bits) { // TODO: x86Emitter can't use dil - switch(bits) + switch (bits) { //8 , 16, 32 : data on EDX case 8: - xMOV( edx, arg2regd ); - xMOV( ptr[arg1reg], dl ); - break; + xMOV(edx, arg2regd); + xMOV(ptr[arg1reg], dl); + break; case 16: - xMOV( ptr[arg1reg], xRegister16(arg2reg) ); - break; + xMOV(ptr[arg1reg], xRegister16(arg2reg)); + break; case 32: - xMOV( ptr[arg1reg], arg2regd ); - break; + xMOV(ptr[arg1reg], arg2regd); + break; case 64: - iMOV64_Smart( ptr[arg1reg], ptr[arg2reg] ); - break; + iMOV64_Smart(ptr[arg1reg], ptr[arg2reg]); + break; case 128: - iMOV128_SSE( ptr[arg1reg], ptr[arg2reg] ); - break; + iMOV128_SSE(ptr[arg1reg], ptr[arg2reg]); + break; } } -} +} // namespace vtlb_private // ------------------------------------------------------------------------ // allocate one page for our naked indirect dispatcher function. @@ -246,7 +247,7 @@ static __pagealigned u8 m_IndirectDispatchers[__pagesize]; // mode - 0 for read, 1 for write! 
// operandsize - 0 thru 4 represents 8, 16, 32, 64, and 128 bits. // -static u8* GetIndirectDispatcherPtr( int mode, int operandsize, int sign = 0 ) +static u8* GetIndirectDispatcherPtr(int mode, int operandsize, int sign = 0) { assert(mode || operandsize >= 2 ? !sign : true); @@ -259,46 +260,50 @@ static u8* GetIndirectDispatcherPtr( int mode, int operandsize, int sign = 0 ) // Gregory: a 32 bytes alignment is likely enough and more cache friendly const int A = 32; - return &m_IndirectDispatchers[(mode*(7*A)) + (sign*5*A) + (operandsize*A)]; + return &m_IndirectDispatchers[(mode * (7 * A)) + (sign * 5 * A) + (operandsize * A)]; } // ------------------------------------------------------------------------ // Generates a JS instruction that targets the appropriate templated instance of // the vtlb Indirect Dispatcher. // -static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false ) +static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false) { int szidx = 0; - switch( bits ) + switch (bits) { - case 8: szidx=0; break; - case 16: szidx=1; break; - case 32: szidx=2; break; - case 64: szidx=3; break; - case 128: szidx=4; break; + case 8: szidx = 0; break; + case 16: szidx = 1; break; + case 32: szidx = 2; break; + case 64: szidx = 3; break; + case 128: szidx = 4; break; jNO_DEFAULT; } - xJS( GetIndirectDispatcherPtr( mode, szidx, sign ) ); + xJS(GetIndirectDispatcherPtr(mode, szidx, sign)); } // ------------------------------------------------------------------------ // Generates the various instances of the indirect dispatchers // In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr // Out: eax: result (if mode < 64) -static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign ) +static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign) { - xMOVZX( eax, al ); - if (wordsize != 8) xSUB( arg1regd, 0x80000000 ); - xSUB( arg1regd, eax ); + xMOVZX(eax, al); + if (wordsize != 8) + 
xSUB(arg1regd, 0x80000000); + xSUB(arg1regd, eax); // jump to the indirect handler, which is a __fastcall C++ function. // [ecx is address, edx is data] sptr table = (sptr)vtlbdata.RWFT[bits][mode]; - if (table == (s32)table) { - xFastCall(ptrNative[(rax*wordsize) + table], arg1reg, arg2reg); - } else { + if (table == (s32)table) + { + xFastCall(ptrNative[(rax * wordsize) + table], arg1reg, arg2reg); + } + else + { xLEA(arg3reg, ptr[(void*)table]); - xFastCall(ptrNative[(rax*wordsize) + arg3reg], arg1reg, arg2reg); + xFastCall(ptrNative[(rax * wordsize) + arg3reg], arg1reg, arg2reg); } if (!mode) @@ -319,7 +324,7 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign ) } } - xJMP( rbx ); + xJMP(rbx); } // One-time initialization procedure. Multiple subsequent calls during the lifespan of the @@ -328,34 +333,35 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign ) void vtlb_dynarec_init() { static bool hasBeenCalled = false; - if (hasBeenCalled) return; + if (hasBeenCalled) + return; hasBeenCalled = true; // In case init gets called multiple times: - HostSys::MemProtectStatic( m_IndirectDispatchers, PageAccess_ReadWrite() ); + HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ReadWrite()); // clear the buffer to 0xcc (easier debugging). - memset( m_IndirectDispatchers, 0xcc, __pagesize); + memset(m_IndirectDispatchers, 0xcc, __pagesize); - for( int mode=0; mode<2; ++mode ) + for (int mode = 0; mode < 2; ++mode) { - for( int bits=0; bits<5; ++bits ) + for (int bits = 0; bits < 5; ++bits) { for (int sign = 0; sign < (!mode && bits < 2 ? 
2 : 1); sign++) { - xSetPtr( GetIndirectDispatcherPtr( mode, bits, !!sign ) ); + xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign)); - DynGen_IndirectTlbDispatcher( mode, bits, !!sign ); + DynGen_IndirectTlbDispatcher(mode, bits, !!sign); } } } - HostSys::MemProtectStatic( m_IndirectDispatchers, PageAccess_ExecOnly() ); + HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ExecOnly()); Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher"); } -static void vtlb_SetWriteback(u32 *writeback) +static void vtlb_SetWriteback(u32* writeback) { uptr val = (uptr)xGetPtr(); if (wordsize == 8) @@ -371,14 +377,14 @@ static void vtlb_SetWriteback(u32 *writeback) // Dynarec Load Implementations void vtlb_DynGenRead64(u32 bits) { - pxAssume( bits == 64 || bits == 128 ); + pxAssume(bits == 64 || bits == 128); u32* writeback = DynGen_PrepRegs(); - DynGen_IndirectDispatch( 0, bits ); - DynGen_DirectRead( bits, false ); + DynGen_IndirectDispatch(0, bits); + DynGen_DirectRead(bits, false); - vtlb_SetWriteback(writeback); // return target for indirect's call/ret + vtlb_SetWriteback(writeback); // return target for indirect's call/ret } // ------------------------------------------------------------------------ @@ -387,12 +393,12 @@ void vtlb_DynGenRead64(u32 bits) // Returns read value in eax. void vtlb_DynGenRead32(u32 bits, bool sign) { - pxAssume( bits <= 32 ); + pxAssume(bits <= 32); u32* writeback = DynGen_PrepRegs(); - DynGen_IndirectDispatch( 0, bits, sign && bits < 32 ); - DynGen_DirectRead( bits, sign ); + DynGen_IndirectDispatch(0, bits, sign && bits < 32); + DynGen_DirectRead(bits, sign); vtlb_SetWriteback(writeback); } @@ -400,25 +406,25 @@ void vtlb_DynGenRead32(u32 bits, bool sign) // ------------------------------------------------------------------------ // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. 
-void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) +void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const) { EE::Profiler.EmitConstMem(addr_const); - auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; - if( !vmv.isHandler(addr_const) ) + auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS]; + if (!vmv.isHandler(addr_const)) { auto ppf = vmv.assumePtr(addr_const); - switch( bits ) + switch (bits) { case 64: - iMOV64_Smart( ptr[arg2reg], ptr[(void*)ppf] ); - break; + iMOV64_Smart(ptr[arg2reg], ptr[(void*)ppf]); + break; case 128: - iMOV128_SSE( ptr[arg2reg], ptr[(void*)ppf] ); - break; + iMOV128_SSE(ptr[arg2reg], ptr[(void*)ppf]); + break; - jNO_DEFAULT + jNO_DEFAULT } } else @@ -427,14 +433,14 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) u32 paddr = vmv.assumeHandlerGetPAddr(addr_const); int szidx = 0; - switch( bits ) + switch (bits) { - case 64: szidx=3; break; - case 128: szidx=4; break; + case 64: szidx = 3; break; + case 128: szidx = 4; break; } iFlushCall(FLUSH_FULLVTLB); - xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg ); + xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg); } } @@ -446,33 +452,33 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. 
// -void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) +void vtlb_DynGenRead32_Const(u32 bits, bool sign, u32 addr_const) { EE::Profiler.EmitConstMem(addr_const); - auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; - if( !vmv.isHandler(addr_const) ) + auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS]; + if (!vmv.isHandler(addr_const)) { auto ppf = vmv.assumePtr(addr_const); - switch( bits ) + switch (bits) { case 8: - if( sign ) - xMOVSX( eax, ptr8[(u8*)ppf] ); + if (sign) + xMOVSX(eax, ptr8[(u8*)ppf]); else - xMOVZX( eax, ptr8[(u8*)ppf] ); - break; + xMOVZX(eax, ptr8[(u8*)ppf]); + break; case 16: - if( sign ) - xMOVSX( eax, ptr16[(u16*)ppf] ); + if (sign) + xMOVSX(eax, ptr16[(u16*)ppf]); else - xMOVZX( eax, ptr16[(u16*)ppf] ); - break; + xMOVZX(eax, ptr16[(u16*)ppf]); + break; case 32: - xMOV( eax, ptr32[(u32*)ppf] ); - break; + xMOV(eax, ptr32[(u32*)ppf]); + break; } } else @@ -481,38 +487,38 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) u32 paddr = vmv.assumeHandlerGetPAddr(addr_const); int szidx = 0; - switch( bits ) + switch (bits) { - case 8: szidx=0; break; - case 16: szidx=1; break; - case 32: szidx=2; break; + case 8: szidx = 0; break; + case 16: szidx = 1; break; + case 32: szidx = 2; break; } // Shortcut for the INTC_STAT register, which many games like to spin on heavily. 
- if( (bits == 32) && !EmuConfig.Speedhacks.IntcStat && (paddr == INTC_STAT) ) + if ((bits == 32) && !EmuConfig.Speedhacks.IntcStat && (paddr == INTC_STAT)) { - xMOV( eax, ptr[&psHu32( INTC_STAT )] ); + xMOV(eax, ptr[&psHu32(INTC_STAT)]); } else { iFlushCall(FLUSH_FULLVTLB); - xFastCall( vmv.assumeHandlerGetRaw(szidx, false), paddr ); + xFastCall(vmv.assumeHandlerGetRaw(szidx, false), paddr); // perform sign extension on the result: - if( bits==8 ) + if (bits == 8) { - if( sign ) - xMOVSX( eax, al ); + if (sign) + xMOVSX(eax, al); else - xMOVZX( eax, al ); + xMOVZX(eax, al); } - else if( bits==16 ) + else if (bits == 16) { - if( sign ) - xMOVSX( eax, ax ); + if (sign) + xMOVSX(eax, ax); else - xMOVZX( eax, ax ); + xMOVZX(eax, ax); } } } @@ -525,8 +531,8 @@ void vtlb_DynGenWrite(u32 sz) { u32* writeback = DynGen_PrepRegs(); - DynGen_IndirectDispatch( 1, sz ); - DynGen_DirectWrite( sz ); + DynGen_IndirectDispatch(1, sz); + DynGen_DirectWrite(sz); vtlb_SetWriteback(writeback); } @@ -536,40 +542,39 @@ void vtlb_DynGenWrite(u32 sz) // Generates code for a store instruction, where the address is a known constant. // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. 
-void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) +void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const) { EE::Profiler.EmitConstMem(addr_const); - auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; - if( !vmv.isHandler(addr_const) ) + auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS]; + if (!vmv.isHandler(addr_const)) { // TODO: x86Emitter can't use dil auto ppf = vmv.assumePtr(addr_const); - switch(bits) + switch (bits) { //8 , 16, 32 : data on arg2 case 8: - xMOV( edx, arg2regd ); - xMOV( ptr[(void*)ppf], dl ); - break; + xMOV(edx, arg2regd); + xMOV(ptr[(void*)ppf], dl); + break; case 16: - xMOV( ptr[(void*)ppf], xRegister16(arg2reg) ); - break; + xMOV(ptr[(void*)ppf], xRegister16(arg2reg)); + break; case 32: - xMOV( ptr[(void*)ppf], arg2regd ); - break; + xMOV(ptr[(void*)ppf], arg2regd); + break; case 64: - iMOV64_Smart( ptr[(void*)ppf], ptr[arg2reg] ); - break; + iMOV64_Smart(ptr[(void*)ppf], ptr[arg2reg]); + break; case 128: - iMOV128_SSE( ptr[(void*)ppf], ptr[arg2reg] ); - break; + iMOV128_SSE(ptr[(void*)ppf], ptr[arg2reg]); + break; } - } else { @@ -577,17 +582,17 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) u32 paddr = vmv.assumeHandlerGetPAddr(addr_const); int szidx = 0; - switch( bits ) + switch (bits) { - case 8: szidx=0; break; - case 16: szidx=1; break; - case 32: szidx=2; break; - case 64: szidx=3; break; - case 128: szidx=4; break; + case 8: szidx=0; break; + case 16: szidx=1; break; + case 32: szidx=2; break; + case 64: szidx=3; break; + case 128: szidx=4; break; } iFlushCall(FLUSH_FULLVTLB); - xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg ); + xFastCall(vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg); } } @@ -603,7 +608,7 @@ void vtlb_DynV2P() xAND(ecx, VTLB_PAGE_MASK); // vaddr & VTLB_PAGE_MASK xSHR(eax, VTLB_PAGE_BITS); - xMOV(eax, ptr[xComplexAddress(rdx, vtlbdata.ppmap, rax*4)]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS]; + xMOV(eax, ptr[xComplexAddress(rdx, vtlbdata.ppmap, rax * 4)]); // 
vtlbdata.ppmap[vaddr >> VTLB_PAGE_BITS]; xOR(eax, ecx); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 738a1533a0..febdd0b4b5 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -39,9 +39,9 @@ void mVUreserveCache(microVU& mVU) mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb); mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index)); - mVU.cache = mVU.index ? - (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU1recOffset, mVU.cacheSize * _1mb) : - (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU0recOffset, mVU.cacheSize * _1mb); + mVU.cache = mVU.index + ? (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU1recOffset, mVU.cacheSize * _1mb) + : (u8*)mVU.cache_reserve->Reserve(GetVmMemory().MainMemory(), HostMemoryMap::mVU0recOffset, mVU.cacheSize * _1mb); mVU.cache_reserve->ThrowIfNotOk(); } @@ -55,17 +55,17 @@ void mVUinit(microVU& mVU, uint vuIndex) memzero(mVU.prog); - mVU.index = vuIndex; - mVU.cop2 = 0; - mVU.vuMemSize = (mVU.index ? 0x4000 : 0x1000); + mVU.index = vuIndex; + mVU.cop2 = 0; + mVU.vuMemSize = (mVU.index ? 0x4000 : 0x1000); mVU.microMemSize = (mVU.index ? 0x4000 : 0x1000); - mVU.progSize = (mVU.index ? 0x4000 : 0x1000) / 4; - mVU.progMemMask = mVU.progSize - 1; - mVU.cacheSize = vuIndex ? mVU1cacheReserve : mVU0cacheReserve; - mVU.cache = NULL; - mVU.dispCache = NULL; - mVU.startFunct = NULL; - mVU.exitFunct = NULL; + mVU.progSize = (mVU.index ? 0x4000 : 0x1000) / 4; + mVU.progMemMask = mVU.progSize-1; + mVU.cacheSize = vuIndex ? 
mVU1cacheReserve : mVU0cacheReserve; + mVU.cache = NULL; + mVU.dispCache = NULL; + mVU.startFunct = NULL; + mVU.exitFunct = NULL; mVUreserveCache(mVU); @@ -110,17 +110,17 @@ void mVUreset(microVU& mVU, bool resetReserve) mVU.profiler.Reset(mVU.index); // Program Variables - mVU.prog.cleared = 1; - mVU.prog.isSame = -1; - mVU.prog.cur = NULL; - mVU.prog.total = 0; - mVU.prog.curFrame = 0; + mVU.prog.cleared = 1; + mVU.prog.isSame = -1; + mVU.prog.cur = NULL; + mVU.prog.total = 0; + mVU.prog.curFrame = 0; // Setup Dynarec Cache Limits for Each Program u8* z = mVU.cache; mVU.prog.x86start = z; - mVU.prog.x86ptr = z; - mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb); + mVU.prog.x86ptr = z; + mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb); //memset(mVU.prog.x86start, 0xcc, mVU.cacheSize*_1mb); for (u32 i = 0; i < (mVU.progSize / 2); i++) @@ -294,13 +294,9 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg) { auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; if ((range.start < 0) || (range.end < 0)) - { DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end); - } if (memcmp_mmx(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) - { return false; - } } } mVU.prog.cleared = 0; @@ -314,10 +310,10 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) { microVU& mVU = mVUx; microProgramQuick& quick = mVU.prog.quick[mVU.regs().start_pc / 8]; - microProgramList* list = mVU.prog.prog[mVU.regs().start_pc / 8]; + microProgramList* list = mVU.prog.prog [mVU.regs().start_pc / 8]; - if (!quick.prog) - { // If null, we need to search for new program + if (!quick.prog) // If null, we need to search for new program + { std::deque::iterator it(list->begin()); for (; it != list->end(); ++it) { @@ -326,7 +322,7 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) if (b) { quick.block = it[0]->block[startPC / 8]; - quick.prog = it[0]; + 
quick.prog = it[0]; list->erase(it); list->push_front(quick.prog); @@ -342,11 +338,11 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) // If cleared and program not found, make a new program instance mVU.prog.cleared = 0; - mVU.prog.isSame = 1; - mVU.prog.cur = mVUcreateProg(mVU, mVU.regs().start_pc / 8); - void* entryPoint = mVUblockFetch(mVU, startPC, pState); - quick.block = mVU.prog.cur->block[startPC / 8]; - quick.prog = mVU.prog.cur; + mVU.prog.isSame = 1; + mVU.prog.cur = mVUcreateProg(mVU, mVU.regs().start_pc/8); + void* entryPoint = mVUblockFetch(mVU, startPC, pState); + quick.block = mVU.prog.cur->block[startPC/8]; + quick.prog = mVU.prog.cur; list->push_front(mVU.prog.cur); //mVUprintUniqueRatio(mVU); return entryPoint; @@ -371,16 +367,8 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) //------------------------------------------------------------------ // recMicroVU0 / recMicroVU1 //------------------------------------------------------------------ -recMicroVU0::recMicroVU0() -{ - m_Idx = 0; - IsInterpreter = false; -} -recMicroVU1::recMicroVU1() -{ - m_Idx = 1; - IsInterpreter = false; -} +recMicroVU0::recMicroVU0() { m_Idx = 0; IsInterpreter = false; } +recMicroVU1::recMicroVU1() { m_Idx = 1; IsInterpreter = false; } void recMicroVU0::Vsync() noexcept { mVUvsyncUpdate(microVU0); } void recMicroVU1::Vsync() noexcept { mVUvsyncUpdate(microVU1); } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index e1c6bc4d0c..539cb51176 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -37,33 +37,39 @@ using namespace x86Emitter; #include "microVU_Profiler.h" #include "common/Perf.h" -struct microBlockLink { - microBlock block; - microBlockLink* next; +struct microBlockLink +{ + microBlock block; + microBlockLink* next; }; -class microBlockManager { +class microBlockManager +{ private: - microBlockLink* qBlockList, *qBlockEnd; // Quick Search - microBlockLink* fBlockList, *fBlockEnd; // Full Search + microBlockLink *qBlockList, 
*qBlockEnd; // Quick Search + microBlockLink *fBlockList, *fBlockEnd; // Full Search int qListI, fListI; public: inline int getFullListCount() const { return fListI; } - microBlockManager() { + microBlockManager() + { qListI = fListI = 0; qBlockEnd = qBlockList = NULL; fBlockEnd = fBlockList = NULL; } ~microBlockManager() { reset(); } - void reset() { - for(microBlockLink* linkI = qBlockList; linkI != NULL; ) { + void reset() + { + for (microBlockLink* linkI = qBlockList; linkI != NULL;) + { microBlockLink* freeI = linkI; safe_delete_array(linkI->block.jumpCache); linkI = linkI->next; _aligned_free(freeI); } - for(microBlockLink* linkI = fBlockList; linkI != NULL; ) { + for (microBlockLink* linkI = fBlockList; linkI != NULL;) + { microBlockLink* freeI = linkI; safe_delete_array(linkI->block.jumpCache); linkI = linkI->next; @@ -73,11 +79,16 @@ public: qBlockEnd = qBlockList = NULL; fBlockEnd = fBlockList = NULL; }; - microBlock* add(microBlock* pBlock) { + microBlock* add(microBlock* pBlock) + { microBlock* thisBlock = search(&pBlock->pState); - if (!thisBlock) { - u8 fullCmp = pBlock->pState.needExactMatch; - if (fullCmp) fListI++; else qListI++; + if (!thisBlock) + { + u8 fullCmp = pBlock->pState.needExactMatch; + if (fullCmp) + fListI++; + else + qListI++; microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList; microBlockLink*& blockEnd = fullCmp ? 
fBlockEnd : qBlockEnd; @@ -85,28 +96,35 @@ public: newBlock->block.jumpCache = NULL; newBlock->next = NULL; - if (blockEnd) { - blockEnd->next = newBlock; - blockEnd = newBlock; + if (blockEnd) + { + blockEnd->next = newBlock; + blockEnd = newBlock; } - else { + else + { blockEnd = blockList = newBlock; } memcpy(&newBlock->block, pBlock, sizeof(microBlock)); - thisBlock = &newBlock->block; + thisBlock = &newBlock->block; } return thisBlock; } - __ri microBlock* search(microRegInfo* pState) { - if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) - for(microBlockLink* linkI = fBlockList; linkI != NULL; linkI = linkI->next) { + __ri microBlock* search(microRegInfo* pState) + { + if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State) + { + for (microBlockLink* linkI = fBlockList; linkI != NULL; linkI = linkI->next) + { if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo))) return &linkI->block; } } - else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) - for(microBlockLink* linkI = qBlockList; linkI != NULL; linkI = linkI->next) { + else // Can do Simple Search (Only Matches the Important Pipeline Stuff) + { + for (microBlockLink* linkI = qBlockList; linkI != NULL; linkI = linkI->next) + { if (linkI->block.pState.quick32[0] != pState->quick32[0]) continue; if (linkI->block.pState.quick32[1] != pState->quick32[1]) continue; if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue; @@ -116,112 +134,123 @@ public: } return NULL; } - void printInfo(int pc, bool printQuick) { + void printInfo(int pc, bool printQuick) + { int listI = printQuick ? qListI : fListI; - if (listI < 7) return; + if (listI < 7) + return; microBlockLink* linkI = printQuick ? 
qBlockList : fBlockList; - for (int i = 0; i <= listI; i++) { - u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4; + for (int i = 0; i <= listI; i++) + { + u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4; for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j]; for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg; for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j]; - DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]" - "[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q, - linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v, - linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch, - linkI->block.pState.blockType, viCRC, vfCRC); + DevCon.WriteLn(Color_Green, + "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]" + "[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", + pc, i, crc, linkI->block.pState.q, + linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v, + linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch, + linkI->block.pState.blockType, viCRC, vfCRC); linkI = linkI->next; } } }; -struct microRange { +struct microRange +{ s32 start; // Start PC (The opcode the block starts at) s32 end; // End PC (The opcode the block ends with) }; -#define mProgSize (0x4000/4) -struct microProgram { - u32 data [mProgSize]; // Holds a copy of the VU microProgram - microBlockManager* block[mProgSize/2]; // Array of Block Managers - std::deque* ranges; // The ranges of the microProgram that have already been recompiled +#define mProgSize (0x4000 / 4) +struct microProgram +{ + u32 data [mProgSize]; // Holds a copy of the VU microProgram + 
microBlockManager* block[mProgSize / 2]; // Array of Block Managers + std::deque* ranges; // The ranges of the microProgram that have already been recompiled u32 startPC; // Start PC of this program - int idx; // Program index + int idx; // Program index }; typedef std::deque microProgramList; -struct microProgramQuick { - microBlockManager* block; // Quick reference to valid microBlockManager for current startPC - microProgram* prog; // The microProgram who is the owner of 'block' +struct microProgramQuick +{ + microBlockManager* block; // Quick reference to valid microBlockManager for current startPC + microProgram* prog; // The microProgram who is the owner of 'block' }; -struct microProgManager { - microIR IRinfo; // IR information - microProgramList* prog [mProgSize/2]; // List of microPrograms indexed by startPC values - microProgramQuick quick[mProgSize/2]; // Quick reference to valid microPrograms for current execution - microProgram* cur; // Pointer to currently running MicroProgram - int total; // Total Number of valid MicroPrograms - int isSame; // Current cached microProgram is Exact Same program as mVU.regs().Micro (-1 = unknown, 0 = No, 1 = Yes) - int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one) - u32 curFrame; // Frame Counter - u8* x86ptr; // Pointer to program's recompilation code - u8* x86start; // Start of program's rec-cache - u8* x86end; // Limit of program's rec-cache - microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution) +struct microProgManager +{ + microIR IRinfo; // IR information + microProgramList* prog [mProgSize/2]; // List of microPrograms indexed by startPC values + microProgramQuick quick[mProgSize/2]; // Quick reference to valid microPrograms for current execution + microProgram* cur; // Pointer to currently running MicroProgram + int total; // Total Number of valid MicroPrograms + int isSame; // Current 
cached microProgram is Exact Same program as mVU.regs().Micro (-1 = unknown, 0 = No, 1 = Yes) + int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one) + u32 curFrame; // Frame Counter + u8* x86ptr; // Pointer to program's recompilation code + u8* x86start; // Start of program's rec-cache + u8* x86end; // Limit of program's rec-cache + microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution) }; -static const uint mVUdispCacheSize = __pagesize; // Dispatcher Cache Size (in bytes) -static const uint mVUcacheSafeZone = 3; // Safe-Zone for program recompilation (in megabytes) -static const uint mVU0cacheReserve = 64; // mVU0 Reserve Cache Size (in megabytes) -static const uint mVU1cacheReserve = 64; // mVU1 Reserve Cache Size (in megabytes) +static const uint mVUdispCacheSize = __pagesize; // Dispatcher Cache Size (in bytes) +static const uint mVUcacheSafeZone = 3; // Safe-Zone for program recompilation (in megabytes) +static const uint mVU0cacheReserve = 64; // mVU0 Reserve Cache Size (in megabytes) +static const uint mVU1cacheReserve = 64; // mVU1 Reserve Cache Size (in megabytes) -struct microVU { +struct microVU +{ __aligned16 u32 statFlag[4]; // 4 instances of status flag (backup for xgkick) __aligned16 u32 macFlag [4]; // 4 instances of mac flag (used in execution) __aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution) - __aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2() + __aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2() __aligned16 u32 xmmBackup[8][4]; // Backup for xmm0~xmm7 - u32 index; // VU Index (VU0 or VU1) - u32 cop2; // VU is in COP2 mode? 
(No/Yes) - u32 vuMemSize; // VU Main Memory Size (in bytes) - u32 microMemSize; // VU Micro Memory Size (in bytes) - u32 progSize; // VU Micro Memory Size (in u32's) - u32 progMemMask; // VU Micro Memory Size (in u32's) - u32 cacheSize; // VU Cache Size + u32 index; // VU Index (VU0 or VU1) + u32 cop2; // VU is in COP2 mode? (No/Yes) + u32 vuMemSize; // VU Main Memory Size (in bytes) + u32 microMemSize; // VU Micro Memory Size (in bytes) + u32 progSize; // VU Micro Memory Size (in u32's) + u32 progMemMask; // VU Micro Memory Size (in u32's) + u32 cacheSize; // VU Cache Size - microProgManager prog; // Micro Program Data - microProfiler profiler; // Opcode Profiler - std::unique_ptr regAlloc; // Reg Alloc Class - std::unique_ptr logFile; // Log File Pointer + microProgManager prog; // Micro Program Data + microProfiler profiler; // Opcode Profiler + std::unique_ptr regAlloc; // Reg Alloc Class + std::unique_ptr logFile; // Log File Pointer RecompiledCodeReserve* cache_reserve; - u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) - u8* dispCache; // Dispatchers Cache (where startFunct and exitFunct are written to) - u8* startFunct; // Function Ptr to the recompiler dispatcher (start) - u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit) - u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume) - u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit) - u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick - u32 code; // Contains the current Instruction - u32 divFlag; // 1 instance of I/D flags - u32 VIbackup; // Holds a backup of a VI reg if modified before a branch - u32 VIxgkick; // Holds a backup of a VI reg used for xgkick-delays - u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) - u32 badBranch; // For Branches in Branch Delay Slots, holds Address the first Branch went to + 8 - u32 evilBranch; // For Branches in 
Branch Delay Slots, holds Address to Jump to - u32 p; // Holds current P instance index - u32 q; // Holds current Q instance index - u32 totalCycles; // Total Cycles that mVU is expected to run for - u32 cycles; // Cycles Counter + u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) + u8* dispCache; // Dispatchers Cache (where startFunct and exitFunct are written to) + u8* startFunct; // Function Ptr to the recompiler dispatcher (start) + u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit) + u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume) + u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit) + u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick + u32 code; // Contains the current Instruction + u32 divFlag; // 1 instance of I/D flags + u32 VIbackup; // Holds a backup of a VI reg if modified before a branch + u32 VIxgkick; // Holds a backup of a VI reg used for xgkick-delays + u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) + u32 badBranch; // For Branches in Branch Delay Slots, holds Address the first Branch went to + 8 + u32 evilBranch; // For Branches in Branch Delay Slots, holds Address to Jump to + u32 p; // Holds current P instance index + u32 q; // Holds current Q instance index + u32 totalCycles; // Total Cycles that mVU is expected to run for + u32 cycles; // Cycles Counter VURegs& regs() const { return ::vuRegs[index]; } - __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } - __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } - __fi VIFregisters& getVifRegs() const { + __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } + __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } + __fi VIFregisters& getVifRegs() const + { return (index && THREAD_VU1) ? 
vu1Thread.vifRegs : regs().GetVifRegs(); } }; @@ -234,35 +263,37 @@ __aligned16 microVU microVU1; int mVUdebugNow = 0; // Main Functions -extern void mVUclear(mV, u32, u32); -extern void mVUreset(microVU& mVU, bool resetReserve); +extern void mVUclear(mV, u32, u32); +extern void mVUreset(microVU& mVU, bool resetReserve); extern void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState); _mVUt extern void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr); // Prototypes for Linux -extern void __fastcall mVUcleanUpVU0(); -extern void __fastcall mVUcleanUpVU1(); +extern void __fastcall mVUcleanUpVU0(); +extern void __fastcall mVUcleanUpVU1(); mVUop(mVUopU); mVUop(mVUopL); // Private Functions -extern void mVUcacheProg (microVU& mVU, microProgram& prog); -extern void mVUdeleteProg(microVU& mVU, microProgram*& prog); +extern void mVUcacheProg(microVU& mVU, microProgram& prog); +extern void mVUdeleteProg(microVU& mVU, microProgram*& prog); _mVUt extern void* mVUsearchProg(u32 startPC, uptr pState); extern void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); extern void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); // recCall Function Pointer -typedef void (__fastcall *mVUrecCall)(u32, u32); +typedef void(__fastcall* mVUrecCall)(u32, u32); typedef void (*mVUrecCallXG)(void); -template -void makeUnique(T& v) { // Removes Duplicates +template +void makeUnique(T& v) +{ // Removes Duplicates v.erase(unique(v.begin(), v.end()), v.end()); } -template -void sortVector(T& v) { +template +void sortVector(T& v) +{ sort(v.begin(), v.end()); } diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 1634c34cf8..2ce274cf3a 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -25,7 +25,7 @@ __fi static const x32& getFlagReg(uint fInst) { - static const x32* const gprFlags[4] = { &gprF0, &gprF1, &gprF2, &gprF3 }; + static const x32* const gprFlags[4] = {&gprF0, &gprF1, &gprF2, &gprF3}; pxAssert(fInst < 4); return *gprFlags[fInst]; } 
@@ -67,11 +67,12 @@ __ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance) setBitSFLAG(reg, regT, 0x00f0, 0x0080); // SS Bit xAND(regT, 0xffff0000); // DS/DI/OS/US/D/I/O/U Bits xSHR(regT, 14); - xOR (reg, regT); + xOR(reg, regT); } // Denormalizes Status Flag -__ri void mVUallocSFLAGd(u32* memAddr) { +__ri void mVUallocSFLAGd(u32* memAddr) +{ xMOV(edx, ptr32[memAddr]); xMOV(eax, edx); xSHR(eax, 3); @@ -80,11 +81,11 @@ __ri void mVUallocSFLAGd(u32* memAddr) { xMOV(ecx, edx); xSHL(ecx, 11); xAND(ecx, 0x1800); - xOR (eax, ecx); + xOR(eax, ecx); xSHL(edx, 14); xAND(edx, 0x3cf0000); - xOR (eax, edx); + xOR(eax, edx); } __fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance) @@ -95,20 +96,20 @@ __fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance) __fi void mVUallocMFLAGb(mV, const x32& reg, int fInstance) { //xAND(reg, 0xffff); - if (fInstance < 4) xMOV(ptr32[&mVU.macFlag[fInstance]], reg); // microVU - else xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], reg); // macroVU + if (fInstance < 4) xMOV(ptr32[&mVU.macFlag[fInstance]], reg); // microVU + else xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], reg); // macroVU } __fi void mVUallocCFLAGa(mV, const x32& reg, int fInstance) { - if (fInstance < 4) xMOV(reg, ptr32[&mVU.clipFlag[fInstance]]); // microVU - else xMOV(reg, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); // macroVU + if (fInstance < 4) xMOV(reg, ptr32[&mVU.clipFlag[fInstance]]); // microVU + else xMOV(reg, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); // macroVU } __fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance) { - if (fInstance < 4) xMOV(ptr32[&mVU.clipFlag[fInstance]], reg); // microVU - else xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], reg); // macroVU + if (fInstance < 4) xMOV(ptr32[&mVU.clipFlag[fInstance]], reg); // microVU + else xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], reg); // macroVU // On COP2 modifying the CLIP flag we need to update the microVU version for when it's restored on new program if (fInstance == 0xff) @@ 
-125,7 +126,7 @@ __fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance) __ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) { - if (!_reg_) + if (!_reg_) xXOR(GPRreg, GPRreg); else if (signext) xMOVSX(GPRreg, ptr16[&mVU.regs().VI[_reg_].SL]); @@ -135,15 +136,18 @@ __ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) __ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_) { - if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) + if (mVUlow.backupVI) // Backs up reg to memory (used when VI is modified b4 a branch) + { xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]); xMOV (ptr32[&mVU.VIbackup], gprT3); } - if (_reg_ == 0) { + if (_reg_ == 0) + { return; } - else if (_reg_ < 16) { + else if (_reg_ < 16) + { xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id)); } } @@ -168,5 +172,6 @@ __ri void writeQreg(const xmm& reg, int qInstance) { if (qInstance) xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); - else xMOVSS(xmmPQ, reg); + else + xMOVSS(xmmPQ, reg); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 796f356826..22d00e7fca 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -34,9 +34,11 @@ __ri void analyzeReg1(mV, int xReg, microVFreg& vfRead) { } // Write to a VF reg -__ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp) { - if (xReg) { - #define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y] +__ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp) +{ + if (xReg) + { + #define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y] if (_X) { bReg(xReg, isLowOp).x = 4; vfWrite.reg = xReg; vfWrite.x = 4; } if (_Y) { bReg(xReg, isLowOp).y = 4; vfWrite.reg = xReg; vfWrite.y = 4; } if (_Z) { bReg(xReg, isLowOp).z = 4; vfWrite.reg = xReg; vfWrite.z = 4; } @@ -45,24 +47,30 @@ __ri void analyzeReg2(mV, int xReg, microVFreg& vfWrite, bool isLowOp) { } // Read a VF reg (BC 
opcodes) -__ri void analyzeReg3(mV, int xReg, microVFreg& vfRead) { - if (xReg) { - if (_bc_x) { +__ri void analyzeReg3(mV, int xReg, microVFreg& vfRead) +{ + if (xReg) + { + if (_bc_x) + { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x); vfRead.reg = xReg; vfRead.x = 1; } - else if (_bc_y) { + else if (_bc_y) + { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; } - else if (_bc_z) { + else if (_bc_z) + { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; } - else { + else + { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); vfRead.reg = xReg; vfRead.w = 1; @@ -71,8 +79,10 @@ __ri void analyzeReg3(mV, int xReg, microVFreg& vfRead) { } // For Clip Opcode -__ri void analyzeReg4(mV, int xReg, microVFreg& vfRead) { - if (xReg) { +__ri void analyzeReg4(mV, int xReg, microVFreg& vfRead) +{ + if (xReg) + { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); vfRead.reg = xReg; vfRead.w = 1; @@ -80,9 +90,12 @@ __ri void analyzeReg4(mV, int xReg, microVFreg& vfRead) { } // Read VF reg (FsF/FtF) -__ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead) { - if (xReg) { - switch (fxf) { +__ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead) +{ + if (xReg) + { + switch (fxf) + { case 0: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].x); vfRead.reg = xReg; vfRead.x = 1; break; case 1: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; break; case 2: mVUstall = std::max(mVUstall, mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; break; @@ -92,8 +105,10 @@ __ri void analyzeReg5(mV, int xReg, int fxf, microVFreg& vfRead) { } // Flips xyzw stalls to yzwx (MR32 Opcode) -__ri void analyzeReg6(mV, int xReg, microVFreg& vfRead) { - if (xReg) { +__ri void analyzeReg6(mV, int xReg, microVFreg& vfRead) +{ + if (xReg) + { if (_X) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].y); vfRead.reg = xReg; vfRead.y = 1; } if (_Y) { mVUstall = std::max(mVUstall, 
mVUregs.VF[xReg].z); vfRead.reg = xReg; vfRead.z = 1; } if (_Z) { mVUstall = std::max(mVUstall, mVUregs.VF[xReg].w); vfRead.reg = xReg; vfRead.w = 1; } @@ -102,8 +117,10 @@ __ri void analyzeReg6(mV, int xReg, microVFreg& vfRead) { } // Reading a VI reg -__ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead) { - if (xReg) { +__ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead) +{ + if (xReg) + { mVUstall = std::max(mVUstall, mVUregs.VI[xReg]); viRead.reg = xReg; viRead.used = 1; @@ -111,8 +128,10 @@ __ri void analyzeVIreg1(mV, int xReg, microVIreg& viRead) { } // Writing to a VI reg -__ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles) { - if (xReg) { +__ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles) +{ + if (xReg) + { mVUconstReg[xReg].isValid = 0; mVUregsTemp.VIreg = xReg; mVUregsTemp.VI = aCycles; @@ -121,18 +140,43 @@ __ri void analyzeVIreg2(mV, int xReg, microVIreg& viWrite, int aCycles) { } } -#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = std::max(mVUstall, mVUregs.q); } -#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? (mVUregs.p - 1) : 0)); } -#define analyzeRreg() { mVUregsTemp.r = 1; } -#define analyzeXGkick1() { mVUstall = std::max(mVUstall, mVUregs.xgkick); } -#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; } -#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } } +#define analyzeQreg(x) \ + { \ + mVUregsTemp.q = x; \ + mVUstall = std::max(mVUstall, mVUregs.q); \ + } +#define analyzePreg(x) \ + { \ + mVUregsTemp.p = x; \ + mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? 
(mVUregs.p - 1) : 0)); \ + } +#define analyzeRreg() \ + { \ + mVUregsTemp.r = 1; \ + } +#define analyzeXGkick1() \ + { \ + mVUstall = std::max(mVUstall, mVUregs.xgkick); \ + } +#define analyzeXGkick2(x) \ + { \ + mVUregsTemp.xgkick = x; \ + } +#define setConstReg(x, v) \ + { \ + if (x) \ + { \ + mVUconstReg[x].isValid = 1; \ + mVUconstReg[x].regValue = v; \ + } \ + } //------------------------------------------------------------------ // FMAC1 - Normal FMAC Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { +__fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) +{ sFLAG.doFlag = 1; analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg1(mVU, Ft, mVUup.VF_read[1]); @@ -143,7 +187,8 @@ __fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { // FMAC2 - ABS/FTOI/ITOF Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) { +__fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) +{ analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg2(mVU, Ft, mVUup.VF_write, 0); } @@ -152,7 +197,8 @@ __fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) { // FMAC3 - BC(xyzw) FMAC Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { +__fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) +{ sFLAG.doFlag = 1; analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg3(mVU, Ft, mVUup.VF_read[1]); @@ -163,7 +209,8 @@ __fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { // FMAC4 - Clip FMAC Opcode //------------------------------------------------------------------ -__fi void mVUanalyzeFMAC4(mV, int Fs, int Ft) { +__fi void mVUanalyzeFMAC4(mV, int Fs, int Ft) +{ cFLAG.doFlag = 1; analyzeReg1(mVU, Fs, mVUup.VF_read[0]); analyzeReg4(mVU, Ft, mVUup.VF_read[1]); @@ -173,30 +220,42 @@ __fi void mVUanalyzeFMAC4(mV, int Fs, int Ft) { // IALU - IALU Opcodes 
//------------------------------------------------------------------ -__fi void mVUanalyzeIALU1(mV, int Id, int Is, int It) { - if (!Id) mVUlow.isNOP = 1; +__fi void mVUanalyzeIALU1(mV, int Id, int Is, int It) +{ + if (!Id) + mVUlow.isNOP = 1; analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[1]); analyzeVIreg2(mVU, Id, mVUlow.VI_write, 1); } -__fi void mVUanalyzeIALU2(mV, int Is, int It) { - if (!It) mVUlow.isNOP = 1; +__fi void mVUanalyzeIALU2(mV, int Is, int It) +{ + if (!It) + mVUlow.isNOP = 1; analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); } -__fi void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) { +__fi void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) +{ mVUanalyzeIALU2(mVU, Is, It); - if (!Is) { setConstReg(It, imm); } + if (!Is) + { + setConstReg(It, imm); + } } //------------------------------------------------------------------ // MR32 - MR32 Opcode //------------------------------------------------------------------ -__fi void mVUanalyzeMR32(mV, int Fs, int Ft) { - if (!Ft) { mVUlow.isNOP = 1; } +__fi void mVUanalyzeMR32(mV, int Fs, int Ft) +{ + if (!Ft) + { + mVUlow.isNOP = 1; + } analyzeReg6(mVU, Fs, mVUlow.VF_read[0]); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); } @@ -205,7 +264,8 @@ __fi void mVUanalyzeMR32(mV, int Fs, int Ft) { // FDIV - DIV/SQRT/RSQRT Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { +__fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) +{ analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzeReg5(mVU, Ft, Ftf, mVUlow.VF_read[1]); analyzeQreg(xCycles); @@ -215,12 +275,14 @@ __fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { // EFU - EFU Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) { +__fi void mVUanalyzeEFU1(mV, int Fs, int 
Fsf, u8 xCycles) +{ analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzePreg(xCycles); } -__fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { +__fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) +{ analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzePreg(xCycles); } @@ -229,8 +291,10 @@ __fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { // MFP - MFP Opcode //------------------------------------------------------------------ -__fi void mVUanalyzeMFP(mV, int Ft) { - if (!Ft) mVUlow.isNOP = 1; +__fi void mVUanalyzeMFP(mV, int Ft) +{ + if (!Ft) + mVUlow.isNOP = 1; analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); } @@ -238,8 +302,10 @@ __fi void mVUanalyzeMFP(mV, int Ft) { // MOVE - MOVE Opcode //------------------------------------------------------------------ -__fi void mVUanalyzeMOVE(mV, int Fs, int Ft) { - if (!Ft||(Ft == Fs)) mVUlow.isNOP = 1; +__fi void mVUanalyzeMOVE(mV, int Fs, int Ft) +{ + if (!Ft || (Ft == Fs)) + mVUlow.isNOP = 1; analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); } @@ -248,36 +314,59 @@ __fi void mVUanalyzeMOVE(mV, int Fs, int Ft) { // LQx - LQ/LQD/LQI Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { +__fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) +{ analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); - analyzeReg2 (mVU, Ft, mVUlow.VF_write, 1); - if (!Ft) { if (writeIs && Is) { mVUlow.noWriteVF = 1; } else { mVUlow.isNOP = 1; } } - if (writeIs) { analyzeVIreg2(mVU, Is, mVUlow.VI_write, 1); } + analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); + if (!Ft) + { + if (writeIs && Is) + { + mVUlow.noWriteVF = 1; + } + else + { + mVUlow.isNOP = 1; + } + } + if (writeIs) + { + analyzeVIreg2(mVU, Is, mVUlow.VI_write, 1); + } } //------------------------------------------------------------------ // SQx - SQ/SQD/SQI Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { - 
analyzeReg1 (mVU, Fs, mVUlow.VF_read[0]); +__fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) +{ + analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[0]); - if (writeIt) { analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); } + if (writeIt) + { + analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); + } } //------------------------------------------------------------------ // R*** - R Reg Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeR1(mV, int Fs, int Fsf) { +__fi void mVUanalyzeR1(mV, int Fs, int Fsf) +{ analyzeReg5(mVU, Fs, Fsf, mVUlow.VF_read[0]); analyzeRreg(); } -__fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { - if (!Ft) { - if (canBeNOP) mVUlow.isNOP = 1; - else mVUlow.noWriteVF = 1; +__fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) +{ + if (!Ft) + { + if (canBeNOP) + mVUlow.isNOP = 1; + else + mVUlow.noWriteVF = 1; } analyzeReg2(mVU, Ft, mVUlow.VF_write, 1); analyzeRreg(); @@ -286,21 +375,25 @@ __fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -__ri void flagSet(mV, bool setMacFlag) { +__ri void flagSet(mV, bool setMacFlag) +{ int curPC = iPC; int calcOPS = 0; - + //Check which ops need to do the flag settings, also check for runs of ops as they can do multiple calculations to get the sticky status flags (VP2) //Make sure we get the last 4 calculations (Bloody Roar 3, possibly others) - for (int i = mVUcount, j = 0; i > 0; i--, j++) { + for (int i = mVUcount, j = 0; i > 0; i--, j++) + { j += mVUstall; incPC(-2); - if (calcOPS >= 4 && mVUup.VF_write.reg) break; + if (calcOPS >= 4 && mVUup.VF_write.reg) + break; if (sFLAG.doFlag && (j >= 3)) { - if (setMacFlag) mFLAG.doFlag = 1; + if (setMacFlag) + mFLAG.doFlag = 1; sFLAG.doNonSticky = 1; calcOPS++; } @@ -310,22 +403,29 @@ __ri void flagSet(mV, bool setMacFlag) { 
setCode(); } -__ri void mVUanalyzeSflag(mV, int It) { +__ri void mVUanalyzeSflag(mV, int It) +{ mVUlow.readFlags = true; analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); - if (!It) { mVUlow.isNOP = 1; } - else { + if (!It) + { + mVUlow.isNOP = 1; + } + else + { //mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block mVUinfo.swapOps = 1; flagSet(mVU, 0); - if (mVUcount < 4) { + if (mVUcount < 4) + { if (!(mVUpBlock->pState.needExactMatch & 1)) // The only time this should happen is on the first program block DevCon.WriteLn(Color_Green, "microVU%d: pState's sFlag Info was expected to be set [%04x]", getIndex, xPC); } } } -__ri void mVUanalyzeFSSET(mV) { +__ri void mVUanalyzeFSSET(mV) +{ mVUlow.isFSSET = 1; mVUlow.readFlags = true; } @@ -334,15 +434,21 @@ __ri void mVUanalyzeFSSET(mV) { // Mflag - Mac Flag Opcodes //------------------------------------------------------------------ -__ri void mVUanalyzeMflag(mV, int Is, int It) { +__ri void mVUanalyzeMflag(mV, int Is, int It) +{ mVUlow.readFlags = true; analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); - if (!It) { mVUlow.isNOP = 1; } - else { + if (!It) + { + mVUlow.isNOP = 1; + } + else + { mVUinfo.swapOps = 1; flagSet(mVU, 1); - if (mVUcount < 4) { + if (mVUcount < 4) + { if (!(mVUpBlock->pState.needExactMatch & 2)) // The only time this should happen is on the first program block DevCon.WriteLn(Color_Green, "microVU%d: pState's mFlag Info was expected to be set [%04x]", getIndex, xPC); } @@ -353,10 +459,12 @@ __ri void mVUanalyzeMflag(mV, int Is, int It) { // Cflag - Clip Flag Opcodes //------------------------------------------------------------------ -__fi void mVUanalyzeCflag(mV, int It) { +__fi void mVUanalyzeCflag(mV, int It) +{ mVUinfo.swapOps = 1; mVUlow.readFlags = true; - if (mVUcount < 4) { + if (mVUcount < 4) + { if (!(mVUpBlock->pState.needExactMatch & 4)) // The only time this should happen is on the first program block DevCon.WriteLn(Color_Green, 
"microVU%d: pState's cFlag Info was expected to be set [%04x]", getIndex, xPC); } @@ -367,7 +475,8 @@ __fi void mVUanalyzeCflag(mV, int It) { // XGkick //------------------------------------------------------------------ -__fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { +__fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) +{ analyzeVIreg1(mVU, Fs, mVUlow.VI_read[0]); analyzeXGkick1(); // Stall will cause mVUincCycles() to trigger pending xgkick analyzeXGkick2(xCycles); @@ -375,7 +484,7 @@ __fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { // this code stalls on the same instruction. The only case where this // will be a problem with, is if you have very-specifically placed // FMxxx or FSxxx opcodes checking flags near this instruction AND - // the XGKICK instruction stalls. No-game should be effected by + // the XGKICK instruction stalls. No-game should be effected by // this minor difference. } @@ -387,9 +496,12 @@ __fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { // value read by the branch is the value the VI reg had at the start // of the instruction 4 instructions ago (assuming no stalls). // See: https://forums.pcsx2.net/Thread-blog-PS2-VU-Vector-Unit-Documentation-Part-1 -static void analyzeBranchVI(mV, int xReg, bool& infoVar) { - if (!xReg) return; - if (mVUstall) { // I assume a stall on branch means the vi reg is not modified directly b4 the branch... +static void analyzeBranchVI(mV, int xReg, bool& infoVar) +{ + if (!xReg) + return; + if (mVUstall) // I assume a stall on branch means the vi reg is not modified directly b4 the branch... 
+ { DevCon.Warning("microVU%d: %d cycle stall on branch instruction [%04x]", getIndex, mVUstall, xPC); return; } @@ -398,84 +510,112 @@ static void analyzeBranchVI(mV, int xReg, bool& infoVar) { int iEnd = 4; int bPC = iPC; incPC2(-2); - for (i = 0; i < iEnd && cyc < iEnd; i++) { - if (i && mVUstall) { + for (i = 0; i < iEnd && cyc < iEnd; i++) + { + if (i && mVUstall) + { DevCon.Warning("microVU%d: Branch VI-Delay with %d cycle stall (%d) [%04x]", getIndex, mVUstall, i, xPC); } - if (i == (int)mVUcount) { + if (i == (int)mVUcount) + { bool warn = false; if (i == 1) warn = true; - if (mVUpBlock->pState.viBackUp == xReg) { + if (mVUpBlock->pState.viBackUp == xReg) + { DevCon.WriteLn(Color_Green, "microVU%d: Loading Branch VI value from previous block", getIndex); if (i == 0) warn = true; infoVar = true; - j = i; i++; + j = i; + i++; } - if (warn) DevCon.Warning("microVU%d: Branch VI-Delay with small block (%d) [%04x]", getIndex, i, xPC); + if (warn) + DevCon.Warning("microVU%d: Branch VI-Delay with small block (%d) [%04x]", getIndex, i, xPC); break; // if (warn), we don't have enough information to always guarantee the correct result. 
} - if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) { - if (mVUlow.readFlags) { - if (i) DevCon.Warning("microVU%d: Branch VI-Delay with Read Flags Set (%d) [%04x]", getIndex, i, xPC); + if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) + { + if (mVUlow.readFlags) + { + if (i) + DevCon.Warning("microVU%d: Branch VI-Delay with Read Flags Set (%d) [%04x]", getIndex, i, xPC); break; // Not sure if on the above "if (i)" case, if we need to "continue" or if we should "break" } j = i; } - else if (i == 0) { + else if (i == 0) + { break; } cyc += mVUstall + 1; incPC2(-2); } - if (i) { - if (!infoVar) { + if (i) + { + if (!infoVar) + { iPC = bPC; - incPC2(-2*(j+1)); + incPC2(-2 * (j + 1)); mVUlow.backupVI = true; infoVar = true; } iPC = bPC; - DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x][%03d]", getIndex, j+1, xPC, mVU.prog.cur->idx); + DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x][%03d]", getIndex, j + 1, xPC, mVU.prog.cur->idx); } - else { + else + { iPC = bPC; } } /* // Dead Code... 
the old version of analyzeBranchVI() -__fi void analyzeBranchVI(mV, int xReg, bool& infoVar) { - if (!xReg) return; +__fi void analyzeBranchVI(mV, int xReg, bool& infoVar) +{ + if (!xReg) + return; int i; int iEnd = std::min(5, mVUcount + 1); int bPC = iPC; incPC2(-2); - for (i = 0; i < iEnd; i++) { - if ((i == mVUcount) && (i < 5)) { - if (mVUpBlock->pState.viBackUp == xReg) { + for (i = 0; i < iEnd; i++) + { + if ((i == mVUcount) && (i < 5)) + { + if (mVUpBlock->pState.viBackUp == xReg) + { infoVar = 1; i++; } break; } - if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) { + if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) + { if (mVUlow.readFlags || i == 5) break; - if (i == 0) { incPC2(-2); continue; } + if (i == 0) + { + incPC2(-2); + continue; + } if (((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used)) - || ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used))) - { incPC2(-2); continue; } + || ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used))) + { + incPC2(-2); + continue; + } } break; } - if (i) { - if (!infoVar) { + if (i) + { + if (!infoVar) + { incPC2(2); mVUlow.backupVI = 1; infoVar = 1; @@ -488,48 +628,53 @@ __fi void analyzeBranchVI(mV, int xReg, bool& infoVar) { */ // Branch in Branch Delay-Slots -__ri int mVUbranchCheck(mV) { +__ri int mVUbranchCheck(mV) +{ if (!mVUcount) return 0; incPC(-2); - if (mVUlow.branch) { + if (mVUlow.branch) + { u32 branchType = mVUlow.branch; - if (doBranchInDelaySlot) { - mVUlow.badBranch = true; + if (doBranchInDelaySlot) + { + mVUlow.badBranch = true; incPC(2); mVUlow.evilBranch = true; - if(mVUlow.branch == 2 || mVUlow.branch == 10) //Needs linking, we can only guess this if the next is not conditional + if (mVUlow.branch == 2 || mVUlow.branch == 10) // Needs linking, we can only guess this if the next is not conditional { - if(branchType <= 2 || branchType >= 9) //First branch is not conditional so we know what the link will be - { //So we can let the existing evil 
block do its thing! We know where to get the addr :) + if (branchType <= 2 || branchType >= 9) // First branch is not conditional so we know what the link will be so we can let the existing evil block do its thing! We know where to get the addr :) + { DevCon.Warning("yo"); DevCon.Warning("yo"); DevCon.Warning("yo"); DevCon.Warning("yo"); DevCon.Warning("yo"); DevCon.Warning("----"); - + mVUregs.blockType = 2; } //Else it is conditional, so we need to do some nasty processing later in microVU_Branch.inl } - else { - mVUregs.blockType = 2; //Second branch doesn't need linking, so can let it run its evil block course (MGS2 for testing) + else + { + mVUregs.blockType = 2; //Second branch doesn't need linking, so can let it run its evil block course (MGS2 for testing) } mVUregs.needExactMatch |= 7; // This might not be necessary, but w/e... - mVUregs.flagInfo = 0; + mVUregs.flagInfo = 0; DevCon.Warning("microVU%d: %s in %s delay slot! [%04x] - If game broken report to PCSX2 Team", mVU.index, - branchSTR[mVUlow.branch&0xf], branchSTR[branchType&0xf], xPC); + branchSTR[mVUlow.branch & 0xf], branchSTR[branchType & 0xf], xPC); return 1; } - else { + else + { incPC(2); mVUlow.isNOP = true; DevCon.Warning("microVU%d: %s in %s delay slot! 
[%04x]", mVU.index, - branchSTR[mVUlow.branch&0xf], branchSTR[branchType&0xf], xPC); + branchSTR[mVUlow.branch & 0xf], branchSTR[branchType & 0xf], xPC); return 0; } } @@ -537,40 +682,49 @@ __ri int mVUbranchCheck(mV) { return 0; } -__fi void mVUanalyzeCondBranch1(mV, int Is) { +__fi void mVUanalyzeCondBranch1(mV, int Is) +{ analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); - if (!mVUbranchCheck(mVU)) { + if (!mVUbranchCheck(mVU)) + { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); } } -__fi void mVUanalyzeCondBranch2(mV, int Is, int It) { +__fi void mVUanalyzeCondBranch2(mV, int Is, int It) +{ analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[1]); - if (!mVUbranchCheck(mVU)) { + if (!mVUbranchCheck(mVU)) + { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); analyzeBranchVI(mVU, It, mVUlow.memReadIt); } } -__fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) { +__fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) +{ mVUbranchCheck(mVU); - if (isBAL) { - analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); + if (isBAL) + { + analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); setConstReg(It, bSaveAddr); } } -__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) { +__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) +{ mVUlow.branch = (isJALR) ? 
10 : 9; mVUbranchCheck(mVU); - if (mVUconstReg[Is].isValid && doConstProp) { + if (mVUconstReg[Is].isValid && doConstProp) + { mVUlow.constJump.isValid = 1; mVUlow.constJump.regValue = mVUconstReg[Is].regValue; //DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU.index); } analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); - if (isJALR) { + if (isJALR) + { analyzeVIreg2(mVU, It, mVUlow.VI_write, 1); setConstReg(It, bSaveAddr); } diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index 91755b7a2e..330b1d3f2f 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -27,25 +27,17 @@ __fi int getLastFlagInst(microRegInfo& pState, int* xFlag, int flagType, int isE return (((pState.flagInfo >> (2 * flagType + 2)) & 3) - 1) & 3; } -void mVU0clearlpStateJIT() -{ - if (!microVU0.prog.cleared) - memzero(microVU0.prog.lpState); -} -void mVU1clearlpStateJIT() -{ - if (!microVU1.prog.cleared) - memzero(microVU1.prog.lpState); -} +void mVU0clearlpStateJIT() { if (!microVU0.prog.cleared) memzero(microVU0.prog.lpState); } +void mVU1clearlpStateJIT() { if (!microVU1.prog.cleared) memzero(microVU1.prog.lpState); } void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) { int fStatus = getLastFlagInst(mVUpBlock->pState, mFC->xStatus, 0, isEbit); - int fMac = getLastFlagInst(mVUpBlock->pState, mFC->xMac, 1, isEbit); - int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit); - int qInst = 0; - int pInst = 0; + int fMac = getLastFlagInst(mVUpBlock->pState, mFC->xMac, 1, isEbit); + int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit); + int qInst = 0; + int pInst = 0; microBlock stateBackup; memcpy(&stateBackup, &mVUregs, sizeof(mVUregs)); //backup the state, it's about to get screwed with. 
@@ -78,9 +70,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) // Save P/Q Regs if (qInst) - { xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - } xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ); xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ); @@ -89,9 +79,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) if (isVU1) { if (pInst) - { - xPSHUF.D(xmmPQ, xmmPQ, 0xb4); - } // Swap Pending/Active P + xPSHUF.D(xmmPQ, xmmPQ, 0xb4); // Swap Pending/Active P xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2 xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3 @@ -107,8 +95,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); - if (!isEbit) - { // Backup flag instances + if (!isEbit) // Backup flag instances + { xMOVAPS(xmmT1, ptr128[mVU.macFlag]); xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); @@ -119,8 +107,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); } - else - { // Flush flag instances + else // Flush flag instances + { xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); xSHUF.PS(xmmT1, xmmT1, 0); xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); @@ -134,8 +122,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1); } - if (isEbit) - { // Clear 'is busy' Flags + if (isEbit) // Clear 'is busy' Flags + { xMOV(ptr32[&mVU.regs().nextBlockCycles], 0); if (!mVU.index || !THREAD_VU1) { @@ -147,8 +135,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) else xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles); - if (isEbit != 2) - { // Save PC, and Jump to Exit Point + if (isEbit != 2) // Save PC, and Jump to Exit Point + { 
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xJMP(mVU.exitFunct); } @@ -159,10 +147,10 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { int fStatus = getLastFlagInst(mVUpBlock->pState, mFC->xStatus, 0, isEbit && isEbit != 3); - int fMac = getLastFlagInst(mVUpBlock->pState, mFC->xMac, 1, isEbit && isEbit != 3); - int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit && isEbit != 3); - int qInst = 0; - int pInst = 0; + int fMac = getLastFlagInst(mVUpBlock->pState, mFC->xMac, 1, isEbit && isEbit != 3); + int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit && isEbit != 3); + int qInst = 0; + int pInst = 0; microBlock stateBackup; memcpy(&stateBackup, &mVUregs, sizeof(mVUregs)); //backup the state, it's about to get screwed with. if (!isEbit || isEbit == 3) @@ -196,9 +184,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) // Save P/Q Regs if (qInst) - { xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - } xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ); xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ); @@ -207,9 +193,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) if (isVU1) { if (pInst) - { - xPSHUF.D(xmmPQ, xmmPQ, 0xb4); - } // Swap Pending/Active P + xPSHUF.D(xmmPQ, xmmPQ, 0xb4); // Swap Pending/Active P xPSHUF.D(xmmPQ, xmmPQ, 0xC6); // 3 0 1 2 xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); xPSHUF.D(xmmPQ, xmmPQ, 0x87); // 0 2 1 3 @@ -225,8 +209,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); - if (!isEbit || isEbit == 3) - { // Backup flag instances + if (!isEbit || isEbit == 3) // Backup flag instances + { xMOVAPS(xmmT1, ptr128[mVU.macFlag]); xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); @@ -237,8 +221,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); 
xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); } - else - { // Flush flag instances + else // Flush flag instances + { xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); xSHUF.PS(xmmT1, xmmT1, 0); xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); @@ -253,8 +237,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) } - if ((isEbit && isEbit != 3)) - { // Clear 'is busy' Flags + if ((isEbit && isEbit != 3)) // Clear 'is busy' Flags + { xMOV(ptr32[&mVU.regs().nextBlockCycles], 0); if (!mVU.index || !THREAD_VU1) { @@ -266,8 +250,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) else xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles); - if (isEbit != 2 && isEbit != 3) - { // Save PC, and Jump to Exit Point + if (isEbit != 2 && isEbit != 3) // Save PC, and Jump to Exit Point + { xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xJMP(mVU.exitFunct); } @@ -277,15 +261,12 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) // Recompiles Code for Proper Flags and Q/P regs on Block Linkings void mVUsetupBranch(mV, microFlagCycles& mFC) { - mVU.regAlloc->flushAll(); // Flush Allocated Regs - mVUsetupFlags(mVU, mFC); // Shuffle Flag Instances + mVUsetupFlags(mVU, mFC); // Shuffle Flag Instances // Shuffle P/Q regs since every block starts at instance #0 if (mVU.p || mVU.q) - { xPSHUF.D(xmmPQ, xmmPQ, shufflePQ); - } mVU.p = 0, mVU.q = 0; } @@ -295,13 +276,9 @@ void normBranchCompile(microVU& mVU, u32 branchPC) blockCreate(branchPC / 8); pBlock = mVUblocks[branchPC / 8]->search((microRegInfo*)&mVUregs); if (pBlock) - { xJMP(pBlock->x86ptrStart); - } else - { mVUcompile(mVU, branchPC, (uptr)&mVUregs); - } } void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) @@ -310,8 +287,8 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) mVUsetupBranch(mVU, mFC); mVUbackupRegs(mVU); - if (!mVUpBlock->jumpCache) - { // Create the jump cache for this block + if (!mVUpBlock->jumpCache) // Create the jump cache for this block + { 
mVUpBlock->jumpCache = new microJumpCache[mProgSize / 2]; } @@ -480,9 +457,9 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) mVUDTendProgram(mVU, &mFC, 2); xCMP(ptr16[&mVU.branch], 0); xForwardJump32 tJMP(xInvertCond((JccComparisonType)JMPcc)); - incPC(4); // Set PC to First instruction of Non-Taken Side - xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); - xJMP(mVU.exitFunct); + incPC(4); // Set PC to First instruction of Non-Taken Side + xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); + xJMP(mVU.exitFunct); tJMP.SetTarget(); incPC(-4); // Go Back to Branch Opcode to get branchAddr iPC = branchAddr(mVU) / 4; @@ -504,9 +481,9 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) mVUDTendProgram(mVU, &mFC, 2); xCMP(ptr16[&mVU.branch], 0); xForwardJump32 dJMP(xInvertCond((JccComparisonType)JMPcc)); - incPC(4); // Set PC to First instruction of Non-Taken Side - xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); - xJMP(mVU.exitFunct); + incPC(4); // Set PC to First instruction of Non-Taken Side + xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); + xJMP(mVU.exitFunct); dJMP.SetTarget(); incPC(-4); // Go Back to Branch Opcode to get branchAddr iPC = branchAddr(mVU) / 4; @@ -537,8 +514,8 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) xJMP(mVU.exitFunct); iPC = tempPC; } - if (mVUup.eBit) - { // Conditional Branch With E-Bit Set + if (mVUup.eBit) // Conditional Branch With E-Bit Set + { if (mVUlow.evilBranch) DevCon.Warning("End on evil branch! - Not implemented! 
- If game broken report to PCSX2 Team"); @@ -547,9 +524,9 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) incPC(3); xForwardJump32 eJMP(((JccComparisonType)JMPcc)); - incPC(1); // Set PC to First instruction of Non-Taken Side - xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); - xJMP(mVU.exitFunct); + incPC(1); // Set PC to First instruction of Non-Taken Side + xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); + xJMP(mVU.exitFunct); eJMP.SetTarget(); incPC(-4); // Go Back to Branch Opcode to get branchAddr @@ -558,15 +535,14 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) xJMP(mVU.exitFunct); return; } - else - { // Normal Conditional Branch + else // Normal Conditional Branch + { xCMP(ptr16[&mVU.branch], 0); incPC(3); - if (mVUlow.evilBranch) //We are dealing with an evil evil block, so we need to process this slightly differently + if (mVUlow.evilBranch) // We are dealing with an evil evil block, so we need to process this slightly differently { - - if (mVUlow.branch == 10 || mVUlow.branch == 2) //Evil branch is a jump of some measure + if (mVUlow.branch == 10 || mVUlow.branch == 2) // Evil branch is a jump of some measure { //Because of how it is linked, we need to make sure the target is recompiled if taken condJumpProcessingEvil(mVU, mFC, JMPcc); @@ -578,8 +554,8 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) blockCreate(iPC / 2); bBlock = mVUblocks[iPC / 2]->search((microRegInfo*)&mVUregs); incPC2(-1); - if (bBlock) - { // Branch non-taken has already been compiled + if (bBlock) // Branch non-taken has already been compiled + { xJcc(xInvertCond((JccComparisonType)JMPcc), bBlock->x86ptrStart); incPC(-3); // Go back to branch opcode (to get branch imm addr) normBranchCompile(mVU, branchAddr(mVU)); @@ -608,10 +584,10 @@ void normJump(mV, microFlagCycles& mFC) { DevCon.Warning("M-Bit on Jump! 
Please report if broken"); } - if (mVUlow.constJump.isValid) - { // Jump Address is Constant - if (mVUup.eBit) - { // E-bit Jump + if (mVUlow.constJump.isValid) // Jump Address is Constant + { + if (mVUup.eBit) // E-bit Jump + { iPC = (mVUlow.constJump.regValue * 2) & (mVU.progMemMask); mVUendProgram(mVU, &mFC, 1); return; @@ -666,8 +642,8 @@ void normJump(mV, microFlagCycles& mFC) xJMP(mVU.exitFunct); eJMP.SetTarget(); } - if (mVUup.eBit) - { // E-bit Jump + if (mVUup.eBit) // E-bit Jump + { mVUendProgram(mVU, &mFC, 2); xMOV(gprT1, ptr32[&mVU.branch]); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1); diff --git a/pcsx2/x86/microVU_Clamp.inl b/pcsx2/x86/microVU_Clamp.inl index 5a145ed373..e2639e8eea 100644 --- a/pcsx2/x86/microVU_Clamp.inl +++ b/pcsx2/x86/microVU_Clamp.inl @@ -20,23 +20,26 @@ //------------------------------------------------------------------ const __aligned16 u32 sse4_minvals[2][4] = { - { 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 - { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111 + {0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff}, //1000 + {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}, //1111 }; const __aligned16 u32 sse4_maxvals[2][4] = { - { 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000 - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111 + {0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}, //1000 + {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}, //1111 }; // Used for Result Clamping // Note: This function will not preserve NaN values' sign. // The theory behind this is that when we compute a result, and we've // gotten a NaN value, then something went wrong; and the NaN's sign -// is not to be trusted. Games like positive values better usually, +// is not to be trusted. Games like positive values better usually, // and its faster... so just always make NaNs into positive infinity. 
-void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) { - if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) { - switch (xyzw) { +void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) +{ + if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) + { + switch (xyzw) + { case 1: case 2: case 4: case 8: xMIN.SS(reg, ptr32[mVUglob.maxvals]); xMAX.SS(reg, ptr32[mVUglob.minvals]); @@ -54,27 +57,34 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) { // Note 2: Using regalloc here seems to contaminate some regs in certain games. // Must be some specific case I've overlooked (or I used regalloc improperly on an opcode) // so we just use a temporary mem location for our backup for now... (non-sse4 version only) -void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) { - if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) { - int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1; +void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) +{ + if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) + { + int i = (xyzw == 1 || xyzw == 2 || xyzw == 4 || xyzw == 8) ? 
0 : 1; xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); return; } - else mVUclamp1(reg, regT1in, xyzw, bClampE); + else + mVUclamp1(reg, regT1in, xyzw, bClampE); } // Used for operand clamping on every SSE instruction (add/sub/mul/div) -void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw) { - if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1); +void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw) +{ + if (clampE) + mVUclamp2(mVU, reg, regT1, xyzw, 1); } // Used for result clamping on every SSE instruction (add/sub/mul/div) // Note: Disabled in "preserve sign" mode because in certain cases it // makes too much code-gen, and you get jump8-overflows in certain -// emulated opcodes (causing crashes). Since we're clamping the operands -// with mVUclamp3, we should almost never be getting a NaN result, +// emulated opcodes (causing crashes). Since we're clamping the operands +// with mVUclamp3, we should almost never be getting a NaN result, // but this clamp is just a precaution just-in-case. -void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) { - if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1); +void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) +{ + if (clampE && !CHECK_VU_SIGN_OVERFLOW) + mVUclamp1(reg, regT1, xyzw, 1); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 8d34a5207e..4ca94ac9eb 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,39 +19,46 @@ // Messages Called at Execution Time... //------------------------------------------------------------------ -static void __fc mVUbadOp0 (u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting... Block contains an illegal opcode. [%04x] [%03d]", pc, prog); } -static void __fc mVUbadOp1 (u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting... Block contains an illegal opcode. 
[%04x] [%03d]", pc, prog); } -static void __fc mVUwarning0(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%03d]", pc, prog); } -static void __fc mVUwarning1(u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%03d]", pc, prog); } -static void __fc mVUprintPC1(u32 pc) { Console.WriteLn("Block Start PC = 0x%04x", pc); } -static void __fc mVUprintPC2(u32 pc) { Console.WriteLn("Block End PC = 0x%04x", pc); } +static void __fc mVUbadOp0 (u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting... Block contains an illegal opcode. [%04x] [%03d]", pc, prog); } +static void __fc mVUbadOp1 (u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting... Block contains an illegal opcode. [%04x] [%03d]", pc, prog); } +static void __fc mVUwarning0(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%03d]", pc, prog); } +static void __fc mVUwarning1(u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%03d]", pc, prog); } +static void __fc mVUprintPC1(u32 pc) { Console.WriteLn("Block Start PC = 0x%04x", pc); } +static void __fc mVUprintPC2(u32 pc) { Console.WriteLn("Block End PC = 0x%04x", pc); } //------------------------------------------------------------------ // Program Range Checking and Setting up Ranges //------------------------------------------------------------------ // Used by mVUsetupRange -__fi void mVUcheckIsSame(mV) { - if (mVU.prog.isSame == -1) { +__fi void mVUcheckIsSame(mV) +{ + if (mVU.prog.isSame == -1) + { mVU.prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize); } - if (mVU.prog.isSame == 0) { + if (mVU.prog.isSame == 0) + { mVUcacheProg(mVU, *mVU.prog.cur); mVU.prog.isSame = 1; } } // Sets up microProgram PC ranges based on whats been recompiled -void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) { +void mVUsetupRange(microVU& mVU, 
s32 pc, bool isStartPC) +{ std::deque*& ranges = mVUcurProg.ranges; pxAssertDev(pc <= (s64)mVU.microMemSize, pxsFmt("microVU%d: PC outside of VU memory PC=0x%04x", mVU.index, pc)); - if (isStartPC) { // Check if startPC is already within a block we've recompiled + if (isStartPC) // Check if startPC is already within a block we've recompiled + { std::deque::const_iterator it(ranges->begin()); - for ( ; it != ranges->end(); ++it) { - if ((pc >= it[0].start) && (pc <= it[0].end)) { + for (; it != ranges->end(); ++it) + { + if ((pc >= it[0].start) && (pc <= it[0].end)) + { if (it[0].start != it[0].end) { - microRange mRange = { it[0].start, it[0].end }; + microRange mRange = {it[0].start, it[0].end}; ranges->erase(it); ranges->push_front(mRange); return; // new start PC is inside the range of another range @@ -59,40 +66,48 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) { } } } - else if (mVUrange.end >= pc) { + else if (mVUrange.end >= pc) + { // existing range covers more area than current PC so no need to process it return; } mVUcheckIsSame(mVU); - if (isStartPC) { + if (isStartPC) + { microRange mRange = {pc, -1}; ranges->push_front(mRange); return; } - if (mVUrange.start <= pc) { + if (mVUrange.start <= pc) + { mVUrange.end = pc; bool mergedRange = false; - s32 rStart = mVUrange.start; - s32 rEnd = mVUrange.end; + s32 rStart = mVUrange.start; + s32 rEnd = mVUrange.end; std::deque::iterator it(ranges->begin()); - for (++it; it != ranges->end(); ++it) { - if((it[0].start >= rStart) && (it[0].start <= rEnd)) { // Starts after this prog but starts before the end of current prog + for (++it; it != ranges->end(); ++it) + { + if ((it[0].start >= rStart) && (it[0].start <= rEnd)) // Starts after this prog but starts before the end of current prog + { it[0].start = std::min(it[0].start, rStart); // Choose the earlier start mergedRange = true; } // Make sure we check both as the start on the other one may be later, we don't want to delete that - if ((it[0].end 
>= rStart) && (it[0].end <= rEnd)) { // Ends after this prog starts but ends before this one ends - it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program + if ((it[0].end >= rStart) && (it[0].end <= rEnd)) // Ends after this prog starts but ends before this one ends + { + it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program mergedRange = true; } } - if (mergedRange) { + if (mergedRange) + { ranges->erase(ranges->begin()); } } - else { + else + { mVUrange.end = mVU.microMemSize; DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%d]", mVU.index, mVUrange.start, mVUrange.end); microRange mRange = {0, pc}; @@ -104,34 +119,55 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) { // Execute VU Opcode/Instruction (Upper and Lower) //------------------------------------------------------------------ -__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); } -__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); } -__ri void flushRegs(mV) { if (!doRegAlloc) mVU.regAlloc->flushAll(); } +__ri void doUpperOp(mV) +{ + mVUopU(mVU, 1); + mVUdivSet(mVU); +} +__ri void doLowerOp(mV) +{ + incPC(-1); + mVUopL(mVU, 1); + incPC(1); +} +__ri void flushRegs(mV) +{ + if (!doRegAlloc) + mVU.regAlloc->flushAll(); +} -void doIbit(mV) { - if (mVUup.iBit) { +void doIbit(mV) +{ + if (mVUup.iBit) + { incPC(-1); mVU.regAlloc->clearRegVF(33); - if (EmuConfig.Gamefixes.IbitHack) { + if (EmuConfig.Gamefixes.IbitHack) + { xMOV(gprT1, ptr32[&curI]); xMOV(ptr32[&mVU.getVI(REG_I)], gprT1); } - else { + else + { u32 tempI; - if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { + if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) + { DevCon.WriteLn(Color_Green, "microVU%d: Clamping I Reg", mVU.index); tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg } - else tempI = curI; + else + tempI = curI; xMOV(ptr32[&mVU.getVI(REG_I)], tempI); } incPC(1); - } + } } -void 
doSwapOp(mV) { - if (mVUinfo.backupVF && !mVUlow.noWriteVF) { +void doSwapOp(mV) +{ + if (mVUinfo.backupVF && !mVUlow.noWriteVF) + { DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC); // Allocate t1 first for better chance of reg-alloc @@ -148,7 +184,7 @@ void doSwapOp(mV) { xXOR.PS(t2, t3); mVU.regAlloc->clearNeeded(t3); - incPC(1); + incPC(1); doUpperOp(mVU); const xmm& t4 = mVU.regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf); @@ -156,23 +192,33 @@ void doSwapOp(mV) { mVU.regAlloc->clearNeeded(t4); mVU.regAlloc->clearNeeded(t2); } - else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); } + else + { + mVUopL(mVU, 1); + incPC(1); + flushRegs(mVU); + doUpperOp(mVU); + } } -void mVUexecuteInstruction(mV) { - if (mVUlow.isNOP) { +void mVUexecuteInstruction(mV) +{ + if (mVUlow.isNOP) + { incPC(1); doUpperOp(mVU); flushRegs(mVU); doIbit(mVU); } - else if (!mVUinfo.swapOps) { + else if (!mVUinfo.swapOps) + { incPC(1); doUpperOp(mVU); flushRegs(mVU); doLowerOp(mVU); } - else { + else + { doSwapOp(mVU); } @@ -184,61 +230,75 @@ void mVUexecuteInstruction(mV) { //------------------------------------------------------------------ // If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2) -__fi void mVUcheckBadOp(mV) { +__fi void mVUcheckBadOp(mV) +{ // The BIOS writes upper and lower NOPs in reversed slots (bug) - //So to prevent spamming we ignore these, however its possible the real VU will bomb out if + //So to prevent spamming we ignore these, however its possible the real VU will bomb out if //this happens, so we will bomb out without warning. 
- if (mVUinfo.isBadOp && mVU.code != 0x8000033c) { - + if (mVUinfo.isBadOp && mVU.code != 0x8000033c) + { + mVUinfo.isEOB = true; DevCon.Warning("microVU Warning: Block contains an illegal opcode..."); - } } // Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2) -// #ifdef PCSX2_DEVBUILD because starting with SVN R5586 we get log spam in releases (Shadow Hearts battles) -__fi void handleBadOp(mV, int count) { +// #ifdef PCSX2_DEVBUILD because starting with SVN R5586 we get log spam in releases (Shadow Hearts battles) +__fi void handleBadOp(mV, int count) +{ #ifdef PCSX2_DEVBUILD - if (mVUinfo.isBadOp) { + if (mVUinfo.isBadOp) + { mVUbackupRegs(mVU, true); if (!isVU1) xFastCall(mVUbadOp0, mVU.prog.cur->idx, xPC); - else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC); + else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC); mVUrestoreRegs(mVU, true); } #endif } -__ri void branchWarning(mV) { +__ri void branchWarning(mV) +{ incPC(-2); - if (mVUup.eBit && mVUbranch) { + if (mVUup.eBit && mVUbranch) + { incPC(2); DevCon.Warning("microVU%d Warning: Branch in E-bit delay slot! [%04x]", mVU.index, xPC); mVUlow.isNOP = true; } - else incPC(2); + else + incPC(2); - if (mVUinfo.isBdelay && !mVUlow.evilBranch) { // Check if VI Reg Written to on Branch Delay Slot Instruction - if (mVUlow.VI_write.reg && mVUlow.VI_write.used && !mVUlow.readFlags) { + if (mVUinfo.isBdelay && !mVUlow.evilBranch) // Check if VI Reg Written to on Branch Delay Slot Instruction + { + if (mVUlow.VI_write.reg && mVUlow.VI_write.used && !mVUlow.readFlags) + { mVUlow.backupVI = true; mVUregs.viBackUp = mVUlow.VI_write.reg; } } } -__fi void eBitPass1(mV, int& branch) { - if (mVUregs.blockType != 1) { - branch = 1; +__fi void eBitPass1(mV, int& branch) +{ + if (mVUregs.blockType != 1) + { + branch = 1; mVUup.eBit = true; } } -__ri void eBitWarning(mV) { - if (mVUpBlock->pState.blockType == 1) Console.Error("microVU%d Warning: Branch, E-bit, Branch! 
[%04x]", mVU.index, xPC); - if (mVUpBlock->pState.blockType == 2) Console.Error("microVU%d Warning: Branch, Branch, Branch! [%04x]", mVU.index, xPC); +__ri void eBitWarning(mV) +{ + if (mVUpBlock->pState.blockType == 1) + Console.Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", mVU.index, xPC); + if (mVUpBlock->pState.blockType == 2) + Console.Error("microVU%d Warning: Branch, Branch, Branch! [%04x]", mVU.index, xPC); incPC(2); - if (curI & _Ebit_) { + if (curI & _Ebit_) + { DevCon.Warning("microVU%d: E-bit in Branch delay slot! [%04x]", mVU.index, xPC); mVUregs.blockType = 1; } @@ -248,24 +308,27 @@ __ri void eBitWarning(mV) { //------------------------------------------------------------------ // Cycles / Pipeline State / Early Exit from Execution //------------------------------------------------------------------ -__fi void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; } -__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); } +__fi void optimizeReg(u8& rState) { rState = (rState == 1) ? 0 : rState; } +__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); } __fi void tCycles(u8& dest, u8& src) { dest = std::max(dest, src); } -__fi void incP(mV) { mVU.p ^= 1; } -__fi void incQ(mV) { mVU.q ^= 1; } +__fi void incP(mV) { mVU.p ^= 1; } +__fi void incQ(mV) { mVU.q ^= 1; } // Optimizes the End Pipeline State Removing Unnecessary Info // If the cycles remaining is just '1', we don't have to transfer it to the next block // because mVU automatically decrements this number at the start of its loop, // so essentially '1' will be the same as '0'... 
-void mVUoptimizePipeState(mV) { - for (int i = 0; i < 32; i++) { +void mVUoptimizePipeState(mV) +{ + for (int i = 0; i < 32; i++) + { optimizeReg(mVUregs.VF[i].x); optimizeReg(mVUregs.VF[i].y); optimizeReg(mVUregs.VF[i].z); optimizeReg(mVUregs.VF[i].w); } - for (int i = 0; i < 16; i++) { + for (int i = 0; i < 16; i++) + { optimizeReg(mVUregs.VI[i]); } if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } } @@ -273,60 +336,90 @@ void mVUoptimizePipeState(mV) { mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info } -void mVUincCycles(mV, int x) { +void mVUincCycles(mV, int x) +{ mVUcycles += x; // VF[0] is a constant value (0.0 0.0 0.0 1.0) - for (int z = 31; z > 0; z--) { + for (int z = 31; z > 0; z--) + { calcCycles(mVUregs.VF[z].x, x); calcCycles(mVUregs.VF[z].y, x); calcCycles(mVUregs.VF[z].z, x); calcCycles(mVUregs.VF[z].w, x); } // VI[0] is a constant value (0) - for (int z = 15; z > 0; z--) { + for (int z = 15; z > 0; z--) + { calcCycles(mVUregs.VI[z], x); } - if (mVUregs.q) { - if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } } - else { calcCycles(mVUregs.q, x); } - if (!mVUregs.q) { incQ(mVU); } + if (mVUregs.q) + { + if (mVUregs.q > 4) + { + calcCycles(mVUregs.q, x); + if (mVUregs.q <= 4) + { + mVUinfo.doDivFlag = 1; + } + } + else + { + calcCycles(mVUregs.q, x); + } + if (!mVUregs.q) + incQ(mVU); } - if (mVUregs.p) { + if (mVUregs.p) + { calcCycles(mVUregs.p, x); - if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); } + if (!mVUregs.p || mVUregsTemp.p) + incP(mVU); } - if (mVUregs.xgkick) { + if (mVUregs.xgkick) + { calcCycles(mVUregs.xgkick, x); - if (!mVUregs.xgkick) { mVUinfo.doXGKICK = 1; mVUinfo.XGKICKPC = xPC;} + if (!mVUregs.xgkick) + { + mVUinfo.doXGKICK = 1; + mVUinfo.XGKICKPC = xPC; + } } calcCycles(mVUregs.r, x); } // Helps check if upper/lower ops read/write to same regs... 
-void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) { - if (VFreg1.reg == VFreg2.reg) { +void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) +{ + if (VFreg1.reg == VFreg2.reg) + { if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y) - || (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w)) - { xVar = 1; } + || (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w)) + { + xVar = 1; + } } } -void mVUsetCycles(mV) { +void mVUsetCycles(mV) +{ mVUincCycles(mVU, mVUstall); // If upper Op && lower Op write to same VF reg: - if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) { + if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) + { if (mVUregsTemp.r || mVUregsTemp.VI) - mVUlow.noWriteVF = true; + mVUlow.noWriteVF = true; else mVUlow.isNOP = true; // If lower Op doesn't modify anything else, then make it a NOP } // If lower op reads a VF reg that upper Op writes to: - if ((mVUlow.VF_read[0].reg || mVUlow.VF_read[1].reg) && mVUup.VF_write.reg) { + if ((mVUlow.VF_read[0].reg || mVUlow.VF_read[1].reg) && mVUup.VF_write.reg) + { cmpVFregs(mVUup.VF_write, mVUlow.VF_read[0], mVUinfo.swapOps); cmpVFregs(mVUup.VF_write, mVUlow.VF_read[1], mVUinfo.swapOps); } // If above case is true, and upper op reads a VF reg that lower Op Writes to: - if (mVUinfo.swapOps && ((mVUup.VF_read[0].reg || mVUup.VF_read[1].reg) && mVUlow.VF_write.reg)) { + if (mVUinfo.swapOps && ((mVUup.VF_read[0].reg || mVUup.VF_read[1].reg) && mVUlow.VF_write.reg)) + { cmpVFregs(mVUlow.VF_write, mVUup.VF_read[0], mVUinfo.backupVF); cmpVFregs(mVUlow.VF_write, mVUup.VF_read[1], mVUinfo.backupVF); } @@ -341,33 +434,38 @@ void mVUsetCycles(mV) { tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].z, mVUregsTemp.VF[1].z); tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].w, mVUregsTemp.VF[1].w); - tCycles(mVUregs.VI[mVUregsTemp.VIreg], mVUregsTemp.VI); - tCycles(mVUregs.q, mVUregsTemp.q); - tCycles(mVUregs.p, mVUregsTemp.p); - tCycles(mVUregs.r, mVUregsTemp.r); - 
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick); + tCycles(mVUregs.VI[mVUregsTemp.VIreg], mVUregsTemp.VI); + tCycles(mVUregs.q, mVUregsTemp.q); + tCycles(mVUregs.p, mVUregsTemp.p); + tCycles(mVUregs.r, mVUregsTemp.r); + tCycles(mVUregs.xgkick, mVUregsTemp.xgkick); } // Prints Start/End PC of blocks executed, for debugging... -void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) { - if (mVUdebugNow) { +void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) +{ + if (mVUdebugNow) + { mVUbackupRegs(mVU, true); if (isEndPC) xFastCall(mVUprintPC2, xPC); - else xFastCall(mVUprintPC1, xPC); + else xFastCall(mVUprintPC1, xPC); mVUrestoreRegs(mVU, true); } } // Saves Pipeline State for resuming from early exits -__fi void mVUsavePipelineState(microVU& mVU) { +__fi void mVUsavePipelineState(microVU& mVU) +{ u32* lpS = (u32*)&mVU.prog.lpState; - for(size_t i = 0; i < (sizeof(microRegInfo)-4)/4; i++, lpS++) { + for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++) + { xMOV(ptr32[lpS], lpS[0]); } } // Test cycles to see if we need to exit-early... -void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) { +void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) +{ iPC = mVUstartPC; xMOV(eax, ptr32[&mVU.cycles]); @@ -392,45 +490,54 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) { //------------------------------------------------------------------ // This gets run at the start of every loop of mVU's first pass -__fi void startLoop(mV) { - if (curI & _Mbit_ && isVU0) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); } - if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set! PC = %x", getIndex, xPC); } - if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC); } +__fi void startLoop(mV) +{ + if (curI & _Mbit_ && isVU0) + DevCon.WriteLn(Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); + if (curI & _Dbit_) + DevCon.WriteLn(Color_Green, "microVU%d: D-bit set! 
PC = %x", getIndex, xPC); + if (curI & _Tbit_) + DevCon.WriteLn(Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC); memzero(mVUinfo); memzero(mVUregsTemp); } // Initialize VI Constants (vi15 propagates through blocks) -__fi void mVUinitConstValues(microVU& mVU) { - for (int i = 0; i < 16; i++) { - mVUconstReg[i].isValid = 0; - mVUconstReg[i].regValue = 0; +__fi void mVUinitConstValues(microVU& mVU) +{ + for (int i = 0; i < 16; i++) + { + mVUconstReg[i].isValid = 0; + mVUconstReg[i].regValue = 0; } - mVUconstReg[15].isValid = mVUregs.vi15v; + mVUconstReg[15].isValid = mVUregs.vi15v; mVUconstReg[15].regValue = mVUregs.vi15v ? mVUregs.vi15 : 0; } // Initialize Variables -__fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) { - mVUstartPC = iPC; // Block Start PC - mVUbranch = 0; // Branch Type - mVUcount = 0; // Number of instructions ran - mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage - mVU.p = 0; // All blocks start at p index #0 - mVU.q = 0; // All blocks start at q index #0 - if ((uptr)&mVUregs != pState) { // Loads up Pipeline State Info +__fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) +{ + mVUstartPC = iPC; // Block Start PC + mVUbranch = 0; // Branch Type + mVUcount = 0; // Number of instructions ran + mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage + mVU.p = 0; // All blocks start at p index #0 + mVU.q = 0; // All blocks start at q index #0 + if ((uptr)&mVUregs != pState) // Loads up Pipeline State Info + { memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo)); } - if (((uptr)&mVU.prog.lpState != pState)) { + if (((uptr)&mVU.prog.lpState != pState)) + { memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo)); } - mVUblock.x86ptrStart = thisPtr; - mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager - mVUregs.needExactMatch = (mVUpBlock->pState.blockType)?7:0; // ToDo: Fix 1-Op block flag linking (MGS2:Demo/Sly 
Cooper) - mVUregs.blockType = 0; - mVUregs.viBackUp = 0; - mVUregs.flagInfo = 0; - mVUsFlagHack = CHECK_VU_FLAGHACK; + mVUblock.x86ptrStart = thisPtr; + mVUpBlock = mVUblocks[mVUstartPC / 2]->add(&mVUblock); // Add this block to block manager + mVUregs.needExactMatch = (mVUpBlock->pState.blockType) ? 7 : 0; // ToDo: Fix 1-Op block flag linking (MGS2:Demo/Sly Cooper) + mVUregs.blockType = 0; + mVUregs.viBackUp = 0; + mVUregs.flagInfo = 0; + mVUsFlagHack = CHECK_VU_FLAGHACK; mVUinitConstValues(mVU); } @@ -442,13 +549,14 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) { //Unfortunately linking the reg manually and using the normal evil block method seems to suck at this :/ //If this is removed, test Evil Dead: Fistful of Boomstick (hangs going ingame), Mark of Kri (collision detection) //and Tony Hawks Project 8 (graphics are half missing, requires Negative rounding when working) -void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microFlagCycles& mFC) { - - u8* thisPtr = x86Ptr; - +void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microFlagCycles& mFC) +{ + + u8* thisPtr = x86Ptr; + // First Pass iPC = startPC / 4; - + mVUbranch = 0; incPC(1); startLoop(mVU); @@ -456,16 +564,40 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF mVUincCycles(mVU, 1); mVUopU(mVU, 0); mVUcheckBadOp(mVU); - if (curI & _Ebit_) { eBitPass1(mVU, g_branch); DevCon.Warning("E Bit on single instruction");} - if (curI & _Dbit_) { mVUup.dBit = true; } - if (curI & _Tbit_) { mVUup.tBit = true; } - if (curI & _Mbit_) { mVUup.mBit = true; DevCon.Warning("M Bit on single instruction");} - if (curI & _Ibit_) { mVUlow.isNOP = true; mVUup.iBit = true; DevCon.Warning("I Bit on single instruction");} - else { incPC(-1); mVUopL(mVU, 0); incPC(1); } + if (curI & _Ebit_) + { + eBitPass1(mVU, g_branch); + DevCon.Warning("E Bit on single instruction"); + } + if (curI & _Dbit_) + { + mVUup.dBit = true; + } + 
if (curI & _Tbit_) + { + mVUup.tBit = true; + } + if (curI & _Mbit_) + { + mVUup.mBit = true; + DevCon.Warning("M Bit on single instruction"); + } + if (curI & _Ibit_) + { + mVUlow.isNOP = true; + mVUup.iBit = true; + DevCon.Warning("I Bit on single instruction"); + } + else + { + incPC(-1); + mVUopL(mVU, 0); + incPC(1); + } mVUsetCycles(mVU); - mVUinfo.readQ = mVU.q; + mVUinfo.readQ = mVU.q; mVUinfo.writeQ = !mVU.q; - mVUinfo.readP = mVU.p && isVU1; + mVUinfo.readP = mVU.p && isVU1; mVUinfo.writeP = !mVU.p && isVU1; mVUcount++; mVUsetFlagInfo(mVU); @@ -480,13 +612,15 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF iPC = startPC / 4; setCode(); - if (mVUup.mBit) { + if (mVUup.mBit) + { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); } mVUexecuteInstruction(mVU); mVUincCycles(mVU, 1); //Just incase the is XGKick - if (mVUinfo.doXGKICK) { + if (mVUinfo.doXGKICK) + { mVU_XGKICK_DELAY(mVU); } @@ -497,7 +631,8 @@ void mVUDoDBit(microVU& mVU, microFlagCycles* mFC) { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xForwardJump32 eJMP(Jcc_Zero); - if (!isVU1 || !THREAD_VU1) { + if (!isVU1 || !THREAD_VU1) + { xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); } @@ -511,7 +646,8 @@ void mVUDoTBit(microVU& mVU, microFlagCycles* mFC) { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xForwardJump32 eJMP(Jcc_Zero); - if (!isVU1 || !THREAD_VU1) { + if (!isVU1 || !THREAD_VU1) + { xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 
0x400 : 0x4)); xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); } @@ -522,10 +658,10 @@ void mVUDoTBit(microVU& mVU, microFlagCycles* mFC) eJMP.SetTarget(); } -void mVUSaveFlags(microVU& mVU,microFlagCycles &mFC, microFlagCycles &mFCBackup) +void mVUSaveFlags(microVU& mVU, microFlagCycles& mFC, microFlagCycles& mFCBackup) { memcpy(&mFCBackup, &mFC, sizeof(microFlagCycles)); - mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances + mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances } void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { @@ -539,24 +675,29 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVU.regAlloc->reset(); // Reset regAlloc mVUinitFirstPass(mVU, pState, thisPtr); mVUbranch = 0; - for (int branch = 0; mVUcount < endCount;) { + for (int branch = 0; mVUcount < endCount;) + { incPC(1); startLoop(mVU); mVUincCycles(mVU, 1); mVUopU(mVU, 0); mVUcheckBadOp(mVU); - if (curI & _Ebit_) { + if (curI & _Ebit_) + { eBitPass1(mVU, branch); // VU0 end of program MAC results can be read by COP2, so best to make sure the last instance is valid // Needed for State of Emergency 2 and Driving Emotion Type-S - if(isVU0) mVUregs.needExactMatch |= 7; + if (isVU0) + mVUregs.needExactMatch |= 7; } - if ((curI & _Mbit_) && isVU0) { + if ((curI & _Mbit_) && isVU0) + { if (xPC > 0) { incPC(-2); - if (!(curI & _Mbit_)) { //If the last instruction was also M-Bit we don't need to sync again + if (!(curI & _Mbit_)) //If the last instruction was also M-Bit we don't need to sync again + { incPC(2); mVUup.mBit = true; } @@ -567,24 +708,29 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVUup.mBit = true; } - if (curI & _Ibit_) { + if (curI & _Ibit_) + { mVUlow.isNOP = true; mVUup.iBit = true; - if (EmuConfig.Gamefixes.IbitHack) { + if (EmuConfig.Gamefixes.IbitHack) + { mVUsetupRange(mVU, xPC, false); if (branch < 2) - mVUsetupRange(mVU, xPC+8, true); // Ideally we'd do +4 but the mmx compare only works in 64bits, this should be fine + 
mVUsetupRange(mVU, xPC + 8, true); // Ideally we'd do +4 but the mmx compare only works in 64bits, this should be fine } } - else { + else + { incPC(-1); mVUopL(mVU, 0); incPC(1); } - if (curI & _Dbit_) { + if (curI & _Dbit_) + { mVUup.dBit = true; } - if (curI & _Tbit_) { + if (curI & _Tbit_) + { mVUup.tBit = true; } mVUsetCycles(mVU); @@ -594,21 +740,25 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVUinfo.writeP = !mVU.p && isVU1; mVUcount++; - if (branch >= 2) { + if (branch >= 2) + { mVUinfo.isEOB = true; - if (branch == 3) { + if (branch == 3) + { mVUinfo.isBdelay = true; } branchWarning(mVU); break; } - else if (branch == 1) { + else if (branch == 1) + { branch = 2; } - if (mVUbranch) { + if (mVUbranch) + { mVUsetFlagInfo(mVU); eBitWarning(mVU); branch = 3; @@ -634,7 +784,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging... - mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary + mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary // Second Pass iPC = mVUstartPC; @@ -642,32 +792,39 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVUbranch = 0; u32 x = 0; - for (; x < endCount; x++) { - if (mVUinfo.isEOB) { + for (; x < endCount; x++) + { + if (mVUinfo.isEOB) + { handleBadOp(mVU, x); x = 0xffff; } // handleBadOp currently just prints a warning - if (mVUup.mBit) { + if (mVUup.mBit) + { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); } mVUexecuteInstruction(mVU); if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed. 
{ - if (mVUup.tBit) { + if (mVUup.tBit) + { mVUDoTBit(mVU, &mFC); } - else if (mVUup.dBit && doDBitHandling) { + else if (mVUup.dBit && doDBitHandling) + { mVUDoDBit(mVU, &mFC); } - else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) { + else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) + { // Need to make sure the flags are exact, Gungrave does FCAND with Mbit, then directly after FMAND with M-bit // Also call setupBranch to sort flag instances - + mVUsetupBranch(mVU, mFC); // Make sure we save the current state so it can come back to it u32* cpS = (u32*)&mVUregs; u32* lpS = (u32*)&mVU.prog.lpState; - for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++, cpS++) { + for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++, cpS++) + { xMOV(ptr32[lpS], cpS[0]); } incPC(2); @@ -679,61 +836,67 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) } } - if (mVUinfo.doXGKICK) { + if (mVUinfo.doXGKICK) + { mVU_XGKICK_DELAY(mVU); } - if (isEvilBlock) { - mVUsetupRange(mVU, xPC+8, false); + if (isEvilBlock) + { + mVUsetupRange(mVU, xPC + 8, false); normJumpCompile(mVU, mFC, true); goto perf_and_return; } - else if (!mVUinfo.isBdelay) { + else if (!mVUinfo.isBdelay) + { // Handle range wrapping if ((xPC + 8) == mVU.microMemSize) { - mVUsetupRange(mVU, xPC+8, false); + mVUsetupRange(mVU, xPC + 8, false); mVUsetupRange(mVU, 0, 1); } incPC(1); } - else { + else + { incPC(1); mVUsetupRange(mVU, xPC, false); mVUdebugPrintBlocks(mVU, true); incPC(-4); // Go back to branch opcode - switch (mVUlow.branch) { - case 1: // B/BAL - case 2: - normBranch(mVU, mFC); - goto perf_and_return; - case 9: // JR/JALR - case 10: - normJump(mVU, mFC); - goto perf_and_return; - case 3: // IBEQ - condBranch(mVU, mFC, Jcc_Equal); - goto perf_and_return; - case 4: // IBGEZ - condBranch(mVU, mFC, Jcc_GreaterOrEqual); - goto perf_and_return; - case 5: // IBGTZ - condBranch(mVU, mFC, Jcc_Greater); - goto perf_and_return; - case 6: // IBLEQ - condBranch(mVU, mFC, 
Jcc_LessOrEqual); - goto perf_and_return; - case 7: // IBLTZ - condBranch(mVU, mFC, Jcc_Less); - goto perf_and_return; - case 8: // IBNEQ - condBranch(mVU, mFC, Jcc_NotEqual); - goto perf_and_return; + switch (mVUlow.branch) + { + case 1: // B/BAL + case 2: + normBranch(mVU, mFC); + goto perf_and_return; + case 9: // JR/JALR + case 10: + normJump(mVU, mFC); + goto perf_and_return; + case 3: // IBEQ + condBranch(mVU, mFC, Jcc_Equal); + goto perf_and_return; + case 4: // IBGEZ + condBranch(mVU, mFC, Jcc_GreaterOrEqual); + goto perf_and_return; + case 5: // IBGTZ + condBranch(mVU, mFC, Jcc_Greater); + goto perf_and_return; + case 6: // IBLEQ + condBranch(mVU, mFC, Jcc_LessOrEqual); + goto perf_and_return; + case 7: // IBLTZ + condBranch(mVU, mFC, Jcc_Less); + goto perf_and_return; + case 8: // IBNEQ + condBranch(mVU, mFC, Jcc_NotEqual); + goto perf_and_return; } } } - if ((x == endCount) && (x != 1)) { + if ((x == endCount) && (x != 1)) + { Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); } @@ -749,31 +912,39 @@ perf_and_return: } // Returns the entry point of the block (compiles it if not found) -__fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState) { +__fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState) +{ microBlock* pBlock = block->search((microRegInfo*)pState); - if (pBlock) return pBlock->x86ptrStart; - else { return mVUcompile(mVU, startPC, pState);} + if (pBlock) + return pBlock->x86ptrStart; + else + return mVUcompile(mVU, startPC, pState); } - // Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr) -__fi void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState) { +// Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr) +__fi void* mVUblockFetch(microVU& mVU, u32 startPC, uptr pState) +{ - pxAssertDev((startPC & 7) == 0, pxsFmt("microVU%d: unaligned startPC=0x%04x", mVU.index, 
startPC) ); - pxAssertDev( startPC <= mVU.microMemSize-8, pxsFmt("microVU%d: invalid startPC=0x%04x", mVU.index, startPC) ); - startPC &= mVU.microMemSize-8; + pxAssertDev((startPC & 7) == 0, pxsFmt("microVU%d: unaligned startPC=0x%04x", mVU.index, startPC)); + pxAssertDev(startPC <= mVU.microMemSize - 8, pxsFmt("microVU%d: invalid startPC=0x%04x", mVU.index, startPC)); + startPC &= mVU.microMemSize - 8; - blockCreate(startPC/8); - return mVUentryGet(mVU, mVUblocks[startPC/8], startPC, pState); + blockCreate(startPC / 8); + return mVUentryGet(mVU, mVUblocks[startPC / 8], startPC, pState); } // mVUcompileJIT() - Called By JR/JALR during execution -_mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr) { - if (doJumpAsSameProgram) { // Treat jump as part of same microProgram - if (doJumpCaching) { // When doJumpCaching, ptr is a microBlock pointer +_mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr) +{ + if (doJumpAsSameProgram) // Treat jump as part of same microProgram + { + if (doJumpCaching) // When doJumpCaching, ptr is a microBlock pointer + { microVU& mVU = mVUx; microBlock* pBlock = (microBlock*)ptr; microJumpCache& jc = pBlock->jumpCache[startPC / 8]; - if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog) return jc.x86ptrStart; + if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog) + return jc.x86ptrStart; void* v = mVUblockFetch(mVUx, startPC, (uptr)&pBlock->pStateEnd); jc.prog = mVU.prog.quick[startPC / 8].prog; jc.x86ptrStart = v; @@ -782,17 +953,20 @@ _mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr ptr) { return mVUblockFetch(mVUx, startPC, ptr); } mVUx.regs().start_pc = startPC; - if (doJumpCaching) { // When doJumpCaching, ptr is a microBlock pointer + if (doJumpCaching) // When doJumpCaching, ptr is a microBlock pointer + { microVU& mVU = mVUx; microBlock* pBlock = (microBlock*)ptr; - microJumpCache& jc = pBlock->jumpCache[startPC/8]; - if (jc.prog && jc.prog == mVU.prog.quick[startPC/8].prog) return jc.x86ptrStart; + 
microJumpCache& jc = pBlock->jumpCache[startPC / 8]; + if (jc.prog && jc.prog == mVU.prog.quick[startPC / 8].prog) + return jc.x86ptrStart; void* v = mVUsearchProg(startPC, (uptr)&pBlock->pStateEnd); - jc.prog = mVU.prog.quick[startPC/8].prog; + jc.prog = mVU.prog.quick[startPC / 8].prog; jc.x86ptrStart = v; return v; } - else { // When !doJumpCaching, pBlock param is really a microRegInfo pointer + else // When !doJumpCaching, pBlock param is really a microRegInfo pointer + { return mVUsearchProg(startPC, ptr); // Find and set correct program } } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 985a97d1d2..f52219be8d 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -20,15 +20,16 @@ //------------------------------------------------------------------ // Generates the code for entering/exit recompiled blocks -void mVUdispatcherAB(mV) { +void mVUdispatcherAB(mV) +{ mVU.startFunct = x86Ptr; { xScopedStackFrame frame(false, true); // __fastcall = The caller has already put the needed parameters in ecx/edx: - if (!isVU1) { xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); } - else { xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); } + if (!isVU1) xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); + else xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); // Load VU's MXCSR state xLDMXCSR(g_sseVUMXCSR); @@ -42,7 +43,7 @@ void mVUdispatcherAB(mV) { xPSHUF.D(xmmPQ, xmmPQ, 0xe1); xMOVSS(xmmPQ, xmmT2); xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - + if (isVU1) { //Load in other P instance @@ -74,18 +75,19 @@ void mVUdispatcherAB(mV) { // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; // all other arguments are passed right to left. 
- if (!isVU1) { xFastCall((void*)mVUcleanUpVU0); } - else { xFastCall((void*)mVUcleanUpVU1); } + if (!isVU1) xFastCall((void*)mVUcleanUpVU0); + else xFastCall((void*)mVUcleanUpVU1); } xRET(); pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize), - "microVU: Dispatcher generation exceeded reserved cache area!"); + "microVU: Dispatcher generation exceeded reserved cache area!"); } // Generates the code for resuming/exit xgkick -void mVUdispatcherCD(mV) { +void mVUdispatcherCD(mV) +{ mVU.startFunctXG = x86Ptr; { @@ -116,7 +118,6 @@ void mVUdispatcherCD(mV) { // Load EE's MXCSR state xLDMXCSR(g_sseMXCSR); - } xRET(); @@ -130,15 +131,17 @@ void mVUdispatcherCD(mV) { //------------------------------------------------------------------ // Executes for number of cycles -_mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles) { +_mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles) +{ microVU& mVU = mVUx; - u32 vuLimit = vuIndex ? 0x3ff8 : 0xff8; - if (startPC > vuLimit + 7) { + u32 vuLimit = vuIndex ? 
0x3ff8 : 0xff8; + if (startPC > vuLimit + 7) + { DevCon.Warning("microVU%x Warning: startPC = 0x%x, cycles = 0x%x", vuIndex, startPC, cycles); } - mVU.cycles = cycles; + mVU.cycles = cycles; mVU.totalCycles = cycles; xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off @@ -149,7 +152,8 @@ _mVUt void* __fastcall mVUexecute(u32 startPC, u32 cycles) { // Cleanup Functions //------------------------------------------------------------------ -_mVUt void mVUcleanUp() { +_mVUt void mVUcleanUp() +{ microVU& mVU = mVUx; //mVUprint("microVU: Program exited successfully!"); //mVUprint("microVU: VF0 = {%x,%x,%x,%x}", mVU.regs().VF[0].UL[0], mVU.regs().VF[0].UL[1], mVU.regs().VF[0].UL[2], mVU.regs().VF[0].UL[3]); @@ -157,7 +161,8 @@ _mVUt void mVUcleanUp() { mVU.prog.x86ptr = x86Ptr; - if ((xGetPtr() < mVU.prog.x86start) || (xGetPtr() >= mVU.prog.x86end)) { + if ((xGetPtr() < mVU.prog.x86start) || (xGetPtr() >= mVU.prog.x86end)) + { Console.WriteLn(vuIndex ? Color_Orange : Color_Magenta, "microVU%d: Program cache limit reached.", mVU.index); mVUreset(mVU, false); } @@ -165,9 +170,11 @@ _mVUt void mVUcleanUp() { mVU.cycles = mVU.totalCycles - mVU.cycles; mVU.regs().cycle += mVU.cycles; - if (!vuIndex || !THREAD_VU1) { + if (!vuIndex || !THREAD_VU1) + { u32 cycles_passed = std::min(mVU.cycles, 3000u) * EmuConfig.Speedhacks.EECycleSkip; - if (cycles_passed > 0) { + if (cycles_passed > 0) + { s32 vu0_offset = VU0.cycle - cpuRegs.cycle; cpuRegs.cycle += cycles_passed; @@ -196,5 +203,5 @@ _mVUt void mVUcleanUp() { void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } -void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); } -void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); } +void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); } +void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); } diff --git a/pcsx2/x86/microVU_Flags.inl 
b/pcsx2/x86/microVU_Flags.inl index 80be91b492..41d674572e 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -16,39 +16,50 @@ #pragma once // Sets FDIV Flags at the proper time -__fi void mVUdivSet(mV) { - if (mVUinfo.doDivFlag) { - if (!sFLAG.doFlag) { xMOV(getFlagReg(sFLAG.write), getFlagReg(sFLAG.lastWrite)); } +__fi void mVUdivSet(mV) +{ + if (mVUinfo.doDivFlag) + { + if (!sFLAG.doFlag) + xMOV(getFlagReg(sFLAG.write), getFlagReg(sFLAG.lastWrite)); xAND(getFlagReg(sFLAG.write), 0xfff3ffff); - xOR (getFlagReg(sFLAG.write), ptr32[&mVU.divFlag]); + xOR(getFlagReg(sFLAG.write), ptr32[&mVU.divFlag]); } } // Optimizes out unneeded status flag updates // This can safely be done when there is an FSSET opcode -__fi void mVUstatusFlagOp(mV) { +__fi void mVUstatusFlagOp(mV) +{ int curPC = iPC; int i = mVUcount; bool runLoop = true; - if (sFLAG.doFlag) { + if (sFLAG.doFlag) + { sFLAG.doNonSticky = true; } - else { - for (; i > 0; i--) { + else + { + for (; i > 0; i--) + { incPC2(-2); - if (sFLAG.doNonSticky) { + if (sFLAG.doNonSticky) + { runLoop = false; break; } - else if (sFLAG.doFlag) { + else if (sFLAG.doFlag) + { sFLAG.doNonSticky = true; break; } } } - if (runLoop) { - for (; i > 0; i--) { + if (runLoop) + { + for (; i > 0; i--) + { incPC2(-2); if (sFLAG.doNonSticky) @@ -61,145 +72,181 @@ __fi void mVUstatusFlagOp(mV) { DevCon.WriteLn(Color_Green, "microVU%d: FSSET Optimization", getIndex); } -int findFlagInst(int* fFlag, int cycles) { +int findFlagInst(int* fFlag, int cycles) +{ int j = 0, jValue = -1; - for(int i = 0; i < 4; i++) { - if ((fFlag[i] <= cycles) && (fFlag[i] > jValue)) { - j = i; jValue = fFlag[i]; + for (int i = 0; i < 4; i++) + { + if ((fFlag[i] <= cycles) && (fFlag[i] > jValue)) + { + j = i; + jValue = fFlag[i]; } } return j; } // Setup Last 4 instances of Status/Mac/Clip flags (needed for accurate block linking) -int sortFlag(int* fFlag, int* bFlag, int cycles) { +int sortFlag(int* fFlag, int* bFlag, int cycles) +{ int 
lFlag = -5; int x = 0; - for(int i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) + { bFlag[i] = findFlagInst(fFlag, cycles); - if (lFlag != bFlag[i]) { x++; } + if (lFlag != bFlag[i]) + x++; lFlag = bFlag[i]; cycles++; } return x; // Returns the number of Valid Flag Instances } -void sortFullFlag(int* fFlag, int* bFlag) { +void sortFullFlag(int* fFlag, int* bFlag) +{ int m = std::max(std::max(fFlag[0], fFlag[1]), std::max(fFlag[2], fFlag[3])); - for(int i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) + { int t = 3 - (m - fFlag[i]); - bFlag[i] = (t < 0) ? 0 : t+1; + bFlag[i] = (t < 0) ? 0 : t + 1; } } -#define sFlagCond (sFLAG.doFlag || mVUlow.isFSSET || mVUinfo.doDivFlag ) +#define sFlagCond (sFLAG.doFlag || mVUlow.isFSSET || mVUinfo.doDivFlag) #define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky) // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! -__fi void mVUsetFlags(mV, microFlagCycles& mFC) { - int endPC = iPC; +__fi void mVUsetFlags(mV, microFlagCycles& mFC) +{ + int endPC = iPC; u32 aCount = 0; // Amount of instructions needed to get valid mac flag instances for block linking //bool writeProtect = false; // Ensure last ~4+ instructions update mac/status flags (if next block's first 4 instructions will read them) - for(int i = mVUcount; i > 0; i--, aCount++) { - if (sFLAG.doFlag) { + for (int i = mVUcount; i > 0; i--, aCount++) + { + if (sFLAG.doFlag) + { - if (__Mac) { + if (__Mac) + { mFLAG.doFlag = true; //writeProtect = true; } - if (__Status) { + if (__Status) + { sFLAG.doNonSticky = true; //writeProtect = true; } - if (aCount >= 3){ + if (aCount >= 3) + { break; } } incPC2(-2); } - + // Status/Mac Flags Setup Code int xS = 0, xM = 0, xC = 0; - for(int i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) + { mFC.xStatus[i] = i; mFC.xMac [i] = i; mFC.xClip [i] = i; } - - if(!(mVUpBlock->pState.needExactMatch & 1)) { + + if (!(mVUpBlock->pState.needExactMatch & 1)) + { xS = 
(mVUpBlock->pState.flagInfo >> 2) & 3; - mFC.xStatus[0] = -1; mFC.xStatus[1] = -1; - mFC.xStatus[2] = -1; mFC.xStatus[3] = -1; - mFC.xStatus[(xS-1)&3] = 0; + mFC.xStatus[0] = -1; + mFC.xStatus[1] = -1; + mFC.xStatus[2] = -1; + mFC.xStatus[3] = -1; + mFC.xStatus[(xS - 1) & 3] = 0; } - if(!(mVUpBlock->pState.needExactMatch & 2)) { + if (!(mVUpBlock->pState.needExactMatch & 2)) + { //xM = (mVUpBlock->pState.flagInfo >> 4) & 3; - mFC.xMac[0] = -1; mFC.xMac[1] = -1; - mFC.xMac[2] = -1; mFC.xMac[3] = -1; + mFC.xMac[0] = -1; + mFC.xMac[1] = -1; + mFC.xMac[2] = -1; + mFC.xMac[3] = -1; //mFC.xMac[(xM-1)&3] = 0; } - if(!(mVUpBlock->pState.needExactMatch & 4)) { + if (!(mVUpBlock->pState.needExactMatch & 4)) + { xC = (mVUpBlock->pState.flagInfo >> 6) & 3; - mFC.xClip[0] = -1; mFC.xClip[1] = -1; - mFC.xClip[2] = -1; mFC.xClip[3] = -1; - mFC.xClip[(xC-1)&3] = 0; + mFC.xClip[0] = -1; + mFC.xClip[1] = -1; + mFC.xClip[2] = -1; + mFC.xClip[3] = -1; + mFC.xClip[(xC - 1) & 3] = 0; } - mFC.cycles = 0; - u32 xCount = mVUcount; // Backup count - iPC = mVUstartPC; - for(mVUcount = 0; mVUcount < xCount; mVUcount++) { - if (mVUlow.isFSSET && !noFlagOpts) { - if (__Status) { // Don't Optimize out on the last ~4+ instructions - if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); } + mFC.cycles = 0; + u32 xCount = mVUcount; // Backup count + iPC = mVUstartPC; + for (mVUcount = 0; mVUcount < xCount; mVUcount++) + { + if (mVUlow.isFSSET && !noFlagOpts) + { + if (__Status) // Don't Optimize out on the last ~4+ instructions + { + if ((xCount - mVUcount) > aCount) + mVUstatusFlagOp(mVU); } - else mVUstatusFlagOp(mVU); + else + mVUstatusFlagOp(mVU); } mFC.cycles += mVUstall; - sFLAG.read = doSFlagInsts ? findFlagInst(mFC.xStatus, mFC.cycles) : 0; - mFLAG.read = doMFlagInsts ? findFlagInst(mFC.xMac, mFC.cycles) : 0; - cFLAG.read = doCFlagInsts ? findFlagInst(mFC.xClip, mFC.cycles) : 0; - + sFLAG.read = doSFlagInsts ? findFlagInst(mFC.xStatus, mFC.cycles) : 0; + mFLAG.read = doMFlagInsts ? 
findFlagInst(mFC.xMac, mFC.cycles) : 0; + cFLAG.read = doCFlagInsts ? findFlagInst(mFC.xClip, mFC.cycles) : 0; + sFLAG.write = doSFlagInsts ? xS : 0; mFLAG.write = doMFlagInsts ? xM : 0; cFLAG.write = doCFlagInsts ? xC : 0; - sFLAG.lastWrite = doSFlagInsts ? (xS-1) & 3 : 0; - mFLAG.lastWrite = doMFlagInsts ? (xM-1) & 3 : 0; - cFLAG.lastWrite = doCFlagInsts ? (xC-1) & 3 : 0; + sFLAG.lastWrite = doSFlagInsts ? (xS - 1) & 3 : 0; + mFLAG.lastWrite = doMFlagInsts ? (xM - 1) & 3 : 0; + cFLAG.lastWrite = doCFlagInsts ? (xC - 1) & 3 : 0; - if (sHackCond) { + if (sHackCond) + { sFLAG.doFlag = false; } - if (sFLAG.doFlag) { - if(noFlagOpts) { + if (sFLAG.doFlag) + { + if (noFlagOpts) + { sFLAG.doNonSticky = true; mFLAG.doFlag = true; } } - if (sFlagCond) { + if (sFlagCond) + { mFC.xStatus[xS] = mFC.cycles + 4; - xS = (xS+1) & 3; + xS = (xS + 1) & 3; } - if (mFLAG.doFlag) { + if (mFLAG.doFlag) + { mFC.xMac[xM] = mFC.cycles + 4; - xM = (xM+1) & 3; + xM = (xM + 1) & 3; } - if (cFLAG.doFlag) { + if (cFLAG.doFlag) + { mFC.xClip[xC] = mFC.cycles + 4; - xC = (xC+1) & 3; + xC = (xC + 1) & 3; } mFC.cycles++; @@ -212,64 +259,76 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) { iPC = endPC; } -#define getFlagReg2(x) ((bStatus[0] == x) ? getFlagReg(x) : gprT1) -#define getFlagReg3(x) ((gFlag == x) ? gprT1 : getFlagReg(x)) -#define getFlagReg4(x) ((gFlag == x) ? gprT1 : gprT2) -#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0]) -#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0]) +#define getFlagReg2(x) ((bStatus[0] == x) ? getFlagReg(x) : gprT1) +#define getFlagReg3(x) ((gFlag == x) ? gprT1 : getFlagReg(x)) +#define getFlagReg4(x) ((gFlag == x) ? 
gprT1 : gprT2) +#define shuffleMac ((bMac[3] << 6) | (bMac[2] << 4) | (bMac[1] << 2) | bMac[0]) +#define shuffleClip ((bClip[3] << 6) | (bClip[2] << 4) | (bClip[1] << 2) | bClip[0]) // Recompiles Code for Proper Flags on Block Linkings -__fi void mVUsetupFlags(mV, microFlagCycles& mFC) { +__fi void mVUsetupFlags(mV, microFlagCycles& mFC) +{ - if (mVUregs.flagInfo & 1) { - if (mVUregs.needExactMatch) DevCon.Error("mVU ERROR!!!"); + if (mVUregs.flagInfo & 1) + { + if (mVUregs.needExactMatch) + DevCon.Error("mVU ERROR!!!"); } const bool pf = false; // Print Flag Info - if (pf) DevCon.WriteLn("mVU%d - [#%d][sPC=%04x][bPC=%04x][mVUBranch=%d][branch=%d]", - mVU.index, mVU.prog.cur->idx, mVUstartPC/2*8, xPC, mVUbranch, mVUlow.branch); + if (pf) + DevCon.WriteLn("mVU%d - [#%d][sPC=%04x][bPC=%04x][mVUBranch=%d][branch=%d]", + mVU.index, mVU.prog.cur->idx, mVUstartPC / 2 * 8, xPC, mVUbranch, mVUlow.branch); - if (doSFlagInsts && __Status) { - if (pf) DevCon.WriteLn("mVU%d - Status Flag", mVU.index); + if (doSFlagInsts && __Status) + { + if (pf) + DevCon.WriteLn("mVU%d - Status Flag", mVU.index); int bStatus[4]; int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles); // DevCon::Status("sortRegs = %d", params sortRegs); // Note: Emitter will optimize out mov(reg1, reg1) cases... 
- if (sortRegs == 1) { - xMOV(gprF0, getFlagReg(bStatus[0])); - xMOV(gprF1, getFlagReg(bStatus[1])); - xMOV(gprF2, getFlagReg(bStatus[2])); - xMOV(gprF3, getFlagReg(bStatus[3])); + if (sortRegs == 1) + { + xMOV(gprF0, getFlagReg(bStatus[0])); + xMOV(gprF1, getFlagReg(bStatus[1])); + xMOV(gprF2, getFlagReg(bStatus[2])); + xMOV(gprF3, getFlagReg(bStatus[3])); } - else if (sortRegs == 2) { - xMOV(gprT1, getFlagReg (bStatus[3])); - xMOV(gprF0, getFlagReg (bStatus[0])); - xMOV(gprF1, getFlagReg2(bStatus[1])); - xMOV(gprF2, getFlagReg2(bStatus[2])); - xMOV(gprF3, gprT1); + else if (sortRegs == 2) + { + xMOV(gprT1, getFlagReg (bStatus[3])); + xMOV(gprF0, getFlagReg (bStatus[0])); + xMOV(gprF1, getFlagReg2(bStatus[1])); + xMOV(gprF2, getFlagReg2(bStatus[2])); + xMOV(gprF3, gprT1); } - else if (sortRegs == 3) { + else if (sortRegs == 3) + { int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1]; - xMOV(gprT1, getFlagReg (gFlag)); - xMOV(gprT2, getFlagReg (bStatus[3])); - xMOV(gprF0, getFlagReg (bStatus[0])); - xMOV(gprF1, getFlagReg3(bStatus[1])); - xMOV(gprF2, getFlagReg4(bStatus[2])); - xMOV(gprF3, gprT2); + xMOV(gprT1, getFlagReg (gFlag)); + xMOV(gprT2, getFlagReg (bStatus[3])); + xMOV(gprF0, getFlagReg (bStatus[0])); + xMOV(gprF1, getFlagReg3(bStatus[1])); + xMOV(gprF2, getFlagReg4(bStatus[2])); + xMOV(gprF3, gprT2); } - else { - xMOV(gprT1, getFlagReg(bStatus[0])); - xMOV(gprT2, getFlagReg(bStatus[1])); - xMOV(gprT3, getFlagReg(bStatus[2])); - xMOV(gprF3, getFlagReg(bStatus[3])); - xMOV(gprF0, gprT1); - xMOV(gprF1, gprT2); - xMOV(gprF2, gprT3); + else + { + xMOV(gprT1, getFlagReg(bStatus[0])); + xMOV(gprT2, getFlagReg(bStatus[1])); + xMOV(gprT3, getFlagReg(bStatus[2])); + xMOV(gprF3, getFlagReg(bStatus[3])); + xMOV(gprF0, gprT1); + xMOV(gprF1, gprT2); + xMOV(gprF2, gprT3); } } - - if (doMFlagInsts && __Mac) { - if (pf) DevCon.WriteLn("mVU%d - Mac Flag", mVU.index); + + if (doMFlagInsts && __Mac) + { + if (pf) + DevCon.WriteLn("mVU%d - Mac Flag", mVU.index); 
int bMac[4]; sortFlag(mFC.xMac, bMac, mFC.cycles); xMOVAPS(xmmT1, ptr128[mVU.macFlag]); @@ -277,8 +336,10 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) { xMOVAPS(ptr128[mVU.macFlag], xmmT1); } - if (doCFlagInsts && __Clip) { - if (pf) DevCon.WriteLn("mVU%d - Clip Flag", mVU.index); + if (doCFlagInsts && __Clip) + { + if (pf) + DevCon.WriteLn("mVU%d - Clip Flag", mVU.index); int bClip[4]; sortFlag(mFC.xClip, bClip, mFC.cycles); xMOVAPS(xmmT2, ptr128[mVU.clipFlag]); @@ -287,94 +348,142 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) { } } -#define shortBranch() { \ - if ((branch == 3) || (branch == 4)) { /*Branches*/ \ - _mVUflagPass(mVU, aBranchAddr, sCount+found, found, v); \ - if (branch == 3) break; /*Non-conditional Branch*/ \ - branch = 0; \ - } \ - else if (branch == 5) { /*JR/JARL*/ \ - if(sCount+found<4) { \ - mVUregs.needExactMatch |= 7; \ - } \ - break; \ - } \ - else break; /*E-Bit End*/ \ -} +#define shortBranch() \ + { \ + if ((branch == 3) || (branch == 4)) /*Branches*/ \ + { \ + _mVUflagPass(mVU, aBranchAddr, sCount + found, found, v); \ + if (branch == 3) /*Non-conditional Branch*/ \ + break; \ + branch = 0; \ + } \ + else if (branch == 5) /*JR/JARL*/ \ + { \ + if (sCount + found < 4) \ + mVUregs.needExactMatch |= 7; \ + break; \ + } \ + else /*E-Bit End*/ \ + break; \ + } // Scan through instructions and check if flags are read (FSxxx, FMxxx, FCxxx opcodes) -void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector& v) { +void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, std::vector& v) +{ - for (u32 i = 0; i < v.size(); i++) { - if (v[i] == startPC) return; // Prevent infinite recursion + for (u32 i = 0; i < v.size(); i++) + { + if (v[i] == startPC) + return; // Prevent infinite recursion } v.push_back(startPC); - int oldPC = iPC; - int oldBranch = mVUbranch; + int oldPC = iPC; + int oldBranch = mVUbranch; int aBranchAddr = 0; - iPC = startPC / 4; + iPC = startPC / 4; mVUbranch = 0; - for(int branch = 0; sCount < 4; 
sCount += found) { + for (int branch = 0; sCount < 4; sCount += found) + { mVUregs.needExactMatch &= 7; incPC(1); mVUopU(mVU, 3); - found |= (mVUregs.needExactMatch&8)>>3; + found |= (mVUregs.needExactMatch & 8) >> 3; mVUregs.needExactMatch &= 7; - if ( curI & _Ebit_ ) { branch = 1; } - if ( curI & _Tbit_ ) { branch = 6; } - if ( (curI & _Dbit_) && doDBitHandling ) { branch = 6; } - if (!(curI & _Ibit_) ) { incPC(-1); mVUopL(mVU, 3); incPC(1); } - + if (curI & _Ebit_) + { + branch = 1; + } + if (curI & _Tbit_) + { + branch = 6; + } + if ((curI & _Dbit_) && doDBitHandling) + { + branch = 6; + } + if (!(curI & _Ibit_)) + { + incPC(-1); + mVUopL(mVU, 3); + incPC(1); + } + // if (mVUbranch&&(branch>=3)&&(branch<=5)) { DevCon.Error("Double Branch [%x]", xPC); mVUregs.needExactMatch |= 7; break; } - - if (branch >= 2) { shortBranch(); } - else if (branch == 1) { branch = 2; } - if (mVUbranch) { branch = ((mVUbranch>8)?(5):((mVUbranch<3)?3:4)); incPC(-1); aBranchAddr = branchAddr(mVU); incPC(1); mVUbranch = 0; } + + if (branch >= 2) + { + shortBranch(); + } + else if (branch == 1) + { + branch = 2; + } + if (mVUbranch) + { + branch = ((mVUbranch > 8) ? (5) : ((mVUbranch < 3) ? 
3 : 4)); + incPC(-1); + aBranchAddr = branchAddr(mVU); + incPC(1); + mVUbranch = 0; + } incPC(1); - if ((mVUregs.needExactMatch&7)==7) break; + if ((mVUregs.needExactMatch & 7) == 7) + break; } - iPC = oldPC; + iPC = oldPC; mVUbranch = oldBranch; mVUregs.needExactMatch &= 7; setCode(); } -void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) { +void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) +{ std::vector v; _mVUflagPass(mVU, startPC, sCount, found, v); } // Checks if the first ~4 instructions of a block will read flags -void mVUsetFlagInfo(mV) { - if (noFlagOpts) { - mVUregs.needExactMatch = 0x7; - mVUregs.flagInfo = 0x0; +void mVUsetFlagInfo(mV) +{ + if (noFlagOpts) + { + mVUregs.needExactMatch = 0x7; + mVUregs.flagInfo = 0x0; return; } - if (mVUbranch <= 2) { // B/BAL + if (mVUbranch <= 2) // B/BAL + { incPC(-1); - mVUflagPass (mVU, branchAddr(mVU)); + mVUflagPass(mVU, branchAddr(mVU)); incPC(1); mVUregs.needExactMatch &= 0x7; } - else if (mVUbranch <= 8) { // Conditional Branch + else if (mVUbranch <= 8) // Conditional Branch + { incPC(-1); // Branch Taken - mVUflagPass (mVU, branchAddr(mVU)); - int backupFlagInfo = mVUregs.needExactMatch; + mVUflagPass(mVU, branchAddr(mVU)); + int backupFlagInfo = mVUregs.needExactMatch; mVUregs.needExactMatch = 0; - + incPC(4); // Branch Not Taken - mVUflagPass (mVU, xPC); + mVUflagPass(mVU, xPC); incPC(-3); mVUregs.needExactMatch |= backupFlagInfo; mVUregs.needExactMatch &= 0x7; } - else { // JR/JALR - if (!doConstProp || !mVUlow.constJump.isValid) { mVUregs.needExactMatch |= 0x7; } - else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU.microMemSize-8)); } + else // JR/JALR + { + if (!doConstProp || !mVUlow.constJump.isValid) + { + mVUregs.needExactMatch |= 0x7; + } + else + { + mVUflagPass(mVU, (mVUlow.constJump.regValue * 8) & (mVU.microMemSize - 8)); + } mVUregs.needExactMatch &= 0x7; } } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 4c396f3f80..033a1db95f 100644 --- 
a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -15,9 +15,11 @@ #pragma once -union regInfo { +union regInfo +{ u32 reg; - struct { + struct + { u8 x; u8 y; u8 z; @@ -31,219 +33,250 @@ union regInfo { // vi15 is only used if microVU const-prop is enabled (it is *not* by default). When constprop // is disabled the vi15 field acts as additional padding that is required for 16 byte alignment // needed by the xmm compare. -union __aligned16 microRegInfo { - struct { - union { - struct { - u8 needExactMatch; // If set, block needs an exact match of pipeline state - u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1 +union __aligned16 microRegInfo +{ + struct + { + union + { + struct + { + u8 needExactMatch; // If set, block needs an exact match of pipeline state + u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1 u8 q; u8 p; u8 xgkick; - u8 viBackUp; // VI reg number that was written to on branch-delay slot - u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) + u8 viBackUp; // VI reg number that was written to on branch-delay slot + u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) u8 r; }; u32 quick32[2]; }; - u8 vi15v; // 'vi15' constant is valid - u16 vi15; // Constant Prop Info for vi15 + u8 vi15v; // 'vi15' constant is valid + u16 vi15; // Constant Prop Info for vi15 - struct { + struct + { u8 VI[16]; regInfo VF[32]; }; }; - - u128 full128[160/sizeof(u128)]; - u64 full64[160/sizeof(u64)]; - u32 full32[160/sizeof(u32)]; + + u128 full128[160 / sizeof(u128)]; + u64 full64[160 / sizeof(u64)]; + u32 full32[160 / sizeof(u32)]; }; static_assert(sizeof(microRegInfo) == 160, "microRegInfo was not 160 bytes"); struct microProgram; -struct microJumpCache { +struct microJumpCache +{ microJumpCache() : prog(NULL), x86ptrStart(NULL) {} - microProgram* prog; // Program to which the entry point below is part of - void* x86ptrStart; // Start of code (Entry point for block) + 
microProgram* prog; // Program to which the entry point below is part of + void* x86ptrStart; // Start of code (Entry point for block) }; -struct __aligned16 microBlock { - microRegInfo pState; // Detailed State of Pipeline - microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) - u8* x86ptrStart; // Start of code (Entry point for block) - microJumpCache* jumpCache; // Will point to an array of entry points of size [16k/8] if block ends in JR/JALR +struct __aligned16 microBlock +{ + microRegInfo pState; // Detailed State of Pipeline + microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) + u8* x86ptrStart; // Start of code (Entry point for block) + microJumpCache* jumpCache; // Will point to an array of entry points of size [16k/8] if block ends in JR/JALR }; -struct microTempRegInfo { - regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction - u8 VFreg[2]; // Index of the VF reg - u8 VI; // Holds cycle info for Id - u8 VIreg; // Index of the VI reg - u8 q; // Holds cycle info for Q reg - u8 p; // Holds cycle info for P reg - u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified) - u8 xgkick; // Holds the cycle info for XGkick +struct microTempRegInfo +{ + regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction + u8 VFreg[2]; // Index of the VF reg + u8 VI; // Holds cycle info for Id + u8 VIreg; // Index of the VI reg + u8 q; // Holds cycle info for Q reg + u8 p; // Holds cycle info for P reg + u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified) + u8 xgkick; // Holds the cycle info for XGkick }; -struct microVFreg { +struct microVFreg +{ u8 reg; // Reg Index - u8 x; // X vector read/written to? - u8 y; // Y vector read/written to? - u8 z; // Z vector read/written to? - u8 w; // W vector read/written to? 
+ u8 x; // X vector read/written to? + u8 y; // Y vector read/written to? + u8 z; // Z vector read/written to? + u8 w; // W vector read/written to? }; -struct microVIreg { - u8 reg; // Reg Index - u8 used; // Reg is Used? (Read/Written) +struct microVIreg +{ + u8 reg; // Reg Index + u8 used; // Reg is Used? (Read/Written) }; -struct microConstInfo { - u8 isValid; // Is the constant in regValue valid? - u32 regValue; // Constant Value +struct microConstInfo +{ + u8 isValid; // Is the constant in regValue valid? + u32 regValue; // Constant Value }; -struct microUpperOp { - bool eBit; // Has E-bit set - bool iBit; // Has I-bit set - bool mBit; // Has M-bit set - bool tBit; // Has T-bit set - bool dBit; // Has D-bit set - microVFreg VF_write; // VF Vectors written to by this instruction - microVFreg VF_read[2]; // VF Vectors read by this instruction +struct microUpperOp +{ + bool eBit; // Has E-bit set + bool iBit; // Has I-bit set + bool mBit; // Has M-bit set + bool tBit; // Has T-bit set + bool dBit; // Has D-bit set + microVFreg VF_write; // VF Vectors written to by this instruction + microVFreg VF_read[2]; // VF Vectors read by this instruction }; -struct microLowerOp { - microVFreg VF_write; // VF Vectors written to by this instruction - microVFreg VF_read[2]; // VF Vectors read by this instruction - microVIreg VI_write; // VI reg written to by this instruction - microVIreg VI_read[2]; // VI regs read by this instruction +struct microLowerOp +{ + microVFreg VF_write; // VF Vectors written to by this instruction + microVFreg VF_read[2]; // VF Vectors read by this instruction + microVIreg VI_write; // VI reg written to by this instruction + microVIreg VI_read[2]; // VI regs read by this instruction microConstInfo constJump; // Constant Reg Info for JR/JARL instructions - u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 
2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR) - bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot - bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch) - bool isNOP; // This instruction is a NOP - bool isFSSET; // This instruction is a FSSET - bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) - bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) - bool memReadIs; // Read Is (VI reg) from memory (used by branches) - bool memReadIt; // Read If (VI reg) from memory (used by branches) - bool readFlags; // Current Instruction reads Status, Mac, or Clip flags + u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR) + bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot + bool evilBranch; // This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch) + bool isNOP; // This instruction is a NOP + bool isFSSET; // This instruction is a FSSET + bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) + bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) + bool memReadIs; // Read Is (VI reg) from memory (used by branches) + bool memReadIt; // Read If (VI reg) from memory (used by branches) + bool readFlags; // Current Instruction reads Status, Mac, or Clip flags }; -struct microFlagInst { - bool doFlag; // Update Flag on this Instruction +struct microFlagInst +{ + bool doFlag; // Update Flag on this Instruction bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only) - u8 write; // Points to the instance that should be written to (s-stage write) - u8 lastWrite; // 
Points to the instance that was last written to (most up-to-date flag) - u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) + u8 write; // Points to the instance that should be written to (s-stage write) + u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag) + u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) }; -struct microFlagCycles { +struct microFlagCycles +{ int xStatus[4]; int xMac[4]; int xClip[4]; int cycles; }; -struct microOp { - u8 stall; // Info on how much current instruction stalled - bool isBadOp; // Cur Instruction is a bad opcode (not a legal instruction) - bool isEOB; // Cur Instruction is last instruction in block (End of Block) - bool isBdelay; // Cur Instruction in Branch Delay slot - bool swapOps; // Run Lower Instruction before Upper Instruction - bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called - bool doXGKICK; // Do XGKICK transfer on this instruction +struct microOp +{ + u8 stall; // Info on how much current instruction stalled + bool isBadOp; // Cur Instruction is a bad opcode (not a legal instruction) + bool isEOB; // Cur Instruction is last instruction in block (End of Block) + bool isBdelay; // Cur Instruction in Branch Delay slot + bool swapOps; // Run Lower Instruction before Upper Instruction + bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called + bool doXGKICK; // Do XGKICK transfer on this instruction u32 XGKICKPC; // The PC in which the XGKick has taken place, so if we break early (before it) we don run it. 
- bool doDivFlag; // Transfer Div flag to Status Flag on this instruction - int readQ; // Q instance for reading - int writeQ; // Q instance for writing - int readP; // P instance for reading - int writeP; // P instance for writing + bool doDivFlag; // Transfer Div flag to Status Flag on this instruction + int readQ; // Q instance for reading + int writeQ; // Q instance for writing + int readP; // P instance for reading + int writeP; // P instance for writing microFlagInst sFlag; // Status Flag Instance Info - microFlagInst mFlag; // Mac Flag Instance Info + microFlagInst mFlag; // Mac Flag Instance Info microFlagInst cFlag; // Clip Flag Instance Info - microUpperOp uOp; // Upper Op Info - microLowerOp lOp; // Lower Op Info + microUpperOp uOp; // Upper Op Info + microLowerOp lOp; // Lower Op Info }; -template -struct microIR { - microBlock block; // Block/Pipeline info - microBlock* pBlock; // Pointer to a block in mVUblocks - microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) - microOp info[pSize/2]; // Info for Instructions in current block - microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks +template +struct microIR +{ + microBlock block; // Block/Pipeline info + microBlock* pBlock; // Pointer to a block in mVUblocks + microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) + microOp info[pSize / 2]; // Info for Instructions in current block + microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks u8 branch; - u32 cycles; // Cycles for current block - u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) - u32 curPC; // Current PC - u32 startPC; // Start PC for Cur Block - u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags + 
u32 cycles; // Cycles for current block + u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) + u32 curPC; // Current PC + u32 startPC; // Start PC for Cur Block + u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags }; //------------------------------------------------------------------ // Reg Alloc //------------------------------------------------------------------ -struct microMapXMM { - int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg) - int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid) - int count; // Count of when last used - bool isNeeded; // Is needed for current instruction +struct microMapXMM +{ + int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg) + int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid) + int count; // Count of when last used + bool isNeeded; // Is needed for current instruction }; -class microRegAlloc { +class microRegAlloc +{ protected: - static const int xmmTotal = 7; // Don't allocate PQ? - microMapXMM xmmMap[xmmTotal]; - int counter; // Current allocation count - int index; // VU0 or VU1 + static const int xmmTotal = 7; // Don't allocate PQ? 
+ microMapXMM xmmMap[xmmTotal]; + int counter; // Current allocation count + int index; // VU0 or VU1 // Helper functions to get VU regs - VURegs& regs() const { return ::vuRegs[index]; } - __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } - __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } + VURegs& regs() const { return ::vuRegs[index]; } + __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } + __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } - __ri void loadIreg(const xmm& reg, int xyzw) { + __ri void loadIreg(const xmm& reg, int xyzw) + { xMOVSSZX(reg, ptr32[&getVI(REG_I)]); - if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0); + if (!_XYZWss(xyzw)) + xSHUF.PS(reg, reg, 0); } - - int findFreeRegRec(int startIdx) { - for(int i = startIdx; i < xmmTotal; i++) { - if (!xmmMap[i].isNeeded) { - int x = findFreeRegRec(i+1); - if (x == -1) return i; + + int findFreeRegRec(int startIdx) + { + for (int i = startIdx; i < xmmTotal; i++) + { + if (!xmmMap[i].isNeeded) + { + int x = findFreeRegRec(i + 1); + if (x == -1) + return i; return ((xmmMap[i].count < xmmMap[x].count) ? i : x); } } return -1; } - int findFreeReg() { - for(int i = 0; i < xmmTotal; i++) { - if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) { + int findFreeReg() + { + for (int i = 0; i < xmmTotal; i++) + { + if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) + { return i; // Reg is not needed and was a temp reg } } int x = findFreeRegRec(0); - pxAssertDev( x >= 0, "microVU register allocation failure!" ); + pxAssertDev(x >= 0, "microVU register allocation failure!"); return x; } public: - microRegAlloc(int _index) { + microRegAlloc(int _index) + { index = _index; reset(); } // Fully resets the regalloc by clearing all cached data - void reset() { - for(int i = 0; i < xmmTotal; i++) { + void reset() + { + for (int i = 0; i < xmmTotal; i++) + { clearReg(i); } counter = 0; @@ -252,19 +285,24 @@ public: // Flushes all allocated registers (i.e. 
writes-back to memory all modified registers). // If clearState is 0, then it keeps cached reg data valid // If clearState is 1, then it invalidates all cached reg data after write-back - void flushAll(bool clearState = true) { - for(int i = 0; i < xmmTotal; i++) { + void flushAll(bool clearState = true) + { + for (int i = 0; i < xmmTotal; i++) + { writeBackReg(xmm(i)); if (clearState) clearReg(i); } } - void TDwritebackAll(bool clearState = false) { - for(int i = 0; i < xmmTotal; i++) { + void TDwritebackAll(bool clearState = false) + { + for (int i = 0; i < xmmTotal; i++) + { microMapXMM& mapX = xmmMap[xmm(i).Id]; - if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0 + if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0 + { if (mapX.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], xmm(i)); else if (mapX.VFreg == 32) @@ -276,27 +314,33 @@ public: } void clearReg(const xmm& reg) { clearReg(reg.Id); } - void clearReg(int regId) { + void clearReg(int regId) + { microMapXMM& clear = xmmMap[regId]; - clear.VFreg = -1; - clear.count = 0; - clear.xyzw = 0; - clear.isNeeded = 0; + clear.VFreg = -1; + clear.count = 0; + clear.xyzw = 0; + clear.isNeeded = 0; } - void clearRegVF(int VFreg) { - for(int i = 0; i < xmmTotal; i++) { - if (xmmMap[i].VFreg == VFreg) clearReg(i); + void clearRegVF(int VFreg) + { + for (int i = 0; i < xmmTotal; i++) + { + if (xmmMap[i].VFreg == VFreg) + clearReg(i); } } // Writes back modified reg to memory. // If all vectors modified, then keeps the VF reg cached in the xmm register. // If reg was not modified, then keeps the VF reg cached in the xmm register. 
- void writeBackReg(const xmm& reg, bool invalidateRegs = true) { + void writeBackReg(const xmm& reg, bool invalidateRegs = true) + { microMapXMM& mapX = xmmMap[reg.Id]; - if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0 + if ((mapX.VFreg > 0) && mapX.xyzw) // Reg was modified and not Temp or vf0 + { if (mapX.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg); else if (mapX.VFreg == 32) @@ -304,20 +348,25 @@ public: else mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, true); - if (invalidateRegs) { - for(int i = 0; i < xmmTotal; i++) { + if (invalidateRegs) + { + for (int i = 0; i < xmmTotal; i++) + { microMapXMM& mapI = xmmMap[i]; if ((i == reg.Id) || mapI.isNeeded) continue; - if (mapI.VFreg == mapX.VFreg) { - if (mapI.xyzw && mapI.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg); + if (mapI.VFreg == mapX.VFreg) + { + if (mapI.xyzw && mapI.xyzw < 0xf) + DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg); clearReg(i); // Invalidate any Cached Regs of same vf Reg } } } - if (mapX.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified + if (mapX.xyzw == 0xf) // Make Cached Reg if All Vectors were Modified + { mapX.count = counter; mapX.xyzw = 0; mapX.isNeeded = false; @@ -325,7 +374,8 @@ public: } clearReg(reg); } - else if (mapX.xyzw) { // Clear reg if modified and is VF0 or temp reg... + else if (mapX.xyzw) // Clear reg if modified and is VF0 or temp reg... + { clearReg(reg); } } @@ -335,64 +385,82 @@ public: // This is to guarantee proper merging between registers... When a written-to reg is cleared, // it invalidates other cached registers of the same VF reg, and merges partial-vector // writes into them. 
- void clearNeeded(const xmm& reg) { + void clearNeeded(const xmm& reg) + { - if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; // Sometimes xmmPQ hits this + if ((reg.Id < 0) || (reg.Id >= xmmTotal)) // Sometimes xmmPQ hits this + return; microMapXMM& clear = xmmMap[reg.Id]; clear.isNeeded = false; - if (clear.xyzw) { // Reg was modified - if (clear.VFreg > 0) { + if (clear.xyzw) // Reg was modified + { + if (clear.VFreg > 0) + { int mergeRegs = 0; - if (clear.xyzw < 0xf) mergeRegs = 1; // Try to merge partial writes - for(int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg - if (i == reg.Id) continue; + if (clear.xyzw < 0xf) // Try to merge partial writes + mergeRegs = 1; + for (int i = 0; i < xmmTotal; i++) // Invalidate any other read-only regs of same vfReg + { + if (i == reg.Id) + continue; microMapXMM& mapI = xmmMap[i]; - if (mapI.VFreg == clear.VFreg) { - if (mapI.xyzw && mapI.xyzw < 0xf) { + if (mapI.VFreg == clear.VFreg) + { + if (mapI.xyzw && mapI.xyzw < 0xf) + { DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg); } - if (mergeRegs == 1) { + if (mergeRegs == 1) + { mVUmergeRegs(xmm(i), reg, clear.xyzw, true); mapI.xyzw = 0xf; mapI.count = counter; mergeRegs = 2; } - else clearReg(i); // Clears when mergeRegs is 0 or 2 + else + clearReg(i); // Clears when mergeRegs is 0 or 2 } } - if (mergeRegs == 2) // Clear Current Reg if Merged + if (mergeRegs == 2) // Clear Current Reg if Merged clearReg(reg); else if (mergeRegs == 1) // Write Back Partial Writes if couldn't merge writeBackReg(reg); } - else clearReg(reg); // If Reg was temp or vf0, then invalidate itself + else + clearReg(reg); // If Reg was temp or vf0, then invalidate itself } } - + // vfLoadReg = VF reg to be loaded to the xmm register // vfWriteReg = VF reg that the returned xmm register will be considered as // xyzw = XYZW vectors that will be modified (and loaded) - // cloneWrite = When loading a reg that will be written to, - // it copies it to its 
own xmm reg instead of overwriting the cached one... + // cloneWrite = When loading a reg that will be written to, it copies it to its own xmm reg instead of overwriting the cached one... // Notes: // To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1. // To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1 // To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0 - const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) { + const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) + { //DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite); counter++; - if (vfLoadReg >= 0) { // Search For Cached Regs - for(int i = 0; i < xmmTotal; i++) { - const xmm& xmmI = xmm::GetInstance(i); + if (vfLoadReg >= 0) // Search For Cached Regs + { + for (int i = 0; i < xmmTotal; i++) + { + const xmm& xmmI = xmm::GetInstance(i); microMapXMM& mapI = xmmMap[i]; - if ((mapI.VFreg == vfLoadReg) && (!mapI.xyzw // Reg Was Not Modified - || (mapI.VFreg && (mapI.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 + if ((mapI.VFreg == vfLoadReg) + && (!mapI.xyzw // Reg Was Not Modified + || (mapI.VFreg && (mapI.xyzw == 0xf)))) // Reg Had All Vectors Modified and != VF0 + { int z = i; - if (vfWriteReg >= 0) { // Reg will be modified - if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg + if (vfWriteReg >= 0) // Reg will be modified + { + if (cloneWrite) // Clone Reg so as not to use the same Cached Reg + { z = findFreeReg(); - const xmm& xmmZ = xmm::GetInstance(z); + const xmm& xmmZ = xmm::GetInstance(z); writeBackReg(xmmZ); if (xyzw == 4) @@ -402,12 +470,13 @@ public: else if (xyzw == 1) xPSHUF.D(xmmZ, xmmI, 3); else if (z != i) - xMOVAPS (xmmZ, xmmI); + xMOVAPS(xmmZ, xmmI); mapI.count = counter; // 
Reg i was used, so update counter } - else { // Don't clone reg, but shuffle to adjust for SS ops - if ((vfLoadReg!=vfWriteReg)||(xyzw!=0xf)) + else // Don't clone reg, but shuffle to adjust for SS ops + { + if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) writeBackReg(xmmI); if (xyzw == 4) @@ -418,19 +487,20 @@ public: xPSHUF.D(xmmI, xmmI, 3); } xmmMap[z].VFreg = vfWriteReg; - xmmMap[z].xyzw = xyzw; + xmmMap[z].xyzw = xyzw; } - xmmMap[z].count = counter; + xmmMap[z].count = counter; xmmMap[z].isNeeded = true; return xmm::GetInstance(z); } } } int x = findFreeReg(); - const xmm& xmmX = xmm::GetInstance(x); + const xmm& xmmX = xmm::GetInstance(x); writeBackReg(xmmX); - if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading) + if (vfWriteReg >= 0) // Reg Will Be Modified (allow partial reg loading) + { if ((vfLoadReg == 0) && !(xyzw & 1)) xPXOR(xmmX, xmmX); else if (vfLoadReg == 33) @@ -443,12 +513,13 @@ public: xmmMap[x].VFreg = vfWriteReg; xmmMap[x].xyzw = xyzw; } - else { // Reg Will Not Be Modified (always load full reg for caching) + else // Reg Will Not Be Modified (always load full reg for caching) + { if (vfLoadReg == 33) loadIreg(xmmX, 0xf); else if (vfLoadReg == 32) xMOVAPS (xmmX, ptr128[®s().ACC]); - else if (vfLoadReg >= 0) + else if (vfLoadReg >= 0) xMOVAPS (xmmX, ptr128[&getVF(vfLoadReg)]); xmmMap[x].VFreg = vfLoadReg; diff --git a/pcsx2/x86/microVU_Log.inl b/pcsx2/x86/microVU_Log.inl index 9f535bab1f..c692926d38 100644 --- a/pcsx2/x86/microVU_Log.inl +++ b/pcsx2/x86/microVU_Log.inl @@ -12,16 +12,18 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + #pragma once #include "Utilities/AsciiFile.h" // writes text directly to mVU.logFile, no newlines appended. -_mVUt void __mVULog(const char* fmt, ...) { +_mVUt void __mVULog(const char* fmt, ...) 
+{ microVU& mVU = mVUx; - if (!mVU.logFile) return; + if (!mVU.logFile) + return; char tmp[2024]; va_list list; @@ -31,21 +33,29 @@ _mVUt void __mVULog(const char* fmt, ...) { vsprintf(tmp, fmt, list); va_end(list); - mVU.logFile->Write( tmp ); + mVU.logFile->Write(tmp); mVU.logFile->Flush(); } -#define commaIf() { if (bitX[6]) { mVUlog(","); bitX[6] = false; } } +#define commaIf() \ + { \ + if (bitX[6]) \ + { \ + mVUlog(","); \ + bitX[6] = false; \ + } \ + } #include "AppConfig.h" -void __mVUdumpProgram(microVU& mVU, microProgram& prog) { +void __mVUdumpProgram(microVU& mVU, microProgram& prog) +{ bool bitX[7]; int delay = 0; int bBranch = mVUbranch; - int bCode = mVU.code; - int bPC = iPC; - mVUbranch = 0; + int bCode = mVU.code; + int bPC = iPC; + mVUbranch = 0; const wxString logname(wxsFormat(L"microVU%d prog - %02d.html", mVU.index, prog.idx)); mVU.logFile = std::unique_ptr(new AsciiFile(Path::Combine(g_Conf->Folders.Logs, logname), L"w")); @@ -56,16 +66,27 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) { mVUlog("\n"); mVUlog(""); - mVUlog("*********************\n
", prog.idx); - mVUlog("* Micro-Program #%02d *\n
", prog.idx); - mVUlog("*********************\n\n

", prog.idx); + mVUlog("*********************\n
", prog.idx); + mVUlog("* Micro-Program #%02d *\n
", prog.idx); + mVUlog("*********************\n\n

", prog.idx); mVUlog("
"); - for (u32 i = 0; i < mVU.progSize; i+=2) { + for (u32 i = 0; i < mVU.progSize; i += 2) + { - if (delay) { delay--; mVUlog("
"); if (!delay) mVUlog("
"); } - if (mVUbranch) { delay = 1; mVUbranch = 0; } - mVU.code = prog.data[i+1]; + if (delay) + { + delay--; + mVUlog(""); + if (!delay) + mVUlog("
"); + } + if (mVUbranch) + { + delay = 1; + mVUbranch = 0; + } + mVU.code = prog.data[i + 1]; bitX[0] = false; bitX[1] = false; @@ -84,14 +105,15 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) { if (delay == 2) { mVUlog(""); } if (delay == 1) { mVUlog(""); } - iPC = (i+1); - mVUlog("", i*4); - mVUlog("[%04x] (%08x) ", i*4, mVU.code); + iPC = (i + 1); + mVUlog("", i * 4); + mVUlog("[%04x] (%08x) ", i * 4, mVU.code); mVUopU(mVU, 2); - if (bitX[5]) { + if (bitX[5]) + { mVUlog(" ("); - if (bitX[0]) { mVUlog("I"); bitX[6] = true; } + if (bitX[0]) { mVUlog("I"); bitX[6] = true; } if (bitX[1]) { commaIf(); mVUlog("E"); bitX[6] = true; } if (bitX[2]) { commaIf(); mVUlog("M"); bitX[6] = true; } if (bitX[3]) { commaIf(); mVUlog("D"); bitX[6] = true; } @@ -99,20 +121,23 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) { mVUlog(")"); } - if (mVUstall) { + if (mVUstall) + { mVUlog(" Stall %d Cycles", mVUstall); } iPC = i; mVU.code = prog.data[i]; - if(bitX[0]) { + if (bitX[0]) + { mVUlog("
\n"); - mVUlog("[%04x] (%08x) %f", i*4, mVU.code, *(float*)&mVU.code); + mVUlog("[%04x] (%08x) %f", i * 4, mVU.code, *(float*)&mVU.code); mVUlog("\n\n

"); } - else { - mVUlog("
\n[%04x] (%08x) ", i*4, mVU.code); + else + { + mVUlog("
\n[%04x] (%08x) ", i * 4, mVU.code); mVUopL(mVU, 2); mVUlog("\n\n

"); } @@ -123,9 +148,8 @@ void __mVUdumpProgram(microVU& mVU, microProgram& prog) { mVUbranch = bBranch; mVU.code = bCode; - iPC = bPC; + iPC = bPC; setCode(); mVU.logFile.reset(nullptr); } - diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 26006532e9..f1b39e6ce6 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -42,12 +42,14 @@ static __fi void testNeg(mV, const xmm& xmmReg, const x32& gprTemp) skip.SetTarget(); } -mVUop(mVU_DIV) { +mVUop(mVU_DIV) +{ pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); } - pass2 { + pass2 + { xmm Ft; if (_Ftf_) Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); - else Ft = mVU.regAlloc->allocReg(_Ft_); + else Ft = mVU.regAlloc->allocReg(_Ft_); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& t1 = mVU.regAlloc->allocReg(); @@ -89,15 +91,18 @@ mVUop(mVU_DIV) { pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); } } -mVUop(mVU_SQRT) { +mVUop(mVU_SQRT) +{ pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); } - pass2 { + pass2 + { const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags testNeg(mVU, Ft, gprT1); // Check for negative sqrt - if (CHECK_VU_OVERFLOW) xMIN.SS(Ft, ptr32[mVUglob.maxvals]); // Clamp infinities (only need to do positive clamp since xmmFt is positive) + if (CHECK_VU_OVERFLOW) // Clamp infinities (only need to do positive clamp since xmmFt is positive) + xMIN.SS(Ft, ptr32[mVUglob.maxvals]); xSQRT.SS(Ft, Ft); writeQreg(Ft, mVUinfo.writeQ); @@ -113,9 +118,11 @@ mVUop(mVU_SQRT) { pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); } } -mVUop(mVU_RSQRT) { +mVUop(mVU_RSQRT) +{ pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); const xmm& t1 = 
mVU.regAlloc->allocReg(); @@ -136,7 +143,7 @@ mVUop(mVU_RSQRT) { cjmp.SetTarget(); xAND.PS(Fs, ptr128[mVUglob.signbit]); - xOR.PS (Fs, ptr128[mVUglob.maxvals]); // xmmFs = +/-Max + xOR.PS(Fs, ptr128[mVUglob.maxvals]); // xmmFs = +/-Max xForwardJump8 djmp; ajmp.SetTarget(); @@ -164,16 +171,18 @@ mVUop(mVU_RSQRT) { // EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM //------------------------------------------------------------------ -#define EATANhelper(addr) { \ - SSE_MULSS(mVU, t2, Fs); \ - SSE_MULSS(mVU, t2, Fs); \ - xMOVAPS (t1, t2); \ - xMUL.SS (t1, ptr32[addr]); \ - SSE_ADDSS(mVU, PQ, t1); \ -} +#define EATANhelper(addr) \ + { \ + SSE_MULSS(mVU, t2, Fs); \ + SSE_MULSS(mVU, t2, Fs); \ + xMOVAPS(t1, t2); \ + xMUL.SS(t1, ptr32[addr]); \ + SSE_ADDSS(mVU, PQ, t1); \ + } // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d) -static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) { +static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) +{ xMOVSS(PQ, Fs); xMUL.SS(PQ, ptr32[mVUglob.T1]); xMOVAPS(t2, Fs); @@ -188,15 +197,17 @@ static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, con xPSHUF.D(PQ, PQ, mVUinfo.writeP ? 0x27 : 0xC6); } -mVUop(mVU_EATAN) { +mVUop(mVU_EATAN) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance xMOVSS (xmmPQ, Fs); - xSUB.SS(Fs, ptr32[mVUglob.one]); + xSUB.SS(Fs, ptr32[mVUglob.one]); xADD.SS(xmmPQ, ptr32[mVUglob.one]); SSE_DIVSS(mVU, Fs, xmmPQ); mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2); @@ -208,9 +219,11 @@ mVUop(mVU_EATAN) { pass3 { mVUlog("EATAN P"); } } -mVUop(mVU_EATANxy) { +mVUop(mVU_EATANxy) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); } - pass2 { + pass2 + { const xmm& t1 = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Fs = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); @@ -229,9 +242,11 @@ mVUop(mVU_EATANxy) { pass3 { mVUlog("EATANxy P"); } } -mVUop(mVU_EATANxz) { +mVUop(mVU_EATANxz) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); } - pass2 { + pass2 + { const xmm& t1 = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Fs = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); @@ -250,16 +265,19 @@ mVUop(mVU_EATANxz) { pass3 { mVUlog("EATANxz P"); } } -#define eexpHelper(addr) { \ - SSE_MULSS(mVU, t2, Fs); \ - xMOVAPS (t1, t2); \ - xMUL.SS (t1, ptr32[addr]); \ - SSE_ADDSS(mVU, xmmPQ, t1); \ -} +#define eexpHelper(addr) \ + { \ + SSE_MULSS(mVU, t2, Fs); \ + xMOVAPS(t1, t2); \ + xMUL.SS(t1, ptr32[addr]); \ + SSE_ADDSS(mVU, xmmPQ, t1); \ + } -mVUop(mVU_EEXP) { +mVUop(mVU_EEXP) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); @@ -267,22 +285,22 @@ mVUop(mVU_EEXP) { xMOVSS (xmmPQ, Fs); xMUL.SS (xmmPQ, ptr32[mVUglob.E1]); xADD.SS (xmmPQ, ptr32[mVUglob.one]); - xMOVAPS (t1, Fs); + xMOVAPS(t1, Fs); SSE_MULSS(mVU, t1, Fs); - xMOVAPS (t2, t1); - xMUL.SS (t1, ptr32[mVUglob.E2]); + xMOVAPS(t2, t1); + xMUL.SS(t1, ptr32[mVUglob.E2]); SSE_ADDSS(mVU, xmmPQ, t1); eexpHelper(&mVUglob.E3); eexpHelper(&mVUglob.E4); eexpHelper(&mVUglob.E5); SSE_MULSS(mVU, t2, Fs); - xMUL.SS (t2, ptr32[mVUglob.E6]); + xMUL.SS(t2, 
ptr32[mVUglob.E6]); SSE_ADDSS(mVU, xmmPQ, t2); SSE_MULSS(mVU, xmmPQ, xmmPQ); SSE_MULSS(mVU, xmmPQ, xmmPQ); - xMOVSSZX (t2, ptr32[mVUglob.one]); + xMOVSSZX(t2, ptr32[mVUglob.one]); SSE_DIVSS(mVU, t2, xmmPQ); - xMOVSS (xmmPQ, t2); + xMOVSS(xmmPQ, t2); xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(t1); @@ -293,94 +311,107 @@ mVUop(mVU_EEXP) { } // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 -static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { +static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) +{ xDP.PS(Fs, Fs, 0x71); xMOVSS(PQ, Fs); } -mVUop(mVU_ELENG) { +mVUop(mVU_ELENG) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(mVU, xmmPQ, Fs); - xSQRT.SS (xmmPQ, xmmPQ); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xSQRT.SS (xmmPQ, xmmPQ); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opELENG); } pass3 { mVUlog("ELENG P"); } } -mVUop(mVU_ERCPR) { +mVUop(mVU_ERCPR) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - xMOVSS (xmmPQ, Fs); - xMOVSSZX (Fs, ptr32[mVUglob.one]); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xMOVSS (xmmPQ, Fs); + xMOVSSZX (Fs, ptr32[mVUglob.one]); SSE_DIVSS(mVU, Fs, xmmPQ); - xMOVSS (xmmPQ, Fs); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xMOVSS (xmmPQ, Fs); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opERCPR); } pass3 { mVUlog("ERCPR P"); } } -mVUop(mVU_ERLENG) { +mVUop(mVU_ERLENG) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(mVU, xmmPQ, Fs); - xSQRT.SS (xmmPQ, xmmPQ); - xMOVSSZX (Fs, ptr32[mVUglob.one]); + xSQRT.SS (xmmPQ, xmmPQ); + xMOVSSZX (Fs, ptr32[mVUglob.one]); SSE_DIVSS (mVU, Fs, xmmPQ); - xMOVSS (xmmPQ, Fs); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xMOVSS (xmmPQ, Fs); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opERLENG); } pass3 { mVUlog("ERLENG P"); } } -mVUop(mVU_ERSADD) { +mVUop(mVU_ERSADD) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(mVU, xmmPQ, Fs); - xMOVSSZX (Fs, ptr32[mVUglob.one]); + xMOVSSZX (Fs, ptr32[mVUglob.one]); SSE_DIVSS (mVU, Fs, xmmPQ); - xMOVSS (xmmPQ, Fs); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xMOVSS (xmmPQ, Fs); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opERSADD); } pass3 { mVUlog("ERSADD P"); } } -mVUop(mVU_ERSQRT) { +mVUop(mVU_ERSQRT) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - xAND.PS (Fs, ptr128[mVUglob.absclip]); - xSQRT.SS (xmmPQ, Fs); - xMOVSSZX (Fs, ptr32[mVUglob.one]); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xAND.PS (Fs, ptr128[mVUglob.absclip]); + xSQRT.SS (xmmPQ, Fs); + xMOVSSZX (Fs, ptr32[mVUglob.one]); SSE_DIVSS(mVU, Fs, xmmPQ); - xMOVSS (xmmPQ, Fs); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xMOVSS (xmmPQ, Fs); + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opERSQRT); } pass3 { mVUlog("ERSQRT P"); } } -mVUop(mVU_ESADD) { +mVUop(mVU_ESADD) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(mVU, xmmPQ, Fs); @@ -391,35 +422,37 @@ mVUop(mVU_ESADD) { pass3 { mVUlog("ESADD P"); } } -mVUop(mVU_ESIN) { +mVUop(mVU_ESIN) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 29); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - xMOVSS (xmmPQ, Fs); // pq = X - SSE_MULSS(mVU, Fs, Fs); // fs = X^2 - xMOVAPS (t1, Fs); // t1 = X^2 + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xMOVSS (xmmPQ, Fs); // pq = X + SSE_MULSS(mVU, Fs, Fs); // fs = X^2 + xMOVAPS (t1, Fs); // t1 = X^2 SSE_MULSS(mVU, Fs, xmmPQ); // fs = X^3 - xMOVAPS (t2, Fs); // t2 = X^3 - xMUL.SS (Fs, ptr32[mVUglob.S2]); // fs = s2 * X^3 + xMOVAPS (t2, Fs); // t2 = X^3 + xMUL.SS (Fs, ptr32[mVUglob.S2]); // fs = s2 * X^3 SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 - SSE_MULSS(mVU, t2, t1); // t2 = X^3 * X^2 - xMOVAPS (Fs, t2); // fs = X^5 - xMUL.SS (Fs, ptr32[mVUglob.S3]); // ps = s3 * X^5 + SSE_MULSS(mVU, t2, t1); // t2 = X^3 * X^2 + xMOVAPS (Fs, t2); // fs = X^5 + xMUL.SS (Fs, ptr32[mVUglob.S3]); // ps = s3 * X^5 SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 + s3 * X^5 - SSE_MULSS(mVU, t2, t1); // t2 = X^5 * X^2 - xMOVAPS (Fs, t2); // fs = X^7 - xMUL.SS (Fs, ptr32[mVUglob.S4]); // fs = s4 * X^7 + SSE_MULSS(mVU, t2, t1); // t2 = X^5 * X^2 + xMOVAPS (Fs, t2); // fs = X^7 + xMUL.SS (Fs, ptr32[mVUglob.S4]); // fs = s4 * X^7 SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 - SSE_MULSS(mVU, t2, t1); // t2 = X^7 * X^2 - xMUL.SS (t2, ptr32[mVUglob.S5]); // t2 = s5 * X^9 + SSE_MULSS(mVU, t2, t1); // t2 = X^7 * X^2 + xMUL.SS (t2, ptr32[mVUglob.S5]); // t2 = s5 * X^9 SSE_ADDSS(mVU, xmmPQ, t2); // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 + s5 * X^9 - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(t1); mVU.regAlloc->clearNeeded(t2); @@ -428,9 +461,11 @@ mVUop(mVU_ESIN) { pass3 { mVUlog("ESIN P"); } } -mVUop(mVU_ESQRT) { +mVUop(mVU_ESQRT) +{ pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance xAND.PS (Fs, ptr128[mVUglob.absclip]); @@ -442,18 +477,20 @@ mVUop(mVU_ESQRT) { pass3 { mVUlog("ESQRT P"); } } -mVUop(mVU_ESUM) { +mVUop(mVU_ESUM) +{ pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& t1 = mVU.regAlloc->allocReg(); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - xPSHUF.D (t1, Fs, 0x1b); + xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xPSHUF.D(t1, Fs, 0x1b); SSE_ADDPS(mVU, Fs, t1); - xPSHUF.D (t1, Fs, 0x01); + xPSHUF.D(t1, Fs, 0x01); SSE_ADDSS(mVU, Fs, t1); - xMOVSS (xmmPQ, Fs); - xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back + xMOVSS(xmmPQ, Fs); + xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(t1); mVU.profiler.EmitOp(opESUM); @@ -465,9 +502,11 @@ mVUop(mVU_ESUM) { // FCAND/FCEQ/FCGET/FCOR/FCSET //------------------------------------------------------------------ -mVUop(mVU_FCAND) { +mVUop(mVU_FCAND) +{ pass1 { mVUanalyzeCflag(mVU, 1); } - pass2 { + pass2 + { mVUallocCFLAGa(mVU, gprT1, cFLAG.read); xAND(gprT1, _Imm24_); xADD(gprT1, 0xffffff); @@ -479,9 +518,11 @@ mVUop(mVU_FCAND) { pass4 { mVUregs.needExactMatch |= 4; } } -mVUop(mVU_FCEQ) { +mVUop(mVU_FCEQ) +{ pass1 { mVUanalyzeCflag(mVU, 1); } - pass2 { + pass2 + { mVUallocCFLAGa(mVU, gprT1, cFLAG.read); xXOR(gprT1, _Imm24_); xSUB(gprT1, 1); @@ -493,21 +534,25 @@ mVUop(mVU_FCEQ) { pass4 { mVUregs.needExactMatch |= 4; } } -mVUop(mVU_FCGET) { +mVUop(mVU_FCGET) +{ pass1 { mVUanalyzeCflag(mVU, _It_); } - pass2 { + pass2 + { mVUallocCFLAGa(mVU, gprT1, cFLAG.read); xAND(gprT1, 0xfff); mVUallocVIb(mVU, gprT1, _It_); mVU.profiler.EmitOp(opFCGET); } - pass3 { mVUlog("FCGET vi%02d", _Ft_); } + pass3 { mVUlog("FCGET vi%02d", _Ft_); } pass4 { mVUregs.needExactMatch |= 4; } } 
-mVUop(mVU_FCOR) { +mVUop(mVU_FCOR) +{ pass1 { mVUanalyzeCflag(mVU, 1); } - pass2 { + pass2 + { mVUallocCFLAGa(mVU, gprT1, cFLAG.read); xOR(gprT1, _Imm24_); xADD(gprT1, 1); // If 24 1's will make 25th bit 1, else 0 @@ -519,9 +564,11 @@ mVUop(mVU_FCOR) { pass4 { mVUregs.needExactMatch |= 4; } } -mVUop(mVU_FCSET) { +mVUop(mVU_FCSET) +{ pass1 { cFLAG.doFlag = true; } - pass2 { + pass2 + { xMOV(gprT1, _Imm24_); mVUallocCFLAGb(mVU, gprT1, cFLAG.write); mVU.profiler.EmitOp(opFCSET); @@ -533,9 +580,11 @@ mVUop(mVU_FCSET) { // FMAND/FMEQ/FMOR //------------------------------------------------------------------ -mVUop(mVU_FMAND) { +mVUop(mVU_FMAND) +{ pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } - pass2 { + pass2 + { mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocVIa(mVU, gprT2, _Is_); xAND(gprT1b, gprT2b); @@ -546,9 +595,11 @@ mVUop(mVU_FMAND) { pass4 { mVUregs.needExactMatch |= 2; } } -mVUop(mVU_FMEQ) { +mVUop(mVU_FMEQ) +{ pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } - pass2 { + pass2 + { mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocVIa(mVU, gprT2, _Is_); xXOR(gprT1, gprT2); @@ -561,9 +612,11 @@ mVUop(mVU_FMEQ) { pass4 { mVUregs.needExactMatch |= 2; } } -mVUop(mVU_FMOR) { +mVUop(mVU_FMOR) +{ pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); } - pass2 { + pass2 + { mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocVIa(mVU, gprT2, _Is_); xOR(gprT1b, gprT2b); @@ -578,9 +631,11 @@ mVUop(mVU_FMOR) { // FSAND/FSEQ/FSOR/FSSET //------------------------------------------------------------------ -mVUop(mVU_FSAND) { +mVUop(mVU_FSAND) +{ pass1 { mVUanalyzeSflag(mVU, _It_); } - pass2 { + pass2 + { if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags"); if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags"); mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); @@ -592,9 +647,11 @@ mVUop(mVU_FSAND) { pass4 { mVUregs.needExactMatch |= 1; } } -mVUop(mVU_FSOR) { +mVUop(mVU_FSOR) +{ pass1 { mVUanalyzeSflag(mVU, _It_); } - pass2 { + pass2 + { 
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); xOR(gprT1, _Imm12_); mVUallocVIb(mVU, gprT1, _It_); @@ -604,9 +661,11 @@ mVUop(mVU_FSOR) { pass4 { mVUregs.needExactMatch |= 1; } } -mVUop(mVU_FSEQ) { +mVUop(mVU_FSEQ) +{ pass1 { mVUanalyzeSflag(mVU, _It_); } - pass2 { + pass2 + { int imm = 0; if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking I/D/IS/DS Flags"); if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking U/O/US/OS Flags"); @@ -638,9 +697,11 @@ mVUop(mVU_FSEQ) { pass4 { mVUregs.needExactMatch |= 1; } } -mVUop(mVU_FSSET) { +mVUop(mVU_FSSET) +{ pass1 { mVUanalyzeFSSET(mVU); } - pass2 { + pass2 + { int imm = 0; if (_Imm12_ & 0x0040) imm |= 0x000000f; // ZS if (_Imm12_ & 0x0080) imm |= 0x00000f0; // SS @@ -648,11 +709,13 @@ mVUop(mVU_FSSET) { if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS - if (!(sFLAG.doFlag || mVUinfo.doDivFlag)) { + if (!(sFLAG.doFlag || mVUinfo.doDivFlag)) + { mVUallocSFLAGa(getFlagReg(sFLAG.write), sFLAG.lastWrite); // Get Prev Status Flag } xAND(getFlagReg(sFLAG.write), 0xfff00); // Keep Non-Sticky Bits - if (imm) xOR(getFlagReg(sFLAG.write), imm); + if (imm) + xOR(getFlagReg(sFLAG.write), imm); mVU.profiler.EmitOp(opFSSET); } pass3 { mVUlog("FSSET $%x", _Imm12_); } @@ -662,24 +725,30 @@ mVUop(mVU_FSSET) { // IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU //------------------------------------------------------------------ -mVUop(mVU_IADD) { +mVUop(mVU_IADD) +{ pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); - if (_It_ != _Is_) { + if (_It_ != _Is_) + { mVUallocVIa(mVU, gprT2, _It_); xADD(gprT1b, gprT2b); } - else xADD(gprT1b, gprT1b); + else + xADD(gprT1b, gprT1b); mVUallocVIb(mVU, gprT1, _Id_); mVU.profiler.EmitOp(opIADD); } pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } } -mVUop(mVU_IADDI) { +mVUop(mVU_IADDI) +{ pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, 
_Imm5_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); if (_Imm5_ != 0) xADD(gprT1b, _Imm5_); @@ -689,9 +758,11 @@ mVUop(mVU_IADDI) { pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); } } -mVUop(mVU_IADDIU) { +mVUop(mVU_IADDIU) +{ pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); if (_Imm15_ != 0) xADD(gprT1b, _Imm15_); @@ -701,11 +772,14 @@ mVUop(mVU_IADDIU) { pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); } } -mVUop(mVU_IAND) { +mVUop(mVU_IAND) +{ pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); - if (_It_ != _Is_) { + if (_It_ != _Is_) + { mVUallocVIa(mVU, gprT2, _It_); xAND(gprT1, gprT2); } @@ -715,11 +789,14 @@ mVUop(mVU_IAND) { pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } } -mVUop(mVU_IOR) { +mVUop(mVU_IOR) +{ pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); - if (_It_ != _Is_) { + if (_It_ != _Is_) + { mVUallocVIa(mVU, gprT2, _It_); xOR(gprT1, gprT2); } @@ -729,16 +806,20 @@ mVUop(mVU_IOR) { pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } } -mVUop(mVU_ISUB) { +mVUop(mVU_ISUB) +{ pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } - pass2 { - if (_It_ != _Is_) { + pass2 + { + if (_It_ != _Is_) + { mVUallocVIa(mVU, gprT1, _Is_); mVUallocVIa(mVU, gprT2, _It_); xSUB(gprT1b, gprT2b); mVUallocVIb(mVU, gprT1, _Id_); } - else { + else + { xXOR(gprT1, gprT1); mVUallocVIb(mVU, gprT1, _Id_); } @@ -747,9 +828,11 @@ mVUop(mVU_ISUB) { pass3 { mVUlog("ISUB vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } } -mVUop(mVU_ISUBIU) { +mVUop(mVU_ISUBIU) +{ pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); } - pass2 { + pass2 + { mVUallocVIa(mVU, gprT1, _Is_); if (_Imm15_ != 0) xSUB(gprT1b, _Imm15_); @@ -763,28 +846,35 @@ mVUop(mVU_ISUBIU) { // MFIR/MFP/MOVE/MR32/MTIR //------------------------------------------------------------------ -mVUop(mVU_MFIR) 
{ - pass1 { - if (!_Ft_) { +mVUop(mVU_MFIR) +{ + pass1 + { + if (!_Ft_) + { mVUlow.isNOP = true; } analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeReg2 (mVU, _Ft_, mVUlow.VF_write, 1); } - pass2 { + pass2 + { const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); mVUallocVIa(mVU, gprT1, _Is_, true); xMOVDZX(Ft, gprT1); - if (!_XYZW_SS) { mVUunpack_xyzw(Ft, Ft, 0); } + if (!_XYZW_SS) + mVUunpack_xyzw(Ft, Ft, 0); mVU.regAlloc->clearNeeded(Ft); mVU.profiler.EmitOp(opMFIR); } pass3 { mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); } } -mVUop(mVU_MFP) { +mVUop(mVU_MFP) +{ pass1 { mVUanalyzeMFP(mVU, _Ft_); } - pass2 { + pass2 + { const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); getPreg(mVU, Ft); mVU.regAlloc->clearNeeded(Ft); @@ -793,9 +883,11 @@ mVUop(mVU_MFP) { pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); } } -mVUop(mVU_MOVE) { +mVUop(mVU_MOVE) +{ pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W); mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opMOVE); @@ -803,13 +895,17 @@ mVUop(mVU_MOVE) { pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); } } -mVUop(mVU_MR32) { +mVUop(mVU_MR32) +{ pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0)))); - else xPSHUF.D(Ft, Fs, 0x39); + if (_XYZW_SS) + mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 
3 : 0)))); + else + xPSHUF.D(Ft, Fs, 0x39); mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opMR32); @@ -817,15 +913,18 @@ mVUop(mVU_MR32) { pass3 { mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); } } -mVUop(mVU_MTIR) { - pass1 { +mVUop(mVU_MTIR) +{ + pass1 + { if (!_It_) mVUlow.isNOP = true; - analyzeReg5 (mVU, _Fs_, _Fsf_, mVUlow.VF_read[0]); + analyzeReg5(mVU, _Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); xMOVD(gprT1, Fs); mVUallocVIb(mVU, gprT1, _It_); @@ -839,23 +938,26 @@ mVUop(mVU_MTIR) { // ILW/ILWR //------------------------------------------------------------------ -mVUop(mVU_ILW) { - pass1 { +mVUop(mVU_ILW) +{ + pass1 + { if (!_It_) mVUlow.isNOP = true; analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4); } - pass2 { - void *ptr = mVU.regs().Mem + offsetSS; + pass2 + { + void* ptr = mVU.regs().Mem + offsetSS; mVUallocVIa(mVU, gprT2, _Is_); if (!_Is_) xXOR(gprT2, gprT2); if (_Imm11_ != 0) xADD(gprT2, _Imm11_); - mVUaddrFix (mVU, gprT2q); + mVUaddrFix(mVU, gprT2q); xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]); mVUallocVIb(mVU, gprT1, _It_); mVU.profiler.EmitOp(opILW); @@ -863,21 +965,27 @@ mVUop(mVU_ILW) { pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } } -mVUop(mVU_ILWR) { - pass1 { +mVUop(mVU_ILWR) +{ + pass1 + { if (!_It_) mVUlow.isNOP = true; analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4); } - pass2 { - void *ptr = mVU.regs().Mem + offsetSS; - if (_Is_) { + pass2 + { + void* ptr = mVU.regs().Mem + offsetSS; + if (_Is_) + { mVUallocVIa(mVU, gprT2, _Is_); mVUaddrFix (mVU, gprT2q); xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]); - } else { + } + else + { xMOVZX(gprT1, ptr16[ptr]); } mVUallocVIb(mVU, gprT1, _It_); @@ -890,65 +998,80 @@ 
mVUop(mVU_ILWR) { // ISW/ISWR //------------------------------------------------------------------ -static void writeBackISW(microVU& mVU, void *base_ptr, xAddressReg reg) { - if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) { +static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg) +{ + if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) + { int register_offset = -1; - auto writeBackAt = [&](int offset){ - if (register_offset == -1) { + auto writeBackAt = [&](int offset) { + if (register_offset == -1) + { xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]); register_offset = offset; } - xMOV(ptr32[gprT3q+reg+(offset-register_offset)], gprT1); + xMOV(ptr32[gprT3q + reg + (offset - register_offset)], gprT1); }; if (_X) writeBackAt(0); if (_Y) writeBackAt(4); if (_Z) writeBackAt(8); if (_W) writeBackAt(12); - } else if (reg.IsEmpty()) { - if (_X) xMOV(ptr32[(void*)((uptr)base_ptr)], gprT1); - if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr+4)], gprT1); - if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr+8)], gprT1); - if (_W) xMOV(ptr32[(void*)((uptr)base_ptr+12)], gprT1); - } else { - if (_X) xMOV(ptr32[base_ptr+reg], gprT1); - if (_Y) xMOV(ptr32[base_ptr+reg+4], gprT1); - if (_Z) xMOV(ptr32[base_ptr+reg+8], gprT1); - if (_W) xMOV(ptr32[base_ptr+reg+12], gprT1); + } + else if (reg.IsEmpty()) + { + if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], gprT1); + if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], gprT1); + if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], gprT1); + if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], gprT1); + } + else + { + if (_X) xMOV(ptr32[base_ptr+reg ], gprT1); + if (_Y) xMOV(ptr32[base_ptr+reg + 4], gprT1); + if (_Z) xMOV(ptr32[base_ptr+reg + 8], gprT1); + if (_W) xMOV(ptr32[base_ptr+reg + 12], gprT1); } } -mVUop(mVU_ISW) { - pass1 { +mVUop(mVU_ISW) +{ + pass1 + { analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); } - pass2 { - void *ptr = mVU.regs().Mem; - + pass2 + { + void* ptr 
= mVU.regs().Mem; + mVUallocVIa(mVU, gprT2, _Is_); if (!_Is_) xXOR(gprT2, gprT2); if (_Imm11_ != 0) xADD(gprT2, _Imm11_); - mVUaddrFix (mVU, gprT2q); + mVUaddrFix(mVU, gprT2q); mVUallocVIa(mVU, gprT1, _It_); writeBackISW(mVU, ptr, gprT2q); mVU.profiler.EmitOp(opISW); } - pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } + pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } } -mVUop(mVU_ISWR) { - pass1 { +mVUop(mVU_ISWR) +{ + pass1 + { analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); - analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); } - pass2 { - void *ptr = mVU.regs().Mem; + analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); + } + pass2 + { + void* ptr = mVU.regs().Mem; xAddressReg is = xEmptyReg; - if (_Is_) { + if (_Is_) + { mVUallocVIa(mVU, gprT2, _Is_); - mVUaddrFix (mVU, gprT2q); + mVUaddrFix(mVU, gprT2q); is = gprT2q; } mVUallocVIa(mVU, gprT1, _It_); @@ -963,10 +1086,12 @@ mVUop(mVU_ISWR) { // LQ/LQD/LQI //------------------------------------------------------------------ -mVUop(mVU_LQ) { +mVUop(mVU_LQ) +{ pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); } - pass2 { - void *ptr = mVU.regs().Mem; + pass2 + { + void* ptr = mVU.regs().Mem; mVUallocVIa(mVU, gprT2, _Is_); if (!_Is_) xXOR(gprT2, gprT2); @@ -982,26 +1107,35 @@ mVUop(mVU_LQ) { pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } } -mVUop(mVU_LQD) { +mVUop(mVU_LQD) +{ pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); } - pass2 { - void * ptr = mVU.regs().Mem; + pass2 + { + void* ptr = mVU.regs().Mem; xAddressReg is = xEmptyReg; - if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case + if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case + { mVUallocVIa(mVU, gprT2, _Is_); xSUB(gprT2b, 1); - if (_Is_) mVUallocVIb(mVU, gprT2, _Is_); - mVUaddrFix (mVU, gprT2q); + if (_Is_) + mVUallocVIb(mVU, gprT2, _Is_); + mVUaddrFix(mVU, gprT2q); is = gprT2q; } - else { - ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8))); + 
else + { + ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize - 8))); } - if (!mVUlow.noWriteVF) { + if (!mVUlow.noWriteVF) + { const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - if (is.IsEmpty()) { + if (is.IsEmpty()) + { mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W); - } else { + } + else + { mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W); } mVU.regAlloc->clearNeeded(Ft); @@ -1011,12 +1145,15 @@ mVUop(mVU_LQD) { pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_); } } -mVUop(mVU_LQI) { +mVUop(mVU_LQI) +{ pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); } - pass2 { - void *ptr = mVU.regs().Mem; + pass2 + { + void* ptr = mVU.regs().Mem; xAddressReg is = xEmptyReg; - if (_Is_) { + if (_Is_) + { mVUallocVIa(mVU, gprT1, _Is_); xMOV(gprT2, gprT1); xADD(gprT1b, 1); @@ -1024,13 +1161,13 @@ mVUop(mVU_LQI) { mVUaddrFix (mVU, gprT2q); is = gprT2q; } - if (!mVUlow.noWriteVF) { + if (!mVUlow.noWriteVF) + { const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - if (is.IsEmpty()) { + if (is.IsEmpty()) mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W); - } else { + else mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W); - } mVU.regAlloc->clearNeeded(Ft); } mVU.profiler.EmitOp(opLQI); @@ -1042,10 +1179,12 @@ mVUop(mVU_LQI) { // SQ/SQD/SQI //------------------------------------------------------------------ -mVUop(mVU_SQ) { +mVUop(mVU_SQ) +{ pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); } - pass2 { - void * ptr = mVU.regs().Mem; + pass2 + { + void* ptr = mVU.regs().Mem; mVUallocVIa(mVU, gprT2, _It_); if (!_It_) @@ -1062,50 +1201,56 @@ mVUop(mVU_SQ) { pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); } } -mVUop(mVU_SQD) { +mVUop(mVU_SQD) +{ pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); } - pass2 { - void *ptr = mVU.regs().Mem; + pass2 + { + void* ptr = mVU.regs().Mem; xAddressReg it = xEmptyReg; - if (_It_ || isVU0) {// Access VU1 regs mem-map in !_It_ case + if (_It_ || isVU0) // Access VU1 regs mem-map in 
!_It_ case + { mVUallocVIa(mVU, gprT2, _It_); xSUB(gprT2b, 1); - if (_It_) mVUallocVIb(mVU, gprT2, _It_); - mVUaddrFix (mVU, gprT2q); + if (_It_) + mVUallocVIb(mVU, gprT2, _It_); + mVUaddrFix(mVU, gprT2q); it = gprT2q; } - else { - ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8))); + else + { + ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize - 8))); } const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - if (it.IsEmpty()) { + if (it.IsEmpty()) mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1); - } else { + else mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1); - } mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opSQD); } pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); } } -mVUop(mVU_SQI) { +mVUop(mVU_SQI) +{ pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); } - pass2 { - void *ptr = mVU.regs().Mem; - if (_It_) { + pass2 + { + void* ptr = mVU.regs().Mem; + if (_It_) + { mVUallocVIa(mVU, gprT1, _It_); xMOV(gprT2, gprT1); xADD(gprT1b, 1); mVUallocVIb(mVU, gprT1, _It_); - mVUaddrFix (mVU, gprT2q); + mVUaddrFix(mVU, gprT2q); } const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - if (_It_) { + if (_It_) mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1); - } else { + else mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1); - } mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opSQI); } @@ -1116,10 +1261,13 @@ mVUop(mVU_SQI) { // RINIT/RGET/RNEXT/RXOR //------------------------------------------------------------------ -mVUop(mVU_RINIT) { +mVUop(mVU_RINIT) +{ pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); } - pass2 { - if (_Fs_ || (_Fsf_ == 3)) { + pass2 + { + if (_Fs_ || (_Fsf_ == 3)) + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); xMOVD(gprT1, Fs); xAND(gprT1, 0x007fffff); @@ -1127,24 +1275,30 @@ mVUop(mVU_RINIT) { xMOV(ptr32[Rmem], gprT1); mVU.regAlloc->clearNeeded(Fs); } - else xMOV(ptr32[Rmem], 0x3f800000); + else + xMOV(ptr32[Rmem], 0x3f800000); mVU.profiler.EmitOp(opRINIT); } 
pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); } } -static __fi void mVU_RGET_(mV, const x32& Rreg) { - if (!mVUlow.noWriteVF) { +static __fi void mVU_RGET_(mV, const x32& Rreg) +{ + if (!mVUlow.noWriteVF) + { const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); xMOVDZX(Ft, Rreg); - if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0); + if (!_XYZW_SS) + mVUunpack_xyzw(Ft, Ft, 0); mVU.regAlloc->clearNeeded(Ft); } } -mVUop(mVU_RGET) { +mVUop(mVU_RGET) +{ pass1 { mVUanalyzeR2(mVU, _Ft_, true); } - pass2 { + pass2 + { xMOV(gprT1, ptr32[Rmem]); mVU_RGET_(mVU, gprT1); mVU.profiler.EmitOp(opRGET); @@ -1152,9 +1306,11 @@ mVUop(mVU_RGET) { pass3 { mVUlog("RGET.%s vf%02d, R", _XYZW_String, _Ft_); } } -mVUop(mVU_RNEXT) { +mVUop(mVU_RNEXT) +{ pass1 { mVUanalyzeR2(mVU, _Ft_, false); } - pass2 { + pass2 + { // algorithm from www.project-fao.org xMOV(gprT3, ptr32[Rmem]); xMOV(gprT1, gprT3); @@ -1171,20 +1327,23 @@ mVUop(mVU_RNEXT) { xAND(gprT3, 0x007fffff); xOR (gprT3, 0x3f800000); xMOV(ptr32[Rmem], gprT3); - mVU_RGET_(mVU, gprT3); + mVU_RGET_(mVU, gprT3); mVU.profiler.EmitOp(opRNEXT); } pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); } } -mVUop(mVU_RXOR) { +mVUop(mVU_RXOR) +{ pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); } - pass2 { - if (_Fs_ || (_Fsf_ == 3)) { + pass2 + { + if (_Fs_ || (_Fsf_ == 3)) + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); xMOVD(gprT1, Fs); xAND(gprT1, 0x7fffff); - xXOR(ptr32[Rmem], gprT1); + xXOR(ptr32[Rmem], gprT1); mVU.regAlloc->clearNeeded(Fs); } mVU.profiler.EmitOp(opRXOR); @@ -1196,13 +1355,15 @@ mVUop(mVU_RXOR) { // WaitP/WaitQ //------------------------------------------------------------------ -mVUop(mVU_WAITP) { +mVUop(mVU_WAITP) +{ pass1 { mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? 
(mVUregs.p - 1) : 0)); } pass2 { mVU.profiler.EmitOp(opWAITP); } pass3 { mVUlog("WAITP"); } } -mVUop(mVU_WAITQ) { +mVUop(mVU_WAITQ) +{ pass1 { mVUstall = std::max(mVUstall, mVUregs.q); } pass2 { mVU.profiler.EmitOp(opWAITQ); } pass3 { mVUlog("WAITQ"); } @@ -1212,14 +1373,17 @@ mVUop(mVU_WAITQ) { // XTOP/XITOP //------------------------------------------------------------------ -mVUop(mVU_XTOP) { - pass1 { +mVUop(mVU_XTOP) +{ + pass1 + { if (!_It_) mVUlow.isNOP = true; analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1); } - pass2 { + pass2 + { xMOVZX(gprT1, ptr16[&mVU.getVifRegs().top]); mVUallocVIb(mVU, gprT1, _It_); mVU.profiler.EmitOp(opXTOP); @@ -1227,16 +1391,19 @@ mVUop(mVU_XTOP) { pass3 { mVUlog("XTOP vi%02d", _Ft_); } } -mVUop(mVU_XITOP) { - pass1 { +mVUop(mVU_XITOP) +{ + pass1 + { if (!_It_) mVUlow.isNOP = true; analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1); } - pass2 { + pass2 + { xMOVZX(gprT1, ptr16[&mVU.getVifRegs().itop]); - xAND (gprT1, isVU1 ? 0x3ff : 0xff); + xAND(gprT1, isVU1 ? 
0x3ff : 0xff); mVUallocVIb(mVU, gprT1, _It_); mVU.profiler.EmitOp(opXITOP); } @@ -1247,22 +1414,26 @@ mVUop(mVU_XITOP) { // XGkick //------------------------------------------------------------------ -void __fastcall mVU_XGKICK_(u32 addr) { +void __fastcall mVU_XGKICK_(u32 addr) +{ addr = (addr & 0x3ff) * 16; u32 diff = 0x4000 - addr; u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, addr); - if (size > diff) { + if (size > diff) + { //DevCon.WriteLn(Color_Green, "microVU1: XGkick Wrap!"); - gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData( &vuRegs[1].Mem[addr], diff,true); - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[0],size-diff,true); + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&vuRegs[1].Mem[addr], diff, true); + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[0], size - diff, true); } - else { + else + { gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[addr], size, true); } } -static __fi void mVU_XGKICK_DELAY(mV) { +static __fi void mVU_XGKICK_DELAY(mV) +{ mVUbackupRegs(mVU); #if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution @@ -1273,10 +1444,13 @@ static __fi void mVU_XGKICK_DELAY(mV) { mVUrestoreRegs(mVU); } -mVUop(mVU_XGKICK) { +mVUop(mVU_XGKICK) +{ pass1 { mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES); } - pass2 { - if (mVUinfo.doXGKICK) { // check for XGkick Transfer + pass2 + { + if (mVUinfo.doXGKICK) // check for XGkick Transfer + { mVU_XGKICK_DELAY(mVU); mVUinfo.doXGKICK = false; } @@ -1292,7 +1466,8 @@ mVUop(mVU_XGKICK) { // Branches/Jumps //------------------------------------------------------------------ -void setBranchA(mP, int x, int _x_) { +void setBranchA(mP, int x, int _x_) +{ bool isBranchDelaySlot = false; incPC(-2); @@ -1300,13 +1475,15 @@ void setBranchA(mP, int x, int _x_) { isBranchDelaySlot = true; incPC(2); - pass1 { - if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) { + pass1 + { + 
if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) + { DevCon.WriteLn(Color_Green, "microVU%d: Branch Optimization", mVU.index); mVUlow.isNOP = true; return; } - mVUbranch = x; + mVUbranch = x; mVUlow.branch = x; } pass2 { if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) { return; } mVUbranch = x; } @@ -1314,8 +1491,10 @@ void setBranchA(mP, int x, int _x_) { pass4 { if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) { return; } mVUbranch = x; } } -void condEvilBranch(mV, int JMPcc) { - if (mVUlow.badBranch) { +void condEvilBranch(mV, int JMPcc) +{ + if (mVUlow.badBranch) + { xMOV(ptr32[&mVU.branch], gprT1); xMOV(ptr32[&mVU.badBranch], branchAddrN(mVU)); @@ -1334,14 +1513,17 @@ void condEvilBranch(mV, int JMPcc) { xMOV(ptr32[&mVU.evilBranch], gprT1); cJMP.SetTarget(); incPC(-2); - if(mVUlow.branch >= 9) DevCon.Warning("Conditional in JALR/JR delay slot - If game broken report to PCSX2 Team"); + if (mVUlow.branch >= 9) + DevCon.Warning("Conditional in JALR/JR delay slot - If game broken report to PCSX2 Team"); incPC(2); } -mVUop(mVU_B) { +mVUop(mVU_B) +{ setBranchA(mX, 1, 0); pass1 { mVUanalyzeNormBranch(mVU, 0, false); } - pass2 { + pass2 + { if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddrN(mVU)); } if (mVUlow.evilBranch) { xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU)); } mVU.profiler.EmitOp(opB); @@ -1349,11 +1531,13 @@ mVUop(mVU_B) { pass3 { mVUlog("B [%04x]", branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_BAL) { +mVUop(mVU_BAL) +{ setBranchA(mX, 2, _It_); pass1 { mVUanalyzeNormBranch(mVU, _It_, true); } - pass2 { - if(!mVUlow.evilBranch) + pass2 + { + if (!mVUlow.evilBranch) { xMOV(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); @@ -1366,127 +1550,191 @@ mVUop(mVU_BAL) { pass3 { mVUlog("BAL vi%02d [%04x]", _Ft_, branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_IBEQ) { +mVUop(mVU_IBEQ) +{ setBranchA(mX, 3, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else 
mVUallocVIa(mVU, gprT1, _Is_); - - if (mVUlow.memReadIt) xXOR(gprT1, ptr32[&mVU.VIbackup]); - else { mVUallocVIa(mVU, gprT2, _It_); xXOR(gprT1, gprT2); } + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_Equal); + if (mVUlow.memReadIt) + xXOR(gprT1, ptr32[&mVU.VIbackup]); + else + { + mVUallocVIa(mVU, gprT2, _It_); + xXOR(gprT1, gprT2); + } + + if (!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_Equal); mVU.profiler.EmitOp(opIBEQ); } pass3 { mVUlog("IBEQ vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_IBGEZ) { +mVUop(mVU_IBGEZ) +{ setBranchA(mX, 4, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else mVUallocVIa(mVU, gprT1, _Is_); - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_GreaterOrEqual); + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); + if (!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_GreaterOrEqual); mVU.profiler.EmitOp(opIBGEZ); } pass3 { mVUlog("IBGEZ vi%02d [%04x]", _Fs_, branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_IBGTZ) { +mVUop(mVU_IBGTZ) +{ setBranchA(mX, 5, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else mVUallocVIa(mVU, gprT1, _Is_); - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_Greater); + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); + if (!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_Greater); mVU.profiler.EmitOp(opIBGTZ); } pass3 { mVUlog("IBGTZ vi%02d [%04x]", _Fs_, branchAddr(mVU), 
branchAddr(mVU)); } } -mVUop(mVU_IBLEZ) { +mVUop(mVU_IBLEZ) +{ setBranchA(mX, 6, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else mVUallocVIa(mVU, gprT1, _Is_); - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_LessOrEqual); + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); + if (!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_LessOrEqual); mVU.profiler.EmitOp(opIBLEZ); } pass3 { mVUlog("IBLEZ vi%02d [%04x]", _Fs_, branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_IBLTZ) { +mVUop(mVU_IBLTZ) +{ setBranchA(mX, 7, 0); pass1 { mVUanalyzeCondBranch1(mVU, _Is_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else mVUallocVIa(mVU, gprT1, _Is_); - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_Less); + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); + if (!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_Less); mVU.profiler.EmitOp(opIBLTZ); } pass3 { mVUlog("IBLTZ vi%02d [%04x]", _Fs_, branchAddr(mVU), branchAddr(mVU)); } } -mVUop(mVU_IBNE) { +mVUop(mVU_IBNE) +{ setBranchA(mX, 8, 0); pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); } - pass2 { - if (mVUlow.memReadIs) xMOV(gprT1, ptr32[&mVU.VIbackup]); - else mVUallocVIa(mVU, gprT1, _Is_); - - if (mVUlow.memReadIt) xXOR(gprT1, ptr32[&mVU.VIbackup]); - else { mVUallocVIa(mVU, gprT2, _It_); xXOR(gprT1, gprT2); } - - if (!(isBadOrEvil)) xMOV(ptr32[&mVU.branch], gprT1); - else condEvilBranch(mVU, Jcc_NotEqual); + pass2 + { + if (mVUlow.memReadIs) + xMOV(gprT1, ptr32[&mVU.VIbackup]); + else + mVUallocVIa(mVU, gprT1, _Is_); + + if (mVUlow.memReadIt) + xXOR(gprT1, ptr32[&mVU.VIbackup]); + else + { + mVUallocVIa(mVU, gprT2, _It_); + xXOR(gprT1, gprT2); + } + + if 
(!(isBadOrEvil)) + xMOV(ptr32[&mVU.branch], gprT1); + else + condEvilBranch(mVU, Jcc_NotEqual); mVU.profiler.EmitOp(opIBNE); } pass3 { mVUlog("IBNE vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU)); } } -void normJumpPass2(mV) { - if (!mVUlow.constJump.isValid || mVUlow.evilBranch) { +void normJumpPass2(mV) +{ + if (!mVUlow.constJump.isValid || mVUlow.evilBranch) + { mVUallocVIa(mVU, gprT1, _Is_); xSHL(gprT1, 3); xAND(gprT1, mVU.microMemSize - 8); - - if (!mVUlow.evilBranch) { xMOV(ptr32[&mVU.branch], gprT1 ); } - else { xMOV(ptr32[&mVU.evilBranch], gprT1 ); } + + if (!mVUlow.evilBranch) + { + xMOV(ptr32[&mVU.branch], gprT1); + } + else + { + xMOV(ptr32[&mVU.evilBranch], gprT1); + } //If delay slot is conditional, it uses badBranch to go to its target - if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); } + if (mVUlow.badBranch) + { + xADD(gprT1, 8); + xMOV(ptr32[&mVU.badBranch], gprT1); + } } } -mVUop(mVU_JR) { +mVUop(mVU_JR) +{ mVUbranch = 9; pass1 { mVUanalyzeJump(mVU, _Is_, 0, false); } - pass2 { normJumpPass2(mVU); mVU.profiler.EmitOp(opJR); } + pass2 + { + normJumpPass2(mVU); + mVU.profiler.EmitOp(opJR); + } pass3 { mVUlog("JR [vi%02d]", _Fs_); } } -mVUop(mVU_JALR) { +mVUop(mVU_JALR) +{ mVUbranch = 10; pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); } - pass2 { + pass2 + { normJumpPass2(mVU); - if(!mVUlow.evilBranch) + if (!mVUlow.evilBranch) { xMOV(gprT1, bSaveAddr); mVUallocVIb(mVU, gprT1, _It_); } - if(mVUlow.evilBranch) + if (mVUlow.evilBranch) { incPC(-2); - if(mVUlow.branch >= 9) //Previous branch is a jump of some type so - //we need to take the branch address from the register it uses. + if (mVUlow.branch >= 9) // Previous branch is a jump of some type so we need to take the branch address from the register it uses. { DevCon.Warning("Linking JALR from JALR/JR branch target! 
- If game broken report to PCSX2 Team"); mVUallocVIa(mVU, gprT1, _Is_); @@ -1495,9 +1743,10 @@ mVUop(mVU_JALR) { incPC(2); mVUallocVIb(mVU, gprT1, _It_); } - else incPC(2); + else + incPC(2); } - + mVU.profiler.EmitOp(opJALR); } pass3 { mVUlog("JALR vi%02d, [vi%02d]", _Ft_, _Fs_); } diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index eeb62d06a7..bbd09eeaa2 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -38,23 +38,23 @@ void setupMacroOp(int mode, const char* opName) memset(&microVU0.prog.IRinfo.info[0], 0, sizeof(microVU0.prog.IRinfo.info[0])); iFlushCall(FLUSH_EVERYTHING); microVU0.regAlloc->reset(); - if (mode & 0x01) - { // Q-Reg will be Read + if (mode & 0x01) // Q-Reg will be Read + { xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]); } - if (mode & 0x08) - { // Clip Instruction - microVU0.prog.IRinfo.info[0].cFlag.write = 0xff; + if (mode & 0x08) // Clip Instruction + { + microVU0.prog.IRinfo.info[0].cFlag.write = 0xff; microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff; } - if (mode & 0x10) - { // Update Status/Mac Flags - microVU0.prog.IRinfo.info[0].sFlag.doFlag = true; + if (mode & 0x10) // Update Status/Mac Flags + { + microVU0.prog.IRinfo.info[0].sFlag.doFlag = true; microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true; - microVU0.prog.IRinfo.info[0].sFlag.write = 0; - microVU0.prog.IRinfo.info[0].sFlag.lastWrite = 0; - microVU0.prog.IRinfo.info[0].mFlag.doFlag = true; - microVU0.prog.IRinfo.info[0].mFlag.write = 0xff; + microVU0.prog.IRinfo.info[0].sFlag.write = 0; + microVU0.prog.IRinfo.info[0].sFlag.lastWrite = 0; + microVU0.prog.IRinfo.info[0].mFlag.doFlag = true; + microVU0.prog.IRinfo.info[0].mFlag.write = 0xff; //Denormalize mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); @@ -64,20 +64,20 @@ void setupMacroOp(int mode, const char* opName) void endMacroOp(int mode) { - if (mode & 0x02) - { // Q-Reg was Written To + if (mode & 0x02) // Q-Reg was Written To + { xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL],
xmmPQ); } - if (mode & 0x10) - { // Status/Mac Flags were Updated + if (mode & 0x10) // Status/Mac Flags were Updated + { // Normalize mVUallocSFLAGc(eax, gprF0, 0); xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax); } microVU0.regAlloc->flushAll(); - if (mode & 0x10) - { // Update VU0 Status/Mac instances after flush to avoid corrupting anything + if (mode & 0x10) // Update VU0 Status/Mac instances after flush to avoid corrupting anything + { mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); xMOVDZX(xmmT1, eax); xSHUF.PS(xmmT1, xmmT1, 0); @@ -124,91 +124,91 @@ void endMacroOp(int mode) // Macro VU - Redirect Upper Instructions //------------------------------------------------------------------ -REC_COP2_mVU0(ABS, "ABS", 0x00); -REC_COP2_mVU0(ITOF0, "ITOF0", 0x00); -REC_COP2_mVU0(ITOF4, "ITOF4", 0x00); +REC_COP2_mVU0(ABS, "ABS", 0x00); +REC_COP2_mVU0(ITOF0, "ITOF0", 0x00); +REC_COP2_mVU0(ITOF4, "ITOF4", 0x00); REC_COP2_mVU0(ITOF12, "ITOF12", 0x00); REC_COP2_mVU0(ITOF15, "ITOF15", 0x00); -REC_COP2_mVU0(FTOI0, "FTOI0", 0x00); -REC_COP2_mVU0(FTOI4, "FTOI4", 0x00); +REC_COP2_mVU0(FTOI0, "FTOI0", 0x00); +REC_COP2_mVU0(FTOI4, "FTOI4", 0x00); REC_COP2_mVU0(FTOI12, "FTOI12", 0x00); REC_COP2_mVU0(FTOI15, "FTOI15", 0x00); -REC_COP2_mVU0(ADD, "ADD", 0x10); -REC_COP2_mVU0(ADDi, "ADDi", 0x10); -REC_COP2_mVU0(ADDq, "ADDq", 0x11); -REC_COP2_mVU0(ADDx, "ADDx", 0x10); -REC_COP2_mVU0(ADDy, "ADDy", 0x10); -REC_COP2_mVU0(ADDz, "ADDz", 0x10); -REC_COP2_mVU0(ADDw, "ADDw", 0x10); -REC_COP2_mVU0(ADDA, "ADDA", 0x10); -REC_COP2_mVU0(ADDAi, "ADDAi", 0x10); -REC_COP2_mVU0(ADDAq, "ADDAq", 0x11); -REC_COP2_mVU0(ADDAx, "ADDAx", 0x10); -REC_COP2_mVU0(ADDAy, "ADDAy", 0x10); -REC_COP2_mVU0(ADDAz, "ADDAz", 0x10); -REC_COP2_mVU0(ADDAw, "ADDAw", 0x10); -REC_COP2_mVU0(SUB, "SUB", 0x10); -REC_COP2_mVU0(SUBi, "SUBi", 0x10); -REC_COP2_mVU0(SUBq, "SUBq", 0x11); -REC_COP2_mVU0(SUBx, "SUBx", 0x10); -REC_COP2_mVU0(SUBy, "SUBy", 0x10); -REC_COP2_mVU0(SUBz, "SUBz", 0x10); -REC_COP2_mVU0(SUBw, "SUBw", 0x10); 
-REC_COP2_mVU0(SUBA, "SUBA", 0x10); -REC_COP2_mVU0(SUBAi, "SUBAi", 0x10); -REC_COP2_mVU0(SUBAq, "SUBAq", 0x11); -REC_COP2_mVU0(SUBAx, "SUBAx", 0x10); -REC_COP2_mVU0(SUBAy, "SUBAy", 0x10); -REC_COP2_mVU0(SUBAz, "SUBAz", 0x10); -REC_COP2_mVU0(SUBAw, "SUBAw", 0x10); -REC_COP2_mVU0(MUL, "MUL", 0x10); -REC_COP2_mVU0(MULi, "MULi", 0x10); -REC_COP2_mVU0(MULq, "MULq", 0x11); -REC_COP2_mVU0(MULx, "MULx", 0x10); -REC_COP2_mVU0(MULy, "MULy", 0x10); -REC_COP2_mVU0(MULz, "MULz", 0x10); -REC_COP2_mVU0(MULw, "MULw", 0x10); -REC_COP2_mVU0(MULA, "MULA", 0x10); -REC_COP2_mVU0(MULAi, "MULAi", 0x10); -REC_COP2_mVU0(MULAq, "MULAq", 0x11); -REC_COP2_mVU0(MULAx, "MULAx", 0x10); -REC_COP2_mVU0(MULAy, "MULAy", 0x10); -REC_COP2_mVU0(MULAz, "MULAz", 0x10); -REC_COP2_mVU0(MULAw, "MULAw", 0x10); -REC_COP2_mVU0(MAX, "MAX", 0x00); -REC_COP2_mVU0(MAXi, "MAXi", 0x00); -REC_COP2_mVU0(MAXx, "MAXx", 0x00); -REC_COP2_mVU0(MAXy, "MAXy", 0x00); -REC_COP2_mVU0(MAXz, "MAXz", 0x00); -REC_COP2_mVU0(MAXw, "MAXw", 0x00); -REC_COP2_mVU0(MINI, "MINI", 0x00); -REC_COP2_mVU0(MINIi, "MINIi", 0x00); -REC_COP2_mVU0(MINIx, "MINIx", 0x00); -REC_COP2_mVU0(MINIy, "MINIy", 0x00); -REC_COP2_mVU0(MINIz, "MINIz", 0x00); -REC_COP2_mVU0(MINIw, "MINIw", 0x00); -REC_COP2_mVU0(MADD, "MADD", 0x10); -REC_COP2_mVU0(MADDi, "MADDi", 0x10); -REC_COP2_mVU0(MADDq, "MADDq", 0x11); -REC_COP2_mVU0(MADDx, "MADDx", 0x10); -REC_COP2_mVU0(MADDy, "MADDy", 0x10); -REC_COP2_mVU0(MADDz, "MADDz", 0x10); -REC_COP2_mVU0(MADDw, "MADDw", 0x10); -REC_COP2_mVU0(MADDA, "MADDA", 0x10); +REC_COP2_mVU0(ADD, "ADD", 0x10); +REC_COP2_mVU0(ADDi, "ADDi", 0x10); +REC_COP2_mVU0(ADDq, "ADDq", 0x11); +REC_COP2_mVU0(ADDx, "ADDx", 0x10); +REC_COP2_mVU0(ADDy, "ADDy", 0x10); +REC_COP2_mVU0(ADDz, "ADDz", 0x10); +REC_COP2_mVU0(ADDw, "ADDw", 0x10); +REC_COP2_mVU0(ADDA, "ADDA", 0x10); +REC_COP2_mVU0(ADDAi, "ADDAi", 0x10); +REC_COP2_mVU0(ADDAq, "ADDAq", 0x11); +REC_COP2_mVU0(ADDAx, "ADDAx", 0x10); +REC_COP2_mVU0(ADDAy, "ADDAy", 0x10); +REC_COP2_mVU0(ADDAz, "ADDAz", 0x10); 
+REC_COP2_mVU0(ADDAw, "ADDAw", 0x10); +REC_COP2_mVU0(SUB, "SUB", 0x10); +REC_COP2_mVU0(SUBi, "SUBi", 0x10); +REC_COP2_mVU0(SUBq, "SUBq", 0x11); +REC_COP2_mVU0(SUBx, "SUBx", 0x10); +REC_COP2_mVU0(SUBy, "SUBy", 0x10); +REC_COP2_mVU0(SUBz, "SUBz", 0x10); +REC_COP2_mVU0(SUBw, "SUBw", 0x10); +REC_COP2_mVU0(SUBA, "SUBA", 0x10); +REC_COP2_mVU0(SUBAi, "SUBAi", 0x10); +REC_COP2_mVU0(SUBAq, "SUBAq", 0x11); +REC_COP2_mVU0(SUBAx, "SUBAx", 0x10); +REC_COP2_mVU0(SUBAy, "SUBAy", 0x10); +REC_COP2_mVU0(SUBAz, "SUBAz", 0x10); +REC_COP2_mVU0(SUBAw, "SUBAw", 0x10); +REC_COP2_mVU0(MUL, "MUL", 0x10); +REC_COP2_mVU0(MULi, "MULi", 0x10); +REC_COP2_mVU0(MULq, "MULq", 0x11); +REC_COP2_mVU0(MULx, "MULx", 0x10); +REC_COP2_mVU0(MULy, "MULy", 0x10); +REC_COP2_mVU0(MULz, "MULz", 0x10); +REC_COP2_mVU0(MULw, "MULw", 0x10); +REC_COP2_mVU0(MULA, "MULA", 0x10); +REC_COP2_mVU0(MULAi, "MULAi", 0x10); +REC_COP2_mVU0(MULAq, "MULAq", 0x11); +REC_COP2_mVU0(MULAx, "MULAx", 0x10); +REC_COP2_mVU0(MULAy, "MULAy", 0x10); +REC_COP2_mVU0(MULAz, "MULAz", 0x10); +REC_COP2_mVU0(MULAw, "MULAw", 0x10); +REC_COP2_mVU0(MAX, "MAX", 0x00); +REC_COP2_mVU0(MAXi, "MAXi", 0x00); +REC_COP2_mVU0(MAXx, "MAXx", 0x00); +REC_COP2_mVU0(MAXy, "MAXy", 0x00); +REC_COP2_mVU0(MAXz, "MAXz", 0x00); +REC_COP2_mVU0(MAXw, "MAXw", 0x00); +REC_COP2_mVU0(MINI, "MINI", 0x00); +REC_COP2_mVU0(MINIi, "MINIi", 0x00); +REC_COP2_mVU0(MINIx, "MINIx", 0x00); +REC_COP2_mVU0(MINIy, "MINIy", 0x00); +REC_COP2_mVU0(MINIz, "MINIz", 0x00); +REC_COP2_mVU0(MINIw, "MINIw", 0x00); +REC_COP2_mVU0(MADD, "MADD", 0x10); +REC_COP2_mVU0(MADDi, "MADDi", 0x10); +REC_COP2_mVU0(MADDq, "MADDq", 0x11); +REC_COP2_mVU0(MADDx, "MADDx", 0x10); +REC_COP2_mVU0(MADDy, "MADDy", 0x10); +REC_COP2_mVU0(MADDz, "MADDz", 0x10); +REC_COP2_mVU0(MADDw, "MADDw", 0x10); +REC_COP2_mVU0(MADDA, "MADDA", 0x10); REC_COP2_mVU0(MADDAi, "MADDAi", 0x10); REC_COP2_mVU0(MADDAq, "MADDAq", 0x11); REC_COP2_mVU0(MADDAx, "MADDAx", 0x10); REC_COP2_mVU0(MADDAy, "MADDAy", 0x10); REC_COP2_mVU0(MADDAz, "MADDAz", 
0x10); REC_COP2_mVU0(MADDAw, "MADDAw", 0x10); -REC_COP2_mVU0(MSUB, "MSUB", 0x10); -REC_COP2_mVU0(MSUBi, "MSUBi", 0x10); -REC_COP2_mVU0(MSUBq, "MSUBq", 0x11); -REC_COP2_mVU0(MSUBx, "MSUBx", 0x10); -REC_COP2_mVU0(MSUBy, "MSUBy", 0x10); -REC_COP2_mVU0(MSUBz, "MSUBz", 0x10); -REC_COP2_mVU0(MSUBw, "MSUBw", 0x10); -REC_COP2_mVU0(MSUBA, "MSUBA", 0x10); +REC_COP2_mVU0(MSUB, "MSUB", 0x10); +REC_COP2_mVU0(MSUBi, "MSUBi", 0x10); +REC_COP2_mVU0(MSUBq, "MSUBq", 0x11); +REC_COP2_mVU0(MSUBx, "MSUBx", 0x10); +REC_COP2_mVU0(MSUBy, "MSUBy", 0x10); +REC_COP2_mVU0(MSUBz, "MSUBz", 0x10); +REC_COP2_mVU0(MSUBw, "MSUBw", 0x10); +REC_COP2_mVU0(MSUBA, "MSUBA", 0x10); REC_COP2_mVU0(MSUBAi, "MSUBAi", 0x10); REC_COP2_mVU0(MSUBAq, "MSUBAq", 0x11); REC_COP2_mVU0(MSUBAx, "MSUBAx", 0x10); @@ -217,34 +217,34 @@ REC_COP2_mVU0(MSUBAz, "MSUBAz", 0x10); REC_COP2_mVU0(MSUBAw, "MSUBAw", 0x10); REC_COP2_mVU0(OPMULA, "OPMULA", 0x10); REC_COP2_mVU0(OPMSUB, "OPMSUB", 0x10); -REC_COP2_mVU0(CLIP, "CLIP", 0x08); +REC_COP2_mVU0(CLIP, "CLIP", 0x08); //------------------------------------------------------------------ // Macro VU - Redirect Lower Instructions //------------------------------------------------------------------ -REC_COP2_mVU0(DIV, "DIV", 0x12); -REC_COP2_mVU0(SQRT, "SQRT", 0x12); +REC_COP2_mVU0(DIV, "DIV", 0x12); +REC_COP2_mVU0(SQRT, "SQRT", 0x12); REC_COP2_mVU0(RSQRT, "RSQRT", 0x12); -REC_COP2_mVU0(IADD, "IADD", 0x04); +REC_COP2_mVU0(IADD, "IADD", 0x04); REC_COP2_mVU0(IADDI, "IADDI", 0x04); -REC_COP2_mVU0(IAND, "IAND", 0x04); -REC_COP2_mVU0(IOR, "IOR", 0x04); -REC_COP2_mVU0(ISUB, "ISUB", 0x04); -REC_COP2_mVU0(ILWR, "ILWR", 0x04); -REC_COP2_mVU0(ISWR, "ISWR", 0x00); -REC_COP2_mVU0(LQI, "LQI", 0x04); -REC_COP2_mVU0(LQD, "LQD", 0x04); -REC_COP2_mVU0(SQI, "SQI", 0x00); -REC_COP2_mVU0(SQD, "SQD", 0x00); -REC_COP2_mVU0(MFIR, "MFIR", 0x04); -REC_COP2_mVU0(MTIR, "MTIR", 0x04); -REC_COP2_mVU0(MOVE, "MOVE", 0x00); -REC_COP2_mVU0(MR32, "MR32", 0x00); +REC_COP2_mVU0(IAND, "IAND", 0x04); +REC_COP2_mVU0(IOR, 
"IOR", 0x04); +REC_COP2_mVU0(ISUB, "ISUB", 0x04); +REC_COP2_mVU0(ILWR, "ILWR", 0x04); +REC_COP2_mVU0(ISWR, "ISWR", 0x00); +REC_COP2_mVU0(LQI, "LQI", 0x04); +REC_COP2_mVU0(LQD, "LQD", 0x04); +REC_COP2_mVU0(SQI, "SQI", 0x00); +REC_COP2_mVU0(SQD, "SQD", 0x00); +REC_COP2_mVU0(MFIR, "MFIR", 0x04); +REC_COP2_mVU0(MTIR, "MTIR", 0x04); +REC_COP2_mVU0(MOVE, "MOVE", 0x00); +REC_COP2_mVU0(MR32, "MR32", 0x00); REC_COP2_mVU0(RINIT, "RINIT", 0x00); -REC_COP2_mVU0(RGET, "RGET", 0x04); +REC_COP2_mVU0(RGET, "RGET", 0x04); REC_COP2_mVU0(RNEXT, "RNEXT", 0x04); -REC_COP2_mVU0(RXOR, "RXOR", 0x00); +REC_COP2_mVU0(RXOR, "RXOR", 0x00); //------------------------------------------------------------------ // Macro VU - Misc... @@ -268,10 +268,10 @@ void _setupBranchTest(u32*(jmpType)(u32), bool isLikely) recDoBranchImm(jmpType(0), isLikely); } -void recBC2F() { _setupBranchTest(JNZ32, false); } -void recBC2T() { _setupBranchTest(JZ32, false); } -void recBC2FL() { _setupBranchTest(JNZ32, true); } -void recBC2TL() { _setupBranchTest(JZ32, true); } +void recBC2F() { _setupBranchTest(JNZ32, false); } +void recBC2T() { _setupBranchTest(JZ32, false); } +void recBC2FL() { _setupBranchTest(JNZ32, true); } +void recBC2TL() { _setupBranchTest(JZ32, true); } //------------------------------------------------------------------ // Macro VU - COP2 Transfer Instructions @@ -302,8 +302,8 @@ void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) { xTEST(eax, (vuIndex) ? 
0x200 : 0x002); xForwardJZ8 skip; - xFastCall((void*)resetFunct); - xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xFastCall((void*)resetFunct); + xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); skip.SetTarget(); } @@ -335,15 +335,13 @@ static void recCFC2() skipvuidle.SetTarget(); } - if (_Rd_ == REG_STATUS_FLAG) - { // Normalize Status Flag + if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]); - } else xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]); - if (_Rd_ == REG_TPC) - { // Divide TPC register value by 8 during copying + if (_Rd_ == REG_TPC) // Divide TPC register value by 8 during copying + { // Ok, this deserves an explanation. // Accoring to the official PS2 VU0 coding manual there are 3 ways to execute a micro subroutine on VU0 // one of which is using the VCALLMSR intruction. @@ -571,282 +569,55 @@ void _vuRegsCOP22(VURegs* VU, _VURegsNum* VUregsn) {} // Recompilation void (*recCOP2t[32])() = { - rec_C2UNK, - recQMFC2, - recCFC2, - rec_C2UNK, - rec_C2UNK, - recQMTC2, - recCTC2, - rec_C2UNK, - recCOP2_BC2, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, - recCOP2_SPEC1, + rec_C2UNK, recQMFC2, recCFC2, rec_C2UNK, rec_C2UNK, recQMTC2, recCTC2, rec_C2UNK, + recCOP2_BC2, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, + recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, recCOP2_SPEC1, }; void (*recCOP2_BC2t[32])() = { - recBC2F, - recBC2T, - recBC2FL, - recBC2TL, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - 
rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, + recBC2F, recBC2T, recBC2FL, recBC2TL, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, }; void (*recCOP2SPECIAL1t[64])() = { - recVADDx, - recVADDy, - recVADDz, - recVADDw, - recVSUBx, - recVSUBy, - recVSUBz, - recVSUBw, - recVMADDx, - recVMADDy, - recVMADDz, - recVMADDw, - recVMSUBx, - recVMSUBy, - recVMSUBz, - recVMSUBw, - recVMAXx, - recVMAXy, - recVMAXz, - recVMAXw, - recVMINIx, - recVMINIy, - recVMINIz, - recVMINIw, - recVMULx, - recVMULy, - recVMULz, - recVMULw, - recVMULq, - recVMAXi, - recVMULi, - recVMINIi, - recVADDq, - recVMADDq, - recVADDi, - recVMADDi, - recVSUBq, - recVMSUBq, - recVSUBi, - recVMSUBi, - recVADD, - recVMADD, - recVMUL, - recVMAX, - recVSUB, - recVMSUB, - recVOPMSUB, - recVMINI, - recVIADD, - recVISUB, - recVIADDI, - rec_C2UNK, - recVIAND, - recVIOR, - rec_C2UNK, - rec_C2UNK, - recVCALLMS, - recVCALLMSR, - rec_C2UNK, - rec_C2UNK, - recCOP2_SPEC2, - recCOP2_SPEC2, - recCOP2_SPEC2, - recCOP2_SPEC2, + recVADDx, recVADDy, recVADDz, recVADDw, recVSUBx, recVSUBy, recVSUBz, recVSUBw, + recVMADDx, recVMADDy, recVMADDz, recVMADDw, recVMSUBx, recVMSUBy, recVMSUBz, recVMSUBw, + recVMAXx, recVMAXy, recVMAXz, recVMAXw, recVMINIx, recVMINIy, recVMINIz, recVMINIw, + recVMULx, recVMULy, recVMULz, recVMULw, recVMULq, recVMAXi, recVMULi, recVMINIi, + recVADDq, recVMADDq, recVADDi, recVMADDi, recVSUBq, recVMSUBq, recVSUBi, recVMSUBi, + recVADD, recVMADD, recVMUL, recVMAX, recVSUB, recVMSUB, 
recVOPMSUB, recVMINI, + recVIADD, recVISUB, recVIADDI, rec_C2UNK, recVIAND, recVIOR, rec_C2UNK, rec_C2UNK, + recVCALLMS, recVCALLMSR,rec_C2UNK, rec_C2UNK, recCOP2_SPEC2, recCOP2_SPEC2, recCOP2_SPEC2, recCOP2_SPEC2, }; void (*recCOP2SPECIAL2t[128])() = { - recVADDAx, - recVADDAy, - recVADDAz, - recVADDAw, - recVSUBAx, - recVSUBAy, - recVSUBAz, - recVSUBAw, - recVMADDAx, - recVMADDAy, - recVMADDAz, - recVMADDAw, - recVMSUBAx, - recVMSUBAy, - recVMSUBAz, - recVMSUBAw, - recVITOF0, - recVITOF4, - recVITOF12, - recVITOF15, - recVFTOI0, - recVFTOI4, - recVFTOI12, - recVFTOI15, - recVMULAx, - recVMULAy, - recVMULAz, - recVMULAw, - recVMULAq, - recVABS, - recVMULAi, - recVCLIP, - recVADDAq, - recVMADDAq, - recVADDAi, - recVMADDAi, - recVSUBAq, - recVMSUBAq, - recVSUBAi, - recVMSUBAi, - recVADDA, - recVMADDA, - recVMULA, - rec_C2UNK, - recVSUBA, - recVMSUBA, - recVOPMULA, - recVNOP, - recVMOVE, - recVMR32, - rec_C2UNK, - rec_C2UNK, - recVLQI, - recVSQI, - recVLQD, - recVSQD, - recVDIV, - recVSQRT, - recVRSQRT, - recVWAITQ, - recVMTIR, - recVMFIR, - recVILWR, - recVISWR, - recVRNEXT, - recVRGET, - recVRINIT, - recVRXOR, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, - rec_C2UNK, + recVADDAx, recVADDAy, recVADDAz, recVADDAw, recVSUBAx, recVSUBAy, recVSUBAz, recVSUBAw, + 
recVMADDAx,recVMADDAy, recVMADDAz, recVMADDAw, recVMSUBAx, recVMSUBAy, recVMSUBAz, recVMSUBAw, + recVITOF0, recVITOF4, recVITOF12, recVITOF15, recVFTOI0, recVFTOI4, recVFTOI12, recVFTOI15, + recVMULAx, recVMULAy, recVMULAz, recVMULAw, recVMULAq, recVABS, recVMULAi, recVCLIP, + recVADDAq, recVMADDAq,recVADDAi, recVMADDAi, recVSUBAq, recVMSUBAq, recVSUBAi, recVMSUBAi, + recVADDA, recVMADDA, recVMULA, rec_C2UNK, recVSUBA, recVMSUBA, recVOPMULA, recVNOP, + recVMOVE, recVMR32, rec_C2UNK, rec_C2UNK, recVLQI, recVSQI, recVLQD, recVSQD, + recVDIV, recVSQRT, recVRSQRT, recVWAITQ, recVMTIR, recVMFIR, recVILWR, recVISWR, + recVRNEXT, recVRGET, recVRINIT, recVRXOR, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, + rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, rec_C2UNK, }; -namespace R5900 -{ - namespace Dynarec - { - namespace OpcodeImpl - { - void recCOP2() { recCOP2t[_Rs_](); } - } // namespace OpcodeImpl - } // namespace Dynarec +namespace R5900 { +namespace Dynarec { +namespace OpcodeImpl { + void recCOP2() { recCOP2t[_Rs_](); } +} // namespace OpcodeImpl +} // namespace Dynarec } // namespace R5900 void recCOP2_BC2() { recCOP2_BC2t[_Rt_](); } void recCOP2_SPEC1() diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 9f730e0389..78e508a87b 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -26,48 +26,49 @@ struct microVU; // Global Variables 
//------------------------------------------------------------------ -struct mVU_Globals { - u32 absclip[4], signbit[4], minvals[4], maxvals[4]; - u32 one[4]; - u32 Pi4[4]; - u32 T1[4], T2[4], T3[4], T4[4], T5[4], T6[4], T7[4], T8[4]; - u32 S2[4], S3[4], S4[4], S5[4]; - u32 E1[4], E2[4], E3[4], E4[4], E5[4], E6[4]; - float FTOI_4[4], FTOI_12[4], FTOI_15[4]; - float ITOF_4[4], ITOF_12[4], ITOF_15[4]; +struct mVU_Globals +{ + u32 absclip[4], signbit[4], minvals[4], maxvals[4]; + u32 one[4]; + u32 Pi4[4]; + u32 T1[4], T2[4], T3[4], T4[4], T5[4], T6[4], T7[4], T8[4]; + u32 S2[4], S3[4], S4[4], S5[4]; + u32 E1[4], E2[4], E3[4], E4[4], E5[4], E6[4]; + float FTOI_4[4], FTOI_12[4], FTOI_15[4]; + float ITOF_4[4], ITOF_12[4], ITOF_15[4]; }; -#define __four(val) { val, val, val, val } +#define __four(val) { val, val, val, val } static const __aligned(32) mVU_Globals mVUglob = { - __four(0x7fffffff), // absclip - __four(0x80000000), // signbit - __four(0xff7fffff), // minvals - __four(0x7f7fffff), // maxvals - __four(0x3f800000), // ONE! - __four(0x3f490fdb), // PI4! - __four(0x3f7ffff5), // T1 - __four(0xbeaaa61c), // T5 - __four(0x3e4c40a6), // T2 - __four(0xbe0e6c63), // T3 - __four(0x3dc577df), // T4 - __four(0xbd6501c4), // T6 - __four(0x3cb31652), // T7 - __four(0xbb84d7e7), // T8 - __four(0xbe2aaaa4), // S2 - __four(0x3c08873e), // S3 - __four(0xb94fb21f), // S4 - __four(0x362e9c14), // S5 - __four(0x3e7fffa8), // E1 - __four(0x3d0007f4), // E2 - __four(0x3b29d3ff), // E3 - __four(0x3933e553), // E4 - __four(0x36b63510), // E5 - __four(0x353961ac), // E6 - __four(16.0), // FTOI_4 - __four(4096.0), // FTOI_12 - __four(32768.0), // FTOI_15 - __four(0.0625f), // ITOF_4 - __four(0.000244140625), // ITOF_12 + __four(0x7fffffff), // absclip + __four(0x80000000), // signbit + __four(0xff7fffff), // minvals + __four(0x7f7fffff), // maxvals + __four(0x3f800000), // ONE! + __four(0x3f490fdb), // PI4! 
+ __four(0x3f7ffff5), // T1 + __four(0xbeaaa61c), // T5 + __four(0x3e4c40a6), // T2 + __four(0xbe0e6c63), // T3 + __four(0x3dc577df), // T4 + __four(0xbd6501c4), // T6 + __four(0x3cb31652), // T7 + __four(0xbb84d7e7), // T8 + __four(0xbe2aaaa4), // S2 + __four(0x3c08873e), // S3 + __four(0xb94fb21f), // S4 + __four(0x362e9c14), // S5 + __four(0x3e7fffa8), // E1 + __four(0x3d0007f4), // E2 + __four(0x3b29d3ff), // E3 + __four(0x3933e553), // E4 + __four(0x36b63510), // E5 + __four(0x353961ac), // E6 + __four(16.0), // FTOI_4 + __four(4096.0), // FTOI_12 + __four(32768.0), // FTOI_15 + __four(0.0625f), // ITOF_4 + __four(0.000244140625), // ITOF_12 __four(0.000030517578125) // ITOF_15 }; @@ -91,47 +92,47 @@ static const char branchSTR[16][8] = { // Helper Macros //------------------------------------------------------------------ -#define _Ft_ ((mVU.code >> 16) & 0x1F) // The ft part of the instruction register -#define _Fs_ ((mVU.code >> 11) & 0x1F) // The fs part of the instruction register -#define _Fd_ ((mVU.code >> 6) & 0x1F) // The fd part of the instruction register +#define _Ft_ ((mVU.code >> 16) & 0x1F) // The ft part of the instruction register +#define _Fs_ ((mVU.code >> 11) & 0x1F) // The fs part of the instruction register +#define _Fd_ ((mVU.code >> 6) & 0x1F) // The fd part of the instruction register -#define _It_ ((mVU.code >> 16) & 0xF) // The it part of the instruction register -#define _Is_ ((mVU.code >> 11) & 0xF) // The is part of the instruction register -#define _Id_ ((mVU.code >> 6) & 0xF) // The id part of the instruction register +#define _It_ ((mVU.code >> 16) & 0xF) // The it part of the instruction register +#define _Is_ ((mVU.code >> 11) & 0xF) // The is part of the instruction register +#define _Id_ ((mVU.code >> 6) & 0xF) // The id part of the instruction register -#define _X ((mVU.code>>24) & 0x1) -#define _Y ((mVU.code>>23) & 0x1) -#define _Z ((mVU.code>>22) & 0x1) -#define _W ((mVU.code>>21) & 0x1) +#define _X ((mVU.code >> 24) & 
0x1) +#define _Y ((mVU.code >> 23) & 0x1) +#define _Z ((mVU.code >> 22) & 0x1) +#define _W ((mVU.code >> 21) & 0x1) -#define _X_Y_Z_W (((mVU.code >> 21 ) & 0xF)) -#define _XYZW_SS (_X+_Y+_Z+_W==1) -#define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8)) -#define _XYZW_PS (_X_Y_Z_W == 0xf) -#define _XYZWss(x) ((x==8) || (x==4) || (x==2) || (x==1)) +#define _X_Y_Z_W (((mVU.code >> 21) & 0xF)) +#define _XYZW_SS (_X + _Y + _Z + _W == 1) +#define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8)) +#define _XYZW_PS (_X_Y_Z_W == 0xf) +#define _XYZWss(x) ((x == 8) || (x == 4) || (x == 2) || (x == 1)) -#define _bc_ (mVU.code & 0x3) -#define _bc_x ((mVU.code & 0x3) == 0) -#define _bc_y ((mVU.code & 0x3) == 1) -#define _bc_z ((mVU.code & 0x3) == 2) -#define _bc_w ((mVU.code & 0x3) == 3) +#define _bc_ (mVU.code & 0x3) +#define _bc_x ((mVU.code & 0x3) == 0) +#define _bc_y ((mVU.code & 0x3) == 1) +#define _bc_z ((mVU.code & 0x3) == 2) +#define _bc_w ((mVU.code & 0x3) == 3) -#define _Fsf_ ((mVU.code >> 21) & 0x03) -#define _Ftf_ ((mVU.code >> 23) & 0x03) +#define _Fsf_ ((mVU.code >> 21) & 0x03) +#define _Ftf_ ((mVU.code >> 23) & 0x03) -#define _Imm5_ ((s16) (((mVU.code & 0x400) ? 0xfff0 : 0) | ((mVU.code >> 6) & 0xf))) -#define _Imm11_ ((s32) ((mVU.code & 0x400) ? (0xfffffc00 | (mVU.code & 0x3ff)) : (mVU.code & 0x3ff))) -#define _Imm12_ ((u32)((((mVU.code >> 21) & 0x1) << 11) | (mVU.code & 0x7ff))) -#define _Imm15_ ((u32) (((mVU.code >> 10) & 0x7800) | (mVU.code & 0x7ff))) -#define _Imm24_ ((u32) (mVU.code & 0xffffff)) +#define _Imm5_ ((s16) (((mVU.code & 0x400) ? 0xfff0 : 0) | ((mVU.code >> 6) & 0xf))) +#define _Imm11_ ((s32) ((mVU.code & 0x400) ? 
(0xfffffc00 | (mVU.code & 0x3ff)) : (mVU.code & 0x3ff))) +#define _Imm12_ ((u32)((((mVU.code >> 21) & 0x1) << 11) | (mVU.code & 0x7ff))) +#define _Imm15_ ((u32) (((mVU.code >> 10) & 0x7800) | (mVU.code & 0x7ff))) +#define _Imm24_ ((u32) (mVU.code & 0xffffff)) -#define isCOP2 (mVU.cop2 != 0) -#define isVU1 (mVU.index != 0) -#define isVU0 (mVU.index == 0) -#define getIndex (isVU1 ? 1 : 0) -#define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) -#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12))) -#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2: 3))) +#define isCOP2 (mVU.cop2 != 0) +#define isVU1 (mVU.index != 0) +#define isVU0 (mVU.index == 0) +#define getIndex (isVU1 ? 1 : 0) +#define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) +#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8 : 12))) +#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2 : 3))) #define xmmT1 xmm0 // Used for regAlloc #define xmmT2 xmm1 // Used for regAlloc @@ -174,9 +175,9 @@ typedef void __fastcall Fntype_mVUrecInst(microVU& mVU, int recPass); typedef Fntype_mVUrecInst* Fnptr_mVUrecInst; // Function/Template Stuff -#define mVUx (vuIndex ? microVU1 : microVU0) -#define mVUop(opName) static void __fastcall opName (mP) -#define _mVUt template +#define mVUx (vuIndex ? 
microVU1 : microVU0) +#define mVUop(opName) static void __fastcall opName(mP) +#define _mVUt template // Define Passes #define pass1 if (recPass == 0) // Analyze @@ -194,84 +195,89 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst; // Define mVUquickSearch //------------------------------------------------------------------ extern __pagealigned u8 mVUsearchXMM[__pagesize]; -typedef u32 (__fastcall *mVUCall)(void*, void*); +typedef u32(__fastcall* mVUCall)(void*, void*); #define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf) -#define mVUemitSearch() { mVUcustomSearch(); } +#define mVUemitSearch() \ + { \ + mVUcustomSearch(); \ + } //------------------------------------------------------------------ // Misc Macros... #define mVUcurProg mVU.prog.cur[0] -#define mVUblocks mVU.prog.cur->block -#define mVUir mVU.prog.IRinfo -#define mVUbranch mVU.prog.IRinfo.branch -#define mVUcycles mVU.prog.IRinfo.cycles -#define mVUcount mVU.prog.IRinfo.count -#define mVUpBlock mVU.prog.IRinfo.pBlock -#define mVUblock mVU.prog.IRinfo.block -#define mVUregs mVU.prog.IRinfo.block.pState -#define mVUregsTemp mVU.prog.IRinfo.regsTemp -#define iPC mVU.prog.IRinfo.curPC +#define mVUblocks mVU.prog.cur->block +#define mVUir mVU.prog.IRinfo +#define mVUbranch mVU.prog.IRinfo.branch +#define mVUcycles mVU.prog.IRinfo.cycles +#define mVUcount mVU.prog.IRinfo.count +#define mVUpBlock mVU.prog.IRinfo.pBlock +#define mVUblock mVU.prog.IRinfo.block +#define mVUregs mVU.prog.IRinfo.block.pState +#define mVUregsTemp mVU.prog.IRinfo.regsTemp +#define iPC mVU.prog.IRinfo.curPC #define mVUsFlagHack mVU.prog.IRinfo.sFlagHack -#define mVUconstReg mVU.prog.IRinfo.constReg -#define mVUstartPC mVU.prog.IRinfo.startPC -#define mVUinfo mVU.prog.IRinfo.info[iPC / 2] -#define mVUstall mVUinfo.stall -#define mVUup mVUinfo.uOp -#define mVUlow mVUinfo.lOp -#define sFLAG mVUinfo.sFlag -#define mFLAG mVUinfo.mFlag -#define cFLAG mVUinfo.cFlag +#define mVUconstReg 
mVU.prog.IRinfo.constReg +#define mVUstartPC mVU.prog.IRinfo.startPC +#define mVUinfo mVU.prog.IRinfo.info[iPC / 2] +#define mVUstall mVUinfo.stall +#define mVUup mVUinfo.uOp +#define mVUlow mVUinfo.lOp +#define sFLAG mVUinfo.sFlag +#define mFLAG mVUinfo.mFlag +#define cFLAG mVUinfo.cFlag #define mVUrange (mVUcurProg.ranges[0])[0] -#define isEvilBlock (mVUpBlock->pState.blockType == 2) +#define isEvilBlock (mVUpBlock->pState.blockType == 2) #define isBadOrEvil (mVUlow.badBranch || mVUlow.evilBranch) -#define xPC ((iPC / 2) * 8) -#define curI ((u32*)mVU.regs().Micro)[iPC] //mVUcurProg.data[iPC] -#define setCode() { mVU.code = curI; } -#define bSaveAddr (((xPC + 16) & (mVU.microMemSize-8)) / 8) -#define shufflePQ (((mVU.p) ? 0xb0 : 0xe0) | ((mVU.q) ? 0x01 : 0x04)) -#define Rmem &mVU.regs().VI[REG_R].UL -#define aWrap(x, m) ((x > m) ? 0 : x) -#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4)))) +#define xPC ((iPC / 2) * 8) +#define curI ((u32*)mVU.regs().Micro)[iPC] //mVUcurProg.data[iPC] +#define setCode() { mVU.code = curI; } +#define bSaveAddr (((xPC + 16) & (mVU.microMemSize-8)) / 8) +#define shufflePQ (((mVU.p) ? 0xb0 : 0xe0) | ((mVU.q) ? 0x01 : 0x04)) +#define Rmem &mVU.regs().VI[REG_R].UL +#define aWrap(x, m) ((x > m) ? 0 : x) +#define shuffleSS(x) ((x == 1) ? (0x27) : ((x == 2) ? (0xc6) : ((x == 4) ? (0xe1) : (0xe4)))) #define clampE CHECK_VU_EXTRA_OVERFLOW #define varPrint(x) DevCon.WriteLn(#x " = %d", (int)x) #define islowerOP ((iPC & 1) == 0) -#define blockCreate(addr) { \ - if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); \ -} +#define blockCreate(addr) \ + { \ + if (!mVUblocks[addr]) \ + mVUblocks[addr] = new microBlockManager(); \ + } // Fetches the PC and instruction opcode relative to the current PC. 
Used to rewind and // fast-forward the IR state while calculating VU pipeline conditions (branches, writebacks, etc) -#define incPC(x) { iPC = ((iPC + (x)) & mVU.progMemMask); mVU.code = curI; } -#define incPC2(x) { iPC = ((iPC + (x)) & mVU.progMemMask); } +#define incPC(x) { iPC = ((iPC + (x)) & mVU.progMemMask); mVU.code = curI; } +#define incPC2(x) { iPC = ((iPC + (x)) & mVU.progMemMask); } // Flag Info (Set if next-block's first 4 ops will read current-block's flags) -#define __Status (mVUregs.needExactMatch & 1) -#define __Mac (mVUregs.needExactMatch & 2) -#define __Clip (mVUregs.needExactMatch & 4) +#define __Status (mVUregs.needExactMatch & 1) +#define __Mac (mVUregs.needExactMatch & 2) +#define __Clip (mVUregs.needExactMatch & 4) // Pass 3 Helper Macros (Used for program logging) -#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x"))) -#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x"))) -#define xyzwStr(x,s) (_X_Y_Z_W == x) ? s : +#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x"))) +#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x"))) +#define xyzwStr(x, s) (_X_Y_Z_W == x) ? s: #define _XYZW_String (xyzwStr(1, "w") (xyzwStr(2, "z") (xyzwStr(3, "zw") (xyzwStr(4, "y") (xyzwStr(5, "yw") (xyzwStr(6, "yz") (xyzwStr(7, "yzw") (xyzwStr(8, "x") (xyzwStr(9, "xw") (xyzwStr(10, "xz") (xyzwStr(11, "xzw") (xyzwStr(12, "xy") (xyzwStr(13, "xyw") (xyzwStr(14, "xyz") "xyzw")))))))))))))) -#define _BC_String (_bc_x ? "x" : (_bc_y ? "y" : (_bc_z ? "z" : "w"))) +#define _BC_String (_bc_x ? "x" : (_bc_y ? "y" : (_bc_z ? 
"z" : "w"))) #define mVUlogFtFs() { mVUlog(".%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); } -#define mVUlogFd() { mVUlog(".%s vf%02d, vf%02d", _XYZW_String, _Fd_, _Fs_); } -#define mVUlogACC() { mVUlog(".%s ACC, vf%02d", _XYZW_String, _Fs_); } -#define mVUlogFt() { mVUlog(", vf%02d", _Ft_); } -#define mVUlogBC() { mVUlog(", vf%02d%s", _Ft_, _BC_String); } -#define mVUlogI() { mVUlog(", I"); } -#define mVUlogQ() { mVUlog(", Q"); } +#define mVUlogFd() { mVUlog(".%s vf%02d, vf%02d", _XYZW_String, _Fd_, _Fs_); } +#define mVUlogACC() { mVUlog(".%s ACC, vf%02d", _XYZW_String, _Fs_); } +#define mVUlogFt() { mVUlog(", vf%02d", _Ft_); } +#define mVUlogBC() { mVUlog(", vf%02d%s", _Ft_, _BC_String); } +#define mVUlogI() { mVUlog(", I"); } +#define mVUlogQ() { mVUlog(", Q"); } #define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); } // Program Logging... #ifdef mVUlogProg -#define mVUlog ((isVU1) ? __mVULog<1> : __mVULog<0>) -#define mVUdumpProg __mVUdumpProgram + #define mVUlog ((isVU1) ? __mVULog<1> : __mVULog<0>) + #define mVUdumpProg __mVUdumpProgram #else -#define mVUlog(...) if (0) {} -#define mVUdumpProg(...) if (0) {} + #define mVUlog(...) if (0) {} + #define mVUdumpProg(...) if (0) {} #endif //------------------------------------------------------------------ @@ -344,12 +350,12 @@ static const bool doDBitHandling = false; //------------------------------------------------------------------ // Status Flag Speed Hack -#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack) +#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack) // This hack only updates the Status Flag on blocks that will read it. // Most blocks do not read status flags, so this is a big speedup. 
// Min/Max Speed Hack -#define CHECK_VU_MINMAXHACK 0 //(EmuConfig.Speedhacks.vuMinMax) +#define CHECK_VU_MINMAXHACK 0 //(EmuConfig.Speedhacks.vuMinMax) // This hack uses SSE min/max instructions instead of emulated "logical min/max" // The PS2 does not consider denormals as zero on the mini/max opcodes. // This speedup is minor, but on AMD X2 CPUs it can be a 1~3% speedup @@ -365,6 +371,6 @@ static const bool doDBitHandling = false; //------------------------------------------------------------------ -extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW=false); +extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW = false); extern void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW); extern void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw); diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index d0cf7245b2..4df80d3ebe 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -21,7 +21,8 @@ void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw) { - switch ( xyzw ) { + switch (xyzw) + { case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX case 1: xPSHUF.D(dstreg, srcreg, 0x55); break; // YYYY case 2: xPSHUF.D(dstreg, srcreg, 0xaa); break; // ZZZZ @@ -31,19 +32,21 @@ void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw) void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw) { - switch( xyzw ) { - case 8: xMOVSSZX(reg, ptr32[ptr]); break; // X - case 4: xMOVSSZX(reg, ptr32[ptr+4]); break; // Y - case 2: xMOVSSZX(reg, ptr32[ptr+8]); break; // Z - case 1: xMOVSSZX(reg, ptr32[ptr+12]); break; // W - default: xMOVAPS (reg, ptr128[ptr]); break; + switch (xyzw) + { + case 8: xMOVSSZX(reg, ptr32[ptr ]); break; // X + case 4: xMOVSSZX(reg, ptr32[ptr + 4]); break; // Y + case 2: xMOVSSZX(reg, ptr32[ptr + 8]); break; // Z + case 1: xMOVSSZX(reg, ptr32[ptr + 12]); break; // W + default: xMOVAPS (reg, ptr128[ptr]); break; } } 
void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs) { xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]); - if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0); + if (!_XYZWss(xyzw)) + xSHUF.PS(reg, reg, 0); } // Modifies the Source Reg! @@ -58,44 +61,67 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) xMOVAPS(ptr128[ptr], xmmT2); return;*/ - switch ( xyzw ) { - case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - break; // YW - case 6: xPSHUF.D(reg, reg, 0xc9); - xMOVL.PS(ptr64[ptr+4], reg); - break; // YZ - case 7: xMOVH.PS(ptr64[ptr+8], reg); - xEXTRACTPS(ptr32[ptr+4], reg, 1); - break; // YZW - case 9: xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - break; // XW - case 10: xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - break; //XZ - case 11: xMOVSS(ptr32[ptr], reg); - xMOVH.PS(ptr64[ptr+8], reg); - break; //XZW - case 13: xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - break; // XYW - case 14: xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - break; // XYZ - case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1); - xMOVSS(ptr32[ptr+4], reg); - break; // Y - case 2: if (!modXYZW) mVUunpack_xyzw(reg, reg, 2); - xMOVSS(ptr32[ptr+8], reg); - break; // Z - case 1: if (!modXYZW) mVUunpack_xyzw(reg, reg, 3); - xMOVSS(ptr32[ptr+12], reg); - break; // W - case 8: xMOVSS(ptr32[ptr], reg); break; // X - case 12: xMOVL.PS(ptr64[ptr], reg); break; // XY - case 3: xMOVH.PS(ptr64[ptr+8], reg); break; // ZW - default: xMOVAPS(ptr128[ptr], reg); break; // XYZW + switch (xyzw) + { + case 5: // YW + xEXTRACTPS(ptr32[ptr + 4], reg, 1); + xEXTRACTPS(ptr32[ptr + 12], reg, 3); + break; + case 6: // YZ + xPSHUF.D(reg, reg, 0xc9); + xMOVL.PS(ptr64[ptr + 4], reg); + break; + case 7: // YZW + xMOVH.PS(ptr64[ptr + 8], reg); + xEXTRACTPS(ptr32[ptr + 4], reg, 1); + break; + case 9: // XW + xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr + 12], reg, 3); + break; + case 10: // XZ + 
xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr + 8], reg, 2); + break; + case 11: // XZW + xMOVSS(ptr32[ptr], reg); + xMOVH.PS(ptr64[ptr + 8], reg); + break; + case 13: // XYW + xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr + 12], reg, 3); + break; + case 14: // XYZ + xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr + 8], reg, 2); + break; + case 4: // Y + if (!modXYZW) + mVUunpack_xyzw(reg, reg, 1); + xMOVSS(ptr32[ptr + 4], reg); + break; + case 2: // Z + if (!modXYZW) + mVUunpack_xyzw(reg, reg, 2); + xMOVSS(ptr32[ptr + 8], reg); + break; + case 1: // W + if (!modXYZW) + mVUunpack_xyzw(reg, reg, 3); + xMOVSS(ptr32[ptr + 12], reg); + break; + case 8: // X + xMOVSS(ptr32[ptr], reg); + break; + case 12: // XY + xMOVL.PS(ptr64[ptr], reg); + break; + case 3: // ZW + xMOVH.PS(ptr64[ptr + 8], reg); + break; + default: // XYZW + xMOVAPS(ptr128[ptr], reg); + break; } } @@ -103,7 +129,7 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) { xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) + if ((dest != src) && (xyzw != 0)) { if (xyzw == 0x8) xMOVSS(dest, src); @@ -111,15 +137,16 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) xMOVAPS(dest, src); else { - if (modXYZW) { - if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; } + if (modXYZW) + { + if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; } else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; } else if (xyzw == 4) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 1, 0)); return; } } xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); xBLEND.PS(dest, src, xyzw); } - } + } } //------------------------------------------------------------------ @@ -127,48 +154,64 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) 
//------------------------------------------------------------------ // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI) -__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) { - if (toMemory) { - for(int i = 0; i < 8; i++) { +__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) +{ + if (toMemory) + { + for (int i = 0; i < 8; i++) + { xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); } } - else { + else + { mVU.regAlloc->flushAll(); // Flush Regalloc xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ); } } // Restore Volatile Regs -__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) { - if (fromMemory) { - for(int i = 0; i < 8; i++) { +__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) +{ + if (fromMemory) + { + for (int i = 0; i < 8; i++) + { xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); } } - else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); + else + xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); } -class mVUScopedXMMBackup { +class mVUScopedXMMBackup +{ microVU& mVU; bool fromMemory; + public: - mVUScopedXMMBackup(microVU& mVU, bool fromMemory): mVU(mVU), fromMemory(fromMemory) { + mVUScopedXMMBackup(microVU& mVU, bool fromMemory) + : mVU(mVU) , fromMemory(fromMemory) + { mVUbackupRegs(mVU, fromMemory); } - ~mVUScopedXMMBackup() { + ~mVUScopedXMMBackup() + { mVUrestoreRegs(mVU, fromMemory); } }; -_mVUt void __fc mVUprintRegs() { +_mVUt void __fc mVUprintRegs() +{ microVU& mVU = mVUx; - for(int i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) + { Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i, mVU.xmmBackup[i][0], mVU.xmmBackup[i][1], mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]); } - for(int i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) + { Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i, (float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1], (float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]); @@ -176,17 +219,20 @@ _mVUt void __fc 
mVUprintRegs() { } // Gets called by mVUaddrFix at execution-time -static void __fc mVUwarningRegAccess(u32 prog, u32 pc) { +static void __fc mVUwarningRegAccess(u32 prog, u32 pc) +{ Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); } -static void __fc mVUTBit() { +static void __fc mVUTBit() +{ u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUTBit, std::memory_order_release); if (old & VU_Thread::InterruptFlagVUTBit) DevCon.Warning("Old TBit not registered"); } -static void __fc mVUEBit() { +static void __fc mVUEBit() +{ vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUEBit, std::memory_order_release); } @@ -203,29 +249,35 @@ static inline u32 branchAddr(const mV) return ((((iPC + 2) + (_Imm11_ * 2)) & mVU.progMemMask) * 4); } -static void __fc mVUwaitMTVU() { - if (IsDevBuild) DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!"); +static void __fc mVUwaitMTVU() +{ + if (IsDevBuild) + DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!"); vu1Thread.WaitVU(); } // Transforms the Address in gprReg to valid VU0/VU1 Address __fi void mVUaddrFix(mV, const xAddressReg& gprReg) { - if (isVU1) { + if (isVU1) + { xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around xSHL(xRegister32(gprReg.Id), 4); } - else { + else + { xTEST(xRegister32(gprReg.Id), 0x400); - xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs + xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around xForwardJump32 jmpB; jmpA.SetTarget(); - if (THREAD_VU1) { + if (THREAD_VU1) + { { mVUScopedXMMBackup mVUSave(mVU, true); - xScopedSavedRegisters save {gprT1q, gprT2q, gprT3q}; - if (IsDevBuild && !isCOP2) { // Lets see which games do this! + xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q}; + if (IsDevBuild && !isCOP2) // Lets see which games do this! 
+ { xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1! xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode... xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd); @@ -244,13 +296,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg) // Micro VU - Custom SSE Instructions //------------------------------------------------------------------ -struct SSEMasks { u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4]; }; +struct SSEMasks +{ + u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4]; +}; static const __aligned16 SSEMasks sseMasks = { {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}, {0x00000000, 0x40000000, 0x00000000, 0x40000000}, - {0x80000000, 0xffffffff, 0xffffffff, 0xffffffff} + {0x80000000, 0xffffffff, 0xffffffff, 0xffffffff}, }; @@ -260,7 +315,8 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in; const xmm& t2 = t2in.IsEmpty() ? mVU.regAlloc->allocReg() : t2in; - if (0) { // use double comparison + if (0) // use double comparison + { // ZW xPSHUF.D(t1, to, 0xfa); xPAND (t1, ptr128[sseMasks.MIN_MAX_1]); @@ -283,7 +339,8 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c xSHUF.PS(to, t1, 0x88); } - else { // use integer comparison + else // use integer comparison + { const xmm& c1 = min ? t2 : t1; const xmm& c2 = min ? t1 : t2; @@ -312,12 +369,13 @@ void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min) { const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in; xSHUF.PS(to, from, 0); - xPAND (to, ptr128[sseMasks.MIN_MAX_1]); - xPOR (to, ptr128[sseMasks.MIN_MAX_2]); + xPAND (to, ptr128[sseMasks.MIN_MAX_1]); + xPOR (to, ptr128[sseMasks.MIN_MAX_2]); xPSHUF.D(t1, to, 0xee); if (min) xMIN.PD(to, t1); else xMAX.PD(to, t1); - if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); + if (t1 != t1in) + mVU.regAlloc->clearNeeded(t1); } // Not Used! 
- TriAce games only need a portion of this code to boot (see function below) @@ -375,7 +433,8 @@ void ADD_SS_Single_Guard_Bit(microVU& mVU, const xmm& to, const xmm& from, const case_end4.SetTarget(); xADD.SS(to, from); - if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); + if (t1 != t1in) + mVU.regAlloc->clearNeeded(t1); } // Turns out only this is needed to get TriAce games booting with mVU @@ -408,37 +467,48 @@ void ADD_SS_TriAceHack(microVU& mVU, const xmm& to, const xmm& from) xADD.SS(to, from); } -#define clampOp(opX, isPS) { \ - mVUclamp3(mVU, to, t1, (isPS)?0xf:0x8); \ - mVUclamp3(mVU, from, t1, (isPS)?0xf:0x8); \ - opX(to, from); \ - mVUclamp4(to, t1, (isPS)?0xf:0x8); \ -} +#define clampOp(opX, isPS) \ + do { \ + mVUclamp3(mVU, to, t1, (isPS) ? 0xf : 0x8); \ + mVUclamp3(mVU, from, t1, (isPS) ? 0xf : 0x8); \ + opX(to, from); \ + mVUclamp4(to, t1, (isPS) ? 0xf : 0x8); \ + } while (0) void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) { - if (CHECK_VU_MINMAXHACK) { xMAX.PS(to, from); } - else { MIN_MAX_PS(mVU, to, from, t1, t2, false); } + if (CHECK_VU_MINMAXHACK) + xMAX.PS(to, from); + else + MIN_MAX_PS(mVU, to, from, t1, t2, false); } void SSE_MINPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) { - if (CHECK_VU_MINMAXHACK) { xMIN.PS(to, from); } - else { MIN_MAX_PS(mVU, to, from, t1, t2, true); } + if (CHECK_VU_MINMAXHACK) + xMIN.PS(to, from); + else + MIN_MAX_PS(mVU, to, from, t1, t2, true); } void SSE_MAXSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) { - if (CHECK_VU_MINMAXHACK) { xMAX.SS(to, from); } - else { MIN_MAX_SS(mVU, to, from, t1, false); } + if (CHECK_VU_MINMAXHACK) + xMAX.SS(to, from); + else + MIN_MAX_SS(mVU, to, from, t1, false); } void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) { - if (CHECK_VU_MINMAXHACK) { xMIN.SS(to, from); } - else { 
MIN_MAX_SS(mVU, to, from, t1, true); } + if (CHECK_VU_MINMAXHACK) + xMIN.SS(to, from); + else + MIN_MAX_SS(mVU, to, from, t1, true); } void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) { - if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, false); } - else { ADD_SS_TriAceHack(mVU, to, from); } + if (!CHECK_VUADDSUBHACK) + clampOp(xADD.SS, false); + else + ADD_SS_TriAceHack(mVU, to, from); } // Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK... @@ -487,7 +557,8 @@ __pagealigned u8 mVUsearchXMM[__pagesize]; // Generates a custom optimized block-search function // Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this) -void mVUcustomSearch() { +void mVUcustomSearch() +{ HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ReadWrite()); memset(mVUsearchXMM, 0xcc, __pagesize); xSetPtr(mVUsearchXMM); @@ -496,35 +567,35 @@ void mVUcustomSearch() { xPCMP.EQD(xmm0, ptr32[arg2reg]); xMOVAPS (xmm1, ptr32[arg1reg + 0x10]); xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]); - xPAND (xmm0, xmm1); + xPAND (xmm0, xmm1); xMOVMSKPS(eax, xmm0); - xCMP (eax, 0xf); + xCMP (eax, 0xf); xForwardJL8 exitPoint; xMOVAPS (xmm0, ptr32[arg1reg + 0x20]); xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]); xMOVAPS (xmm1, ptr32[arg1reg + 0x30]); xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]); - xPAND (xmm0, xmm1); + xPAND (xmm0, xmm1); xMOVAPS (xmm2, ptr32[arg1reg + 0x40]); xPCMP.EQD(xmm2, ptr32[arg2reg + 0x40]); xMOVAPS (xmm3, ptr32[arg1reg + 0x50]); xPCMP.EQD(xmm3, ptr32[arg2reg + 0x50]); - xPAND (xmm2, xmm3); + xPAND (xmm2, xmm3); - xMOVAPS (xmm4, ptr32[arg1reg + 0x60]); + xMOVAPS (xmm4, ptr32[arg1reg + 0x60]); xPCMP.EQD(xmm4, ptr32[arg2reg + 0x60]); - xMOVAPS (xmm5, ptr32[arg1reg + 0x70]); + xMOVAPS (xmm5, ptr32[arg1reg + 0x70]); xPCMP.EQD(xmm5, ptr32[arg2reg + 0x70]); - xPAND (xmm4, xmm5); + xPAND (xmm4, xmm5); xMOVAPS (xmm6, ptr32[arg1reg + 0x80]); xPCMP.EQD(xmm6, ptr32[arg2reg + 0x80]); xMOVAPS (xmm7, ptr32[arg1reg + 
0x90]); xPCMP.EQD(xmm7, ptr32[arg2reg + 0x90]); - xPAND (xmm6, xmm7); + xPAND (xmm6, xmm7); xPAND (xmm0, xmm2); xPAND (xmm4, xmm6); diff --git a/pcsx2/x86/microVU_Profiler.h b/pcsx2/x86/microVU_Profiler.h index 0f8bd0a0b7..84e9be9243 100644 --- a/pcsx2/x86/microVU_Profiler.h +++ b/pcsx2/x86/microVU_Profiler.h @@ -12,85 +12,86 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + #pragma once -enum microOpcode { +enum microOpcode +{ // Upper Instructions - opABS, opCLIP, opOPMULA, opOPMSUB, opNOP, - opADD, opADDi, opADDq, opADDx, opADDy, opADDz, opADDw, - opADDA, opADDAi, opADDAq, opADDAx, opADDAy, opADDAz, opADDAw, - opSUB, opSUBi, opSUBq, opSUBx, opSUBy, opSUBz, opSUBw, - opSUBA, opSUBAi, opSUBAq, opSUBAx, opSUBAy, opSUBAz, opSUBAw, - opMUL, opMULi, opMULq, opMULx, opMULy, opMULz, opMULw, - opMULA, opMULAi, opMULAq, opMULAx, opMULAy, opMULAz, opMULAw, - opMADD, opMADDi, opMADDq, opMADDx, opMADDy, opMADDz, opMADDw, - opMADDA, opMADDAi, opMADDAq, opMADDAx, opMADDAy, opMADDAz, opMADDAw, - opMSUB, opMSUBi, opMSUBq, opMSUBx, opMSUBy, opMSUBz, opMSUBw, - opMSUBA, opMSUBAi, opMSUBAq, opMSUBAx, opMSUBAy, opMSUBAz, opMSUBAw, - opMAX, opMAXi, opMAXx, opMAXy, opMAXz, opMAXw, - opMINI, opMINIi, opMINIx, opMINIy, opMINIz, opMINIw, + opABS, opCLIP, opOPMULA, opOPMSUB, opNOP, + opADD, opADDi, opADDq, opADDx, opADDy, opADDz, opADDw, + opADDA, opADDAi, opADDAq, opADDAx, opADDAy, opADDAz, opADDAw, + opSUB, opSUBi, opSUBq, opSUBx, opSUBy, opSUBz, opSUBw, + opSUBA, opSUBAi, opSUBAq, opSUBAx, opSUBAy, opSUBAz, opSUBAw, + opMUL, opMULi, opMULq, opMULx, opMULy, opMULz, opMULw, + opMULA, opMULAi, opMULAq, opMULAx, opMULAy, opMULAz, opMULAw, + opMADD, opMADDi, opMADDq, opMADDx, opMADDy, opMADDz, opMADDw, + opMADDA, opMADDAi, opMADDAq, opMADDAx, opMADDAy, opMADDAz, opMADDAw, + opMSUB, opMSUBi, opMSUBq, opMSUBx, opMSUBy, opMSUBz, opMSUBw, + opMSUBA, opMSUBAi, opMSUBAq, opMSUBAx, opMSUBAy, opMSUBAz, opMSUBAw, + opMAX, opMAXi, opMAXx, 
opMAXy, opMAXz, opMAXw, + opMINI, opMINIi, opMINIx, opMINIy, opMINIz, opMINIw, opFTOI0, opFTOI4, opFTOI12, opFTOI15, opITOF0, opITOF4, opITOF12, opITOF15, // Lower Instructions - opDIV, opSQRT, opRSQRT, - opIADD, opIADDI, opIADDIU, - opIAND, opIOR, - opISUB, opISUBIU, + opDIV, opSQRT, opRSQRT, + opIADD, opIADDI, opIADDIU, + opIAND, opIOR, + opISUB, opISUBIU, opMOVE, opMFIR, opMTIR, opMR32, opMFP, - opLQ, opLQD, opLQI, - opSQ, opSQD, opSQI, - opILW, opISW, opILWR, opISWR, - opRINIT, opRGET, opRNEXT, opRXOR, + opLQ, opLQD, opLQI, + opSQ, opSQD, opSQI, + opILW, opISW, opILWR, opISWR, + opRINIT, opRGET, opRNEXT, opRXOR, opWAITQ, opWAITP, opFSAND, opFSEQ, opFSOR, opFSSET, - opFMAND, opFMEQ, opFMOR, - opFCAND, opFCEQ, opFCOR, opFCSET, opFCGET, - opIBEQ, opIBGEZ, opIBGTZ, opIBLTZ, opIBLEZ, opIBNE, - opB, opBAL, opJR, opJALR, - opESADD, opERSADD, opELENG, opERLENG, - opEATANxy, opEATANxz, opESUM, opERCPR, - opESQRT, opERSQRT, opESIN, opEATAN, + opFMAND, opFMEQ, opFMOR, + opFCAND, opFCEQ, opFCOR, opFCSET, opFCGET, + opIBEQ, opIBGEZ, opIBGTZ, opIBLTZ, opIBLEZ, opIBNE, + opB, opBAL, opJR, opJALR, + opESADD, opERSADD, opELENG, opERLENG, + opEATANxy, opEATANxz, opESUM, opERCPR, + opESQRT, opERSQRT, opESIN, opEATAN, opEEXP, opXITOP, opXTOP, opXGKICK, opLastOpcode }; static const char microOpcodeName[][16] = { // Upper Instructions - "ABS", "CLIP", "OPMULA", "OPMSUB", "NOP", - "ADD", "ADDi", "ADDq", "ADDx", "ADDy", "ADDz", "ADDw", - "ADDA", "ADDAi", "ADDAq", "ADDAx", "ADDAy", "ADDAz", "ADDAw", - "SUB", "SUBi", "SUBq", "SUBx", "SUBy", "SUBz", "SUBw", - "SUBA", "SUBAi", "SUBAq", "SUBAx", "SUBAy", "SUBAz", "SUBAw", - "MUL", "MULi", "MULq", "MULx", "MULy", "MULz", "MULw", - "MULA", "MULAi", "MULAq", "MULAx", "MULAy", "MULAz", "MULAw", - "MADD", "MADDi", "MADDq", "MADDx", "MADDy", "MADDz", "MADDw", - "MADDA", "MADDAi", "MADDAq", "MADDAx", "MADDAy", "MADDAz", "MADDAw", - "MSUB", "MSUBi", "MSUBq", "MSUBx", "MSUBy", "MSUBz", "MSUBw", - "MSUBA", "MSUBAi", "MSUBAq", "MSUBAx", "MSUBAy", 
"MSUBAz", "MSUBAw", - "MAX", "MAXi", "MAXx", "MAXy", "MAXz", "MAXw", - "MINI", "MINIi", "MINIx", "MINIy", "MINIz", "MINIw", - "FTOI0", "FTOI4", "FTOI12", "FTOI15", - "ITOF0", "ITOF4", "ITOF12", "ITOF15", + "ABS", "CLIP", "OPMULA", "OPMSUB", "NOP", + "ADD", "ADDi", "ADDq", "ADDx", "ADDy", "ADDz", "ADDw", + "ADDA", "ADDAi", "ADDAq", "ADDAx", "ADDAy", "ADDAz", "ADDAw", + "SUB", "SUBi", "SUBq", "SUBx", "SUBy", "SUBz", "SUBw", + "SUBA", "SUBAi", "SUBAq", "SUBAx", "SUBAy", "SUBAz", "SUBAw", + "MUL", "MULi", "MULq", "MULx", "MULy", "MULz", "MULw", + "MULA", "MULAi", "MULAq", "MULAx", "MULAy", "MULAz", "MULAw", + "MADD", "MADDi", "MADDq", "MADDx", "MADDy", "MADDz", "MADDw", + "MADDA", "MADDAi", "MADDAq", "MADDAx", "MADDAy", "MADDAz", "MADDAw", + "MSUB", "MSUBi", "MSUBq", "MSUBx", "MSUBy", "MSUBz", "MSUBw", + "MSUBA", "MSUBAi", "MSUBAq", "MSUBAx", "MSUBAy", "MSUBAz", "MSUBAw", + "MAX", "MAXi", "MAXx", "MAXy", "MAXz", "MAXw", + "MINI", "MINIi", "MINIx", "MINIy", "MINIz", "MINIw", + "FTOI0", "FTOI4", "FTOI12", "FTOI15", + "ITOF0", "ITOF4", "ITOF12", "ITOF15", // Lower Instructions - "DIV", "SQRT", "RSQRT", - "IADD", "IADDI", "IADDIU", - "IAND", "IOR", - "ISUB", "ISUBIU", + "DIV", "SQRT", "RSQRT", + "IADD", "IADDI", "IADDIU", + "IAND", "IOR", + "ISUB", "ISUBIU", "MOVE", "MFIR", "MTIR", "MR32", "MFP", - "LQ", "LQD", "LQI", - "SQ", "SQD", "SQI", - "ILW", "ISW", "ILWR", "ISWR", - "RINIT", "RGET", "RNEXT", "RXOR", + "LQ", "LQD", "LQI", + "SQ", "SQD", "SQI", + "ILW", "ISW", "ILWR", "ISWR", + "RINIT", "RGET", "RNEXT", "RXOR", "WAITQ", "WAITP", "FSAND", "FSEQ", "FSOR", "FSSET", - "FMAND", "FMEQ", "FMOR", - "FCAND", "FCEQ", "FCOR", "FCSET", "FCGET", - "IBEQ", "IBGEZ", "IBGTZ", "IBLTZ", "IBLEZ", "IBNE", - "B", "BAL", "JR", "JALR", - "ESADD", "ERSADD", "ELENG", "ERLENG", - "EATANxy", "EATANxz", "ESUM", "ERCPR", - "ESQRT", "ERSQRT", "ESIN", "EATAN", + "FMAND", "FMEQ", "FMOR", + "FCAND", "FCEQ", "FCOR", "FCSET", "FCGET", + "IBEQ", "IBGEZ", "IBGTZ", "IBLTZ", "IBLEZ", "IBNE", + "B", "BAL", 
"JR", "JALR", + "ESADD", "ERSADD", "ELENG", "ERLENG", + "EATANxy", "EATANxz", "ESUM", "ERCPR", + "ESQRT", "ERSQRT", "ESIN", "EATAN", "EEXP", "XITOP", "XTOP", "XGKICK" }; @@ -99,43 +100,54 @@ static const char microOpcodeName[][16] = { #include #include -struct microProfiler { +struct microProfiler +{ static const u32 progLimit = 10000; u64 opStats[opLastOpcode]; u32 progCount; int index; - void Reset(int _index) { memzero(*this); index = _index; } - void EmitOp(microOpcode op) { - xADD(ptr32[&(((u32*)opStats)[op*2+0])], 1); - xADC(ptr32[&(((u32*)opStats)[op*2+1])], 0); + void Reset(int _index) + { + memzero(*this); + index = _index; } - void Print() { + void EmitOp(microOpcode op) + { + xADD(ptr32[&(((u32*)opStats)[op * 2 + 0])], 1); + xADC(ptr32[&(((u32*)opStats)[op * 2 + 1])], 0); + } + void Print() + { progCount++; - if ((progCount % progLimit) == 0) { + if ((progCount % progLimit) == 0) + { u64 total = 0; - std::vector< std::pair > v; - for(int i = 0; i < opLastOpcode; i++) { + std::vector> v; + for (int i = 0; i < opLastOpcode; i++) + { total += opStats[i]; v.push_back(std::make_pair(opStats[i], i)); } - std::sort (v.begin(), v.end()); + std::sort(v.begin(), v.end()); std::reverse(v.begin(), v.end()); double dTotal = (double)total; DevCon.WriteLn("microVU%d Profiler:", index); - for(u32 i = 0; i < v.size(); i++) { - u64 count = v[i].first; - double stat = (double)count / dTotal * 100.0; + for (u32 i = 0; i < v.size(); i++) + { + u64 count = v[i].first; + double stat = (double)count / dTotal * 100.0; std::string str = microOpcodeName[v[i].second]; str.resize(8, ' '); DevCon.WriteLn("%s - [%3.4f%%][count=%u]", str.c_str(), stat, (u32)count); } - DevCon.WriteLn("Total = 0x%x%x\n\n", (u32)(u64)(total>>32),(u32)total); + DevCon.WriteLn("Total = 0x%x%x\n\n", (u32)(u64)(total >> 32), (u32)total); } } }; #else -struct microProfiler { +struct microProfiler +{ __fi void Reset(int _index) {} __fi void EmitOp(microOpcode op) {} __fi void Print() {} diff --git 
a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index 162f360361..b473016528 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -34,164 +34,164 @@ mVUop(mVUunknown); // Opcode Tables //------------------------------------------------------------------ static const Fnptr_mVUrecInst mVULOWER_OPCODE[128] = { - mVU_LQ , mVU_SQ , mVUunknown , mVUunknown, - mVU_ILW , mVU_ISW , mVUunknown , mVUunknown, - mVU_IADDIU , mVU_ISUBIU , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_FCEQ , mVU_FCSET , mVU_FCAND , mVU_FCOR, - mVU_FSEQ , mVU_FSSET , mVU_FSAND , mVU_FSOR, - mVU_FMEQ , mVUunknown , mVU_FMAND , mVU_FMOR, - mVU_FCGET , mVUunknown , mVUunknown , mVUunknown, - mVU_B , mVU_BAL , mVUunknown , mVUunknown, - mVU_JR , mVU_JALR , mVUunknown , mVUunknown, - mVU_IBEQ , mVU_IBNE , mVUunknown , mVUunknown, - mVU_IBLTZ , mVU_IBGTZ , mVU_IBLEZ , mVU_IBGEZ, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVULowerOP , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + 
mVU_LQ , mVU_SQ , mVUunknown , mVUunknown, + mVU_ILW , mVU_ISW , mVUunknown , mVUunknown, + mVU_IADDIU , mVU_ISUBIU , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_FCEQ , mVU_FCSET , mVU_FCAND , mVU_FCOR, + mVU_FSEQ , mVU_FSSET , mVU_FSAND , mVU_FSOR, + mVU_FMEQ , mVUunknown , mVU_FMAND , mVU_FMOR, + mVU_FCGET , mVUunknown , mVUunknown , mVUunknown, + mVU_B , mVU_BAL , mVUunknown , mVUunknown, + mVU_JR , mVU_JALR , mVUunknown , mVUunknown, + mVU_IBEQ , mVU_IBNE , mVUunknown , mVUunknown, + mVU_IBLTZ , mVU_IBGTZ , mVU_IBLEZ , mVU_IBGEZ, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVULowerOP , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, }; static const Fnptr_mVUrecInst mVULowerOP_T3_00_OPCODE[32] = { - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_MOVE , mVU_LQI , mVU_DIV , mVU_MTIR, - mVU_RNEXT , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , 
mVUunknown, - mVUunknown , mVU_MFP , mVU_XTOP , mVU_XGKICK, - mVU_ESADD , mVU_EATANxy , mVU_ESQRT , mVU_ESIN, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_MOVE , mVU_LQI , mVU_DIV , mVU_MTIR, + mVU_RNEXT , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVU_MFP , mVU_XTOP , mVU_XGKICK, + mVU_ESADD , mVU_EATANxy, mVU_ESQRT , mVU_ESIN, }; static const Fnptr_mVUrecInst mVULowerOP_T3_01_OPCODE[32] = { - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_MR32 , mVU_SQI , mVU_SQRT , mVU_MFIR, - mVU_RGET , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVU_XITOP , mVUunknown, - mVU_ERSADD , mVU_EATANxz , mVU_ERSQRT , mVU_EATAN, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_MR32 , mVU_SQI , mVU_SQRT , mVU_MFIR, + mVU_RGET , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVU_XITOP , mVUunknown, + mVU_ERSADD , mVU_EATANxz, mVU_ERSQRT , mVU_EATAN, }; static const Fnptr_mVUrecInst mVULowerOP_T3_10_OPCODE[32] = { - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVU_LQD , mVU_RSQRT , mVU_ILWR, - mVU_RINIT , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_ELENG , mVU_ESUM , mVU_ERCPR , mVU_EEXP, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , 
mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVU_LQD , mVU_RSQRT , mVU_ILWR, + mVU_RINIT , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ELENG , mVU_ESUM , mVU_ERCPR , mVU_EEXP, }; const Fnptr_mVUrecInst mVULowerOP_T3_11_OPCODE [32] = { - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVU_SQD , mVU_WAITQ , mVU_ISWR, - mVU_RXOR , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_ERLENG , mVUunknown , mVU_WAITP , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVU_SQD , mVU_WAITQ , mVU_ISWR, + mVU_RXOR , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ERLENG , mVUunknown , mVU_WAITP , mVUunknown, }; static const Fnptr_mVUrecInst mVULowerOP_OPCODE[64] = { - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVU_IADD , mVU_ISUB , mVU_IADDI , mVUunknown, - mVU_IAND , mVU_IOR , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , 
mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_IADD , mVU_ISUB , mVU_IADDI , mVUunknown, + mVU_IAND , mVU_IOR , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, mVULowerOP_T3_00, mVULowerOP_T3_01, mVULowerOP_T3_10, mVULowerOP_T3_11, }; static const Fnptr_mVUrecInst mVU_UPPER_OPCODE[64] = { - mVU_ADDx , mVU_ADDy , mVU_ADDz , mVU_ADDw, - mVU_SUBx , mVU_SUBy , mVU_SUBz , mVU_SUBw, - mVU_MADDx , mVU_MADDy , mVU_MADDz , mVU_MADDw, - mVU_MSUBx , mVU_MSUBy , mVU_MSUBz , mVU_MSUBw, - mVU_MAXx , mVU_MAXy , mVU_MAXz , mVU_MAXw, - mVU_MINIx , mVU_MINIy , mVU_MINIz , mVU_MINIw, - mVU_MULx , mVU_MULy , mVU_MULz , mVU_MULw, - mVU_MULq , mVU_MAXi , mVU_MULi , mVU_MINIi, - mVU_ADDq , mVU_MADDq , mVU_ADDi , mVU_MADDi, - mVU_SUBq , mVU_MSUBq , mVU_SUBi , mVU_MSUBi, - mVU_ADD , mVU_MADD , mVU_MUL , mVU_MAX, - mVU_SUB , mVU_MSUB , mVU_OPMSUB , mVU_MINI, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ADDx , mVU_ADDy , mVU_ADDz , mVU_ADDw, + mVU_SUBx , mVU_SUBy , mVU_SUBz , mVU_SUBw, + mVU_MADDx , mVU_MADDy , mVU_MADDz , mVU_MADDw, + mVU_MSUBx , mVU_MSUBy , mVU_MSUBz , mVU_MSUBw, + mVU_MAXx , mVU_MAXy , mVU_MAXz , mVU_MAXw, + mVU_MINIx , mVU_MINIy , mVU_MINIz , mVU_MINIw, + mVU_MULx , mVU_MULy , mVU_MULz , mVU_MULw, + mVU_MULq , 
mVU_MAXi , mVU_MULi , mVU_MINIi, + mVU_ADDq , mVU_MADDq , mVU_ADDi , mVU_MADDi, + mVU_SUBq , mVU_MSUBq , mVU_SUBi , mVU_MSUBi, + mVU_ADD , mVU_MADD , mVU_MUL , mVU_MAX, + mVU_SUB , mVU_MSUB , mVU_OPMSUB , mVU_MINI, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, mVU_UPPER_FD_00, mVU_UPPER_FD_01, mVU_UPPER_FD_10, mVU_UPPER_FD_11, }; static const Fnptr_mVUrecInst mVU_UPPER_FD_00_TABLE [32] = { - mVU_ADDAx , mVU_SUBAx , mVU_MADDAx , mVU_MSUBAx, - mVU_ITOF0 , mVU_FTOI0 , mVU_MULAx , mVU_MULAq, - mVU_ADDAq , mVU_SUBAq , mVU_ADDA , mVU_SUBA, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ADDAx , mVU_SUBAx , mVU_MADDAx , mVU_MSUBAx, + mVU_ITOF0 , mVU_FTOI0 , mVU_MULAx , mVU_MULAq, + mVU_ADDAq , mVU_SUBAq , mVU_ADDA , mVU_SUBA, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, }; static const Fnptr_mVUrecInst mVU_UPPER_FD_01_TABLE [32] = { - mVU_ADDAy , mVU_SUBAy , mVU_MADDAy , mVU_MSUBAy, - mVU_ITOF4 , mVU_FTOI4 , mVU_MULAy , mVU_ABS, - mVU_MADDAq , mVU_MSUBAq , mVU_MADDA , mVU_MSUBA, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ADDAy , mVU_SUBAy , mVU_MADDAy , mVU_MSUBAy, + mVU_ITOF4 , mVU_FTOI4 , mVU_MULAy , mVU_ABS, + mVU_MADDAq , mVU_MSUBAq , mVU_MADDA , mVU_MSUBA, + mVUunknown , mVUunknown , mVUunknown , 
mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, }; static const Fnptr_mVUrecInst mVU_UPPER_FD_10_TABLE [32] = { - mVU_ADDAz , mVU_SUBAz , mVU_MADDAz , mVU_MSUBAz, - mVU_ITOF12 , mVU_FTOI12 , mVU_MULAz , mVU_MULAi, - mVU_ADDAi , mVU_SUBAi , mVU_MULA , mVU_OPMULA, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ADDAz , mVU_SUBAz , mVU_MADDAz , mVU_MSUBAz, + mVU_ITOF12 , mVU_FTOI12 , mVU_MULAz , mVU_MULAi, + mVU_ADDAi , mVU_SUBAi , mVU_MULA , mVU_OPMULA, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, }; static const Fnptr_mVUrecInst mVU_UPPER_FD_11_TABLE [32] = { - mVU_ADDAw , mVU_SUBAw , mVU_MADDAw , mVU_MSUBAw, - mVU_ITOF15 , mVU_FTOI15 , mVU_MULAw , mVU_CLIP, - mVU_MADDAi , mVU_MSUBAi , mVUunknown , mVU_NOP, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, - mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVU_ADDAw , mVU_SUBAw , mVU_MADDAw , mVU_MSUBAw, + mVU_ITOF15 , mVU_FTOI15 , mVU_MULAw , mVU_CLIP, + mVU_MADDAi , mVU_MSUBAi , mVUunknown , mVU_NOP, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , mVUunknown , mVUunknown, + mVUunknown , mVUunknown , 
mVUunknown , mVUunknown, }; @@ -199,20 +199,28 @@ static const Fnptr_mVUrecInst mVU_UPPER_FD_11_TABLE [32] = { // Table Functions //------------------------------------------------------------------ -mVUop(mVU_UPPER_FD_00) { mVU_UPPER_FD_00_TABLE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVU_UPPER_FD_01) { mVU_UPPER_FD_01_TABLE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVU_UPPER_FD_10) { mVU_UPPER_FD_10_TABLE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVU_UPPER_FD_11) { mVU_UPPER_FD_11_TABLE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVULowerOP) { mVULowerOP_OPCODE [ (mVU.code & 0x3f) ](mX); } -mVUop(mVULowerOP_T3_00) { mVULowerOP_T3_00_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVULowerOP_T3_01) { mVULowerOP_T3_01_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVULowerOP_T3_10) { mVULowerOP_T3_10_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVULowerOP_T3_11) { mVULowerOP_T3_11_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } -mVUop(mVUopU) { mVU_UPPER_OPCODE [ (mVU.code & 0x3f) ](mX); } // Gets Upper Opcode -mVUop(mVUopL) { mVULOWER_OPCODE [ (mVU.code >> 25) ](mX); } // Gets Lower Opcode -mVUop(mVUunknown) { - pass1 { if (mVU.code != 0x8000033c) mVUinfo.isBadOp = true; } - pass2 { if(mVU.code != 0x8000033c) Console.Error("microVU%d: Unknown Micro VU opcode called (%x) [%04x]\n", getIndex, mVU.code, xPC); } +mVUop(mVU_UPPER_FD_00) { mVU_UPPER_FD_00_TABLE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVU_UPPER_FD_01) { mVU_UPPER_FD_01_TABLE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVU_UPPER_FD_10) { mVU_UPPER_FD_10_TABLE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVU_UPPER_FD_11) { mVU_UPPER_FD_11_TABLE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVULowerOP) { mVULowerOP_OPCODE [ (mVU.code & 0x3f) ](mX); } +mVUop(mVULowerOP_T3_00) { mVULowerOP_T3_00_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVULowerOP_T3_01) { mVULowerOP_T3_01_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVULowerOP_T3_10) { mVULowerOP_T3_10_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVULowerOP_T3_11) 
{ mVULowerOP_T3_11_OPCODE [((mVU.code >> 6) & 0x1f)](mX); } +mVUop(mVUopU) { mVU_UPPER_OPCODE [ (mVU.code & 0x3f) ](mX); } // Gets Upper Opcode +mVUop(mVUopL) { mVULOWER_OPCODE [ (mVU.code >> 25) ](mX); } // Gets Lower Opcode +mVUop(mVUunknown) +{ + pass1 + { + if (mVU.code != 0x8000033c) + mVUinfo.isBadOp = true; + } + pass2 + { + if (mVU.code != 0x8000033c) + Console.Error("microVU%d: Unknown Micro VU opcode called (%x) [%04x]\n", getIndex, mVU.code, xPC); + } pass3 { mVUlog("Unknown", mVU.code); } } - diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index add21867b5..f5fca390ea 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -12,49 +12,62 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + #pragma once //------------------------------------------------------------------ // mVUupdateFlags() - Updates status/mac flags //------------------------------------------------------------------ -#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (mFLAG.doFlag ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) -#define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0) -#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { xSHL(gprReg, ADD_XYZW); } } +#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (mFLAG.doFlag ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) +#define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 
1 : 0))) : 0) +#define SHIFT_XYZW(gprReg) \ + do { \ + if (_XYZW_SS && modXYZW && !_W) \ + { \ + xSHL(gprReg, ADD_XYZW); \ + } \ + } while (0) const __aligned16 u32 sse4_compvals[2][4] = { - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111 + {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}, //1111 + {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}, //1111 }; // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations -static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1) { - const x32& mReg = gprT1; - const x32& sReg = getFlagReg(sFLAG.write); +static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1) +{ + const x32& mReg = gprT1; + const x32& sReg = getFlagReg(sFLAG.write); bool regT1b = regT1in.IsEmpty(), regT2b = false; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); - if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } + if (!sFLAG.doFlag && !mFLAG.doFlag) + return; const xmm& regT1 = regT1b ? 
mVU.regAlloc->allocReg() : regT1in; xmm regT2 = reg; - if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) { + if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) + { regT2 = regT2in; - if (regT2.IsEmpty()) { + if (regT2.IsEmpty()) + { regT2 = mVU.regAlloc->allocReg(); regT2b = true; } xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw } - else regT2 = reg; + else + regT2 = reg; - if (sFLAG.doFlag) { - mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag - if (sFLAG.doNonSticky) xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags + if (sFLAG.doFlag) + { + mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag + if (sFLAG.doNonSticky) + xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags } //-------------------------Check for Signed flags------------------------------ @@ -64,23 +77,25 @@ static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, c xCMPEQ.PS(regT1, regT2); // Set all F's if each vector is zero xMOVMSKPS(gprT2, regT1); // Used for Zero Flag Calculation - xAND(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation + xAND(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation xSHL(mReg, 4 + ADD_XYZW); //-------------------------Check for Zero flags------------------------------ - xAND(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation - if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); } + xAND(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation + if (mFLAG.doFlag) + SHIFT_XYZW(gprT2); xOR(mReg, gprT2); //-------------------------Overflow Flags----------------------------------- - if (sFLAG.doFlag) { + if (sFLAG.doFlag) + { //Calculate overflow xMOVAPS(regT1, regT2); xAND.PS(regT1, ptr128[&sse4_compvals[1][0]]); // Remove sign flags (we don't care) xCMPNLT.PS(regT1, ptr128[&sse4_compvals[0][0]]); // Compare if T1 == FLT_MAX xMOVMSKPS(gprT2, regT1); // Grab sign bits for equal results - xAND(gprT2, AND_XYZW); // Grab "Is FLT_MAX" bits from the previous calculation + xAND(gprT2, 
AND_XYZW); // Grab "Is FLT_MAX" bits from the previous calculation xForwardJump32 oJMP(Jcc_Zero); xOR(sReg, 0x820000); oJMP.SetTarget(); @@ -90,24 +105,29 @@ static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, c } //-------------------------Write back flags------------------------------ - if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag - if (sFLAG.doFlag) { + if (mFLAG.doFlag) + mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag + if (sFLAG.doFlag) + { xAND(mReg, 0xFF); // Ignore overflow bits, they're handled separately xOR(sReg, mReg); - if (sFLAG.doNonSticky) { + if (sFLAG.doNonSticky) + { xSHL(mReg, 8); - xOR (sReg, mReg); + xOR(sReg, mReg); } } - if (regT1b) mVU.regAlloc->clearNeeded(regT1); - if (regT2b) mVU.regAlloc->clearNeeded(regT2); + if (regT1b) + mVU.regAlloc->clearNeeded(regT1); + if (regT2b) + mVU.regAlloc->clearNeeded(regT2); } //------------------------------------------------------------------ // Helper Macros and Functions //------------------------------------------------------------------ -static void (*const SSE_PS[]) (microVU&, const xmm&, const xmm&, const xmm&, const xmm&) = { +static void (*const SSE_PS[])(microVU&, const xmm&, const xmm&, const xmm&, const xmm&) = { SSE_ADDPS, // 0 SSE_SUBPS, // 1 SSE_MULPS, // 2 @@ -116,7 +136,7 @@ static void (*const SSE_PS[]) (microVU&, const xmm&, const xmm&, const xmm&, con SSE_ADD2PS // 5 }; -static void (*const SSE_SS[]) (microVU&, const xmm&, const xmm&, const xmm&, const xmm&) = { +static void (*const SSE_SS[])(microVU&, const xmm&, const xmm&, const xmm&, const xmm&) = { SSE_ADDSS, // 0 SSE_SUBSS, // 1 SSE_MULSS, // 2 @@ -125,14 +145,16 @@ static void (*const SSE_SS[]) (microVU&, const xmm&, const xmm&, const xmm&, con SSE_ADD2SS // 5 }; -enum clampModes { - cFt = 0x01, // Clamp Ft / I-reg / Q-reg - cFs = 0x02, // Clamp Fs +enum clampModes +{ + cFt = 0x01, // Clamp Ft / I-reg / Q-reg + cFs = 0x02, // Clamp Fs cACC = 0x04, // Clamp ACC }; 
// Prints Opcode to MicroProgram Logs -static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC) { +static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC) +{ mVUlog(microOpcodeName[opEnum]); opCase1 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogFt(); } opCase2 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogBC(); } @@ -141,21 +163,24 @@ static void mVU_printOP(microVU& mVU, int opCase, microOpcode opEnum, bool isACC } // Sets Up Pass1 Info for Normal, BC, I, and Q Cases -static void setupPass1(microVU& mVU, int opCase, bool isACC, bool noFlagUpdate) { +static void setupPass1(microVU& mVU, int opCase, bool isACC, bool noFlagUpdate) +{ opCase1 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase2 { mVUanalyzeFMAC3(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } opCase4 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } - if (noFlagUpdate) { //Max/Min Ops + if (noFlagUpdate) //Max/Min Ops sFLAG.doFlag = false; - } } // Safer to force 0 as the result for X minus X than to do actual subtraction -static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC) { - opCase1 { - if ((opType == 1) && (_Ft_ == _Fs_)) { +static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC) +{ + opCase1 + { + if ((opType == 1) && (_Ft_ == _Fs_)) + { const xmm& Fs = mVU.regAlloc->allocReg(-1, isACC ? 
32 : _Fd_, _X_Y_Z_W); xPXOR(Fs, Fs); // Set to Positive 0 mVUupdateFlags(mVU, Fs); @@ -167,90 +192,130 @@ static bool doSafeSub(microVU& mVU, int opCase, int opType, bool isACC) { } // Sets Up Ft Reg for Normal, BC, I, and Q Cases -static void setupFtReg(microVU& mVU, xmm& Ft, xmm& tempFt, int opCase) { - opCase1 { - if (_XYZW_SS2) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } - else if (clampE) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; } - else { Ft = mVU.regAlloc->allocReg(_Ft_); tempFt = xEmptyReg; } +static void setupFtReg(microVU& mVU, xmm& Ft, xmm& tempFt, int opCase) +{ + opCase1 + { + if (_XYZW_SS2) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } + else if (clampE) { Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; } + else { Ft = mVU.regAlloc->allocReg(_Ft_); tempFt = xEmptyReg; } } - opCase2 { + opCase2 + { tempFt = mVU.regAlloc->allocReg(_Ft_); - Ft = mVU.regAlloc->allocReg(); + Ft = mVU.regAlloc->allocReg(); mVUunpack_xyzw(Ft, tempFt, _bc_); mVU.regAlloc->clearNeeded(tempFt); tempFt = Ft; } - opCase3 { Ft = mVU.regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; } - opCase4 { - if (!clampE && _XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = xEmptyReg; } - else { Ft = mVU.regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); } + opCase3 + { + Ft = mVU.regAlloc->allocReg(33, 0, _X_Y_Z_W); + tempFt = Ft; + } + opCase4 + { + if (!clampE && _XYZW_SS && !mVUinfo.readQ) + { + Ft = xmmPQ; + tempFt = xEmptyReg; + } + else + { + Ft = mVU.regAlloc->allocReg(); + tempFt = Ft; + getQreg(Ft, mVUinfo.readQ); + } } } // Normal FMAC Opcodes -static void mVU_FMACa(microVU& mVU, int recPass, int opCase, int opType, bool isACC, microOpcode opEnum, int clampType) { +static void mVU_FMACa(microVU& mVU, int recPass, int opCase, int opType, bool isACC, microOpcode opEnum, int clampType) +{ pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } - pass2 { - if (doSafeSub(mVU, opCase, opType, 
isACC)) return; - + pass2 + { + if (doSafeSub(mVU, opCase, opType, isACC)) + return; + xmm Fs, Ft, ACC, tempFt; setupFtReg(mVU, Ft, tempFt, opCase); - if (isACC) { - Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + if (isACC) + { + Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); ACC = mVU.regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0); - if (_XYZW_SS2) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); + if (_XYZW_SS2) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); + } + else + { + Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); } - else { Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); } if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg); - else SSE_PS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg); + else SSE_PS[opType](mVU, Fs, Ft, xEmptyReg, xEmptyReg); - if (isACC) { - if (_XYZW_SS) xMOVSS(ACC, Fs); - else mVUmergeRegs(ACC, Fs, _X_Y_Z_W); + if (isACC) + { + if (_XYZW_SS) + xMOVSS(ACC, Fs); + else + mVUmergeRegs(ACC, Fs, _X_Y_Z_W); mVUupdateFlags(mVU, ACC, Fs, tempFt); - if (_XYZW_SS2) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); + if (_XYZW_SS2) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); mVU.regAlloc->clearNeeded(ACC); } - else mVUupdateFlags(mVU, Fs, tempFt); + else + mVUupdateFlags(mVU, Fs, tempFt); mVU.regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU.regAlloc->clearNeeded(Ft); mVU.profiler.EmitOp(opEnum); } pass3 { mVU_printOP(mVU, opCase, opEnum, isACC); } - pass4 { if ((opType != 3) && (opType != 4)) mVUregs.needExactMatch |= 8; } + pass4 + { + if ((opType != 3) && (opType != 4)) + mVUregs.needExactMatch |= 8; + } } // MADDA/MSUBA Opcodes -static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOpcode opEnum, int clampType) { +static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOpcode opEnum, int clampType) +{ pass1 { setupPass1(mVU, opCase, true, false); } - 
pass2 { + pass2 + { xmm Fs, Ft, ACC, tempFt; setupFtReg(mVU, Ft, tempFt, opCase); - Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); ACC = mVU.regAlloc->allocReg(32, 32, 0xf, false); - if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } + if (_XYZW_SS2) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, _X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (_XYZW_SS) SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); - else SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); + else SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); - if (_XYZW_SS || _X_Y_Z_W == 0xf) { + if (_XYZW_SS || _X_Y_Z_W == 0xf) + { if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, xEmptyReg); - else SSE_PS[opType](mVU, ACC, Fs, tempFt, xEmptyReg); + else SSE_PS[opType](mVU, ACC, Fs, tempFt, xEmptyReg); mVUupdateFlags(mVU, ACC, Fs, tempFt); - if (_XYZW_SS && _X_Y_Z_W != 8) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); + if (_XYZW_SS && _X_Y_Z_W != 8) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } - else { + else + { const xmm& tempACC = mVU.regAlloc->allocReg(); xMOVAPS(tempACC, ACC); SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg); @@ -269,25 +334,30 @@ static void mVU_FMACb(microVU& mVU, int recPass, int opCase, int opType, microOp } // MADD Opcodes -static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) { +static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) +{ pass1 { setupPass1(mVU, opCase, false, false); } - pass2 { + pass2 + { xmm Fs, Ft, ACC, tempFt; setupFtReg(mVU, Ft, tempFt, opCase); ACC = mVU.regAlloc->allocReg(32); - Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); + Fs = mVU.regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); - if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } + if (_XYZW_SS2) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); if (clampType & cFt) mVUclamp2(mVU, Ft, xEmptyReg, 
_X_Y_Z_W); if (clampType & cFs) mVUclamp2(mVU, Fs, xEmptyReg, _X_Y_Z_W); if (clampType & cACC) mVUclamp2(mVU, ACC, xEmptyReg, _X_Y_Z_W); - if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_SS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } - else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } - if (_XYZW_SS2) { xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); } + if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_SS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } + else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[0](mVU, Fs, ACC, tempFt, xEmptyReg); } + + if (_XYZW_SS2) + xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W)); mVUupdateFlags(mVU, Fs, tempFt); @@ -301,9 +371,11 @@ static void mVU_FMACc(microVU& mVU, int recPass, int opCase, microOpcode opEnum, } // MSUB Opcodes -static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) { +static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, int clampType) +{ pass1 { setupPass1(mVU, opCase, false, false); } - pass2 { + pass2 + { xmm Fs, Ft, Fd, tempFt; setupFtReg(mVU, Ft, tempFt, opCase); @@ -315,7 +387,7 @@ static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, if (clampType & cACC) mVUclamp2(mVU, Fd, xEmptyReg, _X_Y_Z_W); if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_SS[1](mVU, Fd, Fs, tempFt, xEmptyReg); } - else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[1](mVU, Fd, Fs, tempFt, xEmptyReg); } + else { SSE_PS[2](mVU, Fs, Ft, xEmptyReg, xEmptyReg); SSE_PS[1](mVU, Fd, Fs, tempFt, xEmptyReg); } mVUupdateFlags(mVU, Fd, Fs, tempFt); @@ -329,23 +401,32 @@ static void mVU_FMACd(microVU& mVU, int recPass, int opCase, microOpcode opEnum, } // ABS Opcode -mVUop(mVU_ABS) { +mVUop(mVU_ABS) +{ pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } - pass2 { - if (!_Ft_) return; + pass2 + { + if (!_Ft_) + return; const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, 
_X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); xAND.PS(Fs, ptr128[mVUglob.absclip]); mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opABS); } - pass3 { mVUlog("ABS"); mVUlogFtFs(); } + pass3 + { + mVUlog("ABS"); + mVUlogFtFs(); + } } // OPMULA Opcode -mVUop(mVU_OPMULA) { +mVUop(mVU_OPMULA) +{ pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); } - pass2 { - const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); + pass2 + { + const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W); xPSHUF.D(Fs, Fs, 0xC9); // WXZY @@ -356,16 +437,23 @@ mVUop(mVU_OPMULA) { mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opOPMULA); } - pass3 { mVUlog("OPMULA"); mVUlogACC(); mVUlogFt(); } + pass3 + { + mVUlog("OPMULA"); + mVUlogACC(); + mVUlogFt(); + } pass4 { mVUregs.needExactMatch |= 8; } } // OPMSUB Opcode -mVUop(mVU_OPMSUB) { +mVUop(mVU_OPMSUB) +{ pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); } - pass2 { - const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); - const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); + pass2 + { + const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0xf); + const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& ACC = mVU.regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); xPSHUF.D(Fs, Fs, 0xC9); // WXZY @@ -378,22 +466,31 @@ mVUop(mVU_OPMSUB) { mVU.regAlloc->clearNeeded(ACC); mVU.profiler.EmitOp(opOPMSUB); } - pass3 { mVUlog("OPMSUB"); mVUlogFd(); mVUlogFt(); } + pass3 + { + mVUlog("OPMSUB"); + mVUlogFd(); + mVUlogFt(); + } pass4 { mVUregs.needExactMatch |= 8; } } // FTOI0/FTIO4/FTIO12/FTIO15 Opcodes -static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) { +static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) +{ pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } - pass2 { - if (!_Ft_) return; + pass2 + { + if (!_Ft_) + return; const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); const xmm& t1 = mVU.regAlloc->allocReg(); 
const xmm& t2 = mVU.regAlloc->allocReg(); // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() xMOVAPS(t1, Fs); - if (addr) { xMUL.PS(Fs, ptr128[addr]); } + if (addr) + xMUL.PS(Fs, ptr128[addr]); xCVTTPS2DQ(Fs, Fs); xPXOR(t1, ptr128[mVUglob.signbit]); xPSRA.D(t1, 31); @@ -407,30 +504,44 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) { mVU.regAlloc->clearNeeded(t2); mVU.profiler.EmitOp(opEnum); } - pass3 { mVUlog(microOpcodeName[opEnum]); mVUlogFtFs(); } + pass3 + { + mVUlog(microOpcodeName[opEnum]); + mVUlogFtFs(); + } } // ITOF0/ITOF4/ITOF12/ITOF15 Opcodes -static void mVU_ITOFx(mP, const float* addr, microOpcode opEnum) { +static void mVU_ITOFx(mP, const float* addr, microOpcode opEnum) +{ pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } - pass2 { - if (!_Ft_) return; + pass2 + { + if (!_Ft_) + return; const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); xCVTDQ2PS(Fs, Fs); - if (addr) { xMUL.PS(Fs, ptr128[addr]); } + if (addr) + xMUL.PS(Fs, ptr128[addr]); //mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed) mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opEnum); } - pass3 { mVUlog(microOpcodeName[opEnum]); mVUlogFtFs(); } + pass3 + { + mVUlog(microOpcodeName[opEnum]); + mVUlogFtFs(); + } } // Clip Opcode -mVUop(mVU_CLIP) { +mVUop(mVU_CLIP) +{ pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); } - pass2 { + pass2 + { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1); const xmm& t1 = mVU.regAlloc->allocReg(); @@ -466,7 +577,11 @@ mVUop(mVU_CLIP) { mVU.regAlloc->clearNeeded(t1); mVU.profiler.EmitOp(opCLIP); } - pass3 { mVUlog("CLIP"); mVUlogCLIP(); } + pass3 + { + mVUlog("CLIP"); + mVUlogCLIP(); + } } //------------------------------------------------------------------ @@ -514,13 +629,13 @@ mVUop(mVU_MULAq) { mVU_FMACa(mVU, recPass, 4, 2, true, opMULAq, 0); } mVUop(mVU_MULAx) { mVU_FMACa(mVU, recPass, 2, 2, true, 
opMULAx, cFs);} // Clamp (TOTA, DoM, ...) mVUop(mVU_MULAy) { mVU_FMACa(mVU, recPass, 2, 2, true, opMULAy, cFs);} // Clamp (TOTA, DoM, ...) mVUop(mVU_MULAz) { mVU_FMACa(mVU, recPass, 2, 2, true, opMULAz, cFs);} // Clamp (TOTA, DoM, ...) -mVUop(mVU_MULAw) { mVU_FMACa(mVU, recPass, 2, 2, true, opMULAw, (_XYZW_PS) ? (cFs | cFt) : cFs); } // Clamp (TOTA, DoM, ...)- Ft for Superman - Shadow Of Apokolips -mVUop(mVU_MADD) { mVU_FMACc(mVU, recPass, 1, opMADD, 0); } -mVUop(mVU_MADDi) { mVU_FMACc(mVU, recPass, 3, opMADDi, 0); } -mVUop(mVU_MADDq) { mVU_FMACc(mVU, recPass, 4, opMADDq, 0); } -mVUop(mVU_MADDx) { mVU_FMACc(mVU, recPass, 2, opMADDx, cFs); } // Clamp (TOTA, DoM, ...) -mVUop(mVU_MADDy) { mVU_FMACc(mVU, recPass, 2, opMADDy, cFs); } // Clamp (TOTA, DoM, ...) -mVUop(mVU_MADDz) { mVU_FMACc(mVU, recPass, 2, opMADDz, cFs); } // Clamp (TOTA, DoM, ...) +mVUop(mVU_MULAw) { mVU_FMACa(mVU, recPass, 2, 2, true, opMULAw, (_XYZW_PS) ? (cFs | cFt) : cFs); } // Clamp (TOTA, DoM, ...)- Ft for Superman - Shadow Of Apokolips +mVUop(mVU_MADD) { mVU_FMACc(mVU, recPass, 1, opMADD, 0); } +mVUop(mVU_MADDi) { mVU_FMACc(mVU, recPass, 3, opMADDi, 0); } +mVUop(mVU_MADDq) { mVU_FMACc(mVU, recPass, 4, opMADDq, 0); } +mVUop(mVU_MADDx) { mVU_FMACc(mVU, recPass, 2, opMADDx, cFs); } // Clamp (TOTA, DoM, ...) +mVUop(mVU_MADDy) { mVU_FMACc(mVU, recPass, 2, opMADDy, cFs); } // Clamp (TOTA, DoM, ...) +mVUop(mVU_MADDz) { mVU_FMACc(mVU, recPass, 2, opMADDz, cFs); } // Clamp (TOTA, DoM, ...) mVUop(mVU_MADDw) { mVU_FMACc(mVU, recPass, 2, opMADDw, (isCOP2)?(cACC|cFt|cFs):cFs);} // Clamp (ICO (COP2), TOTA, DoM) mVUop(mVU_MADDA) { mVU_FMACb(mVU, recPass, 1, 0, opMADDA, 0); } mVUop(mVU_MADDAi) { mVU_FMACb(mVU, recPass, 3, 0, opMADDAi, 0); } @@ -529,7 +644,7 @@ mVUop(mVU_MADDAx) { mVU_FMACb(mVU, recPass, 2, 0, opMADDAx, cFs);} // Cla mVUop(mVU_MADDAy) { mVU_FMACb(mVU, recPass, 2, 0, opMADDAy, cFs);} // Clamp (TOTA, DoM, ...) 
mVUop(mVU_MADDAz) { mVU_FMACb(mVU, recPass, 2, 0, opMADDAz, cFs);} // Clamp (TOTA, DoM, ...) mVUop(mVU_MADDAw) { mVU_FMACb(mVU, recPass, 2, 0, opMADDAw, cFs);} // Clamp (TOTA, DoM, ...) -mVUop(mVU_MSUB) { mVU_FMACd(mVU, recPass, 1, opMSUB, (isCOP2) ? cFs : 0); } // Clamp ( Superman - Shadow Of Apokolips) +mVUop(mVU_MSUB) { mVU_FMACd(mVU, recPass, 1, opMSUB, (isCOP2) ? cFs : 0); } // Clamp ( Superman - Shadow Of Apokolips) mVUop(mVU_MSUBi) { mVU_FMACd(mVU, recPass, 3, opMSUBi, 0); } mVUop(mVU_MSUBq) { mVU_FMACd(mVU, recPass, 4, opMSUBq, 0); } mVUop(mVU_MSUBx) { mVU_FMACd(mVU, recPass, 2, opMSUBx, 0); } diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index f75c0b01e9..7b74122555 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -24,8 +24,8 @@ using namespace x86Emitter; // newVif_HashBucket.h uses this typedef, so it has to be declared first. -typedef u32 (__fastcall *nVifCall)(void*, const void*); -typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src); +typedef u32 (__fastcall* nVifCall)(void*, const void*); +typedef void(__fastcall* nVifrecCall)(uptr dest, uptr src); #include "newVif_HashBucket.h" @@ -38,13 +38,13 @@ extern void dVifRelease (int idx); extern void VifUnpackSSE_Init(); extern void VifUnpackSSE_Destroy(); -_vifT extern void dVifUnpack (const u8* data, bool isFill); +_vifT extern void dVifUnpack(const u8* data, bool isFill); #define VUFT VIFUnpackFuncTable -#define _v0 0 -#define _v1 0x55 -#define _v2 0xaa -#define _v3 0xff +#define _v0 0 +#define _v1 0x55 +#define _v2 0xaa +#define _v3 0xff #define xmmCol0 xmm2 #define xmmCol1 xmm3 #define xmmCol2 xmm4 @@ -52,20 +52,22 @@ _vifT extern void dVifUnpack (const u8* data, bool isFill); #define xmmRow xmm6 #define xmmTemp xmm7 -struct nVifStruct { +struct nVifStruct +{ // Buffer for partial transfers (should always be first to ensure alignment) // Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword) - __aligned16 u8 buffer[256*16]; - u32 bSize; // Size of 'buffer' + 
__aligned16 u8 buffer[256*16]; + u32 bSize; // Size of 'buffer' // VIF0 or VIF1 - provided for debugging helpfulness only, and is generally unused. // (templates are used for most or all VIF indexing) - u32 idx; + u32 idx; - RecompiledCodeReserve* recReserve; - u8* recWritePtr; // current write pos into the reserve + RecompiledCodeReserve* recReserve; + u8* recWritePtr; // current write pos into the reserve + + HashBucket vifBlocks; // Vif Blocks - HashBucket vifBlocks; // Vif Blocks nVifStruct() = default; }; @@ -75,7 +77,7 @@ extern void resetNewVif(int idx); extern void releaseNewVif(int idx); extern __aligned16 nVifStruct nVif[2]; -extern __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle] -extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector] +extern __aligned16 nVifCall nVifUpk[(2 * 2 * 16) * 4]; // ([USN][Masking][Unpack Type]) [curCycle] +extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector] static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 1d0280b219..78652b6402 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -22,7 +22,8 @@ #include "MTVU.h" #include "common/Perf.h" -static void recReset(int idx) { +static void recReset(int idx) +{ nVif[idx].vifBlocks.reset(); nVif[idx].recReserve->Reset(); @@ -30,26 +31,30 @@ static void recReset(int idx) { nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr(); } -void dVifReserve(int idx) { - if(!nVif[idx].recReserve) +void dVifReserve(int idx) +{ + if (!nVif[idx].recReserve) nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb); auto offset = idx ? 
HostMemoryMap::VIF1recOffset : HostMemoryMap::VIF0recOffset; nVif[idx].recReserve->Reserve(GetVmMemory().MainMemory(), offset, 8 * _1mb); } -void dVifReset(int idx) { +void dVifReset(int idx) +{ pxAssertDev(nVif[idx].recReserve, "Dynamic VIF recompiler reserve must be created prior to VIF use or reset!"); recReset(idx); } -void dVifClose(int idx) { +void dVifClose(int idx) +{ if (nVif[idx].recReserve) nVif[idx].recReserve->Reset(); } -void dVifRelease(int idx) { +void dVifRelease(int idx) +{ dVifClose(idx); safe_delete(nVif[idx].recReserve); } @@ -59,61 +64,79 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo , vB(vifBlock_) { const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2) - isFill = (vB.cl < wl); - usn = (vB.upkType>>5) & 1; - doMask = (vB.upkType>>4) & 1; - doMode = vB.mode & 3; - IsAligned = vB.aligned; - vCL = 0; + isFill = (vB.cl < wl); + usn = (vB.upkType>>5) & 1; + doMask = (vB.upkType>>4) & 1; + doMode = vB.mode & 3; + IsAligned = vB.aligned; + vCL = 0; } __fi void makeMergeMask(u32& x) { - x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); + x = ((x & 0x40) >> 6) | ((x & 0x10) >> 3) | (x & 4) | ((x & 1) << 3); } -__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { +__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const +{ const int idx = v.idx; const vifStruct& vif = MTVU_VifX; //This could have ended up copying the row when there was no row to write.1810080 u32 m0 = vB.mask; //The actual mask example 0x03020100 - u32 m3 = ((m0 & 0xaaaaaaaa)>>1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge) - u32 m2 = (m0 & 0x55555555) & (~m0>>1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row + u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge) + u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 
Just the row - if((m2&&doMask)||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); MSKPATH3_LOG("Moving row");} - if (m3&&doMask) { + if ((m2 && doMask) || doMode) + { + xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); + MSKPATH3_LOG("Moving row"); + } + if (m3 && doMask) + { MSKPATH3_LOG("Merging Cols"); xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]); - if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); - if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); - if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); - if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0); + if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); + if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); + if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); + if ((cS >= 1) && (m3 & 0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0); } //if (doMask||doMode) loadRowCol((nVifStruct&)v); } -void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { +void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const +{ pxAssertDev(regX.Id <= 1, "Reg Overflow! 
XMM2 thru XMM6 are reserved for masking."); int cc = std::min(vCL, 3); u32 m0 = (vB.mask >> (cc * 8)) & 0xff; //The actual mask example 0xE4 (protect, col, row, clear) - u32 m3 = ((m0 & 0xaa)>>1) & ~m0; //all the upper bits (cols shifted right) cancelling out any write protects 0x10 - u32 m2 = (m0 & 0x55) & (~m0>>1); // all the lower bits (rows)cancelling out any write protects 0x04 - u32 m4 = (m0 & ~((m3<<1) | m2)) & 0x55; // = 0xC0 & 0x55 = 0x40 (for merge mask) + u32 m3 = ((m0 & 0xaa) >> 1) & ~m0; //all the upper bits (cols shifted right) cancelling out any write protects 0x10 + u32 m2 = (m0 & 0x55) & (~m0 >> 1); // all the lower bits (rows)cancelling out any write protects 0x04 + u32 m4 = (m0 & ~((m3 << 1) | m2)) & 0x55; // = 0xC0 & 0x55 = 0x40 (for merge mask) makeMergeMask(m2); makeMergeMask(m3); makeMergeMask(m4); - if (doMask&&m2) { mergeVectors(regX, xmmRow, xmmTemp, m2); } // Merge MaskRow - if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), xmmTemp, m3); } // Merge MaskCol - if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); - mergeVectors(regX, xmmTemp, xmmTemp, m4); } // Merge Write Protect - if (doMode) { - u32 m5 = ~(m2|m3|m4) & 0xf; + if (doMask && m2) // Merge MaskRow + { + mergeVectors(regX, xmmRow, xmmTemp, m2); + } + if (doMask && m3) // Merge MaskCol + { + mergeVectors(regX, xRegisterSSE(xmmCol0.Id + cc), xmmTemp, m3); + } + if (doMask && m4) // Merge Write Protect + { + xMOVAPS(xmmTemp, ptr[dstIndirect]); + mergeVectors(regX, xmmTemp, xmmTemp, m4); + } + if (doMode) + { + u32 m5 = ~(m2 | m3 | m4) & 0xf; - if (!doMask) m5 = 0xf; + if (!doMask) + m5 = 0xf; if (m5 < 0xf) { @@ -126,9 +149,9 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { { mergeVectors(xmmTemp, xmmRow, xmmTemp, m5); xPADD.D(regX, xmmTemp); - if (doMode == 2) mergeVectors(xmmRow, regX, xmmTemp, m5); + if (doMode == 2) + mergeVectors(xmmRow, regX, xmmTemp, m5); } - } else { @@ -139,14 +162,16 @@ void 
VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { else { xPADD.D(regX, xmmRow); - if (doMode == 2) { xMOVAPS(xmmRow, regX); } + if (doMode == 2) + xMOVAPS(xmmRow, regX); } } } xMOVAPS(ptr32[dstIndirect], regX); } -void VifUnpackSSE_Dynarec::writeBackRow() const { +void VifUnpackSSE_Dynarec::writeBackRow() const +{ const int idx = v.idx; xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow); @@ -154,62 +179,92 @@ void VifUnpackSSE_Dynarec::writeBackRow() const { // ToDo: Do we need to write back to vifregs.rX too!? :/ } -static void ShiftDisplacementWindow( xAddressVoid& addr, const xRegisterLong& modReg ) +static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg) { // Shifts the displacement factor of a given indirect address, so that the address // remains in the optimal 0xf0 range (which allows for byte-form displacements when // generating instructions). int addImm = 0; - while( addr.Displacement >= 0x80 ) + while (addr.Displacement >= 0x80) { addImm += 0xf0; addr -= 0xf0; } - if(addImm) { xADD(modReg, addImm); } + if (addImm) + xADD(modReg, addImm); } -void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp ) +void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp) { - switch( upknum ) + switch (upknum) { case 0: case 1: - case 2: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break; + case 2: + if (PostOp) + { + UnpkLoopIteration++; + UnpkLoopIteration = UnpkLoopIteration & 0x3; + } + break; case 4: case 5: - case 6: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; + case 6: + if (PostOp) + { + UnpkLoopIteration++; + UnpkLoopIteration = UnpkLoopIteration & 0x1; + } + break; - case 8: if(PostOp) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; - case 9: if (!PostOp) { UnpkLoopIteration++; } break; - case 10:if (!PostOp) { UnpkLoopIteration++; } break; + case 8: + if (PostOp) + { + UnpkLoopIteration++; + 
UnpkLoopIteration = UnpkLoopIteration & 0x1; + } + break; + case 9: + if (!PostOp) + { + UnpkLoopIteration++; + } + break; + case 10: + if (!PostOp) + { + UnpkLoopIteration++; + } + break; - case 12: break; - case 13: break; - case 14: break; - case 15: break; + case 12: + case 13: + case 14: + case 15: + break; case 3: case 7: case 11: - pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) ); - break; + pxFailRel(wxsFormat(L"Vpu/Vif - Invalid Unpack! [%d]", upknum)); + break; } - } -void VifUnpackSSE_Dynarec::CompileRoutine() { - const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2) - const int upkNum = vB.upkType & 0xf; - const u8& vift = nVifT[upkNum]; - const int cycleSize = isFill ? vB.cl : wl; - const int blockSize = isFill ? wl : vB.cl; - const int skipSize = blockSize - cycleSize; +void VifUnpackSSE_Dynarec::CompileRoutine() +{ + const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2) + const int upkNum = vB.upkType & 0xf; + const u8& vift = nVifT[upkNum]; + const int cycleSize = isFill ? vB.cl : wl; + const int blockSize = isFill ? wl : vB.cl; + const int skipSize = blockSize - cycleSize; - uint vNum = vB.num ? vB.num : 256; - doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature. + uint vNum = vB.num ? vB.num : 256; + doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature. UnpkNoOfIterations = 0; MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum); @@ -218,16 +273,18 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { // Value passed determines # of col regs we need to load SetMasks(isFill ? blockSize : cycleSize); - while (vNum) { + while (vNum) + { - ShiftDisplacementWindow( dstIndirect, arg1reg ); + ShiftDisplacementWindow(dstIndirect, arg1reg); - if(UnpkNoOfIterations == 0) - ShiftDisplacementWindow( srcIndirect, arg2reg ); //Don't need to do this otherwise as we arent reading the source. 
+ if (UnpkNoOfIterations == 0) + ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source. - if (vCL < cycleSize) { + if (vCL < cycleSize) + { ModUnpack(upkNum, false); xUnpack(upkNum); xMovDest(); @@ -238,9 +295,11 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { srcIndirect += vift; vNum--; - if (++vCL == blockSize) vCL = 0; + if (++vCL == blockSize) + vCL = 0; } - else if (isFill) { + else if (isFill) + { //Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source. //DevCon.WriteLn("filling mode!"); xUnpack(upkNum); @@ -249,38 +308,44 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { dstIndirect += 16; vNum--; - if (++vCL == blockSize) vCL = 0; + if (++vCL == blockSize) + vCL = 0; } - else { + else + { dstIndirect += (16 * skipSize); vCL = 0; } } - if (doMode>=2) writeBackRow(); + if (doMode >= 2) + writeBackRow(); xRET(); } -static u16 dVifComputeLength(uint cl, uint wl, u8 num, bool isFill) { - uint length = (num > 0) ? (num * 16) : 4096; // 0 = 256 +static u16 dVifComputeLength(uint cl, uint wl, u8 num, bool isFill) +{ + uint length = (num > 0) ? (num * 16) : 4096; // 0 = 256 - if (!isFill) { - uint skipSize = (cl - wl) * 16; - uint blocks = (num + (wl-1)) / wl; //Need to round up num's to calculate skip size correctly. - length += (blocks-1) * skipSize; + if (!isFill) + { + uint skipSize = (cl - wl) * 16; + uint blocks = (num + (wl - 1)) / wl; //Need to round up num's to calculate skip size correctly. + length += (blocks - 1) * skipSize; } return std::min(length, 0xFFFFu); } -_vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) { +_vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) +{ nVifStruct& v = nVif[idx]; // Check size before the compilation - if (v.recWritePtr > (v.recReserve->GetPtrEnd() - _256kb)) { + if (v.recWritePtr > (v.recReserve->GetPtrEnd() - _256kb)) + { DevCon.WriteLn(L"nVif Recompiler Cache Reset! 
[%ls > %ls]", - pxsPtr(v.recWritePtr), pxsPtr(v.recReserve->GetPtrEnd()) - ); + pxsPtr(v.recWritePtr), pxsPtr(v.recReserve->GetPtrEnd())); recReset(idx); } @@ -299,16 +364,17 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) { return █ } -_vifT __fi void dVifUnpack(const u8* data, bool isFill) { +_vifT __fi void dVifUnpack(const u8* data, bool isFill) +{ nVifStruct& v = nVif[idx]; - vifStruct& vif = MTVU_VifX; + vifStruct& vif = MTVU_VifX; VIFregisters& vifRegs = MTVU_VifXRegs; - const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5); - const int doMask = isFill? 1 : (vif.cmd & 0x10); + const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5); + const int doMask = isFill ? 1 : (vif.cmd & 0x10); - nVifBlock block; + nVifBlock block; // Performance note: initial code was using u8/u16 field of the struct // directly. However reading back the data (as u32) in HashBucket.find @@ -337,24 +403,28 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) { //); // Seach in cache before trying to compile the block - nVifBlock* b = v.vifBlocks.find(block); - if (unlikely(b == nullptr)) { + nVifBlock* b = v.vifBlocks.find(block); + if (unlikely(b == nullptr)) + { b = dVifCompile(block, isFill); } { // Execute the block - const VURegs& VU = vuRegs[idx]; - const uint vuMemLimit = idx ? 0x4000 : 0x1000; + const VURegs& VU = vuRegs[idx]; + const uint vuMemLimit = idx ? 0x4000 : 0x1000; - u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10)); - u8* endmem = VU.Mem + vuMemLimit; + u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit - 0x10)); + u8* endmem = VU.Mem + vuMemLimit; - if (likely((startmem + b->length) <= endmem)) { + if (likely((startmem + b->length) <= endmem)) + { // No wrapping, you can run the fast dynarec ((nVifrecCall)b->startPtr)((uptr)startmem, (uptr)data); - } else { + } + else + { VIF_LOG("Running Interpreter Block: nVif%x - VU Mem Ptr Overflow; falling back to interpreter. 
Start = %x End = %x num = %x, wl = %x, cl = %x", - v.idx, vif.tag.addr, vif.tag.addr + (block.num * 16), block.num, block.wl, block.cl); + v.idx, vif.tag.addr, vif.tag.addr + (block.num * 16), block.num, block.wl, block.cl); _nVifUnpack(idx, data, vifRegs.mode, isFill); } } diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h index 990700a9ad..96d3bca94e 100644 --- a/pcsx2/x86/newVif_HashBucket.h +++ b/pcsx2/x86/newVif_HashBucket.h @@ -19,21 +19,24 @@ // nVifBlock - Ordered for Hashing; the 'num' and 'upkType' fields are // used as the hash bucket selector. -union nVifBlock { +union nVifBlock +{ // Warning: order depends on the newVifDynaRec code - struct { - u8 num; // [00] Num Field - u8 upkType; // [01] Unpack Type [usn1:mask1:upk*4] - u16 length; // [02] Extra: pre computed Length - u32 mask; // [04] Mask Field - u8 mode; // [08] Mode Field - u8 aligned; // [09] Packet Alignment - u8 cl; // [10] CL Field - u8 wl; // [11] WL Field - uptr startPtr; // [12] Start Ptr of RecGen Code + struct + { + u8 num; // [00] Num Field + u8 upkType; // [01] Unpack Type [usn1:mask1:upk*4] + u16 length; // [02] Extra: pre computed Length + u32 mask; // [04] Mask Field + u8 mode; // [08] Mode Field + u8 aligned; // [09] Packet Alignment + u8 cl; // [10] CL Field + u8 wl; // [11] WL Field + uptr startPtr; // [12] Start Ptr of RecGen Code }; - struct { + struct + { u16 hash_key; u16 _pad0; u32 key0; @@ -54,21 +57,25 @@ union nVifBlock { // The hash function is determined by taking the first bytes of data and // performing a modulus the size of hSize. So the most diverse-data should // be in the first bytes of the struct. 
(hence why nVifBlock is specifically sorted) -class HashBucket { +class HashBucket +{ protected: std::array m_bucket; public: - HashBucket() { + HashBucket() + { m_bucket.fill(nullptr); } ~HashBucket() { clear(); } - __fi nVifBlock* find(const nVifBlock& dataPtr) { + __fi nVifBlock* find(const nVifBlock& dataPtr) + { nVifBlock* chainpos = m_bucket[dataPtr.hash_key]; - while (true) { + while (true) + { if (chainpos->key0 == dataPtr.key0 && chainpos->key1 == dataPtr.key1) return chainpos; @@ -79,32 +86,36 @@ public: } } - void add(const nVifBlock& dataPtr) { + void add(const nVifBlock& dataPtr) + { u32 b = dataPtr.hash_key; - u32 size = bucket_size( dataPtr ); + u32 size = bucket_size(dataPtr); // Warning there is an extra +1 due to the empty cell // Performance note: 64B align to reduce cache miss penalty in `find` - if( (m_bucket[b] = (nVifBlock*)pcsx2_aligned_realloc( m_bucket[b], sizeof(nVifBlock)*(size+2), 64, sizeof(nVifBlock)*(size+1) )) == NULL ) { + if ((m_bucket[b] = (nVifBlock*)pcsx2_aligned_realloc(m_bucket[b], sizeof(nVifBlock) * (size + 2), 64, sizeof(nVifBlock) * (size + 1))) == NULL) + { throw Exception::OutOfMemory( - wxsFormat(L"HashBucket Chain (bucket size=%d)", size+2) - ); + wxsFormat(L"HashBucket Chain (bucket size=%d)", size + 2)); } // Replace the empty cell by the new block and create a new empty cell memcpy(&m_bucket[b][size++], &dataPtr, sizeof(nVifBlock)); memset(&m_bucket[b][size], 0, sizeof(nVifBlock)); - if( size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", b, size ); + if (size > 3) + DevCon.Warning("recVifUnpk: Bucket 0x%04x has %d micro-programs", b, size); } - u32 bucket_size(const nVifBlock& dataPtr) { + u32 bucket_size(const nVifBlock& dataPtr) + { nVifBlock* chainpos = m_bucket[dataPtr.hash_key]; u32 size = 0; - while (chainpos->startPtr != 0) { + while (chainpos->startPtr != 0) + { size++; chainpos++; } @@ -112,20 +123,22 @@ public: return size; } - void clear() { + void clear() + { for (auto& bucket 
: m_bucket) safe_aligned_free(bucket); } - void reset() { + void reset() + { clear(); // Allocate an empty cell for all buckets - for (auto& bucket : m_bucket) { - if( (bucket = (nVifBlock*)_aligned_malloc( sizeof(nVifBlock), 64 )) == nullptr ) { - throw Exception::OutOfMemory( - wxsFormat(L"HashBucket Chain (bucket size=%d)", 1) - ); + for (auto& bucket : m_bucket) + { + if ((bucket = (nVifBlock*)_aligned_malloc(sizeof(nVifBlock), 64)) == nullptr) + { + throw Exception::OutOfMemory(wxsFormat(L"HashBucket Chain (bucket size=%d)", 1)); } memset(bucket, 0, sizeof(nVifBlock)); diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index 5671c00dae..7010a90abc 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -23,16 +23,16 @@ #include "newVif.h" #include "MTVU.h" -__aligned16 nVifStruct nVif[2]; +__aligned16 nVifStruct nVif[2]; // Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks. // ([USN][Masking][Unpack Type]) [curCycle] -__aligned16 nVifCall nVifUpk[(2*2*16) *4]; +__aligned16 nVifCall nVifUpk[(2 * 2 * 16) * 4]; // This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks // and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly. // [MaskNumber][CycleNumber][Vector] -__aligned16 u32 nVifMask[3][4][4] = {0}; +__aligned16 u32 nVifMask[3][4][4] = {0}; // Number of bytes of data in the source stream needed for each vector. 
// [equivalent to ((32 >> VL) * (VN+1)) / 8] @@ -56,7 +56,7 @@ __aligned16 const u8 nVifT[16] = { }; // ---------------------------------------------------------------------------- -template< int idx, bool doMode, bool isFill > +template <int idx, bool doMode, bool isFill> __ri void __fastcall _nVifUnpackLoop(const u8* data); typedef void __fastcall FnType_VifUnpackLoop(const u8* data); @@ -64,10 +64,14 @@ typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop; // Unpacks Until 'Num' is 0 static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = { - {{ _nVifUnpackLoop<0,0,0>, _nVifUnpackLoop<0,0,1> }, - { _nVifUnpackLoop<0,1,0>, _nVifUnpackLoop<0,1,1> },}, - {{ _nVifUnpackLoop<1,0,0>, _nVifUnpackLoop<1,0,1> }, - { _nVifUnpackLoop<1,1,0>, _nVifUnpackLoop<1,1,1> },}, + { + {_nVifUnpackLoop<0, 0, 0>, _nVifUnpackLoop<0, 0, 1>}, + {_nVifUnpackLoop<0, 1, 0>, _nVifUnpackLoop<0, 1, 1>}, + }, + { + {_nVifUnpackLoop<1, 0, 0>, _nVifUnpackLoop<1, 0, 1>}, + {_nVifUnpackLoop<1, 1, 0>, _nVifUnpackLoop<1, 1, 1>}, + }, }; // ---------------------------------------------------------------------------- @@ -80,21 +84,26 @@ void resetNewVif(int idx) nVif[idx].bSize = 0; memzero(nVif[idx].buffer); - if (newVifDynaRec) dVifReset(idx); + if (newVifDynaRec) + dVifReset(idx); } -void closeNewVif(int idx) { +void closeNewVif(int idx) +{ } -void releaseNewVif(int idx) { +void releaseNewVif(int idx) +{ } -static __fi u8* getVUptr(uint idx, int offset) { - return (u8*)(vuRegs[idx].Mem + ( offset & (idx ? 0x3ff0 : 0xff0) )); +static __fi u8* getVUptr(uint idx, int offset) +{ + return (u8*)(vuRegs[idx].Mem + (offset & (idx ? 0x3ff0 : 0xff0))); } -_vifT int nVifUnpack(const u8* data) { +_vifT int nVifUnpack(const u8* data) +{ nVifStruct& v = nVif[idx]; vifStruct& vif = GetVifX; VIFregisters& vifRegs = vifXRegs; @@ -102,47 +111,57 @@ _vifT int nVifUnpack(const u8* data) { const uint wl = vifRegs.cycle.wl ? 
vifRegs.cycle.wl : 256; const uint ret = std::min(vif.vifpacketsize, vif.tag.size); const bool isFill = (vifRegs.cycle.cl < wl); - s32 size = ret << 2; - - if (ret == vif.tag.size) { // Full Transfer - if (v.bSize) { // Last transfer was partial + s32 size = ret << 2; + + if (ret == vif.tag.size) // Full Transfer + { + if (v.bSize) // Last transfer was partial + { memcpy(&v.buffer[v.bSize], data, size); - v.bSize += size; - size = v.bSize; - data = v.buffer; + v.bSize += size; + size = v.bSize; + data = v.buffer; - vif.cl = 0; - vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input. - if (!vifRegs.num) vifRegs.num = 256; + vif.cl = 0; + vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input. + if (!vifRegs.num) + vifRegs.num = 256; } - if (!idx || !THREAD_VU1) { - if (newVifDynaRec) dVifUnpack(data, isFill); - else _nVifUnpack(idx, data, vifRegs.mode, isFill); + if (!idx || !THREAD_VU1) + { + if (newVifDynaRec) + dVifUnpack(data, isFill); + else + _nVifUnpack(idx, data, vifRegs.mode, isFill); } - else vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3); + else + vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3); - vif.pass = 0; - vif.tag.size = 0; - vif.cmd = 0; - vifRegs.num = 0; - v.bSize = 0; + vif.pass = 0; + vif.tag.size = 0; + vif.cmd = 0; + vifRegs.num = 0; + v.bSize = 0; } - else { // Partial Transfer + else // Partial Transfer + { memcpy(&v.buffer[v.bSize], data, size); - v.bSize += size; + v.bSize += size; vif.tag.size -= ret; - const u8& vSize = nVifT[vif.cmd & 0x0f]; + const u8& vSize = nVifT[vif.cmd & 0x0f]; // We need to provide accurate accounting of the NUM register, in case games decided // to read back from it mid-transfer. Since so few games actually use partial transfers // of VIF unpacks, this code should not be any bottleneck. 
- if (!isFill) { + if (!isFill) + { vifRegs.num -= (size / vSize); } - else { + else + { int dataSize = (size / vSize); vifRegs.num = vifRegs.num - (((dataSize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + dataSize); } @@ -156,29 +175,32 @@ template int nVifUnpack<1>(const u8* data); // This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks // and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly. -static void setMasks(const vifStruct& vif, const VIFregisters& v) { - for (int i = 0; i < 16; i++) { - int m = (v.mask >> (i*2)) & 3; - switch (m) { +static void setMasks(const vifStruct& vif, const VIFregisters& v) +{ + for (int i = 0; i < 16; i++) + { + int m = (v.mask >> (i * 2)) & 3; + switch (m) + { case 0: // Data - nVifMask[0][i/4][i%4] = 0xffffffff; - nVifMask[1][i/4][i%4] = 0; - nVifMask[2][i/4][i%4] = 0; + nVifMask[0][i / 4][i % 4] = 0xffffffff; + nVifMask[1][i / 4][i % 4] = 0; + nVifMask[2][i / 4][i % 4] = 0; break; case 1: // MaskRow - nVifMask[0][i/4][i%4] = 0; - nVifMask[1][i/4][i%4] = 0; - nVifMask[2][i/4][i%4] = vif.MaskRow._u32[i%4]; + nVifMask[0][i / 4][i % 4] = 0; + nVifMask[1][i / 4][i % 4] = 0; + nVifMask[2][i / 4][i % 4] = vif.MaskRow._u32[i % 4]; break; case 2: // MaskCol - nVifMask[0][i/4][i%4] = 0; - nVifMask[1][i/4][i%4] = 0; - nVifMask[2][i/4][i%4] = vif.MaskCol._u32[i/4]; + nVifMask[0][i / 4][i % 4] = 0; + nVifMask[1][i / 4][i % 4] = 0; + nVifMask[2][i / 4][i % 4] = vif.MaskCol._u32[i / 4]; break; case 3: // Write Protect - nVifMask[0][i/4][i%4] = 0; - nVifMask[1][i/4][i%4] = 0xffffffff; - nVifMask[2][i/4][i%4] = 0; + nVifMask[0][i / 4][i % 4] = 0; + nVifMask[1][i / 4][i % 4] = 0xffffffff; + nVifMask[2][i / 4][i % 4] = 0; break; } } @@ -205,40 +227,45 @@ static void setMasks(const vifStruct& vif, const VIFregisters& v) { // // size - size of the packet fragment incoming from DMAC. 
-template< int idx, bool doMode, bool isFill > -__ri void __fastcall _nVifUnpackLoop(const u8* data) { +template <int idx, bool doMode, bool isFill> +__ri void __fastcall _nVifUnpackLoop(const u8* data) +{ - vifStruct& vif = MTVU_VifX; + vifStruct& vif = MTVU_VifX; VIFregisters& vifRegs = MTVU_VifXRegs; // skipSize used for skipping writes only - const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16; + const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16; //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize); - if (!doMode && (vif.cmd & 0x10)) setMasks(vif, vifRegs); + if (!doMode && (vif.cmd & 0x10)) + setMasks(vif, vifRegs); - const int usn = !!vif.usn; - const int upkNum = vif.cmd & 0x1f; - const u8& vSize = nVifT[upkNum & 0x0f]; + const int usn = !!vif.usn; + const int upkNum = vif.cmd & 0x1f; + const u8& vSize = nVifT[upkNum & 0x0f]; //uint vl = vif.cmd & 0x03; //uint vn = (vif.cmd >> 2) & 0x3; //uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle - const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ]; - const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ]; + const nVifCall* fnbase = &nVifUpk[((usn * 2 * 16) + upkNum) * (4 * 1)]; + const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? 
vifRegs.mode : 0][((usn * 2 * 16) + upkNum)]; - pxAssume (vif.cl == 0); + pxAssume(vif.cl == 0); //pxAssume (vifRegs.cycle.wl > 0); - do { + do + { u8* dest = getVUptr(idx, vif.tag.addr); - if (doMode) { - //if (1) { + if (doMode) + { + //if (1) { ft(dest, data); } - else { + else + { //DevCon.WriteLn("SSE Unpack!"); uint cl3 = std::min(vif.cl, 3); fnbase[cl3](dest, data); @@ -248,16 +275,20 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) { --vifRegs.num; ++vif.cl; - if (isFill) { + if (isFill) + { //DevCon.WriteLn("isFill!"); - if (vif.cl <= vifRegs.cycle.cl) data += vSize; - else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0; + if (vif.cl <= vifRegs.cycle.cl) + data += vSize; + else if (vif.cl == vifRegs.cycle.wl) + vif.cl = 0; } else { data += vSize; - if (vif.cl >= vifRegs.cycle.wl) { + if (vif.cl >= vifRegs.cycle.wl) + { vif.tag.addr += skipSize; vif.cl = 0; } @@ -265,8 +296,8 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) { } while (vifRegs.num); } -__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill) { +__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill) +{ - UnpackLoopTable[idx][!!mode][isFill]( data ); + UnpackLoopTable[idx][!!mode][isFill](data); } - diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index d59c52328d..84a8ce1b52 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -16,11 +16,11 @@ #include "PrecompiledHeader.h" #include "newVif_UnpackSSE.h" -#define xMOV8(regX, loc) xMOVSSZX(regX, loc) -#define xMOV16(regX, loc) xMOVSSZX(regX, loc) -#define xMOV32(regX, loc) xMOVSSZX(regX, loc) -#define xMOV64(regX, loc) xMOVUPS(regX, loc) -#define xMOV128(regX, loc) xMOVUPS(regX, loc) +#define xMOV8(regX, loc) xMOVSSZX(regX, loc) +#define xMOV16(regX, loc) xMOVSSZX(regX, loc) +#define xMOV32(regX, loc) xMOVSSZX(regX, loc) +#define xMOV64(regX, loc) xMOVUPS (regX, loc) +#define xMOV128(regX, loc) xMOVUPS (regX, loc) static const __aligned16 u32 
SSEXYZWMask[4][4] = { @@ -34,7 +34,8 @@ static const __aligned16 u32 SSEXYZWMask[4][4] = static RecompiledCodeReserve* nVifUpkExec = NULL; // Merges xmm vectors without modifying source reg -void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) { +void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) +{ mVUmergeRegs(dest, src, xyzw); } @@ -49,92 +50,96 @@ VifUnpackSSE_Base::VifUnpackSSE_Base() , IsAligned(0) , dstIndirect(arg1reg) , srcIndirect(arg2reg) - , workReg( xmm1 ) - , destReg( xmm0 ) + , workReg(xmm1) + , destReg(xmm0) { } -void VifUnpackSSE_Base::xMovDest() const { - if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } - else { doMaskWrite(destReg); } +void VifUnpackSSE_Base::xMovDest() const +{ + if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } + else { doMaskWrite(destReg); } } -void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const { - if (usn) { xPSRL.D(regX, n); } - else { xPSRA.D(regX, n); } +void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const +{ + if (usn) { xPSRL.D(regX, n); } + else { xPSRA.D(regX, n); } } -void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const { - if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]); - else xPMOVSX.BD(regX, ptr32[srcIndirect]); +void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const +{ + if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]); + else xPMOVSX.BD(regX, ptr32[srcIndirect]); } -void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const { - if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]); - else xPMOVSX.WD(regX, ptr64[srcIndirect]); +void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const +{ + if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]); + else xPMOVSX.WD(regX, ptr64[srcIndirect]); } -void VifUnpackSSE_Base::xUPK_S_32() const { +void VifUnpackSSE_Base::xUPK_S_32() const +{ - switch(UnpkLoopIteration) + switch (UnpkLoopIteration) { case 0: - xMOV128 (workReg, 
ptr32[srcIndirect]); - xPSHUF.D (destReg, workReg, _v0); + xMOV128(workReg, ptr32[srcIndirect]); + xPSHUF.D(destReg, workReg, _v0); break; case 1: - xPSHUF.D (destReg, workReg, _v1); + xPSHUF.D(destReg, workReg, _v1); break; case 2: - xPSHUF.D (destReg, workReg, _v2); + xPSHUF.D(destReg, workReg, _v2); break; case 3: - xPSHUF.D (destReg, workReg, _v3); + xPSHUF.D(destReg, workReg, _v3); break; } - } -void VifUnpackSSE_Base::xUPK_S_16() const { +void VifUnpackSSE_Base::xUPK_S_16() const +{ - switch(UnpkLoopIteration) + switch (UnpkLoopIteration) { case 0: - xPMOVXX16 (workReg); - xPSHUF.D (destReg, workReg, _v0); + xPMOVXX16(workReg); + xPSHUF.D(destReg, workReg, _v0); break; case 1: - xPSHUF.D (destReg, workReg, _v1); + xPSHUF.D(destReg, workReg, _v1); break; case 2: - xPSHUF.D (destReg, workReg, _v2); + xPSHUF.D(destReg, workReg, _v2); break; case 3: - xPSHUF.D (destReg, workReg, _v3); + xPSHUF.D(destReg, workReg, _v3); break; } - } -void VifUnpackSSE_Base::xUPK_S_8() const { +void VifUnpackSSE_Base::xUPK_S_8() const +{ - switch(UnpkLoopIteration) + switch (UnpkLoopIteration) { case 0: - xPMOVXX8 (workReg); - xPSHUF.D (destReg, workReg, _v0); + xPMOVXX8(workReg); + xPSHUF.D(destReg, workReg, _v0); break; case 1: - xPSHUF.D (destReg, workReg, _v1); + xPSHUF.D(destReg, workReg, _v1); break; case 2: - xPSHUF.D (destReg, workReg, _v2); + xPSHUF.D(destReg, workReg, _v2); break; case 3: - xPSHUF.D (destReg, workReg, _v3); + xPSHUF.D(destReg, workReg, _v3); break; } - } // The V2 + V3 unpacks have freaky behaviour, the manual claims "indeterminate". @@ -142,141 +147,147 @@ void VifUnpackSSE_Base::xUPK_S_8() const { // and games like Lemmings, And1 Streetball rely on this data to be like this! 
// I have commented after each shuffle to show what data is going where - Ref -void VifUnpackSSE_Base::xUPK_V2_32() const { +void VifUnpackSSE_Base::xUPK_V2_32() const +{ - if(UnpkLoopIteration == 0) + if (UnpkLoopIteration == 0) { - xMOV128 (workReg, ptr32[srcIndirect]); - xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 - if(IsAligned)xAND.PS( destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 + xMOV128(workReg, ptr32[srcIndirect]); + xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 + if (IsAligned) + xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 } else { - xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2 - if(IsAligned)xAND.PS( destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 - + xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 + if (IsAligned) + xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 } - } -void VifUnpackSSE_Base::xUPK_V2_16() const { +void VifUnpackSSE_Base::xUPK_V2_16() const +{ - if(UnpkLoopIteration == 0) - { - xPMOVXX16 (workReg); - xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 - } - else - { - xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2 - } - - -} - -void VifUnpackSSE_Base::xUPK_V2_8() const { - - if(UnpkLoopIteration == 0) + if (UnpkLoopIteration == 0) { - xPMOVXX8 (workReg); - xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 + xPMOVXX16(workReg); + xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 } else { - xPSHUF.D (destReg, workReg, 0xEE); //v3v2v3v2 + xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 } - } -void VifUnpackSSE_Base::xUPK_V3_32() const { +void VifUnpackSSE_Base::xUPK_V2_8() const +{ - xMOV128 (destReg, ptr128[srcIndirect]); - if(UnpkLoopIteration != IsAligned) - xAND.PS( destReg, ptr128[SSEXYZWMask[0]]); + if (UnpkLoopIteration == 0) + { + xPMOVXX8(workReg); + xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 + } + else + { + xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 + } } -void VifUnpackSSE_Base::xUPK_V3_16() const { +void VifUnpackSSE_Base::xUPK_V3_32() const +{ 
- xPMOVXX16 (destReg); + xMOV128(destReg, ptr128[srcIndirect]); + if (UnpkLoopIteration != IsAligned) + xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); +} + +void VifUnpackSSE_Base::xUPK_V3_16() const +{ + + xPMOVXX16(destReg); //With V3-16, it takes the first vector from the next position as the W vector //However - IF the end of this iteration of the unpack falls on a quadword boundary, W becomes 0 //IsAligned is the position through the current QW in the vif packet //Iteration counts where we are in the packet. - int result = (((UnpkLoopIteration/4) + 1 + (4-IsAligned)) & 0x3); + int result = (((UnpkLoopIteration / 4) + 1 + (4 - IsAligned)) & 0x3); - if ((UnpkLoopIteration & 0x1) == 0 && result == 0){ + if ((UnpkLoopIteration & 0x1) == 0 && result == 0) + { xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word on QW boundary if whole 32bit word is used - tested on ps2 } } -void VifUnpackSSE_Base::xUPK_V3_8() const { +void VifUnpackSSE_Base::xUPK_V3_8() const +{ - xPMOVXX8 (destReg); + xPMOVXX8(destReg); if (UnpkLoopIteration != IsAligned) xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); } -void VifUnpackSSE_Base::xUPK_V4_32() const { - - xMOV128 (destReg, ptr32[srcIndirect]); -} - -void VifUnpackSSE_Base::xUPK_V4_16() const { - - xPMOVXX16 (destReg); -} - -void VifUnpackSSE_Base::xUPK_V4_8() const { - - xPMOVXX8 (destReg); -} - -void VifUnpackSSE_Base::xUPK_V4_5() const { - - xMOV16 (workReg, ptr32[srcIndirect]); - xPSHUF.D (workReg, workReg, _v0); - xPSLL.D (workReg, 3); // ABG|R5.000 - xMOVAPS (destReg, workReg); // x|x|x|R - xPSRL.D (workReg, 8); // ABG - xPSLL.D (workReg, 3); // AB|G5.000 - mVUmergeRegs(destReg, workReg, 0x4);// x|x|G|R - xPSRL.D (workReg, 8); // AB - xPSLL.D (workReg, 3); // A|B5.000 - mVUmergeRegs(destReg, workReg, 0x2);// x|B|G|R - xPSRL.D (workReg, 8); // A - xPSLL.D (workReg, 7); // A.0000000 - mVUmergeRegs(destReg, workReg, 0x1);// A|B|G|R - xPSLL.D (destReg, 24); // can optimize to - xPSRL.D (destReg, 24); // single AND... 
-} - -void VifUnpackSSE_Base::xUnpack( int upknum ) const +void VifUnpackSSE_Base::xUPK_V4_32() const { - switch( upknum ) + xMOV128(destReg, ptr32[srcIndirect]); +} + +void VifUnpackSSE_Base::xUPK_V4_16() const +{ + xPMOVXX16(destReg); +} + +void VifUnpackSSE_Base::xUPK_V4_8() const +{ + xPMOVXX8(destReg); +} + +void VifUnpackSSE_Base::xUPK_V4_5() const +{ + + xMOV16 (workReg, ptr32[srcIndirect]); + xPSHUF.D (workReg, workReg, _v0); + xPSLL.D (workReg, 3); // ABG|R5.000 + xMOVAPS (destReg, workReg); // x|x|x|R + xPSRL.D (workReg, 8); // ABG + xPSLL.D (workReg, 3); // AB|G5.000 + mVUmergeRegs(destReg, workReg, 0x4);// x|x|G|R + xPSRL.D (workReg, 8); // AB + xPSLL.D (workReg, 3); // A|B5.000 + mVUmergeRegs(destReg, workReg, 0x2);// x|B|G|R + xPSRL.D (workReg, 8); // A + xPSLL.D (workReg, 7); // A.0000000 + mVUmergeRegs(destReg, workReg, 0x1);// A|B|G|R + xPSLL.D (destReg, 24); // can optimize to + xPSRL.D (destReg, 24); // single AND... +} + +void VifUnpackSSE_Base::xUnpack(int upknum) const +{ + switch (upknum) { - case 0: xUPK_S_32(); break; - case 1: xUPK_S_16(); break; - case 2: xUPK_S_8(); break; + case 0: xUPK_S_32(); break; + case 1: xUPK_S_16(); break; + case 2: xUPK_S_8(); break; - case 4: xUPK_V2_32(); break; - case 5: xUPK_V2_16(); break; - case 6: xUPK_V2_8(); break; + case 4: xUPK_V2_32(); break; + case 5: xUPK_V2_16(); break; + case 6: xUPK_V2_8(); break; - case 8: xUPK_V3_32(); break; - case 9: xUPK_V3_16(); break; - case 10: xUPK_V3_8(); break; + case 8: xUPK_V3_32(); break; + case 9: xUPK_V3_16(); break; + case 10: xUPK_V3_8(); break; + + case 12: xUPK_V4_32(); break; + case 13: xUPK_V4_16(); break; + case 14: xUPK_V4_8(); break; + case 15: xUPK_V4_5(); break; - case 12: xUPK_V4_32(); break; - case 13: xUPK_V4_16(); break; - case 14: xUPK_V4_8(); break; - case 15: xUPK_V4_5(); break; case 3: case 7: case 11: - pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) ); - break; + pxFailRel(wxsFormat(L"Vpu/Vif - Invalid Unpack! 
[%d]", upknum)); + break; } } @@ -286,13 +297,14 @@ void VifUnpackSSE_Base::xUnpack( int upknum ) const VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_) { - curCycle = curCycle_; - usn = usn_; - doMask = domask_; - IsAligned = true; + curCycle = curCycle_; + usn = usn_; + doMask = domask_; + IsAligned = true; } -void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { +void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const +{ xMOVAPS(xmm7, ptr[dstIndirect]); int offX = std::min(curCycle, 3); xPAND(regX, ptr32[nVifMask[0][offX]]); @@ -303,18 +315,20 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { } // ecx = dest, edx = src -static void nVifGen(int usn, int mask, int curCycle) { +static void nVifGen(int usn, int mask, int curCycle) +{ - int usnpart = usn*2*16; - int maskpart = mask*16; + int usnpart = usn * 2 * 16; + int maskpart = mask * 16; - VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle ); + VifUnpackSSE_Simple vpugen(!!usn, !!mask, curCycle); - for( int i=0; i<16; ++i ) + for (int i = 0; i < 16; ++i) { - nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] ); + nVifCall& ucall(nVifUpk[((usnpart + maskpart + i) * 4) + curCycle]); ucall = NULL; - if( nVifT[i] == 0 ) continue; + if (nVifT[i] == 0) + continue; ucall = (nVifCall)xGetAlignedCallTarget(); vpugen.xUnpack(i); @@ -325,9 +339,10 @@ static void nVifGen(int usn, int mask, int curCycle) { void VifUnpackSSE_Init() { - if (nVifUpkExec) return; + if (nVifUpkExec) + return; - DevCon.WriteLn( "Generating SSE-optimized unpacking functions for VIF interpreters..." 
); + DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters..."); nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb); nVifUpkExec->SetProfilerName("iVIF-SSE"); @@ -335,17 +350,16 @@ void VifUnpackSSE_Init() nVifUpkExec->ThrowIfNotOk(); - xSetPtr( *nVifUpkExec ); + xSetPtr(*nVifUpkExec); - for (int a = 0; a < 2; a++) { - for (int b = 0; b < 2; b++) { - for (int c = 0; c < 4; c++) { + for (int a = 0; a < 2; a++) + for (int b = 0; b < 2; b++) + for (int c = 0; c < 4; c++) nVifGen(a, b, c); - }}} nVifUpkExec->ForbidModification(); - DevCon.WriteLn( "Unpack function generation complete. Generated function statistics:" ); + DevCon.WriteLn("Unpack function generation complete. Generated function statistics:"); DevCon.Indent().WriteLn( L"Reserved buffer : %u bytes @ %ls\n" L"x86 code generated : %u bytes\n", @@ -357,5 +371,5 @@ void VifUnpackSSE_Init() void VifUnpackSSE_Destroy() { - safe_delete( nVifUpkExec ); + safe_delete(nVifUpkExec); } diff --git a/pcsx2/x86/newVif_UnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h index 43133d27af..3523504b22 100644 --- a/pcsx2/x86/newVif_UnpackSSE.h +++ b/pcsx2/x86/newVif_UnpackSSE.h @@ -31,29 +31,29 @@ extern void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, class VifUnpackSSE_Base { public: - bool usn; // unsigned flag - bool doMask; // masking write enable flag - int UnpkLoopIteration; - int UnpkNoOfIterations; - int IsAligned; + bool usn; // unsigned flag + bool doMask; // masking write enable flag + int UnpkLoopIteration; + int UnpkNoOfIterations; + int IsAligned; protected: - xAddressVoid dstIndirect; - xAddressVoid srcIndirect; - xRegisterSSE workReg; - xRegisterSSE destReg; + xAddressVoid dstIndirect; + xAddressVoid srcIndirect; + xRegisterSSE workReg; + xRegisterSSE destReg; public: VifUnpackSSE_Base(); virtual ~VifUnpackSSE_Base() = default; - virtual void xUnpack( int upktype ) const; - virtual bool IsUnmaskedOp() const=0; + virtual void 
xUnpack(int upktype) const; + virtual bool IsUnmaskedOp() const = 0; virtual void xMovDest() const; protected: - virtual void doMaskWrite(const xRegisterSSE& regX ) const=0; + virtual void doMaskWrite(const xRegisterSSE& regX) const = 0; virtual void xShiftR(const xRegisterSSE& regX, int n) const; virtual void xPMOVXX8(const xRegisterSSE& regX) const; @@ -75,7 +75,6 @@ protected: virtual void xUPK_V4_16() const; virtual void xUPK_V4_8() const; virtual void xUPK_V4_5() const; - }; // -------------------------------------------------------------------------------------- @@ -86,16 +85,16 @@ class VifUnpackSSE_Simple : public VifUnpackSSE_Base typedef VifUnpackSSE_Base _parent; public: - int curCycle; + int curCycle; public: VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_); virtual ~VifUnpackSSE_Simple() = default; - virtual bool IsUnmaskedOp() const{ return !doMask; } + virtual bool IsUnmaskedOp() const { return !doMask; } protected: - virtual void doMaskWrite(const xRegisterSSE& regX ) const; + virtual void doMaskWrite(const xRegisterSSE& regX) const; }; // -------------------------------------------------------------------------------------- @@ -106,44 +105,43 @@ class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base typedef VifUnpackSSE_Base _parent; public: - bool isFill; - int doMode; // two bit value representing... something! - + bool isFill; + int doMode; // two bit value representing... something! 
+ protected: - const nVifStruct& v; // vif0 or vif1 - const nVifBlock& vB; // some pre-collected data from VifStruct - int vCL; // internal copy of vif->cl + const nVifStruct& v; // vif0 or vif1 + const nVifBlock& vB; // some pre-collected data from VifStruct + int vCL; // internal copy of vif->cl public: VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_); - VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor + VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor : _parent(src) , v(src.v) , vB(src.vB) { - isFill = src.isFill; - vCL = src.vCL; + isFill = src.isFill; + vCL = src.vCL; } virtual ~VifUnpackSSE_Dynarec() = default; - virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; } + virtual bool IsUnmaskedOp() const { return !doMode && !doMask; } - void ModUnpack( int upknum, bool PostOp ); + void ModUnpack(int upknum, bool PostOp); void CompileRoutine(); - + protected: virtual void doMaskWrite(const xRegisterSSE& regX) const; void SetMasks(int cS) const; void writeBackRow() const; - static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src ) + static VifUnpackSSE_Dynarec FillingWrite(const VifUnpackSSE_Dynarec& src) { - VifUnpackSSE_Dynarec fillingWrite( src ); + VifUnpackSSE_Dynarec fillingWrite(src); fillingWrite.doMask = true; fillingWrite.doMode = 0; return fillingWrite; } }; -