From 4d8769ecc530827790b404ec000809d2a6b9d366 Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Sun, 5 Apr 2009 14:19:13 +0000 Subject: [PATCH 01/40] Implemented another block lookup method. Fixed an unused instruction in the emitter. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@913 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/BaseblockEx.cpp | 23 ++++++++++++++++++++--- pcsx2/x86/BaseblockEx.h | 2 +- pcsx2/x86/ix86/ix86.inl | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/pcsx2/x86/BaseblockEx.cpp b/pcsx2/x86/BaseblockEx.cpp index 22a6765e60..bb933f301d 100644 --- a/pcsx2/x86/BaseblockEx.cpp +++ b/pcsx2/x86/BaseblockEx.cpp @@ -70,10 +70,27 @@ int BaseBlocks::LastIndex(u32 startpc) const return imin; } -BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const +BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) { - // TODO - return 0; + if (0 == blocks.size()) + return 0; + + int imin = 0, imax = blocks.size() - 1, imid; + + while(imin != imax) { + imid = (imin+imax+1)>>1; + + if (blocks[imid].fnptr > ip) + imax = imid - 1; + else + imin = imid; + } + + if (ip < blocks[imin].fnptr || + ip >= blocks[imin].fnptr + blocks[imin].x86size) + return 0; + + return &blocks[imin]; } void BaseBlocks::Link(u32 pc, uptr jumpptr) diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 6ebe592d5e..ab52f311ae 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -73,7 +73,7 @@ public: BASEBLOCKEX* New(u32 startpc, uptr fnptr); int LastIndex (u32 startpc) const; - BASEBLOCKEX* GetByX86(uptr ip) const; + BASEBLOCKEX* GetByX86(uptr ip); inline int Index (u32 startpc) const { diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index ae4f5829af..6a6376de3c 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -2356,7 +2356,7 @@ emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0,to,from); - write16( 0x23 ); + 
write8( 0x23 ); WriteRmOffsetFrom(to,from,offset); } From 014b5c667192e6d6ddd4b3999f43a396dd905d45 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Sun, 5 Apr 2009 18:37:06 +0000 Subject: [PATCH 02/40] LilyPad: Fixed "Swap with Pad 1" git-svn-id: http://pcsx2.googlecode.com/svn/trunk@914 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/Config.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 99769d6c85..39e9e30ad1 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1867,7 +1867,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L InsertMenuItemW(hMenu, index, 1, &info); } else { - info.wID = port2+2*slot2; + info.wID = port2+2*slot2+1; wsprintfW(text, L"Swap with %s", pad); InsertMenuItemW(hMenu, 0, 1, &info); } @@ -1879,6 +1879,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L DestroyMenu(hMenu); if (!res) break; if (res > 0) { + res--; slot2 = res / 2; port2 = res&1; PadConfig padCfgTemp = config.padConfigs[port1][slot1]; From 923802f9367a3ae60cdb1b2c087e9341b7b7b8b0 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 6 Apr 2009 07:32:10 +0000 Subject: [PATCH 03/40] microVU: more recompiler first-pass implementation stuff... 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@915 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVU0micro.cpp | 33 +++++++++++++-- pcsx2/x86/iVU1micro.cpp | 28 ++++++++++++- pcsx2/x86/microVU.cpp | 26 ++++++------ pcsx2/x86/microVU_Alloc.h | 1 + pcsx2/x86/microVU_Analyze.inl | 14 +++++++ pcsx2/x86/microVU_Compile.inl | 26 ++++-------- pcsx2/x86/microVU_Lower.inl | 78 ++++++++++++++++++++++++++++++----- pcsx2/x86/microVU_Misc.h | 24 ++++++++--- 8 files changed, 180 insertions(+), 50 deletions(-) diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 4b70381294..2dfaac7669 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -23,11 +23,12 @@ #include "VUmicro.h" #include "iVUzerorec.h" +#ifndef PCSX2_MICROVU_ namespace VU0micro { - void recAlloc() - { - SuperVUAlloc(0); + void recAlloc() + { + SuperVUAlloc(0); } void __fastcall recClear(u32 Addr, u32 Size) @@ -62,6 +63,32 @@ namespace VU0micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU0micro +{ + void recAlloc() { initVUrec(&VU0, 0); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 0); } + void recShutdown() { closeVUrec(0); } + static void recReset() { resetVUrec(0); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() + { + if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; + + FreezeXMMRegs(1); + runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); + FreezeXMMRegs(0); + } + +} +#endif using namespace VU0micro; diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 2c4cc00bb4..87f3099d7b 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -29,7 +29,7 @@ #ifdef _DEBUG extern u32 vudump; #endif - +#ifndef 
PCSX2_MICROVU_ namespace VU1micro { void recAlloc() @@ -121,6 +121,32 @@ namespace VU1micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU1micro +{ + void recAlloc() { initVUrec(&VU1, 1); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 1); } + void recShutdown() { closeVUrec(1); } + static void recReset() { resetVUrec(1); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() { + + if((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return; + assert( (VU1.VI[REG_TPC].UL&7) == 0 ); + + FreezeXMMRegs(1); + runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); + FreezeXMMRegs(0); + } +} +#endif using namespace VU1micro; diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 4613a13acb..39e9697f38 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -42,7 +42,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; - mVU->cacheAddr = 0xC0000000 + (vuIndex ? mVU->cacheSize : 0); + mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000); mVU->cache = NULL; mVUreset(); @@ -55,16 +55,16 @@ microVUt(void) mVUreset() { mVUclose(); // Close // Create Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { mVU->prog.prog[i].block[j] = new microBlockManager(); } } // Dynarec Cache - mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0x10000000, (vuIndex ? 
"Micro VU1" : "Micro VU0")); - if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - + mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); + // Other Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; @@ -81,8 +81,8 @@ microVUt(void) mVUclose() { if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; } // Delete Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j]; } } @@ -277,27 +277,27 @@ extern "C" { // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ -__forceinline void initVUrec(VURegs* vuRegs, const int vuIndex) { +void initVUrec(VURegs* vuRegs, const int vuIndex) { if (!vuIndex) mVUinit<0>(vuRegs); else mVUinit<1>(vuRegs); } -__forceinline void closeVUrec(const int vuIndex) { +void closeVUrec(const int vuIndex) { if (!vuIndex) mVUclose<0>(); else mVUclose<1>(); } -__forceinline void resetVUrec(const int vuIndex) { +void resetVUrec(const int vuIndex) { if (!vuIndex) mVUreset<0>(); else mVUreset<1>(); } -__forceinline void clearVUrec(u32 addr, u32 size, const int vuIndex) { +void clearVUrec(u32 addr, u32 size, const int vuIndex) { if (!vuIndex) mVUclear<0>(addr, size); else mVUclear<1>(addr, size); } -__forceinline void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { +void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { if (!vuIndex) startVU0(startPC, cycles); else startVU1(startPC, cycles); } diff 
--git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index c539eb1297..f039c0a980 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -54,6 +54,7 @@ struct microAllocInfo { u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 curPC; // Current PC + u32 startPC; // Start PC for Cur Block u32 info[pSize]; // bit 00 = Lower Instruction is NOP // bit 01 // bit 02 diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index f35299ab28..a493da8968 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,4 +143,18 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// Sflag - Status Flag Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } } + +microVUt(void) mVUanalyzeSflag(int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; return; } + mVUinfo |= _isSflag; + analyzeVIreg2(It, 1); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 49c517de5e..686e4a6563 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,18 +19,6 @@ #pragma once #ifdef PCSX2_MICROVU -#ifdef mVUdebug -#define mVUdebugStuff1() { \ - if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ - if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ - if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ - if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ - if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ -} -#else -#define mVUdebugStuff1() {} -#endif - #define 
createBlock(blockEndPtr) { \ block.pipelineState = pipelineState; \ block.x86ptrStart = x86ptrStart; \ @@ -59,19 +47,18 @@ microVUt(void) mVUsetCycles() { microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVU* mVU = mVUx; microBlock block; - int branch; iPC = startPC / 4; - + // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState); if (block) { x86SetPtr(pblock->x86ptrEnd); return; } // First Pass setCode(); - branch = 0; - mVUbranch = 0; - mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage - for (;;) { + mVUbranch = 0; + mVUstartPC = iPC; + mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + for (int branch = 0;; ) { startLoop(); mVUopU(); if (curI & _Ebit_) { branch = 1; } @@ -89,6 +76,9 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, iPC = startPC; setCode(); for (bool x = 1; x; ) { + // + // ToDo: status/mac flag stuff + // if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 8e7859e033..a7e8438c24 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -23,6 +23,10 @@ // Micro VU Micromode Lower instructions //------------------------------------------------------------------ +//------------------------------------------------------------------ +// DIV/SQRT/RSQRT +//------------------------------------------------------------------ + #define testZero(xmmReg, xmmTemp, gprTemp) { \ SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ @@ -128,6 +132,10 @@ microVUf(void) mVU_RSQRT() { } } +//------------------------------------------------------------------ +// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM 
+//------------------------------------------------------------------ + #define EATANhelper(addr) { \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ @@ -401,6 +409,10 @@ microVUf(void) mVU_ESUM() { } } +//------------------------------------------------------------------ +// FCAND/FCEQ/FCGET/FCOR/FCSET +//------------------------------------------------------------------ + microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -456,6 +468,10 @@ microVUf(void) mVU_FCSET() { } } +//------------------------------------------------------------------ +// FMAND/FMEQ/FMOR +//------------------------------------------------------------------ + microVUf(void) mVU_FMAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -491,9 +507,13 @@ microVUf(void) mVU_FMOR() { } } +//------------------------------------------------------------------ +// FSAND/FSEQ/FSOR/FSSET +//------------------------------------------------------------------ + microVUf(void) mVU_FSAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); AND16ItoR(gprT1, _Imm12_); @@ -503,7 +523,7 @@ microVUf(void) mVU_FSAND() { microVUf(void) mVU_FSEQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); XOR16ItoR(gprT1, _Imm12_); @@ -515,7 +535,7 @@ microVUf(void) mVU_FSEQ() { microVUf(void) mVU_FSOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); OR16ItoR(gprT1, _Imm12_); @@ -536,6 +556,10 @@ microVUf(void) mVU_FSSET() { } } +//------------------------------------------------------------------ +// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU +//------------------------------------------------------------------ + microVUf(void) mVU_IADD() { microVU* mVU = mVUx; if (!recPass) {} @@ -623,6 +647,10 @@ microVUf(void) mVU_ISUBIU() { } } 
+//------------------------------------------------------------------ +// MOVE/MFIR/MFP/MTIR/MR32 +//------------------------------------------------------------------ + microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } @@ -672,6 +700,10 @@ microVUf(void) mVU_MR32() { } } +//------------------------------------------------------------------ +// ILW/ILWR +//------------------------------------------------------------------ + microVUf(void) mVU_ILW() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -709,6 +741,10 @@ microVUf(void) mVU_ILWR() { } } +//------------------------------------------------------------------ +// ISW/ISWR +//------------------------------------------------------------------ + microVUf(void) mVU_ISW() { microVU* mVU = mVUx; if (!recPass) {} @@ -757,6 +793,10 @@ microVUf(void) mVU_ISWR() { } } +//------------------------------------------------------------------ +// LQ/LQD/LQI +//------------------------------------------------------------------ + microVUf(void) mVU_LQ() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -818,6 +858,10 @@ microVUf(void) mVU_LQI() { } } +//------------------------------------------------------------------ +// SQ/SQD/SQI +//------------------------------------------------------------------ + microVUf(void) mVU_SQ() { microVU* mVU = mVUx; if (!recPass) {} @@ -875,6 +919,10 @@ microVUf(void) mVU_SQI() { } } +//------------------------------------------------------------------ +// RINIT/RGET/RNEXT/RXOR +//------------------------------------------------------------------ + microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; if (!recPass) {} @@ -938,21 +986,27 @@ microVUf(void) mVU_RXOR() { } } +//------------------------------------------------------------------ +// WaitP/WaitQ +//------------------------------------------------------------------ + microVUf(void) mVU_WAITP() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if 
(!recPass) { mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); } } microVUf(void) mVU_WAITQ() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if (!recPass) { mVUstall = aMax(mVUstall, mVUregs.q); } } +//------------------------------------------------------------------ +// XTOP/XITOP +//------------------------------------------------------------------ + microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); @@ -961,13 +1015,17 @@ microVUf(void) mVU_XTOP() { microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); mVUallocVIb(gprT1, _Ft_); } } +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + microVUt(void) __fastcall mVU_XGKICK_(u32 addr) { microVU* mVU = mVUx; u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff)); @@ -990,7 +1048,7 @@ microVUf(void) mVU_XGKICK() { } //------------------------------------------------------------------ -// Branches +// Branches/Jumps //------------------------------------------------------------------ microVUf(void) mVU_B() { diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index ce9272f201..90232a5088 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -66,9 +66,9 @@ declareAllVariables //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ ((mVU->code >> 6) 
& 0x1F) // The sa part of the instruction register +#define _Ft_ ((mVU->code >> 16) & 0x1F) // The ft/it part of the instruction register +#define _Fs_ ((mVU->code >> 11) & 0x1F) // The fs/is part of the instruction register +#define _Fd_ ((mVU->code >> 6) & 0x1F) // The fd/id part of the instruction register #define _X ((mVU->code>>24) & 0x1) #define _Y ((mVU->code>>23) & 0x1) @@ -144,11 +144,12 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUstall mVUallocInfo.maxStall -#define mVUdivFlag mVUallocInfo.divFlag -#define mVUdivFlagT mVUallocInfo.divFlagTimer +//#define mVUdivFlag mVUallocInfo.divFlag +//#define mVUdivFlagT mVUallocInfo.divFlagTimer #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp #define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] +#define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) #define incCycles(x) { mVUcycles += x; } @@ -157,6 +158,7 @@ declareAllVariables #define _isBranch (1<<1) // Cur Instruction is a Branch #define _isEOB (1<<2) // End of Block #define _isBdelay (1<<3) // Cur Instruction in Branch Delay slot +#define _isSflag (1<<4) // Cur Instruction uses status flag #define _writeQ (1<<5) #define _readQ (1<<6) #define _writeP (1<<7) @@ -177,6 +179,7 @@ declareAllVariables #define isBranch (mVUinfo & (1<<1)) #define isEOB (mVUinfo & (1<<2)) #define isBdelay (mVUinfo & (1<<3)) +#define isSflag (mVUinfo & (1<<4)) #define writeQ ((mVUinfo >> 5) & 1) #define readQ ((mVUinfo >> 6) & 1) #define writeP ((mVUinfo >> 7) & 1) @@ -200,3 +203,14 @@ declareAllVariables #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) +#ifdef mVUdebug +#define mVUdebugStuff1() { \ + if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ + if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ + if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ + if (curI & _Dbit_) { 
SysPrintf("microVU: D-bit set!\n"); } \ + if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ +} +#else +#define mVUdebugStuff1() {} +#endif From 60e7428671a37168c5cc3fcb37e84908f3f41d69 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 6 Apr 2009 23:48:59 +0000 Subject: [PATCH 04/40] Did some testing on the V3_# unpacks, they do some strange stuff for what goes in the W vector every 6qw of original data. Also fixed the use of the size variable so Xmen works again. Note: Nobody will notice this as SSE unpacks are forced on (for now) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@916 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 11 +++++++++-- pcsx2/VifDma.cpp | 9 ++++++--- pcsx2/VifDma.h | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index a2ebe6f961..3e6626391f 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -357,8 +357,10 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) } template -static void _UNPACKpart(u32 offnum, u32 &x, T y, int &size) +static void _UNPACKpart(u32 offnum, u32 &x, T y, int size) { + if(size == 0) return; + if (_vifRegs->offset == offnum) { switch (offnum) @@ -409,7 +411,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) _UNPACKpart(OFFSET_X, *dest++, *data++, size); _UNPACKpart(OFFSET_Y, *dest++, *data++, size); _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest, *data); + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) 
+ if((_vif->qwcalign % 24) == 0) + _UNPACKpart(OFFSET_W, *dest, 0); + else + _UNPACKpart(OFFSET_W, *dest, *data); + if (_vifRegs->offset == 4) _vifRegs->offset = 0; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 02361285c2..32840541c0 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -440,7 +440,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma Console::WriteLn("Unpack align offset = 0"); } destinc = (4 - ft->qsize) + unpacksize; - + vif->qwcalign += unpacksize * ft->dsize; func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; cdata += unpacksize * ft->dsize; @@ -482,6 +482,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { + vif->qwcalign += ft->gsize; func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -595,6 +596,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { + vif->qwcalign += ft->gsize; //Must do this before the transfer, else the confusing packets dont go right :P func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -649,6 +651,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ + vif->qwcalign += (size / ft->dsize) * ft->dsize; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -786,7 +789,7 @@ static __forceinline void vif0UNPACK(u32 *data) len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5; } - vif0.wl = 0; + vif0.qwcalign = 0; vif0.cl = 0; vif0.tag.cmd = vif0.cmd; vif0.tag.addr &= 0xfff; @@ -1516,9 +1519,9 @@ static __forceinline void vif1UNPACK(u32 *data) else vif1.tag.addr = vif1Regs->code & 0x3ff; + vif1.qwcalign = 0; vif1.cl = 0; vif1.tag.addr <<= 4; - vif1.tag.cmd = vif1.cmd; } diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index a7fed1c30b..6af78214e7 
100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -32,7 +32,7 @@ struct vifStruct { int cmd; int irq; int cl; - int wl; + int qwcalign; u8 usn; // The next three should be boolean, and will be next time I break savestate compatability. --arcum42 From 2588dc0309e0025da2181fffda9f7bad2ff431fe Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 08:42:25 +0000 Subject: [PATCH 05/40] Emitter renovations of a large scale sort (only up to phase 1). Intel's 'group 1' instructions now use a completely new ModRM/SIB encoder, along with a nicely object-oriented interface. I created some macros to retain backward compat for now, and will continue implementing the rest of the instructions later as I have time. Also: Removed x86/64 instructions from the emitter. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@917 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iMMI.cpp | 4 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUmicroLower.cpp | 94 +- pcsx2/x86/ix86-32/iCore-32.cpp | 16 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 4 +- pcsx2/x86/ix86-32/recVTLB.cpp | 48 +- pcsx2/x86/ix86/ix86.cpp | 196 ++++ pcsx2/x86/ix86/ix86.h | 27 +- pcsx2/x86/ix86/ix86.inl | 1115 +------------------- pcsx2/x86/ix86/ix86_group1.inl | 258 +++++ pcsx2/x86/ix86/ix86_macros.h | 109 +- pcsx2/x86/ix86/ix86_mmx.inl | 8 +- pcsx2/x86/ix86/ix86_sse.inl | 75 +- pcsx2/x86/ix86/ix86_sse_helpers.h | 30 +- pcsx2/x86/ix86/ix86_types.h | 209 ++++ 17 files changed, 905 insertions(+), 1298 deletions(-) create mode 100644 pcsx2/x86/ix86/ix86_group1.inl diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index a11a52c051..2618f59630 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2982,6 +2982,10 @@ /> + + diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 4fe4632b0a..a9edfba6c2 100644 --- 
a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1982,8 +1982,8 @@ void recQFSRV() MOV32MtoR(EAX, (uptr)&cpuRegs.sa); SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) - AND32I8toR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoEAX((uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) + ADD32ItoR(EAX, (uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) // Case 0: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index a1b1ec756d..23d0d45bf0 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -316,7 +316,7 @@ void _psxMoveGPRtoM(u32 to, int fromgpr) void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( PSX_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_psxConstRegs[fromgpr], 0 ); + MOV32ItoRm( to, g_psxConstRegs[fromgpr] ); else { // check x86 MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[ fromgpr ] ); diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 58156d0926..3271d69681 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -609,31 +609,31 @@ void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) if( x86reg >= 0 ) { switch(_X_Y_Z_W) { case 3: // ZW - SSE_MOVHPS_RmOffset_to_XMM(EEREC_T, x86reg, offset+8); + SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8); break; case 6: // YZ - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0x9c); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); break; case 8: // X - SSE_MOVSS_RmOffset_to_XMM(EEREC_TEMP, x86reg, offset); + SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset); SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); break; case 9: // XW - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 
0xc9); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); break; case 12: // XY - SSE_MOVLPS_RmOffset_to_XMM(EEREC_T, x86reg, offset); + SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset); break; case 15: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_T, x86reg, offset); - else SSE_MOVUPSRmtoROffset(EEREC_T, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset); break; default: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset); - else SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset); VU_MERGE_REGS(EEREC_T, EEREC_TEMP); break; @@ -795,15 +795,15 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) if ( _Fs_ == 0 ) { if ( _XYZW_SS ) { u32 c = _W ? 0x3f800000 : 0; - if ( x86reg >= 0 ) MOV32ItoRmOffset(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); + if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); } else { if ( x86reg >= 0 ) { - if ( _X ) MOV32ItoRmOffset(x86reg, 0x00000000, offset); - if ( _Y ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+4); - if ( _Z ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+8); - if ( _W ) MOV32ItoRmOffset(x86reg, 0x3f800000, offset+12); + if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset); + if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4); + if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8); + if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12); } else { if ( _X ) MOV32ItoM(offset, 0x00000000); @@ -818,29 +818,29 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) switch ( _X_Y_Z_W ) { case 1: // W SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); 
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 2: // Z SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); break; case 3: // ZW - if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); break; case 4: // Y SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP); break; case 5: // YW SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S); @@ -850,14 +850,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 6: // YZ SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9); - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP); break; case 7: // YZW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP); @@ -865,26 +865,26 @@ void _saveEAX(VURegs *VU, int 
x86reg, uptr offset, int info) } break; case 8: // X - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); break; case 9: // XW SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 10: //XZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -893,8 +893,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 11: //XZW if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -902,14 +902,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) } break; case 12: // XY - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); break; case 13: // XYW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW if ( x86reg >= 0 ) { - 
SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP); @@ -919,8 +919,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) case 14: // XYZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); @@ -929,11 +929,11 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 15: // XYZW if ( VU == &VU1 ) { - if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); } else { - if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0); else { if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); @@ -1018,7 +1018,7 @@ void recVUMI_ILW(VURegs *VU, int info) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off); + MOV32RmtoR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1051,10 +1051,10 @@ void recVUMI_ISW( VURegs *VU, int info ) x86reg = recVUTransformAddr(fsreg, VU, _Fs_, imm); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) 
MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ @@ -1082,7 +1082,7 @@ void recVUMI_ILWR( VURegs *VU, int info ) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOVZX32Rm16toROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); + MOVZX32Rm16toR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1109,10 +1109,10 @@ void recVUMI_ISWR( VURegs *VU, int info ) int fsreg = ALLOCVI(_Fs_, MODE_READ); x86reg = recVUTransformAddr(fsreg, VU, _Fs_, 0); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 1021644be3..d9a9e75664 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -1050,12 +1050,12 @@ void _recMove128MtoM(u32 to, u32 from) // fixme - see above function! 
void _recMove128RmOffsettoM(u32 to, u32 offset) { - MOV32RmtoROffset(EAX, ECX, offset); - MOV32RmtoROffset(EDX, ECX, offset+4); + MOV32RmtoR(EAX, ECX, offset); + MOV32RmtoR(EDX, ECX, offset+4); MOV32RtoM(to, EAX); MOV32RtoM(to+4, EDX); - MOV32RmtoROffset(EAX, ECX, offset+8); - MOV32RmtoROffset(EDX, ECX, offset+12); + MOV32RmtoR(EAX, ECX, offset+8); + MOV32RmtoR(EDX, ECX, offset+12); MOV32RtoM(to+8, EAX); MOV32RtoM(to+12, EDX); } @@ -1065,12 +1065,12 @@ void _recMove128MtoRmOffset(u32 offset, u32 from) { MOV32MtoR(EAX, from); MOV32MtoR(EDX, from+4); - MOV32RtoRmOffset(ECX, EAX, offset); - MOV32RtoRmOffset(ECX, EDX, offset+4); + MOV32RtoRm(ECX, EAX, offset); + MOV32RtoRm(ECX, EDX, offset+4); MOV32MtoR(EAX, from+8); MOV32MtoR(EDX, from+12); - MOV32RtoRmOffset(ECX, EAX, offset+8); - MOV32RtoRmOffset(ECX, EDX, offset+12); + MOV32RtoRm(ECX, EAX, offset+8); + MOV32RtoRm(ECX, EDX, offset+12); } static PCSX2_ALIGNED16(u32 s_ones[2]) = {0xffffffff, 0xffffffff}; diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 6d568c95b8..34d9662b9b 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -367,7 +367,7 @@ void _eeMoveGPRtoM(u32 to, int fromgpr) void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( GPR_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_cpuConstRegs[fromgpr].UL[0], 0 ); + MOV32ItoRm( to, g_cpuConstRegs[fromgpr].UL[0] ); else { int mmreg; @@ -380,7 +380,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) } else { MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ); - MOV32RtoRm(to, EAX ); + MOV32RtoRm( to, EAX ); } } } diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 83925362c3..1c39766598 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -2101,7 +2101,7 @@ void recLoad64( u32 bits, bool sign ) if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); if( bits == 128 ) // force 16 byte alignment on 128 bit reads 
- AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); // emitter automatically encodes this as an 8-bit sign-extended imm8 _eeOnLoadWrite(_Rt_); EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension @@ -2198,7 +2198,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) if ( _Imm_ != 0 ) ADD32ItoR(ECX, _Imm_); if (sz==128) - AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); vtlb_DynGenWrite(sz); } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index cbc567c68d..dd45f51d82 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -33,12 +33,12 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { MOV32RmtoR(EAX,srcRm); MOV32RtoRm(destRm,EAX); - MOV32RmtoROffset(EAX,srcRm,4); - MOV32RtoRmOffset(destRm,EAX,4); - MOV32RmtoROffset(EAX,srcRm,8); - MOV32RtoRmOffset(destRm,EAX,8); - MOV32RmtoROffset(EAX,srcRm,12); - MOV32RtoRmOffset(destRm,EAX,12); + MOV32RmtoR(EAX,srcRm,4); + MOV32RtoRm(destRm,EAX,4); + MOV32RmtoR(EAX,srcRm,8); + MOV32RtoRm(destRm,EAX,8); + MOV32RmtoR(EAX,srcRm,12); + MOV32RtoRm(destRm,EAX,12); } /* @@ -121,8 +121,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,ECX,0); - MOVQRtoRmOffset(EDX,freereg,0); + MOVQRmtoR(freereg,ECX); + MOVQRtoRm(EDX,freereg); _freeMMXreg(freereg); } else @@ -130,8 +130,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) MOV32RmtoR(EAX,ECX); MOV32RtoRm(EDX,EAX); - MOV32RmtoROffset(EAX,ECX,4); - MOV32RtoRmOffset(EDX,EAX,4); + MOV32RmtoR(EAX,ECX,4); + MOV32RtoRm(EDX,EAX,4); } break; @@ -139,8 +139,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,ECX,0); - SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + SSE2_MOVDQARmtoR(freereg,ECX); + SSE2_MOVDQARtoRm(EDX,freereg); _freeXMMreg(freereg); } else @@ -255,7 +255,7 @@ void 
vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); MOVQMtoR(freereg,ppf); - MOVQRtoRmOffset(EDX,freereg,0); + MOVQRtoRm(EDX,freereg); _freeMMXreg(freereg); } else @@ -264,7 +264,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) MOV32RtoRm(EDX,EAX); MOV32MtoR(EAX,ppf+4); - MOV32RtoRmOffset(EDX,EAX,4); + MOV32RtoRm(EDX,EAX,4); } break; @@ -273,7 +273,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); SSE2_MOVDQA_M128_to_XMM( freereg, ppf ); - SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + SSE2_MOVDQARtoRm(EDX,freereg); _freeXMMreg(freereg); } else @@ -406,8 +406,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,EDX,0); - MOVQRtoRmOffset(ECX,freereg,0); + MOVQRmtoR(freereg,EDX); + MOVQRtoRm(ECX,freereg); _freeMMXreg( freereg ); } else @@ -415,8 +415,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) MOV32RmtoR(EAX,EDX); MOV32RtoRm(ECX,EAX); - MOV32RmtoROffset(EAX,EDX,4); - MOV32RtoRmOffset(ECX,EAX,4); + MOV32RmtoR(EAX,EDX,4); + MOV32RtoRm(ECX,EAX,4); } break; @@ -424,8 +424,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,EDX,0); - SSE2_MOVDQARtoRmOffset(ECX,freereg,0); + SSE2_MOVDQARmtoR(freereg,EDX); + SSE2_MOVDQARtoRm(ECX,freereg); _freeXMMreg( freereg ); } else @@ -502,7 +502,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,EDX,0); + MOVQRmtoR(freereg,EDX); MOVQRtoM(ppf,freereg); _freeMMXreg( freereg ); } @@ -511,7 +511,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) MOV32RmtoR(EAX,EDX); MOV32RtoM(ppf,EAX); - MOV32RmtoROffset(EAX,EDX,4); + MOV32RmtoR(EAX,EDX,4); MOV32RtoM(ppf+4,EAX); } break; 
@@ -520,7 +520,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,EDX,0); + SSE2_MOVDQARmtoR(freereg,EDX); SSE2_MOVDQA_XMM_to_M128(ppf,freereg); _freeXMMreg( freereg ); } diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 11acfce6b5..35d90c8079 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -39,3 +39,199 @@ PCSX2_ALIGNED16(u32 p2[4]); PCSX2_ALIGNED16(float f[4]); XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; + +namespace x86Emitter +{ + const x86IndexerType ptr; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + const x86Register x86Register::Empty( -1 ); + + const x86Register eax( 0 ); + const x86Register ebx( 3 ); + const x86Register ecx( 1 ); + const x86Register edx( 2 ); + const x86Register esi( 6 ); + const x86Register edi( 7 ); + const x86Register ebp( 5 ); + const x86Register esp( 4 ); + + const x86Register16 ax( 0 ); + const x86Register16 bx( 3 ); + const x86Register16 cx( 1 ); + const x86Register16 dx( 2 ); + const x86Register16 si( 6 ); + const x86Register16 di( 7 ); + const x86Register16 bp( 5 ); + const x86Register16 sp( 4 ); + + const x86Register8 al( 0 ); + const x86Register8 cl( 1 ); + const x86Register8 dl( 2 ); + const x86Register8 bl( 3 ); + const x86Register8 ah( 4 ); + const x86Register8 ch( 5 ); + const x86Register8 dh( 6 ); + const x86Register8 bh( 7 ); + + ////////////////////////////////////////////////////////////////////////////////////////// + // x86Register Method Implementations + // + x86ModRm x86Register::operator+( const x86Register& right ) const + { + return x86ModRm( *this, right ); + } + + x86ModRm x86Register::operator+( const x86ModRm& right ) const + { + return right + *this; + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // ModSib Method Implementations + // + 
x86ModRm x86ModRm::FromIndexReg( x86Register index, int scale, int displacement ) + { + return x86ModRm( x86Register::Empty, index, scale, displacement ); + } + + x86Register x86ModRm::GetEitherReg() const + { + return Base.IsEmpty() ? Base : Index; + } + + x86ModRm& x86ModRm::Add( const x86Register& src ) + { + if( src == Index ) + { + Factor++; + } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. + Base = x86Register::Empty; + + if( src == Index ) + Factor++; + else + { + jASSUME( Index.IsEmpty() ); // or die if we already have an index! + Index = src; + Factor = 2; + } + } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + assert( false ); // oops, only 2 regs allowed per ModRm! + + return *this; + } + + x86ModRm& x86ModRm::Add( const x86ModRm& src ) + { + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. + if( src.Factor == 1 ) + { + Add( src.Index ); + } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = 1; + } + else if( Index == src.Index ) + Factor++; + else + assert( false ); // oops, only 2 regs allowed! + + return *this; + } + + + x86ModRm x86ptr( x86Register base ) { return x86ModRm( base ); } + + // ------------------------------------------------------------------------ + // Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. + // Necessary because by default ModSib compounds registers into Index when possible. + // + void ModSib::Reduce() + { + // If no index reg, then nothing for us to do... 
+ if( Index.IsEmpty() || Scale == 0 ) return; + + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) + { + case 1: Scale = 0; break; + case 2: Scale = 1; break; + + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; + + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! + assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; + } + } + + ModSib::ModSib( const x86ModRm& src ) : + Base( src.Base ), + Index( src.Index ), + Scale( src.Factor ), + Displacement( src.Displacement ) + { + Reduce(); + } + + ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : + Base( base ), + Index( index ), + Scale( scale ), + Displacement( displacement ) + { + Reduce(); + } + + ModSib::ModSib( s32 displacement ) : + Base(), + Index(), + Scale(0), + Displacement( displacement ) + { + } + + x86Register ModSib::GetEitherReg() const + { + return Base.IsEmpty() ? Base : Index; + } +} diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 797dc2eaf7..34a10fbe59 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -42,6 +42,11 @@ #define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) #define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) +// We use int param for offsets and then test them for validity in the recompiler. +// This helps catch programmer errors better than using an auto-truncated s8 parameter. 
+#define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) + + //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ @@ -49,23 +54,27 @@ extern u8 *x86Ptr[EmitterId_Count]; extern u8 *j8Ptr[32]; extern u32 *j32Ptr[32]; -emitterT void write8( u8 val ) { - *x86Ptr[I] = (u8)val; +emitterT void write8( u8 val ) +{ + *x86Ptr[I] = (u8)val; x86Ptr[I]++; -} +} -emitterT void write16( u16 val ) { - *(u16*)x86Ptr[I] = (u16)val; +emitterT void write16( u16 val ) +{ + *(u16*)x86Ptr[I] = val; x86Ptr[I] += 2; } -emitterT void write24( u32 val ) { +emitterT void write24( u32 val ) +{ *x86Ptr[I]++ = (u8)(val & 0xff); *x86Ptr[I]++ = (u8)((val >> 8) & 0xff); *x86Ptr[I]++ = (u8)((val >> 16) & 0xff); } -emitterT void write32( u32 val ) { +emitterT void write32( u32 val ) +{ *(u32*)x86Ptr[I] = val; x86Ptr[I] += 4; } @@ -93,7 +102,6 @@ emitterT void ex86AlignExecutable( int align ); //------------------------------------------------------------------ // General Emitter Helper functions //------------------------------------------------------------------ -emitterT void WriteRmOffset(x86IntRegType to, int offset); emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); emitterT void ModRM( int mod, int reg, int rm ); emitterT void SibSB( int ss, int index, int base ); @@ -112,15 +120,12 @@ emitterT void eCALL32( u32 to ); emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); emitterT void eNOP( void ); -emitterT void eAND32I8toR( x86IntRegType to, u8 from ); emitterT void eAND32ItoM( uptr to, u32 from ); emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void eAND32I8toM( uptr to, u8 from ); emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); - #define MMXONLY(code) code #define 
_MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index 6a6376de3c..301401ae4b 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -34,39 +34,10 @@ #include "System.h" #include "ix86.h" -emitterT void WriteRmOffset(x86IntRegType to, s32 offset) -{ - if( (to&7) == ESP ) { - if( offset == 0 ) { - ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); - write8(offset); - } - else { - ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, 0, to ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, to ); - write8(offset); - } - else { - ModRM( 2, 0, to ); - write32(offset); - } - } -} +#include "ix86_group1.inl" +// Note: the 'to' field can either be a register or a special opcode extension specifier +// depending on the opcode's encoding. 
emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { @@ -294,101 +265,6 @@ emitterT void eNOP( void ) // mov instructions / //////////////////////////////////// -/* mov r64 to r64 */ -emitterT void eMOV64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r64 to m64 */ -emitterT void eMOV64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( (u32)MEMADDR(to, 4) ); -} - -/* mov m64 to r64 */ -emitterT void eMOV64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( (u32)MEMADDR(from, 4) ); -} - -/* mov imm32 to m64 */ -emitterT void eMOV64I32toM(uptr to, u32 from ) -{ - Rex(1, 0, 0, 0); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -// mov imm64 to r64 -emitterT void eMOV64ItoR( x86IntRegType to, u64 from) -{ - RexB(1, to); - write8( 0xB8 | (to & 0x7) ); - write64( from ); -} - -/* mov imm32 to r64 */ -emitterT void eMOV64I32toR( x86IntRegType to, s32 from ) -{ - RexB(1, to); - write8( 0xC7 ); - ModRM( 0, 0, to ); - write32( from ); -} - -// mov imm64 to [r64+off] -emitterT void eMOV64ItoRmOffset( x86IntRegType to, u32 from, int offset) -{ - RexB(1,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write32(from); -} - -// mov [r64+offset] to r64 -emitterT void eMOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(1, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r64][r64*scale] to r64 */ -emitterT void eMOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -/* mov r64 to [r64+offset] */ -emitterT void eMOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - 
RexRB(1,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); -} - -/* mov r64 to [r64][r64*scale] */ -emitterT void eMOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - - /* mov r32 to r32 */ emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ) { @@ -423,21 +299,16 @@ emitterT void eMOV32MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* mov [r32] to r32 */ -emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(0, to, from); - write8(0x8B); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { +emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +{ RexRB(0, to, from); write8( 0x8B ); WriteRmOffsetFrom(to, from, offset); } /* mov [r32+r32*scale] to r32 */ -emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { +emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ RexRXB(0,to,from2,from); write8( 0x8B ); ModRM( 0, to, 0x4 ); @@ -445,7 +316,7 @@ emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType } // mov r32 to [r32<( 0x8B ); @@ -454,22 +325,9 @@ emitterT void eMOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int fro write32(from2); } -/* mov r32 to [r32] */ -emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from ) { - RexRB(0, from, to); - if ((to&7) == ESP) { - write8( 0x89 ); - ModRM( 0, from, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - } - else { - write8( 0x89 ); - ModRM( 0, from, to ); - } -} - /* mov r32 to [r32][r32*scale] */ -emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { +emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ 
RexRXB(0, to, from2, from); write8( 0x89 ); ModRM( 0, to, 0x4 ); @@ -494,16 +352,16 @@ emitterT void eMOV32ItoM(uptr to, u32 from ) } // mov imm32 to [r32+off] -emitterT void eMOV32ItoRmOffset( x86IntRegType to, u32 from, int offset) +emitterT void eMOV32ItoRm( x86IntRegType to, u32 from, int offset=0) { RexB(0,to); write8( 0xC7 ); - WriteRmOffset(to, offset); + WriteRmOffsetFrom(0, to, offset); write32(from); } // mov r32 to [r32+off] -emitterT void eMOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,from,to); write8( 0x89 ); @@ -530,15 +388,7 @@ emitterT void eMOV16MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { write8( 0x66 ); RexRB(0,to,from); @@ -546,7 +396,7 @@ emitterT void eMOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offse WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) +emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { write8(0x66); RexRXB(0,to,from1,0); @@ -556,14 +406,6 @@ emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 fro write32(from2); } -emitterT void eMOV16RtoRm(x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x89 ); - ModRM( 0, from, to ); -} - /* mov imm16 to m16 */ emitterT void eMOV16ItoM( uptr to, u16 from ) { @@ -575,7 +417,8 @@ emitterT void eMOV16ItoM( uptr to, u16 from ) } /* mov r16 to [r32][r32*scale] */ -emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType 
from, x86IntRegType from2, int scale) { +emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ write8( 0x66 ); RexRXB(0,to,from2,from); write8( 0x89 ); @@ -591,7 +434,7 @@ emitterT void eMOV16ItoR( x86IntRegType to, u16 from ) } // mov imm16 to [r16+off] -emitterT void eMOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) +emitterT void eMOV16ItoRm( x86IntRegType to, u16 from, u32 offset=0 ) { write8(0x66); RexB(0,to); @@ -601,7 +444,7 @@ emitterT void eMOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) } // mov r16 to [r16+off] -emitterT void eMOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { write8(0x66); RexRB(0,from,to); @@ -627,22 +470,14 @@ emitterT void eMOV8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* mov [r32] to r8 */ -emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,to,from); write8( 0x8A ); WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) +emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { RexRXB(0,to,from1,0); write8( 0x8A ); @@ -651,13 +486,6 @@ emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from write32(from2); } -emitterT void eMOV8RtoRm(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from, to, 0); -} - /* mov imm8 to m8 */ emitterT void eMOV8ItoM( uptr to, u8 from ) { @@ -676,7 +504,7 @@ emitterT void eMOV8ItoR( x86IntRegType to, u8 from ) } // mov imm8 to [r8+off] -emitterT void 
eMOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) +emitterT void eMOV8ItoRm( x86IntRegType to, u8 from, int offset=0) { assert( to != ESP ); RexB(0,to); @@ -686,7 +514,7 @@ emitterT void eMOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) } // mov r8 to [r8+off] -emitterT void eMOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { assert( to != ESP ); RexRB(0,from,to); @@ -733,14 +561,7 @@ emitterT void eMOVSX32R16toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xBF0F ); @@ -764,14 +585,7 @@ emitterT void eMOVZX32R8toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xB60F ); @@ -795,14 +609,7 @@ emitterT void eMOVZX32R16toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xB70F ); @@ -1014,130 +821,6 @@ emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic instructions / 
//////////////////////////////////// -/* add imm32 to r64 */ -emitterT void eADD64ItoR( x86IntRegType to, u32 from ) -{ - Rex(1, 0, 0, to >> 3); - if ( to == EAX) { - write8( 0x05 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - } - write32( from ); -} - -/* add m64 to r64 */ -emitterT void eADD64MtoR( x86IntRegType to, uptr from ) -{ - Rex(1, to >> 3, 0, 0); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* add r64 to r64 */ -emitterT void eADD64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add imm32 to EAX */ -emitterT void eADD32ItoEAX( s32 imm ) -{ - write8( 0x05 ); - write32( imm ); -} - -/* add imm32 to r32 */ -emitterT void eADD32ItoR( x86IntRegType to, s32 imm ) -{ - RexB(0, to); - if (imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8( (s8)imm ); - } - else - { - if ( to == EAX ) { - eADD32ItoEAX(imm); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - write32( imm ); - } - } -} - -/* add imm32 to m32 */ -emitterT void eADD32ItoM( uptr to, s32 imm ) -{ - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( imm ); - } -} - -// add imm32 to [r32+off] -emitterT void eADD32ItoRmOffset( x86IntRegType to, s32 imm, s32 offset) -{ - RexB(0,to); - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - WriteRmOffset(to,offset); - write8(imm); - } - else - { - write8( 0x81 ); - WriteRmOffset(to,offset); - write32(imm); - } -} - -/* add r32 to r32 */ -emitterT void eADD32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add r32 to m32 */ -emitterT void eADD32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - 
-/* add m32 to r32 */ -emitterT void eADD32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // add r16 to r16 emitterT void eADD16RtoR( x86IntRegType to , x86IntRegType from ) { @@ -1221,55 +904,6 @@ emitterT void eADD8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* adc imm32 to r32 */ -emitterT void eADC32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x15 ); - } - else { - write8( 0x81 ); - ModRM( 3, 2, to ); - } - write32( from ); -} - -/* adc imm32 to m32 */ -emitterT void eADC32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* adc r32 to r32 */ -emitterT void eADC32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x11 ); - ModRM( 3, from, to ); -} - -/* adc m32 to r32 */ -emitterT void eADC32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x13 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// adc r32 to m32 -emitterT void eADC32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x11 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - /* inc r32 */ emitterT void eINC32R( x86IntRegType to ) { @@ -1301,85 +935,6 @@ emitterT void eINC16M( u32 to ) } -/* sub imm32 to r64 */ -emitterT void eSUB64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub r64 to r64 */ -emitterT void eSUB64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m64 to r64 */ -emitterT void eSUB64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sub imm32 to r32 */ -emitterT void eSUB32ItoR( x86IntRegType 
to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub imm32 to m32 */ -emitterT void eSUB32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sub r32 to r32 */ -emitterT void eSUB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m32 to r32 */ -emitterT void eSUB32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// sub r32 to m32 -emitterT void eSUB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x29 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} // sub r16 to r16 emitterT void eSUB16RtoR( x86IntRegType to, u16 from ) @@ -1422,60 +977,6 @@ emitterT void eSUB16MtoR( x86IntRegType to, uptr from ) { write32( MEMADDR(from, 4) ); } -/* sbb r64 to r64 */ -emitterT void eSBB64RtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(1, from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb imm32 to r32 */ -emitterT void eSBB32ItoR( x86IntRegType to, u32 from ) { - RexB(0,to); - if ( to == EAX ) { - write8( 0x1D ); - } - else { - write8( 0x81 ); - ModRM( 3, 3, to ); - } - write32( from ); -} - -/* sbb imm32 to m32 */ -emitterT void eSBB32ItoM( uptr to, u32 from ) { - write8( 0x81 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sbb r32 to r32 */ -emitterT void eSBB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb m32 to r32 */ -emitterT void eSBB32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x1B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sbb r32 to m32 */ -emitterT void eSBB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 
0x19 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - /* dec r32 */ emitterT void eDEC32R( x86IntRegType to ) { @@ -1582,51 +1083,6 @@ emitterT void eIDIV32M( u32 from ) // shifting instructions / //////////////////////////////////// -/* shl imm8 to r64 */ -emitterT void eSHL64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1, to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 4, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); -} - -/* shl cl to r64 */ -emitterT void eSHL64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 4, to ); -} - -/* shr imm8 to r64 */ -emitterT void eSHR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 5, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); -} - -/* shr cl to r64 */ -emitterT void eSHR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 5, to ); -} - /* shl imm8 to r32 */ emitterT void eSHL32ItoR( x86IntRegType to, u8 from ) { @@ -1775,29 +1231,6 @@ emitterT void eSHR8ItoR( x86IntRegType to, u8 from ) } } -/* sar imm8 to r64 */ -emitterT void eSAR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 7, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); -} - -/* sar cl to r64 */ -emitterT void eSAR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 7, to ); -} - /* sar imm8 to r32 */ emitterT void eSAR32ItoR( x86IntRegType to, u8 from ) { @@ -1846,7 +1279,7 @@ emitterT void eSAR16ItoR( x86IntRegType to, u8 from ) write8( from ); } -emitterT void eROR32ItoR( x86IntRegType to,u8 from ) +/*emitterT void eROR32ItoR( x86IntRegType to,u8 from ) { RexB(0,to); if ( from == 1 ) { @@ -1859,7 +1292,7 @@ emitterT void eROR32ItoR( x86IntRegType to,u8 from ) write8( 0xc8 | to ); write8( from ); } -} +}*/ emitterT void eRCR32ItoR( x86IntRegType to, u8 from ) { @@ 
-1917,95 +1350,6 @@ emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -/* or imm32 to r32 */ -emitterT void eOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or m64 to r64 */ -emitterT void eOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* or r64 to r64 */ -emitterT void eOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or r32 to m64 -emitterT void eOR64RtoM(uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or imm32 to r32 */ -emitterT void eOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or imm32 to m32 */ -emitterT void eOR32ItoM(uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* or r32 to r32 */ -emitterT void eOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -/* or r32 to m32 */ -emitterT void eOR32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or m32 to r32 */ -emitterT void eOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // or r16 to r16 emitterT void eOR16RtoR( x86IntRegType to, x86IntRegType from ) { @@ -2095,271 +1439,6 @@ emitterT void eOR8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* xor imm32 to r64 */ -emitterT void 
eXOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1,to); - if ( to == EAX ) { - write8( 0x35 ); - } else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor r64 to r64 */ -emitterT void eXOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor m64 to r64 */ -emitterT void eXOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* xor r64 to m64 */ -emitterT void eXOR64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* xor imm32 to r32 */ -emitterT void eXOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor imm32 to m32 */ -emitterT void eXOR32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* xor r32 to r32 */ -emitterT void eXOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r16 to r16 */ -emitterT void eXOR16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r32 to m32 */ -emitterT void eXOR32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* xor m32 to r32 */ -emitterT void eXOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// xor imm16 to r16 -emitterT void eXOR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write16( from ); -} - -// 
xor r16 to m16 -emitterT void eXOR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and imm32 to r64 */ -emitterT void eAND64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x25 ); - } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); -} - -/* and m64 to r64 */ -emitterT void eAND64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* and r64 to m64 */ -emitterT void eAND64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and r64 to r64 */ -emitterT void eAND64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and imm32 to m64 */ -emitterT void eAND64I32toM( uptr to, u32 from ) -{ - Rex(1,0,0,0); - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* and imm32 to r32 */ -emitterT void eAND32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if(from < 0x80) { - eAND32I8toR(to, (u8)from); - } - else { - if ( to == EAX ) { - write8( 0x25 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); - } -} - -/* and sign ext imm8 to r32 */ -emitterT void eAND32I8toR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8( from ); -} - -/* and imm32 to m32 */ -emitterT void eAND32ItoM( uptr to, u32 from ) -{ - if(from < 0x80) { - eAND32I8toM(to, (u8)from); - } - else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); - } -} - - -/* and sign ext imm8 to m32 */ -emitterT void eAND32I8toM( uptr to, u8 from ) -{ - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* and r32 to 
r32 */ -emitterT void eAND32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and r32 to m32 */ -emitterT void eAND32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m32 to r32 */ -emitterT void eAND32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// Warning: Untested form of AND. -emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 0, to, from ); -} - -// Warning: Untested form of AND. -emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write8( 0x23 ); - WriteRmOffsetFrom(to,from,offset); -} - // and r16 to r16 emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) { @@ -2480,14 +1559,6 @@ emitterT void eAND8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4)); } -/* not r64 */ -emitterT void eNOT64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 2, from ); -} - /* not r32 */ emitterT void eNOT32R( x86IntRegType from ) { @@ -2504,14 +1575,6 @@ emitterT void eNOT32M( u32 from ) write32( MEMADDR(from, 4)); } -/* neg r64 */ -emitterT void eNEG64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - /* neg r32 */ emitterT void eNEG32R( x86IntRegType from ) { @@ -2875,14 +1938,6 @@ emitterT void eCALL32R( x86IntRegType to ) ModRM( 3, 2, to ); } -/* call r64 */ -emitterT void eCALL64R( x86IntRegType to ) -{ - RexB(0, to); - write8( 0xFF ); - ModRM( 3, 2, to ); -} - /* call m32 */ emitterT void eCALL32M( u32 to ) { @@ -2895,98 +1950,40 @@ emitterT void eCALL32M( u32 to ) // misc instructions / //////////////////////////////////// -/* cmp imm32 to r64 */ -emitterT void eCMP64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); 
- if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp m64 to r64 */ -emitterT void eCMP64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp r64 to r64 -emitterT void eCMP64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp imm32 to r32 */ -emitterT void eCMP32ItoR( x86IntRegType to, u32 from ) +// cmp imm8 to [r32] (byte ptr) +emitterT void eCMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) { RexB(0,to); - if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp imm32 to m32 */ -emitterT void eCMP32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* cmp r32 to r32 */ -emitterT void eCMP32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m32 to r32 */ -emitterT void eCMP32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to [r32] -emitterT void eCMP32I8toRm( x86IntRegType to, u8 from) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 0, 7, to ); - write8(from); -} - -// cmp imm32 to [r32+off] -emitterT void eCMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 1, 7, to ); - write8(off); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); write8(from); } // cmp imm8 to [r32] -emitterT void eCMP32I8toM( uptr to, u8 from) +emitterT void eCMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) +{ + RexB(0,to); + write8( 0x83 ); + ModRM( (off!=0), 7, to ); + if( off != 0 ) write8(off); + write8(from); +} + +// cmp imm32 to [r32] +emitterT void eCMP32ItoRm( x86IntRegType 
to, u32 from, s8 off=0 ) +{ + // fixme : This should use the imm8 form if 'from' is between 127 and -128. + + RexB(0,to); + write8( 0x81 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write32(from); +} + +// cmp imm8 to [mem] (dword ptr) +emitterT void eCMP32I8toM( uptr to, u8 from ) { write8( 0x83 ); ModRM( 0, 7, DISP32 ); diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.inl new file mode 100644 index 0000000000..ccc98726d8 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_group1.inl @@ -0,0 +1,258 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +//------------------------------------------------------------------ +// x86 Group 1 Instructions +//------------------------------------------------------------------ +// Group 1 instructions all adhere to the same encoding scheme, and so they all +// share the same emitter which has been coded here. 
+// +// Group 1 Table: [column value is the Reg field of the ModRM byte] +// +// 0 1 2 3 4 5 6 7 +// ADD OR ADC SBB AND SUB XOR CMP +// + +namespace x86Emitter { + +static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) +static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in RM field, with Mod=0) + +// ------------------------------------------------------------------------ +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction can be encoded as ModRm alone. +emitterT bool NeedsSibMagic( const ModSib& info ) +{ + // no registers? no sibs! + if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + + // two registers need a SIB + if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + + // If register is ESP, then we need a SIB: + if( info.Base == esp || info.Index == esp ) return true; + + return false; +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( int regfield, const ModSib& info ) +{ + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). 
+ + x86Register basereg = info.GetEitherReg(); + + if( basereg.IsEmpty() ) + ModRM( 0, regfield, ModRm_UseDisp32 ); + else + { + if( basereg == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, basereg.Id ); + } + } + else + { + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Index.Id, info.Scale, info.Base.Id ); + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; + jNO_DEFAULT + } +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +{ + EmitSibMagic( regfield.Id, info ); +} + +enum Group1InstructionType +{ + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP +}; + + +emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + ModRM( 3, from.Id, to.Id ); +} + +emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) +{ + if( is_s8( imm ) ) + { + write8( 0x83 ); + ModRM( 3, inst, to.Id ); + write8( (s8)imm ); + } + else + { + if( to == eax ) + write8( 0x05 | (inst<<3) ); + else + { + write8( 0x81 ); + ModRM( 3, inst, to.Id ); + } + write32( imm ); + } +} + +emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) +{ + write8( is_s8( imm ) ? 
0x83 : 0x81 ); + + EmitSibMagic( inst, sibdest ); + + if( is_s8( imm ) ) + write8( (s8)imm ); + else + write32( imm ); +} + +emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); +} + +/* group 1 op: m32 to r32 */ +emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +{ + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); +} + +emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) +{ + if( to == eax ) + { + write8( 0x04 | (inst<<3) ); + write8( imm ); + } + else + { + write8( 0x80 ); + ModRM( 3, inst, to.Id ); + write8( imm ); + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_GROUP1_OPCODE( lwr, cod ) \ + emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + +DEFINE_GROUP1_OPCODE( add, ADD ); +DEFINE_GROUP1_OPCODE( cmp, CMP ); +DEFINE_GROUP1_OPCODE( or, OR ); +DEFINE_GROUP1_OPCODE( adc, ADC ); +DEFINE_GROUP1_OPCODE( sbb, SBB ); +DEFINE_GROUP1_OPCODE( and, AND ); +DEFINE_GROUP1_OPCODE( sub, SUB ); +DEFINE_GROUP1_OPCODE( xor, XOR ); + +} // end namespace x86Emitter + + +static 
__forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) +{ + return x86Emitter::x86Register( src ); +} + + +static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) +{ + return x86Emitter::x86ModRm( _reghlp(src) ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ + emitterT void e##cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ + emitterT void e##cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ + emitterT void e##cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void e##cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void e##cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ + emitterT void e##cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void e##cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void e##cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + +DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); +DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); +DEFINE_GROUP1_OPCODE_LEGACY( or, OR ); +DEFINE_GROUP1_OPCODE_LEGACY( adc, ADC ); +DEFINE_GROUP1_OPCODE_LEGACY( sbb, SBB ); +DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); +DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); +DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); + +emitterT void eAND32I8toR( x86IntRegType to, s8 from ) +{ + x86Emitter::and32( _reghlp(to), from ); +} + +emitterT void eAND32I8toM( uptr to, s8 from ) +{ + x86Emitter::and32( (void*)to, from ); +} diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h 
index 2ad9cdfe47..6179fc7e1d 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -51,66 +51,51 @@ //------------------------------------------------------------------ // mov instructions //------------------------------------------------------------------ -#define MOV64RtoR eMOV64RtoR<_EmitterId_> -#define MOV64RtoM eMOV64RtoM<_EmitterId_> -#define MOV64MtoR eMOV64MtoR<_EmitterId_> -#define MOV64I32toM eMOV64I32toM<_EmitterId_> -#define MOV64I32toR eMOV64I32toR<_EmitterId_> -#define MOV64ItoR eMOV64ItoR<_EmitterId_> -#define MOV64ItoRmOffset eMOV64ItoRmOffset<_EmitterId_> -#define MOV64RmOffsettoR eMOV64RmOffsettoR<_EmitterId_> -#define MOV64RmStoR eMOV64RmStoR<_EmitterId_> -#define MOV64RtoRmOffset eMOV64RtoRmOffset<_EmitterId_> -#define MOV64RtoRmS eMOV64RtoRmS<_EmitterId_> #define MOV32RtoR eMOV32RtoR<_EmitterId_> #define MOV32RtoM eMOV32RtoM<_EmitterId_> #define MOV32MtoR eMOV32MtoR<_EmitterId_> #define MOV32RmtoR eMOV32RmtoR<_EmitterId_> -#define MOV32RmtoROffset eMOV32RmtoROffset<_EmitterId_> +#define MOV32RmtoR eMOV32RmtoR<_EmitterId_> #define MOV32RmStoR eMOV32RmStoR<_EmitterId_> #define MOV32RmSOffsettoR eMOV32RmSOffsettoR<_EmitterId_> #define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV32RtoRmS eMOV32RtoRmS<_EmitterId_> #define MOV32ItoR eMOV32ItoR<_EmitterId_> #define MOV32ItoM eMOV32ItoM<_EmitterId_> -#define MOV32ItoRmOffset eMOV32ItoRmOffset<_EmitterId_> -#define MOV32RtoRmOffset eMOV32RtoRmOffset<_EmitterId_> +#define MOV32ItoRm eMOV32ItoRm<_EmitterId_> +#define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV16RtoM eMOV16RtoM<_EmitterId_> #define MOV16MtoR eMOV16MtoR<_EmitterId_> #define MOV16RmtoR eMOV16RmtoR<_EmitterId_> -#define MOV16RmtoROffset eMOV16RmtoROffset<_EmitterId_> +#define MOV16RmtoR eMOV16RmtoR<_EmitterId_> #define MOV16RmSOffsettoR eMOV16RmSOffsettoR<_EmitterId_> #define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV16ItoM eMOV16ItoM<_EmitterId_> #define MOV16RtoRmS eMOV16RtoRmS<_EmitterId_> #define 
MOV16ItoR eMOV16ItoR<_EmitterId_> -#define MOV16ItoRmOffset eMOV16ItoRmOffset<_EmitterId_> -#define MOV16RtoRmOffset eMOV16RtoRmOffset<_EmitterId_> +#define MOV16ItoRm eMOV16ItoRm<_EmitterId_> +#define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV8RtoM eMOV8RtoM<_EmitterId_> #define MOV8MtoR eMOV8MtoR<_EmitterId_> #define MOV8RmtoR eMOV8RmtoR<_EmitterId_> -#define MOV8RmtoROffset eMOV8RmtoROffset<_EmitterId_> +#define MOV8RmtoR eMOV8RmtoR<_EmitterId_> #define MOV8RmSOffsettoR eMOV8RmSOffsettoR<_EmitterId_> #define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOV8ItoM eMOV8ItoM<_EmitterId_> #define MOV8ItoR eMOV8ItoR<_EmitterId_> -#define MOV8ItoRmOffset eMOV8ItoRmOffset<_EmitterId_> -#define MOV8RtoRmOffset eMOV8RtoRmOffset<_EmitterId_> +#define MOV8ItoRm eMOV8ItoRm<_EmitterId_> +#define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOVSX32R8toR eMOVSX32R8toR<_EmitterId_> #define MOVSX32Rm8toR eMOVSX32Rm8toR<_EmitterId_> -#define MOVSX32Rm8toROffset eMOVSX32Rm8toROffset<_EmitterId_> #define MOVSX32M8toR eMOVSX32M8toR<_EmitterId_> #define MOVSX32R16toR eMOVSX32R16toR<_EmitterId_> #define MOVSX32Rm16toR eMOVSX32Rm16toR<_EmitterId_> -#define MOVSX32Rm16toROffset eMOVSX32Rm16toROffset<_EmitterId_> #define MOVSX32M16toR eMOVSX32M16toR<_EmitterId_> #define MOVZX32R8toR eMOVZX32R8toR<_EmitterId_> #define MOVZX32Rm8toR eMOVZX32Rm8toR<_EmitterId_> -#define MOVZX32Rm8toROffset eMOVZX32Rm8toROffset<_EmitterId_> #define MOVZX32M8toR eMOVZX32M8toR<_EmitterId_> #define MOVZX32R16toR eMOVZX32R16toR<_EmitterId_> #define MOVZX32Rm16toR eMOVZX32Rm16toR<_EmitterId_> -#define MOVZX32Rm16toROffset eMOVZX32Rm16toROffset<_EmitterId_> #define MOVZX32M16toR eMOVZX32M16toR<_EmitterId_> #define CMOVBE32RtoR eCMOVBE32RtoR<_EmitterId_> #define CMOVBE32MtoR eCMOVBE32MtoR<_EmitterId_> @@ -147,12 +132,10 @@ //------------------------------------------------------------------ // arithmetic instructions //------------------------------------------------------------------ -#define ADD64ItoR 
eADD64ItoR<_EmitterId_> -#define ADD64MtoR eADD64MtoR<_EmitterId_> #define ADD32ItoEAX eADD32ItoEAX<_EmitterId_> #define ADD32ItoR eADD32ItoR<_EmitterId_> #define ADD32ItoM eADD32ItoM<_EmitterId_> -#define ADD32ItoRmOffset eADD32ItoRmOffset<_EmitterId_> +#define ADD32ItoRm eADD32ItoRm<_EmitterId_> #define ADD32RtoR eADD32RtoR<_EmitterId_> #define ADD32RtoM eADD32RtoM<_EmitterId_> #define ADD32MtoR eADD32MtoR<_EmitterId_> @@ -171,7 +154,6 @@ #define INC32M eINC32M<_EmitterId_> #define INC16R eINC16R<_EmitterId_> #define INC16M eINC16M<_EmitterId_> -#define SUB64MtoR eSUB64MtoR<_EmitterId_> #define SUB32ItoR eSUB32ItoR<_EmitterId_> #define SUB32ItoM eSUB32ItoM<_EmitterId_> #define SUB32RtoR eSUB32RtoR<_EmitterId_> @@ -181,7 +163,6 @@ #define SUB16ItoR eSUB16ItoR<_EmitterId_> #define SUB16ItoM eSUB16ItoM<_EmitterId_> #define SUB16MtoR eSUB16MtoR<_EmitterId_> -#define SBB64RtoR eSBB64RtoR<_EmitterId_> #define SBB32ItoR eSBB32ItoR<_EmitterId_> #define SBB32ItoM eSBB32ItoM<_EmitterId_> #define SBB32RtoR eSBB32RtoR<_EmitterId_> @@ -203,12 +184,6 @@ //------------------------------------------------------------------ // shifting instructions //------------------------------------------------------------------ -#define SHL64ItoR eSHL64ItoR<_EmitterId_> -#define SHL64CLtoR eSHL64CLtoR<_EmitterId_> -#define SHR64ItoR eSHR64ItoR<_EmitterId_> -#define SHR64CLtoR eSHR64CLtoR<_EmitterId_> -#define SAR64ItoR eSAR64ItoR<_EmitterId_> -#define SAR64CLtoR eSAR64CLtoR<_EmitterId_> #define SHL32ItoR eSHL32ItoR<_EmitterId_> #define SHL32ItoM eSHL32ItoM<_EmitterId_> #define SHL32CLtoR eSHL32CLtoR<_EmitterId_> @@ -231,10 +206,6 @@ //------------------------------------------------------------------ // logical instructions //------------------------------------------------------------------ -#define OR64ItoR eOR64ItoR<_EmitterId_> -#define OR64MtoR eOR64MtoR<_EmitterId_> -#define OR64RtoR eOR64RtoR<_EmitterId_> -#define OR64RtoM eOR64RtoM<_EmitterId_> #define OR32ItoR eOR32ItoR<_EmitterId_> 
#define OR32ItoM eOR32ItoM<_EmitterId_> #define OR32RtoR eOR32RtoR<_EmitterId_> @@ -249,11 +220,6 @@ #define OR8RtoM eOR8RtoM<_EmitterId_> #define OR8ItoM eOR8ItoM<_EmitterId_> #define OR8MtoR eOR8MtoR<_EmitterId_> -#define XOR64ItoR eXOR64ItoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64MtoR eXOR64MtoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64RtoM eXOR64RtoM<_EmitterId_> #define XOR32ItoR eXOR32ItoR<_EmitterId_> #define XOR32ItoM eXOR32ItoM<_EmitterId_> #define XOR32RtoR eXOR32RtoR<_EmitterId_> @@ -262,11 +228,6 @@ #define XOR32MtoR eXOR32MtoR<_EmitterId_> #define XOR16RtoM eXOR16RtoM<_EmitterId_> #define XOR16ItoR eXOR16ItoR<_EmitterId_> -#define AND64I32toR eAND64I32toR<_EmitterId_> -#define AND64MtoR eAND64MtoR<_EmitterId_> -#define AND64RtoM eAND64RtoM<_EmitterId_> -#define AND64RtoR eAND64RtoR<_EmitterId_> -#define AND64I32toM eAND64I32toM<_EmitterId_> #define AND32ItoR eAND32ItoR<_EmitterId_> #define AND32I8toR eAND32I8toR<_EmitterId_> #define AND32ItoM eAND32ItoM<_EmitterId_> @@ -275,7 +236,7 @@ #define AND32RtoM eAND32RtoM<_EmitterId_> #define AND32MtoR eAND32MtoR<_EmitterId_> #define AND32RmtoR eAND32RmtoR<_EmitterId_> -#define AND32RmtoROffset eAND32RmtoROffset<_EmitterId_> +#define AND32RmtoR eAND32RmtoR<_EmitterId_> #define AND16RtoR eAND16RtoR<_EmitterId_> #define AND16ItoR eAND16ItoR<_EmitterId_> #define AND16ItoM eAND16ItoM<_EmitterId_> @@ -286,10 +247,8 @@ #define AND8RtoM eAND8RtoM<_EmitterId_> #define AND8MtoR eAND8MtoR<_EmitterId_> #define AND8RtoR eAND8RtoR<_EmitterId_> -#define NOT64R eNOT64R<_EmitterId_> #define NOT32R eNOT32R<_EmitterId_> #define NOT32M eNOT32M<_EmitterId_> -#define NEG64R eNEG64R<_EmitterId_> #define NEG32R eNEG32R<_EmitterId_> #define NEG32M eNEG32M<_EmitterId_> #define NEG16R eNEG16R<_EmitterId_> @@ -349,15 +308,13 @@ //------------------------------------------------------------------ // misc instructions 
//------------------------------------------------------------------ -#define CMP64I32toR eCMP64I32toR<_EmitterId_> -#define CMP64MtoR eCMP64MtoR<_EmitterId_> -#define CMP64RtoR eCMP64RtoR<_EmitterId_> #define CMP32ItoR eCMP32ItoR<_EmitterId_> #define CMP32ItoM eCMP32ItoM<_EmitterId_> #define CMP32RtoR eCMP32RtoR<_EmitterId_> #define CMP32MtoR eCMP32MtoR<_EmitterId_> +#define CMP32ItoRm eCMP32ItoRm<_EmitterId_> +#define CMP8I8toRm eCMP8I8toRm<_EmitterId_> #define CMP32I8toRm eCMP32I8toRm<_EmitterId_> -#define CMP32I8toRmOffset8 eCMP32I8toRmOffset8<_EmitterId_> #define CMP32I8toM eCMP32I8toM<_EmitterId_> #define CMP16ItoR eCMP16ItoR<_EmitterId_> #define CMP16ItoM eCMP16ItoM<_EmitterId_> @@ -539,16 +496,16 @@ #define PUNPCKHDQMtoR ePUNPCKHDQMtoR<_EmitterId_> #define MOVQ64ItoR eMOVQ64ItoR<_EmitterId_> #define MOVQRtoR eMOVQRtoR<_EmitterId_> -#define MOVQRmtoROffset eMOVQRmtoROffset<_EmitterId_> -#define MOVQRtoRmOffset eMOVQRtoRmOffset<_EmitterId_> +#define MOVQRmtoR eMOVQRmtoR<_EmitterId_> +#define MOVQRtoRm eMOVQRtoRm<_EmitterId_> #define MOVDMtoMMX eMOVDMtoMMX<_EmitterId_> #define MOVDMMXtoM eMOVDMMXtoM<_EmitterId_> #define MOVD32RtoMMX eMOVD32RtoMMX<_EmitterId_> #define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> -#define MOVD32RmOffsettoMMX eMOVD32RmOffsettoMMX<_EmitterId_> +#define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> #define MOVD32MMXtoR eMOVD32MMXtoR<_EmitterId_> #define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> -#define MOVD32MMXtoRmOffset eMOVD32MMXtoRmOffset<_EmitterId_> +#define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> #define PINSRWRtoMMX ePINSRWRtoMMX<_EmitterId_> #define PSHUFWRtoR ePSHUFWRtoR<_EmitterId_> #define PSHUFWMtoR ePSHUFWMtoR<_EmitterId_> @@ -575,33 +532,31 @@ #define SSE_MOVSS_XMM_to_M32 eSSE_MOVSS_XMM_to_M32<_EmitterId_> #define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MOVSS_XMM_to_XMM eSSE_MOVSS_XMM_to_XMM<_EmitterId_> -#define SSE_MOVSS_RmOffset_to_XMM eSSE_MOVSS_RmOffset_to_XMM<_EmitterId_> -#define 
SSE_MOVSS_XMM_to_RmOffset eSSE_MOVSS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVSS_Rm_to_XMM eSSE_MOVSS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MASKMOVDQU_XMM_to_XMM eSSE_MASKMOVDQU_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPS_M64_to_XMM eSSE_MOVLPS_M64_to_XMM<_EmitterId_> #define SSE_MOVLPS_XMM_to_M64 eSSE_MOVLPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVLPS_RmOffset_to_XMM eSSE_MOVLPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVLPS_XMM_to_RmOffset eSSE_MOVLPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVLPS_Rm_to_XMM eSSE_MOVLPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVLPS_XMM_to_Rm eSSE_MOVLPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVHPS_M64_to_XMM eSSE_MOVHPS_M64_to_XMM<_EmitterId_> #define SSE_MOVHPS_XMM_to_M64 eSSE_MOVHPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVHPS_RmOffset_to_XMM eSSE_MOVHPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVHPS_XMM_to_RmOffset eSSE_MOVHPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVHPS_Rm_to_XMM eSSE_MOVHPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVHPS_XMM_to_Rm eSSE_MOVHPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVLHPS_XMM_to_XMM eSSE_MOVLHPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVHLPS_XMM_to_XMM eSSE_MOVHLPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPSRmtoR eSSE_MOVLPSRmtoR<_EmitterId_> -#define SSE_MOVLPSRmtoROffset eSSE_MOVLPSRmtoROffset<_EmitterId_> #define SSE_MOVLPSRtoRm eSSE_MOVLPSRtoRm<_EmitterId_> -#define SSE_MOVLPSRtoRmOffset eSSE_MOVLPSRtoRmOffset<_EmitterId_> #define SSE_MOVAPSRmStoR eSSE_MOVAPSRmStoR<_EmitterId_> #define SSE_MOVAPSRtoRmS eSSE_MOVAPSRtoRmS<_EmitterId_> -#define SSE_MOVAPSRtoRmOffset eSSE_MOVAPSRtoRmOffset<_EmitterId_> -#define SSE_MOVAPSRmtoROffset eSSE_MOVAPSRmtoROffset<_EmitterId_> +#define SSE_MOVAPSRtoRm eSSE_MOVAPSRtoRm<_EmitterId_> +#define SSE_MOVAPSRmtoR eSSE_MOVAPSRmtoR<_EmitterId_> #define SSE_MOVUPSRmStoR eSSE_MOVUPSRmStoR<_EmitterId_> #define SSE_MOVUPSRtoRmS eSSE_MOVUPSRtoRmS<_EmitterId_> #define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> 
#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> -#define SSE_MOVUPSRmtoROffset eSSE_MOVUPSRmtoROffset<_EmitterId_> -#define SSE_MOVUPSRtoRmOffset eSSE_MOVUPSRtoRmOffset<_EmitterId_> +#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> +#define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> #define SSE_RCPPS_XMM_to_XMM eSSE_RCPPS_XMM_to_XMM<_EmitterId_> #define SSE_RCPPS_M128_to_XMM eSSE_RCPPS_M128_to_XMM<_EmitterId_> #define SSE_RCPSS_XMM_to_XMM eSSE_RCPSS_XMM_to_XMM<_EmitterId_> @@ -676,7 +631,7 @@ #define SSE_UNPCKHPS_XMM_to_XMM eSSE_UNPCKHPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_XMM_to_XMM eSSE_SHUFPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_M128_to_XMM eSSE_SHUFPS_M128_to_XMM<_EmitterId_> -#define SSE_SHUFPS_RmOffset_to_XMM eSSE_SHUFPS_RmOffset_to_XMM<_EmitterId_> +#define SSE_SHUFPS_Rm_to_XMM eSSE_SHUFPS_Rm_to_XMM<_EmitterId_> #define SSE_CMPEQPS_M128_to_XMM eSSE_CMPEQPS_M128_to_XMM<_EmitterId_> #define SSE_CMPEQPS_XMM_to_XMM eSSE_CMPEQPS_XMM_to_XMM<_EmitterId_> #define SSE_CMPLTPS_M128_to_XMM eSSE_CMPLTPS_M128_to_XMM<_EmitterId_> @@ -780,8 +735,8 @@ #define SSE2_MOVQ_XMM_to_M64 eSSE2_MOVQ_XMM_to_M64<_EmitterId_> #define SSE2_MOVDQ2Q_XMM_to_MM eSSE2_MOVDQ2Q_XMM_to_MM<_EmitterId_> #define SSE2_MOVQ2DQ_MM_to_XMM eSSE2_MOVQ2DQ_MM_to_XMM<_EmitterId_> -#define SSE2_MOVDQARtoRmOffset eSSE2_MOVDQARtoRmOffset<_EmitterId_> -#define SSE2_MOVDQARmtoROffset eSSE2_MOVDQARmtoROffset<_EmitterId_> +#define SSE2_MOVDQARtoRm eSSE2_MOVDQARtoRm<_EmitterId_> +#define SSE2_MOVDQARmtoR eSSE2_MOVDQARmtoR<_EmitterId_> #define SSE2_CVTDQ2PS_M128_to_XMM eSSE2_CVTDQ2PS_M128_to_XMM<_EmitterId_> #define SSE2_CVTDQ2PS_XMM_to_XMM eSSE2_CVTDQ2PS_XMM_to_XMM<_EmitterId_> #define SSE2_CVTPS2DQ_M128_to_XMM eSSE2_CVTPS2DQ_M128_to_XMM<_EmitterId_> @@ -920,11 +875,11 @@ #define SSE2_MOVD_M32_to_XMM eSSE2_MOVD_M32_to_XMM<_EmitterId_> #define SSE2_MOVD_R_to_XMM eSSE2_MOVD_R_to_XMM<_EmitterId_> #define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> -#define 
SSE2_MOVD_RmOffset_to_XMM eSSE2_MOVD_RmOffset_to_XMM<_EmitterId_> +#define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> #define SSE2_MOVD_XMM_to_M32 eSSE2_MOVD_XMM_to_M32<_EmitterId_> #define SSE2_MOVD_XMM_to_R eSSE2_MOVD_XMM_to_R<_EmitterId_> #define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> -#define SSE2_MOVD_XMM_to_RmOffset eSSE2_MOVD_XMM_to_RmOffset<_EmitterId_> +#define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> #define SSE2_MOVQ_XMM_to_R eSSE2_MOVQ_XMM_to_R<_EmitterId_> #define SSE2_MOVQ_R_to_XMM eSSE2_MOVQ_R_to_XMM<_EmitterId_> //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.inl index 18126cd6e1..bc7d183319 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.inl @@ -482,11 +482,11 @@ emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ) +emitterT void eMOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) { write16( 0x6F0F ); - if( offset < 128 ) { + if( offset < 128 && offset >= -128) { ModRM( 1, to, from ); write8(offset); } @@ -496,11 +496,11 @@ emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset } } -emitterT void eMOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +emitterT void eMOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) { write16( 0x7F0F ); - if( offset < 128 ) { + if( offset < 128 && offset >= -128) { ModRM( 1, from , to ); write8(offset); } diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index 971a33af17..a52ba6ccd7 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -18,18 +18,22 @@ #pragma once -//------------------------------------------------------------------ -// SSE instructions -//------------------------------------------------------------------ - 
+////////////////////////////////////////////////////////////////////////////////////////// +// AlwaysUseMovaps [const] +// // This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache // and some marginal speed gains as a result. (it's possible someday in the future the per- // formance of the two instructions could change, so this constant is provided to restore MOVDQA // use easily at a later time, if needed). - +// static const bool AlwaysUseMovaps = true; + +//------------------------------------------------------------------ +// SSE instructions +//------------------------------------------------------------------ + #define SSEMtoR( code, overb ) \ assert( to < XMMREGS ), \ RexR(0, to), \ @@ -140,7 +144,7 @@ static const bool AlwaysUseMovaps = true; write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); write16( 0x100f ); @@ -149,7 +153,7 @@ emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntReg } /* movups xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(1, to, from2, from); write16( 0x110f ); @@ -181,7 +185,7 @@ emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from ); } -emitterT void eSSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x120f ); @@ -196,7 +200,7 @@ emitterT void eSSE_MOVLPSRtoRm( 
x86IntRegType to, x86IntRegType from ) ModRM( 0, from, to ); } -emitterT void eSSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x130f ); @@ -204,7 +208,7 @@ emitterT void eSSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -214,7 +218,7 @@ emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntReg } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -224,7 +228,7 @@ emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntReg } // movaps [r32+offset] to r32 -emitterT void eSSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x280f ); @@ -232,7 +236,7 @@ emitterT void eSSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int } // movaps r32 to [r32+offset] -emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x290f ); @@ -240,10 +244,10 @@ emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int } // movdqa [r32+offset] to r32 -emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType 
from, int offset ) +emitterT void eSSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRmtoROffset( to, from, offset ); + eSSE_MOVAPSRmtoR( to, from, offset ); else { write8(0x66); @@ -254,10 +258,10 @@ emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int } // movdqa r32 to [r32+offset] -emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRtoRmOffset( to, from, offset ); + eSSE_MOVAPSRtoRm( to, from, offset ); else { write8(0x66); @@ -268,7 +272,7 @@ emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int } // movups [r32+offset] to r32 -emitterT void eSSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x100f ); @@ -276,7 +280,7 @@ emitterT void eSSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int } // movups r32 to [r32+offset] -emitterT void eSSE_MOVUPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x110f ); @@ -328,17 +332,10 @@ emitterT void eSSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) //********************************************************************************** emitterT void eSSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } emitterT void eSSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - ModRM(0, from, to); -} emitterT void eSSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if 
(to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void eSSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { write8(0xf3); RexRB(0, to, from); @@ -346,7 +343,7 @@ emitterT void eSSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { write8(0xf3); RexRB(0, from, to); @@ -361,14 +358,14 @@ emitterT void eSSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) emitterT void eSSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } emitterT void eSSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void eSSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16(0x130f); @@ -382,14 +379,14 @@ emitterT void eSSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, emitterT void eSSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } emitterT void eSSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } -emitterT void eSSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x160f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void 
eSSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16(0x170f); @@ -756,7 +753,7 @@ emitterT void eSSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSER emitterT void eSSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } emitterT void eSSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) +emitterT void eSSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { RexRB(0, to, from); write16(0xc60f); @@ -903,7 +900,7 @@ emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from); } -emitterT void eSSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { write8(0x66); RexRB(0, to, from); @@ -914,15 +911,7 @@ emitterT void eSSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, emitterT void eSSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } emitterT void eSSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - ModRM( 0, from, to ); -} - -emitterT void eSSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { write8(0x66); RexRB(0, from, to); diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index 9caa04c6a2..ccd7ef5373 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ 
b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -47,16 +47,16 @@ static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegTyp else SSE_MOVAPS_XMM_to_XMM(to, from); } -static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +static __forceinline void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset); - else SSE_MOVAPSRmtoROffset(to, from, offset); + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); + else SSE_MOVAPSRmtoR(to, from, offset); } -static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +static __forceinline void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset); - else SSE_MOVAPSRtoRmOffset(to, from, offset); + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); + else SSE_MOVAPSRtoRm(to, from, offset); } static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) @@ -83,22 +83,16 @@ static __forceinline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) else SSE_MOVSS_XMM_to_M32(to, from); } -static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) +static __forceinline void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from); - else SSE_MOVSS_XMM_to_Rm(to, from); + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); + else SSE_MOVSS_Rm_to_XMM(to, from, offset); } -static __forceinline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - if( 
g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset); - else SSE_MOVSS_RmOffset_to_XMM(to, from, offset); -} - -static __forceinline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset); - else SSE_MOVSS_XMM_to_RmOffset(to, from, offset); + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); + else SSE_MOVSS_XMM_to_Rm(to, from, offset); } static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index e4fb71d84d..70c20eb803 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -29,6 +29,7 @@ // general types typedef int x86IntRegType; + #define EAX 0 #define EBX 3 #define ECX 1 @@ -149,3 +150,211 @@ struct CPUINFO{ extern CPUINFO cpuinfo; //------------------------------------------------------------------ + +static __forceinline bool is_s8( u32 imm ) { return (s8)imm == (s32)imm; } + +namespace x86Emitter +{ + class x86ModRm; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + struct x86Register + { + static const x86Register Empty; // defined as an empty/unused value (-1) + + int Id; + + x86Register( const x86Register& src ) : Id( src.Id ) {} + x86Register() : Id( -1 ) {} + explicit x86Register( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register& src ) const { return Id == src.Id; } + bool operator!=( const x86Register& src ) const { return Id != src.Id; } + + x86ModRm operator+( const x86Register& right ) const; + x86ModRm operator+( const x86ModRm& right ) const; + + x86Register& operator=( const x86Register& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but 
without the ability to add/combine them with ModSib. + // + class x86Register16 + { + public: + static const x86Register16 Empty; + + int Id; + + x86Register16( const x86Register16& src ) : Id( src.Id ) {} + x86Register16() : Id( -1 ) {} + explicit x86Register16( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register16& src ) const { return Id == src.Id; } + bool operator!=( const x86Register16& src ) const { return Id != src.Id; } + + x86Register16& operator=( const x86Register16& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but without the ability to add/combine them with ModSib. + // + class x86Register8 + { + public: + static const x86Register8 Empty; + + int Id; + + x86Register8( const x86Register16& src ) : Id( src.Id ) {} + x86Register8() : Id( -1 ) {} + explicit x86Register8( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register8& src ) const { return Id == src.Id; } + bool operator!=( const x86Register8& src ) const { return Id != src.Id; } + + x86Register8& operator=( const x86Register8& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + class x86ModRm + { + public: + x86Register Base; // base register (no scale) + x86Register Index; // index reg gets multiplied by the scale + int Factor; // scale applied to the index register, in factor form (not a shift!) 
+ s32 Displacement; // address displacement + + public: + x86ModRm( x86Register base, x86Register index, int factor=1, s32 displacement=0 ) : + Base( base ), + Index( index ), + Factor( factor ), + Displacement( displacement ) + { + } + + explicit x86ModRm( x86Register base, int displacement=0 ) : + Base( base ), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + explicit x86ModRm( s32 displacement ) : + Base(), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + static x86ModRm FromIndexReg( x86Register index, int scale=0, s32 displacement=0 ); + + public: + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + x86Register GetEitherReg() const; + + x86ModRm& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + x86ModRm& Add( const x86Register& src ); + x86ModRm& Add( const x86ModRm& src ); + + x86ModRm operator+( const x86Register& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } + x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // ModSib - Internal low-level representation of the ModRM/SIB information. + // + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that + // the Base, Index, Scale, and Displacement values are all valid, and it serves as a type- + // safe layer between the x86Register's operators (which generate x86ModRm types) and the + // emitter's ModSib instruction forms. Without this, the x86Register would pass as a + // ModSib type implicitly, and that would cause ambiguity on a number of instructions. 
+ // + class ModSib + { + public: + x86Register Base; // base register (no scale) + x86Register Index; // index reg gets multiplied by the scale + int Scale; // scale applied to the index register, in scale/shift form + s32 Displacement; // offset applied to the Base/Index registers. + + ModSib( const x86ModRm& src ); + ModSib( x86Register base, x86Register index, int scale=0, s32 displacement=0 ); + ModSib( s32 disp ); + + x86Register GetEitherReg() const; + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + + protected: + void Reduce(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // x86IndexerType - This is a static class which provisions our ptr[] syntax. + // + struct x86IndexerType + { + ModSib operator[]( x86Register src ) const + { + return ModSib( src, x86Register::Empty ); + } + + ModSib operator[]( const x86ModRm& src ) const + { + return ModSib( src ); + } + + ModSib operator[]( uptr src ) const + { + return ModSib( src ); + } + + ModSib operator[]( void* src ) const + { + return ModSib( (uptr)src ); + } + }; + + // ------------------------------------------------------------------------ + extern const x86Register eax; + extern const x86Register ebx; + extern const x86Register ecx; + extern const x86Register edx; + extern const x86Register esi; + extern const x86Register edi; + extern const x86Register ebp; + extern const x86Register esp; + + extern const x86IndexerType ptr; +} \ No newline at end of file From 5f35577543b6da993b37ace29b42822485050678 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Tue, 7 Apr 2009 12:25:56 +0000 Subject: [PATCH 06/40] Linux compiles again. Added back in potentially obsolete code, since it's still called. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@918 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.h | 1 + pcsx2/x86/ix86/Makefile.am | 2 +- pcsx2/x86/ix86/ix86.cpp | 2 +- pcsx2/x86/ix86/ix86.inl | 35 +++++++++++++++++++++++++++++++++++ pcsx2/x86/ix86/ix86_types.h | 2 +- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 4ad1cb233e..90c3a6f105 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -24,6 +24,7 @@ struct vifCycle { u8 pad[2]; }; +// r0-r3 and c0-c3 would be more managable as arrays. struct VIFregisters { u32 stat; u32 pad0[3]; diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 7f76f134be..880d6f18e3 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,4 +1,4 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h \ No newline at end of file +libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h ix86_group1.inl \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 35d90c8079..aec35d65bc 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -42,7 +42,7 @@ XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { - const x86IndexerType ptr; + x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index 301401ae4b..b94bab0e8d 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -38,6 +38,41 @@ // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. 
+ +// I added this back in because it's called once from eMOV8ItoRm and eMOV16ItoRm. +emitterT void WriteRmOffset(x86IntRegType to, s32 offset) +{ + if ((to&7) == ESP) { + if( offset == 0 ) { + ModRM( 0, 0, 4 ); + SibSB( 0, ESP, 4 ); + } + else if( offset <= 127 && offset >= -128 ) { + ModRM( 1, 0, 4 ); + SibSB( 0, ESP, 4 ); + write8(offset); + } + else { + ModRM( 2, 0, 4 ); + SibSB( 0, ESP, 4 ); + write32(offset); + } + } + else { + if( offset == 0 ) { + ModRM( 0, 0, to ); + } + else if( offset <= 127 && offset >= -128 ) { + ModRM( 1, 0, to ); + write8(offset); + } + else { + ModRM( 2, 0, to ); + write32(offset); + } + } +} + emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 70c20eb803..be0e0d3ec1 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -356,5 +356,5 @@ namespace x86Emitter extern const x86Register ebp; extern const x86Register esp; - extern const x86IndexerType ptr; + extern x86IndexerType ptr; } \ No newline at end of file From bf3d124e23bf9d81ded39fd108cc34be3279a14b Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 16:54:02 +0000 Subject: [PATCH 07/40] Switched the emitter over to using Thread-Local storage (TLS), which removes all the templates and brings us back to a more traditional-looking, macro-free, and intellisense-friendly implementation. Plus it's a lot less prone to errors and will make debugging easier down the road. 
(next commit will rename the files back to .cpp and get them out of the header includes) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@919 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/PrecompiledHeader.h | 21 - pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 - pcsx2/x86/iMMI.cpp | 6 +- pcsx2/x86/iR3000A.cpp | 24 +- pcsx2/x86/iR3000Atables.cpp | 34 +- pcsx2/x86/iVUzerorec.cpp | 26 +- pcsx2/x86/ix86-32/iCore-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 22 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 8 +- pcsx2/x86/ix86/ix86.cpp | 7 +- pcsx2/x86/ix86/ix86.h | 73 +- pcsx2/x86/ix86/ix86.inl | 1924 ++++++++++---------- pcsx2/x86/ix86/ix86_3dnow.inl | 210 +-- pcsx2/x86/ix86/ix86_fpu.inl | 246 +-- pcsx2/x86/ix86/ix86_group1.inl | 111 +- pcsx2/x86/ix86/ix86_mmx.inl | 608 +++---- pcsx2/x86/ix86/ix86_sse.inl | 1284 ++++++------- 17 files changed, 2281 insertions(+), 2331 deletions(-) diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index 84c3e977d2..36195bb08d 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -155,24 +155,3 @@ static __forceinline u32 timeGetTime() # define __releaseinline __forceinline #endif -////////////////////////////////////////////////////////////////////////////////////////// -// Emitter Instance Identifiers. If you add a new emitter, do it here also. -// Note: Currently most of the instances map back to 0, since existing dynarec code all -// shares iCore and must therefore all share the same emitter instance. -// (note: these don't really belong here per-se, but it's an easy spot to use for now) -enum -{ - EmitterId_R5900 = 0, - EmitterId_R3000a = EmitterId_R5900, - EmitterId_VU0micro = EmitterId_R5900, - EmitterId_VU1micro = EmitterId_R5900, - - // Cotton's new microVU, which is iCore-free - EmitterId_microVU0, - EmitterId_microVU1, - - // Air's eventual IopRec, which will also be iCore-free - EmitterId_R3000air, - - EmitterId_Count // must always be last! 
-}; diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 2618f59630..a77fc861b6 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2986,10 +2986,6 @@ RelativePath="..\..\x86\ix86\ix86_group1.inl" > - - diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index a9edfba6c2..8dabe0b5c8 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1956,14 +1956,14 @@ CPU_SSE_XMMCACHE_END // Both Macros are 16 bytes so we can use a shift instead of a Mul instruction #define QFSRVhelper0() { \ ajmp[0] = JMP32(0); \ - x86Ptr[0] += 11; \ + x86Ptr += 11; \ } #define QFSRVhelper(shift1, shift2) { \ SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1); \ SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2); \ ajmp[shift1] = JMP32(0); \ - x86Ptr[0] += 1; \ + x86Ptr += 1; \ } void recQFSRV() @@ -1983,7 +1983,7 @@ void recQFSRV() MOV32MtoR(EAX, (uptr)&cpuRegs.sa); SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoR(EAX, (uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) // Case 0: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 23d0d45bf0..0309d6eced 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -171,7 +171,7 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) #ifdef __LINUX__ // dump the asm f = fopen( "mydump1", "wb" ); - fwrite( ptr, 1, (uptr)x86Ptr[0] - (uptr)ptr, f ); + fwrite( ptr, 1, (uptr)x86Ptr - (uptr)ptr, f ); fclose( f ); sprintf( command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 | cat %s - > tempdump", filename ); system( command ); @@ -772,7 +772,7 @@ void psxSetBranchReg(u32 reg) 
_psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); } void psxSetBranchImm( u32 imm ) @@ -828,7 +828,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) if( newpc != 0xffffffff ) { CMP32ItoM((uptr)&psxRegs.pc, newpc); - JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 6 )); } // Skip branch jump target here: @@ -864,7 +864,7 @@ void rpsxSYSCALL() ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); // jump target for skipping blockCycle updates x86SetJ8(j8Ptr[0]); @@ -884,7 +884,7 @@ void rpsxBREAK() j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; @@ -1004,7 +1004,7 @@ void iopRecRecompile(u32 startpc) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PSX_GETBLOCK(startpc); @@ -1025,7 +1025,7 @@ void iopRecRecompile(u32 startpc) psxbranch = 0; - s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] ); + s_pCurBlock->SetFnptr( (uptr)x86Ptr ); s_psxBlockCycles = 0; // reset recomp state variables @@ -1160,7 +1160,7 @@ StartRecomp: iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); } else { if( psxbranch ) assert( !willbranch3 ); @@ -1180,12 +1180,12 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+RECMEM_SIZE ); + assert( x86Ptr < recMem+RECMEM_SIZE ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = 
x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index 1b1f4486e6..fddd6f9690 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -1258,7 +1258,7 @@ void rpsxJALR() static void* s_pbranchjmp; static u32 s_do32 = 0; -#define JUMPVALID(pjmp) (( x86Ptr[0] - (u8*)pjmp ) <= 0x80) +#define JUMPVALID(pjmp) (( x86Ptr - (u8*)pjmp ) <= 0x80) void rpsxSetBranchEQ(int info, int process) { @@ -1305,7 +1305,7 @@ void rpsxBEQ_process(int info, int process) else { _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; psxSaveBranchState(); @@ -1318,7 +1318,7 @@ void rpsxBEQ_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1369,7 +1369,7 @@ void rpsxBNE_process(int info, int process) } _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; rpsxSetBranchEQ(info, process); @@ -1381,7 +1381,7 @@ void rpsxBNE_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1423,7 +1423,7 @@ void rpsxBLTZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1435,7 +1435,7 @@ void rpsxBLTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1470,7 +1470,7 @@ void rpsxBGEZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); psxSaveBranchState(); @@ 
-1482,7 +1482,7 @@ void rpsxBGEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1524,7 +1524,7 @@ void rpsxBLTZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1538,7 +1538,7 @@ void rpsxBLTZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1577,7 +1577,7 @@ void rpsxBGEZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); MOV32ItoM((uptr)&psxRegs.GPR.r[31], psxpc+4); @@ -1591,7 +1591,7 @@ void rpsxBGEZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1631,7 +1631,7 @@ void rpsxBLEZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JLE8(0); psxSaveBranchState(); @@ -1642,7 +1642,7 @@ void rpsxBLEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1679,7 +1679,7 @@ void rpsxBGTZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JG8(0); psxSaveBranchState(); @@ -1690,7 +1690,7 @@ void rpsxBGTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 4dea960b87..80fbdc21ec 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -833,7 +833,7 @@ static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) SuperVURecompile(); - 
s_recVUPtr = x86Ptr[0]; + s_recVUPtr = x86Ptr; // set the function's range VuFunctionHeader::RANGE r; @@ -1889,7 +1889,7 @@ void VuBaseBlock::AssignVFRegs() if( i == XMMREGS ) return; // nothing changed } - u8* oldX86 = x86Ptr[0]; + u8* oldX86 = x86Ptr; FORIT(itinst, insts) { @@ -2078,7 +2078,7 @@ void VuBaseBlock::AssignVFRegs() } } - assert( x86Ptr[0] == oldX86 ); + assert( x86Ptr == oldX86 ); u32 analyzechildren = !(type&BLOCKTYPE_ANALYZED); type |= BLOCKTYPE_ANALYZED; @@ -2466,7 +2466,7 @@ static void SuperVURecompile() AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 ); MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } // only other case is when there are two branches else assert( (*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH ); @@ -2606,11 +2606,11 @@ void SuperVUTestVU0Condition(u32 incstack) ADD32ItoR(ESP, incstack); //CALLFunc((u32)timeout); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); x86SetJ8(ptr); } - else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 6 ) ); + else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 6 ) ); } void VuBaseBlock::Recompile() @@ -2618,7 +2618,7 @@ void VuBaseBlock::Recompile() if( type & BLOCKTYPE_ANALYZED ) return; x86Align(16); - pcode = x86Ptr[0]; + pcode = x86Ptr; #ifdef _DEBUG MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc); @@ -2726,7 +2726,7 @@ void VuBaseBlock::Recompile() AND32ItoM( (uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 ); if( !branch ) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } else { @@ -2868,7 +2868,7 @@ void VuBaseBlock::Recompile() } } - pendcode = x86Ptr[0]; + pendcode = x86Ptr; type |= BLOCKTYPE_ANALYZED; 
LISTBLOCKS::iterator itchild; @@ -3569,7 +3569,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1; if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) { j8Ptr[1] = JMP8(0); @@ -3578,7 +3578,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1; x86SetJ8( j8Ptr[ 1 ] ); } @@ -3815,7 +3815,7 @@ void recVUMI_B( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } @@ -3841,7 +3841,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index d9a9e75664..05655f66d9 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -161,7 +161,7 @@ void _flushConstRegs() zero_cnt++; } - rewindPtr = x86Ptr[_EmitterId_]; + rewindPtr = x86Ptr; for (i = 1, j = 0; i < 32; j++ && ++i, j %= 2) { if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1< %s", filename ); @@ -579,8 +579,8 @@ void 
recResetEE( void ) // so a fix will have to wait until later. -_- (air) //x86SetPtr(recMem+REC_CACHEMEM); - //dyna_block_discard_recmem=(u8*)x86Ptr[0]; - //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr[0] + 5 )); + //dyna_block_discard_recmem=(u8*)x86Ptr; + //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 )); x86SetPtr(recMem); @@ -791,7 +791,7 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); - JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //branch = 2; } @@ -1148,7 +1148,7 @@ static void iBranchTest(u32 newpc, bool noDispatch) if (!noDispatch) { if (newpc == 0xffffffff) - JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); else iBranch(newpc, 1); } @@ -1375,7 +1375,7 @@ void recRecompile( const u32 startpc ) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PC_GETBLOCK(startpc); @@ -1714,7 +1714,7 @@ StartRecomp: { // was dyna_block_discard_recmem. See note in recResetEE for details. 
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc)); - JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 )); + JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); stg-=4; lpc+=4; @@ -1800,14 +1800,14 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+REC_CACHEMEM ); + assert( x86Ptr < recMem+REC_CACHEMEM ); assert( recStackPtr < recStack+RECSTACK_SIZE ); assert( x86FpuState == 0 ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 1c39766598..aaf93d1d47 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -1930,7 +1930,7 @@ void recLQC2( void ) dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); if( _Ft_ ) { - u8* rawreadptr = x86Ptr[0]; + u8* rawreadptr = x86Ptr; if( mmreg >= 0 ) { SSEX_MOVDQARmtoROffset(mmreg, ECX, PS2MEM_BASE_+s_nAddMemOffset); @@ -1945,7 +1945,7 @@ void recLQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); PUSH32I( (int)&VU0.VF[_Ft_].UD[0] ); CALLFunc( (int)recMemRead128 ); @@ -1999,7 +1999,7 @@ void recSQC2( void ) mmregs = _eePrepareReg(_Rs_); dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); - rawreadptr = x86Ptr[0]; + rawreadptr = x86Ptr; if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { SSEX_MOVDQARtoRmOffset(ECX, mmreg, PS2MEM_BASE_+s_nAddMemOffset); @@ -2039,7 +2039,7 @@ void recSQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); // some type of hardware write if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { diff --git a/pcsx2/x86/ix86/ix86.cpp 
b/pcsx2/x86/ix86/ix86.cpp index aec35d65bc..3a8e1d2830 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -29,10 +29,9 @@ #include "System.h" #include "ix86.h" -u8 *x86Ptr[EmitterId_Count]; - -u8 *j8Ptr[32]; -u32 *j32Ptr[32]; +__threadlocal u8 *x86Ptr; +__threadlocal u8 *j8Ptr[32]; +__threadlocal u32 *j32Ptr[32]; PCSX2_ALIGNED16(u32 p[4]); PCSX2_ALIGNED16(u32 p2[4]); diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 34a10fbe59..556405dc09 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -32,7 +32,7 @@ //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define emitterT template +#define emitterT static __forceinline #define MEMADDR(addr, oplen) (addr) @@ -46,57 +46,62 @@ // This helps catch programmer errors better than using an auto-truncated s8 parameter. #define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) +#ifdef _MSC_VER +#define __threadlocal __declspec(thread) +#else +#define __threadlocal __thread +#endif //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ -extern u8 *x86Ptr[EmitterId_Count]; -extern u8 *j8Ptr[32]; -extern u32 *j32Ptr[32]; +extern __threadlocal u8 *x86Ptr; +extern __threadlocal u8 *j8Ptr[32]; +extern __threadlocal u32 *j32Ptr[32]; emitterT void write8( u8 val ) { - *x86Ptr[I] = (u8)val; - x86Ptr[I]++; + *x86Ptr = (u8)val; + x86Ptr++; } emitterT void write16( u16 val ) { - *(u16*)x86Ptr[I] = val; - x86Ptr[I] += 2; + *(u16*)x86Ptr = val; + x86Ptr += 2; } emitterT void write24( u32 val ) { - *x86Ptr[I]++ = (u8)(val & 0xff); - *x86Ptr[I]++ = (u8)((val >> 8) & 0xff); - *x86Ptr[I]++ = (u8)((val >> 16) & 0xff); + *x86Ptr++ = (u8)(val & 0xff); + *x86Ptr++ = (u8)((val >> 8) & 0xff); + *x86Ptr++ = (u8)((val >> 16) & 0xff); } emitterT void write32( u32 val ) { - *(u32*)x86Ptr[I] = val; - 
x86Ptr[I] += 4; + *(u32*)x86Ptr = val; + x86Ptr += 4; } emitterT void write64( u64 val ){ - *(u64*)x86Ptr[I] = val; - x86Ptr[I] += 8; + *(u64*)x86Ptr = val; + x86Ptr += 8; } //------------------------------------------------------------------ //------------------------------------------------------------------ // jump/align functions //------------------------------------------------------------------ -emitterT void ex86SetPtr( u8 *ptr ); -emitterT void ex86SetJ8( u8 *j8 ); -emitterT void ex86SetJ8A( u8 *j8 ); -emitterT void ex86SetJ16( u16 *j16 ); -emitterT void ex86SetJ16A( u16 *j16 ); -emitterT void ex86SetJ32( u32 *j32 ); -emitterT void ex86SetJ32A( u32 *j32 ); -emitterT void ex86Align( int bytes ); -emitterT void ex86AlignExecutable( int align ); +emitterT void x86SetPtr( u8 *ptr ); +emitterT void x86SetJ8( u8 *j8 ); +emitterT void x86SetJ8A( u8 *j8 ); +emitterT void x86SetJ16( u16 *j16 ); +emitterT void x86SetJ16A( u16 *j16 ); +emitterT void x86SetJ32( u32 *j32 ); +emitterT void x86SetJ32A( u32 *j32 ); +emitterT void x86Align( int bytes ); +emitterT void x86AlignExecutable( int align ); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -113,23 +118,21 @@ emitterT u32* J32Rel( int cc, u32 to ); emitterT u64 GetCPUTick( void ); //------------------------------------------------------------------ -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ); -emitterT u32* eJMP32( uptr to ); -emitterT u8* eJMP8( u8 to ); -emitterT void eCALL32( u32 to ); -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); -emitterT void eNOP( void ); -emitterT void eAND32ItoM( uptr to, u32 from ); -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +emitterT void MOV32RtoR( 
x86IntRegType to, x86IntRegType from ); +emitterT u32* JMP32( uptr to ); +emitterT u8* JMP8( u8 to ); +emitterT void CALL32( u32 to ); +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +emitterT void NOP( void ); +emitterT void AND32ItoM( uptr to, u32 from ); +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); #define MMXONLY(code) code #define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#include "ix86_macros.h" #include "ix86.inl" #include "ix86_3dnow.inl" #include "ix86_fpu.inl" diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index b94bab0e8d..b29427da43 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -39,141 +39,107 @@ // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. -// I added this back in because it's called once from eMOV8ItoRm and eMOV16ItoRm. 
-emitterT void WriteRmOffset(x86IntRegType to, s32 offset) -{ - if ((to&7) == ESP) { - if( offset == 0 ) { - ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); - write8(offset); - } - else { - ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, 0, to ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, to ); - write8(offset); - } - else { - ModRM( 2, 0, to ); - write32(offset); - } - } -} - emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { if( offset == 0 ) { - ModRM( 0, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); + ModRM( 0, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); } else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write8(offset); + ModRM( 1, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write8(offset); } else { - ModRM( 2, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write32(offset); + ModRM( 2, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write32(offset); } } else { if( offset == 0 ) { - ModRM( 0, to, from ); + ModRM( 0, to, from ); } else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } } emitterT void ModRM( s32 mod, s32 reg, s32 rm ) { - write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); + write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); } emitterT void SibSB( s32 ss, s32 index, s32 base ) { - write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); + write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); } emitterT void SET8R( int cc, int to ) { RexB(0, to); - write8( 0x0F ); - write8( cc ); - write8( 0xC0 | ( to ) ); + write8( 0x0F ); + write8( cc ); + write8( 0xC0 | ( to ) ); } emitterT u8* J8Rel( int cc, int to ) { - write8( cc ); - write8( to 
); - return (u8*)(x86Ptr[I] - 1); + write8( cc ); + write8( to ); + return (u8*)(x86Ptr - 1); } emitterT u16* J16Rel( int cc, u32 to ) { - write16( 0x0F66 ); - write8( cc ); - write16( to ); - return (u16*)( x86Ptr[I] - 2 ); + write16( 0x0F66 ); + write8( cc ); + write16( to ); + return (u16*)( x86Ptr - 2 ); } emitterT u32* J32Rel( int cc, u32 to ) { - write8( 0x0F ); - write8( cc ); - write32( to ); - return (u32*)( x86Ptr[I] - 4 ); + write8( 0x0F ); + write8( cc ); + write32( to ); + return (u32*)( x86Ptr - 4 ); } emitterT void CMOV32RtoR( int cc, int to, int from ) { RexRB(0, to, from); - write8( 0x0F ); - write8( cc ); - ModRM( 3, to, from ); + write8( 0x0F ); + write8( cc ); + ModRM( 3, to, from ); } emitterT void CMOV32MtoR( int cc, int to, uptr from ) { RexR(0, to); - write8( 0x0F ); - write8( cc ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0F ); + write8( cc ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } //////////////////////////////////////////////////// -emitterT void ex86SetPtr( u8* ptr ) +emitterT void x86SetPtr( u8* ptr ) { - x86Ptr[I] = ptr; + x86Ptr = ptr; } //////////////////////////////////////////////////// -emitterT void ex86SetJ8( u8* j8 ) +emitterT void x86SetJ8( u8* j8 ) { - u32 jump = ( x86Ptr[I] - j8 ) - 1; + u32 jump = ( x86Ptr - j8 ) - 1; if ( jump > 0x7f ) { Console::Error( "j8 greater than 0x7f!!" ); @@ -182,31 +148,31 @@ emitterT void ex86SetJ8( u8* j8 ) *j8 = (u8)jump; } -emitterT void ex86SetJ8A( u8* j8 ) +emitterT void x86SetJ8A( u8* j8 ) { - u32 jump = ( x86Ptr[I] - j8 ) - 1; + u32 jump = ( x86Ptr - j8 ) - 1; if ( jump > 0x7f ) { Console::Error( "j8 greater than 0x7f!!" 
); assert(0); } - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { + if( ((uptr)x86Ptr&0xf) > 4 ) { - uptr newjump = jump + 16-((uptr)x86Ptr[I]&0xf); + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); if( newjump <= 0x7f ) { jump = newjump; - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; } } *j8 = (u8)jump; } -emitterT void ex86SetJ16( u16 *j16 ) +emitterT void x86SetJ16( u16 *j16 ) { // doesn't work - u32 jump = ( x86Ptr[I] - (u8*)j16 ) - 2; + u32 jump = ( x86Ptr - (u8*)j16 ) - 2; if ( jump > 0x7fff ) { Console::Error( "j16 greater than 0x7fff!!" ); @@ -215,84 +181,84 @@ emitterT void ex86SetJ16( u16 *j16 ) *j16 = (u16)jump; } -emitterT void ex86SetJ16A( u16 *j16 ) +emitterT void x86SetJ16A( u16 *j16 ) { - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; + if( ((uptr)x86Ptr&0xf) > 4 ) { + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; } - ex86SetJ16(j16); + x86SetJ16(j16); } //////////////////////////////////////////////////// -emitterT void ex86SetJ32( u32* j32 ) +emitterT void x86SetJ32( u32* j32 ) { - *j32 = ( x86Ptr[I] - (u8*)j32 ) - 4; + *j32 = ( x86Ptr - (u8*)j32 ) - 4; } -emitterT void ex86SetJ32A( u32* j32 ) +emitterT void x86SetJ32A( u32* j32 ) { - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; - ex86SetJ32(j32); + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); } //////////////////////////////////////////////////// -emitterT void ex86Align( int bytes ) +emitterT void x86Align( int bytes ) { // forward align - x86Ptr[I] = (u8*)( ( (uptr)x86Ptr[I] + bytes - 1) & ~( bytes - 1 ) ); + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); } //////////////////////////////////////////////////// // Generates executable code to align to the given alignment (could be useful for the second leg // of if/else conditionals, which usually fall through a jump target label). 
-emitterT void ex86AlignExecutable( int align ) +emitterT void x86AlignExecutable( int align ) { - uptr newx86 = ( (uptr)x86Ptr[I] + align - 1) & ~( align - 1 ); - uptr bytes = ( newx86 - (uptr)x86Ptr[I] ); + uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); + uptr bytes = ( newx86 - (uptr)x86Ptr ); switch( bytes ) { case 0: break; - case 1: eNOP(); break; - case 2: eMOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: eNOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: eNOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; + case 1: NOP(); break; + case 2: MOV32RtoR( ESI, ESI ); break; + case 3: write8(0x08D); write8(0x024); write8(0x024); break; + case 5: NOP(); // falls through to 4... + case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; + case 6: write8(0x08D); write8(0x0B6); write32(0); break; + case 8: NOP(); // falls through to 7... + case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; default: { // for larger alignments, just use a JMP... 
- u8* aligned_target = eJMP8(0); - x86Ptr[I] = (u8*)newx86; - ex86SetJ8( aligned_target ); + u8* aligned_target = JMP8(0); + x86Ptr = (u8*)newx86; + x86SetJ8( aligned_target ); } } - jASSUME( x86Ptr[0] == (u8*)newx86 ); + jASSUME( x86Ptr == (u8*)newx86 ); } /********************/ /* IX86 intructions */ /********************/ -emitterT void eSTC( void ) +emitterT void STC( void ) { - write8( 0xF9 ); + write8( 0xF9 ); } -emitterT void eCLC( void ) +emitterT void CLC( void ) { - write8( 0xF8 ); + write8( 0xF8 ); } // NOP 1-byte -emitterT void eNOP( void ) +emitterT void NOP( void ) { - write8(0x90); + write8(0x90); } @@ -301,555 +267,555 @@ emitterT void eNOP( void ) //////////////////////////////////// /* mov r32 to r32 */ -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); + write8( 0x89 ); + ModRM( 3, from, to ); } /* mov r32 to m32 */ -emitterT void eMOV32RtoM( uptr to, x86IntRegType from ) +emitterT void MOV32RtoM( uptr to, x86IntRegType from ) { RexR(0, from); if (from == EAX) { - write8(0xA3); + write8(0xA3); } else { - write8( 0x89 ); - ModRM( 0, from, DISP32 ); + write8( 0x89 ); + ModRM( 0, from, DISP32 ); } - write32( MEMADDR(to, 4) ); + write32( MEMADDR(to, 4) ); } /* mov m32 to r32 */ -emitterT void eMOV32MtoR( x86IntRegType to, uptr from ) +emitterT void MOV32MtoR( x86IntRegType to, uptr from ) { RexR(0, to); if (to == EAX) { - write8(0xA1); + write8(0xA1); } else { - write8( 0x8B ); - ModRM( 0, to, DISP32 ); + write8( 0x8B ); + ModRM( 0, to, DISP32 ); } - write32( MEMADDR(from, 4) ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); } /* 
mov [r32+r32*scale] to r32 */ -emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0,to,from2,from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } // mov r32 to [r32<( 0x8B ); - ModRM( 0, to, 0x4 ); - ModRM( scale, from1, 5); - write32(from2); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + ModRM( scale, from1, 5); + write32(from2); } /* mov r32 to [r32][r32*scale] */ -emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } /* mov imm32 to r32 */ -emitterT void eMOV32ItoR( x86IntRegType to, u32 from ) +emitterT void MOV32ItoR( x86IntRegType to, u32 from ) { RexB(0, to); - write8( 0xB8 | (to & 0x7) ); - write32( from ); + write8( 0xB8 | (to & 0x7) ); + write32( from ); } /* mov imm32 to m32 */ -emitterT void eMOV32ItoM(uptr to, u32 from ) +emitterT void MOV32ItoM(uptr to, u32 from ) { - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); + write8( 0xC7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } // mov imm32 to [r32+off] -emitterT void eMOV32ItoRm( x86IntRegType to, u32 from, int offset=0) +emitterT void MOV32ItoRm( x86IntRegType to, u32 from, int offset=0) { RexB(0,to); - write8( 0xC7 ); - WriteRmOffsetFrom(0, to, offset); - write32(from); + write8( 0xC7 ); + WriteRmOffsetFrom(0, to, offset); + write32(from); } // mov r32 to [r32+off] -emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void 
MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); } /* mov r16 to m16 */ -emitterT void eMOV16RtoM(uptr to, x86IntRegType from ) +emitterT void MOV16RtoM(uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x89 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mov m16 to r16 */ -emitterT void eMOV16MtoR( x86IntRegType to, uptr from ) +emitterT void MOV16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x8B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { - write8( 0x66 ); + write8( 0x66 ); RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) +emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { - write8(0x66); + write8(0x66); RexRXB(0,to,from1,0); - write8( 0x8B ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); + write8( 0x8B ); + ModRM( 0, to, SIB ); + SibSB( scale, from1, SIBDISP); + write32(from2); } /* mov imm16 to m16 */ -emitterT void eMOV16ItoM( uptr to, u16 from ) +emitterT void MOV16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x66 ); + write8( 0xC7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* mov r16 to 
[r32][r32*scale] */ -emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { - write8( 0x66 ); + write8( 0x66 ); RexRXB(0,to,from2,from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } -emitterT void eMOV16ItoR( x86IntRegType to, u16 from ) +emitterT void MOV16ItoR( x86IntRegType to, u16 from ) { RexB(0, to); - write16( 0xB866 | ((to & 0x7)<<8) ); - write16( from ); + write16( 0xB866 | ((to & 0x7)<<8) ); + write16( from ); } // mov imm16 to [r16+off] -emitterT void eMOV16ItoRm( x86IntRegType to, u16 from, u32 offset=0 ) +emitterT void MOV16ItoRm( x86IntRegType to, u16 from, u32 offset=0 ) { - write8(0x66); + write8(0x66); RexB(0,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write16(from); + write8( 0xC7 ); + WriteRmOffsetFrom(0, to, offset); + write16(from); } // mov r16 to [r16+off] -emitterT void eMOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); } /* mov r8 to m8 */ -emitterT void eMOV8RtoM( uptr to, x86IntRegType from ) +emitterT void MOV8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x88 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x88 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mov m8 to r8 */ -emitterT void eMOV8MtoR( x86IntRegType to, uptr from ) +emitterT void MOV8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x8A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x8A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void 
eMOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void MOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8A ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) +emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { RexRXB(0,to,from1,0); - write8( 0x8A ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); + write8( 0x8A ); + ModRM( 0, to, SIB ); + SibSB( scale, from1, SIBDISP); + write32(from2); } /* mov imm8 to m8 */ -emitterT void eMOV8ItoM( uptr to, u8 from ) +emitterT void MOV8ItoM( uptr to, u8 from ) { - write8( 0xC6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC6 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // mov imm8 to r8 -emitterT void eMOV8ItoR( x86IntRegType to, u8 from ) +emitterT void MOV8ItoR( x86IntRegType to, u8 from ) { RexB(0, to); - write8( 0xB0 | (to & 0x7) ); - write8( from ); + write8( 0xB0 | (to & 0x7) ); + write8( from ); } // mov imm8 to [r8+off] -emitterT void eMOV8ItoRm( x86IntRegType to, u8 from, int offset=0) +emitterT void MOV8ItoRm( x86IntRegType to, u8 from, int offset=0) { assert( to != ESP ); RexB(0,to); - write8( 0xC6 ); - WriteRmOffset(to,offset); - write8(from); + write8( 0xC6 ); + WriteRmOffsetFrom(0, to,offset); + write8(from); } // mov r8 to [r8+off] -emitterT void eMOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void MOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { assert( to != ESP ); RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from,to,offset); + write8( 0x88 ); + WriteRmOffsetFrom(from,to,offset); } /* movsx r8 to r32 */ -emitterT void eMOVSX32R8toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32R8toR( 
x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 3, to, from ); + write16( 0xBE0F ); + ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 0, to, from ); + write16( 0xBE0F ); + ModRM( 0, to, from ); } -emitterT void eMOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0,to,from); - write16( 0xBE0F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xBE0F ); + WriteRmOffsetFrom(to,from,offset); } /* movsx m8 to r32 */ -emitterT void eMOVSX32M8toR( x86IntRegType to, u32 from ) +emitterT void MOVSX32M8toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xBE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xBE0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movsx r16 to r32 */ -emitterT void eMOVSX32R16toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 3, to, from ); + write16( 0xBF0F ); + ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xBF0F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xBF0F ); + WriteRmOffsetFrom(to,from,offset); } /* movsx m16 to r32 */ -emitterT void eMOVSX32M16toR( x86IntRegType to, u32 from ) +emitterT void MOVSX32M16toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xBF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xBF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movzx r8 to r32 */ -emitterT void eMOVZX32R8toR( x86IntRegType to, 
x86IntRegType from ) +emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 3, to, from ); + write16( 0xB60F ); + ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xB60F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xB60F ); + WriteRmOffsetFrom(to,from,offset); } /* movzx m8 to r32 */ -emitterT void eMOVZX32M8toR( x86IntRegType to, u32 from ) +emitterT void MOVZX32M8toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xB60F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xB60F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movzx r16 to r32 */ -emitterT void eMOVZX32R16toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 3, to, from ); + write16( 0xB70F ); + ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xB70F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xB70F ); + WriteRmOffsetFrom(to,from,offset); } /* movzx m16 to r32 */ -emitterT void eMOVZX32M16toR( x86IntRegType to, u32 from ) +emitterT void MOVZX32M16toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xB70F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xB70F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* cmovbe r32 to r32 */ -emitterT void eCMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x46, to, from ); + CMOV32RtoR( 0x46, to, from ); } /* cmovbe m32 to r32*/ -emitterT void eCMOVBE32MtoR( 
x86IntRegType to, uptr from ) +emitterT void CMOVBE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x46, to, from ); + CMOV32MtoR( 0x46, to, from ); } /* cmovb r32 to r32 */ -emitterT void eCMOVB32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVB32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x42, to, from ); + CMOV32RtoR( 0x42, to, from ); } /* cmovb m32 to r32*/ -emitterT void eCMOVB32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVB32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x42, to, from ); + CMOV32MtoR( 0x42, to, from ); } /* cmovae r32 to r32 */ -emitterT void eCMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x43, to, from ); + CMOV32RtoR( 0x43, to, from ); } /* cmovae m32 to r32*/ -emitterT void eCMOVAE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVAE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x43, to, from ); + CMOV32MtoR( 0x43, to, from ); } /* cmova r32 to r32 */ -emitterT void eCMOVA32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVA32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x47, to, from ); + CMOV32RtoR( 0x47, to, from ); } /* cmova m32 to r32*/ -emitterT void eCMOVA32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVA32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x47, to, from ); + CMOV32MtoR( 0x47, to, from ); } /* cmovo r32 to r32 */ -emitterT void eCMOVO32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVO32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x40, to, from ); + CMOV32RtoR( 0x40, to, from ); } /* cmovo m32 to r32 */ -emitterT void eCMOVO32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVO32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x40, to, from ); + CMOV32MtoR( 0x40, to, from ); } /* cmovp r32 to r32 */ -emitterT void eCMOVP32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT 
void CMOVP32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4A, to, from ); + CMOV32RtoR( 0x4A, to, from ); } /* cmovp m32 to r32 */ -emitterT void eCMOVP32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVP32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4A, to, from ); + CMOV32MtoR( 0x4A, to, from ); } /* cmovs r32 to r32 */ -emitterT void eCMOVS32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVS32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x48, to, from ); + CMOV32RtoR( 0x48, to, from ); } /* cmovs m32 to r32 */ -emitterT void eCMOVS32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVS32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x48, to, from ); + CMOV32MtoR( 0x48, to, from ); } /* cmovno r32 to r32 */ -emitterT void eCMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x41, to, from ); + CMOV32RtoR( 0x41, to, from ); } /* cmovno m32 to r32 */ -emitterT void eCMOVNO32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNO32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x41, to, from ); + CMOV32MtoR( 0x41, to, from ); } /* cmovnp r32 to r32 */ -emitterT void eCMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4B, to, from ); + CMOV32RtoR( 0x4B, to, from ); } /* cmovnp m32 to r32 */ -emitterT void eCMOVNP32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNP32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4B, to, from ); + CMOV32MtoR( 0x4B, to, from ); } /* cmovns r32 to r32 */ -emitterT void eCMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x49, to, from ); + CMOV32RtoR( 0x49, to, from ); } /* cmovns m32 to r32 */ -emitterT void eCMOVNS32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNS32MtoR( 
x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x49, to, from ); + CMOV32MtoR( 0x49, to, from ); } /* cmovne r32 to r32 */ -emitterT void eCMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x45, to, from ); + CMOV32RtoR( 0x45, to, from ); } /* cmovne m32 to r32*/ -emitterT void eCMOVNE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x45, to, from ); + CMOV32MtoR( 0x45, to, from ); } /* cmove r32 to r32*/ -emitterT void eCMOVE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x44, to, from ); + CMOV32RtoR( 0x44, to, from ); } /* cmove m32 to r32*/ -emitterT void eCMOVE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x44, to, from ); + CMOV32MtoR( 0x44, to, from ); } /* cmovg r32 to r32*/ -emitterT void eCMOVG32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVG32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4F, to, from ); + CMOV32RtoR( 0x4F, to, from ); } /* cmovg m32 to r32*/ -emitterT void eCMOVG32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVG32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4F, to, from ); + CMOV32MtoR( 0x4F, to, from ); } /* cmovge r32 to r32*/ -emitterT void eCMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4D, to, from ); + CMOV32RtoR( 0x4D, to, from ); } /* cmovge m32 to r32*/ -emitterT void eCMOVGE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVGE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4D, to, from ); + CMOV32MtoR( 0x4D, to, from ); } /* cmovl r32 to r32*/ -emitterT void eCMOVL32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVL32RtoR( x86IntRegType to, x86IntRegType from ) { - 
CMOV32RtoR( 0x4C, to, from ); + CMOV32RtoR( 0x4C, to, from ); } /* cmovl m32 to r32*/ -emitterT void eCMOVL32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVL32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4C, to, from ); + CMOV32MtoR( 0x4C, to, from ); } /* cmovle r32 to r32*/ -emitterT void eCMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4E, to, from ); + CMOV32RtoR( 0x4E, to, from ); } /* cmovle m32 to r32*/ -emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4E, to, from ); + CMOV32MtoR( 0x4E, to, from ); } //////////////////////////////////// @@ -857,261 +823,261 @@ emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) //////////////////////////////////// // add r16 to r16 -emitterT void eADD16RtoR( x86IntRegType to , x86IntRegType from ) +emitterT void ADD16RtoR( x86IntRegType to , x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x03 ); - ModRM( 3, to, from ); + write8( 0x03 ); + ModRM( 3, to, from ); } /* add imm16 to r16 */ -emitterT void eADD16ItoR( x86IntRegType to, s16 imm ) +emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,to); if ( to == EAX) { - write8( 0x05 ); - write16( imm ); + write8( 0x05 ); + write16( imm ); } else if(imm <= 127 && imm >= -128) { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8((u8)imm ); + write8( 0x83 ); + ModRM( 3, 0, to ); + write8((u8)imm ); } else { - write8( 0x81 ); - ModRM( 3, 0, to ); - write16( imm ); + write8( 0x81 ); + ModRM( 3, 0, to ); + write16( imm ); } } /* add imm16 to m16 */ -emitterT void eADD16ItoM( uptr to, s16 imm ) +emitterT void ADD16ItoM( uptr to, s16 imm ) { - write8( 0x66 ); + write8( 0x66 ); if(imm <= 127 && imm >= -128) { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)imm ); + write8( 
0x83 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write8((u8)imm ); } else { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( imm ); + write8( 0x81 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( imm ); } } /* add r16 to m16 */ -emitterT void eADD16RtoM(uptr to, x86IntRegType from ) +emitterT void ADD16RtoM(uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x01 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* add m16 to r16 */ -emitterT void eADD16MtoR( x86IntRegType to, uptr from ) +emitterT void ADD16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x03 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // add m8 to r8 -emitterT void eADD8MtoR( x86IntRegType to, uptr from ) +emitterT void ADD8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x02 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x02 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* inc r32 */ -emitterT void eINC32R( x86IntRegType to ) +emitterT void INC32R( x86IntRegType to ) { - write8( 0x40 + to ); + write8( 0x40 + to ); } /* inc m32 */ -emitterT void eINC32M( u32 to ) +emitterT void INC32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 4) ); } /* inc r16 */ -emitterT void eINC16R( x86IntRegType to ) +emitterT void INC16R( x86IntRegType to ) { - write8( 0x66 ); - write8( 0x40 + to ); + write8( 0x66 ); + write8( 0x40 + to ); } /* inc m16 */ -emitterT void eINC16M( u32 to ) +emitterT void INC16M( u32 to ) { - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x66 ); + write8( 
0xFF ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 4) ); } // sub r16 to r16 -emitterT void eSUB16RtoR( x86IntRegType to, u16 from ) +emitterT void SUB16RtoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x2b ); - ModRM( 3, to, from ); + write8( 0x2b ); + ModRM( 3, to, from ); } /* sub imm16 to r16 */ -emitterT void eSUB16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); +emitterT void SUB16ItoR( x86IntRegType to, u16 from ) { + write8( 0x66 ); RexB(0,to); if ( to == EAX ) { - write8( 0x2D ); + write8( 0x2D ); } else { - write8( 0x81 ); - ModRM( 3, 5, to ); + write8( 0x81 ); + ModRM( 3, 5, to ); } - write16( from ); + write16( from ); } /* sub imm16 to m16 */ -emitterT void eSUB16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); +emitterT void SUB16ItoM( uptr to, u16 from ) { + write8( 0x66 ); + write8( 0x81 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* sub m16 to r16 */ -emitterT void eSUB16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); +emitterT void SUB16MtoR( x86IntRegType to, uptr from ) { + write8( 0x66 ); RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x2B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* dec r32 */ -emitterT void eDEC32R( x86IntRegType to ) +emitterT void DEC32R( x86IntRegType to ) { - write8( 0x48 + to ); + write8( 0x48 + to ); } /* dec m32 */ -emitterT void eDEC32M( u32 to ) +emitterT void DEC32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 4) ); } /* dec r16 */ -emitterT void eDEC16R( x86IntRegType to ) +emitterT void DEC16R( x86IntRegType to ) { - write8( 0x66 ); - write8( 0x48 + to ); + write8( 0x66 ); + write8( 0x48 + to ); } /* dec m16 */ -emitterT void eDEC16M( u32 to ) +emitterT 
void DEC16M( u32 to ) { - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x66 ); + write8( 0xFF ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mul eax by r32 to edx:eax */ -emitterT void eMUL32R( x86IntRegType from ) +emitterT void MUL32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 4, from ); + write8( 0xF7 ); + ModRM( 3, 4, from ); } /* imul eax by r32 to edx:eax */ -emitterT void eIMUL32R( x86IntRegType from ) +emitterT void IMUL32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 5, from ); + write8( 0xF7 ); + ModRM( 3, 5, from ); } /* mul eax by m32 to edx:eax */ -emitterT void eMUL32M( u32 from ) +emitterT void MUL32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(from, 4) ); } /* imul eax by m32 to edx:eax */ -emitterT void eIMUL32M( u32 from ) +emitterT void IMUL32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(from, 4) ); } /* imul r32 by r32 to r32 */ -emitterT void eIMUL32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xAF0F ); - ModRM( 3, to, from ); + write16( 0xAF0F ); + ModRM( 3, to, from ); } /* div eax by r32 to edx:eax */ -emitterT void eDIV32R( x86IntRegType from ) +emitterT void DIV32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 6, from ); + write8( 0xF7 ); + ModRM( 3, 6, from ); } /* idiv eax by r32 to edx:eax */ -emitterT void eIDIV32R( x86IntRegType from ) +emitterT void IDIV32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 7, from ); + write8( 0xF7 ); + ModRM( 3, 7, from ); } /* div eax by m32 to edx:eax */ -emitterT void eDIV32M( u32 from ) +emitterT void DIV32M( u32 from ) { - write8( 0xF7 ); 
- ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 6, DISP32 ); + write32( MEMADDR(from, 4) ); } /* idiv eax by m32 to edx:eax */ -emitterT void eIDIV32M( u32 from ) +emitterT void IDIV32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(from, 4) ); } //////////////////////////////////// @@ -1119,266 +1085,266 @@ emitterT void eIDIV32M( u32 from ) //////////////////////////////////// /* shl imm8 to r32 */ -emitterT void eSHL32ItoR( x86IntRegType to, u8 from ) +emitterT void SHL32ItoR( x86IntRegType to, u8 from ) { RexB(0, to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); } /* shl imm8 to m32 */ -emitterT void eSHL32ItoM( uptr to, u8 from ) +emitterT void SHL32ItoM( uptr to, u8 from ) { if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD1 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 4) ); } else { - write8( 0xC1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } } /* shl cl to r32 */ -emitterT void eSHL32CLtoR( x86IntRegType to ) +emitterT void SHL32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 4, to ); + write8( 0xD3 ); + ModRM( 3, 4, to ); } // shl imm8 to r16 -emitterT void eSHL16ItoR( x86IntRegType to, u8 from ) +emitterT void SHL16ItoR( x86IntRegType to, u8 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( 
from ); } // shl imm8 to r8 -emitterT void eSHL8ItoR( x86IntRegType to, u8 from ) +emitterT void SHL8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD0 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD0 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC0 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC0 ); + ModRM( 3, 4, to ); + write8( from ); } /* shr imm8 to r32 */ -emitterT void eSHR32ItoR( x86IntRegType to, u8 from ) { +emitterT void SHR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE8 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE8 | (to & 0x7) ); } else { - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); } } /* shr imm8 to m32 */ -emitterT void eSHR32ItoM( uptr to, u8 from ) +emitterT void SHR32ItoM( uptr to, u8 from ) { if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD1 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 4) ); } else { - write8( 0xC1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } } /* shr cl to r32 */ -emitterT void eSHR32CLtoR( x86IntRegType to ) +emitterT void SHR32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 5, to ); + write8( 0xD3 ); + ModRM( 3, 5, to ); } // shr imm8 to r16 -emitterT void eSHR16ItoR( x86IntRegType to, u8 from ) +emitterT void SHR16ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 5, to ); + write8( 0xD1 ); + ModRM( 3, 5, to ); } else { - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); } } // shr imm8 to r8 -emitterT void eSHR8ItoR( x86IntRegType to, u8 from ) +emitterT void SHR8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD0 
); - write8( 0xE8 | (to & 0x7) ); + write8( 0xD0 ); + write8( 0xE8 | (to & 0x7) ); } else { - write8( 0xC0 ); - ModRM( 3, 5, to ); - write8( from ); + write8( 0xC0 ); + ModRM( 3, 5, to ); + write8( from ); } } /* sar imm8 to r32 */ -emitterT void eSAR32ItoR( x86IntRegType to, u8 from ) +emitterT void SAR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 7, to ); + write8( 0xD1 ); + ModRM( 3, 7, to ); return; } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); } /* sar imm8 to m32 */ -emitterT void eSAR32ItoM( uptr to, u8 from ) +emitterT void SAR32ItoM( uptr to, u8 from ) { - write8( 0xC1 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* sar cl to r32 */ -emitterT void eSAR32CLtoR( x86IntRegType to ) +emitterT void SAR32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 7, to ); + write8( 0xD3 ); + ModRM( 3, 7, to ); } // sar imm8 to r16 -emitterT void eSAR16ItoR( x86IntRegType to, u8 from ) +emitterT void SAR16ItoR( x86IntRegType to, u8 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 7, to ); + write8( 0xD1 ); + ModRM( 3, 7, to ); return; } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); } -/*emitterT void eROR32ItoR( x86IntRegType to,u8 from ) +/*emitterT void ROR32ItoR( x86IntRegType to,u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - write8( 0xc8 | to ); + write8( 0xd1 ); + write8( 0xc8 | to ); } else { - write8( 0xc1 ); - write8( 0xc8 | to ); - write8( from ); + write8( 0xc1 ); + write8( 0xc8 | to ); + write8( from ); } }*/ -emitterT void eRCR32ItoR( x86IntRegType to, u8 from ) +emitterT void RCR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - 
ModRM(3, 3, to); + write8( 0xd1 ); + ModRM(3, 3, to); } else { - write8( 0xc1 ); - ModRM(3, 3, to); - write8( from ); + write8( 0xc1 ); + ModRM(3, 3, to); + write8( from ); } } -emitterT void eRCR32ItoM( uptr to, u8 from ) +emitterT void RCR32ItoM( uptr to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); + write8( 0xd1 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(to, 8) ); } else { - write8( 0xc1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( from ); + write8( 0xc1 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(to, 8) ); + write8( from ); } } // shld imm8 to r32 -emitterT void eSHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +emitterT void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { RexRB(0,from,to); - write8( 0x0F ); - write8( 0xA4 ); - ModRM( 3, from, to ); - write8( shift ); + write8( 0x0F ); + write8( 0xA4 ); + ModRM( 3, from, to ); + write8( shift ); } // shrd imm8 to r32 -emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { RexRB(0,from,to); - write8( 0x0F ); - write8( 0xAC ); - ModRM( 3, from, to ); - write8( shift ); + write8( 0x0F ); + write8( 0xAC ); + ModRM( 3, from, to ); + write8( shift ); } //////////////////////////////////// @@ -1386,252 +1352,252 @@ emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) //////////////////////////////////// // or r16 to r16 -emitterT void eOR16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void OR16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); + write8( 0x09 ); + ModRM( 3, from, to ); } // or imm16 to r16 -emitterT void eOR16ItoR( x86IntRegType to, u16 from ) +emitterT void OR16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - 
write8( 0x0D ); + write8( 0x0D ); } else { - write8( 0x81 ); - ModRM( 3, 1, to ); + write8( 0x81 ); + ModRM( 3, 1, to ); } - write16( from ); + write16( from ); } // or imm16 to m316 -emitterT void eOR16ItoM( uptr to, u16 from ) +emitterT void OR16ItoM( uptr to, u16 from ) { - write8(0x66); - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8(0x66); + write8( 0x81 ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* or m16 to r16 */ -emitterT void eOR16MtoR( x86IntRegType to, uptr from ) +emitterT void OR16MtoR( x86IntRegType to, uptr from ) { - write8(0x66); + write8(0x66); RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // or r16 to m16 -emitterT void eOR16RtoM( uptr to, x86IntRegType from ) +emitterT void OR16RtoM( uptr to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x09 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } // or r8 to r8 -emitterT void eOR8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,from,to); - write8( 0x08 ); - ModRM( 3, from, to ); + write8( 0x08 ); + ModRM( 3, from, to ); } // or r8 to m8 -emitterT void eOR8RtoM( uptr to, x86IntRegType from ) +emitterT void OR8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x08 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x08 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } // or imm8 to m8 -emitterT void eOR8ItoM( uptr to, u8 from ) +emitterT void OR8ItoM( uptr to, u8 from ) { - write8( 0x80 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x80 ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // or m8 to r8 
-emitterT void eOR8MtoR( x86IntRegType to, uptr from ) +emitterT void OR8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x0A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // and r16 to r16 -emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void AND16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x23 ); - ModRM( 3, to, from ); + write8( 0x23 ); + ModRM( 3, to, from ); } /* and imm16 to r16 */ -emitterT void eAND16ItoR( x86IntRegType to, u16 from ) +emitterT void AND16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - write8( 0x25 ); - write16( from ); + write8( 0x25 ); + write16( from ); } else if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8((u8)from ); + write8( 0x83 ); + ModRM( 3, 0x4, to ); + write8((u8)from ); } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - write16( from ); + write8( 0x81 ); + ModRM( 3, 0x4, to ); + write16( from ); } } /* and imm16 to m16 */ -emitterT void eAND16ItoM( uptr to, u16 from ) +emitterT void AND16ItoM( uptr to, u16 from ) { - write8(0x66); + write8(0x66); if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)from ); + write8( 0x83 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 6) ); + write8((u8)from ); } else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x81 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } } /* and r16 to m16 */ -emitterT void eAND16RtoM( uptr to, x86IntRegType from ) +emitterT void AND16RtoM( uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x21 ); + ModRM( 0, from, DISP32 ); + write32( 
MEMADDR(to, 4) ); } /* and m16 to r16 */ -emitterT void eAND16MtoR( x86IntRegType to, uptr from ) +emitterT void AND16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0x23 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4)); } /* and imm8 to r8 */ -emitterT void eAND8ItoR( x86IntRegType to, u8 from ) +emitterT void AND8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0x24 ); + write8( 0x24 ); } else { - write8( 0x80 ); - ModRM( 3, 0x4, to ); + write8( 0x80 ); + ModRM( 3, 0x4, to ); } - write8( from ); + write8( from ); } /* and imm8 to m8 */ -emitterT void eAND8ItoM( uptr to, u8 from ) +emitterT void AND8ItoM( uptr to, u8 from ) { - write8( 0x80 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x80 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // and r8 to r8 -emitterT void eAND8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void AND8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write8( 0x22 ); - ModRM( 3, to, from ); + write8( 0x22 ); + ModRM( 3, to, from ); } /* and r8 to m8 */ -emitterT void eAND8RtoM( uptr to, x86IntRegType from ) +emitterT void AND8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x20 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x20 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* and m8 to r8 */ -emitterT void eAND8MtoR( x86IntRegType to, uptr from ) +emitterT void AND8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x22 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0x22 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4)); } /* not r32 */ -emitterT void eNOT32R( x86IntRegType from ) +emitterT void NOT32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 2, from ); + write8( 0xF7 
); + ModRM( 3, 2, from ); } // not m32 -emitterT void eNOT32M( u32 from ) +emitterT void NOT32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0xF7 ); + ModRM( 0, 2, DISP32 ); + write32( MEMADDR(from, 4)); } /* neg r32 */ -emitterT void eNEG32R( x86IntRegType from ) +emitterT void NEG32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); + write8( 0xF7 ); + ModRM( 3, 3, from ); } -emitterT void eNEG32M( u32 from ) +emitterT void NEG32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0xF7 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(from, 4)); } /* neg r16 */ -emitterT void eNEG16R( x86IntRegType from ) +emitterT void NEG16R( x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); + write8( 0xF7 ); + ModRM( 3, 3, from ); } //////////////////////////////////// @@ -1639,346 +1605,346 @@ emitterT void eNEG16R( x86IntRegType from ) //////////////////////////////////// emitterT u8* JMP( uptr to ) { - uptr jump = ( x86Ptr[0] - (u8*)to ) - 1; + uptr jump = ( x86Ptr - (u8*)to ) - 1; if ( jump > 0x7f ) { assert( to <= 0xffffffff ); - return (u8*)eJMP32( to ); + return (u8*)JMP32( to ); } else { - return (u8*)eJMP8( to ); + return (u8*)JMP8( to ); } } /* jmp rel8 */ -emitterT u8* eJMP8( u8 to ) +emitterT u8* JMP8( u8 to ) { - write8( 0xEB ); - write8( to ); - return x86Ptr[I] - 1; + write8( 0xEB ); + write8( to ); + return x86Ptr - 1; } /* jmp rel32 */ -emitterT u32* eJMP32( uptr to ) +emitterT u32* JMP32( uptr to ) { assert( (sptr)to <= 0x7fffffff && (sptr)to >= -0x7fffffff ); - write8( 0xE9 ); - write32( to ); - return (u32*)(x86Ptr[I] - 4 ); + write8( 0xE9 ); + write32( to ); + return (u32*)(x86Ptr - 4 ); } /* jmp r32/r64 */ -emitterT void eJMPR( x86IntRegType to ) +emitterT void JMPR( x86IntRegType to ) { RexB(0, to); - write8( 0xFF ); - ModRM( 3, 4, to ); + write8( 0xFF ); + ModRM( 3, 4, 
to ); } // jmp m32 -emitterT void eJMP32M( uptr to ) +emitterT void JMP32M( uptr to ) { - write8( 0xFF ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4)); + write8( 0xFF ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 4)); } /* jp rel8 */ -emitterT u8* eJP8( u8 to ) { - return J8Rel( 0x7A, to ); +emitterT u8* JP8( u8 to ) { + return J8Rel( 0x7A, to ); } /* jnp rel8 */ -emitterT u8* eJNP8( u8 to ) { - return J8Rel( 0x7B, to ); +emitterT u8* JNP8( u8 to ) { + return J8Rel( 0x7B, to ); } /* je rel8 */ -emitterT u8* eJE8( u8 to ) { - return J8Rel( 0x74, to ); +emitterT u8* JE8( u8 to ) { + return J8Rel( 0x74, to ); } /* jz rel8 */ -emitterT u8* eJZ8( u8 to ) +emitterT u8* JZ8( u8 to ) { - return J8Rel( 0x74, to ); + return J8Rel( 0x74, to ); } /* js rel8 */ -emitterT u8* eJS8( u8 to ) +emitterT u8* JS8( u8 to ) { - return J8Rel( 0x78, to ); + return J8Rel( 0x78, to ); } /* jns rel8 */ -emitterT u8* eJNS8( u8 to ) +emitterT u8* JNS8( u8 to ) { - return J8Rel( 0x79, to ); + return J8Rel( 0x79, to ); } /* jg rel8 */ -emitterT u8* eJG8( u8 to ) +emitterT u8* JG8( u8 to ) { - return J8Rel( 0x7F, to ); + return J8Rel( 0x7F, to ); } /* jge rel8 */ -emitterT u8* eJGE8( u8 to ) +emitterT u8* JGE8( u8 to ) { - return J8Rel( 0x7D, to ); + return J8Rel( 0x7D, to ); } /* jl rel8 */ -emitterT u8* eJL8( u8 to ) +emitterT u8* JL8( u8 to ) { - return J8Rel( 0x7C, to ); + return J8Rel( 0x7C, to ); } /* ja rel8 */ -emitterT u8* eJA8( u8 to ) +emitterT u8* JA8( u8 to ) { - return J8Rel( 0x77, to ); + return J8Rel( 0x77, to ); } -emitterT u8* eJAE8( u8 to ) +emitterT u8* JAE8( u8 to ) { - return J8Rel( 0x73, to ); + return J8Rel( 0x73, to ); } /* jb rel8 */ -emitterT u8* eJB8( u8 to ) +emitterT u8* JB8( u8 to ) { - return J8Rel( 0x72, to ); + return J8Rel( 0x72, to ); } /* jbe rel8 */ -emitterT u8* eJBE8( u8 to ) +emitterT u8* JBE8( u8 to ) { - return J8Rel( 0x76, to ); + return J8Rel( 0x76, to ); } /* jle rel8 */ -emitterT u8* eJLE8( u8 to ) +emitterT u8* JLE8( u8 to ) { - return 
J8Rel( 0x7E, to ); + return J8Rel( 0x7E, to ); } /* jne rel8 */ -emitterT u8* eJNE8( u8 to ) +emitterT u8* JNE8( u8 to ) { - return J8Rel( 0x75, to ); + return J8Rel( 0x75, to ); } /* jnz rel8 */ -emitterT u8* eJNZ8( u8 to ) +emitterT u8* JNZ8( u8 to ) { - return J8Rel( 0x75, to ); + return J8Rel( 0x75, to ); } /* jng rel8 */ -emitterT u8* eJNG8( u8 to ) +emitterT u8* JNG8( u8 to ) { - return J8Rel( 0x7E, to ); + return J8Rel( 0x7E, to ); } /* jnge rel8 */ -emitterT u8* eJNGE8( u8 to ) +emitterT u8* JNGE8( u8 to ) { - return J8Rel( 0x7C, to ); + return J8Rel( 0x7C, to ); } /* jnl rel8 */ -emitterT u8* eJNL8( u8 to ) +emitterT u8* JNL8( u8 to ) { - return J8Rel( 0x7D, to ); + return J8Rel( 0x7D, to ); } /* jnle rel8 */ -emitterT u8* eJNLE8( u8 to ) +emitterT u8* JNLE8( u8 to ) { - return J8Rel( 0x7F, to ); + return J8Rel( 0x7F, to ); } /* jo rel8 */ -emitterT u8* eJO8( u8 to ) +emitterT u8* JO8( u8 to ) { - return J8Rel( 0x70, to ); + return J8Rel( 0x70, to ); } /* jno rel8 */ -emitterT u8* eJNO8( u8 to ) +emitterT u8* JNO8( u8 to ) { - return J8Rel( 0x71, to ); + return J8Rel( 0x71, to ); } /* Untested and slower, use 32bit versions instead // ja rel16 emitterT u16* eJA16( u16 to ) { -return J16Rel( 0x87, to ); +return J16Rel( 0x87, to ); } // jb rel16 emitterT u16* eJB16( u16 to ) { -return J16Rel( 0x82, to ); +return J16Rel( 0x82, to ); } // je rel16 emitterT u16* eJE16( u16 to ) { -return J16Rel( 0x84, to ); +return J16Rel( 0x84, to ); } // jz rel16 emitterT u16* eJZ16( u16 to ) { -return J16Rel( 0x84, to ); +return J16Rel( 0x84, to ); } */ // jb rel32 -emitterT u32* eJB32( u32 to ) +emitterT u32* JB32( u32 to ) { - return J32Rel( 0x82, to ); + return J32Rel( 0x82, to ); } /* je rel32 */ -emitterT u32* eJE32( u32 to ) +emitterT u32* JE32( u32 to ) { - return J32Rel( 0x84, to ); + return J32Rel( 0x84, to ); } /* jz rel32 */ -emitterT u32* eJZ32( u32 to ) +emitterT u32* JZ32( u32 to ) { - return J32Rel( 0x84, to ); + return J32Rel( 0x84, to ); } /* js rel32 */ 
-emitterT u32* eJS32( u32 to ) +emitterT u32* JS32( u32 to ) { - return J32Rel( 0x88, to ); + return J32Rel( 0x88, to ); } /* jns rel32 */ -emitterT u32* eJNS32( u32 to ) +emitterT u32* JNS32( u32 to ) { - return J32Rel( 0x89, to ); + return J32Rel( 0x89, to ); } /* jg rel32 */ -emitterT u32* eJG32( u32 to ) +emitterT u32* JG32( u32 to ) { - return J32Rel( 0x8F, to ); + return J32Rel( 0x8F, to ); } /* jge rel32 */ -emitterT u32* eJGE32( u32 to ) +emitterT u32* JGE32( u32 to ) { - return J32Rel( 0x8D, to ); + return J32Rel( 0x8D, to ); } /* jl rel32 */ -emitterT u32* eJL32( u32 to ) +emitterT u32* JL32( u32 to ) { - return J32Rel( 0x8C, to ); + return J32Rel( 0x8C, to ); } /* jle rel32 */ -emitterT u32* eJLE32( u32 to ) +emitterT u32* JLE32( u32 to ) { - return J32Rel( 0x8E, to ); + return J32Rel( 0x8E, to ); } /* ja rel32 */ -emitterT u32* eJA32( u32 to ) +emitterT u32* JA32( u32 to ) { - return J32Rel( 0x87, to ); + return J32Rel( 0x87, to ); } /* jae rel32 */ -emitterT u32* eJAE32( u32 to ) +emitterT u32* JAE32( u32 to ) { - return J32Rel( 0x83, to ); + return J32Rel( 0x83, to ); } /* jne rel32 */ -emitterT u32* eJNE32( u32 to ) +emitterT u32* JNE32( u32 to ) { - return J32Rel( 0x85, to ); + return J32Rel( 0x85, to ); } /* jnz rel32 */ -emitterT u32* eJNZ32( u32 to ) +emitterT u32* JNZ32( u32 to ) { - return J32Rel( 0x85, to ); + return J32Rel( 0x85, to ); } /* jng rel32 */ -emitterT u32* eJNG32( u32 to ) +emitterT u32* JNG32( u32 to ) { - return J32Rel( 0x8E, to ); + return J32Rel( 0x8E, to ); } /* jnge rel32 */ -emitterT u32* eJNGE32( u32 to ) +emitterT u32* JNGE32( u32 to ) { - return J32Rel( 0x8C, to ); + return J32Rel( 0x8C, to ); } /* jnl rel32 */ -emitterT u32* eJNL32( u32 to ) +emitterT u32* JNL32( u32 to ) { - return J32Rel( 0x8D, to ); + return J32Rel( 0x8D, to ); } /* jnle rel32 */ -emitterT u32* eJNLE32( u32 to ) +emitterT u32* JNLE32( u32 to ) { - return J32Rel( 0x8F, to ); + return J32Rel( 0x8F, to ); } /* jo rel32 */ -emitterT u32* eJO32( u32 to ) 
+emitterT u32* JO32( u32 to ) { - return J32Rel( 0x80, to ); + return J32Rel( 0x80, to ); } /* jno rel32 */ -emitterT u32* eJNO32( u32 to ) +emitterT u32* JNO32( u32 to ) { - return J32Rel( 0x81, to ); + return J32Rel( 0x81, to ); } /* call func */ -emitterT void eCALLFunc( uptr func ) +emitterT void CALLFunc( uptr func ) { - func -= ( (uptr)x86Ptr[0] + 5 ); + func -= ( (uptr)x86Ptr + 5 ); assert( (sptr)func <= 0x7fffffff && (sptr)func >= -0x7fffffff ); - eCALL32(func); + CALL32(func); } /* call rel32 */ -emitterT void eCALL32( u32 to ) +emitterT void CALL32( u32 to ) { - write8( 0xE8 ); - write32( to ); + write8( 0xE8 ); + write32( to ); } /* call r32 */ -emitterT void eCALL32R( x86IntRegType to ) +emitterT void CALL32R( x86IntRegType to ) { - write8( 0xFF ); - ModRM( 3, 2, to ); + write8( 0xFF ); + ModRM( 3, 2, to ); } /* call m32 */ -emitterT void eCALL32M( u32 to ) +emitterT void CALL32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 2, DISP32 ); + write32( MEMADDR(to, 4) ); } //////////////////////////////////// @@ -1986,410 +1952,406 @@ emitterT void eCALL32M( u32 to ) //////////////////////////////////// // cmp imm8 to [r32] (byte ptr) -emitterT void eCMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) +emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) { RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write8(from); } // cmp imm8 to [r32] -emitterT void eCMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) +emitterT void CMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) { RexB(0,to); - write8( 0x83 ); - ModRM( (off!=0), 7, to ); - if( off != 0 ) write8(off); - write8(from); + write8( 0x83 ); + ModRM( (off!=0), 7, to ); + if( off != 0 ) write8(off); + write8(from); } // cmp imm32 to [r32] -emitterT void eCMP32ItoRm( x86IntRegType to, u32 from, s8 
off=0 ) +emitterT void CMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) { // fixme : This should use the imm8 form if 'from' is between 127 and -128. RexB(0,to); - write8( 0x81 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write32(from); + write8( 0x81 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write32(from); } // cmp imm8 to [mem] (dword ptr) -emitterT void eCMP32I8toM( uptr to, u8 from ) +emitterT void CMP32I8toM( uptr to, u8 from ) { - write8( 0x83 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x83 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* cmp imm16 to r16 */ -emitterT void eCMP16ItoR( x86IntRegType to, u16 from ) +emitterT void CMP16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,to); if ( to == EAX ) { - write8( 0x3D ); + write8( 0x3D ); } else { - write8( 0x81 ); - ModRM( 3, 7, to ); + write8( 0x81 ); + ModRM( 3, 7, to ); } - write16( from ); + write16( from ); } /* cmp imm16 to m16 */ -emitterT void eCMP16ItoM( uptr to, u16 from ) +emitterT void CMP16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x66 ); + write8( 0x81 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* cmp r16 to r16 */ -emitterT void eCMP16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMP16RtoR( x86IntRegType to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); + write8( 0x39 ); + ModRM( 3, from, to ); } /* cmp m16 to r16 */ -emitterT void eCMP16MtoR( x86IntRegType to, uptr from ) +emitterT void CMP16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x3B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); 
} // cmp imm8 to r8 -emitterT void eCMP8ItoR( x86IntRegType to, u8 from ) +emitterT void CMP8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0x3C ); + write8( 0x3C ); } else { - write8( 0x80 ); - ModRM( 3, 7, to ); + write8( 0x80 ); + ModRM( 3, 7, to ); } - write8( from ); + write8( from ); } // cmp m8 to r8 -emitterT void eCMP8MtoR( x86IntRegType to, uptr from ) +emitterT void CMP8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x3A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x3A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* test imm32 to r32 */ -emitterT void eTEST32ItoR( x86IntRegType to, u32 from ) +emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0xA9 ); + write8( 0xA9 ); } else { - write8( 0xF7 ); - ModRM( 3, 0, to ); + write8( 0xF7 ); + ModRM( 3, 0, to ); } - write32( from ); + write32( from ); } -emitterT void eTEST32ItoM( uptr to, u32 from ) +emitterT void TEST32ItoM( uptr to, u32 from ) { - write8( 0xF7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); + write8( 0xF7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } /* test r32 to r32 */ -emitterT void eTEST32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,from,to); - write8( 0x85 ); - ModRM( 3, from, to ); + write8( 0x85 ); + ModRM( 3, from, to ); } // test imm32 to [r32] -emitterT void eTEST32ItoRm( x86IntRegType to, u32 from ) +emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { RexB(0,to); - write8( 0xF7 ); - ModRM( 0, 0, to ); - write32(from); + write8( 0xF7 ); + ModRM( 0, 0, to ); + write32(from); } // test imm16 to r16 -emitterT void eTEST16ItoR( x86IntRegType to, u16 from ) +emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - write8( 0xA9 ); + write8( 0xA9 ); } 
else { - write8( 0xF7 ); - ModRM( 3, 0, to ); + write8( 0xF7 ); + ModRM( 3, 0, to ); } - write16( from ); + write16( from ); } // test r16 to r16 -emitterT void eTEST16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x85 ); - ModRM( 3, from, to ); + write8( 0x85 ); + ModRM( 3, from, to ); } // test r8 to r8 -emitterT void eTEST8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write8( 0x84 ); - ModRM( 3, from, to ); + write8( 0x84 ); + ModRM( 3, from, to ); } // test imm8 to r8 -emitterT void eTEST8ItoR( x86IntRegType to, u8 from ) +emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0xA8 ); + write8( 0xA8 ); } else { - write8( 0xF6 ); - ModRM( 3, 0, to ); + write8( 0xF6 ); + ModRM( 3, 0, to ); } - write8( from ); + write8( from ); } // test imm8 to r8 -emitterT void eTEST8ItoM( uptr to, u8 from ) +emitterT void TEST8ItoM( uptr to, u8 from ) { - write8( 0xF6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xF6 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* sets r8 */ -emitterT void eSETS8R( x86IntRegType to ) +emitterT void SETS8R( x86IntRegType to ) { - SET8R( 0x98, to ); + SET8R( 0x98, to ); } /* setl r8 */ -emitterT void eSETL8R( x86IntRegType to ) +emitterT void SETL8R( x86IntRegType to ) { - SET8R( 0x9C, to ); + SET8R( 0x9C, to ); } // setge r8 -emitterT void eSETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } +emitterT void SETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } // setg r8 -emitterT void eSETG8R( x86IntRegType to ) { SET8R(0x9f, to); } +emitterT void SETG8R( x86IntRegType to ) { SET8R(0x9f, to); } // seta r8 -emitterT void eSETA8R( x86IntRegType to ) { SET8R(0x97, to); } +emitterT void SETA8R( x86IntRegType to ) { SET8R(0x97, to); } 
// setae r8 -emitterT void eSETAE8R( x86IntRegType to ) { SET8R(0x99, to); } +emitterT void SETAE8R( x86IntRegType to ) { SET8R(0x99, to); } /* setb r8 */ -emitterT void eSETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } +emitterT void SETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } /* setb r8 */ -emitterT void eSETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } +emitterT void SETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } // setz r8 -emitterT void eSETZ8R( x86IntRegType to ) { SET8R(0x94, to); } +emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } // sete r8 -emitterT void eSETE8R( x86IntRegType to ) { SET8R(0x94, to); } +emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } /* push imm32 */ -emitterT void ePUSH32I( u32 from ) +emitterT void PUSH32I( u32 from ) {; -write8( 0x68 ); -write32( from ); +write8( 0x68 ); +write32( from ); } /* push r32 */ -emitterT void ePUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } +emitterT void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } /* push m32 */ -emitterT void ePUSH32M( u32 from ) +emitterT void PUSH32M( u32 from ) { - write8( 0xFF ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xFF ); + ModRM( 0, 6, DISP32 ); + write32( MEMADDR(from, 4) ); } /* pop r32 */ -emitterT void ePOP32R( x86IntRegType from ) { write8( 0x58 | from ); } +emitterT void POP32R( x86IntRegType from ) { write8( 0x58 | from ); } /* pushad */ -emitterT void ePUSHA32( void ) { write8( 0x60 ); } +emitterT void PUSHA32( void ) { write8( 0x60 ); } /* popad */ -emitterT void ePOPA32( void ) { write8( 0x61 ); } - -emitterT void ePUSHR(x86IntRegType from) { ePUSH32R(from); } -emitterT void ePOPR(x86IntRegType from) { ePOP32R(from); } - +emitterT void POPA32( void ) { write8( 0x61 ); } /* pushfd */ -emitterT void ePUSHFD( void ) { write8( 0x9C ); } +emitterT void PUSHFD( void ) { write8( 0x9C ); } /* popfd */ -emitterT void ePOPFD( void ) { write8( 0x9D ); } +emitterT void POPFD( void ) { write8( 
0x9D ); } -emitterT void eRET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } +emitterT void RET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } -emitterT void eCBW( void ) { write16( 0x9866 ); } -emitterT void eCWD( void ) { write8( 0x98 ); } -emitterT void eCDQ( void ) { write8( 0x99 ); } -emitterT void eCWDE() { write8(0x98); } +emitterT void CBW( void ) { write16( 0x9866 ); } +emitterT void CWD( void ) { write8( 0x98 ); } +emitterT void CDQ( void ) { write8( 0x99 ); } +emitterT void CWDE() { write8(0x98); } -emitterT void eLAHF() { write8(0x9f); } -emitterT void eSAHF() { write8(0x9e); } +emitterT void LAHF() { write8(0x9f); } +emitterT void SAHF() { write8(0x9e); } -emitterT void eBT32ItoR( x86IntRegType to, u8 from ) +emitterT void BT32ItoR( x86IntRegType to, u8 from ) { - write16( 0xBA0F ); - ModRM(3, 4, to); - write8( from ); + write16( 0xBA0F ); + ModRM(3, 4, to); + write8( from ); } -emitterT void eBTR32ItoR( x86IntRegType to, u8 from ) +emitterT void BTR32ItoR( x86IntRegType to, u8 from ) { - write16( 0xBA0F ); - ModRM(3, 6, to); - write8( from ); + write16( 0xBA0F ); + ModRM(3, 6, to); + write8( from ); } -emitterT void eBSRRtoR(x86IntRegType to, x86IntRegType from) +emitterT void BSRRtoR(x86IntRegType to, x86IntRegType from) { - write16( 0xBD0F ); - ModRM( 3, from, to ); + write16( 0xBD0F ); + ModRM( 3, from, to ); } -emitterT void eBSWAP32R( x86IntRegType to ) +emitterT void BSWAP32R( x86IntRegType to ) { - write8( 0x0F ); - write8( 0xC8 + to ); + write8( 0x0F ); + write8( 0xC8 + to ); } -// to = from + offset -emitterT void eLEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) -{ - write8(0x66); - eLEA32RtoR(to, from, offset); -} - -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) +emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { RexRB(0,to,from); - write8(0x8d); + write8(0x8d); if( (from&7) == ESP ) { if( offset == 0 ) { - ModRM(1, to, from); - write8(0x24); + 
ModRM(1, to, from); + write8(0x24); } else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(0x24); - write8(offset); + ModRM(1, to, from); + write8(0x24); + write8(offset); } else { - ModRM(2, to, from); - write8(0x24); - write32(offset); + ModRM(2, to, from); + write8(0x24); + write32(offset); } } else { if( offset == 0 && from != EBP && from!=ESP ) { - ModRM(0, to, from); + ModRM(0, to, from); } else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(offset); + ModRM(1, to, from); + write8(offset); } else { - ModRM(2, to, from); - write32(offset); + ModRM(2, to, from); + write32(offset); } } } +// to = from + offset +emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) +{ + write8(0x66); + LEA32RtoR(to, from, offset); +} + // to = from0 + from1 -emitterT void eLEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { - write8(0x66); - eLEA32RRtoR(to, from0, from1); + write8(0x66); + LEA32RRtoR(to, from0, from1); } -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { RexRXB(0, to, from0, from1); - write8(0x8d); + write8(0x8d); if( (from1&7) == EBP ) { - ModRM(1, to, 4); - ModRM(0, from0, from1); - write8(0); + ModRM(1, to, 4); + ModRM(0, from0, from1); + write8(0); } else { - ModRM(0, to, 4); - ModRM(0, from0, from1); + ModRM(0, to, 4); + ModRM(0, from0, from1); } } // to = from << scale (max is 3) -emitterT void eLEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { - write8(0x66); - eLEA32RStoR(to, from, scale); + write8(0x66); + LEA32RStoR(to, from, scale); } // Don't inline recursive functions -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +emitterT void 
LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { if( to == from ) { - eSHL32ItoR(to, scale); + SHL32ItoR(to, scale); return; } if( from != ESP ) { RexRXB(0,to,from,0); - write8(0x8d); - ModRM(0, to, 4); - ModRM(scale, from, 5); - write32(0); + write8(0x8d); + ModRM(0, to, 4); + ModRM(scale, from, 5); + write32(0); } else { assert( to != ESP ); - eMOV32RtoR(to, from); - eLEA32RStoR(to, to, scale); + MOV32RtoR(to, from); + LEA32RStoR(to, to, scale); } } diff --git a/pcsx2/x86/ix86/ix86_3dnow.inl b/pcsx2/x86/ix86/ix86_3dnow.inl index 5fdcce2347..e7a667f409 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.inl +++ b/pcsx2/x86/ix86/ix86_3dnow.inl @@ -23,179 +23,179 @@ //------------------------------------------------------------------ /* femms */ -emitterT void eFEMMS( void ) +emitterT void FEMMS( void ) { - write16( 0x0E0F ); + write16( 0x0E0F ); } -emitterT void ePFCMPEQMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPEQMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB0 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB0 ); } -emitterT void ePFCMPGTMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPGTMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA0 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA0 ); } -emitterT void ePFCMPGEMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPGEMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x90 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x90 ); } -emitterT void ePFADDMtoR( x86IntRegType to, uptr from ) +emitterT void PFADDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9E ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + 
write8( 0x9E ); } -emitterT void ePFADDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFADDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9E ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9E ); } -emitterT void ePFSUBMtoR( x86IntRegType to, uptr from ) +emitterT void PFSUBMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9A ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x9A ); } -emitterT void ePFSUBRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFSUBRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9A ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9A ); } -emitterT void ePFMULMtoR( x86IntRegType to, uptr from ) +emitterT void PFMULMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB4 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB4 ); } -emitterT void ePFMULRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMULRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB4 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB4 ); } -emitterT void ePFRCPMtoR( x86IntRegType to, uptr from ) +emitterT void PFRCPMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x96 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x96 ); } -emitterT void ePFRCPRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRCPRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x96 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x96 ); } -emitterT void ePFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) +emitterT 
void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA6 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA6 ); } -emitterT void ePFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB6 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB6 ); } -emitterT void ePFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x97 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x97 ); } -emitterT void ePFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA7 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA7 ); } -emitterT void ePF2IDMtoR( x86IntRegType to, uptr from ) +emitterT void PF2IDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x1D ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x1D ); } -emitterT void ePF2IDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PF2IDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x1D ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x1D ); } -emitterT void ePI2FDMtoR( x86IntRegType to, uptr from ) +emitterT void PI2FDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x0D ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x0D ); } -emitterT void ePI2FDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PI2FDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - 
write8( 0x0D ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x0D ); } -emitterT void ePFMAXMtoR( x86IntRegType to, uptr from ) +emitterT void PFMAXMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA4 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA4 ); } -emitterT void ePFMAXRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMAXRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA4 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA4 ); } -emitterT void ePFMINMtoR( x86IntRegType to, uptr from ) +emitterT void PFMINMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x94 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x94 ); } -emitterT void ePFMINRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMINRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x94 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x94 ); } diff --git a/pcsx2/x86/ix86/ix86_fpu.inl b/pcsx2/x86/ix86/ix86_fpu.inl index ae20b6c4ec..7ed607f30c 100644 --- a/pcsx2/x86/ix86/ix86_fpu.inl +++ b/pcsx2/x86/ix86/ix86_fpu.inl @@ -24,253 +24,253 @@ //------------------------------------------------------------------ /* fild m32 to fpu reg stack */ -emitterT void eFILD32( u32 from ) +emitterT void FILD32( u32 from ) { - write8( 0xDB ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xDB ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fistp m32 from fpu reg stack */ -emitterT void eFISTP32( u32 from ) +emitterT void FISTP32( u32 from ) { - write8( 0xDB ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xDB ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fld m32 to fpu reg stack */ -emitterT 
void eFLD32( u32 from ) +emitterT void FLD32( u32 from ) { - write8( 0xD9 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } // fld st(i) -emitterT void eFLD(int st) { write16(0xc0d9+(st<<8)); } -emitterT void eFLD1() { write16(0xe8d9); } -emitterT void eFLDL2E() { write16(0xead9); } +emitterT void FLD(int st) { write16(0xc0d9+(st<<8)); } +emitterT void FLD1() { write16(0xe8d9); } +emitterT void FLDL2E() { write16(0xead9); } /* fst m32 from fpu reg stack */ -emitterT void eFST32( u32 to ) +emitterT void FST32( u32 to ) { - write8( 0xD9 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(to, 4) ); } /* fstp m32 from fpu reg stack */ -emitterT void eFSTP32( u32 to ) +emitterT void FSTP32( u32 to ) { - write8( 0xD9 ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(to, 4) ); } // fstp st(i) -emitterT void eFSTP(int st) { write16(0xd8dd+(st<<8)); } +emitterT void FSTP(int st) { write16(0xd8dd+(st<<8)); } /* fldcw fpu control word from m16 */ -emitterT void eFLDCW( u32 from ) +emitterT void FLDCW( u32 from ) { - write8( 0xD9 ); - ModRM( 0, 0x5, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x5, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fnstcw fpu control word to m16 */ -emitterT void eFNSTCW( u32 to ) +emitterT void FNSTCW( u32 to ) { - write8( 0xD9 ); - ModRM( 0, 0x7, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x7, DISP32 ); + write32( MEMADDR(to, 4) ); } -emitterT void eFNSTSWtoAX() { write16(0xE0DF); } -emitterT void eFXAM() { write16(0xe5d9); } -emitterT void eFDECSTP() { write16(0xf6d9); } -emitterT void eFRNDINT() { write16(0xfcd9); } -emitterT void eFXCH(int st) { write16(0xc8d9+(st<<8)); } -emitterT void eF2XM1() { write16(0xf0d9); } -emitterT void eFSCALE() { write16(0xfdd9); } 
-emitterT void eFPATAN(void) { write16(0xf3d9); } -emitterT void eFSIN(void) { write16(0xfed9); } +emitterT void FNSTSWtoAX() { write16(0xE0DF); } +emitterT void FXAM() { write16(0xe5d9); } +emitterT void FDECSTP() { write16(0xf6d9); } +emitterT void FRNDINT() { write16(0xfcd9); } +emitterT void FXCH(int st) { write16(0xc8d9+(st<<8)); } +emitterT void F2XM1() { write16(0xf0d9); } +emitterT void FSCALE() { write16(0xfdd9); } +emitterT void FPATAN(void) { write16(0xf3d9); } +emitterT void FSIN(void) { write16(0xfed9); } /* fadd ST(src) to fpu reg stack ST(0) */ -emitterT void eFADD32Rto0( x86IntRegType src ) +emitterT void FADD32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC0 + src ); + write8( 0xD8 ); + write8( 0xC0 + src ); } /* fadd ST(0) to fpu reg stack ST(src) */ -emitterT void eFADD320toR( x86IntRegType src ) +emitterT void FADD320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xC0 + src ); + write8( 0xDC ); + write8( 0xC0 + src ); } /* fsub ST(src) to fpu reg stack ST(0) */ -emitterT void eFSUB32Rto0( x86IntRegType src ) +emitterT void FSUB32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xE0 + src ); + write8( 0xD8 ); + write8( 0xE0 + src ); } /* fsub ST(0) to fpu reg stack ST(src) */ -emitterT void eFSUB320toR( x86IntRegType src ) +emitterT void FSUB320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xE8 + src ); + write8( 0xDC ); + write8( 0xE8 + src ); } /* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ -emitterT void eFSUBP( void ) +emitterT void FSUBP( void ) { - write8( 0xDE ); - write8( 0xE9 ); + write8( 0xDE ); + write8( 0xE9 ); } /* fmul ST(src) to fpu reg stack ST(0) */ -emitterT void eFMUL32Rto0( x86IntRegType src ) +emitterT void FMUL32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC8 + src ); + write8( 0xD8 ); + write8( 0xC8 + src ); } /* fmul ST(0) to fpu reg stack ST(src) */ -emitterT void eFMUL320toR( x86IntRegType src ) +emitterT void FMUL320toR( x86IntRegType src ) { - 
write8( 0xDC ); - write8( 0xC8 + src ); + write8( 0xDC ); + write8( 0xC8 + src ); } /* fdiv ST(src) to fpu reg stack ST(0) */ -emitterT void eFDIV32Rto0( x86IntRegType src ) +emitterT void FDIV32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xF0 + src ); + write8( 0xD8 ); + write8( 0xF0 + src ); } /* fdiv ST(0) to fpu reg stack ST(src) */ -emitterT void eFDIV320toR( x86IntRegType src ) +emitterT void FDIV320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xF8 + src ); + write8( 0xDC ); + write8( 0xF8 + src ); } -emitterT void eFDIV320toRP( x86IntRegType src ) +emitterT void FDIV320toRP( x86IntRegType src ) { - write8( 0xDE ); - write8( 0xF8 + src ); + write8( 0xDE ); + write8( 0xF8 + src ); } /* fadd m32 to fpu reg stack */ -emitterT void eFADD32( u32 from ) +emitterT void FADD32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fsub m32 to fpu reg stack */ -emitterT void eFSUB32( u32 from ) +emitterT void FSUB32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fmul m32 to fpu reg stack */ -emitterT void eFMUL32( u32 from ) +emitterT void FMUL32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x1, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x1, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fdiv m32 to fpu reg stack */ -emitterT void eFDIV32( u32 from ) +emitterT void FDIV32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x6, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fabs fpu reg stack */ -emitterT void eFABS( void ) +emitterT void FABS( void ) { - write16( 0xE1D9 ); + write16( 0xE1D9 ); } /* fsqrt fpu reg stack */ -emitterT void eFSQRT( void ) +emitterT void FSQRT( void ) { - write16( 0xFAD9 ); + write16( 
0xFAD9 ); } /* fchs fpu reg stack */ -emitterT void eFCHS( void ) +emitterT void FCHS( void ) { - write16( 0xE0D9 ); + write16( 0xE0D9 ); } /* fcomi st, st(i) */ -emitterT void eFCOMI( x86IntRegType src ) +emitterT void FCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xF0 + src ); + write8( 0xDB ); + write8( 0xF0 + src ); } /* fcomip st, st(i) */ -emitterT void eFCOMIP( x86IntRegType src ) +emitterT void FCOMIP( x86IntRegType src ) { - write8( 0xDF ); - write8( 0xF0 + src ); + write8( 0xDF ); + write8( 0xF0 + src ); } /* fucomi st, st(i) */ -emitterT void eFUCOMI( x86IntRegType src ) +emitterT void FUCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xE8 + src ); + write8( 0xDB ); + write8( 0xE8 + src ); } /* fucomip st, st(i) */ -emitterT void eFUCOMIP( x86IntRegType src ) +emitterT void FUCOMIP( x86IntRegType src ) { - write8( 0xDF ); - write8( 0xE8 + src ); + write8( 0xDF ); + write8( 0xE8 + src ); } /* fcom m32 to fpu reg stack */ -emitterT void eFCOM32( u32 from ) +emitterT void FCOM32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fcomp m32 to fpu reg stack */ -emitterT void eFCOMP32( u32 from ) +emitterT void FCOMP32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } #define FCMOV32( low, high ) \ { \ - write8( low ); \ - write8( high + from ); \ + write8( low ); \ + write8( high + from ); \ } -emitterT void eFCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } -emitterT void eFCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } -emitterT void eFCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } -emitterT void eFCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } -emitterT void eFCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } -emitterT void eFCMOVNE32( x86IntRegType from ) { 
FCMOV32( 0xDB, 0xC8 ); } -emitterT void eFCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } -emitterT void eFCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } +emitterT void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } +emitterT void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } +emitterT void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } +emitterT void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } +emitterT void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } +emitterT void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } +emitterT void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } +emitterT void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.inl index ccc98726d8..99e1d6d4ff 100644 --- a/pcsx2/x86/ix86/ix86_group1.inl +++ b/pcsx2/x86/ix86/ix86_group1.inl @@ -66,7 +66,7 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) int displacement_size = (info.Displacement == 0) ? 0 : ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - if( !NeedsSibMagic( info ) ) + if( !NeedsSibMagic( info ) ) { // Use ModRm-only encoding, with the rm field holding an index/base register, if // one has been specified. If neither register is specified then use Disp32 form, @@ -76,26 +76,26 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) x86Register basereg = info.GetEitherReg(); if( basereg.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); + ModRM( 0, regfield, ModRm_UseDisp32 ); else { if( basereg == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! 
- ModRM( displacement_size, regfield, basereg.Id ); + ModRM( displacement_size, regfield, basereg.Id ); } } else { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Index.Id, info.Scale, info.Base.Id ); } switch( displacement_size ) { case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; jNO_DEFAULT } } @@ -108,7 +108,7 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) { - EmitSibMagic( regfield.Id, info ); + EmitSibMagic( regfield.Id, info ); } enum Group1InstructionType @@ -126,82 +126,93 @@ enum Group1InstructionType emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) { - write8( 0x01 | (inst<<3) ); - ModRM( 3, from.Id, to.Id ); + write8( 0x01 | (inst<<3) ); + ModRM( 3, from.Id, to.Id ); } emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) { if( is_s8( imm ) ) { - write8( 0x83 ); - ModRM( 3, inst, to.Id ); - write8( (s8)imm ); + write8( 0x83 ); + ModRM( 3, inst, to.Id ); + write8( (s8)imm ); } else { if( to == eax ) - write8( 0x05 | (inst<<3) ); + write8( 0x05 | (inst<<3) ); else { - write8( 0x81 ); - ModRM( 3, inst, to.Id ); + write8( 0x81 ); + ModRM( 3, inst, to.Id ); } - write32( imm ); + write32( imm ); } } emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) { - write8( is_s8( imm ) ? 0x83 : 0x81 ); + write8( is_s8( imm ) ? 
0x83 : 0x81 ); - EmitSibMagic( inst, sibdest ); + EmitSibMagic( inst, sibdest ); if( is_s8( imm ) ) - write8( (s8)imm ); + write8( (s8)imm ); else - write32( imm ); + write32( imm ); } emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) { - write8( 0x01 | (inst<<3) ); - EmitSibMagic( from, sibdest ); + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); } /* add m32 to r32 */ emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) { - write8( 0x03 | (inst<<3) ); - EmitSibMagic( to, sibsrc ); + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); } emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) { if( to == eax ) { - write8( 0x04 | (inst<<3) ); - write8( imm ); + write8( 0x04 | (inst<<3) ); + write8( imm ); } else { - write8( 0x80 ); - ModRM( 3, inst, to.Id ); - write8( imm ); + write8( 0x80 ); + ModRM( 3, inst, to.Id ); + write8( imm ); } } ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + emitterT void lwr##32( x86Register to, x86Register from ) { 
Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + +/* + emitterT void lwr##16( x86Register16 to, x86Register16 from ) { Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##16( x86Register16 to, u16 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##16( x86Register16 to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##16( void* to, x86Register16 from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##16( void* to, u16 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##16( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } +*/ DEFINE_GROUP1_OPCODE( add, ADD ); DEFINE_GROUP1_OPCODE( cmp, CMP ); @@ -229,14 +240,14 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ - emitterT void e##cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( 
_reghlp(to), _reghlp(from) ); } \ - emitterT void e##cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void e##cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void e##cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ - emitterT void e##cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void e##cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void e##cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void e##cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ + emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); @@ -247,12 +258,12 @@ 
DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); -emitterT void eAND32I8toR( x86IntRegType to, s8 from ) +emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - x86Emitter::and32( _reghlp(to), from ); + x86Emitter::and32( _reghlp(to), from ); } -emitterT void eAND32I8toM( uptr to, s8 from ) +emitterT void AND32I8toM( uptr to, s8 from ) { - x86Emitter::and32( (void*)to, from ); + x86Emitter::and32( (void*)to, from ); } diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.inl index bc7d183319..f63686e700 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.inl @@ -25,623 +25,623 @@ //------------------------------------------------------------------ /* movq m64 to r64 */ -emitterT void eMOVQMtoR( x86MMXRegType to, uptr from ) +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x6F0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6F0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movq r64 to m64 */ -emitterT void eMOVQRtoM( uptr to, x86MMXRegType from ) +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { - write16( 0x7F0F ); - ModRM( 0, from, DISP32 ); - write32(MEMADDR(to, 4)); + write16( 0x7F0F ); + ModRM( 0, from, DISP32 ); + write32(MEMADDR(to, 4)); } /* pand r64 to r64 */ -emitterT void ePANDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDB0F ); - ModRM( 3, to, from ); + write16( 0xDB0F ); + ModRM( 3, to, from ); } -emitterT void ePANDNRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDF0F ); - ModRM( 3, to, from ); + write16( 0xDF0F ); + ModRM( 3, to, from ); } /* por r64 to r64 */ -emitterT void ePORRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PORRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEB0F ); - ModRM( 3, 
to, from ); + write16( 0xEB0F ); + ModRM( 3, to, from ); } /* pxor r64 to r64 */ -emitterT void ePXORRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PXORRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEF0F ); - ModRM( 3, to, from ); + write16( 0xEF0F ); + ModRM( 3, to, from ); } /* psllq r64 to r64 */ -emitterT void ePSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF30F ); - ModRM( 3, to, from ); + write16( 0xF30F ); + ModRM( 3, to, from ); } /* psllq m64 to r64 */ -emitterT void ePSLLQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSLLQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xF30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xF30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* psllq imm8 to r64 */ -emitterT void ePSLLQItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLQItoR( x86MMXRegType to, u8 from ) { - write16( 0x730F ); - ModRM( 3, 6, to); - write8( from ); + write16( 0x730F ); + ModRM( 3, 6, to); + write8( from ); } /* psrlq r64 to r64 */ -emitterT void ePSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD30F ); - ModRM( 3, to, from ); + write16( 0xD30F ); + ModRM( 3, to, from ); } /* psrlq m64 to r64 */ -emitterT void ePSRLQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSRLQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xD30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xD30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* psrlq imm8 to r64 */ -emitterT void ePSRLQItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLQItoR( x86MMXRegType to, u8 from ) { - write16( 0x730F ); - ModRM( 3, 2, to); - write8( from ); + write16( 0x730F ); + ModRM( 3, 2, to); + write8( from ); } /* paddusb r64 to r64 */ -emitterT void ePADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) 
+emitterT void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDC0F ); - ModRM( 3, to, from ); + write16( 0xDC0F ); + ModRM( 3, to, from ); } /* paddusb m64 to r64 */ -emitterT void ePADDUSBMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDUSBMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddusw r64 to r64 */ -emitterT void ePADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDD0F ); - ModRM( 3, to, from ); + write16( 0xDD0F ); + ModRM( 3, to, from ); } /* paddusw m64 to r64 */ -emitterT void ePADDUSWMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDUSWMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddb r64 to r64 */ -emitterT void ePADDBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFC0F ); - ModRM( 3, to, from ); + write16( 0xFC0F ); + ModRM( 3, to, from ); } /* paddb m64 to r64 */ -emitterT void ePADDBMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDBMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddw r64 to r64 */ -emitterT void ePADDWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFD0F ); - ModRM( 3, to, from ); + write16( 0xFD0F ); + ModRM( 3, to, from ); } /* paddw m64 to r64 */ -emitterT void ePADDWMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDWMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFD0F ); - ModRM( 0, to, DISP32 ); - 
write32( MEMADDR(from, 4) ); + write16( 0xFD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddd r64 to r64 */ -emitterT void ePADDDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFE0F ); - ModRM( 3, to, from ); + write16( 0xFE0F ); + ModRM( 3, to, from ); } /* paddd m64 to r64 */ -emitterT void ePADDDMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDDMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFE0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* emms */ -emitterT void eEMMS() +emitterT void EMMS() { - write16( 0x770F ); + write16( 0x770F ); } -emitterT void ePADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEC0F ); - ModRM( 3, to, from ); + write16( 0xEC0F ); + ModRM( 3, to, from ); } -emitterT void ePADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xED0F ); - ModRM( 3, to, from ); + write16( 0xED0F ); + ModRM( 3, to, from ); } // paddq m64 to r64 (sse2 only?) -emitterT void ePADDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xD40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xD40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // paddq r64 to r64 (sse2 only?) 
-emitterT void ePADDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD40F ); - ModRM( 3, to, from ); + write16( 0xD40F ); + ModRM( 3, to, from ); } -emitterT void ePSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE80F ); - ModRM( 3, to, from ); + write16( 0xE80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE90F ); - ModRM( 3, to, from ); + write16( 0xE90F ); + ModRM( 3, to, from ); } -emitterT void ePSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF80F ); - ModRM( 3, to, from ); + write16( 0xF80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF90F ); - ModRM( 3, to, from ); + write16( 0xF90F ); + ModRM( 3, to, from ); } -emitterT void ePSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFA0F ); - ModRM( 3, to, from ); + write16( 0xFA0F ); + ModRM( 3, to, from ); } -emitterT void ePSUBDMtoR( x86MMXRegType to, uptr from ) +emitterT void PSUBDMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFA0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFA0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD80F ); - ModRM( 3, to, from ); + write16( 0xD80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - 
write16( 0xD90F ); - ModRM( 3, to, from ); + write16( 0xD90F ); + ModRM( 3, to, from ); } // psubq m64 to r64 (sse2 only?) -emitterT void ePSUBQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSUBQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // psubq r64 to r64 (sse2 only?) -emitterT void ePSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFB0F ); - ModRM( 3, to, from ); + write16( 0xFB0F ); + ModRM( 3, to, from ); } // pmuludq m64 to r64 (sse2 only?) -emitterT void ePMULUDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xF40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xF40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // pmuludq r64 to r64 (sse2 only?) -emitterT void ePMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF40F ); - ModRM( 3, to, from ); + write16( 0xF40F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x740F ); - ModRM( 3, to, from ); + write16( 0x740F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x750F ); - ModRM( 3, to, from ); + write16( 0x750F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x760F ); - ModRM( 3, to, from ); + write16( 0x760F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQDMtoR( x86MMXRegType to, uptr from ) +emitterT void 
PCMPEQDMtoR( x86MMXRegType to, uptr from ) { - write16( 0x760F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x760F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x640F ); - ModRM( 3, to, from ); + write16( 0x640F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x650F ); - ModRM( 3, to, from ); + write16( 0x650F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x660F ); - ModRM( 3, to, from ); + write16( 0x660F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTDMtoR( x86MMXRegType to, uptr from ) +emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) { - write16( 0x660F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x660F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePSRLWItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( from ); } -emitterT void ePSRLDItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLDItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( from ); } -emitterT void ePSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD20F ); - ModRM( 3, to, from ); + write16( 0xD20F ); + ModRM( 3, to, from ); } -emitterT void ePSLLWItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - 
ModRM( 3, 6 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( from ); } -emitterT void ePSLLDItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLDItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 6 , to ); + write8( from ); } -emitterT void ePSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF20F ); - ModRM( 3, to, from ); + write16( 0xF20F ); + ModRM( 3, to, from ); } -emitterT void ePSRAWItoR( x86MMXRegType to, u8 from ) +emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( from ); } -emitterT void ePSRADItoR( x86MMXRegType to, u8 from ) +emitterT void PSRADItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( from ); } -emitterT void ePSRADRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE20F ); - ModRM( 3, to, from ); + write16( 0xE20F ); + ModRM( 3, to, from ); } /* por m64 to r64 */ -emitterT void ePORMtoR( x86MMXRegType to, uptr from ) +emitterT void PORMtoR( x86MMXRegType to, uptr from ) { - write16( 0xEB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xEB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* pxor m64 to r64 */ -emitterT void ePXORMtoR( x86MMXRegType to, uptr from ) +emitterT void PXORMtoR( x86MMXRegType to, uptr from ) { - write16( 0xEF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xEF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* pand m64 to r64 */ -emitterT void ePANDMtoR( x86MMXRegType to, uptr from ) +emitterT void PANDMtoR( x86MMXRegType to, uptr from ) { - //u64 rip = 
(u64)x86Ptr[0] + 7; - write16( 0xDB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + //u64 rip = (u64)x86Ptr + 7; + write16( 0xDB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePANDNMtoR( x86MMXRegType to, uptr from ) +emitterT void PANDNMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x6A0F ); - ModRM( 3, to, from ); + write16( 0x6A0F ); + ModRM( 3, to, from ); } -emitterT void ePUNPCKHDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x6A0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6A0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x620F ); - ModRM( 3, to, from ); + write16( 0x620F ); + ModRM( 3, to, from ); } -emitterT void ePUNPCKLDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x620F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x620F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOVQ64ItoR( x86MMXRegType reg, u64 i ) +emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i ) { - eMOVQMtoR( reg, ( uptr )(x86Ptr[0]) + 2 + 7 ); - eJMP8( 8 ); - write64( i ); + MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 ); + JMP8( 8 ); + write64( i ); } -emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x6F0F ); - ModRM( 3, to, from ); + write16( 0x6F0F ); + ModRM( 3, to, from ); } 
-emitterT void eMOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) { - write16( 0x6F0F ); + write16( 0x6F0F ); if( offset < 128 && offset >= -128) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } -emitterT void eMOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) { - write16( 0x7F0F ); + write16( 0x7F0F ); if( offset < 128 && offset >= -128) { - ModRM( 1, from , to ); - write8(offset); + ModRM( 1, from , to ); + write8(offset); } else { - ModRM( 2, from, to ); - write32(offset); + ModRM( 2, from, to ); + write32(offset); } } /* movd m32 to r64 */ -emitterT void eMOVDMtoMMX( x86MMXRegType to, uptr from ) +emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { - write16( 0x6E0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6E0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movd r64 to m32 */ -emitterT void eMOVDMMXtoM( uptr to, x86MMXRegType from ) +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write16( 0x7E0F ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } -emitterT void eMOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { - write16( 0x6E0F ); - ModRM( 3, to, from ); + write16( 0x6E0F ); + ModRM( 3, to, from ); } -emitterT void eMOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) { - write16( 0x6E0F ); - ModRM( 0, to, from ); + write16( 0x6E0F ); + ModRM( 0, to, from ); } -emitterT void eMOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) 
+emitterT void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) { - write16( 0x6E0F ); + write16( 0x6E0F ); if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } -emitterT void eMOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 3, from, to ); + write16( 0x7E0F ); + ModRM( 3, from, to ); } -emitterT void eMOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 0, from, to ); + write16( 0x7E0F ); + ModRM( 0, from, to ); if( to >= 4 ) { // no idea why assert( to == ESP ); - write8(0x24); + write8(0x24); } } -emitterT void eMOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +emitterT void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) { - write16( 0x7E0F ); + write16( 0x7E0F ); if( offset < 128 ) { - ModRM( 1, from, to ); - write8(offset); + ModRM( 1, from, to ); + write8(offset); } else { - ModRM( 2, from, to ); - write32(offset); + ModRM( 2, from, to ); + write32(offset); } } ///* movd r32 to r64 */ -//emitterT void eMOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) +//emitterT void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) //{ -// write16( 0x6E0F ); -// ModRM( 3, to, from ); +// write16( 0x6E0F ); +// ModRM( 3, to, from ); //} // ///* movq r64 to r32 */ -//emitterT void eMOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) +//emitterT void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) //{ -// write16( 0x7E0F ); -// ModRM( 3, from, to ); +// write16( 0x7E0F ); +// ModRM( 3, from, to ); //} // untested -emitterT void ePACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, 
x86MMXRegType from) { - write16( 0x630F ); - ModRM( 3, to, from ); + write16( 0x630F ); + ModRM( 3, to, from ); } -emitterT void ePACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { - write16( 0x6B0F ); - ModRM( 3, to, from ); + write16( 0x6B0F ); + ModRM( 3, to, from ); } -emitterT void ePMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { - write16( 0xD70F ); - ModRM( 3, to, from ); + write16( 0xD70F ); + ModRM( 3, to, from ); } -emitterT void ePINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); - write16( 0xc40f ); - ModRM( 3, to, from ); - write8( imm8 ); + write16( 0xc40f ); + ModRM( 3, to, from ); + write8( imm8 ); } -emitterT void ePSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { - write16(0x700f); - ModRM( 3, to, from ); - write8(imm8); + write16(0x700f); + ModRM( 3, to, from ); + write8(imm8); } -emitterT void ePSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { - write16( 0x700f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); - write8(imm8); + write16( 0x700f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); + write8(imm8); } -emitterT void eMASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { - write16(0xf70f); - ModRM( 3, to, from ); + write16(0xf70f); + ModRM( 3, to, from ); } diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index a52ba6ccd7..af25c1cbaa 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -37,474 +37,474 @@ static const bool AlwaysUseMovaps = true; #define SSEMtoR( code, 
overb ) \ assert( to < XMMREGS ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) #define SSERtoM( code, overb ) \ assert( from < XMMREGS), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) #define SSE_SS_MtoR( code, overb ) \ assert( to < XMMREGS ), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) #define SSE_SS_RtoM( code, overb ) \ assert( from < XMMREGS), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) #define SSERtoR( code ) \ assert( to < XMMREGS && from < XMMREGS), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) #define SSEMtoR66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSEMtoR( code, 0 ) #define SSERtoM66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSERtoM( code, 0 ) #define SSERtoR66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSERtoR( code ) #define _SSERtoR66( code ) \ assert( to < XMMREGS && from < XMMREGS), \ - write8( 0x66 ), \ + write8( 0x66 ), \ RexRB(0, from, to), \ - write16( code ), \ - ModRM( 3, from, to ) + write16( code ), \ + ModRM( 3, from, to ) #define SSE_SS_RtoR( code ) \ assert( to < XMMREGS && from < XMMREGS), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) #define SSE_SD_MtoR( code, overb ) \ assert( to < 
XMMREGS ) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) \ #define SSE_SD_RtoM( code, overb ) \ assert( from < XMMREGS) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) \ #define SSE_SD_RtoR( code ) \ assert( to < XMMREGS && from < XMMREGS) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) #define CMPPSMtoR( op ) \ SSEMtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPPSRtoR( op ) \ SSERtoR( 0xc20f ), \ - write8( op ) + write8( op ) #define CMPSSMtoR( op ) \ SSE_SS_MtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPSSRtoR( op ) \ SSE_SS_RtoR( 0xc20f ), \ - write8( op ) + write8( op ) #define CMPSDMtoR( op ) \ SSE_SD_MtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPSDRtoR( op ) \ SSE_SD_RtoR( 0xc20f ), \ - write8( op ) + write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); - write16( 0x100f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x100f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movups xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(1, to, from2, from); - write16( 0x110f ); - 
ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x110f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movups [r32] to r32 */ -emitterT void eSSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, to, from); - write16( 0x100f ); - ModRM( 0, to, from ); + write16( 0x100f ); + ModRM( 0, to, from ); } /* movups r32 to [r32] */ -emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write16( 0x110f ); - ModRM( 0, from, to ); + write16( 0x110f ); + ModRM( 0, from, to ); } /* movlps [r32] to r32 */ -emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) { RexRB(1, to, from); - write16( 0x120f ); - ModRM( 0, to, from ); + write16( 0x120f ); + ModRM( 0, to, from ); } -emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); } /* movaps r32 to [r32] */ -emitterT void eSSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write16( 0x130f ); - ModRM( 0, from, to ); + write16( 0x130f ); + ModRM( 0, from, to ); } -emitterT void eSSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x130f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x130f ); + WriteRmOffsetFrom(from, to, offset); } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType 
from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); - write16( 0x280f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x280f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); - write16( 0x290f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x290f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } // movaps [r32+offset] to r32 -emitterT void eSSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x280f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x280f ); + WriteRmOffsetFrom(to, from, offset); } // movaps r32 to [r32+offset] -emitterT void eSSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x290f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x290f ); + WriteRmOffsetFrom(from, to, offset); } // movdqa [r32+offset] to r32 -emitterT void eSSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRmtoR( to, from, offset ); + SSE_MOVAPSRmtoR( to, from, offset ); else { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6f0f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x6f0f ); + WriteRmOffsetFrom(to, from, offset); } } // movdqa 
r32 to [r32+offset] -emitterT void eSSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRtoRm( to, from, offset ); + SSE_MOVAPSRtoRm( to, from, offset ); else { - write8(0x66); + write8(0x66); RexRB(0, from, to); - write16( 0x7f0f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x7f0f ); + WriteRmOffsetFrom(from, to, offset); } } // movups [r32+offset] to r32 -emitterT void eSSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); } // movups r32 to [r32+offset] -emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x110f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x110f ); + WriteRmOffsetFrom(from, to, offset); } //**********************************************************************************/ //MOVAPS: Move aligned Packed Single Precision FP values * //********************************************************************************** -emitterT void eSSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } +emitterT void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { 
SSERtoR( 0x280f ); } } -emitterT void eSSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } -emitterT void eSSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } +emitterT void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } +emitterT void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } -emitterT void eSSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } -emitterT void eSSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } -emitterT void eSSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } +emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } +emitterT void SSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } +emitterT void SSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } -emitterT void eSSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { - write8(0xf3); SSEMtoR( 0x7e0f, 0); + write8(0xf3); SSEMtoR( 0x7e0f, 0); } -emitterT void eSSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { - write8(0xf3); SSERtoR( 0x7e0f); + write8(0xf3); SSERtoR( 0x7e0f); } -emitterT void eSSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) +emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM66(0xd60f); } -emitterT void eSSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) +emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { - write8(0xf2); + write8(0xf2); SSERtoR( 0xd60f); } -emitterT void eSSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) +emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { - write8(0xf3); + write8(0xf3); 
SSERtoR( 0xd60f); } //**********************************************************************************/ //MOVSS: Move Scalar Single-Precision FP value * //********************************************************************************** -emitterT void eSSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } +emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } +emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } +emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void eSSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - write8(0xf3); + write8(0xf3); RexRB(0, from, to); - write16(0x110f); - WriteRmOffsetFrom(from, to, offset); + write16(0x110f); + WriteRmOffsetFrom(from, to, offset); } -emitterT void eSSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } //**********************************************************************************/ //MOVLPS: Move low Packed Single-Precision FP * 
//********************************************************************************** -emitterT void eSSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } -emitterT void eSSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } +emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } +emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void eSSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16(0x130f); - WriteRmOffsetFrom(from, to, offset); + write16(0x130f); + WriteRmOffsetFrom(from, to, offset); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHPS: Move High Packed Single-Precision FP * //********************************************************************************** -emitterT void eSSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } -emitterT void eSSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } +emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } +emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } -emitterT void eSSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - 
write16( 0x160f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x160f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16(0x170f); - WriteRmOffsetFrom(from, to, offset); + write16(0x170f); + WriteRmOffsetFrom(from, to, offset); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVLHPS: Moved packed Single-Precision FP low to high * //********************************************************************************** -emitterT void eSSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHLPS: Moved packed Single-Precision FP High to Low * //********************************************************************************** -emitterT void eSSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ANDPS: Logical Bit-wise AND for Single FP * //********************************************************************************** -emitterT void eSSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } -emitterT void eSSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } +emitterT void 
SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } +emitterT void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } -emitterT void eSSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } -emitterT void eSSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } +emitterT void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } +emitterT void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * //********************************************************************************** -emitterT void eSSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } -emitterT void eSSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } +emitterT void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } +emitterT void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } -emitterT void eSSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } -emitterT void eSSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } +emitterT void SSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } +emitterT void SSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RCPPS : Packed Single-Precision FP Reciprocal * //********************************************************************************** 
-emitterT void eSSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } -emitterT void eSSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } +emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } +emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } -emitterT void eSSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } -emitterT void eSSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } +emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } +emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ORPS : Bit-wise Logical OR of Single-Precision FP Data * //********************************************************************************** -emitterT void eSSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } -emitterT void eSSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } +emitterT void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } +emitterT void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } -emitterT void eSSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } -emitterT void eSSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } +emitterT void SSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } +emitterT void SSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } ///////////////////////////////////////////////////////////////////////////////////// 
//**********************************************************************************/ //XORPS : Bitwise Logical XOR of Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } -emitterT void eSSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } +emitterT void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } +emitterT void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } -emitterT void eSSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } -emitterT void eSSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } +emitterT void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } +emitterT void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ADDPS : ADD Packed Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } +emitterT void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } +emitterT void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ADDSS : ADD Scalar Single-Precision FP Values * 
//********************************************************************************** -emitterT void eSSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } +emitterT void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } +emitterT void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } -emitterT void eSSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } -emitterT void eSSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } +emitterT void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } +emitterT void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SUBPS: Packed Single-Precision FP Subtract * //********************************************************************************** -emitterT void eSSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } -emitterT void eSSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } +emitterT void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SUBSS : Scalar Single-Precision FP Subtract * //********************************************************************************** -emitterT void eSSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } 
-emitterT void eSSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } +emitterT void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } -emitterT void eSSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } -emitterT void eSSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } +emitterT void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } +emitterT void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MULPS : Packed Single-Precision FP Multiply * //********************************************************************************** -emitterT void eSSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } -emitterT void eSSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } +emitterT void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } +emitterT void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MULSS : Scalar Single-Precision FP Multiply * //********************************************************************************** -emitterT void eSSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } -emitterT void eSSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } +emitterT void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 
0x590f, 0 ); } +emitterT void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } -emitterT void eSSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } -emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } +emitterT void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } +emitterT void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -513,22 +513,22 @@ emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S //missing SSE_CMPPS_I8_to_XMM // SSE_CMPPS_M32_to_XMM // SSE_CMPPS_XMM_to_XMM -emitterT void eSSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } -emitterT void eSSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } -emitterT void eSSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } -emitterT void eSSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } -emitterT void eSSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } -emitterT void eSSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } -emitterT void eSSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } -emitterT void eSSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } -emitterT void eSSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } -emitterT void eSSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ) { CMPPSRtoR( 5 ); } -emitterT void eSSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } -emitterT void eSSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } -emitterT void eSSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } -emitterT void eSSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } +emitterT void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } +emitterT void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } +emitterT void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } +emitterT void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } +emitterT void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } +emitterT void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } +emitterT void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } +emitterT void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } +emitterT void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } +emitterT void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } +emitterT void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } +emitterT void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); } +emitterT void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } +emitterT void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } +emitterT void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } +emitterT void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } 
/////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -537,194 +537,194 @@ emitterT void eSSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) //missing SSE_CMPSS_I8_to_XMM // SSE_CMPSS_M32_to_XMM // SSE_CMPSS_XMM_to_XMM -emitterT void eSSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } -emitterT void eSSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } -emitterT void eSSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } -emitterT void eSSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } -emitterT void eSSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } -emitterT void eSSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } -emitterT void eSSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } -emitterT void eSSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); } -emitterT void eSSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } -emitterT void eSSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); } -emitterT void eSSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } -emitterT void eSSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } -emitterT void eSSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } -emitterT void eSSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } +emitterT void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } +emitterT void 
SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } +emitterT void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } +emitterT void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } +emitterT void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } +emitterT void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } +emitterT void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } +emitterT void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } +emitterT void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } +emitterT void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); } +emitterT void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } +emitterT void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); } +emitterT void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } +emitterT void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } +emitterT void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } +emitterT void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } -emitterT void eSSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } -emitterT void eSSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } -emitterT void eSSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } -emitterT void eSSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } -emitterT void eSSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } -emitterT void eSSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } -emitterT void 
eSSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } -emitterT void eSSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } -emitterT void eSSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } -emitterT void eSSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } -emitterT void eSSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } -emitterT void eSSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } -emitterT void eSSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } -emitterT void eSSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } -emitterT void eSSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 7 ); } -emitterT void eSSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } +emitterT void SSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } +emitterT void SSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } +emitterT void SSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } +emitterT void SSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } +emitterT void SSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } +emitterT void SSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } +emitterT void SSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } +emitterT void SSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } +emitterT void SSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } +emitterT void SSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } +emitterT void SSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } 
+emitterT void SSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } +emitterT void SSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } +emitterT void SSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } +emitterT void SSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 7 ); } +emitterT void SSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } -emitterT void eSSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) { RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x2e0f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eSSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); + write16( 0x2e0f ); + ModRM( 3, to, from ); } -emitterT void eSSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) { - write8(0x66); + write8(0x66); RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x2e0f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eSSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); + write16( 0x2e0f ); + ModRM( 3, to, from ); } ////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * 
//********************************************************************************** -emitterT void eSSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } +emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } +emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * //********************************************************************************** -emitterT void eSSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } +emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } +emitterT void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SQRTPS : Packed Single-Precision FP Square Root * //********************************************************************************** -emitterT void eSSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } +emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } +emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } ////////////////////////////////////////////////////////////////////////////////////// 
//**********************************************************************************/ //SQRTSS : Scalar Single-Precision FP Square Root * //********************************************************************************** -emitterT void eSSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } +emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } +emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } -emitterT void eSSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } -emitterT void eSSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } +emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } +emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MAXPS: Return Packed Single-Precision FP Maximum * //********************************************************************************** -emitterT void eSSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } +emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } +emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } -emitterT void eSSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } -emitterT void eSSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); } +emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr 
from ) { SSEMtoR66( 0x5f0f ); } +emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MAXSS: Return Scalar Single-Precision FP Maximum * //********************************************************************************** -emitterT void eSSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } +emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } +emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } -emitterT void eSSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } +emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } +emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * //********************************************************************************** -emitterT void eSSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -emitterT void eSSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } +emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } +emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } 
/////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * //********************************************************************************** -emitterT void eSSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -emitterT void eSSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } +emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } -emitterT void eSSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } -emitterT void eSSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) +emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } +emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16(0x2c0f); - ModRM(3, to, from); + write16(0x2c0f); + ModRM(3, to, from); } -emitterT void eSSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } -emitterT void eSSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) +emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } +emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16(0x2a0f); - ModRM(3, to, from); + write16(0x2a0f); + ModRM(3, to, from); } -emitterT void eSSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } +emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, 
uptr from) { SSE_SS_MtoR(0x5a0f, 0); } +emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } -emitterT void eSSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } +emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } +emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * //********************************************************************************** -emitterT void eSSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } -emitterT void eSSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } +emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } +emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } //**********************************************************************************/ //CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * //********************************************************************************** -emitterT void eSSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } -emitterT void eSSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } +emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } +emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } -emitterT void eSSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType 
to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MINPS: Return Packed Single-Precision FP Minimum * //********************************************************************************** -emitterT void eSSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } +emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } +emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } -emitterT void eSSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } -emitterT void eSSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } +emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } +emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } ////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MINSS: Return Scalar Single-Precision FP Minimum * //********************************************************************************** -emitterT void eSSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } +emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } +emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } -emitterT void 
eSSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } +emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } +emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -734,7 +734,7 @@ emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S // SSE_PMAXSW_M64_to_MM // SSE2_PMAXSW_M128_to_XMM // SSE2_PMAXSW_XMM_to_XMM -emitterT void eSSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } +emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -744,659 +744,659 @@ emitterT void eSSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSER // SSE_PMINSW_M64_to_MM // SSE2_PMINSW_M128_to_XMM // SSE2_PMINSW_XMM_to_XMM -emitterT void eSSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } +emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SHUFPS: Shuffle Packed Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, 
u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } +emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } +emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) +emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { RexRB(0, to, from); - write16(0xc60f); - WriteRmOffsetFrom(to, from, offset); - write8(imm8); + write16(0xc60f); + WriteRmOffsetFrom(to, from, offset); + write8(imm8); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SHUFPD: Shuffle Packed Double-Precision FP Values * //********************************************************************************** -emitterT void eSSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } -emitterT void eSSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } +emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } +emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSHUFD: Shuffle Packed DoubleWords * //********************************************************************************** -emitterT void eSSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) +emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0x700F ); - write8( imm8 ); + write8( imm8 
); } -emitterT void eSSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } +emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } -emitterT void eSSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } -emitterT void eSSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } +emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } +emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } +emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } +emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * //********************************************************************************** -emitterT void eSSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } -emitterT void eSSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } +emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } +emitterT void SSE_UNPCKLPS_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * //********************************************************************************** -emitterT void eSSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } -emitterT void eSSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } +emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } +emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //DIVPS : Packed Single-Precision FP Divide * //********************************************************************************** -emitterT void eSSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } +emitterT void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } +emitterT void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //DIVSS : Scalar Single-Precision FP Divide * //********************************************************************************** -emitterT void eSSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } 
+emitterT void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } +emitterT void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } -emitterT void eSSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } +emitterT void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } +emitterT void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status * //********************************************************************************** -emitterT void eSSE_STMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); +emitterT void SSE_STMXCSR( uptr from ) { + write16( 0xAE0F ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //LDMXCSR : Load Streaming SIMD Extension Control/Status * //********************************************************************************** -emitterT void eSSE_LDMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); +emitterT void SSE_LDMXCSR( uptr from ) { + write16( 0xAE0F ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(from, 4) ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PADDB,PADDW,PADDD : Add Packed Integers * 
//********************************************************************************** -emitterT void eSSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } -emitterT void eSSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } +emitterT void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } +emitterT void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } +emitterT void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } +emitterT void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } +emitterT void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } +emitterT void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } +emitterT void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } +emitterT void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PCMPxx: Compare Packed Integers * //********************************************************************************** -emitterT void eSSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType 
from ){ SSERtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } -emitterT void eSSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } -emitterT void eSSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } -emitterT void eSSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } +emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } +emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } +emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } +emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } +emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } +emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } +emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } +emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } +emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); 
} +emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } +emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } +emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * //********************************************************************************** -emitterT void eSSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } -emitterT void eSSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } +emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } +emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSUBx: Subtract Packed Integers * //********************************************************************************** -emitterT void eSSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } -emitterT void eSSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } -emitterT void 
eSSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } -emitterT void eSSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } +emitterT void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } +emitterT void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } +emitterT void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } +emitterT void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } +emitterT void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } +emitterT void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } +emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } +emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVD: Move Dword(32bit) to /from XMM reg * //********************************************************************************** -emitterT void eSSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } -emitterT void eSSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } +emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } +emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } -emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6e0f ); - ModRM( 0, to, from); + write16( 0x6e0f ); + ModRM( 0, to, from); } -emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, 
int offset=0 ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6e0f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x6e0f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } +emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } +emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0, from, to); - write16( 0x7e0f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x7e0f ); + WriteRmOffsetFrom(from, to, offset); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //POR : SSE Bitwise OR * //********************************************************************************** -emitterT void eSSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } -emitterT void eSSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } +emitterT void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } +emitterT void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } // logical and to &= from -emitterT void eSSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } -emitterT void eSSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } +emitterT void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); 
} +emitterT void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } // to = (~to) & from -emitterT void eSSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } -emitterT void eSSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } +emitterT void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } +emitterT void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PXOR : SSE Bitwise XOR * //********************************************************************************** -emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } -emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } +emitterT void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } +emitterT void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } /////////////////////////////////////////////////////////////////////////////////////// -emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } +emitterT void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) SSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } +emitterT void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( 
AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } +emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } -emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) - eSSE_MOVUPS_M128_to_XMM( to, from ); + SSE_MOVUPS_M128_to_XMM( to, from ); else { - write8(0xF3); + write8(0xF3); SSEMtoR(0x6F0F, 0); } } -emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) +emitterT void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { if( AlwaysUseMovaps ) - eSSE_MOVUPS_XMM_to_M128( to, from ); + SSE_MOVUPS_XMM_to_M128( to, from ); else { - write8(0xF3); + write8(0xF3); SSERtoM(0x7F0F, 0); } } // shift right logical -emitterT void eSSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } +emitterT void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } +emitterT void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } +emitterT void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { 
SSEMtoR66(0xD20F); } +emitterT void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } +emitterT void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } +emitterT void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 3 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 3 , to ); + write8( imm8 ); } // shift right arithmetic -emitterT void eSSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } +emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } +emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( imm8 ); } -emitterT void 
eSSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } +emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } +emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( imm8 ); + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( imm8 ); } // shift left logical -emitterT void eSSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } +emitterT void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } +emitterT void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } +emitterT void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } +emitterT void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 
0x720F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } +emitterT void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } +emitterT void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 7 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 7 , to ); + write8( imm8 ); } -emitterT void eSSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } -emitterT void eSSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } +emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } +emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } -emitterT void eSSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } -emitterT void eSSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } +emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } +emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } -emitterT void eSSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } -emitterT void eSSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr 
from ) { SSEMtoR66( 0xEA0F ); } +emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } +emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } -emitterT void eSSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } -emitterT void eSSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } +emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } +emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } -emitterT void eSSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } -emitterT void eSSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } +emitterT void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } +emitterT void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } -emitterT void eSSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } -emitterT void eSSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xED0F ); } +emitterT void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } +emitterT void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xED0F ); } -emitterT void eSSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } -emitterT void eSSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } +emitterT void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } +emitterT void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } -emitterT void eSSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } -emitterT void eSSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { 
SSEMtoR66( 0xE90F ); } +emitterT void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } +emitterT void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE90F ); } -emitterT void eSSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } -emitterT void eSSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } +emitterT void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } +emitterT void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } +emitterT void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } +emitterT void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } -emitterT void eSSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } -emitterT void eSSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } +emitterT void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } +emitterT void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } +emitterT void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } +emitterT void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } //**********************************************************************************/ //PACKSSWB,PACKSSDW: Pack Saturate Signed Word 
//********************************************************************************** -emitterT void eSSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } -emitterT void eSSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } +emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } +emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } +emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } +emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } -emitterT void eSSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } -emitterT void eSSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } +emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } +emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } //**********************************************************************************/ //PUNPCKHWD: Unpack 16bit high //********************************************************************************** -emitterT void eSSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } -emitterT void eSSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } +emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } +emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } -emitterT void eSSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } -emitterT 
void eSSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } +emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } +emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } -emitterT void eSSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } -emitterT void eSSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } +emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } +emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } -emitterT void eSSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } -emitterT void eSSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } +emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } +emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } -emitterT void eSSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); 
} -emitterT void eSSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } +emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } +emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } -emitterT void eSSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } -emitterT void eSSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } +emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } +emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } -emitterT void eSSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } -emitterT void eSSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } -emitterT void eSSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } -emitterT void eSSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } +emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } +emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } +emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } +emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } -emitterT void eSSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } -emitterT void eSSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } +emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } +emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } -emitterT void eSSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } +emitterT void 
SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } -emitterT void eSSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } -emitterT void eSSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } +emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } +emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } -emitterT void eSSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } +emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } -emitterT void eSSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } -emitterT void eSSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } +emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } +emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } -emitterT void eSSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0xf3); +emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { + write8(0xf3); RexRB(0, to, from); - write16( 0x120f); - ModRM( 3, to, from ); + write16( 0x120f); + ModRM( 3, to, from ); } -emitterT void eSSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } -emitterT void eSSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } -emitterT void eSSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } +emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } +emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } +emitterT 
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } // SSSE3 -emitterT void eSSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1C380F); - ModRM(3, to, from); + write24(0x1C380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1D380F); - ModRM(3, to, from); + write24(0x1D380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1E380F); - ModRM(3, to, from); + write24(0x1E380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0F3A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x0F3A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x08380F); - ModRM(3, to, from); + write24(0x08380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x09380F); - ModRM(3, to, from); + write24(0x09380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from) +emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0A380F); - ModRM(3, to, from); + write24(0x0A380F); + ModRM(3, to, from); } // SSE4.1 -emitterT void eSSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); - write24(0x403A0F); - ModRM(3, to, from); - write8(imm8); + write8(0x66); + write24(0x403A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) +emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { - write8(0x66); - write24(0x403A0F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); - write8(imm8); + write8(0x66); + write24(0x403A0F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4)); + write8(imm8); } -emitterT void eSSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x213A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x213A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x173A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x173A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0C3A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x0C3A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void 
eSSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x14380F); - ModRM(3, to, from); + write24(0x14380F); + ModRM(3, to, from); } -emitterT void eSSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x14380F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); + write24(0x14380F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x25380F); - ModRM(3, to, from); + write24(0x25380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x35380F); - ModRM(3, to, from); + write24(0x35380F); + ModRM(3, to, from); } -emitterT void eSSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) +emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x223A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x223A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3D380F); - ModRM(3, to, from); + write24(0x3D380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x39380F); - ModRM(3, to, from); + write24(0x39380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3F380F); - ModRM(3, to, from); + write24(0x3F380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3B380F); - ModRM(3, to, from); + write24(0x3B380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3D380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x3D380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x39380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x39380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3F380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x3F380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3B380F); - ModRM( 0, to, DISP32 ); - 
write32(MEMADDR(from, 4)); + write24(0x3B380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x28380F); - ModRM(3, to, from); + write24(0x28380F); + ModRM(3, to, from); } From 1d9adee468b4eb65dab5e2ab7624ff4bc9b25ba2 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Tue, 7 Apr 2009 17:35:09 +0000 Subject: [PATCH 08/40] couple of changes, very minor speedup git-svn-id: http://pcsx2.googlecode.com/svn/trunk@920 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Counters.cpp | 30 ++++++++++++++---------------- pcsx2/Counters.h | 16 ++++++++-------- pcsx2/x86/iR3000A.cpp | 6 +++--- pcsx2/x86/iVUzerorec.cpp | 9 +++++---- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index a58ec1d0a3..b9b9a7c9ed 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -164,7 +164,7 @@ struct vSyncTimingInfo static vSyncTimingInfo vSyncInfo; -static __forceinline void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) +static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) { // Important: Cannot use floats or doubles here. The emulator changes rounding modes // depending on user-set speedhack options, and it can break float/double code @@ -270,8 +270,6 @@ u32 UpdateVSyncRate() return (u32)m_iTicks; } -extern u32 vu0time; - void frameLimitReset() { m_iStart = GetCPUTicks(); @@ -282,13 +280,13 @@ void frameLimitReset() // See the GS FrameSkip function for details on why this is here and not in the GS. static __forceinline void frameLimit() { + if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; + if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... 
+ s64 sDeltaTime; u64 uExpectedEnd; u64 iEnd; - if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; - if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... - uExpectedEnd = m_iStart + m_iTicks; iEnd = GetCPUTicks(); @@ -465,7 +463,7 @@ __forceinline bool rcntUpdate_vSync() return false; } -static __forceinline void __fastcall _cpuTestTarget( int i ) +static __forceinline void _cpuTestTarget( int i ) { if (counters[i].count < counters[i].target) return; @@ -538,7 +536,7 @@ __forceinline bool rcntUpdate() return retval; } -static void _rcntSetGate( int index ) +static __forceinline void _rcntSetGate( int index ) { if (counters[index].mode.EnableGate) { @@ -563,7 +561,7 @@ static void _rcntSetGate( int index ) } // mode - 0 means hblank source, 8 means vblank source. -void __fastcall rcntStartGate(bool isVblank, u32 sCycle) +__forceinline void rcntStartGate(bool isVblank, u32 sCycle) { int i; @@ -624,7 +622,7 @@ void __fastcall rcntStartGate(bool isVblank, u32 sCycle) } // mode - 0 means hblank signal, 8 means vblank signal. -void __fastcall rcntEndGate(bool isVblank , u32 sCycle) +__forceinline void rcntEndGate(bool isVblank , u32 sCycle) { int i; @@ -665,7 +663,7 @@ void __fastcall rcntEndGate(bool isVblank , u32 sCycle) // rcntUpdate, since we're being called from there anyway. 
} -void __fastcall rcntWmode(int index, u32 value) +__forceinline void rcntWmode(int index, u32 value) { if(counters[index].mode.IsCounting) { if(counters[index].mode.ClockSource != 0x3) { @@ -696,7 +694,7 @@ void __fastcall rcntWmode(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWcount(int index, u32 value) +__forceinline void rcntWcount(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target ); @@ -722,7 +720,7 @@ void __fastcall rcntWcount(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWtarget(int index, u32 value) +__forceinline void rcntWtarget(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeTarget = %x", index, value); @@ -738,13 +736,13 @@ void __fastcall rcntWtarget(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWhold(int index, u32 value) +__forceinline void rcntWhold(int index, u32 value) { EECNT_LOG("EE Counter[%d] Hold Write = %x", index, value); counters[index].hold = value; } -u32 __fastcall rcntRcount(int index) +__forceinline u32 rcntRcount(int index) { u32 ret; @@ -759,7 +757,7 @@ u32 __fastcall rcntRcount(int index) return ret; } -u32 __fastcall rcntCycle(int index) +__forceinline u32 rcntCycle(int index) { if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3)) return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate); diff --git a/pcsx2/Counters.h b/pcsx2/Counters.h index 6e20dda0c2..5333bcb111 100644 --- a/pcsx2/Counters.h +++ b/pcsx2/Counters.h @@ -139,14 +139,14 @@ extern bool rcntUpdate_vSync(); extern bool rcntUpdate(); extern void rcntInit(); -extern void __fastcall rcntStartGate(bool mode, u32 sCycle); -extern void __fastcall rcntEndGate(bool mode, u32 sCycle); -extern void __fastcall rcntWcount(int index, u32 value); -extern void __fastcall rcntWmode(int index, u32 value); -extern void __fastcall rcntWtarget(int index, 
u32 value); -extern void __fastcall rcntWhold(int index, u32 value); -extern u32 __fastcall rcntRcount(int index); -extern u32 __fastcall rcntCycle(int index); +extern void rcntStartGate(bool mode, u32 sCycle); +extern void rcntEndGate(bool mode, u32 sCycle); +extern void rcntWcount(int index, u32 value); +extern void rcntWmode(int index, u32 value); +extern void rcntWtarget(int index, u32 value); +extern void rcntWhold(int index, u32 value); +extern u32 rcntRcount(int index); +extern u32 rcntCycle(int index); u32 UpdateVSyncRate(); void frameLimitReset(); diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 0309d6eced..1911acd813 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -647,7 +647,7 @@ static void recExecute() //for (;;) R3000AExecute(); } -static s32 recExecuteBlock( s32 eeCycles ) +static __forceinline s32 recExecuteBlock( s32 eeCycles ) { psxBreak = 0; psxCycleEE = eeCycles; @@ -741,7 +741,7 @@ static __forceinline u32 psxRecClearMem(u32 pc) return upperextent - pc; } -static void recClear(u32 Addr, u32 Size) +static __forceinline void recClearIOP(u32 Addr, u32 Size) { u32 pc = Addr; while (pc < Addr + Size*4) @@ -1198,7 +1198,7 @@ R3000Acpu psxRec = { recResetIOP, recExecute, recExecuteBlock, - recClear, + recClearIOP, recShutdown }; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 80fbdc21ec..78ab51b4f6 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2302,10 +2302,11 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4); - // Clear allocation info to prevent bad data being used in other parts of pcsx2; doing this just incase (cottonvibes) - _initXMMregs(); - _initMMXregs(); - _initX86regs(); + // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2; + // not doing this because it's slow and not needed (rama) + // _initXMMregs(); + // _initMMXregs(); + // _initX86regs(); } #if 
defined(_MSC_VER) From 5f354c3cee536e1ce258d6a007b51f95cb616b94 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 21:54:50 +0000 Subject: [PATCH 09/40] Reverted the emitter back to a c/cpp form from inl files (probably wasn't necessary, but I don't like having code in header/inl files when I can help it). Also: * Fixed a couple potential bugs in some Rm forms of MMX instructions. * Improved compilation times by isolating BaseBlockEx.h to the files the needed it (it uses STL junks). * Removed some dead code form emitters and BaseBlockEx. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@921 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 8 +- pcsx2/Common.h | 7 +- pcsx2/Exceptions.h | 8 +- pcsx2/PrecompiledHeader.h | 3 + pcsx2/R5900OpcodeTables.h | 2 - pcsx2/System.h | 2 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 146 +- pcsx2/x86/BaseblockEx.h | 8 +- pcsx2/x86/iCore.cpp | 2 +- pcsx2/x86/iR3000A.cpp | 2 + pcsx2/x86/iR3000A.h | 2 - pcsx2/x86/iR5900.h | 2 - pcsx2/x86/ix86-32/iCore-32.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 3 + pcsx2/x86/ix86/ix86.cpp | 418 +++-- pcsx2/x86/ix86/ix86.h | 1455 ++++++++++++++++- .../ix86/{ix86_3dnow.inl => ix86_3dnow.cpp} | 3 +- pcsx2/x86/ix86/ix86_cpudetect.cpp | 6 +- pcsx2/x86/ix86/{ix86_fpu.inl => ix86_fpu.cpp} | 4 +- .../ix86/{ix86_group1.inl => ix86_group1.cpp} | 88 +- pcsx2/x86/ix86/ix86_internal.h | 43 + pcsx2/x86/ix86/{ix86.inl => ix86_legacy.cpp} | 149 +- pcsx2/x86/ix86/{ix86_mmx.inl => ix86_mmx.cpp} | 83 +- pcsx2/x86/ix86/{ix86_sse.inl => ix86_sse.cpp} | 201 ++- pcsx2/x86/ix86/ix86_sse_helpers.h | 176 +- pcsx2/x86/ix86/ix86_tools.cpp | 2 +- pcsx2/x86/microVU.h | 1 - 27 files changed, 1995 insertions(+), 831 deletions(-) rename pcsx2/x86/ix86/{ix86_3dnow.inl => ix86_3dnow.cpp} (94%) rename pcsx2/x86/ix86/{ix86_fpu.inl => ix86_fpu.cpp} (94%) rename pcsx2/x86/ix86/{ix86_group1.inl => ix86_group1.cpp} (67%) create mode 100644 pcsx2/x86/ix86/ix86_internal.h rename 
pcsx2/x86/ix86/{ix86.inl => ix86_legacy.cpp} (89%) rename pcsx2/x86/ix86/{ix86_mmx.inl => ix86_mmx.cpp} (86%) rename pcsx2/x86/ix86/{ix86_sse.inl => ix86_sse.cpp} (89%) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 8a71d245c4..d99f47debe 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -20,11 +20,11 @@ #define __PCSX2CONFIG_H__ // Hack so that you can still use this file from C (not C++), or from a plugin without access to Paths.h. -#ifdef PLUGIN_ONLY +// .. and removed in favor of a less hackish approach (air) + +#ifndef g_MaxPath #define g_MaxPath 255 -#else -#include "Paths.h" - #endif +#endif ///////////////////////////////////////////////////////////////////////// // Session Configuration Override Flags diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 1461bb83bd..46135a75ac 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -29,8 +29,10 @@ #define PCSX2_VERSION "(beta)" -#include "Plugins.h" +#include "System.h" + #include "SaveState.h" +#include "Plugins.h" #include "DebugTools/Debug.h" #include "Memory.h" @@ -40,7 +42,4 @@ #include "Elfheader.h" #include "Patch.h" -#include "System.h" -#include "Pcsx2Config.h" - #endif /* __COMMON_H__ */ diff --git a/pcsx2/Exceptions.h b/pcsx2/Exceptions.h index c15ffb5acb..640e61c0e1 100644 --- a/pcsx2/Exceptions.h +++ b/pcsx2/Exceptions.h @@ -16,11 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef _PCSX2_EXCEPTIONS_H_ -#define _PCSX2_EXCEPTIONS_H_ - -#include -#include "StringUtils.h" +#pragma once // This class provides an easy and clean method for ensuring objects are not copyable. 
class NoncopyableObject @@ -380,5 +376,3 @@ namespace Exception {} }; } - -#endif diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index 36195bb08d..e2b24c72e3 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -33,6 +33,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // Include the STL junk that's actually handy. +#include #include #include #include @@ -69,7 +70,9 @@ typedef int BOOL; #include "zlib/zlib.h" #include "PS2Etypes.h" +#include "MemcpyFast.h" #include "StringUtils.h" +#include "Exceptions.h" //////////////////////////////////////////////////////////////////// // Compiler/OS specific macros and defines -- Begin Section diff --git a/pcsx2/R5900OpcodeTables.h b/pcsx2/R5900OpcodeTables.h index 8f4d956848..cd2a5e499c 100644 --- a/pcsx2/R5900OpcodeTables.h +++ b/pcsx2/R5900OpcodeTables.h @@ -18,8 +18,6 @@ #ifndef _R5900_OPCODETABLES_H #define _R5900_OPCODETABLES_H -#include - #include "PS2Etypes.h" // TODO : Move these into the OpcodeTables namespace diff --git a/pcsx2/System.h b/pcsx2/System.h index 80c7516749..09dff0196c 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -20,9 +20,9 @@ #define __SYSTEM_H__ #include "PS2Etypes.h" +#include "Paths.h" #include "Pcsx2Config.h" #include "Exceptions.h" -#include "Paths.h" #include "MemcpyFast.h" #include "SafeArray.h" #include "Misc.h" diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index a77fc861b6..b013011d39 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -947,7 +947,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - -#include +#include // used by BaseBlockEx #include -// used to keep block information -#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot - // Every potential jump point in the PS2's addressable memory has a BASEBLOCK // 
associated with it. So that means a BASEBLOCK for every 4 bytes of PS2 // addressable memory. Yay! @@ -119,7 +114,6 @@ public: } }; -#define GET_BLOCKTYPE(b) ((b)->Type) #define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4))) static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000], diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 6ab0be3488..0ce3a2c5b5 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" #include "Vif.h" #include "VU.h" diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 1911acd813..a506bdc4a8 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -24,6 +24,8 @@ #include "PrecompiledHeader.h" #include "iR3000A.h" +#include "BaseblockEx.h" + #include #ifndef _WIN32 diff --git a/pcsx2/x86/iR3000A.h b/pcsx2/x86/iR3000A.h index ebf78ec3cb..220eb33ff0 100644 --- a/pcsx2/x86/iR3000A.h +++ b/pcsx2/x86/iR3000A.h @@ -18,12 +18,10 @@ #ifndef _R3000A_SUPERREC_ #define _R3000A_SUPERREC_ -#define _EmitterId_ EmitterId_R3000a #include "ix86/ix86.h" #include "R3000A.h" #include "iCore.h" -#include "BaseblockEx.h" // Cycle penalties for particularly slow instructions. static const int psxInstCycles_Mult = 7; diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 15fc5ef33b..5f0644c073 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -19,13 +19,11 @@ #ifndef __IR5900_H__ #define __IR5900_H__ -#define _EmitterId_ EmitterId_R5900 #include "ix86/ix86.h" #include "ix86/ix86_sse_helpers.h" #include "R5900.h" #include "VU.h" #include "iCore.h" -#include "BaseblockEx.h" // needed for recClear and stuff // Yay! These work now! (air) ... 
almost (air) #define ARITHMETICIMM_RECOMPILE diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 05655f66d9..9904f342a1 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" #include "Vif.h" #include "VU.h" diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index e231ef9ca3..cbe8ca8130 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -30,6 +30,9 @@ #include "iR5900Jump.h" #include "iR5900LoadStore.h" #include "iR5900Move.h" + +#include "BaseblockEx.h" + #include "iMMI.h" #include "iFPU.h" #include "iCOP0.h" diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 3a8e1d2830..bbdf9da5ff 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -27,7 +27,7 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86.h" +#include "ix86_internal.h" __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; @@ -39,198 +39,266 @@ PCSX2_ALIGNED16(float f[4]); XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; -namespace x86Emitter +namespace x86Emitter { + +x86IndexerType ptr; + +////////////////////////////////////////////////////////////////////////////////////////// +// +const x86Register x86Register::Empty( -1 ); + +const x86Register eax( 0 ); +const x86Register ebx( 3 ); +const x86Register ecx( 1 ); +const x86Register edx( 2 ); +const x86Register esi( 6 ); +const x86Register edi( 7 ); +const x86Register ebp( 5 ); +const x86Register esp( 4 ); + +const x86Register16 ax( 0 ); +const x86Register16 bx( 3 ); +const x86Register16 cx( 1 ); +const x86Register16 dx( 2 ); +const x86Register16 si( 6 ); +const x86Register16 di( 7 ); +const x86Register16 bp( 5 ); +const x86Register16 sp( 4 ); + +const x86Register8 al( 0 ); +const x86Register8 cl( 1 ); +const x86Register8 dl( 2 ); +const x86Register8 bl( 3 ); +const 
x86Register8 ah( 4 ); +const x86Register8 ch( 5 ); +const x86Register8 dh( 6 ); +const x86Register8 bh( 7 ); + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Register Method Implementations +// +x86ModRm x86Register::operator+( const x86Register& right ) const { - x86IndexerType ptr; + return x86ModRm( *this, right ); +} - ////////////////////////////////////////////////////////////////////////////////////////// - // - const x86Register x86Register::Empty( -1 ); +x86ModRm x86Register::operator+( const x86ModRm& right ) const +{ + return right + *this; +} - const x86Register eax( 0 ); - const x86Register ebx( 3 ); - const x86Register ecx( 1 ); - const x86Register edx( 2 ); - const x86Register esi( 6 ); - const x86Register edi( 7 ); - const x86Register ebp( 5 ); - const x86Register esp( 4 ); - - const x86Register16 ax( 0 ); - const x86Register16 bx( 3 ); - const x86Register16 cx( 1 ); - const x86Register16 dx( 2 ); - const x86Register16 si( 6 ); - const x86Register16 di( 7 ); - const x86Register16 bp( 5 ); - const x86Register16 sp( 4 ); - - const x86Register8 al( 0 ); - const x86Register8 cl( 1 ); - const x86Register8 dl( 2 ); - const x86Register8 bl( 3 ); - const x86Register8 ah( 4 ); - const x86Register8 ch( 5 ); - const x86Register8 dh( 6 ); - const x86Register8 bh( 7 ); - - ////////////////////////////////////////////////////////////////////////////////////////// - // x86Register Method Implementations - // - x86ModRm x86Register::operator+( const x86Register& right ) const +////////////////////////////////////////////////////////////////////////////////////////// +// x86ModRm Method Implementations +// +x86ModRm& x86ModRm::Add( const x86Register& src ) +{ + if( src == Index ) { - return x86ModRm( *this, right ); + Factor++; } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. 
+ Base = x86Register::Empty; - x86ModRm x86Register::operator+( const x86ModRm& right ) const - { - return right + *this; - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // ModSib Method Implementations - // - x86ModRm x86ModRm::FromIndexReg( x86Register index, int scale, int displacement ) - { - return x86ModRm( x86Register::Empty, index, scale, displacement ); - } - - x86Register x86ModRm::GetEitherReg() const - { - return Base.IsEmpty() ? Base : Index; - } - - x86ModRm& x86ModRm::Add( const x86Register& src ) - { if( src == Index ) - { Factor++; - } - else if( src == Base ) + else { - // Compound the existing register reference into the Index/Scale pair. - Base = x86Register::Empty; - - if( src == Index ) - Factor++; - else - { - jASSUME( Index.IsEmpty() ); // or die if we already have an index! - Index = src; - Factor = 2; - } - } - else if( Base.IsEmpty() ) - Base = src; - else if( Index.IsEmpty() ) + jASSUME( Index.IsEmpty() ); // or die if we already have an index! Index = src; - else - assert( false ); // oops, only 2 regs allowed per ModRm! - - return *this; - } - - x86ModRm& x86ModRm::Add( const x86ModRm& src ) - { - Add( src.Base ); - Add( src.Displacement ); - - // If the factor is 1, we can just treat index like a base register also. - if( src.Factor == 1 ) - { - Add( src.Index ); - } - else if( Index.IsEmpty() ) - { - Index = src.Index; - Factor = 1; - } - else if( Index == src.Index ) - Factor++; - else - assert( false ); // oops, only 2 regs allowed! - - return *this; - } - - - x86ModRm x86ptr( x86Register base ) { return x86ModRm( base ); } - - // ------------------------------------------------------------------------ - // Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. - // Necessary because by default ModSib compounds registers into Index when possible. - // - void ModSib::Reduce() - { - // If no index reg, then nothing for us to do... 
- if( Index.IsEmpty() || Scale == 0 ) return; - - // The Scale has a series of valid forms, all shown here: - - switch( Scale ) - { - case 1: Scale = 0; break; - case 2: Scale = 1; break; - - case 3: // becomes [reg*2+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 1; - break; - - case 4: Scale = 2; break; - - case 5: // becomes [reg*4+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 2; - break; - - case 6: // invalid! - assert( false ); - break; - - case 7: // so invalid! - assert( false ); - break; - - case 8: Scale = 3; break; - case 9: // becomes [reg*8+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 3; - break; + Factor = 2; } } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + assert( false ); // oops, only 2 regs allowed per ModRm! - ModSib::ModSib( const x86ModRm& src ) : - Base( src.Base ), - Index( src.Index ), - Scale( src.Factor ), - Displacement( src.Displacement ) + return *this; +} + +x86ModRm& x86ModRm::Add( const x86ModRm& src ) +{ + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. + if( src.Factor == 1 ) { - Reduce(); + Add( src.Index ); } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = 1; + } + else if( Index == src.Index ) + Factor++; + else + assert( false ); // oops, only 2 regs allowed! + + return *this; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// ModSib Method Implementations +// + +// ------------------------------------------------------------------------ +// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. +// Necessary because by default ModSib compounds registers into Index when possible. +// +void ModSib::Reduce() +{ + // If no index reg, then nothing for us to do... 
+ if( Index.IsEmpty() || Scale == 0 ) return; - ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : - Base( base ), - Index( index ), - Scale( scale ), - Displacement( displacement ) + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) { - Reduce(); - } + case 1: Scale = 0; break; + case 2: Scale = 1; break; - ModSib::ModSib( s32 displacement ) : - Base(), - Index(), - Scale(0), - Displacement( displacement ) - { - } + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; - x86Register ModSib::GetEitherReg() const - { - return Base.IsEmpty() ? Base : Index; + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! + assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; } } + +ModSib::ModSib( const x86ModRm& src ) : + Base( src.Base ), + Index( src.Index ), + Scale( src.Factor ), + Displacement( src.Displacement ) +{ + Reduce(); +} + +ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : + Base( base ), + Index( index ), + Scale( scale ), + Displacement( displacement ) +{ + Reduce(); +} + +ModSib::ModSib( s32 displacement ) : + Base(), + Index(), + Scale(0), + Displacement( displacement ) +{ +} + +x86Register ModSib::GetEitherReg() const +{ + return Base.IsEmpty() ? Base : Index; +} + +// ------------------------------------------------------------------------ +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction ca be encoded as ModRm alone. +emitterT bool NeedsSibMagic( const ModSib& info ) +{ + // no registers? no sibs! 
+ if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + + // If register is ESP, then we need a SIB: + if( info.Base == esp || info.Index == esp ) return true; + + return false; +} + +// ------------------------------------------------------------------------ +// Emits the ModRM byte, a SIB byte when NeedsSibMagic() requires one, and the displacement. +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( int regfield, const ModSib& info ) +{ + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + x86Register basereg = info.GetEitherReg(); + + if( basereg.IsEmpty() ) + { ModRM( 0, regfield, ModRm_UseDisp32 ); displacement_size = 2; } // Disp32 form is always followed by a full 32-bit displacement + else + { + if( basereg == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, basereg.Id ); + } + } + else + { + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); // SibSB takes (ss, index, base) + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; + jNO_DEFAULT + } +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! 
+// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +{ + EmitSibMagic( regfield.Id, info ); +} + +} diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 556405dc09..3151de883f 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -25,116 +25,1451 @@ */ #pragma once -#define _ix86_included_ // used for sanity checks by headers dependent on this one. #include "ix86_types.h" -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define emitterT static __forceinline - -#define MEMADDR(addr, oplen) (addr) - -#define Rex(w,r,x,b) assert(0) -#define RexR(w, reg) assert( !(w || (reg)>=8) ) -#define RexB(w, base) assert( !(w || (base)>=8) ) -#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) -#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) - -// We use int param for offsets and then test them for validity in the recompiler. -// This helps catch programmer errors better than using an auto-truncated s8 parameter. 
-#define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) - #ifdef _MSC_VER #define __threadlocal __declspec(thread) #else #define __threadlocal __thread #endif +#define MMXONLY(code) code + + //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ +#define emitterT __forceinline + extern __threadlocal u8 *x86Ptr; extern __threadlocal u8 *j8Ptr[32]; extern __threadlocal u32 *j32Ptr[32]; -emitterT void write8( u8 val ) +static emitterT void write8( u8 val ) { *x86Ptr = (u8)val; x86Ptr++; } -emitterT void write16( u16 val ) +static emitterT void write16( u16 val ) { *(u16*)x86Ptr = val; x86Ptr += 2; } -emitterT void write24( u32 val ) +static emitterT void write24( u32 val ) { *x86Ptr++ = (u8)(val & 0xff); *x86Ptr++ = (u8)((val >> 8) & 0xff); *x86Ptr++ = (u8)((val >> 16) & 0xff); } -emitterT void write32( u32 val ) +static emitterT void write32( u32 val ) { *(u32*)x86Ptr = val; x86Ptr += 4; } -emitterT void write64( u64 val ){ +static emitterT void write64( u64 val ) +{ *(u64*)x86Ptr = val; x86Ptr += 8; } + //------------------------------------------------------------------ //------------------------------------------------------------------ // jump/align functions //------------------------------------------------------------------ -emitterT void x86SetPtr( u8 *ptr ); -emitterT void x86SetJ8( u8 *j8 ); -emitterT void x86SetJ8A( u8 *j8 ); -emitterT void x86SetJ16( u16 *j16 ); -emitterT void x86SetJ16A( u16 *j16 ); -emitterT void x86SetJ32( u32 *j32 ); -emitterT void x86SetJ32A( u32 *j32 ); -emitterT void x86Align( int bytes ); -emitterT void x86AlignExecutable( int align ); +extern void x86SetPtr( u8 *ptr ); +extern void x86SetJ8( u8 *j8 ); +extern void x86SetJ8A( u8 *j8 ); +extern void x86SetJ16( u16 *j16 ); +extern void x86SetJ16A( u16 *j16 ); +extern void x86SetJ32( u32 *j32 ); +extern void x86SetJ32A( u32 *j32 ); +extern void x86Align( int bytes ); +extern 
void x86AlignExecutable( int align ); //------------------------------------------------------------------ -//------------------------------------------------------------------ -// General Emitter Helper functions -//------------------------------------------------------------------ -emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); -emitterT void ModRM( int mod, int reg, int rm ); -emitterT void SibSB( int ss, int index, int base ); -emitterT void SET8R( int cc, int to ); -emitterT void CMOV32RtoR( int cc, int to, int from ); -emitterT void CMOV32MtoR( int cc, int to, uptr from ); -emitterT u8* J8Rel( int cc, int to ); -emitterT u32* J32Rel( int cc, u32 to ); -emitterT u64 GetCPUTick( void ); -//------------------------------------------------------------------ +extern void CLC( void ); +extern void NOP( void ); -emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ); -emitterT u32* JMP32( uptr to ); -emitterT u8* JMP8( u8 to ); -emitterT void CALL32( u32 to ); -emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); -emitterT void NOP( void ); -emitterT void AND32ItoM( uptr to, u32 from ); -emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +//////////////////////////////////// +// mov instructions // +//////////////////////////////////// +// mov r32 to r32 +extern void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +// mov r32 to m32 +extern void MOV32RtoM( uptr to, x86IntRegType from ); +// mov m32 to r32 +extern void MOV32MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r32 +extern void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ); +// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +extern void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +extern void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) 
+extern void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +extern void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +extern void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +extern void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +extern void FADD32( u32 from ); +// fsub m32 to fpu reg stack +extern void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +extern void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +extern void FDIV32( u32 from ); +// fcomi st, st( i) +extern void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +extern void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +extern void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +extern void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +extern void FCOM32( u32 from ); +// fabs fpu reg stack +extern void FABS( void ); +// fsqrt fpu reg stack +extern void FSQRT( void ); +// fpatan (partial arctangent) fpu reg stack +extern void FPATAN( void ); +// fsin fpu reg stack +extern void FSIN( void ); +// fchs fpu reg stack +extern void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +extern void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +extern void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +extern void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +extern void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +extern void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +extern void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +extern void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +extern void FCMOVNU32( x86IntRegType from ); +extern void FCOMP32( u32 from ); +extern void FNSTSWtoAX( void ); #define MMXONLY(code) code -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | 
((dstField)<<4) | (zeroMask)) -#include "ix86.inl" -#include "ix86_3dnow.inl" -#include "ix86_fpu.inl" -#include "ix86_mmx.inl" -#include "ix86_sse.inl" +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +extern void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +extern void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 +extern void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn m64 to r64 +extern void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +extern void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +extern void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +extern void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +extern void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +extern void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +extern void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to 
r64 +extern void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +extern void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +extern void PADDDMtoR( x86MMXRegType to, uptr from ); +extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +extern void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) +extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +extern void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) 
+extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +extern void PSRLWItoR( x86MMXRegType to, u8 from ); +extern void PSRLDItoR( x86MMXRegType to, u8 from ); +extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSLLWItoR( x86MMXRegType to, u8 from ); +extern void PSLLDItoR( x86MMXRegType to, u8 from ); +extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSRAWItoR( x86MMXRegType to, u8 from ); +extern void PSRADItoR( x86MMXRegType to, u8 from ); +extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); +extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); +extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType 
from ); +extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +extern void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +//********************* +// SSE instructions * +//********************* +extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, 
x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void 
SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void 
SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); + +extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_SHUFPS_M128_to_XMM( 
x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType 
to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE_STMXCSR( uptr from ); +extern void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* + +extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +extern void 
SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void 
SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + 
+//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from ); +extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + 
+//**********************************************************************************/ +//MOVD: Move Qword(64bit) to/from MMX/XMM reg * +//********************************************************************************** +extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +extern void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSSE3 + +extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); +extern void 
SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); +extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +//********************* +// 3DNOW instructions * +//********************* +extern void FEMMS( void ); +extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); +extern void PFADDMtoR( x86IntRegType to, uptr from ); +extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFSUBMtoR( x86IntRegType to, uptr from ); +extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMULMtoR( x86IntRegType to, uptr from ); +extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPMtoR( x86IntRegType to, uptr from ); +extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +extern void 
PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PF2IDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMAXMtoR( x86IntRegType to, uptr from ); +extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMINMtoR( x86IntRegType to, uptr from ); +extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); diff --git a/pcsx2/x86/ix86/ix86_3dnow.inl b/pcsx2/x86/ix86/ix86_3dnow.cpp similarity index 94% rename from pcsx2/x86/ix86/ix86_3dnow.inl rename to pcsx2/x86/ix86/ix86_3dnow.cpp index e7a667f409..ae6743cc3d 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.inl +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // 3DNOW instructions diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index bf88feeb32..7e3323e4a7 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -18,10 +18,8 @@ #include "PrecompiledHeader.h" -#define _EmitterId_ 0 - -#include "ix86.h" -#include "Misc.h" +#include "ix86_internal.h" +#include "System.h" #include "Threading.h" #include "RedtapeWindows.h" diff --git a/pcsx2/x86/ix86/ix86_fpu.inl b/pcsx2/x86/ix86/ix86_fpu.cpp similarity index 94% rename from pcsx2/x86/ix86/ix86_fpu.inl rename to pcsx2/x86/ix86/ix86_fpu.cpp index 7ed607f30c..d7e3a65963 100644 --- a/pcsx2/x86/ix86/ix86_fpu.inl +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -16,8 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once -//#include "PrecompiledHeader.h" +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // FPU 
instructions diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.cpp similarity index 67% rename from pcsx2/x86/ix86/ix86_group1.inl rename to pcsx2/x86/ix86/ix86_group1.cpp index 99e1d6d4ff..1af689e82c 100644 --- a/pcsx2/x86/ix86/ix86_group1.inl +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // x86 Group 1 Instructions @@ -32,85 +33,6 @@ namespace x86Emitter { -static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) -static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - -// ------------------------------------------------------------------------ -// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the -// instruction ca be encoded as ModRm alone. -emitterT bool NeedsSibMagic( const ModSib& info ) -{ - // no registers? no sibs! - if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; - - // A scaled register needs a SIB - if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; - - // two registers needs a SIB - if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; - - // If register is ESP, then we need a SIB: - if( info.Base == esp || info.Index == esp ) return true; - - return false; -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -emitterT void EmitSibMagic( int regfield, const ModSib& info ) -{ - int displacement_size = (info.Displacement == 0) ? 0 : - ( ( info.IsByteSizeDisp() ) ? 
1 : 2 ); - - if( !NeedsSibMagic( info ) ) - { - // Use ModRm-only encoding, with the rm field holding an index/base register, if - // one has been specified. If neither register is specified then use Disp32 form, - // which is encoded as "EBP w/o displacement" (which is why EBP must always be - // encoded *with* a displacement of 0, if it would otherwise not have one). - - x86Register basereg = info.GetEitherReg(); - - if( basereg.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); - else - { - if( basereg == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - ModRM( displacement_size, regfield, basereg.Id ); - } - } - else - { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); - } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; - jNO_DEFAULT - } -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. 
-// -emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) -{ - EmitSibMagic( regfield.Id, info ); -} - enum Group1InstructionType { G1Type_ADD=0, @@ -245,9 +167,9 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h new file mode 100644 index 0000000000..83ec23a291 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -0,0 +1,43 @@ + +#pragma once +#include "ix86.h" + +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ + +#define MEMADDR(addr, oplen) (addr) + +#define Rex(w,r,x,b) assert(0) +#define RexR(w, reg) assert( !(w || 
(reg)>=8) ) +#define RexB(w, base) assert( !(w || (base)>=8) ) +#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) +#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) + +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) + +static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) +static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + + +//------------------------------------------------------------------ +// General Emitter Helper functions +//------------------------------------------------------------------ + +namespace x86Emitter +{ + extern void EmitSibMagic( int regfield, const ModSib& info ); + extern void EmitSibMagic( x86Register regfield, const ModSib& info ); + extern bool NeedsSibMagic( const ModSib& info ); +} + +// From here out are the legacy (old) emitter functions... + +extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); +extern void ModRM( int mod, int reg, int rm ); +extern void SibSB( int ss, int index, int base ); +extern void SET8R( int cc, int to ); +extern u8* J8Rel( int cc, int to ); +extern u32* J32Rel( int cc, u32 to ); +extern u64 GetCPUTick( void ); +//------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86_legacy.cpp similarity index 89% rename from pcsx2/x86/ix86/ix86.inl rename to pcsx2/x86/ix86/ix86_legacy.cpp index b29427da43..8a99058c02 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -27,14 +27,12 @@ #pragma once //------------------------------------------------------------------ -// ix86 instructions +// ix86 legacy emitter functions //------------------------------------------------------------------ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86.h" - -#include "ix86_group1.inl" +#include "ix86_internal.h" // 
Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. @@ -46,7 +44,7 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset ModRM( 0, to, 0x4 ); SibSB( 0, 0x4, 0x4 ); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8( offset ) ) { ModRM( 1, to, 0x4 ); SibSB( 0, 0x4, 0x4 ); write8(offset); @@ -61,7 +59,7 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset if( offset == 0 ) { ModRM( 0, to, from ); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8( offset ) ) { ModRM( 1, to, from ); write8(offset); } @@ -136,8 +134,13 @@ emitterT void x86SetPtr( u8* ptr ) x86Ptr = ptr; } -//////////////////////////////////////////////////// -emitterT void x86SetJ8( u8* j8 ) +////////////////////////////////////////////////////////////////////////////////////////// +// Jump Label API (as rough as it might be) +// +// I don't auto-inline these because of the console logging in case of error, which tends +// to cause quite a bit of code bloat. +// +void x86SetJ8( u8* j8 ) { u32 jump = ( x86Ptr - j8 ) - 1; @@ -148,7 +151,7 @@ emitterT void x86SetJ8( u8* j8 ) *j8 = (u8)jump; } -emitterT void x86SetJ8A( u8* j8 ) +void x86SetJ8A( u8* j8 ) { u32 jump = ( x86Ptr - j8 ) - 1; @@ -169,26 +172,6 @@ emitterT void x86SetJ8A( u8* j8 ) *j8 = (u8)jump; } -emitterT void x86SetJ16( u16 *j16 ) -{ - // doesn't work - u32 jump = ( x86Ptr - (u8*)j16 ) - 2; - - if ( jump > 0x7fff ) { - Console::Error( "j16 greater than 0x7fff!!" 
); - assert(0); - } - *j16 = (u16)jump; -} - -emitterT void x86SetJ16A( u16 *j16 ) -{ - if( ((uptr)x86Ptr&0xf) > 4 ) { - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - } - x86SetJ16(j16); -} - //////////////////////////////////////////////////// emitterT void x86SetJ32( u32* j32 ) { @@ -211,25 +194,29 @@ emitterT void x86Align( int bytes ) //////////////////////////////////////////////////// // Generates executable code to align to the given alignment (could be useful for the second leg // of if/else conditionals, which usually fall through a jump target label). -emitterT void x86AlignExecutable( int align ) +// +// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott) +// P4s benefit from this, and we don't optimize for those platforms anyway. +// +void x86AlignExecutable( int align ) { uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); uptr bytes = ( newx86 - (uptr)x86Ptr ); switch( bytes ) { - case 0: break; + case 0: break; - case 1: NOP(); break; - case 2: MOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: NOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: NOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; + case 1: NOP(); break; + case 2: MOV32RtoR( ESI, ESI ); break; + case 3: write8(0x08D); write8(0x024); write8(0x024); break; + case 5: NOP(); // falls through to 4... + case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; + case 6: write8(0x08D); write8(0x0B6); write32(0); break; + case 8: NOP(); // falls through to 7... + case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; - default: + default: { // for larger alignments, just use a JMP... 
u8* aligned_target = JMP8(0); @@ -242,7 +229,7 @@ emitterT void x86AlignExecutable( int align ) } /********************/ -/* IX86 intructions */ +/* IX86 instructions */ /********************/ emitterT void STC( void ) @@ -300,7 +287,7 @@ emitterT void MOV32MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write8( 0x8B ); @@ -308,7 +295,7 @@ emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) } /* mov [r32+r32*scale] to r32 */ -emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ) { RexRXB(0,to,from2,from); write8( 0x8B ); @@ -317,7 +304,7 @@ emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType f } // mov r32 to [r32<= -128) + else if(is_s8(imm)) { write8( 0x83 ); ModRM( 3, 0, to ); @@ -860,7 +840,7 @@ emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) emitterT void ADD16ItoM( uptr to, s16 imm ) { write8( 0x66 ); - if(imm <= 127 && imm >= -128) + if(is_s8(imm)) { write8( 0x83 ); ModRM( 0, 0, DISP32 ); @@ -1776,31 +1756,6 @@ emitterT u8* JNO8( u8 to ) { return J8Rel( 0x71, to ); } -/* Untested and slower, use 32bit versions instead -// ja rel16 -emitterT u16* eJA16( u16 to ) -{ -return J16Rel( 0x87, to ); -} - -// jb rel16 -emitterT u16* eJB16( u16 to ) -{ -return J16Rel( 0x82, to ); -} - -// je rel16 -emitterT u16* eJE16( u16 to ) -{ -return J16Rel( 0x84, to ); -} - -// jz rel16 -emitterT u16* eJZ16( u16 to ) -{ -return J16Rel( 0x84, to ); -} -*/ // jb rel32 emitterT u32* JB32( u32 to ) { @@ -2271,7 +2226,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) ModRM(1, to, from); write8(0x24); } - else if( offset <= 127 && offset >= -128 ) { + else if( 
is_s8(offset) ) { ModRM(1, to, from); write8(0x24); write8(offset); @@ -2286,7 +2241,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) if( offset == 0 && from != EBP && from!=ESP ) { ModRM(0, to, from); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8(offset) ) { ModRM(1, to, from); write8(offset); } @@ -2298,7 +2253,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) } // to = from + offset -emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) +emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset) { write8(0x66); LEA32RtoR(to, from, offset); diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.cpp similarity index 86% rename from pcsx2/x86/ix86/ix86_mmx.inl rename to pcsx2/x86/ix86/ix86_mmx.cpp index f63686e700..77f8f33c97 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // MMX instructions @@ -482,32 +483,16 @@ emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) ModRM( 3, to, from ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { write16( 0x6F0F ); - - if( offset < 128 && offset >= -128) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } + WriteRmOffsetFrom( to, from, offset ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { write16( 0x7F0F ); - - if( offset < 128 && offset >= -128) { - ModRM( 1, from , to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } + 
WriteRmOffsetFrom( from, to, offset ); } /* movd m32 to r64 */ @@ -532,24 +517,10 @@ emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { write16( 0x6E0F ); - ModRM( 0, to, from ); -} - -emitterT void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) -{ - write16( 0x6E0F ); - - if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } + WriteRmOffsetFrom( to, from, offset ); } emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) @@ -558,46 +529,12 @@ emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) ModRM( 3, from, to ); } -emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { write16( 0x7E0F ); - ModRM( 0, from, to ); - if( to >= 4 ) { - // no idea why - assert( to == ESP ); - write8(0x24); - } - + WriteRmOffsetFrom( from, to, offset ); } -emitterT void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) -{ - write16( 0x7E0F ); - - if( offset < 128 ) { - ModRM( 1, from, to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } -} - -///* movd r32 to r64 */ -//emitterT void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x6E0F ); -// ModRM( 3, to, from ); -//} -// -///* movq r64 to r32 */ -//emitterT void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x7E0F ); -// ModRM( 3, from, to ); -//} - // untested emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.cpp similarity index 89% rename from pcsx2/x86/ix86/ix86_sse.inl rename to pcsx2/x86/ix86/ix86_sse.cpp index af25c1cbaa..ffeb51365b 
100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.cpp @@ -16,7 +16,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" +#include "ix86_sse_helpers.h" ////////////////////////////////////////////////////////////////////////////////////////// // AlwaysUseMovaps [const] @@ -144,7 +146,7 @@ static const bool AlwaysUseMovaps = true; write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { RexRXB(0, to, from2, from); write16( 0x100f ); @@ -153,7 +155,7 @@ emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegT } /* movups xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { RexRXB(1, to, from2, from); write16( 0x110f ); @@ -185,7 +187,7 @@ emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from ); } -emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x120f ); @@ -200,7 +202,7 @@ emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) ModRM( 0, from, to ); } -emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, from, to); write16( 0x130f ); @@ -208,7 +210,7 @@ emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset= } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVAPSRmStoR( 
x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -218,7 +220,7 @@ emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegT } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -228,7 +230,7 @@ emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegT } // movaps [r32+offset] to r32 -emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x280f ); @@ -236,7 +238,7 @@ emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset= } // movaps r32 to [r32+offset] -emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16( 0x290f ); @@ -244,7 +246,7 @@ emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset= } // movdqa [r32+offset] to r32 -emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) { if( AlwaysUseMovaps ) SSE_MOVAPSRmtoR( to, from, offset ); @@ -258,7 +260,7 @@ emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset } // movdqa r32 to [r32+offset] -emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) { if( 
AlwaysUseMovaps ) SSE_MOVAPSRtoRm( to, from, offset ); @@ -272,7 +274,7 @@ emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset } // movups [r32+offset] to r32 -emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x100f ); @@ -280,7 +282,7 @@ emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset= } // movups r32 to [r32+offset] -emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16( 0x110f ); @@ -335,7 +337,7 @@ emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { write8(0xf3); RexRB(0, to, from); @@ -343,7 +345,7 @@ emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int off WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { write8(0xf3); RexRB(0, from, to); @@ -358,14 +360,14 @@ emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, 
to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16(0x130f); @@ -379,14 +381,14 @@ emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int of emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } -emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x160f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16(0x170f); @@ -900,7 +902,7 @@ emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from); } -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { write8(0x66); RexRB(0, to, from); @@ -911,7 +913,7 @@ emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int off emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { write8(0x66); RexRB(0, from, to); @@ -1400,3 +1402,160 @@ emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) write24(0x28380F); 
ModRM(3, to, from); } + +////////////////////////////////////////////////////////////////////////////////////////// +// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) +// This header should always be included *after* ix86.h. + +// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the +// overhead of dynarec instructions that use these, even though the same check would +// have been done redundantly by the emitter function. + +emitterT void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); + else SSE_MOVAPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); + else SSE_MOVAPS_XMM_to_M128(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); + else SSE_MOVAPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); + else SSE_MOVAPSRmtoR(to, from, offset); +} + +emitterT void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); + else SSE_MOVAPSRtoRm(to, from, offset); +} + +emitterT void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); + else SSE_MOVUPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); + else SSE_MOVUPS_XMM_to_M128(to, from);
+} + +emitterT void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); + else SSE_MOVSS_M32_to_XMM(to, from); +} + +emitterT void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); + else SSE_MOVSS_XMM_to_M32(to, from); +} + +emitterT void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); + else SSE_MOVSS_Rm_to_XMM(to, from, offset); +} + +emitterT void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); + else SSE_MOVSS_XMM_to_Rm(to, from, offset); +} + +emitterT void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); + else SSE_ORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); + else SSE_ORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); + else SSE_XORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); + else SSE_XORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); + else SSE_ANDPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from); + else SSE_ANDPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( 
g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); + else SSE_ANDNPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); + else SSE_ANDNPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); + else SSE_UNPCKLPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); + else SSE_UNPCKLPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); + else SSE_UNPCKHPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); + else SSE_UNPCKHPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) { + SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); + if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); + } + else { + SSE_MOVHLPS_XMM_to_XMM(to, from); + } +} diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index ccd7ef5373..b198c336b5 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -22,158 +22,30 @@ // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) // This header should always be included *after* ix86.h. -#ifndef _ix86_included_ -#error Dependency fail: Please define _EmitterId_ and include ix86.h first. -#endif - // Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the // overhead of dynarec instructions that use these. 
-static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); - else SSE_MOVAPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); - else SSE_MOVAPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); - else SSE_MOVAPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); - else SSE_MOVAPSRmtoR(to, from, offset); -} - -static __forceinline void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); - else SSE_MOVAPSRtoRm(to, from, offset); -} - -static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); - else SSE_MOVUPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); - else SSE_MOVUPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); - else SSE_MOVSS_M32_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); - else SSE_MOVSS_XMM_to_M32(to, from); -} - -static 
__forceinline void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); - else SSE_MOVSS_Rm_to_XMM(to, from, offset); -} - -static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); - else SSE_MOVSS_XMM_to_Rm(to, from, offset); -} - -static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); - else SSE_ORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); - else SSE_ORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); - else SSE_XORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); - else SSE_XORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); - else SSE_ANDPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from); - else SSE_ANDPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); - else SSE_ANDNPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, 
from); - else SSE_ANDNPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); - else SSE_UNPCKLPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); - else SSE_UNPCKLPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); - else SSE_UNPCKHPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); - else SSE_UNPCKHPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) { - SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); - if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); - } - else { - SSE_MOVHLPS_XMM_to_XMM(to, from); - } -} +extern void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int 
offset=0 ); +extern void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 5ef532ffcd..13028b7432 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "ix86/ix86.h" // used to make sure regs don't get changed while in recompiler diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index c37a084043..a95251ffc0 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -18,7 +18,6 @@ #pragma once #define mVUdebug // Prints Extra Info to Console -#define _EmitterId_ (vuIndex+1) #include "Common.h" #include "VU.h" #include "GS.h" From 920e99145e0b0903d268e80f0822fe29802b123f Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 23:48:43 +0000 Subject: [PATCH 10/40] Implemented the 16 bit forms of Group 1 instructions into the new emitter. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@922 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86.h | 21 +- pcsx2/x86/ix86/ix86_group1.cpp | 103 ++++++---- pcsx2/x86/ix86/ix86_legacy.cpp | 339 +-------------------------------- pcsx2/x86/ix86/ix86_types.h | 4 +- 4 files changed, 85 insertions(+), 382 deletions(-) diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 3151de883f..939ff21e48 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -46,9 +46,16 @@ extern __threadlocal u8 *x86Ptr; extern __threadlocal u8 *j8Ptr[32]; extern __threadlocal u32 *j32Ptr[32]; +template< typename T > +static emitterT void x86write( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); +} + static emitterT void write8( u8 val ) { - *x86Ptr = (u8)val; + *x86Ptr = val; x86Ptr++; } @@ -271,9 +278,9 @@ extern void ADD32MtoR( x86IntRegType to, uptr from ); // add r16 to r16 extern void ADD16RtoR( x86IntRegType to , x86IntRegType from ); // add imm16 to r16 -extern void ADD16ItoR( x86IntRegType to, s16 from ); +extern void ADD16ItoR( x86IntRegType to, u16 imm ); // add imm16 to m16 -extern void ADD16ItoM( uptr to, s16 from ); +extern void ADD16ItoM( uptr to, u16 imm ); // add r16 to m16 extern void ADD16RtoM( uptr to, x86IntRegType from ); // add m16 to r16 @@ -313,7 +320,7 @@ extern void SUB32MtoR( x86IntRegType to, uptr from ) ; // sub r32 to m32 extern void SUB32RtoM( uptr to, x86IntRegType from ); // sub r16 to r16 -extern void SUB16RtoR( x86IntRegType to, u16 from ); +extern void SUB16RtoR( x86IntRegType to, x86IntRegType from ); // sub imm16 to r16 extern void SUB16ItoR( x86IntRegType to, u16 from ); // sub imm16 to m16 @@ -643,12 +650,6 @@ extern void CMP32ItoM( uptr to, u32 from ); extern void CMP32RtoR( x86IntRegType to, x86IntRegType from ); // cmp m32 to r32 extern void CMP32MtoR( x86IntRegType to, uptr from ); -// cmp imm32 to [r32] -extern void CMP32I8toRm( x86IntRegType to, u8 from); -// cmp imm32 to [r32+off] -extern void 
CMP32I8toRm8( x86IntRegType to, u8 from, u8 off); -// cmp imm8 to [r32] -extern void CMP32I8toM( uptr to, u8 from); // cmp imm16 to r16 extern void CMP16ItoR( x86IntRegType to, u16 from ); diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp index 1af689e82c..190550e1eb 100644 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -46,13 +46,27 @@ enum Group1InstructionType }; -emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, x86Register to, x86Register from ) { write8( 0x01 | (inst<<3) ); ModRM( 3, from.Id, to.Id ); } -emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) +static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); +} + +/* add m32 to r32 */ +static emitterT void Group1( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +{ + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); +} + +template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T imm ) { if( is_s8( imm ) ) { @@ -69,11 +83,12 @@ emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) write8( 0x81 ); ModRM( 3, inst, to.Id ); } - write32( imm ); + x86write( imm ); } } -emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) +template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) { write8( is_s8( imm ) ? 
0x83 : 0x81 ); @@ -82,23 +97,10 @@ emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 if( is_s8( imm ) ) write8( (s8)imm ); else - write32( imm ); + x86write( imm ); } -emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) -{ - write8( 0x01 | (inst<<3) ); - EmitSibMagic( from, sibdest ); -} - -/* add m32 to r32 */ -emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) -{ - write8( 0x03 | (inst<<3) ); - EmitSibMagic( to, sibsrc ); -} - -emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) +static emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) { if( to == eax ) { @@ -113,28 +115,30 @@ emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) } } +// 16 bit instruction prefix! +static __forceinline void prefix16() { write8(0x66); } +static __forceinline x86Register cvt2reg( x86Register16 src ) { return x86Register( src.Id ); } + ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } - -/* - emitterT void 
lwr##16( x86Register16 to, x86Register16 from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##16( x86Register16 to, u16 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##16( x86Register16 to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##16( void* to, x86Register16 from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##16( void* to, u16 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##16( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } -*/ + emitterT void lwr##32( x86Register to, x86Register from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + \ + emitterT void lwr##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), cvt2reg(from) ); } \ + emitterT void lwr##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ + emitterT void lwr##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); 
} \ + emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ + emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ + emitterT void lwr##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, cvt2reg(to), imm ); } \ + emitterT void lwr##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##16( const x86ModRm& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } DEFINE_GROUP1_OPCODE( add, ADD ); DEFINE_GROUP1_OPCODE( cmp, CMP ); @@ -153,6 +157,10 @@ static __forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) return x86Emitter::x86Register( src ); } +static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) +{ + return x86Emitter::x86Register16( src ); +} static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) { @@ -164,12 +172,21 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) #define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ){ 
x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } \ + \ + emitterT void cod##16RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##16( _reghlp16(to), _reghlp16(from) ); } \ + emitterT void cod##16ItoR( x86IntRegType to, u16 imm ) { x86Emitter::lwr##16( _reghlp16(to), imm ); } \ + emitterT void cod##16MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##16( _reghlp16(to), (void*)from ); } \ + emitterT void cod##16RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##16( (void*)to, _reghlp16(from) ); } \ + emitterT void cod##16ItoM( uptr to, u16 imm ) { x86Emitter::lwr##16( (void*)to, imm ); } \ + emitterT void cod##16ItoRm( x86IntRegType to, u16 imm, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##16RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _reghlp16(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##16RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, _reghlp16(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); @@ -180,6 +197,8 @@ DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); +// Special forms needed by the legacy emitter syntax: + emitterT void AND32I8toR( x86IntRegType to, s8 from ) { x86Emitter::and32( _reghlp(to), from ); diff --git 
a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 8a99058c02..3f4134836c 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -802,80 +802,6 @@ emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic instructions / //////////////////////////////////// -// add r16 to r16 -emitterT void ADD16RtoR( x86IntRegType to , x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x03 ); - ModRM( 3, to, from ); -} - -/* add imm16 to r16 */ -emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) -{ - write8( 0x66 ); - RexB(0,to); - - if (to == EAX) - { - write8( 0x05 ); - write16( imm ); - } - else if(is_s8(imm)) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 3, 0, to ); - write16( imm ); - } -} - -/* add imm16 to m16 */ -emitterT void ADD16ItoM( uptr to, s16 imm ) -{ - write8( 0x66 ); - if(is_s8(imm)) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( imm ); - } -} - -/* add r16 to m16 */ -emitterT void ADD16RtoM(uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* add m16 to r16 */ -emitterT void ADD16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // add m8 to r8 emitterT void ADD8MtoR( x86IntRegType to, uptr from ) { @@ -915,49 +841,6 @@ emitterT void INC16M( u32 to ) write32( MEMADDR(to, 4) ); } - - -// sub r16 to r16 -emitterT void SUB16RtoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x2b ); - ModRM( 3, to, from ); -} - -/* sub imm16 to r16 */ -emitterT void SUB16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) { - write8( 
0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write16( from ); -} - -/* sub imm16 to m16 */ -emitterT void SUB16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* sub m16 to r16 */ -emitterT void SUB16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* dec r32 */ emitterT void DEC32R( x86IntRegType to ) { @@ -1331,60 +1214,6 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -// or r16 to r16 -emitterT void OR16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or imm16 to r16 -emitterT void OR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write16( from ); -} - -// or imm16 to m316 -emitterT void OR16ItoM( uptr to, u16 from ) -{ - write8(0x66); - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* or m16 to r16 */ -emitterT void OR16MtoR( x86IntRegType to, uptr from ) -{ - write8(0x66); - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// or r16 to m16 -emitterT void OR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - // or r8 to r8 emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) { @@ -1420,77 +1249,6 @@ emitterT void OR8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -// and r16 to r16 -emitterT void AND16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 3, to, from ); -} - -/* and 
imm16 to r16 */ -emitterT void AND16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - - if ( to == EAX ) { - write8( 0x25 ); - write16( from ); - } - else if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8((u8)from ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - write16( from ); - } -} - -/* and imm16 to m16 */ -emitterT void AND16ItoM( uptr to, u16 from ) -{ - write8(0x66); - if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)from ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); - - } -} - -/* and r16 to m16 */ -emitterT void AND16RtoM( uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m16 to r16 */ -emitterT void AND16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); -} - /* and imm8 to r8 */ emitterT void AND8ItoR( x86IntRegType to, u8 from ) { @@ -1906,93 +1664,6 @@ emitterT void CALL32M( u32 to ) // misc instructions / //////////////////////////////////// -// cmp imm8 to [r32] (byte ptr) -emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) -{ - RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - -// cmp imm8 to [r32] -emitterT void CMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( (off!=0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - -// cmp imm32 to [r32] -emitterT void CMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) -{ - // fixme : This should use the imm8 form if 'from' is between 127 and -128. 
- - RexB(0,to); - write8( 0x81 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write32(from); -} - -// cmp imm8 to [mem] (dword ptr) -emitterT void CMP32I8toM( uptr to, u8 from ) -{ - write8( 0x83 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* cmp imm16 to r16 */ -emitterT void CMP16ItoR( x86IntRegType to, u16 from ) -{ - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3D ); - } - else - { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write16( from ); -} - -/* cmp imm16 to m16 */ -emitterT void CMP16ItoM( uptr to, u16 from ) -{ - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* cmp r16 to r16 */ -emitterT void CMP16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m16 to r16 */ -emitterT void CMP16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // cmp imm8 to r8 emitterT void CMP8ItoR( x86IntRegType to, u8 from ) { @@ -2018,6 +1689,16 @@ emitterT void CMP8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } +// cmp imm8 to [r32] (byte ptr) +emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) +{ + RexB(0,to); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write8(from); +} + /* test imm32 to r32 */ emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index be0e0d3ec1..589badb78a 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -151,7 +151,9 @@ struct CPUINFO{ extern CPUINFO cpuinfo; //------------------------------------------------------------------ -static __forceinline bool is_s8( u32 imm ) { return (s8)imm == (s32)imm; } +// templated version of is_s8 is required, so that u16's 
get correct sign extension treatment. +template< typename T > +static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } namespace x86Emitter { From 3dd99a09327656d094ae85c61f9940c4c73535a9 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 8 Apr 2009 06:25:40 +0000 Subject: [PATCH 11/40] More updates to the new emitter: switched over some Push/Pop instructions, did a fully compliant implementation of LEa (both 16 and 32!), and fixed a couple small bugs in the ModRM/Sib encoder regarding EBP as an [index*scale] formation. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@923 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 2 +- pcsx2/x86/ix86/ix86.cpp | 265 +++++++++++++++++++++++++++----- pcsx2/x86/ix86/ix86.h | 50 ++++++ pcsx2/x86/ix86/ix86_group1.cpp | 187 ++++++++++++----------- pcsx2/x86/ix86/ix86_internal.h | 2 +- pcsx2/x86/ix86/ix86_legacy.cpp | 271 +++++---------------------------- pcsx2/x86/ix86/ix86_types.h | 111 +++++++++----- 7 files changed, 495 insertions(+), 393 deletions(-) diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 3271d69681..9f9d816cf7 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -354,7 +354,7 @@ void recVUMI_IADD( VURegs *VU, int info ) if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg); else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg); - else LEA16RRtoR(fdreg, fsreg, ftreg); + else LEA32RRtoR(fdreg, fsreg, ftreg); MOVZX32R16toR(fdreg, fdreg); // neeed since don't know if fdreg's upper bits are 0 } } diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index bbdf9da5ff..a13c7b34a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -45,16 +45,16 @@ x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // -const x86Register x86Register::Empty( -1 ); +const x86Register32 x86Register32::Empty( -1 ); -const x86Register eax( 0 ); -const x86Register ebx( 3 ); -const 
x86Register ecx( 1 ); -const x86Register edx( 2 ); -const x86Register esi( 6 ); -const x86Register edi( 7 ); -const x86Register ebp( 5 ); -const x86Register esp( 4 ); +const x86Register32 eax( 0 ); +const x86Register32 ebx( 3 ); +const x86Register32 ecx( 1 ); +const x86Register32 edx( 2 ); +const x86Register32 esi( 6 ); +const x86Register32 edi( 7 ); +const x86Register32 ebp( 5 ); +const x86Register32 esp( 4 ); const x86Register16 ax( 0 ); const x86Register16 bx( 3 ); @@ -77,20 +77,30 @@ const x86Register8 bh( 7 ); ////////////////////////////////////////////////////////////////////////////////////////// // x86Register Method Implementations // -x86ModRm x86Register::operator+( const x86Register& right ) const +x86ModRm x86Register32::operator+( const x86Register32& right ) const { return x86ModRm( *this, right ); } -x86ModRm x86Register::operator+( const x86ModRm& right ) const +x86ModRm x86Register32::operator+( const x86ModRm& right ) const { return right + *this; } +x86ModRm x86Register32::operator+( s32 right ) const +{ + return x86ModRm( *this, right ); +} + +x86ModRm x86Register32::operator*( u32 right ) const +{ + return x86ModRm( Empty, *this, right ); +} + ////////////////////////////////////////////////////////////////////////////////////////// // x86ModRm Method Implementations // -x86ModRm& x86ModRm::Add( const x86Register& src ) +x86ModRm& x86ModRm::Add( const x86IndexReg& src ) { if( src == Index ) { @@ -99,7 +109,7 @@ x86ModRm& x86ModRm::Add( const x86Register& src ) else if( src == Base ) { // Compound the existing register reference into the Index/Scale pair. - Base = x86Register::Empty; + Base = x86IndexReg::Empty; if( src == Index ) Factor++; @@ -153,13 +163,20 @@ x86ModRm& x86ModRm::Add( const x86ModRm& src ) // void ModSib::Reduce() { - // If no index reg, then nothing for us to do... - if( Index.IsEmpty() || Scale == 0 ) return; + // If no index reg, then load the base register into the index slot. 
+ if( Index.IsEmpty() ) + { + Index = Base; + Scale = 0; + Base = x86IndexReg::Empty; + return; + } // The Scale has a series of valid forms, all shown here: switch( Scale ) { + case 0: break; case 1: Scale = 0; break; case 2: Scale = 1; break; @@ -203,7 +220,7 @@ ModSib::ModSib( const x86ModRm& src ) : Reduce(); } -ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : +ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) : Base( base ), Index( index ), Scale( scale ), @@ -220,27 +237,24 @@ ModSib::ModSib( s32 displacement ) : { } -x86Register ModSib::GetEitherReg() const -{ - return Base.IsEmpty() ? Base : Index; -} - // ------------------------------------------------------------------------ // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the // instruction ca be encoded as ModRm alone. -emitterT bool NeedsSibMagic( const ModSib& info ) +bool NeedsSibMagic( const ModSib& info ) { // no registers? no sibs! - if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + if( info.Index.IsEmpty() ) return false; // A scaled register needs a SIB - if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + if( info.Scale != 0 ) return true; // two registers needs a SIB - if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + if( !info.Base.IsEmpty() ) return true; - // If register is ESP, then we need a SIB: - if( info.Base == esp || info.Index == esp ) return true; + // If index register is ESP, then we need a SIB: + // (the ModSib::Reduce() ensures that stand-alone ESP will be in the + // index position for us) + if( info.Index == esp ) return true; return false; } @@ -251,7 +265,7 @@ emitterT bool NeedsSibMagic( const ModSib& info ) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. 
// -emitterT void EmitSibMagic( int regfield, const ModSib& info ) +void EmitSibMagic( int regfield, const ModSib& info ) { int displacement_size = (info.Displacement == 0) ? 0 : ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); @@ -263,29 +277,45 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // which is encoded as "EBP w/o displacement" (which is why EBP must always be // encoded *with* a displacement of 0, if it would otherwise not have one). - x86Register basereg = info.GetEitherReg(); - - if( basereg.IsEmpty() ) + if( info.Index.IsEmpty() ) ModRM( 0, regfield, ModRm_UseDisp32 ); else { - if( basereg == ebp && displacement_size == 0 ) + if( info.Index == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - ModRM( displacement_size, regfield, basereg.Id ); + ModRM( displacement_size, regfield, info.Index.Id ); } } else { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). + + if( info.Base.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; + } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! 
+ + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } } switch( displacement_size ) { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; jNO_DEFAULT } } @@ -296,9 +326,166 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // -emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info ) { EmitSibMagic( regfield.Id, info ); } +template< typename ToReg > +static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) +{ + int displacement_size = (src.Displacement == 0) ? 0 : + ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); + + // See EmitSibMagic for commenting on SIB encoding. + + if( !NeedsSibMagic( src ) ) + { + // LEA Land: means we have either 1-register encoding or just an offset. + // offset is encodable as an immediate MOV, and a register is encodable + // as a register MOV. + + if( src.Index.IsEmpty() ) + { + if( is16bit ) + MOV16ItoR( to.Id, src.Displacement ); + else + MOV32ItoR( to.Id, src.Displacement ); + return; + } + else if( displacement_size == 0 ) + { + if( is16bit ) + MOV16RtoR( to.Id, src.Index.Id ); + else + MOV32RtoR( to.Id, src.Index.Id ); + return; + } + else + { + // note: no need to do ebp+0 check since we encode all 0 displacements as + // register assignments above (via MOV) + + write8( 0x8d ); + ModRM( displacement_size, to.Id, src.Index.Id ); + } + } + else + { + if( src.Base.IsEmpty() ) + { + if( displacement_size == 0 ) + { + // Encode [Index*Scale] as a combination of Mov and Shl. 
+ // This is more efficient because of the bloated format which requires + // a 32 bit displacement. + + if( is16bit ) + { + MOV16RtoR( to.Id, src.Index.Id ); + SHL16ItoR( to.Id, src.Scale ); + } + else + { + MOV32RtoR( to.Id, src.Index.Id ); + SHL32ItoR( to.Id, src.Scale ); + } + return; + } + + write8( 0x8d ); + ModRM( 0, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; // force 32bit displacement. + } + else + { + if( src.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + write8( 0x8d ); + ModRM( displacement_size, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, src.Base.Id ); + } + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( src.Displacement ); break; + case 2: write32( src.Displacement ); break; + jNO_DEFAULT + } + +} + +emitterT void LEA32( x86Register32 to, const ModSib& src ) +{ + EmitLeaMagic( to, src ); +} + + +emitterT void LEA16( x86Register16 to, const ModSib& src ) +{ + // fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form? + + write8( 0x66 ); + EmitLeaMagic( to, src ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Miscellaneous Section! +// Various Instructions with no parameter and no special encoding logic. +// +emitterT void RET() { write8( 0xC3 ); } +emitterT void CBW() { write16( 0x9866 ); } +emitterT void CWD() { write8( 0x98 ); } +emitterT void CDQ() { write8( 0x99 ); } +emitterT void CWDE() { write8( 0x98 ); } + +emitterT void LAHF() { write8( 0x9f ); } +emitterT void SAHF() { write8( 0x9e ); } + + +////////////////////////////////////////////////////////////////////////////////////////// +// Push / Pop Emitters +// +// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu +// is running in. So even thought these say push32, they would essentially be push64 on +// an x64 build. 
Should I rename them accordingly? --air +// +// Note: pushad/popad implementations are intentionally left out. The instructions are +// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. + + +emitterT void POP( x86Register32 from ) +{ + write8( 0x58 | from.Id ); +} + +emitterT void POP( const ModSib& from ) +{ + write8( 0x8f ); EmitSibMagic( 0, from ); +} + +emitterT void PUSH( u32 imm ) +{ + write8( 0x68 ); write32( imm ); +} + +emitterT void PUSH( x86Register32 from ) +{ + write8( 0x50 | from.Id ); +} + +emitterT void PUSH( const ModSib& from ) +{ + write8( 0xff ); EmitSibMagic( 6, from ); +} + +// pushes the EFLAGS register onto the stack +emitterT void PUSHFD() { write8( 0x9C ); } +// pops the EFLAGS register from the stack +emitterT void POPFD() { write8( 0x9D ); } + } diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 939ff21e48..bf89a17853 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -100,6 +100,54 @@ extern void x86Align( int bytes ); extern void x86AlignExecutable( int align ); //------------------------------------------------------------------ +////////////////////////////////////////////////////////////////////////////////////////// +// New C++ Emitter! +// +// To use it just include the x86Emitter namespace into your file/class/function off choice. 
+ +namespace x86Emitter +{ + extern void POP( x86Register32 from ); + extern void POP( const ModSib& from ); + + extern void PUSH( u32 imm ); + extern void PUSH( x86Register32 from ); + extern void PUSH( const ModSib& from ); + + extern void LEA32( x86Register32 to, const ModSib& src ); + extern void LEA16( x86Register16 to, const ModSib& src ); + + + static __forceinline void POP( void* from ) { POP( ptr[from] ); } + static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } + + #define DECLARE_GROUP1_OPCODE_HELPER( lwr, bits ) \ + emitterT void lwr##bits( x86Register##bits to, x86Register##bits from ); \ + emitterT void lwr##bits( x86Register##bits to, void* from ); \ + emitterT void lwr##bits( x86Register##bits to, const ModSib& from ); \ + emitterT void lwr##bits( x86Register##bits to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, u##bits imm ); + + #define DECLARE_GROUP1_OPCODE( lwr ) \ + DECLARE_GROUP1_OPCODE_HELPER( lwr, 32 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 16 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 8 ) + + DECLARE_GROUP1_OPCODE( ADD ) + DECLARE_GROUP1_OPCODE( CMP ) + DECLARE_GROUP1_OPCODE( OR ) + DECLARE_GROUP1_OPCODE( ADC ) + DECLARE_GROUP1_OPCODE( SBB ) + DECLARE_GROUP1_OPCODE( AND ) + DECLARE_GROUP1_OPCODE( SUB ) + DECLARE_GROUP1_OPCODE( XOR ) + +} + + extern void CLC( void ); extern void NOP( void ); @@ -130,6 +178,8 @@ extern void MOV32ItoRm( x86IntRegType to, u32 from, int offset=0); // mov r32 to [r32+off] extern void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0); +// mov r16 to r16 +extern void MOV16RtoR( x86IntRegType to, x86IntRegType from ) ; // mov r16 to m16 extern void MOV16RtoM( uptr to, x86IntRegType from ); // mov m16 to r16 diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp index 
190550e1eb..f76950c0ef 100644 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -33,6 +33,20 @@ namespace x86Emitter { +////////////////////////////////////////////////////////////////////////////////////////// +// x86RegConverter - this class is used internally by the emitter as a helper for +// converting 8 and 16 register forms into 32 bit forms. This way the end-user exposed API +// can use type-safe 8/16/32 bit register types, and the underlying code can use a single +// unified emitter to generate all function variations + prefixes and such. :) +// +class x86RegConverter : public x86Register32 +{ +public: + x86RegConverter( x86Register32 src ) : x86Register32( src ) {} + x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {} + x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {} +}; + enum Group1InstructionType { G1Type_ADD=0, @@ -46,29 +60,32 @@ enum Group1InstructionType }; -static emitterT void Group1( Group1InstructionType inst, x86Register to, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false ) { - write8( 0x01 | (inst<<3) ); + write8( (bit8form ? 0 : 1) | (inst<<3) ); ModRM( 3, from.Id, to.Id ); } -static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false ) { - write8( 0x01 | (inst<<3) ); + write8( (bit8form ? 0 : 1) | (inst<<3) ); EmitSibMagic( from, sibdest ); } -/* add m32 to r32 */ -static emitterT void Group1( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false ) { - write8( 0x03 | (inst<<3) ); + write8( (bit8form ? 
2 : 3) | (inst<<3) ); EmitSibMagic( to, sibsrc ); } +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T imm ) +static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm ) { - if( is_s8( imm ) ) + bool bit8form = (sizeof(T) == 1); + + if( !bit8form && is_s8( imm ) ) { write8( 0x83 ); ModRM( 3, inst, to.Id ); @@ -77,84 +94,81 @@ static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T i else { if( to == eax ) - write8( 0x05 | (inst<<3) ); + write8( (bit8form ? 4 : 5) | (inst<<3) ); else { - write8( 0x81 ); + write8( bit8form ? 0x80 : 0x81 ); ModRM( 3, inst, to.Id ); } x86write( imm ); } } +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). template< typename T > static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) { - write8( is_s8( imm ) ? 0x83 : 0x81 ); + bool bit8form = (sizeof(T) == 1); + + write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) ); EmitSibMagic( inst, sibdest ); - if( is_s8( imm ) ) + if( !bit8form && is_s8( imm ) ) write8( (s8)imm ); else x86write( imm ); } -static emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) -{ - if( to == eax ) - { - write8( 0x04 | (inst<<3) ); - write8( imm ); - } - else - { - write8( 0x80 ); - ModRM( 3, inst, to.Id ); - write8( imm ); - } -} - // 16 bit instruction prefix! 
static __forceinline void prefix16() { write8(0x66); } -static __forceinline x86Register cvt2reg( x86Register16 src ) { return x86Register( src.Id ); } ////////////////////////////////////////////////////////////////////////////////////////// // -#define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ +#define DEFINE_GROUP1_OPCODE( cod ) \ + emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ \ - emitterT void lwr##16( x86Register16 to, x86Register16 from ) 
{ prefix16(); Group1( G1Type_##cod, cvt2reg(to), cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ - emitterT void lwr##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ - emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, cvt2reg(to), imm ); } \ - emitterT void lwr##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##16( const x86ModRm& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } + emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + \ + emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, 
ptr[from], true ); } \ + emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \ + emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } -DEFINE_GROUP1_OPCODE( add, ADD ); -DEFINE_GROUP1_OPCODE( cmp, CMP ); -DEFINE_GROUP1_OPCODE( or, OR ); -DEFINE_GROUP1_OPCODE( adc, ADC ); -DEFINE_GROUP1_OPCODE( sbb, SBB ); -DEFINE_GROUP1_OPCODE( and, AND ); -DEFINE_GROUP1_OPCODE( sub, SUB ); -DEFINE_GROUP1_OPCODE( xor, XOR ); +DEFINE_GROUP1_OPCODE( ADD ) +DEFINE_GROUP1_OPCODE( CMP ) +DEFINE_GROUP1_OPCODE( OR ) +DEFINE_GROUP1_OPCODE( ADC ) +DEFINE_GROUP1_OPCODE( SBB ) +DEFINE_GROUP1_OPCODE( AND ) +DEFINE_GROUP1_OPCODE( SUB ) +DEFINE_GROUP1_OPCODE( XOR ) } // end namespace x86Emitter -static __forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) +static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src ) { - return x86Emitter::x86Register( src ); + return x86Emitter::x86Register32( src ); } static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) @@ -162,49 +176,50 @@ static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) return x86Emitter::x86Register16( src ); } -static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) +static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src ) { - return x86Emitter::x86ModRm( _reghlp(src) ); + return x86Emitter::x86Register8( src ); +} + +static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src ) +{ + return x86Emitter::ModSib( x86Emitter::x86ModRm( 
_reghlp32(src) ) ); } ////////////////////////////////////////////////////////////////////////////////////////// // -#define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ - emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ - emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } \ - \ - emitterT void cod##16RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##16( _reghlp16(to), _reghlp16(from) ); } \ - emitterT void cod##16ItoR( x86IntRegType to, u16 imm ) { x86Emitter::lwr##16( _reghlp16(to), imm ); } \ - emitterT void cod##16MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##16( _reghlp16(to), (void*)from ); } \ - emitterT void cod##16RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##16( (void*)to, _reghlp16(from) ); } \ - emitterT void cod##16ItoM( uptr to, u16 imm ) { x86Emitter::lwr##16( (void*)to, imm ); } \ - emitterT void cod##16ItoRm( x86IntRegType to, u16 imm, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##16RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _reghlp16(to), _mrmhlp(from) + offset ); } \ - emitterT void 
cod##16RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, _reghlp16(from) ); } +#define DEFINE_LEGACY_HELPER( cod, bits ) \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); } -DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); -DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); -DEFINE_GROUP1_OPCODE_LEGACY( or, OR ); -DEFINE_GROUP1_OPCODE_LEGACY( adc, ADC ); -DEFINE_GROUP1_OPCODE_LEGACY( sbb, SBB ); -DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); -DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); -DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); +#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \ + DEFINE_LEGACY_HELPER( cod, 32 ) \ + DEFINE_LEGACY_HELPER( cod, 16 ) \ + DEFINE_LEGACY_HELPER( cod, 8 ) + +DEFINE_GROUP1_OPCODE_LEGACY( ADD ) +DEFINE_GROUP1_OPCODE_LEGACY( CMP ) +DEFINE_GROUP1_OPCODE_LEGACY( OR ) +DEFINE_GROUP1_OPCODE_LEGACY( ADC ) +DEFINE_GROUP1_OPCODE_LEGACY( SBB ) +DEFINE_GROUP1_OPCODE_LEGACY( AND ) +DEFINE_GROUP1_OPCODE_LEGACY( SUB ) 
+DEFINE_GROUP1_OPCODE_LEGACY( XOR ) // Special forms needed by the legacy emitter syntax: emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - x86Emitter::and32( _reghlp(to), from ); + x86Emitter::AND32( _reghlp32(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) { - x86Emitter::and32( (void*)to, from ); + x86Emitter::AND32( (void*)to, from ); } diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h index 83ec23a291..4f3f72f2e4 100644 --- a/pcsx2/x86/ix86/ix86_internal.h +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -27,7 +27,7 @@ static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod fi namespace x86Emitter { extern void EmitSibMagic( int regfield, const ModSib& info ); - extern void EmitSibMagic( x86Register regfield, const ModSib& info ); + extern void EmitSibMagic( x86Register32 regfield, const ModSib& info ); extern bool NeedsSibMagic( const ModSib& info ); } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 3f4134836c..752414a1b2 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -24,8 +24,6 @@ * cottonvibes(@gmail.com) */ -#pragma once - //------------------------------------------------------------------ // ix86 legacy emitter functions //------------------------------------------------------------------ @@ -34,6 +32,8 @@ #include "System.h" #include "ix86_internal.h" +using namespace x86Emitter; + // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. 
@@ -256,6 +256,8 @@ emitterT void NOP( void ) /* mov r32 to r32 */ emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) { + if( to == from ) return; + RexRB(0, from, to); write8( 0x89 ); ModRM( 3, from, to ); @@ -356,6 +358,18 @@ emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset) WriteRmOffsetFrom(from, to, offset); } + +/* mov r32 to r32 */ +emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from ) +{ + if( to == from ) return; + + write8( 0x66 ); + RexRB(0, from, to); + write8( 0x89 ); + ModRM( 3, from, to ); +} + /* mov r16 to m16 */ emitterT void MOV16RtoM(uptr to, x86IntRegType from ) { @@ -802,15 +816,6 @@ emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic instructions / //////////////////////////////////// -// add m8 to r8 -emitterT void ADD8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x02 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* inc r32 */ emitterT void INC32R( x86IntRegType to ) { @@ -1214,90 +1219,6 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -// or r8 to r8 -emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x08 ); - ModRM( 3, from, to ); -} - -// or r8 to m8 -emitterT void OR8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x08 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -// or imm8 to m8 -emitterT void OR8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// or m8 to r8 -emitterT void OR8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* and imm8 to r8 */ -emitterT void AND8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x24 ); - } - else { - write8( 0x80 ); - ModRM( 3, 0x4, to 
); - } - write8( from ); -} - -/* and imm8 to m8 */ -emitterT void AND8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// and r8 to r8 -emitterT void AND8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x22 ); - ModRM( 3, to, from ); -} - -/* and r8 to m8 */ -emitterT void AND8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x20 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m8 to r8 */ -emitterT void AND8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x22 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); -} - /* not r32 */ emitterT void NOT32R( x86IntRegType from ) { @@ -1664,41 +1585,6 @@ emitterT void CALL32M( u32 to ) // misc instructions / //////////////////////////////////// -// cmp imm8 to r8 -emitterT void CMP8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3C ); - } - else - { - write8( 0x80 ); - ModRM( 3, 7, to ); - } - write8( from ); -} - -// cmp m8 to r8 -emitterT void CMP8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to [r32] (byte ptr) -emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) -{ - RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - /* test imm32 to r32 */ emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { @@ -1830,31 +1716,19 @@ emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } /* push imm32 */ -emitterT void PUSH32I( u32 from ) -{; -write8( 0x68 ); -write32( from ); -} +emitterT void PUSH32I( u32 from ) { PUSH( from ); } /* push r32 */ -emitterT void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } +emitterT void PUSH32R( x86IntRegType from ) { PUSH( x86Register32( from ) 
); } /* push m32 */ -emitterT void PUSH32M( u32 from ) +emitterT void PUSH32M( u32 from ) { - write8( 0xFF ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + PUSH( ptr[from] ); } /* pop r32 */ -emitterT void POP32R( x86IntRegType from ) { write8( 0x58 | from ); } - -/* pushad */ -emitterT void PUSHA32( void ) { write8( 0x60 ); } - -/* popad */ -emitterT void POPA32( void ) { write8( 0x61 ); } +emitterT void POP32R( x86IntRegType from ) { POP( x86Register32( from ) ); } /* pushfd */ emitterT void PUSHFD( void ) { write8( 0x9C ); } @@ -1899,95 +1773,34 @@ emitterT void BSWAP32R( x86IntRegType to ) emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { - RexRB(0,to,from); - write8(0x8d); - - if( (from&7) == ESP ) { - if( offset == 0 ) { - ModRM(1, to, from); - write8(0x24); - } - else if( is_s8(offset) ) { - ModRM(1, to, from); - write8(0x24); - write8(offset); - } - else { - ModRM(2, to, from); - write8(0x24); - write32(offset); - } - } - else { - if( offset == 0 && from != EBP && from!=ESP ) { - ModRM(0, to, from); - } - else if( is_s8(offset) ) { - ModRM(1, to, from); - write8(offset); - } - else { - ModRM(2, to, from); - write32(offset); - } - } -} - -// to = from + offset -emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset) -{ - write8(0x66); - LEA32RtoR(to, from, offset); -} - -// to = from0 + from1 -emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) -{ - write8(0x66); - LEA32RRtoR(to, from0, from1); + LEA32( x86Register32( to ), ptr[x86IndexReg(from)+offset] ); } emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { - RexRXB(0, to, from0, from1); - write8(0x8d); - - if( (from1&7) == EBP ) { - ModRM(1, to, 4); - ModRM(0, from0, from1); - write8(0); - } - else { - ModRM(0, to, 4); - ModRM(0, from0, from1); - } -} - -// to = from << scale (max is 3) -emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) -{ - write8(0x66); - 
LEA32RStoR(to, from, scale); + LEA32( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] ); } // Don't inline recursive functions emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { - if( to == from ) { - SHL32ItoR(to, scale); - return; - } - - if( from != ESP ) { - RexRXB(0,to,from,0); - write8(0x8d); - ModRM(0, to, 4); - ModRM(scale, from, 5); - write32(0); - } - else { - assert( to != ESP ); - MOV32RtoR(to, from); - LEA32RStoR(to, to, scale); - } + LEA32( x86Register32( to ), ptr[x86IndexReg(from)*(1<= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } - bool operator==( const x86Register& src ) const { return Id == src.Id; } - bool operator!=( const x86Register& src ) const { return Id != src.Id; } + bool operator==( const x86Register32& src ) const { return Id == src.Id; } + bool operator!=( const x86Register32& src ) const { return Id != src.Id; } - x86ModRm operator+( const x86Register& right ) const; + x86ModRm operator+( const x86Register32& right ) const; x86ModRm operator+( const x86ModRm& right ) const; + x86ModRm operator+( s32 right ) const; + + x86ModRm operator*( u32 factor ) const; - x86Register& operator=( const x86Register& src ) + x86Register32& operator=( const x86Register32& src ) { Id = src.Id; return *this; } }; - + ////////////////////////////////////////////////////////////////////////////////////////// // Similar to x86Register, but without the ability to add/combine them with ModSib. 
// @@ -198,7 +201,7 @@ namespace x86Emitter x86Register16( const x86Register16& src ) : Id( src.Id ) {} x86Register16() : Id( -1 ) {} - explicit x86Register16( int regId ) : Id( regId ) { } + explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } @@ -224,7 +227,7 @@ namespace x86Emitter x86Register8( const x86Register16& src ) : Id( src.Id ) {} x86Register8() : Id( -1 ) {} - explicit x86Register8( int regId ) : Id( regId ) { } + explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } @@ -237,19 +240,22 @@ namespace x86Emitter return *this; } }; + + // Use 32 bit registers as out index register (for ModSig memory address calculations) + typedef x86Register32 x86IndexReg; ////////////////////////////////////////////////////////////////////////////////////////// // class x86ModRm { public: - x86Register Base; // base register (no scale) - x86Register Index; // index reg gets multiplied by the scale + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) 
s32 Displacement; // address displacement public: - x86ModRm( x86Register base, x86Register index, int factor=1, s32 displacement=0 ) : + x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -257,7 +263,7 @@ namespace x86Emitter { } - explicit x86ModRm( x86Register base, int displacement=0 ) : + explicit x86ModRm( x86IndexReg base, int displacement=0 ) : Base( base ), Index(), Factor(0), @@ -273,11 +279,11 @@ namespace x86Emitter { } - static x86ModRm FromIndexReg( x86Register index, int scale=0, s32 displacement=0 ); + static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - x86Register GetEitherReg() const; + x86IndexReg GetEitherReg() const; x86ModRm& Add( s32 imm ) { @@ -285,10 +291,10 @@ namespace x86Emitter return *this; } - x86ModRm& Add( const x86Register& src ); + x86ModRm& Add( const x86IndexReg& src ); x86ModRm& Add( const x86ModRm& src ); - x86ModRm operator+( const x86Register& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); } x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } @@ -306,18 +312,27 @@ namespace x86Emitter class ModSib { public: - x86Register Base; // base register (no scale) - x86Register Index; // index reg gets multiplied by the scale + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale int Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. 
- ModSib( const x86ModRm& src ); - ModSib( x86Register base, x86Register index, int scale=0, s32 displacement=0 ); - ModSib( s32 disp ); + explicit ModSib( const x86ModRm& src ); + explicit ModSib( s32 disp ); + ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); - x86Register GetEitherReg() const; + x86IndexReg GetEitherReg() const; bool IsByteSizeDisp() const { return is_s8( Displacement ); } + ModSib& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); } + ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); } + protected: void Reduce(); }; @@ -327,9 +342,13 @@ namespace x86Emitter // struct x86IndexerType { - ModSib operator[]( x86Register src ) const + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. + const ModSib& operator[]( const ModSib& src ) const { return src; } + + ModSib operator[]( x86IndexReg src ) const { - return ModSib( src, x86Register::Empty ); + return ModSib( src, x86IndexReg::Empty ); } ModSib operator[]( const x86ModRm& src ) const @@ -349,14 +368,32 @@ namespace x86Emitter }; // ------------------------------------------------------------------------ - extern const x86Register eax; - extern const x86Register ebx; - extern const x86Register ecx; - extern const x86Register edx; - extern const x86Register esi; - extern const x86Register edi; - extern const x86Register ebp; - extern const x86Register esp; - extern x86IndexerType ptr; + + extern const x86Register32 eax; + extern const x86Register32 ebx; + extern const x86Register32 ecx; + extern const x86Register32 edx; + extern const x86Register32 esi; + extern const x86Register32 edi; + extern const x86Register32 ebp; + extern const x86Register32 esp; + + extern const x86Register16 ax; + extern const x86Register16 bx; + extern const x86Register16 cx; + extern 
const x86Register16 dx; + extern const x86Register16 si; + extern const x86Register16 di; + extern const x86Register16 bp; + extern const x86Register16 sp; + + extern const x86Register8 al; + extern const x86Register8 cl; + extern const x86Register8 dl; + extern const x86Register8 bl; + extern const x86Register8 ah; + extern const x86Register8 ch; + extern const x86Register8 dh; + extern const x86Register8 bh; } \ No newline at end of file From e3a87fecd926fa910f9a84fe9268b5dd97030e28 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 8 Apr 2009 10:07:14 +0000 Subject: [PATCH 12/40] microVU: -fixed rm instructions to work with Jake's emitter changes -implemented the case where upper and lower instructions write to same reg at once (the lower instruction's result is discarded) -implemented more first pass analyzing stuff -fixed various bugs... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@924 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 2 +- pcsx2/x86/microVU.h | 6 +-- pcsx2/x86/microVU_Alloc.h | 31 +++------------- pcsx2/x86/microVU_Alloc.inl | 14 +++---- pcsx2/x86/microVU_Analyze.inl | 33 +++++++++++++++++ pcsx2/x86/microVU_Compile.inl | 48 ++++++++++++++++++++---- pcsx2/x86/microVU_Lower.inl | 70 +++++++++++++++++------------------ pcsx2/x86/microVU_Misc.h | 9 +++-- pcsx2/x86/microVU_Misc.inl | 54 +++++++++++++-------------- 9 files changed, 157 insertions(+), 110 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 39e9697f38..0e537472e6 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -41,7 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->regs = vuRegsPtr; mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); - mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; + mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; mVU->cacheAddr = (vuIndex ? 
0x1e840000 : 0x0e840000); mVU->cache = NULL; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index a95251ffc0..049520d255 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -91,9 +91,9 @@ public: template struct microProgram { - u32 data[progSize]; + u32 data[progSize/4]; u32 used; // Number of times its been used - microBlockManager* block[progSize / 2]; + microBlockManager* block[progSize/8]; microAllocInfo allocInfo; }; @@ -116,7 +116,7 @@ struct microVU { u32 cacheAddr; // VU Cache Start Address static const u32 cacheSize = 0x500000; // VU Cache Size - microProgManager<0x1000> prog; // Micro Program Data + microProgManager<0x4000> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index f039c0a980..e277f12723 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -33,6 +33,8 @@ struct microRegInfo { u8 VI[32]; u8 q; u8 p; + u8 r; + u8 xgkick; }; struct microTempRegInfo { @@ -42,6 +44,8 @@ struct microTempRegInfo { u8 VIreg; // Index of the VI reg u8 q; // Holds cycle info for Q reg u8 p; // Holds cycle info for P reg + u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified) + u8 xgkick; // Holds the cycle info for XGkick }; template @@ -49,34 +53,9 @@ struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) - u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag - u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time. 
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block - u32 info[pSize]; // bit 00 = Lower Instruction is NOP - // bit 01 - // bit 02 - // bit 03 - // bit 04 - // bit 05 = Write to Q1 or Q2? - // bit 06 = Read Q1 or Q2? - // bit 07 = Read/Write to P1 or P2? - // bit 08 = Update Mac Flags? - // bit 09 = Update Status Flags? - // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance - // bit 11 - // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance - // bit 13 - // bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance - // bit 15 - // bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance - // bit 17 - // bit 18 = Used with bit 19 to make a 2-bit key for status flag instance - // bit 19 - // bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance - // bit 21 - // bit 22 = Read VI(Fs) from backup memory? - // bit 23 = Read VI(Ft) from backup memory? 
+ u32 info[pSize/8]; // Info for Instructions in current block }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index ca6cee9e09..3316d7360c 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) { // FMAC6 - Normal FMAC Opcodes (I Reg) //------------------------------------------------------------------ -#define getIreg(reg) { \ +#define getIreg(reg, modXYZW) { \ MOV32ItoR(gprT1, mVU->iReg); \ SSE2_MOVD_R_to_XMM(reg, gprT1); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 8); \ - if (!_XYZW_SS) { mVUunpack_xyzw(reg, reg, 0); } \ + if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ } microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { @@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { Fs = xmmFs; Ft = xmmFt; Fd = xmmFs; - getIreg(Ft); + getIreg(Ft, 1); getReg6(Fs, _Fs_); } @@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) { ACC = xmmACC; Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs; Ft = xmmFt; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; ACC = xmmACC; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) { Fd = xmmT1; ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) { ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? 
xmmACC : xmmT1; SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index a493da8968..dd5918cdf0 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,6 +143,25 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// R*** - R Reg Opcodes +//------------------------------------------------------------------ + +#define analyzeRreg() { mVUregsTemp.r = 1; } + +microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { + microVU* mVU = mVUx; + analyzeReg5(Fs, Fsf); + analyzeRreg(); +} + +microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); return; } + analyzeReg2(Ft); + analyzeRreg(); +} + //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ @@ -157,4 +176,18 @@ microVUt(void) mVUanalyzeSflag(int It) { analyzeVIreg2(It, 1); } +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + +#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); } +#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; } + +microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { + microVU* mVU = mVUx; + analyzeVIreg1(Fs); + analyzeXGkick1(); + analyzeXGkick2(xCycles); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 686e4a6563..554cf2612b 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,19 +29,51 @@ } \ } -#define curI mVUcurProg.data[iPC] -#define setCode() { mVU->code = curI; } -#define 
incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } -#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define curI mVUcurProg.data[iPC] +#define setCode() { mVU->code = curI; } +#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incCycles(x) { mVUincCycles(x); } +#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } + +microVUt(void) mVUincCycles(int x) { + mVUcycles += x; + for (int z = 31; z > 0; z--) { + calcCycles(mVUregs.VF[z].x, x); + calcCycles(mVUregs.VF[z].y, x); + calcCycles(mVUregs.VF[z].z, x); + calcCycles(mVUregs.VF[z].w, x); + } + for (int z = 16; z > 0; z--) { + calcCycles(mVUregs.VI[z], x); + } + if (mVUregs.q) { + calcCycles(mVUregs.q, x); + if (!mVUregs.q) {} // Do Status Flag Merging Stuff? + } + calcCycles(mVUregs.p, x); + calcCycles(mVUregs.r, x); + calcCycles(mVUregs.xgkick, x); +} microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); + if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP + //mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?) + mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?) + mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); + mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); + mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); + } mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; - mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? 
(aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); + mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg; mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; mVUregs.q = mVUregsTemp.q; mVUregs.p = mVUregsTemp.p; + mVUregs.r = mVUregsTemp.r; + mVUregs.xgkick = mVUregsTemp.xgkick; } microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { @@ -70,14 +102,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); + incCycles(1); } // Second Pass - iPC = startPC; + iPC = mVUstartPC; setCode(); for (bool x = 1; x; ) { // - // ToDo: status/mac flag stuff + // ToDo: status/mac flag stuff? // if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } @@ -85,6 +118,7 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, mVUopU(); if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } else { incPC(1); mVUopL(); } + if (!isBdelay) { incPC(1); } else { incPC(-2); // Go back to Branch Opcode diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index a7e8438c24..5ac22e06e7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -648,18 +648,9 @@ microVUf(void) mVU_ISUBIU() { } //------------------------------------------------------------------ -// MOVE/MFIR/MFP/MTIR/MR32 +// MFIR/MFP/MOVE/MR32/MTIR //------------------------------------------------------------------ -microVUf(void) mVU_MOVE() { - microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } - else { - mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); - } -} - microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -681,12 +672,12 @@ microVUf(void) mVU_MFP() { } } 
-microVUf(void) mVU_MTIR() { +microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } else { - MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); - mVUallocVIb(gprT1, _Ft_); + mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } } @@ -700,6 +691,15 @@ microVUf(void) mVU_MR32() { } } +microVUf(void) mVU_MTIR() { + microVU* mVU = mVUx; + if (!recPass) {} + else { + MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); + mVUallocVIb(gprT1, _Ft_); + } +} + //------------------------------------------------------------------ // ILW/ILWR //------------------------------------------------------------------ @@ -716,7 +716,7 @@ microVUf(void) mVU_ILW() { mVUallocVIa(gprT1, _Fs_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. + MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -728,13 +728,13 @@ microVUf(void) mVU_ILWR() { if (!recPass) { /*If (!_Ft_) nop();*/ } else { if (!_Fs_) { - MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS ); + MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); mVUallocVIb(gprT1, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. 
+ MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -762,10 +762,10 @@ microVUf(void) mVU_ISW() { mVUallocVIa(gprT2, _Ft_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } @@ -785,10 +785,10 @@ microVUf(void) mVU_ISWR() { mVUallocVIa(gprT1, _Fs_); mVUallocVIa(gprT2, _Ft_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } @@ -819,7 +819,7 @@ microVUf(void) mVU_LQD() { microVU* mVU = mVUx; if (!recPass) {} else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && _Ft_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. 
- if (_Ft_) { + if (_Ft_ && !noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -840,13 +840,13 @@ microVUf(void) mVU_LQI() { microVU* mVU = mVUx; if (!recPass) {} else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && _Ft_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { mVUallocVIa((_Ft_) ? gprT1 : gprT2, _Fs_); - if (_Ft_) { + if (_Ft_ && !noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -925,7 +925,7 @@ microVUf(void) mVU_SQI() { microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprR, _Fs_, _Fsf_); @@ -938,7 +938,7 @@ microVUf(void) mVU_RINIT() { microVUt(void) mVU_RGET_() { microVU* mVU = mVUx; - if (_Ft_) { + if (!noWriteVF) { if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR); if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR); if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR); @@ -948,13 +948,13 @@ microVUt(void) mVU_RGET_() { microVUf(void) mVU_RGET() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 1); } else { mVU_RGET_(); } } microVUf(void) mVU_RNEXT() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 0); } else { // algorithm from www.project-fao.org MOV32RtoR(gprT1, gprR); @@ -976,7 +976,7 @@ microVUf(void) mVU_RNEXT() { microVUf(void) mVU_RXOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprT1, _Fs_, _Fsf_); @@ -1039,7 +1039,7 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); } microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { 
mVUanalyzeXGkick(_Fs_, 4); } else { mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 90232a5088..dd52f1b489 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -152,7 +152,6 @@ declareAllVariables #define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) -#define incCycles(x) { mVUcycles += x; } #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -168,12 +167,13 @@ declareAllVariables #define _doStatus (1<<9) #define _fmInstance (3<<10) #define _fsInstance (3<<12) -#define _fcInstance (3<<14) -#define _fpmInstance (3<<10) #define _fpsInstance (3<<12) +#define _fcInstance (3<<14) +#define _fpcInstance (3<<14) #define _fvmInstance (3<<16) #define _fvsInstance (3<<18) -#define _fvcInstance (3<<14) +#define _fvcInstance (3<<20) +#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -195,6 +195,7 @@ declareAllVariables #define fvmInstance ((mVUinfo >> 16) & 3) #define fvsInstance ((mVUinfo >> 18) & 3) #define fvcInstance ((mVUinfo >> 20) & 3) +#define noWriteVF (mVUinfo & (1<<21)) //#define getFs (mVUinfo & (1<<13)) //#define getFt (mVUinfo & (1<<14)) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 6977b765e1..208a3852c7 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) { microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { switch( xyzw ) { - case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X - case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y - case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // 
Z - case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W - default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break; + case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X + case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y + case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z + case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W + default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break; } } @@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { switch ( xyzw ) { case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YW case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4); break; // YZ case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YZW case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XW case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; //XZ - case 11: 
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); + case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; //XZW case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XYW case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; // XYZ - case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X - case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y - case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z - case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W - case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY - case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW - default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW + case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X + case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y + case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z + case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W + case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY + case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW + default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW } } From 08c6f68abaa47070a272a3e840b6ebf29e3fc303 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 8 Apr 2009 13:34:51 +0000 Subject: [PATCH 13/40] Fix Linux again (and again and again and again...) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@925 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Common.h | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 5 ++++- pcsx2/x86/ix86/Makefile.am | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 46135a75ac..4daeee5663 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -31,8 +31,8 @@ #include "System.h" -#include "SaveState.h" #include "Plugins.h" +#include "SaveState.h" #include "DebugTools/Debug.h" #include "Memory.h" diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index cbe8ca8130..60fa164cc6 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -131,8 +131,11 @@ static void iDumpBlock( int startpc, u8 * ptr ) Console::Status( "dump1 %x:%x, %x", params startpc, pc, cpuRegs.cycle ); Path::CreateDirectory( "dumps" ); +#ifndef __LINUX__ ssprintf( filename, "dumps\\R5900dump%.8X.txt", startpc ); - +#else + ssprintf( filename, "dumps/R5900dump%.8X.txt", startpc ); +#endif fflush( stdout ); // f = fopen( "dump1", "wb" ); // fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 880d6f18e3..30e16e264a 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,4 +1,5 @@ INCLUDES = -I@srcdir@/.. 
-I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h ix86_group1.inl \ No newline at end of file +libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_group1.cpp \ +ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h From f4e9178c06009f03bc00e9f0dd44f4c358b027f1 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Wed, 8 Apr 2009 17:15:51 +0000 Subject: [PATCH 14/40] backing up some changes git-svn-id: http://pcsx2.googlecode.com/svn/trunk@926 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/R3000A.cpp | 2 +- pcsx2/R3000A.h | 2 +- pcsx2/R5900.cpp | 14 ++++++-------- pcsx2/R5900.h | 8 ++++---- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index 50931387a3..c821733549 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -224,7 +224,7 @@ static __forceinline void _psxTestInterrupts() } } -void psxBranchTest() +__releaseinline void psxBranchTest() { if( psxTestCycle( psxNextsCounter, psxNextCounter ) ) { diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 7ab13663e8..9a93b851c1 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -200,7 +200,7 @@ extern R3000Acpu psxRec; void psxReset(); void psxShutdown(); void psxException(u32 code, u32 step); -void psxBranchTest(); +extern void psxBranchTest(); void psxExecuteBios(); void psxMemReset(); diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index b19d06dd0b..35e7dda1fe 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -106,7 +106,7 @@ void cpuShutdown() disR5900FreeSyms(); } -__releaseinline void __fastcall cpuException(u32 code, u32 bd) +__releaseinline void cpuException(u32 code, u32 bd) { 
cpuRegs.branch = 0; // Tells the interpreter that an exception occurred during a branch. bool errLevel2, checkStatus; @@ -244,7 +244,7 @@ void cpuTestMissingHwInts() { } // sets a branch test to occur some time from an arbitrary starting point. -__forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) +__forceinline void cpuSetNextBranch( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. @@ -252,20 +252,18 @@ __forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) if( (int)(g_nextBranchCycle - startCycle) > delta ) { g_nextBranchCycle = startCycle + delta; - return 1; } - return 0; } // sets a branch to occur some time from the current cycle -__forceinline int __fastcall cpuSetNextBranchDelta( s32 delta ) +__forceinline void cpuSetNextBranchDelta( s32 delta ) { - return cpuSetNextBranch( cpuRegs.cycle, delta ); + cpuSetNextBranch( cpuRegs.cycle, delta ); } // tests the cpu cycle agaisnt the given start and delta values. // Returns true if the delta time has passed. -__forceinline int __fastcall cpuTestCycle( u32 startCycle, s32 delta ) +__forceinline int cpuTestCycle( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't explode // if the startCycle is ahead of our current cpu cycle. @@ -279,7 +277,7 @@ __forceinline void cpuSetBranch() g_nextBranchCycle = cpuRegs.cycle; } -void cpuClearInt( uint i ) +__forceinline void cpuClearInt( uint i ) { jASSUME( i < 32 ); cpuRegs.interrupt &= ~(1 << i); diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index b2dd4db46f..cb482ee6bf 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -257,14 +257,14 @@ extern void cpuInit(); extern void cpuReset(); // can throw Exception::FileNotFound. 
extern void cpuShutdown(); extern void cpuExecuteBios(); -extern void __fastcall cpuException(u32 code, u32 bd); +extern void cpuException(u32 code, u32 bd); extern void cpuTlbMissR(u32 addr, u32 bd); extern void cpuTlbMissW(u32 addr, u32 bd); extern void cpuTestHwInts(); -extern int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ); -extern int __fastcall cpuSetNextBranchDelta( s32 delta ); -extern int __fastcall cpuTestCycle( u32 startCycle, s32 delta ); +extern void cpuSetNextBranch( u32 startCycle, s32 delta ); +extern void cpuSetNextBranchDelta( s32 delta ); +extern int cpuTestCycle( u32 startCycle, s32 delta ); extern void cpuSetBranch(); extern bool _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index a506bdc4a8..38e2021609 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -798,7 +798,7 @@ void psxSetBranchImm( u32 imm ) // So for now these are new settings that work. // (rama) -static u32 psxScaleBlockCycles() +static __forceinline u32 psxScaleBlockCycles() { return s_psxBlockCycles * (CHECK_IOP_CYCLERATE ? 2 : 1); } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 60fa164cc6..2dd76b7778 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -683,7 +683,7 @@ static void __naked DispatcherReg() } } -__forceinline void recExecute() +void recExecute() { // Optimization note : Compared pushad against manually pushing the regs one-by-one. // Manually pushing is faster, especially on Core2's and such. :) From 57f9c2bc64ff054fc703641836e958b331ce2f76 Mon Sep 17 00:00:00 2001 From: Nneeve Date: Wed, 8 Apr 2009 18:19:48 +0000 Subject: [PATCH 15/40] Disabled a VU recompiler option that caused some SPS in Ratchet and Clank and didn't actually affect speed. Modified VU stalling logic of MR32 and MTIR instructions and modified FDIV stalling. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@927 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VUops.cpp | 14 ++++++++++++-- pcsx2/x86/iMMI.cpp | 11 ++--------- pcsx2/x86/iVUmicro.cpp | 3 ++- pcsx2/x86/iVUzerorec.cpp | 8 ++++---- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index e043760350..299803d2d6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2508,13 +2508,23 @@ void _vuRegsMTIR(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; VUregsn->VFwrite = 0; VUregsn->VFread0 = _Fs_; - VUregsn->VFr0xyzw= _XYZW; + VUregsn->VFr0xyzw= 1 << (3-_Fsf_); VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = GET_VF0_FLAG(_Fs_); } -VUREGS_FTFS(MR32); +void _vuRegsMR32(VURegs * VU, _VURegsNum *VUregsn) { + VUregsn->pipe = VUPIPE_FMAC; + VUregsn->VFwrite = _Ft_; + VUregsn->VFwxyzw = _XYZW; + VUregsn->VFread0 = _Fs_; + VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0xf); //rotate + VUregsn->VFread1 = 0; + VUregsn->VFr1xyzw = 0xff; + VUregsn->VIwrite = 0; + VUregsn->VIread = (_Ft_ ? GET_VF0_FLAG(_Fs_) : 0); +} void _vuRegsLQ(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 8dabe0b5c8..e031a7a6c6 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -2676,9 +2676,6 @@ CPU_SSE_XMMCACHE_END recCall( Interp::PHMADH, _Rd_ ); } -//////////////////////////////////////////////////// -//upper word of each doubleword in LO and HI is undocumented/undefined -//contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPMSUBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) @@ -2740,12 +2737,8 @@ CPU_SSE_XMMCACHE_END } //////////////////////////////////////////////////// - -// rs = ... a1 a0 -// rt = ... b1 b0 -// rd = ... a1*b1 - a0*b0 -// hi = ... -// lo = ... 
(undefined by doc)NOT(a1*b1), a1*b1 - a0*b0 +//upper word of each doubleword in LO and HI is undocumented/undefined +//it contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPHMSBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index 2422e5ad10..9335151798 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -280,6 +280,7 @@ void _recvuIALUTestStall(VURegs * VU, int reg) { VU->ialu[i].enable = 0; vucycle+= cycle; + _recvuTestPipes(VU, true); } void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { @@ -387,7 +388,7 @@ void _recvuFlushFDIV(VURegs * VU) { if (VU->fdiv.enable == 0) return; - cycle = VU->fdiv.Cycle - (vucycle - VU->fdiv.sCycle); + cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12) // Console::WriteLn("waiting FDIV pipe %d", params cycle); VU->fdiv.enable = 0; vucycle+= cycle; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 78ab51b4f6..0f454e638e 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -58,7 +58,7 @@ extern void iDumpVU1Registers(); #define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... #ifndef _DEBUG -#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) +//#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) (nothing noticable speed-wise, causes SPS in Ratchet and clank (Nneeve) ) #endif #define SUPERVU_CHECKCONDITION 0 // has to be 0!! 
@@ -2060,9 +2060,9 @@ void VuBaseBlock::AssignVFRegs() _freeXMMreg(free1); _freeXMMreg(free2); } - else if( regs->VIwrite & (1<VIwrite & (1<VIwrite & (1<VIwrite & (1<VIread & (1< Date: Wed, 8 Apr 2009 21:19:50 +0000 Subject: [PATCH 16/40] Fixed Gradius V, had to destroy the templates arcum did a bit to get it to work without ape escape crashing (sorry mate lol. Took out my V3_# discovery, ape escape is getting spikey now, so ill just remove it. Also altered V2_# to work slightly different incase the packet starts on the Y vector, it now wont suffer underrunning (possible bad data) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@928 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/DebugTools/Debug.h | 2 + pcsx2/Vif.cpp | 108 ++++++++++++++------------------------- pcsx2/VifDma.cpp | 14 ++--- 3 files changed, 47 insertions(+), 77 deletions(-) diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index 1e42cea714..5abcd4f2d0 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -190,6 +190,8 @@ extern bool SrcLog_GPU( const char* fmt, ... 
); #define MEMCARDS_LOG 0&& #endif +//#define VIFUNPACKDEBUG //enable unpack debugging output + #ifdef VIFUNPACKDEBUG #define VIFUNPACK_LOG VIF_LOG #else diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 3e6626391f..792cd4ee44 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -333,89 +333,57 @@ static __releaseinline void writeW(u32 &dest, u32 data) template static void _UNPACKpart(u32 offnum, u32 &x, T y) { - if (_vifRegs->offset == offnum) - { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - _vifRegs->offset++; - } -} -template -static void _UNPACKpart(u32 offnum, u32 &x, T y, int size) -{ - if(size == 0) return; - - if (_vifRegs->offset == offnum) + switch (offnum) { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - size--; - _vifRegs->offset++; + case OFFSET_X: + writeX(x,y); + break; + case OFFSET_Y: + writeY(x,y); + break; + case OFFSET_Z: + writeZ(x,y); + break; + case OFFSET_W: + writeW(x,y); + break; + default: + break; } + _vifRegs->offset++; + } template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data, size); - _UNPACKpart(OFFSET_Y, *dest++, *data, size); - _UNPACKpart(OFFSET_Z, *dest++, *data, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } 
template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data--, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++); - _UNPACKpart(OFFSET_W, *dest , *data); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } + if( _vifRegs->offset == OFFSET_Z )_UNPACKpart(OFFSET_Z, *dest++, *dest-2); + if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest , *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - if((_vif->qwcalign % 24) == 0) - _UNPACKpart(OFFSET_W, *dest, 0); - else - _UNPACKpart(OFFSET_W, *dest, *data); + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest, *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } @@ -423,19 +391,19 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) template void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3), size); - _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2), size); - _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7), size); - _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8), size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 32840541c0..cd7866e224 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -29,7 +29,7 @@ using namespace std; // for min / max -//#define VIFUNPACKDEBUG //enable 
unpack debugging output + #define gif ((DMACh*)&PS2MEM_HW[0xA000]) @@ -379,6 +379,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); + VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); #ifdef _DEBUG if (v->size != size) { @@ -422,6 +423,8 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); + if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) + VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); // SSE doesn't handle such small data if (v->size != (size >> 2)) ProcessMemSkip(size, unpackType, VIFdmanum); @@ -439,8 +442,9 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma unpacksize = 0; Console::WriteLn("Unpack align offset = 0"); } + VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); destinc = (4 - ft->qsize) + unpacksize; - vif->qwcalign += unpacksize * ft->dsize; + func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; cdata += unpacksize * ft->dsize; @@ -482,7 +486,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { - vif->qwcalign += ft->gsize; func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -596,7 +599,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { - vif->qwcalign += ft->gsize; //Must do this before the transfer, else the 
confusing packets dont go right :P func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -651,7 +653,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ - vif->qwcalign += (size / ft->dsize) * ft->dsize; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -789,7 +790,6 @@ static __forceinline void vif0UNPACK(u32 *data) len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5; } - vif0.qwcalign = 0; vif0.cl = 0; vif0.tag.cmd = vif0.cmd; vif0.tag.addr &= 0xfff; @@ -1519,7 +1519,7 @@ static __forceinline void vif1UNPACK(u32 *data) else vif1.tag.addr = vif1Regs->code & 0x3ff; - vif1.qwcalign = 0; + vif1Regs->offset = 0; vif1.cl = 0; vif1.tag.addr <<= 4; vif1.tag.cmd = vif1.cmd; From 76b52a97a1b82b2d9979246da10c5fc6628124f9 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Thu, 9 Apr 2009 02:02:49 +0000 Subject: [PATCH 17/40] LilyPad: Small/large motor defaults should work for most devices, when creating new effect bindings. Keyboard queue fixed up a bit, mainly to favor escape down when PCSX2 is dying. Fix for ignore bindings being swapped with the swap pad bindings buttons. Updated version number, thinking of releasing soon. No known bugs, not that much more to do. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@929 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sio.cpp | 7 +-- plugins/LilyPad/Config.cpp | 55 ++++++++++++++++++++--- plugins/LilyPad/KeyboardQueue.cpp | 75 +++++++++++++++---------------- plugins/LilyPad/LilyPad.cpp | 22 +++++---- plugins/LilyPad/XInput.cpp | 9 ++-- 5 files changed, 105 insertions(+), 63 deletions(-) diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index b40fdbccbd..45c3274a2a 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -451,16 +451,16 @@ void SIO_CommandWrite(u8 value,int way) { break; case 0x21: // Set pad slot. 
- sio.mtapst = 0x21; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; case 0x22: // Set memcard slot. - sio.mtapst = 0x22; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; } - // Commented out values are from original code. Break multitap in bios. + // Commented out values are from original code. They break multitap in bios. sio.buf[sio.bufcount-1]=0;//'+'; sio.buf[sio.bufcount]=0;//'Z'; return; @@ -554,6 +554,7 @@ void InitializeSIO(u8 value) int port = sio.GetMultitapPort(); if (!IsMtapPresent(port)) { + // If "unplug" multitap mid game, set active slots to 0. sio.activePadSlot[port] = 0; sio.activeMemcardSlot[port] = 0; } diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 39e9e30ad1..2e2050a573 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1404,18 +1404,59 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l if (i >= 0) { unsigned int index = (unsigned int)SendMessage(GetDlgItem(hWnd, IDC_FORCEFEEDBACK), CB_GETITEMDATA, i, 0); if (index < (unsigned int) dm->numDevices) { + Device *dev = dm->devices[index]; ForceFeedbackBinding *b; - int count = CreateEffectBinding(dm->devices[index], 0, port, slot, cmd-ID_BIG_MOTOR, &b); + int count = CreateEffectBinding(dev, 0, port, slot, cmd-ID_BIG_MOTOR, &b); if (b) { - for (int j=0; j<2 && j devices[index]->numFFAxes; j++) { - b->axes[j].force = BASE_SENSITIVITY; + int needSet = 1; + if (dev->api == XINPUT && dev->numFFAxes == 2) { + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[0].force = BASE_SENSITIVITY; + } + else { + b->axes[1].force = BASE_SENSITIVITY; + } + } + else if (dev->api == DI) { + int bigIndex=0, littleIndex=0; + int constantEffect = 0, squareEffect = 0; + int j; + for (j=0; jnumFFAxes; j++) { + // DI object instance. 0 is x-axis, 1 is y-axis. 
+ int instance = (dev->ffAxes[j].id>>8)&0xFFFF; + if (instance == 0) { + bigIndex = j; + } + else if (instance == 1) { + littleIndex = j; + } + } + for (j=0; jnumFFEffectTypes; j++) { + if (!wcsicmp(L"13541C20-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) constantEffect = j; + if (!wcsicmp(L"13541C22-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) squareEffect = j; + } + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[bigIndex].force = BASE_SENSITIVITY; + b->axes[littleIndex].force = 1; + b->effectIndex = constantEffect; + } + else { + b->axes[bigIndex].force = 1; + b->axes[littleIndex].force = BASE_SENSITIVITY; + b->effectIndex = squareEffect; + } + } + if (needSet) { + for (int j=0; j<2 && j numFFAxes; j++) { + b->axes[j].force = BASE_SENSITIVITY; + } } - } - if (count >= 0) { - PropSheet_Changed(hWndProp, hWnd); UnselectAll(hWndList); ListView_SetItemState(hWndList, count, LVIS_SELECTED, LVIS_SELECTED); } + PropSheet_Changed(hWndProp, hWnd); } } } @@ -1886,6 +1927,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L config.padConfigs[port1][slot1] = config.padConfigs[port2][slot2]; config.padConfigs[port2][slot2] = padCfgTemp; for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; PadBindings bindings = dm->devices[i]->pads[port1][slot1]; dm->devices[i]->pads[port1][slot1] = dm->devices[i]->pads[port2][slot2]; dm->devices[i]->pads[port2][slot2] = bindings; @@ -1893,6 +1935,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L } else { for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; free(dm->devices[i]->pads[port1][slot1].bindings); for (int j=0; jdevices[i]->pads[port1][slot1].numFFBindings; j++) { free(dm->devices[i]->pads[port1][slot1].ffBindings[j].axes); diff --git a/plugins/LilyPad/KeyboardQueue.cpp b/plugins/LilyPad/KeyboardQueue.cpp index 39ad231bf3..70badb83de 100644 --- 
a/plugins/LilyPad/KeyboardQueue.cpp +++ b/plugins/LilyPad/KeyboardQueue.cpp @@ -1,13 +1,16 @@ // This is undoubtedly completely unnecessary. #include "KeyboardQueue.h" -static int numQueuedEvents = 0; -static keyEvent queuedEvents[20]; - // What MS calls a single process Mutex. Faster, supposedly. // More importantly, can be abbreviated, amusingly, as cSection. static CRITICAL_SECTION cSection; -static int csInitialized = 0; +static u8 csInitialized = 0; + +#define EVENT_QUEUE_LEN 16 +// Actually points one beyond the last queued event. +static u8 lastQueuedEvent = 0; +static u8 nextQueuedEvent = 0; +static keyEvent queuedEvents[EVENT_QUEUE_LEN]; void QueueKeyEvent(int key, int event) { if (!csInitialized) { @@ -15,50 +18,42 @@ void QueueKeyEvent(int key, int event) { InitializeCriticalSection(&cSection); } EnterCriticalSection(&cSection); - if (numQueuedEvents >= 15) { - // Generally shouldn't happen. - for (int i=0; i<15; i++) { - queuedEvents[i] = queuedEvents[i+5]; - } - numQueuedEvents = 15; + + // Don't queue events if escape is on top of queue. This is just for safety + // purposes when a game is killing the emulator for whatever reason. + if (nextQueuedEvent == lastQueuedEvent || + queuedEvents[nextQueuedEvent].key != VK_ESCAPE || + queuedEvents[nextQueuedEvent].evt != KEYPRESS) { + // Clear queue on escape down, bringing escape to front. May do something + // with shift/ctrl/alt and F-keys, later. + if (event == KEYPRESS && key == VK_ESCAPE) { + nextQueuedEvent = lastQueuedEvent; + } + + queuedEvents[lastQueuedEvent].key = key; + queuedEvents[lastQueuedEvent].evt = event; + + lastQueuedEvent = (lastQueuedEvent + 1) % EVENT_QUEUE_LEN; + // If queue wrapped around, remove last element. + if (nextQueuedEvent == lastQueuedEvent) { + nextQueuedEvent = (nextQueuedEvent + 1) % EVENT_QUEUE_LEN; + } } - int index = numQueuedEvents; - // Move escape to top of queue. May do something - // with shift/ctrl/alt and F-keys, later. 
- if (event == KEYPRESS && key == VK_ESCAPE) { - while (index) { - queuedEvents[index-1] = queuedEvents[index]; - index--; - } - } - queuedEvents[index].key = key; - queuedEvents[index].evt = event; - numQueuedEvents ++; LeaveCriticalSection(&cSection); } int GetQueuedKeyEvent(keyEvent *event) { - int out = 0; - if (numQueuedEvents) { - EnterCriticalSection(&cSection); - // Shouldn't be 0, but just in case... - if (numQueuedEvents) { - *event = queuedEvents[0]; - numQueuedEvents--; - out = 1; - for (int i=0; iAddDevice(new XInputDevice(i, temp)); } From 62d6c0f3e73a09bd66deefdff3602bf82a36cc45 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Thu, 9 Apr 2009 02:07:45 +0000 Subject: [PATCH 18/40] LilyPad: Debug line removed. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@930 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/XInput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/LilyPad/XInput.cpp b/plugins/LilyPad/XInput.cpp index 00e4900086..68bf7cd087 100644 --- a/plugins/LilyPad/XInput.cpp +++ b/plugins/LilyPad/XInput.cpp @@ -194,7 +194,7 @@ void EnumXInputDevices() { pXInputEnable(1); for (i=0; i<4; i++) { XINPUT_STATE state; - if (!i || ERROR_SUCCESS == pXInputGetState(i, &state)) { + if (ERROR_SUCCESS == pXInputGetState(i, &state)) { wsprintfW(temp, L"XInput Pad %i", i); dm->AddDevice(new XInputDevice(i, temp)); } From 88ae29ac56800afdd63bd943cf857f9eff2bf153 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 9 Apr 2009 15:22:59 +0000 Subject: [PATCH 19/40] More Vif Unpacking cleanup. (And probably not the last of it.) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@931 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 136 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 30 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 792cd4ee44..0ae8139a95 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -333,7 +333,6 @@ static __releaseinline void writeW(u32 &dest, u32 data) template static void _UNPACKpart(u32 offnum, u32 &x, T y) { - switch (offnum) { case OFFSET_X: @@ -352,59 +351,136 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) break; } _vifRegs->offset++; - } template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } - if( _vifRegs->offset == OFFSET_Z )_UNPACKpart(OFFSET_Z, *dest++, *dest-2); - if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest , *data); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + 
switch (_vifRegs->offset) + { + case OFFSET_X: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + case OFFSET_Y: + _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; + break; + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } - //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest, *data); - - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest, *data); + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x001f) << 3)); + size--; + break; + case OFFSET_Y: + _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x03e0) >> 2)); + size--; + break; + case OFFSET_Z: + 
_UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x7c00) >> 7)); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest, ((*data & 0x8000) >> 8)); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) From cf0c393514fac218c965575746d537dfb6320eef Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 20:15:01 +0000 Subject: [PATCH 20/40] Fixed Outlaw Tennis error on the loading bar, as a strange side effect, this fixes the missing textures in Crash N Burn too. What is more annoying is this code use to be in the emulator ages ago (before processing skipping) and it was removed as we didn't think it actually had a use! :D git-svn-id: http://pcsx2.googlecode.com/svn/trunk@933 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index cd7866e224..099470d993 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -328,6 +328,16 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; } + //Append any skips in to the equasion + + if (vifRegs->cycle.cl > vifRegs->cycle.wl) + { + VIFUNPACK_LOG("Old addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); + vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16); + VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); + } + + if ((vif->tag.addr & 0xf) == unpack->gsize) { vif->tag.addr += 16 - unpack->gsize; From feaed9f4fa2e409402e02512d20ea94e178389a8 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 21:02:37 +0000 Subject: [PATCH 21/40] The last changes to clean up Vif wouldn't have worked in some situations, tried to rearrange it and space things out and skipping unnecessary checks 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@935 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 145 ++++++++++++++++---------------------------------- 1 file changed, 46 insertions(+), 99 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 0ae8139a95..23587c1f4a 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -356,77 +356,56 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - while (size > 0) - { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } - } + //S-# will always be a complete packet, no matter what. So we can skip the offset bits + writeX(*dest++, *data); + writeY(*dest++, *data); + writeZ(*dest++, *data); + writeW(*dest , *data); } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - while (size > 0) + if(_vifRegs->offset == OFFSET_X && size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - case OFFSET_Y: - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; - break; - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Y && size > 0) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data); + } + if(_vifRegs->offset == OFFSET_Z) + { + _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + } + if(_vifRegs->offset == OFFSET_W) + { + _UNPACKpart(_vifRegs->offset, *dest, *data); + 
_vifRegs->offset = 0; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - while (size > 0) + if(_vifRegs->offset == OFFSET_X && size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest, *data); - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Y && size > 0) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Z) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_W) + { + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + _UNPACKpart(_vifRegs->offset, *dest, *data); + _vifRegs->offset = 0; } } @@ -435,52 +414,20 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } + + if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - while (size > 0) - { - switch (_vifRegs->offset) - { - case OFFSET_X: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x001f) << 3)); - size--; - break; - case OFFSET_Y: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x03e0) >> 2)); - size--; - break; - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x7c00) >> 7)); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest, ((*data & 0x8000) >> 8)); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } - } + //As with S-#, this will always be a complete packet + writeX(*dest++, ((*data & 0x001f) << 3)); + writeY(*dest++, ((*data & 0x03e0) >> 2)); + writeZ(*dest++, ((*data & 0x7c00) >> 7)); + writeW(*dest, ((*data & 0x8000) >> 8)); } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) From 08d6f10d57fd1baa7a3dfe76a36a059a97fef8ae Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 23:06:11 +0000 Subject: [PATCH 22/40] Slap my wrists for the silliest error ever :p only thing that gave it away was the sirens on top of the heads in Ape Escape 3 had no light lol git-svn-id: http://pcsx2.googlecode.com/svn/trunk@937 96395faa-99c1-11dd-bbfe-3dabce05a288 
--- pcsx2/Vif.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 23587c1f4a..3ae921089d 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -369,10 +369,12 @@ void __fastcall UNPACK_V2(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_X && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Y && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; } if(_vifRegs->offset == OFFSET_Z) { @@ -391,10 +393,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_X && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Y && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Z) { From ccacbedc50b575271d456524366efe4a12732015 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 23:20:13 +0000 Subject: [PATCH 23/40] last silly mistake, promise :P git-svn-id: http://pcsx2.googlecode.com/svn/trunk@938 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 3ae921089d..147f450188 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -400,7 +400,7 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) _UNPACKpart(_vifRegs->offset, *dest++, *data++); size--; } - if(_vifRegs->offset == OFFSET_Z) + if(_vifRegs->offset == OFFSET_Z && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); } From 98258eeffe83a5006006c29727940602cfbe6be3 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 9 Apr 2009 23:57:58 +0000 Subject: [PATCH 24/40] Yes, more Vif work. writeX, writeY, writeZ, and writeW are all merged into one function. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@939 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 334 +++++++++----------------------------------------- pcsx2/Vif.h | 90 ++++++++++++++ 2 files changed, 145 insertions(+), 279 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 147f450188..684927a591 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -44,7 +44,7 @@ enum UnpackOffset OFFSET_X = 0, OFFSET_Y = 1, OFFSET_Z = 2, - OFFSET_W =3 + OFFSET_W = 3 }; #define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) @@ -54,25 +54,29 @@ __forceinline static int _limit(int a, int max) return (a > max) ? max : a; } -static __releaseinline void writeX(u32 &dest, u32 data) +static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { int n; + u32 vifRowReg = getVifRowRegs(offnum); if (_vifRegs->code & 0x10000000) { switch (_vif->cl) { case 0: - n = (_vifRegs->mask) & 0x3; + if (offnum == OFFSET_X) + n = (_vifRegs->mask) & 0x3; + else + n = (_vifRegs->mask >> (offnum * 2)) & 0x3; break; case 1: - n = (_vifRegs->mask >> 8) & 0x3; + n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; case 2: - n = (_vifRegs->mask >> 16) & 0x3; + n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; default: - n = (_vifRegs->mask >> 24) & 0x3; + n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; } } @@ -85,271 +89,48 @@ static __releaseinline void writeX(u32 &dest, u32 data) { dest = data; } - else if (_vifRegs->mode == 1) + else switch (_vifRegs->mode) { - dest = data + _vifRegs->r0; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r0 += data; - dest = _vifRegs->r0; - } - else - { - dest = data; + case 1: + dest = data + vifRowReg; + break; + case 2: + vifRowReg += data; + dest = vifRowReg; + break; + default: + dest = data; + break; } break; case 1: - dest = _vifRegs->r0; + dest = vifRowReg; break; case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - 
default: - dest = _vifRegs->c3; - break; - } + if (_vif->cl > 2) + dest = getVifColRegs(3); + else + dest = getVifColRegs(_vif->cl); break; } + setVifRowRegs(offnum, vifRowReg); // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } -static __releaseinline void writeY(u32 &dest, u32 data) +template +static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y, int size) { - int n; - - if (_vifRegs->code & 0x10000000) + if (size > 0) { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 2) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 10) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 18) & 0x3; - break; - default: - n = (_vifRegs->mask >> 26) & 0x3; - break; - } + writeXYZW(offnum, x, y); + _vifRegs->offset++; } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r1; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r1 += data; - dest = _vifRegs->r1; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r1; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r1,data); -} - -static __releaseinline void writeZ(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 4) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 12) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 20) & 0x3; - break; - default: - n = (_vifRegs->mask >> 28) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r2; - } - else if 
(_vifRegs->mode == 2) - { - _vifRegs->r2 += data; - dest = _vifRegs->r2; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r2; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r2,data); -} - -static __releaseinline void writeW(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 6) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 14) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 22) & 0x3; - break; - default: - n = (_vifRegs->mask >> 30) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r3; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r3 += data; - dest = _vifRegs->r3; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r3; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data); } template -static void _UNPACKpart(u32 offnum, u32 &x, T y) +static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y) { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } + writeXYZW(offnum, x, y); _vifRegs->offset++; } @@ -357,24 +138,22 @@ template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { //S-# will always be 
a complete packet, no matter what. So we can skip the offset bits - writeX(*dest++, *data); - writeY(*dest++, *data); - writeZ(*dest++, *data); - writeW(*dest , *data); + writeXYZW(OFFSET_X, *dest++, *data); + writeXYZW(OFFSET_Y, *dest++, *data); + writeXYZW(OFFSET_Z, *dest++, *data); + writeXYZW(OFFSET_W, *dest , *data); } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X && size > 0) + if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Y && size > 0) + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data, size--); } if(_vifRegs->offset == OFFSET_Z) { @@ -383,24 +162,22 @@ void __fastcall UNPACK_V2(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_W) { _UNPACKpart(_vifRegs->offset, *dest, *data); - _vifRegs->offset = 0; + _vifRegs->offset = OFFSET_X; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X && size > 0) + if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Y && size > 0) + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Z && size > 0) + if(_vifRegs->offset == OFFSET_Z) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); } @@ -409,7 +186,7 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate _UNPACKpart(_vifRegs->offset, *dest, *data); - _vifRegs->offset = 0; + _vifRegs->offset = OFFSET_X; } } @@ -418,20 +195,19 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = 0; + if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { //As with S-#, this will always be a complete packet - writeX(*dest++, ((*data & 0x001f) << 3)); - writeY(*dest++, ((*data & 0x03e0) >> 2)); - writeZ(*dest++, ((*data & 0x7c00) >> 7)); - writeW(*dest, ((*data & 0x8000) >> 8)); + writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); + writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); + writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); + writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 90c3a6f105..eb26758fbf 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -87,6 +87,96 @@ extern "C" extern u32* _vifCol; } +__forceinline void setVifRowRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + _vifRegs->r0 = data; + break; + case 1: + _vifRegs->r1 = data; + break; + case 2: + _vifRegs->r2 = data; + break; + case 3: + _vifRegs->r3 = data; + break; + default: + assert(0); + break; + } +} + +__forceinline u32 getVifRowRegs(u32 reg) +{ + switch (reg) + { + case 0: + return _vifRegs->r0; + break; + case 1: + return _vifRegs->r1; + break; + case 2: + return _vifRegs->r2; + break; + case 3: + return _vifRegs->r3; + break; + default: + assert(0); + return 0; + break; + } +} + +__forceinline void setVifColRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + _vifRegs->c0 = data; + break; + case 1: + _vifRegs->c1 = data; + break; + case 2: + _vifRegs->c2 = data; + break; + case 3: + 
_vifRegs->c3 = data; + break; + default: + assert(0); + break; + } +} + +__forceinline u32 getVifColRegs(u32 reg) +{ + switch (reg) + { + case 0: + return _vifRegs->c0; + break; + case 1: + return _vifRegs->c1; + break; + case 2: + return _vifRegs->c2; + break; + case 3: + return _vifRegs->c3; + break; + default: + assert(0); + return 0; + break; + } +} + #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) From 19362d3c55729180c426cf0df9da0be19d15492c Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 00:02:42 +0000 Subject: [PATCH 25/40] More unpack changes, tried to simplify a few sums a bit when processing skipping and a few other misc bits git-svn-id: http://pcsx2.googlecode.com/svn/trunk@940 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 76 ++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 099470d993..f0aa2ec660 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -272,39 +272,39 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int switch (unpackType) { case 0x0: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-32 skip, size = %d", size); break; case 0x1: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-16 skip, size = %d", size); break; case 0x2: - vif->tag.addr += size * 16; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-8 skip, size = %d", size); break; case 0x4: - vif->tag.addr += size + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-32 skip, size = %d", size); break; case 0x5: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-16 skip, size = %d", size); 
break; case 0x6: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 8); - VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size); + vif->tag.addr += (size / unpack->gsize) * 16; + DevCon::Notice("Processing V2-8 skip, size = %d", params size); break; case 0x8: - vif->tag.addr += size + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-32 skip, size = %d", size); break; case 0x9: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 4); - VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size); + vif->tag.addr += (size / unpack->gsize) * 16; + DevCon::Notice("Processing V3-16 skip, size = %d", params size); break; case 0xA: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-8 skip, size = %d", size); break; case 0xC: @@ -312,15 +312,15 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFUNPACK_LOG("Processing V4-32 skip, size = %d, CL = %d, WL = %d", size, vif1Regs->cycle.cl, vif1Regs->cycle.wl); break; case 0xD: - vif->tag.addr += size * 2; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-16 skip, size = %d", size); break; case 0xE: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-8 skip, size = %d", size); break; case 0xF: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-5 skip, size = %d", size); break; default: @@ -337,11 +337,6 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); } - - if ((vif->tag.addr & 0xf) == unpack->gsize) - { - vif->tag.addr += 16 - unpack->gsize; - } } static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) @@ -386,8 +381,8 @@ static void VIFunpack(u32 
*data, vifCode *v, int size, const unsigned int VIFdma dest = (u32*)(VU->Mem + v->addr); - VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); + VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); #ifdef _DEBUG @@ -427,18 +422,33 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma memsize = size; #endif + if (v->size != (size >> 2)) + ProcessMemSkip(size, unpackType, VIFdmanum); + + + if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Sanity Check (memory overflow) + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000); + + } + if (_vifRegs->offset > 0) { int destinc, unpacksize; + //This is just to make sure the alignment isnt loopy on a split packet + if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) + { + DevCon::Error("Warning: Unpack alignment error"); + } + VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) VIFUNPACK_LOG("Warning! 
Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); // SSE doesn't handle such small data - if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); - + if (vifRegs->offset < (u32)ft->qsize) { if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) @@ -473,11 +483,10 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { dest += destinc; } + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); } - else if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -614,6 +623,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma size -= ft->gsize; vifRegs->num--; + //if(vifRegs->num == loophere) dest = (u32*)(VU->Mem); ++vif->cl; if (vif->cl == vifRegs->cycle.wl) { @@ -624,6 +634,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { dest += 4; } + } // have to update @@ -663,9 +674,10 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; - + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); } @@ -674,8 +686,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { VIF_LOG("VIFunpack - filling write"); + if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) + DevCon::Notice("Filling write warning! 
Size < packet size and CL != 0"); + VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType); - while (size >= ft->gsize || vifRegs->num > 0) + while (vifRegs->num > 0) { if (vif->cl == vifRegs->cycle.wl) { @@ -693,6 +708,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { vif->cl = 0; } + if(size < ft->gsize) + { + VIF_LOG("Out of Filling write data"); + break; + } } else { From f127f69b3e9fb108effefcb9f6413ad0ace18437 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 10 Apr 2009 01:34:04 +0000 Subject: [PATCH 26/40] A few tweaks to the unpacking code. _UNPACKPart isn't really neccessary anymore, and optimised writeXYZW a little. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@941 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 88 ++++++++++++++++++++++++++++----------------------- pcsx2/Vif.h | 29 +++++++---------- 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 684927a591..8157efef21 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -47,8 +47,6 @@ enum UnpackOffset OFFSET_W = 3 }; -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) - __forceinline static int _limit(int a, int max) { return (a > max) ? max : a; @@ -95,8 +93,8 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = data + vifRowReg; break; case 2: - vifRowReg += data; - dest = vifRowReg; + // vifRowReg isn't used after this, or I would make it equal to dest here. + dest = setVifRowRegs(offnum, vifRowReg + data); break; default: dest = data; @@ -107,33 +105,13 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = vifRowReg; break; case 2: - if (_vif->cl > 2) - dest = getVifColRegs(3); - else - dest = getVifColRegs(_vif->cl); + dest = getVifColRegs((_vif->cl > 2) ? 
3 : _vif->cl); break; + jNO_DEFAULT; } - setVifRowRegs(offnum, vifRowReg); // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } -template -static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y, int size) -{ - if (size > 0) - { - writeXYZW(offnum, x, y); - _vifRegs->offset++; - } -} - -template -static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y) -{ - writeXYZW(offnum, x, y); - _vifRegs->offset++; -} - template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { @@ -147,21 +125,35 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size) template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X) + if (_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Y; + size--; + } } - if(_vifRegs->offset == OFFSET_Y) + + if (_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data); + _vifRegs->offset = OFFSET_Z; + size--; + } } - if(_vifRegs->offset == OFFSET_Z) + + if (_vifRegs->offset == OFFSET_Z) { - _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + writeXYZW(_vifRegs->offset, *dest++, *dest-2); + _vifRegs->offset = OFFSET_W; } - if(_vifRegs->offset == OFFSET_W) + + if (_vifRegs->offset == OFFSET_W) { - _UNPACKpart(_vifRegs->offset, *dest, *data); + writeXYZW(_vifRegs->offset, *dest, *data); _vifRegs->offset = OFFSET_X; } } @@ -171,21 +163,35 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Y; + size--; + } } + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + 
writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Z; + size--; + } } + if(_vifRegs->offset == OFFSET_Z) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_W; } - if(_vifRegs->offset == OFFSET_W) + + if(_vifRegs->offset == OFFSET_W) { //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - _UNPACKpart(_vifRegs->offset, *dest, *data); + writeXYZW(_vifRegs->offset, *dest, *data); _vifRegs->offset = OFFSET_X; } } @@ -195,7 +201,9 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset++; + size--; } if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index eb26758fbf..fa318d7618 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -87,7 +87,7 @@ extern "C" extern u32* _vifCol; } -__forceinline void setVifRowRegs(u32 reg, u32 data) +static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { switch (reg) { @@ -103,13 +103,12 @@ __forceinline void setVifRowRegs(u32 reg, u32 data) case 3: _vifRegs->r3 = data; break; - default: - assert(0); - break; + jNO_DEFAULT; } + return data; } -__forceinline u32 getVifRowRegs(u32 reg) +static __forceinline u32 getVifRowRegs(u32 reg) { switch (reg) { @@ -125,14 +124,11 @@ __forceinline u32 getVifRowRegs(u32 reg) case 3: return _vifRegs->r3; break; - default: - assert(0); - return 0; - break; + jNO_DEFAULT; } } -__forceinline void setVifColRegs(u32 reg, u32 data) +static __forceinline u32 setVifColRegs(u32 reg, u32 data) { switch (reg) { @@ -148,13 +144,12 @@ __forceinline void setVifColRegs(u32 reg, u32 data) case 3: _vifRegs->c3 = data; break; - default: - assert(0); - break; + jNO_DEFAULT; } + return data; } 
-__forceinline u32 getVifColRegs(u32 reg) +static __forceinline u32 getVifColRegs(u32 reg) { switch (reg) { @@ -170,15 +165,13 @@ __forceinline u32 getVifColRegs(u32 reg) case 3: return _vifRegs->c3; break; - default: - assert(0); - return 0; - break; + jNO_DEFAULT; } } #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) +#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) void dmaVIF0(); void dmaVIF1(); From b1769a2061e22e7bc59ce5842fd631099ac9886a Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 10 Apr 2009 01:50:31 +0000 Subject: [PATCH 27/40] Take care of Issue 139. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@942 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86.cpp | 2 +- pcsx2/x86/ix86/ix86_types.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index a13c7b34a7..f62cabf789 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -41,7 +41,7 @@ XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { -x86IndexerType ptr; +const x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 0ab5d6bcf9..ede42f92af 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -365,10 +365,12 @@ namespace x86Emitter { return ModSib( (uptr)src ); } + + x86IndexerType() {} }; // ------------------------------------------------------------------------ - extern x86IndexerType ptr; + extern const x86IndexerType ptr; extern const x86Register32 eax; extern const x86Register32 ebx; From b0da55cb891d0a444ad2526b99312d275b49847d Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 10 Apr 2009 07:12:29 +0000 Subject: [PATCH 28/40] GSdx: this should probably fix taking snapshots with dx9, also upped the version to .15, since the revision number has passed what the last release 
still had from the old repository. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@943 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 2 +- plugins/GSdx/GSRendererHW.h | 4 ++-- plugins/GSdx/GSTexture9.cpp | 18 +++++++----------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 388af54658..f670531aa4 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -26,7 +26,7 @@ #pragma once -#define PLUGIN_VERSION 14 +#define PLUGIN_VERSION 15 #include "GSVector.h" diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 2ad320f9e8..e343852712 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -349,8 +349,6 @@ protected: OverrideOutput(); - m_tc->InvalidateTextures(context->FRAME, context->ZBUF); - if(s_dump) { CString str; @@ -360,6 +358,8 @@ protected: if(s_savez) ds->m_texture.Save(str); // if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); // TODO } + + m_tc->InvalidateTextures(context->FRAME, context->ZBUF); } virtual void Draw(int prim, Texture& rt, Texture& ds, typename GSTextureCache::GSTexture* tex) = 0; diff --git a/plugins/GSdx/GSTexture9.cpp b/plugins/GSdx/GSTexture9.cpp index 8d75cc2845..bff9df141c 100644 --- a/plugins/GSdx/GSTexture9.cpp +++ b/plugins/GSdx/GSTexture9.cpp @@ -140,8 +140,8 @@ void GSTexture9::Unmap() bool GSTexture9::Save(CString fn, bool dds) { - CComPtr res; - + CComPtr surface; + if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { HRESULT hr; @@ -153,8 +153,6 @@ bool GSTexture9::Save(CString fn, bool dds) if(desc.Format != D3DFMT_D32F_LOCKABLE) return false; - CComPtr surface; - hr = m_dev->CreateOffscreenPlainSurface(desc.Width, desc.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, NULL); D3DLOCKED_RECT slr, dlr; @@ -175,24 +173,22 @@ bool GSTexture9::Save(CString fn, bool dds) m_surface->UnlockRect(); surface->UnlockRect(); - - res = surface; } else { - res = m_surface; + surface = m_surface; } - if(CComQIPtr surface 
= res) + if(surface != NULL) { return SUCCEEDED(D3DXSaveSurfaceToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, surface, NULL, NULL)); } - - if(CComQIPtr texture = res) +/* + if(CComQIPtr texture = surface) { return SUCCEEDED(D3DXSaveTextureToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, texture, NULL)); } - +*/ return false; } From 432b060109a3b7da0ad8566bbc75424b1d3ef842 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:08:19 +0000 Subject: [PATCH 29/40] Fix for one small bug, doesnt fix tekken 5 tho :( git-svn-id: http://pcsx2.googlecode.com/svn/trunk@944 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 8157efef21..48efdf60fc 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -183,8 +183,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_Z) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_W; + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_W; + size--; + } } if(_vifRegs->offset == OFFSET_W) From e1bf40546f434180c5482de4d0b273ec19a29e39 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:15:52 +0000 Subject: [PATCH 30/40] Why this broke Tekken 5 i don't know! (answers on a postcard) anyhow, fixed :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@945 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 48efdf60fc..f20103e72d 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -107,7 +107,9 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) case 2: dest = getVifColRegs((_vif->cl > 2) ? 
3 : _vif->cl); break; - jNO_DEFAULT; + case 3: + //Masked so don't do anything + break; } // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } From 756127d09602cadc61634f207bbea473a89849e2 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:29:20 +0000 Subject: [PATCH 31/40] Fixed recently discovered bug from VIF which could have potentially happened anywhere jNODEFAULT is used (nobody noticed lol) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@946 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/PS2Etypes.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/common/include/PS2Etypes.h b/common/include/PS2Etypes.h index 705d22aa46..6f652e8736 100644 --- a/common/include/PS2Etypes.h +++ b/common/include/PS2Etypes.h @@ -55,8 +55,6 @@ // disable the default case in a switch #define jNO_DEFAULT \ { \ - break; \ - \ default: \ jASSUME(0); \ break; \ From 8738f75ba72296e1aa3dfc354f750e5f7c361a07 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:42:37 +0000 Subject: [PATCH 32/40] Fixed alignment problems noticed in Digital Devil Saga git-svn-id: http://pcsx2.googlecode.com/svn/trunk@947 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index f0aa2ec660..4d2ec15a8c 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -336,7 +336,9 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16); VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); } - + + //This is sorted out later + vif->tag.addr &= ~0xf; } static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) From 6775b8a5cc047ac9267d119780273b92d632e9a9 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 10 Apr 2009 
19:45:27 +0000 Subject: [PATCH 33/40] microVU: - added microVU_Execution.inl - dispatcher stuff is now recompiled with pcsx2's emitter instead of using inline asm, its cleaner than inline asm and its more portable since the asm won't have to be ported to GCC. - lots of first-pass implementation for lower opcodes - implemented documented branch behavior (first pass stuff only) Note: theres some undocumented stuff branches do according to Nneeve's tests, but i won't implement those for now since 99% of games shouldn't need it, and according to the tests, the behavior seems kind-of random/erratic. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@948 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 3 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iVU0micro.cpp | 2 + pcsx2/x86/iVU1micro.cpp | 2 + pcsx2/x86/ix86/ix86_cpudetect.cpp | 1 + pcsx2/x86/microVU.cpp | 101 ++------------------- pcsx2/x86/microVU.h | 29 +++--- pcsx2/x86/microVU_Alloc.inl | 18 ++-- pcsx2/x86/microVU_Analyze.inl | 57 +++++++++++- pcsx2/x86/microVU_Compile.inl | 14 +-- pcsx2/x86/microVU_Lower.inl | 49 +++++++--- pcsx2/x86/microVU_Misc.h | 31 +++++-- 12 files changed, 154 insertions(+), 157 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index d99f47debe..2da21f9f11 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -44,7 +44,8 @@ extern SessionOverrideFlags g_Session; ////////////////////////////////////////////////////////////////////////// // Pcsx2 User Configuration Options! 
-//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) #define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs #define PCSX2_EEREC 0x10 #define PCSX2_VU0REC 0x20 diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index b013011d39..ba2df10aca 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2506,6 +2506,10 @@ RelativePath="..\..\x86\microVU_Compile.inl" > + + diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 2dfaac7669..55af7a5c54 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -83,8 +83,10 @@ namespace VU0micro if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; FreezeXMMRegs(1); + FreezeMMXRegs(1); runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); FreezeXMMRegs(0); + FreezeMMXRegs(0); } } diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 87f3099d7b..7debdcfa8e 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -142,8 +142,10 @@ namespace VU1micro assert( (VU1.VI[REG_TPC].UL&7) == 0 ); FreezeXMMRegs(1); + FreezeMMXRegs(0); runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); FreezeXMMRegs(0); + FreezeMMXRegs(0); } } #endif diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index 7e3323e4a7..b1fc04a96e 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -398,6 +398,7 @@ void cpudetectInit() cpudetectSSE3(recSSE); HostSys::Munmap( recSSE, 0x1000 ); } + else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); } ////////////////////////////////////// // Core Counting! 
diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 0e537472e6..d2aef9b3bd 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -64,7 +64,12 @@ microVUt(void) mVUreset() { // Dynarec Cache mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - + mVU->ptr = mVU->cache; + + // Setup Entrance/Exit Points + mVUdispatcherA(); + mVUdispatcherB(); + // Other Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; @@ -99,33 +104,6 @@ microVUt(void) mVUclear(u32 addr, u32 size) { // that its probably not worth it... } -// Executes for number of cycles -microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { -/* - Pseudocode: (ToDo: implement # of cycles) - 1) Search for existing program - 2) If program not found, goto 5 - 3) Search for recompiled block - 4) If recompiled block found, goto 6 - 5) Recompile as much blocks as possible - 6) Return start execution address of block -*/ - microVU* mVU = mVUx; - if ( mVUsearchProg(mVU) ) { // Found Program - //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); - //if (block) return block->x86ptrStart; // Found Block - } - // Recompile code - return NULL; -} - -void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { - return mVUexecute<0>(startPC, cycles); -} -void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { - return mVUexecute<1>(startPC, cycles); -} - //------------------------------------------------------------------ // Micro VU - Private Functions //------------------------------------------------------------------ @@ -206,73 +184,6 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { } } -//------------------------------------------------------------------ -// Dispatcher Functions 
-//------------------------------------------------------------------ - -#ifdef _MSC_VER -// Runs VU0 for number of cycles -__declspec(naked) void __fastcall startVU0(u32 startPC, u32 cycles) { - __asm { - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. - call mVUexecuteVU0 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - /* Should set xmmZ? */ - jmp eax - } -} - -// Runs VU1 for number of cycles -__declspec(naked) void __fastcall startVU1(u32 startPC, u32 cycles) { - __asm { - - call mVUexecuteVU1 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - - jmp eax - } -} - -// Exit point -__declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) { - __asm { - - //call mVUcleanUpVU0 - - /*restore cpu state*/ - pop edi; - pop esi; - pop ebp; - pop ebx; - - ldmxcsr g_sseMXCSR - emms - - ret - } -} -#else -extern "C" { - extern void __fastcall startVU0(u32 startPC, u32 cycles); - extern void __fastcall startVU1(u32 startPC, u32 cycles); - extern void __fastcall endVU0(u32 startPC, u32 cycles); -} -#endif - //------------------------------------------------------------------ // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 049520d255..41412f70bf 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -125,17 +125,7 @@ struct microVU { u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) u32 divFlag; // 1 instance of I/D flags - -/* - uptr x86eax; // Accumulator register. Used in arithmetic operations. - uptr x86ecx; // Counter register. Used in shift/rotate instructions. - uptr x86edx; // Data register. Used in arithmetic operations and I/O operations. 
- uptr x86ebx; // Base register. Used as a pointer to data (located in DS in segmented mode). - uptr x86esp; // Stack Pointer register. Pointer to the top of the stack. - uptr x86ebp; // Stack Base Pointer register. Used to point to the base of the stack. - uptr x86esi; // Source register. Used as a pointer to a source in stream operations. - uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations. -*/ + u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch }; // microVU rec structs @@ -146,14 +136,24 @@ extern PCSX2_ALIGNED16(microVU microVU1); extern void (*mVU_UPPER_OPCODE[64])( VURegs* VU, s32 info ); extern void (*mVU_LOWER_OPCODE[128])( VURegs* VU, s32 info ); +// Main Functions +microVUt(void) mVUinit(VURegs*); +microVUt(void) mVUreset(); +microVUt(void) mVUclose(); +microVUt(void) mVUclear(u32, u32); + +// Private Functions __forceinline void mVUclearProg(microVU* mVU, int progIndex); __forceinline int mVUfindLeastUsedProg(microVU* mVU); __forceinline int mVUsearchProg(microVU* mVU); __forceinline void mVUcacheProg(microVU* mVU, int progIndex); +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); -#ifdef __LINUX__ -microVUt(void) mVUreset(); -microVUt(void) mVUclose(); +#ifndef __LINUX__ +typedef void (__fastcall *mVUrecCall)(u32, u32); +#else +typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if this is correct syntax (should be close xD) #endif // Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files) @@ -162,3 +162,4 @@ microVUt(void) mVUclose(); #include "microVU_Alloc.inl" #include "microVU_Tables.inl" #include "microVU_Compile.inl" +#include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 3316d7360c..dee76e8f92 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -708,19 +708,7 @@ microVUt(void) 
mVUallocCFLAGb(int reg, int fInstance) { microVU* mVU = mVUx; MOV32RtoM(mVU->clipFlag[fInstance], reg); } -/* -microVUt(void) mVUallocDFLAGa(int reg) { - microVU* mVU = mVUx; - //if (!mVUdivFlag) { MOV32MtoR(reg, (uptr)&mVU->divFlag[readQ]); AND32ItoR(reg, 0xc00); } - //else if (mVUdivFlag & 1) { XOR32RtoR(reg, reg); } - //else { MOV32ItoR(reg, (u32)((mVUdivFlag << 9) & 0xc00)); } -} -microVUt(void) mVUallocDFLAGb(int reg) { - microVU* mVU = mVUx; - //MOV32RtoM((uptr)&mVU->divFlag[writeQ], reg); -} -*/ //------------------------------------------------------------------ // VI Reg Allocators //------------------------------------------------------------------ @@ -734,6 +722,12 @@ microVUt(void) mVUallocVIa(int GPRreg, int _reg_) { microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { microVU* mVU = mVUx; + if (backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) + MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg); + mVUallocVIa(GPRreg, _reg_); + MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg); + MOV32MtoR(GPRreg, (uptr)&mVU->VIbackup[1]); + } if (_reg_ == 0) { return; } else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); } else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index dd5918cdf0..6768153aee 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,6 +143,32 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// LQx - LQ/LQD/LQI Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } + +microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { + microVU* mVU = mVUx; + 
analyzeVIreg1(Is); + analyzeReg2(Ft); + if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; } + if (writeIs) { analyzeVIreg2(Is, 1); } +} + +//------------------------------------------------------------------ +// SQx - SQ/SQD/SQI Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { + microVU* mVU = mVUx; + analyzeReg1(Fs); + analyzeVIreg1(It); + if (writeIt) { analyzeVIreg2(It, 1); } +} + //------------------------------------------------------------------ // R*** - R Reg Opcodes //------------------------------------------------------------------ @@ -166,9 +192,6 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } -#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } } - microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; if (!It) { mVUinfo |= _isNOP; return; } @@ -190,4 +213,32 @@ microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { analyzeXGkick2(xCycles); } +//------------------------------------------------------------------ +// Branches - Branch Opcodes +//------------------------------------------------------------------ + +#define analyzeBranchVI(reg, infoVal) { \ + if (reg && (mVUcycles > 1)) { /* Ensures branch is not first opcode in block */ \ + incPC(-2); \ + if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ + mVUinfo |= _backupVI; \ + incPC(2); \ + mVUinfo |= infoVal; \ + } \ + else { incPC(2); } \ + } \ +} + +microVUt(void) mVUanalyzeBranch1(int Is) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is]) { analyzeVIreg1(Is); } + else { analyzeBranchVI(Is, _memReadIs); } +} + +microVUt(void) mVUanalyzeBranch2(int Is, int It) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is] || 
mVUregs.VI[It]) { analyzeVIreg1(Is); analyzeVIreg1(It); } + else { analyzeBranchVI(Is, _memReadIs); analyzeBranchVI(It, _memReadIt);} +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 554cf2612b..c9a8217641 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,11 +29,7 @@ } \ } -#define curI mVUcurProg.data[iPC] -#define setCode() { mVU->code = curI; } -#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } -#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } -#define incCycles(x) { mVUincCycles(x); } +#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } microVUt(void) mVUincCycles(int x) { @@ -115,10 +111,10 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } - mVUopU(); - if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } - else { incPC(1); mVUopL(); } - + if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } + else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } + else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } + if (!isBdelay) { incPC(1); } else { incPC(-2); // Go back to Branch Opcode diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 5ac22e06e7..18e73ebde7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -799,7 +799,7 @@ microVUf(void) mVU_ISWR() { microVUf(void) mVU_LQ() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 0); } else { if (!_Fs_) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); @@ -817,9 +817,9 @@ microVUf(void) mVU_LQ() { microVUf(void) mVU_LQD() { microVU* mVU = 
mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_ && !noWriteVF) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. - if (_Ft_ && !noWriteVF) { + if (!noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -838,15 +838,15 @@ microVUf(void) mVU_LQD() { microVUf(void) mVU_LQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_ && !noWriteVF) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { mVUallocVIa((_Ft_) ? gprT1 : gprT2, _Fs_); - if (_Ft_ && !noWriteVF) { + if (!noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -864,7 +864,7 @@ microVUf(void) mVU_LQI() { microVUf(void) mVU_SQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 0); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -882,7 +882,7 @@ microVUf(void) mVU_SQ() { microVUf(void) mVU_SQD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -901,7 +901,7 @@ microVUf(void) mVU_SQD() { microVUf(void) mVU_SQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -1042,8 +1042,10 @@ microVUf(void) mVU_XGKICK() { if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } else { mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall + PUSH32R(gprR); // gprR = EDX is volatile so backup if 
(!vuIndex) CALLFunc((uptr)mVU_XGKICK0); else CALLFunc((uptr)mVU_XGKICK1); + POP32R(gprR); // Restore } } @@ -1058,7 +1060,8 @@ microVUf(void) mVU_B() { microVUf(void) mVU_BAL() { microVU* mVU = mVUx; mVUbranch = 1; - if (recPass) { + if (!recPass) { analyzeVIreg2(_Ft_, 1); } + else { MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -1066,34 +1069,50 @@ microVUf(void) mVU_BAL() { microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else {} } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; mVUbranch = 2; -} -microVUf(void) mVU_IBLTZ() { - microVU* mVU = mVUx; - mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} +} +microVUf(void) mVU_IBLTZ() { + microVU* mVU = mVUx; + mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else {} } microVUf(void) mVU_JR() { microVU* mVU = mVUx; mVUbranch = 3; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; mVUbranch = 3; + if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } + else {} } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index dd52f1b489..5a1267a556 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -144,14 +144,16 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUstall mVUallocInfo.maxStall -//#define mVUdivFlag mVUallocInfo.divFlag -//#define mVUdivFlagT mVUallocInfo.divFlagTimer #define mVUregs mVUallocInfo.regs #define mVUregsTemp 
mVUallocInfo.regsTemp #define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] #define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) +#define curI mVUcurProg.data[iPC] +#define setCode() { mVU->code = curI; } +#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incCycles(x) { mVUincCycles(x); } #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -174,6 +176,11 @@ declareAllVariables #define _fvsInstance (3<<18) #define _fvcInstance (3<<20) #define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) +#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) +#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches) +#define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) +#define _writesVI (1<<25) // Current Instruction writes to VI +#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -196,16 +203,18 @@ declareAllVariables #define fvsInstance ((mVUinfo >> 18) & 3) #define fvcInstance ((mVUinfo >> 20) & 3) #define noWriteVF (mVUinfo & (1<<21)) - -//#define getFs (mVUinfo & (1<<13)) -//#define getFt (mVUinfo & (1<<14)) -//#define fpmInstance (((u8)((mVUinfo & (3<<10)) >> 10) - 1) & 0x3) +#define backupVI (mVUinfo & (1<<22)) +#define memReadIs (mVUinfo & (1<<23)) +#define memReadIt (mVUinfo & (1<<24)) +#define writesVI (mVUinfo & (1<<25)) +#define swapOps (mVUinfo & (1<<26)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) #ifdef mVUdebug -#define mVUdebugStuff1() { \ +#define mVUlog Console::Notice +#define mVUdebug1() { \ if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ if (curI & _Ebit_) { SysPrintf("microVU: 
E-bit set!\n"); } \ if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ @@ -213,5 +222,11 @@ declareAllVariables if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ } #else -#define mVUdebugStuff1() {} +#define mVUlog 0&& +#define mVUdebug1() {} #endif + +#define mVUcachCheck(x) { \ + uptr diff = mVU->ptr - mVU->cache; \ + if (diff > x) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ +} From 653286a6922497afac6839a3aeb8c50b66e12f83 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 10 Apr 2009 19:47:13 +0000 Subject: [PATCH 34/40] forgot to add microVU_Execution.inl in the last commit xD git-svn-id: http://pcsx2.googlecode.com/svn/trunk@949 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Execute.inl | 162 ++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 pcsx2/x86/microVU_Execute.inl diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl new file mode 100644 index 0000000000..61347d3898 --- /dev/null +++ b/pcsx2/x86/microVU_Execute.inl @@ -0,0 +1,162 @@ +/* Pcsx2 - Pc Ps2 Emulator +* Copyright (C) 2009 Pcsx2-Playground Team +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ +#pragma once +#ifdef PCSX2_MICROVU + +//------------------------------------------------------------------ +// Dispatcher Functions +//------------------------------------------------------------------ + +// Generates the code for entering recompiled blocks +microVUt(void) mVUdispatcherA() { + static u32 PCSX2_ALIGNED16(vuMXCSR); + microVU* mVU = mVUx; + x86SetPtr(mVU->ptr); + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. + if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } + else { CALLFunc((uptr)mVUexecuteVU1); } + + // Backup cpu state + PUSH32R(EBX); + PUSH32R(EBP); + PUSH32R(ESI); + PUSH32R(EDI); + + // Load VU's MXCSR state + vuMXCSR = g_sseVUMXCSR; + SSE_LDMXCSR((uptr)&vuMXCSR); + + // Load Regs + MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]); + MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]); + MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]); + SHL32ItoR(gprF0, 16); + AND32ItoR(gprF1, 0xffff); + OR32RtoR (gprF0, gprF1); + MOV32RtoR(gprF1, gprF0); + MOV32RtoR(gprF2, gprF0); + MOV32RtoR(gprF3, gprF0); + + for (int i = 0; i < 8; i++) { + MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]); + } + + SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC); + SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); + SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]); + SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ + + // Jump to Recompiled Code Block + JMPR(EAX); + mVU->ptr = x86Ptr; +} + +// Generates the code to exit from recompiled blocks +microVUt(void) mVUdispatcherB() { + static u32 PCSX2_ALIGNED16(eeMXCSR); + microVU* mVU = 
mVUx; + x86SetPtr(mVU->ptr); + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. + if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } + else { CALLFunc((uptr)mVUcleanUpVU1); } + + // Load EE's MXCSR state + eeMXCSR = g_sseMXCSR; + SSE_LDMXCSR((uptr)&eeMXCSR); + + // Save Regs + MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances + AND32ItoR(gprT1, 0xffff); + SHR32ItoR(gprF0, 16); + MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0); + + for (int i = 0; i < 8; i++) { + MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i); + } + + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + // Restore cpu state + POP32R(EDI); + POP32R(ESI); + POP32R(EBP); + POP32R(EBX); + + EMMS(); + RET(); + + mVU->ptr = x86Ptr; + mVUcachCheck(512); +} + +//------------------------------------------------------------------ +// Execution Functions +//------------------------------------------------------------------ + +// Executes for number of cycles +microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { +/* + Pseudocode: (ToDo: implement # of cycles) + 1) Search for existing program + 2) If program not found, goto 5 + 3) Search for recompiled block + 4) If recompiled block found, goto 6 + 5) Recompile as much blocks as possible + 6) Return start execution address of block +*/ + microVU* mVU = mVUx; + mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); + if ( mVUsearchProg(mVU) ) { // Found Program + //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); + //if (block) return 
block->x86ptrStart; // Found Block + } + // Recompile code + return NULL; +} + +//------------------------------------------------------------------ +// Cleanup Functions +//------------------------------------------------------------------ + +microVUt(void) mVUcleanUp() { + microVU* mVU = mVUx; + mVU->ptr = x86Ptr; + mVUcachCheck(1024); // ToDo: Implement Program Cache Limits +} + +//------------------------------------------------------------------ +// Caller Functions +//------------------------------------------------------------------ + +void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.cache)(startPC, cycles); } +void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.cache)(startPC, cycles); } +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } +void mVUcleanUpVU0() { mVUcleanUp<0>(); } +void mVUcleanUpVU1() { mVUcleanUp<1>(); } + +#endif //PCSX2_MICROVU From cf995d07163e56551a9dc712eec3072b045adfa3 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 10 Apr 2009 19:55:05 +0000 Subject: [PATCH 35/40] GSdx: GoW2 fix, 16 bit drawing that caused the green overlay is skipped (character shadow) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@950 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRendererHW.h | 29 +++++++++++++++++++++++++++++ plugins/GSdx/GSState.cpp | 18 +++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index e343852712..e7b46f7c6f 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -507,6 +507,35 @@ protected: #pragma endregion + #pragma region GoW2 z buffer clear + + if(m_game.title == CRC::GodOfWar2) + { + DWORD FBP = m_context->FRAME.Block(); + DWORD FBW = m_context->FRAME.FBW; + DWORD FPSM = m_context->FRAME.PSM; + + if(FBP == 0x00f00 
&& FPSM == PSM_PSMZ24) + { + GIFRegTEX0 TEX0; + + TEX0.TBP0 = FBP; + TEX0.TBW = FBW; + TEX0.PSM = FPSM; + + if(GSTextureCache::GSDepthStencil* ds = m_tc->GetDepthStencil(TEX0, m_width, m_height)) + { + m_dev.ClearDepth(ds->m_texture, 0); + } + + return false; + } + + return true; + } + + #pragma endregion + return true; } diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index ff17150ec4..de18465acd 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2081,6 +2081,22 @@ bool GSC_GodOfWar(const GSFrameInfo& fi, int& skip) return true; } +bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) +{ + if(skip == 0) + { + if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16) + { + skip = 30; // shadows + } + } + else + { + } + + return true; +} + bool GSC_GiTS(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -2172,7 +2188,7 @@ bool GSState::IsBadFrame(int& skip) map[CRC::Tekken5] = GSC_Tekken5; map[CRC::IkkiTousen] = GSC_IkkiTousen; map[CRC::GodOfWar] = GSC_GodOfWar; - map[CRC::GodOfWar2] = GSC_GodOfWar; + map[CRC::GodOfWar2] = GSC_GodOfWar2; map[CRC::GiTS] = GSC_GiTS; map[CRC::Onimusha3] = GSC_Onimusha3; map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; From 9c8a9712a9673f6d7156927f4d28dc8451626dec Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 11 Apr 2009 08:54:48 +0000 Subject: [PATCH 36/40] GSdx: GoW2 fix #2, pal version this time git-svn-id: http://pcsx2.googlecode.com/svn/trunk@952 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRendererHW.h | 2 +- plugins/GSdx/GSState.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index e7b46f7c6f..c5aa8e1f88 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -515,7 +515,7 @@ protected: DWORD FBW = m_context->FRAME.FBW; DWORD FPSM = m_context->FRAME.PSM; - if(FBP == 0x00f00 && FPSM == PSM_PSMZ24) + if((FBP == 0x00f00 || FBP 
== 0x00100) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100 { GIFRegTEX0 TEX0; diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index de18465acd..0664a1fff6 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2085,10 +2085,15 @@ bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) { if(skip == 0) { - if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16) + if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc + || fi.TME && fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal { skip = 30; // shadows } + else if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT32) // pal + { + // skip = 17; // only looks correct at native resolution + } } else { From 076e9e5386eb170c1d2bd9108302e8fbd3c0d26a Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 11 Apr 2009 09:25:47 +0000 Subject: [PATCH 37/40] more microVU stuff... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@953 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 17 +++-- pcsx2/x86/microVU.h | 13 +++- pcsx2/x86/microVU_Alloc.h | 3 +- pcsx2/x86/microVU_Analyze.inl | 75 ++++++++++++++++++++-- pcsx2/x86/microVU_Compile.inl | 86 ++++++++++++++++++++----- pcsx2/x86/microVU_Execute.inl | 18 +++--- pcsx2/x86/microVU_Lower.inl | 116 ++++++++++++++++++++-------------- pcsx2/x86/microVU_Misc.h | 10 ++- pcsx2/x86/microVU_Misc.inl | 2 +- 9 files changed, 252 insertions(+), 88 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index d2aef9b3bd..d54d8367ab 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -42,7 +42,6 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; - mVU->cacheAddr = (vuIndex ? 
0x1e840000 : 0x0e840000); mVU->cache = NULL; mVUreset(); @@ -62,7 +61,7 @@ microVUt(void) mVUreset() { } // Dynarec Cache - mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); mVU->ptr = mVU->cache; @@ -70,12 +69,21 @@ microVUt(void) mVUreset() { mVUdispatcherA(); mVUdispatcherB(); - // Other Variables + // Program Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; mVU->prog.cleared = 1; mVU->prog.cur = -1; mVU->prog.total = -1; + + // Setup Dynarec Cache Limits for Each Program + u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes + for (int i = 0; i <= mVU->prog.max; i++) { + mVU->prog.prog[i].x86start = z; + mVU->prog.prog[i].x86ptr = z; + z += (mVU->cacheSize / (mVU->prog.max + 1)); + mVU->prog.prog[i].x86end = z; + } } // Free Allocated Resources @@ -111,6 +119,7 @@ microVUt(void) mVUclear(u32 addr, u32 size) { // Clears program data (Sets used to 1 because calling this function implies the program will be used at least once) __forceinline void mVUclearProg(microVU* mVU, int progIndex) { mVU->prog.prog[progIndex].used = 1; + mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { mVU->prog.prog[progIndex].block[i]->reset(); } @@ -149,7 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { - if (i == mVU->prog.cur) SysPrintf("microVU: Same micro program sent!\n"); + if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; mVU->prog.cleared = 0; mVU->prog.prog[i].used++; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 41412f70bf..81294fbe28 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -92,7 +92,10 @@ public: template struct microProgram { u32 data[progSize/4]; - u32 used; // Number of times its been used + u32 used; // Number of times its been used + u8* x86ptr; // Pointer to program's recompilation code + u8* x86start; // Start of program's rec-cache + u8* x86end; // Limit of program's rec-cache microBlockManager* block[progSize/8]; microAllocInfo allocInfo; }; @@ -112,20 +115,24 @@ struct microProgManager { struct microVU { u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size - u32 progSize; // VU Micro Program Size (microSize/8) - u32 cacheAddr; // VU Cache Start Address + u32 progSize; // VU Micro Program Size (microSize/4) static const u32 cacheSize = 0x500000; // VU Cache Size microProgManager<0x4000> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) + u8* startFunct; // Ptr Function to the Start code for recompiled programs + u8* exitFunct; // Ptr Function to the Exit code for recompiled programs u8* ptr; // Pointer to next place to write recompiled code to u32 code; // Contains the current Instruction u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) u32 divFlag; // 1 instance of I/D flags u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch + u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump 
to (JALR/JR) + u32 p; // Holds current P instance index + u32 q; // Holds current Q instance index }; // microVU rec structs diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index e277f12723..cb6bbbbb5f 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -52,9 +52,10 @@ template struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) - u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) + u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block + u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 info[pSize/8]; // Info for Instructions in current block diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 6768153aee..224ebbbd09 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -102,6 +102,49 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { analyzeReg4(Ft); } +//------------------------------------------------------------------ +// IALU - IALU Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } + +microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { + microVU* mVU = mVUx; + if (!Id) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg1(It); + analyzeVIreg2(Id, 1); +} + +microVUt(void) mVUanalyzeIALU2(int Is, int It) { + microVU* mVU 
= mVUx; + if (!It) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg2(It, 1); +} + +//------------------------------------------------------------------ +// MR32 - MR32 Opcode +//------------------------------------------------------------------ + +// Flips xyzw stalls to yzwx +#define analyzeReg6(reg) { \ + if (reg) { \ + if (_X) { mVUstall = aMax(mVUstall, aReg(reg).y); } \ + if (_Y) { mVUstall = aMax(mVUstall, aReg(reg).z); } \ + if (_Z) { mVUstall = aMax(mVUstall, aReg(reg).w); } \ + if (_W) { mVUstall = aMax(mVUstall, aReg(reg).x); } \ + } \ +} + +microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg6(Fs); + analyzeReg2(Ft); +} + //------------------------------------------------------------------ // FDIV - DIV/SQRT/RSQRT Opcodes //------------------------------------------------------------------ @@ -144,11 +187,18 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { } //------------------------------------------------------------------ -// LQx - LQ/LQD/LQI Opcodes +// MFP - MFP Opcode //------------------------------------------------------------------ -#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } -#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } +microVUt(void) mVUanalyzeMFP(int Ft) { + microVU* mVU = mVUx; // ToDo: Needs special info for P reg? + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg2(Ft); +} + +//------------------------------------------------------------------ +// LQx - LQ/LQD/LQI Opcodes +//------------------------------------------------------------------ microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVU* mVU = mVUx; @@ -183,7 +233,7 @@ microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVU* mVU = mVUx; - if (!Ft) { mVUinfo |= ((canBeNOP) ? 
_isNOP : _noWriteVF); return; } + if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); } analyzeReg2(Ft); analyzeRreg(); } @@ -194,11 +244,22 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; - if (!It) { mVUinfo |= _isNOP; return; } - mVUinfo |= _isSflag; + if (!It) { mVUinfo |= _isNOP; } + else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time analyzeVIreg2(It, 1); } +microVUt(void) mVUanalyzeFSSET() { + microVU* mVU = mVUx; + int i, curPC = iPC; + for (i = mVUcount; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } + iPC = curPC; +} + //------------------------------------------------------------------ // XGkick //------------------------------------------------------------------ @@ -218,7 +279,7 @@ microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { //------------------------------------------------------------------ #define analyzeBranchVI(reg, infoVal) { \ - if (reg && (mVUcycles > 1)) { /* Ensures branch is not first opcode in block */ \ + if (reg && (mVUcount > 0)) { /* Ensures branch is not first opcode in block */ \ incPC(-2); \ if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ mVUinfo |= _backupVI; \ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index c9a8217641..85c4938aa7 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,10 +29,25 @@ } \ } +#define branchCase(Xcmp) \ + CMP16ItoM((uptr)mVU->branch, 0); \ + ajmp = Xcmp((uptr)0); \ + break + +#define branchCase2() { \ + incPC(-2); \ + MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \ + mVUallocVIb(gprT1, _Ft_); \ + incPC(+2); \ +} + #define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define calcCycles(reg, x) { reg = ((reg > x) ? 
(reg - x) : 0); } +#define incP() { mVU->p = (mVU->p+1) & 1; } +#define incQ() { mVU->q = (mVU->q+1) & 1; } microVUt(void) mVUincCycles(int x) { + microVU* mVU = mVUx; mVUcycles += x; for (int z = 31; z > 0; z--) { calcCycles(mVUregs.VF[z].x, x); @@ -45,9 +60,12 @@ microVUt(void) mVUincCycles(int x) { } if (mVUregs.q) { calcCycles(mVUregs.q, x); - if (!mVUregs.q) {} // Do Status Flag Merging Stuff? + if (!mVUregs.q) { incQ(); } // Do Status Flag Merging Stuff? + } + if (mVUregs.p) { + calcCycles(mVUregs.p, x); + if (!mVUregs.p) { incP(); } } - calcCycles(mVUregs.p, x); calcCycles(mVUregs.r, x); calcCycles(mVUregs.xgkick, x); } @@ -57,8 +75,7 @@ microVUt(void) mVUsetCycles() { incCycles(mVUstall); if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP - //mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?) - mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?) 
+ mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); @@ -72,20 +89,28 @@ microVUt(void) mVUsetCycles() { mVUregs.xgkick = mVUregsTemp.xgkick; } -microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { +//------------------------------------------------------------------ +// Recompiler +//------------------------------------------------------------------ + +microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVU* mVU = mVUx; microBlock block; + u8* thisPtr = mVUcurProg.x86Ptr; iPC = startPC / 4; // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState); - if (block) { x86SetPtr(pblock->x86ptrEnd); return; } + microBlock* pblock = mVUblock[iPC/2]->search(pipelineState, pState); + if (block) { return pblock->x86ptrStart; } // First Pass setCode(); mVUbranch = 0; mVUstartPC = iPC; + mVUcount = 0; mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + mVU->p = 0; // All blocks start at p index #0 + mVU->q = 0; // All blocks start at q index #0 for (int branch = 0;; ) { startLoop(); mVUopU(); @@ -94,11 +119,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; } else { incPC(1); mVUopL(); } mVUsetCycles(); + if (mVU->p) { mVUinfo |= _readP; } + if (mVU->q) { mVUinfo |= _readQ; } + else { mVUinfo |= _writeQ; } if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? 
_isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; } else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); incCycles(1); + mVUcount++; } // Second Pass @@ -109,24 +138,51 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, // ToDo: status/mac flag stuff? // if (isEOB) { x = 0; } - else if (isBranch) { mVUopU(); incPC(2); } + //if (isBranch2) { mVUopU(); incPC(2); } if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } if (!isBdelay) { incPC(1); } - else { - incPC(-2); // Go back to Branch Opcode - mVUopL(); // Run Branch Opcode + else { + u32* ajmp; switch (mVUbranch) { - case 1: break; - case 2: break; - case 3: break; + case 3: branchCase(JZ32); // IBEQ + case 4: branchCase(JGE32); // IBGEZ + case 5: branchCase(JG32); // IBGTZ + case 6: branchCase(JLE32); // IBLEQ + case 7: branchCase(JL32); // IBLTZ + case 8: branchCase(JNZ32); // IBNEQ + case 2: branchCase2(); // BAL + case 1: + // search for block + ajmp = JMP32((uptr)0); + + break; // B/BAL + case 9: branchCase2(); // JALR + case 10: break; // JR/JALR + //mVUcurProg.x86Ptr } - break; + return thisPtr; } } + // Do E-bit end stuff here + + incCycles(55); // Ensures Valid P/Q instances + mVUcycles -= 55; + if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + MOV32ItoM((uptr)&mVU->p, mVU->p); + MOV32ItoM((uptr)&mVU->q, mVU->q); + AND32ItoM((uptr)&microVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ?
~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it... + MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); + JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); + return thisPtr; } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 61347d3898..a1a875e39a 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -27,6 +27,7 @@ microVUt(void) mVUdispatcherA() { static u32 PCSX2_ALIGNED16(vuMXCSR); microVU* mVU = mVUx; x86SetPtr(mVU->ptr); + mVU->startFunct = mVU->ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } @@ -74,6 +75,7 @@ microVUt(void) mVUdispatcherB() { static u32 PCSX2_ALIGNED16(eeMXCSR); microVU* mVU = mVUx; x86SetPtr(mVU->ptr); + mVU->exitFunct = mVU->ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } @@ -96,9 +98,9 @@ microVUt(void) mVUdispatcherB() { } SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances - SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + //SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); // Restore cpu state POP32R(EDI); @@ -110,7 +112,7 @@ microVUt(void) mVUdispatcherB() { RET(); mVU->ptr = x86Ptr; - mVUcachCheck(512); + mVUcachCheck(mVU->cache, 512); } //------------------------------------------------------------------ @@ -144,16 +146,16 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - mVU->ptr = x86Ptr; - mVUcachCheck(1024); // ToDo: Implement Program Cache Limits + mVU->ptr = mVUcurProg.x86ptr; + mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } //------------------------------------------------------------------ // Caller Functions //------------------------------------------------------------------ -void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.cache)(startPC, cycles); } -void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.cache)(startPC, cycles); } +void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.startFunct)(startPC, cycles); } +void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); } void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } void mVUcleanUpVU0() { mVUcleanUp<0>(); } diff 
--git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 18e73ebde7..a247f00a6c 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -545,14 +545,12 @@ microVUf(void) mVU_FSOR() { microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeFSSET(); } else { int flagReg; getFlagReg(flagReg, fsInstance); - MOV16ItoR(gprT1, (_Imm12_ & 0xfc0)); - //if (_Imm12_ & 0xc00) { mVUdivFlag = _Imm12_ >> 9; } - //else { mVUdivFlag = 1; } - //mVUdivFlagT = 4; + AND32ItoR(flagReg, 0x03f); + OR32ItoR(flagReg, (_Imm12_ & 0xfc0)); } } @@ -562,7 +560,7 @@ microVUf(void) mVU_FSSET() { microVUf(void) mVU_IADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -576,7 +574,7 @@ microVUf(void) mVU_IADD() { microVUf(void) mVU_IADDI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm5_); @@ -586,7 +584,7 @@ microVUf(void) mVU_IADDI() { microVUf(void) mVU_IADDIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm12_); @@ -596,7 +594,7 @@ microVUf(void) mVU_IADDIU() { microVUf(void) mVU_IAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -609,7 +607,7 @@ microVUf(void) mVU_IAND() { microVUf(void) mVU_IOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -622,7 +620,7 @@ microVUf(void) mVU_IOR() { microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { if (_Ft_ != _Fs_) { mVUallocVIa(gprT1, _Fs_); @@ -639,7 +637,7 @@ microVUf(void) mVU_ISUB() { 
microVUf(void) mVU_ISUBIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, _Imm12_); @@ -653,7 +651,7 @@ microVUf(void) mVU_ISUBIU() { microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } else { mVUallocVIa(gprT1, _Fs_); MOVSX32R16toR(gprT1, gprT1); @@ -665,7 +663,7 @@ microVUf(void) mVU_MFIR() { microVUf(void) mVU_MFP() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMFP(_Ft_); } else { getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -674,7 +672,7 @@ microVUf(void) mVU_MFP() { microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } + if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -683,7 +681,7 @@ microVUf(void) mVU_MOVE() { microVUf(void) mVU_MR32() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMR32(_Fs_, _Ft_); } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 
4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } @@ -693,7 +691,7 @@ microVUf(void) mVU_MR32() { microVUf(void) mVU_MTIR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); mVUallocVIb(gprT1, _Ft_); @@ -706,7 +704,7 @@ microVUf(void) mVU_MTIR() { microVUf(void) mVU_ILW() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); @@ -725,7 +723,7 @@ microVUf(void) mVU_ILW() { microVUf(void) mVU_ILWR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); @@ -747,7 +745,7 @@ microVUf(void) mVU_ILWR() { microVUf(void) mVU_ISW() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { int imm = getVUmem(_Imm11_); @@ -772,7 +770,7 @@ microVUf(void) mVU_ISW() { microVUf(void) mVU_ISWR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { mVUallocVIa(gprT1, _Ft_); @@ -1006,7 +1004,7 @@ microVUf(void) mVU_WAITQ() { microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); @@ -1015,7 +1013,7 @@ microVUf(void) mVU_XTOP() { microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } 
analyzeVIreg2(_Ft_, 1); } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); mVUallocVIb(gprT1, _Ft_); @@ -1055,64 +1053,90 @@ microVUf(void) mVU_XGKICK() { microVUf(void) mVU_B() { microVU* mVU = mVUx; - mVUbranch = 1; + mVUbranch = 1; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ } } microVUf(void) mVU_BAL() { microVU* mVU = mVUx; - mVUbranch = 1; - if (!recPass) { analyzeVIreg2(_Ft_, 1); } - else { - MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff); - mVUallocVIb(gprT1, _Ft_); - } + mVUbranch = 2; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); } + else {} } microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 3; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 4; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 5; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 6; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBLTZ() { microVU* mVU 
= mVUx; - mVUbranch = 2; + mVUbranch = 7; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 8; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_JR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 9; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 10; if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } - else {} } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 5a1267a556..aaf45425d4 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -143,6 +143,7 @@ declareAllVariables #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles +#define mVUcount mVUallocInfo.count #define mVUstall mVUallocInfo.maxStall #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp @@ -153,6 +154,7 @@ declareAllVariables #define curI mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; } #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incCycles(x) { mVUincCycles(x); } #define _isNOP (1<<0) // Skip Lower Instruction @@ -181,6 +183,7 @@ declareAllVariables #define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) #define _writesVI (1<<25) // Current Instruction 
writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction +//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -208,6 +211,7 @@ declareAllVariables #define memReadIt (mVUinfo & (1<<24)) #define writesVI (mVUinfo & (1<<25)) #define swapOps (mVUinfo & (1<<26)) +//#define isBranch2 (mVUinfo & (1<<27)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) @@ -226,7 +230,7 @@ declareAllVariables #define mVUdebug1() {} #endif -#define mVUcachCheck(x) { \ - uptr diff = mVU->ptr - mVU->cache; \ - if (diff > x) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ +#define mVUcachCheck(start, limit) { \ + uptr diff = mVU->ptr - start; \ + if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 208a3852c7..4154048046 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -251,7 +251,7 @@ microVUt(void) mVUaddrFix(int gprReg) { u8 *jmpA, *jmpB; CMP32ItoR(EAX, 0x400); jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs - AND32ItoR(EAX, 0x43f); + AND32ItoR(EAX, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! jmpB = JMP8(0); x86SetJ8(jmpA); AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around From a388d2969c172b9266309e665d800f8148969ba6 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 13:23:05 +0000 Subject: [PATCH 38/40] Fixed bug from Issue 144. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@954 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 4d2ec15a8c..e6155bc4ea 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -607,7 +607,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available { // v4-32 - if ((vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) + if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) { vifRegs->num -= size >> 4; memcpy_fast((u8*)dest, cdata, size); From 4df3f80d30acbbab927c87ff7a31a7275ae28420 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 17:45:14 +0000 Subject: [PATCH 39/40] Optimized and split up the unpack call a bit so less checks are being run, should bring an overall speed increase. Also got rid of some duplicate pointer rubbish which was all over the place. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 89 +++--- pcsx2/Vif.h | 38 +-- pcsx2/VifDma.cpp | 463 +++++++++++++++-------------- pcsx2/x86/aVif.S | 20 +- pcsx2/x86/ix86-32/aVif_proc-32.asm | 20 +- 5 files changed, 331 insertions(+), 299 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index f20103e72d..cb81e5f6b2 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -25,10 +25,10 @@ #include "Vif.h" #include "VifDma.h" -VIFregisters *_vifRegs; -u32* _vifRow = NULL, *_vifCol = NULL; -u32* _vifMaskRegs = NULL; -vifStruct *_vif; +VIFregisters *vifRegs; +u32* vifRow = NULL, *vifCol = NULL; +u32* vifMaskRegs = NULL; +vifStruct *vif; PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]); @@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) int n; u32 vifRowReg = getVifRowRegs(offnum); - if (_vifRegs->code & 0x10000000) + if (vifRegs->code & 0x10000000) { - switch (_vif->cl) + switch (vif->cl) { case 0: if (offnum == OFFSET_X) - n = (_vifRegs->mask) & 0x3; + n = (vifRegs->mask) & 0x3; else - n = (_vifRegs->mask >> (offnum * 2)) & 0x3; + n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; case 1: - n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; case 2: - n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; default: - n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; } } @@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) switch (n) { case 0: - if ((_vif->cmd & 0x6F) == 0x6f) + if ((vif->cmd & 0x6F) == 0x6f) { dest = data; } - else switch (_vifRegs->mode) + else switch (vifRegs->mode) { case 1: dest = data + vifRowReg; @@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = vifRowReg; break; case 
2: - dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl); + dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl); break; case 3: - //Masked so don't do anything break; } -// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); +// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data); } template @@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size) template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if (_vifRegs->offset == OFFSET_X) + if (vifRegs->offset == OFFSET_X) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Y; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; size--; } } - if (_vifRegs->offset == OFFSET_Y) + if (vifRegs->offset == OFFSET_Y) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data); - _vifRegs->offset = OFFSET_Z; + writeXYZW(vifRegs->offset, *dest++, *data); + vifRegs->offset = OFFSET_Z; size--; } } - if (_vifRegs->offset == OFFSET_Z) + if (vifRegs->offset == OFFSET_Z) { - writeXYZW(_vifRegs->offset, *dest++, *dest-2); - _vifRegs->offset = OFFSET_W; + writeXYZW(vifRegs->offset, *dest++, *dest-2); + vifRegs->offset = OFFSET_W; } - if (_vifRegs->offset == OFFSET_W) + if (vifRegs->offset == OFFSET_W) { - writeXYZW(_vifRegs->offset, *dest, *data); - _vifRegs->offset = OFFSET_X; + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X) + if(vifRegs->offset == OFFSET_X) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Y; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; size--; } } - if(_vifRegs->offset == OFFSET_Y) + if(vifRegs->offset == OFFSET_Y) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Z; + 
writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Z; size--; } } - if(_vifRegs->offset == OFFSET_Z) + if(vifRegs->offset == OFFSET_Z) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_W; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_W; size--; } } - if(_vifRegs->offset == OFFSET_W) + if(vifRegs->offset == OFFSET_W) { //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - writeXYZW(_vifRegs->offset, *dest, *data); - _vifRegs->offset = OFFSET_X; + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; } } @@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset++; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset++; size--; } - if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; + if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) @@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer() return ret; } -static __forceinline int mfifoVIF1chain() +static __forceinline int mfifo_VIF1chain() { int ret; @@ -531,7 +530,7 @@ void vifMFIFOInterrupt() { g_vifCycles = 0; - if (vif1.inprogress == 1) mfifoVIF1chain(); + if (vif1.inprogress == 1) mfifo_VIF1chain(); if (vif1.irq && vif1.tag.size == 0) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index fa318d7618..e01cb32bd8 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -81,9 +81,9 @@ struct VIFregisters { extern "C" { // these use cdecl for Asm code references. 
- extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; extern u32* _vifCol; } @@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) switch (reg) { case 0: - _vifRegs->r0 = data; + vifRegs->r0 = data; break; case 1: - _vifRegs->r1 = data; + vifRegs->r1 = data; break; case 2: - _vifRegs->r2 = data; + vifRegs->r2 = data; break; case 3: - _vifRegs->r3 = data; + vifRegs->r3 = data; break; jNO_DEFAULT; } @@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg) switch (reg) { case 0: - return _vifRegs->r0; + return vifRegs->r0; break; case 1: - return _vifRegs->r1; + return vifRegs->r1; break; case 2: - return _vifRegs->r2; + return vifRegs->r2; break; case 3: - return _vifRegs->r3; + return vifRegs->r3; break; jNO_DEFAULT; } @@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data) switch (reg) { case 0: - _vifRegs->c0 = data; + vifRegs->c0 = data; break; case 1: - _vifRegs->c1 = data; + vifRegs->c1 = data; break; case 2: - _vifRegs->c2 = data; + vifRegs->c2 = data; break; case 3: - _vifRegs->c3 = data; + vifRegs->c3 = data; break; jNO_DEFAULT; } @@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg) switch (reg) { case 0: - return _vifRegs->c0; + return vifRegs->c0; break; case 1: - return _vifRegs->c1; + return vifRegs->c1; break; case 2: - return _vifRegs->c2; + return vifRegs->c2; break; case 3: - return _vifRegs->c3; + return vifRegs->c3; break; jNO_DEFAULT; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index e6155bc4ea..8efee3181d 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -37,10 +37,10 @@ using namespace std; // for min / max extern "C" { // Need cdecl on these for ASM references. 
- extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; - extern u32* _vifCol; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; + extern u32* vifCol; } PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); @@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]); -extern vifStruct *_vif; +extern vifStruct *vif; vifStruct vif0, vif1; @@ -254,21 +254,9 @@ __forceinline static int _limit(int a, int max) static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum) { const VIFUnpackFuncTable *unpack; - vifStruct *vif; - VIFregisters *vifRegs; + unpack = &VIFfuncTable[ unpackType ]; - if (VIFdmanum == 0) - { - vif = &vif0; - vifRegs = vif0Regs; - } - else - { - vif = &vif1; - vifRegs = vif1Regs; - } - switch (unpackType) { case 0x0: @@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int } //This is sorted out later - vif->tag.addr &= ~0xf; + if((vif->tag.addr & 0xf) != (vifRegs->offset * 4)) + { + VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr); + vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4); + } + } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; UNPACKFUNCTYPE func; const VIFUnpackFuncTable *ft; - vifStruct *vif; - VIFregisters *vifRegs; VURegs * VU; u8 *cdata = (u8*)data; + #ifdef _DEBUG u32 memsize = VIFdmanum ? 
0x4000 : 0x1000; #endif - _mm_prefetch((char*)data, _MM_HINT_NTA); - if (VIFdmanum == 0) { VU = &VU0; - vif = &vif0; - vifRegs = vif0Regs; assert(v->addr < memsize); } else { - VU = &VU1; - vif = &vif1; - vifRegs = vif1Regs; assert(v->addr < memsize); - - if (vu1MicroIsSkipping()) - { - // don't process since the frame is dummy - vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); - return; - } } dest = (u32*)(VU->Mem + v->addr); - VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); - - VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); -#ifdef _DEBUG - if (v->size != size) - { - VIF_LOG("*PCSX2*: warning v->size != size"); - } - - if ((v->addr + size*4) > memsize) - { - Console::Notice("*PCSX2*: fixme unpack overflow"); - Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); - } -#endif // The unpack type unpackType = v->cmd & 0xf; - - if (size == 0) - { - VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask); - } - - _mm_prefetch((char*)data + 128, _MM_HINT_NTA); - _vifRegs = (VIFregisters*)vifRegs; - _vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks; - _vif = vif; - _vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; ft = &VIFfuncTable[ unpackType ]; - func = _vif->usn ? ft->funcU : ft->funcS; + func = vif->usn ? 
ft->funcU : ft->funcS; size <<= 2; @@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma memsize = size; #endif - if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); - - - if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000)) - { - //Sanity Check (memory overflow) - DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000); - - } - - if (_vifRegs->offset > 0) - { - int destinc, unpacksize; + if(vif1Regs->offset != 0) + { + int unpacksize; //This is just to make sure the alignment isnt loopy on a split packet - if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) + if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) { DevCon::Error("Warning: Unpack alignment error"); } @@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); - // SSE doesn't handle such small data - if (vifRegs->offset < (u32)ft->qsize) + if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) { - if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) - { - Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); - } + DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); + } unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset)); - } - else - { - unpacksize = 0; - Console::WriteLn("Unpack align offset = 0"); - } - VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); - destinc = (4 - ft->qsize) + unpacksize; + + VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - 
ft->qsize) + unpacksize, vifRegs->offset); + func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; - cdata += unpacksize * ft->dsize; - + vifRegs->num--; ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) { if (vifRegs->cycle.cl != vifRegs->cycle.wl) - dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + { + vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4; + //dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + } else - dest += destinc; + { + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + //dest += destinc; + } vif->cl = 0; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); + return size >> 2; + } else { - dest += destinc; + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + dest += (4 - ft->qsize) + unpacksize; + cdata += unpacksize * ft->dsize; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); } - - VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); - } + - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + if (vif->cl != 0) //Check alignment for SSE unpacks { #ifdef _DEBUG @@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma int incdest; - if (vif->cl != 0) + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { // continuation from last stream @@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (vif->cl == vifRegs->cycle.wl) { dest += incdest; + vif->tag.addr += incdest * 4; vif->cl = 0; break; } dest += 4; + vif->tag.addr += 16; } - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow = VIFdmanum ? 
g_vifRow1 : g_vifRow0; + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } } + } + return size>>2; +} - if ((size >= ft->gsize) && !(v->addr&0xf)) + +static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +{ + u32 *dest; + u32 unpackType; + UNPACKFUNCTYPE func; + const VIFUnpackFuncTable *ft; + VURegs * VU; + u8 *cdata = (u8*)data; + +#ifdef _DEBUG + u32 memsize = VIFdmanum ? 0x4000 : 0x1000; +#endif + + _mm_prefetch((char*)data, _MM_HINT_NTA); + + if (VIFdmanum == 0) + { + VU = &VU0; + //vifRegs = vif0Regs; + assert(v->addr < memsize); + } + else + { + + VU = &VU1; + //vifRegs = vif1Regs; + assert(v->addr < memsize); + + if (vu1MicroIsSkipping()) + { + // don't process since the frame is dummy + vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); + return; + } + } + + dest = (u32*)(VU->Mem + v->addr); + + VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); + + VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); + + // The unpack type + unpackType = v->cmd & 0xf; + + _mm_prefetch((char*)data + 128, _MM_HINT_NTA); + + ft = &VIFfuncTable[ unpackType ]; + func = vif->usn ? ft->funcU : ft->funcS; + + size <<= 2; + +#ifdef _DEBUG + memsize = size; +#endif + + +#ifdef VIFUNPACKDEBUG + + if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Sanity Check (memory overflow) + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 
0x4000 : 0x1000); + + } +#endif + + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + { + +#ifdef _DEBUG + static int s_count = 0; +#endif + + + if (size >= ft->gsize) { const UNPACKPARTFUNCTYPESSE* pfn; int writemask; @@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle; + if(vifRegs->mode == 2) + { + //Update the reg rows for non SSE + vifRegs->r0 = vifRow[0]; + vifRegs->r1 = vifRow[1]; + vifRegs->r2 = vifRow[2]; + vifRegs->r3 = vifRow[3]; + } + + // if size is left over, update the src,dst pointers if (writemask > 0) { @@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma cdata += left * ft->gsize; dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16); vifRegs->num -= left; - _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = writemask; + + if (size >= ft->dsize && vifRegs->num > 0) + { + //VIF_LOG("warning, end with size = %d", size); + + /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; + func(dest, (u32*)cdata, size / ft->dsize); + size = 0; + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); + } } else { vifRegs->num -= size / ft->gsize; - if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = 0; } - size = writemask; - - _vifRegs->r0 = _vifRow[0]; - _vifRegs->r1 = _vifRow[1]; - _vifRegs->r2 = _vifRow[2]; - _vifRegs->r3 = _vifRow[3]; - } - else - { - - if ((unpackType == 0xC) && 
(vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available - { - // v4-32 - if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) - { - vifRegs->num -= size >> 4; - memcpy_fast((u8*)dest, cdata, size); - size = 0; - return; - } - } - - incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; - - while ((size >= ft->gsize) && (vifRegs->num > 0)) - { - func(dest, (u32*)cdata, ft->qsize); - cdata += ft->gsize; - size -= ft->gsize; - - vifRegs->num--; - //if(vifRegs->num == loophere) dest = (u32*)(VU->Mem); - ++vif->cl; - if (vif->cl == vifRegs->cycle.wl) - { - dest += incdest; - vif->cl = 0; - } - else - { - dest += 4; - } - - } - - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; - } - - // used for debugging vif -// { -// int i, j, k; -// u32* curdest = olddest; -// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w"); -// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr); -// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle); -// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]); -// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3); -// -// for(i = 0; i < memsize; ) { -// for(k = 0; k < vifRegs->cycle.wl; ++k) { -// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) { -// fprintf(ftemp, "%x ", curdest[4*k+j]); -// } -// } -// -// fprintf(ftemp, "\n"); -// curdest += 4*vifRegs->cycle.cl; -// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl; -// } -// fclose(ftemp); -// } -// s_count++; - - if (size >= ft->dsize && vifRegs->num > 0) + } + else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size); @@ -679,14 +665,20 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma vif->tag.addr += (size 
/ ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; - + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); } - } else /* filling write */ { - VIF_LOG("VIFunpack - filling write"); if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); @@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data) vif0.tag.addr &= 0xfff; vif0.tag.size = len; vif0Regs->offset = 0; + + vifRegs = (VIFregisters*)vif0Regs; + vifMaskRegs = g_vif0Masks; + vif = &vif0; + vifRow = g_vifRow0; } -static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG { if (vif0.vifpacketsize < vif0.tag.size) { - _vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); + vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); vif0.tag.addr += vif0.vifpacketsize << 2; vif0.tag.size -= vif0.vifpacketsize; return vif0.vifpacketsize; @@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG { int ret; - _vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); + vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); ret = vif0.tag.size; vif0.tag.size = 0; vif0.cmd = 0; @@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK { /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); + + 
ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum); + vif0.tag.size -= vif0.vifpacketsize; FreezeXMMRegs(0); return vif0.vifpacketsize; @@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK else { /* we got all the data, transfer it fully */ - int ret; + int ret = vif0.tag.size; - VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); - ret = vif0.tag.size; - vif0.tag.size = 0; - vif0.cmd = 0; - FreezeXMMRegs(0); - return ret; + //Align data after a split transfer first + if(vif0Regs->offset != 0 || vif0.cl != 0) + { + vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + data += ret - vif0.tag.size; + if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } @@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data) vif1.cl = 0; vif1.tag.addr <<= 4; vif1.tag.cmd = vif1.cmd; + + vifRegs = (VIFregisters*)vif1Regs; + vifMaskRegs = g_vif1Masks; + vif = &vif1; + vifRow = g_vifRow1; } -static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data) { if (vif1.vifpacketsize < vif1.tag.size) { - _vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); + vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); vif1.tag.addr += vif1.vifpacketsize << 2; vif1.tag.size -= vif1.vifpacketsize; return vif1.vifpacketsize; @@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data) else { 
int ret; - _vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); + vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); ret = vif1.tag.size; vif1.tag.size = 0; vif1.cmd = 0; @@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data) /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); + + ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum); vif1.tag.size -= vif1.vifpacketsize; FreezeXMMRegs(0); return vif1.vifpacketsize; } else { - int ret; - /* we got all the data, transfer it fully */ - VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); - ret = vif1.tag.size; - vif1.tag.size = 0; - vif1.cmd = 0; - FreezeXMMRegs(0); - return ret; + int ret = vif1.tag.size; + + if(vif1Regs->offset != 0 || vif1.cl != 0) + { + vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + data += ret - vif1.tag.size; + if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + /* we got all the data, transfer it fully */ + VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } diff --git a/pcsx2/x86/aVif.S b/pcsx2/x86/aVif.S index 05a2e9248f..e4b64685f6 100644 --- a/pcsx2/x86/aVif.S +++ b/pcsx2/x86/aVif.S @@ -18,9 +18,9 @@ */ .intel_syntax noprefix -.extern _vifRegs -.extern _vifMaskRegs -.extern _vifRow +.extern vifRegs +.extern vifMaskRegs +.extern vifRow #define VIF_ESP esp #define VIF_SRC esi @@ -108,7 +108,7 @@ // setting up masks #define UNPACK_Setup_Mask_SSE(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \ @@ -118,7 +118,7 @@ #define UNPACK_Start_Setup_Mask_SSE_0(CL) 
UNPACK_Setup_Mask_SSE(CL) #define UNPACK_Start_Setup_Mask_SSE_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ pand XMM_ROWMASK, XMM_ROW; \ @@ -129,12 +129,12 @@ #define UNPACK_Setup_Mask_SSE_0_1(CL) #define UNPACK_Setup_Mask_SSE_1_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ // ignore CL, since vif.cycle.wl == 1 #define UNPACK_Setup_Mask_SSE_2_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ @@ -1312,9 +1312,9 @@ #pragma warning(disable:4731) #define SAVE_ROW_REG_BASE \ - mov VIF_TMPADDR, _vifRow; \ + mov VIF_TMPADDR, vifRow; \ movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \ psrldq XMM_ROW, 4; \ movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \ @@ -1349,7 +1349,7 @@ .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \ UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \ INIT_ARGS(); \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \ movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \ sub VIF_INC, VIF_SAVEEBX; \ diff --git a/pcsx2/x86/ix86-32/aVif_proc-32.asm b/pcsx2/x86/ix86-32/aVif_proc-32.asm index 12c8b969b4..62fd377795 100644 --- a/pcsx2/x86/ix86-32/aVif_proc-32.asm +++ b/pcsx2/x86/ix86-32/aVif_proc-32.asm @@ -5,9 +5,9 @@ .xmm -extern _vifRegs:ptr -extern _vifMaskRegs:ptr -extern _vifRow:ptr +extern vifRegs:ptr +extern vifMaskRegs:ptr +extern vifRow:ptr extern s_TempDecompress:ptr @@ -104,7 +104,7 @@ 
UNPACK_Regular_SSE_2 macro r0 UNPACK_Setup_Mask_SSE macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] movdqa xmm3, [eax + 64*(CL)] @@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL endm UNPACK_Start_Setup_Mask_SSE_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] pand xmm4, xmm6 @@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL UNPACK_Setup_Mask_SSE_0_1 macro CL endm UNPACK_Setup_Mask_SSE_1_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm3, [eax + 64*(0)] endm UNPACK_Setup_Mask_SSE_2_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(0) + 16] movdqa xmm5, [eax + 64*(0) + 32] movdqa xmm3, [eax + 64*(0)] @@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType SAVE_ROW_REG_BASE macro - mov eax, [_vifRow] + mov eax, [vifRow] movdqa [eax], xmm6 - mov eax, [_vifRegs] + mov eax, [vifRegs] movss dword ptr [eax+0100h], xmm6 psrldq xmm6, 4 movss dword ptr [eax+0110h], xmm6 @@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE push ebx INIT_ARGS - mov eax, [_vifRegs] + mov eax, [vifRegs] movzx ecx, byte ptr [eax + 040h] movzx ebx, byte ptr [eax + 041h] sub ecx, ebx From e100933069a8894771b44378f9207a912755557b Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 19:23:39 +0000 Subject: [PATCH 40/40] Look over there! A THREE HEADED MONKEY! 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@956 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 8efee3181d..4d466169cc 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -281,7 +281,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; case 0x6: vif->tag.addr += (size / unpack->gsize) * 16; - DevCon::Notice("Processing V2-8 skip, size = %d", params size); + VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size); break; case 0x8: vif->tag.addr += (size / unpack->gsize) * 16; @@ -289,7 +289,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; case 0x9: vif->tag.addr += (size / unpack->gsize) * 16; - DevCon::Notice("Processing V3-16 skip, size = %d", params size); + VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size); break; case 0xA: vif->tag.addr += (size / unpack->gsize) * 16;